Accept numeric keep alive in Ollama settings (#13046)

This adds the ability to set the Ollama keep alive as an integer number of
seconds, in addition to duration strings such as "10m". A value of `-1` keeps
the model alive indefinitely, until a new model is loaded or Ollama exits.
I've also changed the default from "10m" to `-1` so that models stay ready to go for Zed to use.

Release Notes:

- N/A
This commit is contained in:
Kyle Kelley 2024-06-14 09:35:04 -07:00 committed by GitHub
parent 44f66aa426
commit d9c21b4eb1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -2,6 +2,7 @@ use anyhow::{anyhow, Context, Result};
use futures::{io::BufReader, stream::BoxStream, AsyncBufReadExt, AsyncReadExt, StreamExt}; use futures::{io::BufReader, stream::BoxStream, AsyncBufReadExt, AsyncReadExt, StreamExt};
use http::{AsyncBody, HttpClient, Method, Request as HttpRequest}; use http::{AsyncBody, HttpClient, Method, Request as HttpRequest};
use isahc::config::Configurable; use isahc::config::Configurable;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::{convert::TryFrom, time::Duration}; use std::{convert::TryFrom, time::Duration};
@ -38,12 +39,34 @@ impl From<Role> for String {
} }
} }
// Value of the `keep_alive` field carried by `Model` and `ChatRequest`:
// either a whole number of seconds or a duration string.
//
// `#[serde(untagged)]` means no variant name is written or expected: the value
// is a bare integer or a bare string, and on deserialization the variants are
// tried in declaration order, so keep `Seconds` ahead of `Duration`.
//
// NOTE(review): with the `JsonSchema` derive, the `///` comments on the variants
// are presumably emitted as schema descriptions — treat them as user-facing
// text and confirm before rewording.
#[derive(Clone, Serialize, Deserialize, Debug, Eq, PartialEq, JsonSchema)]
#[serde(untagged)]
pub enum KeepAlive {
// `Seconds(-1)` is the value `KeepAlive::indefinite()` builds to mean "no expiry".
/// Keep model alive for N seconds
Seconds(isize),
// The string is stored as given; nothing in this type parses or validates it.
/// Keep model alive for a fixed duration. Accepts durations like "5m", "10m", "1h", "1d", etc.
Duration(String),
}
impl KeepAlive {
/// Keep model alive until a new model is loaded or until Ollama shuts down
fn indefinite() -> Self {
Self::Seconds(-1)
}
}
impl Default for KeepAlive {
fn default() -> Self {
Self::indefinite()
}
}
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] #[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)] #[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct Model { pub struct Model {
pub name: String, pub name: String,
pub max_tokens: usize, pub max_tokens: usize,
pub keep_alive: Option<String>, pub keep_alive: KeepAlive,
} }
impl Model { impl Model {
@ -51,7 +74,7 @@ impl Model {
Self { Self {
name: name.to_owned(), name: name.to_owned(),
max_tokens: 2048, max_tokens: 2048,
keep_alive: Some("10m".to_owned()), keep_alive: KeepAlive::indefinite(),
} }
} }
@ -81,7 +104,7 @@ pub struct ChatRequest {
pub model: String, pub model: String,
pub messages: Vec<ChatMessage>, pub messages: Vec<ChatMessage>,
pub stream: bool, pub stream: bool,
pub keep_alive: Option<String>, pub keep_alive: KeepAlive,
pub options: Option<ChatOptions>, pub options: Option<ChatOptions>,
} }