Hard code max token counts for supported models (#9675)

2024-12-26 10:40:54 +00:00 · 2024-03-21 20:30:33 -06:00 · 2024-03-21 20:30:33 -06:00 · 6d5787cfdc
commit 6d5787cfdc
parent 441677c90a
3 changed files with 19 additions and 7 deletions
--- a/crates/assistant/src/assistant.rs
+++ b/crates/assistant/src/assistant.rs
@@ -97,13 +97,8 @@ impl LanguageModel {

    pub fn max_token_count(&self) -> usize {
        match self {
-            LanguageModel::OpenAi(model) => tiktoken_rs::model::get_context_size(model.id()),
-            LanguageModel::ZedDotDev(model) => match model {
-                ZedDotDevModel::GptThreePointFiveTurbo
-                | ZedDotDevModel::GptFour
-                | ZedDotDevModel::GptFourTurbo => tiktoken_rs::model::get_context_size(model.id()),
-                ZedDotDevModel::Custom(_) => 30720, // TODO: Base this on the selected model.
-            },
+            LanguageModel::OpenAi(model) => model.max_token_count(),
+            LanguageModel::ZedDotDev(model) => model.max_token_count(),
        }
    }

--- a/crates/assistant/src/assistant_settings.rs
+++ b/crates/assistant/src/assistant_settings.rs
@@ -109,6 +109,15 @@ impl ZedDotDevModel {
            Self::Custom(id) => id.as_str(),
        }
    }
+
+    pub fn max_token_count(&self) -> usize {
+        match self {
+            Self::GptThreePointFiveTurbo => 2048,
+            Self::GptFour => 4096,
+            Self::GptFourTurbo => 128000,
+            Self::Custom(_) => 4096, // TODO: Make this configurable
+        }
+    }
 }

 #[derive(Copy, Clone, Default, Debug, Serialize, Deserialize, JsonSchema)]
--- a/crates/open_ai/src/open_ai.rs
+++ b/crates/open_ai/src/open_ai.rs
@@ -72,6 +72,14 @@ impl Model {
            Self::FourTurbo => "gpt-4-turbo",
        }
    }
+
+    pub fn max_token_count(&self) -> usize {
+        match self {
+            Model::ThreePointFiveTurbo => 4096,
+            Model::Four => 8192,
+            Model::FourTurbo => 128000,
+        }
+    }
 }

 #[derive(Debug, Serialize)]