diff --git a/crates/language_model/src/model/cloud_model.rs b/crates/language_model/src/model/cloud_model.rs
index be0812eab90e701214e74dc067a1e83c695969d2..2ce48931f6d4db0a944f1bd078cab728f2c7636c 100644
--- a/crates/language_model/src/model/cloud_model.rs
+++ b/crates/language_model/src/model/cloud_model.rs
@@ -102,6 +102,8 @@ impl CloudModel {
                 | open_ai::Model::FourTurbo
                 | open_ai::Model::FourOmni
                 | open_ai::Model::FourOmniMini
+                | open_ai::Model::O1Mini
+                | open_ai::Model::O1Preview
                 | open_ai::Model::Custom { .. } => {
                     LanguageModelAvailability::RequiresPlan(Plan::ZedPro)
                 }
diff --git a/crates/language_model/src/provider/open_ai.rs b/crates/language_model/src/provider/open_ai.rs
index 98424a23aad8fe2249610e829a752fe8ec1e3649..222c1530412aab1cb24abe85a124434ce5f0dc4e 100644
--- a/crates/language_model/src/provider/open_ai.rs
+++ b/crates/language_model/src/provider/open_ai.rs
@@ -372,10 +372,13 @@ pub fn count_open_ai_tokens(
                 })
                 .collect::<Vec<_>>();
 
-            if let open_ai::Model::Custom { .. } = model {
-                tiktoken_rs::num_tokens_from_messages("gpt-4", &messages)
-            } else {
-                tiktoken_rs::num_tokens_from_messages(model.id(), &messages)
+            match model {
+                open_ai::Model::Custom { .. }
+                | open_ai::Model::O1Mini
+                | open_ai::Model::O1Preview => {
+                    tiktoken_rs::num_tokens_from_messages("gpt-4", &messages)
+                }
+                _ => tiktoken_rs::num_tokens_from_messages(model.id(), &messages),
             }
         })
         .boxed()
diff --git a/crates/language_model/src/request.rs b/crates/language_model/src/request.rs
index 4162e9df87037ab6eb98bea184a40c76d6f8fc11..dd480b8aaf38c22a99e8878b4dd8db73f2a125a9 100644
--- a/crates/language_model/src/request.rs
+++ b/crates/language_model/src/request.rs
@@ -241,6 +241,7 @@ pub struct LanguageModelRequest {
 
 impl LanguageModelRequest {
     pub fn into_open_ai(self, model: String, max_output_tokens: Option<u32>) -> open_ai::Request {
+        let stream = !model.starts_with("o1-");
         open_ai::Request {
             model,
             messages: self
@@ -259,7 +260,7 @@ impl LanguageModelRequest {
                 },
             })
             .collect(),
-            stream: true,
+            stream,
             stop: self.stop,
             temperature: self.temperature,
             max_tokens: max_output_tokens,
diff --git a/crates/open_ai/src/open_ai.rs b/crates/open_ai/src/open_ai.rs
index 7b0294bd9c0835c544cf875b99f2eb645faa4f22..e67fe1af27cdb8be652c5f24a7ab345dacdc2aa7 100644
--- a/crates/open_ai/src/open_ai.rs
+++ b/crates/open_ai/src/open_ai.rs
@@ -63,17 +63,22 @@ impl From<Role> for String {
 #[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
 #[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
 pub enum Model {
     #[serde(rename = "gpt-3.5-turbo", alias = "gpt-3.5-turbo-0613")]
     ThreePointFiveTurbo,
     #[serde(rename = "gpt-4", alias = "gpt-4-0613")]
     Four,
-    #[serde(rename = "gpt-4-turbo-preview", alias = "gpt-4-1106-preview")]
+    #[serde(rename = "gpt-4-turbo", alias = "gpt-4-turbo-preview")]
     FourTurbo,
     #[serde(rename = "gpt-4o", alias = "gpt-4o-2024-05-13")]
     #[default]
     FourOmni,
     #[serde(rename = "gpt-4o-mini", alias = "gpt-4o-mini-2024-07-18")]
     FourOmniMini,
+    #[serde(rename = "o1-preview", alias = "o1-preview-2024-09-12")]
+    O1Preview,
+    #[serde(rename = "o1-mini", alias = "o1-mini-2024-09-12")]
+    O1Mini,
+
     #[serde(rename = "custom")]
     Custom {
         name: String,
@@ -93,6 +98,8 @@ impl Model {
             "gpt-4-turbo-preview" => Ok(Self::FourTurbo),
             "gpt-4o" => Ok(Self::FourOmni),
             "gpt-4o-mini" => Ok(Self::FourOmniMini),
+            "o1-preview" => Ok(Self::O1Preview),
+            "o1-mini" => Ok(Self::O1Mini),
             _ => Err(anyhow!("invalid model id")),
         }
     }
@@ -101,9 +108,11 @@ impl Model {
         match self {
             Self::ThreePointFiveTurbo => "gpt-3.5-turbo",
             Self::Four => "gpt-4",
-            Self::FourTurbo => "gpt-4-turbo-preview",
+            Self::FourTurbo => "gpt-4-turbo",
             Self::FourOmni => "gpt-4o",
             Self::FourOmniMini => "gpt-4o-mini",
+            Self::O1Preview => "o1-preview",
+            Self::O1Mini => "o1-mini",
             Self::Custom { name, .. } => name,
         }
     }
@@ -115,6 +124,8 @@ impl Model {
             Self::FourTurbo => "gpt-4-turbo",
             Self::FourOmni => "gpt-4o",
             Self::FourOmniMini => "gpt-4o-mini",
+            Self::O1Preview => "o1-preview",
+            Self::O1Mini => "o1-mini",
             Self::Custom {
                 name, display_name, ..
             } => display_name.as_ref().unwrap_or(name),
@@ -123,11 +134,13 @@ impl Model {
 
     pub fn max_token_count(&self) -> usize {
         match self {
-            Self::ThreePointFiveTurbo => 4096,
+            Self::ThreePointFiveTurbo => 16385,
             Self::Four => 8192,
             Self::FourTurbo => 128000,
             Self::FourOmni => 128000,
             Self::FourOmniMini => 128000,
+            Self::O1Preview => 128000,
+            Self::O1Mini => 128000,
             Self::Custom { max_tokens, .. } => *max_tokens,
         }
     }
diff --git a/docs/src/assistant/configuration.md b/docs/src/assistant/configuration.md
index 0fd242c6191b4d79ba4c4d96b05b930525d38669..4d9870e8960a0a601b6485f237fffbb6f00e0ea2 100644
--- a/docs/src/assistant/configuration.md
+++ b/docs/src/assistant/configuration.md
@@ -165,7 +165,7 @@ Zed will also use the `OPENAI_API_KEY` environment variable if it's defined.
 
 #### OpenAI Custom Models {#openai-custom-models}
 
-The Zed Assistant comes pre-configured to use the latest version for common models (GPT-3.5 Turbo, GPT-4, GPT-4 Turbo, GPT-4o, GPT-4o mini). If you wish to use alternate models, perhaps a preview release or a dated model release, you can do so by adding the following to your Zed `settings.json`:
+The Zed Assistant comes pre-configured to use the latest version for common models (GPT-3.5 Turbo, GPT-4, GPT-4 Turbo, GPT-4o, GPT-4o mini). If you wish to use alternate models, perhaps a preview release or a dated model release, or if you wish to control the request parameters, you can do so by adding the following to your Zed `settings.json`:
 
 ```json
 {
@@ -176,6 +176,12 @@ The Zed Assistant comes pre-configured to use the latest version for common mode
           "provider": "openai",
           "name": "gpt-4o-2024-08-06",
           "max_tokens": 128000
+        },
+        {
+          "name": "o1-mini",
+          "display_name": "o1-mini",
+          "max_tokens": 128000,
+          "max_completion_tokens": 20000
         }
       ]
     }
@@ -183,7 +189,7 @@ The Zed Assistant comes pre-configured to use the latest version for common mode
 }
 ```
 
-You must provide the model's Context Window in the `max_tokens` parameter, this can be found [OpenAI Model Docs](https://platform.openai.com/docs/models). Custom models will be listed in the model dropdown in the assistant panel.
+You must provide the model's Context Window in the `max_tokens` parameter; this can be found in the [OpenAI Model Docs](https://platform.openai.com/docs/models). OpenAI `o1` models should also set `max_completion_tokens` to avoid incurring high reasoning token costs. Custom models will be listed in the model dropdown in the assistant panel.
 
 ### Advanced configuration {#advanced-configuration}