OpenAI o1-preview and o1-mini support (#17796)

Created by Peter Tripp, Jason Mancuso, and Bennet

Release Notes:

- Added support for OpenAI o1-mini and o1-preview models.

---------

Co-authored-by: Jason Mancuso <7891333+jvmncs@users.noreply.github.com>
Co-authored-by: Bennet <bennet@zed.dev>

Change summary

crates/language_model/src/model/cloud_model.rs |  2 +
crates/language_model/src/provider/open_ai.rs  | 11 +++++--
crates/language_model/src/request.rs           |  3 +
crates/open_ai/src/open_ai.rs                  | 27 ++++++++++++++-----
docs/src/assistant/configuration.md            | 10 +++++-
5 files changed, 39 insertions(+), 14 deletions(-)

Detailed changes

crates/language_model/src/model/cloud_model.rs 🔗

@@ -102,6 +102,8 @@ impl CloudModel {
                 | open_ai::Model::FourTurbo
                 | open_ai::Model::FourOmni
                 | open_ai::Model::FourOmniMini
+                | open_ai::Model::O1Mini
+                | open_ai::Model::O1Preview
                 | open_ai::Model::Custom { .. } => {
                     LanguageModelAvailability::RequiresPlan(Plan::ZedPro)
                 }

crates/language_model/src/provider/open_ai.rs 🔗

@@ -372,10 +372,13 @@ pub fn count_open_ai_tokens(
                 })
                 .collect::<Vec<_>>();
 
-            if let open_ai::Model::Custom { .. } = model {
-                tiktoken_rs::num_tokens_from_messages("gpt-4", &messages)
-            } else {
-                tiktoken_rs::num_tokens_from_messages(model.id(), &messages)
+            match model {
+                open_ai::Model::Custom { .. }
+                | open_ai::Model::O1Mini
+                | open_ai::Model::O1Preview => {
+                    tiktoken_rs::num_tokens_from_messages("gpt-4", &messages)
+                }
+                _ => tiktoken_rs::num_tokens_from_messages(model.id(), &messages),
             }
         })
         .boxed()

crates/language_model/src/request.rs 🔗

@@ -241,6 +241,7 @@ pub struct LanguageModelRequest {
 
 impl LanguageModelRequest {
     pub fn into_open_ai(self, model: String, max_output_tokens: Option<u32>) -> open_ai::Request {
+        let stream = !model.starts_with("o1-");
         open_ai::Request {
             model,
             messages: self
@@ -259,7 +260,7 @@ impl LanguageModelRequest {
                     },
                 })
                 .collect(),
-            stream: true,
+            stream,
             stop: self.stop,
             temperature: self.temperature,
             max_tokens: max_output_tokens,

crates/open_ai/src/open_ai.rs 🔗

@@ -63,17 +63,22 @@ impl From<Role> for String {
 #[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
 #[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
 pub enum Model {
-    #[serde(rename = "gpt-3.5-turbo", alias = "gpt-3.5-turbo-0613")]
+    #[serde(rename = "gpt-3.5-turbo", alias = "gpt-3.5-turbo")]
     ThreePointFiveTurbo,
-    #[serde(rename = "gpt-4", alias = "gpt-4-0613")]
+    #[serde(rename = "gpt-4", alias = "gpt-4")]
     Four,
-    #[serde(rename = "gpt-4-turbo-preview", alias = "gpt-4-1106-preview")]
+    #[serde(rename = "gpt-4-turbo", alias = "gpt-4-turbo")]
     FourTurbo,
-    #[serde(rename = "gpt-4o", alias = "gpt-4o-2024-05-13")]
+    #[serde(rename = "gpt-4o", alias = "gpt-4o")]
     #[default]
     FourOmni,
-    #[serde(rename = "gpt-4o-mini", alias = "gpt-4o-mini-2024-07-18")]
+    #[serde(rename = "gpt-4o-mini", alias = "gpt-4o-mini")]
     FourOmniMini,
+    #[serde(rename = "o1-preview", alias = "o1-preview")]
+    O1Preview,
+    #[serde(rename = "o1-mini", alias = "o1-mini")]
+    O1Mini,
+
     #[serde(rename = "custom")]
     Custom {
         name: String,
@@ -93,6 +98,8 @@ impl Model {
             "gpt-4-turbo-preview" => Ok(Self::FourTurbo),
             "gpt-4o" => Ok(Self::FourOmni),
             "gpt-4o-mini" => Ok(Self::FourOmniMini),
+            "o1-preview" => Ok(Self::O1Preview),
+            "o1-mini" => Ok(Self::O1Mini),
             _ => Err(anyhow!("invalid model id")),
         }
     }
@@ -101,9 +108,11 @@ impl Model {
         match self {
             Self::ThreePointFiveTurbo => "gpt-3.5-turbo",
             Self::Four => "gpt-4",
-            Self::FourTurbo => "gpt-4-turbo-preview",
+            Self::FourTurbo => "gpt-4-turbo",
             Self::FourOmni => "gpt-4o",
             Self::FourOmniMini => "gpt-4o-mini",
+            Self::O1Preview => "o1-preview",
+            Self::O1Mini => "o1-mini",
             Self::Custom { name, .. } => name,
         }
     }
@@ -115,6 +124,8 @@ impl Model {
             Self::FourTurbo => "gpt-4-turbo",
             Self::FourOmni => "gpt-4o",
             Self::FourOmniMini => "gpt-4o-mini",
+            Self::O1Preview => "o1-preview",
+            Self::O1Mini => "o1-mini",
             Self::Custom {
                 name, display_name, ..
             } => display_name.as_ref().unwrap_or(name),
@@ -123,11 +134,13 @@ impl Model {
 
     pub fn max_token_count(&self) -> usize {
         match self {
-            Self::ThreePointFiveTurbo => 4096,
+            Self::ThreePointFiveTurbo => 16385,
             Self::Four => 8192,
             Self::FourTurbo => 128000,
             Self::FourOmni => 128000,
             Self::FourOmniMini => 128000,
+            Self::O1Preview => 128000,
+            Self::O1Mini => 128000,
             Self::Custom { max_tokens, .. } => *max_tokens,
         }
     }

docs/src/assistant/configuration.md 🔗

@@ -165,7 +165,7 @@ Zed will also use the `OPENAI_API_KEY` environment variable if it's defined.
 
 #### OpenAI Custom Models {#openai-custom-models}
 
-The Zed Assistant comes pre-configured to use the latest version for common models (GPT-3.5 Turbo, GPT-4, GPT-4 Turbo, GPT-4o, GPT-4o mini). If you wish to use alternate models, perhaps a preview release or a dated model release, you can do so by adding the following to your Zed `settings.json`:
+The Zed Assistant comes pre-configured to use the latest version for common models (GPT-3.5 Turbo, GPT-4, GPT-4 Turbo, GPT-4o, GPT-4o mini). If you wish to use alternate models — perhaps a preview release or a dated model release — or if you wish to control the request parameters, you can do so by adding the following to your Zed `settings.json`:
 
 ```json
 {
@@ -176,6 +176,12 @@ The Zed Assistant comes pre-configured to use the latest version for common mode
           "provider": "openai",
           "name": "gpt-4o-2024-08-06",
           "max_tokens": 128000
+        },
+        {
+          "name": "o1-mini",
+          "display_name": "o1-mini",
+          "max_tokens": 128000,
+          "max_completion_tokens": 20000
         }
       ]
     }
@@ -183,7 +189,7 @@ The Zed Assistant comes pre-configured to use the latest version for common mode
 }
 ```
 
-You must provide the model's Context Window in the `max_tokens` parameter, this can be found [OpenAI Model Docs](https://platform.openai.com/docs/models). Custom models will be listed in the model dropdown in the assistant panel.
+You must provide the model's Context Window in the `max_tokens` parameter; this can be found in the [OpenAI Model Docs](https://platform.openai.com/docs/models). OpenAI `o1` models should set `max_completion_tokens` as well to avoid incurring high reasoning token costs. Custom models will be listed in the model dropdown in the assistant panel.
 
 ### Advanced configuration {#advanced-configuration}