Detailed changes
@@ -14,7 +14,9 @@ use language_model::{
LanguageModelToolChoice, LanguageModelToolResultContent, LanguageModelToolUse, MessageContent,
RateLimiter, Role, StopReason, TokenUsage,
};
-use open_router::{Model, ResponseStreamEvent, list_models, stream_completion};
+use open_router::{
+ Model, ModelMode as OpenRouterModelMode, ResponseStreamEvent, list_models, stream_completion,
+};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsStore};
@@ -45,6 +47,39 @@ pub struct AvailableModel {
pub max_completion_tokens: Option<u64>,
pub supports_tools: Option<bool>,
pub supports_images: Option<bool>,
+ pub mode: Option<ModelMode>,
+}
+
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize, JsonSchema)]
+#[serde(tag = "type", rename_all = "lowercase")]
+pub enum ModelMode {
+ #[default]
+ Default,
+ Thinking {
+ budget_tokens: Option<u32>,
+ },
+}
+
+impl From<ModelMode> for OpenRouterModelMode {
+ fn from(value: ModelMode) -> Self {
+ match value {
+ ModelMode::Default => OpenRouterModelMode::Default,
+ ModelMode::Thinking { budget_tokens } => {
+ OpenRouterModelMode::Thinking { budget_tokens }
+ }
+ }
+ }
+}
+
+impl From<OpenRouterModelMode> for ModelMode {
+ fn from(value: OpenRouterModelMode) -> Self {
+ match value {
+ OpenRouterModelMode::Default => ModelMode::Default,
+ OpenRouterModelMode::Thinking { budget_tokens } => {
+ ModelMode::Thinking { budget_tokens }
+ }
+ }
+ }
}
pub struct OpenRouterLanguageModelProvider {
@@ -242,6 +277,7 @@ impl LanguageModelProvider for OpenRouterLanguageModelProvider {
max_tokens: model.max_tokens,
supports_tools: model.supports_tools,
supports_images: model.supports_images,
+ mode: model.mode.clone().unwrap_or_default().into(),
});
}
@@ -403,13 +439,12 @@ pub fn into_open_router(
for message in request.messages {
for content in message.content {
match content {
- MessageContent::Text(text) | MessageContent::Thinking { text, .. } => {
- add_message_content_part(
- open_router::MessagePart::Text { text },
- message.role,
- &mut messages,
- )
- }
+ MessageContent::Text(text) => add_message_content_part(
+ open_router::MessagePart::Text { text },
+ message.role,
+ &mut messages,
+ ),
+ MessageContent::Thinking { .. } => {}
MessageContent::RedactedThinking(_) => {}
MessageContent::Image(image) => {
add_message_content_part(
@@ -479,6 +514,16 @@ pub fn into_open_router(
None
},
usage: open_router::RequestUsage { include: true },
+ reasoning: if let OpenRouterModelMode::Thinking { budget_tokens } = model.mode {
+ Some(open_router::Reasoning {
+ effort: None,
+ max_tokens: budget_tokens,
+ exclude: Some(false),
+ enabled: Some(true),
+ })
+ } else {
+ None
+ },
tools: request
.tools
.into_iter()
@@ -569,8 +614,19 @@ impl OpenRouterEventMapper {
};
let mut events = Vec::new();
+ if let Some(reasoning) = choice.delta.reasoning.clone() {
+ events.push(Ok(LanguageModelCompletionEvent::Thinking {
+ text: reasoning,
+ signature: None,
+ }));
+ }
+
if let Some(content) = choice.delta.content.clone() {
- events.push(Ok(LanguageModelCompletionEvent::Text(content)));
+ // OpenRouter sends an empty content string along with the reasoning content.
+ // Skipping empty content here works around that OpenRouter API quirk.
+ if !content.is_empty() {
+ events.push(Ok(LanguageModelCompletionEvent::Text(content)));
+ }
}
if let Some(tool_calls) = choice.delta.tool_calls.as_ref() {
@@ -53,6 +53,18 @@ pub struct Model {
pub max_tokens: u64,
pub supports_tools: Option<bool>,
pub supports_images: Option<bool>,
+ #[serde(default)]
+ pub mode: ModelMode,
+}
+
+#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
+#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
+pub enum ModelMode {
+ #[default]
+ Default,
+ Thinking {
+ budget_tokens: Option<u32>,
+ },
}
impl Model {
@@ -63,6 +75,7 @@ impl Model {
Some(2000000),
Some(true),
Some(false),
+ Some(ModelMode::Default),
)
}
@@ -76,6 +89,7 @@ impl Model {
max_tokens: Option<u64>,
supports_tools: Option<bool>,
supports_images: Option<bool>,
+ mode: Option<ModelMode>,
) -> Self {
Self {
name: name.to_owned(),
@@ -83,6 +97,7 @@ impl Model {
max_tokens: max_tokens.unwrap_or(2000000),
supports_tools,
supports_images,
+ mode: mode.unwrap_or(ModelMode::Default),
}
}
@@ -127,6 +142,8 @@ pub struct Request {
pub parallel_tool_calls: Option<bool>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub tools: Vec<ToolDefinition>,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub reasoning: Option<Reasoning>,
pub usage: RequestUsage,
}
@@ -160,6 +177,18 @@ pub struct FunctionDefinition {
pub parameters: Option<Value>,
}
+#[derive(Debug, Serialize, Deserialize)]
+pub struct Reasoning {
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub effort: Option<String>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub max_tokens: Option<u32>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub exclude: Option<bool>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub enabled: Option<bool>,
+}
+
#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
#[serde(tag = "role", rename_all = "lowercase")]
pub enum RequestMessage {
@@ -299,6 +328,7 @@ pub struct FunctionContent {
pub struct ResponseMessageDelta {
pub role: Option<Role>,
pub content: Option<String>,
+ pub reasoning: Option<String>,
#[serde(default, skip_serializing_if = "is_none_or_empty")]
pub tool_calls: Option<Vec<ToolCallChunk>>,
}
@@ -591,6 +621,16 @@ pub async fn list_models(client: &dyn HttpClient, api_url: &str) -> Result<Vec<M
.map(|arch| arch.input_modalities.contains(&"image".to_string()))
.unwrap_or(false),
),
+ mode: if entry
+ .supported_parameters
+ .contains(&"reasoning".to_string())
+ {
+ ModelMode::Thinking {
+ budget_tokens: Some(4_096),
+ }
+ } else {
+ ModelMode::Default
+ },
})
.collect();
@@ -489,6 +489,49 @@ The OpenRouter API key will be saved in your keychain.
Zed will also use the `OPENROUTER_API_KEY` environment variable if it's defined.
+#### Custom Models {#openrouter-custom-models}
+
+You can add custom models to the OpenRouter provider by adding the following to your Zed `settings.json`:
+
+```json
+{
+ "language_models": {
+ "open_router": {
+ "api_url": "https://openrouter.ai/api/v1",
+ "available_models": [
+ {
+ "name": "google/gemini-2.0-flash-thinking-exp",
+ "display_name": "Gemini 2.0 Flash (Thinking)",
+ "max_tokens": 200000,
+ "max_output_tokens": 8192,
+ "supports_tools": true,
+ "supports_images": true,
+ "mode": {
+ "type": "thinking",
+ "budget_tokens": 8000
+ }
+ }
+ ]
+ }
+ }
+}
+```
+
+The available configuration options for each model are:
+
+- `name`: The model identifier used by OpenRouter (required)
+- `display_name`: A human-readable name shown in the UI (optional)
+- `max_tokens`: The model's context window size (required)
+- `max_output_tokens`: Maximum tokens the model can generate (optional)
+- `max_completion_tokens`: Maximum completion tokens (optional)
+- `supports_tools`: Whether the model supports tool/function calling (optional)
+- `supports_images`: Whether the model supports image inputs (optional)
+- `mode`: Thinking-mode configuration for reasoning models; set `{ "type": "thinking", "budget_tokens": N }` to enable it (optional)
+
+You can find available models and their specifications on the [OpenRouter models page](https://openrouter.ai/models).
+
+Custom models will be listed in the model dropdown in the Agent Panel.
+
## Advanced Configuration {#advanced-configuration}
### Custom Provider Endpoints {#custom-provider-endpoint}