Detailed changes
@@ -14,7 +14,9 @@ use language_model::{
LanguageModelToolChoice, LanguageModelToolResultContent, LanguageModelToolUse, MessageContent,
RateLimiter, Role, StopReason, TokenUsage,
};
-use open_router::{Model, ResponseStreamEvent, list_models, stream_completion};
+use open_router::{
+ Model, ModelMode as OpenRouterModelMode, ResponseStreamEvent, list_models, stream_completion,
+};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsStore};
@@ -45,6 +47,39 @@ pub struct AvailableModel {
pub max_completion_tokens: Option<u64>,
pub supports_tools: Option<bool>,
pub supports_images: Option<bool>,
+ pub mode: Option<ModelMode>,
+}
+
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize, JsonSchema)]
+#[serde(tag = "type", rename_all = "lowercase")]
+pub enum ModelMode {
+ #[default]
+ Default,
+ Thinking {
+ budget_tokens: Option<u32>,
+ },
+}
+
+impl From<ModelMode> for OpenRouterModelMode {
+ fn from(value: ModelMode) -> Self {
+ match value {
+ ModelMode::Default => OpenRouterModelMode::Default,
+ ModelMode::Thinking { budget_tokens } => {
+ OpenRouterModelMode::Thinking { budget_tokens }
+ }
+ }
+ }
+}
+
+impl From<OpenRouterModelMode> for ModelMode {
+ fn from(value: OpenRouterModelMode) -> Self {
+ match value {
+ OpenRouterModelMode::Default => ModelMode::Default,
+ OpenRouterModelMode::Thinking { budget_tokens } => {
+ ModelMode::Thinking { budget_tokens }
+ }
+ }
+ }
}
pub struct OpenRouterLanguageModelProvider {
@@ -242,6 +277,7 @@ impl LanguageModelProvider for OpenRouterLanguageModelProvider {
max_tokens: model.max_tokens,
supports_tools: model.supports_tools,
supports_images: model.supports_images,
+ mode: model.mode.clone().unwrap_or_default().into(),
});
}
@@ -403,13 +439,12 @@ pub fn into_open_router(
for message in request.messages {
for content in message.content {
match content {
- MessageContent::Text(text) | MessageContent::Thinking { text, .. } => {
- add_message_content_part(
- open_router::MessagePart::Text { text },
- message.role,
- &mut messages,
- )
- }
+ MessageContent::Text(text) => add_message_content_part(
+ open_router::MessagePart::Text { text },
+ message.role,
+ &mut messages,
+ ),
+ MessageContent::Thinking { .. } => {}
MessageContent::RedactedThinking(_) => {}
MessageContent::Image(image) => {
add_message_content_part(
@@ -479,6 +514,16 @@ pub fn into_open_router(
None
},
usage: open_router::RequestUsage { include: true },
+ reasoning: if let OpenRouterModelMode::Thinking { budget_tokens } = model.mode {
+ Some(open_router::Reasoning {
+ effort: None,
+ max_tokens: budget_tokens,
+ exclude: Some(false),
+ enabled: Some(true),
+ })
+ } else {
+ None
+ },
tools: request
.tools
.into_iter()
@@ -569,8 +614,19 @@ impl OpenRouterEventMapper {
};
let mut events = Vec::new();
+ if let Some(reasoning) = choice.delta.reasoning.clone() {
+ events.push(Ok(LanguageModelCompletionEvent::Thinking {
+ text: reasoning,
+ signature: None,
+ }));
+ }
+
if let Some(content) = choice.delta.content.clone() {
- events.push(Ok(LanguageModelCompletionEvent::Text(content)));
+ // OpenRouter sends an empty content string along with the reasoning content.
+ // Skipping empty content here works around that OpenRouter API quirk.
+ if !content.is_empty() {
+ events.push(Ok(LanguageModelCompletionEvent::Text(content)));
+ }
}
if let Some(tool_calls) = choice.delta.tool_calls.as_ref() {
@@ -53,6 +53,18 @@ pub struct Model {
pub max_tokens: u64,
pub supports_tools: Option<bool>,
pub supports_images: Option<bool>,
+ #[serde(default)]
+ pub mode: ModelMode,
+}
+
+#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
+#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
+pub enum ModelMode {
+ #[default]
+ Default,
+ Thinking {
+ budget_tokens: Option<u32>,
+ },
}
impl Model {
@@ -63,6 +75,7 @@ impl Model {
Some(2000000),
Some(true),
Some(false),
+ Some(ModelMode::Default),
)
}
@@ -76,6 +89,7 @@ impl Model {
max_tokens: Option<u64>,
supports_tools: Option<bool>,
supports_images: Option<bool>,
+ mode: Option<ModelMode>,
) -> Self {
Self {
name: name.to_owned(),
@@ -83,6 +97,7 @@ impl Model {
max_tokens: max_tokens.unwrap_or(2000000),
supports_tools,
supports_images,
+ mode: mode.unwrap_or(ModelMode::Default),
}
}
@@ -127,6 +142,8 @@ pub struct Request {
pub parallel_tool_calls: Option<bool>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub tools: Vec<ToolDefinition>,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub reasoning: Option<Reasoning>,
pub usage: RequestUsage,
}
@@ -160,6 +177,18 @@ pub struct FunctionDefinition {
pub parameters: Option<Value>,
}
+#[derive(Debug, Serialize, Deserialize)]
+pub struct Reasoning {
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub effort: Option<String>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub max_tokens: Option<u32>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub exclude: Option<bool>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub enabled: Option<bool>,
+}
+
#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
#[serde(tag = "role", rename_all = "lowercase")]
pub enum RequestMessage {
@@ -299,6 +328,7 @@ pub struct FunctionContent {
pub struct ResponseMessageDelta {
pub role: Option<Role>,
pub content: Option<String>,
+ pub reasoning: Option<String>,
#[serde(default, skip_serializing_if = "is_none_or_empty")]
pub tool_calls: Option<Vec<ToolCallChunk>>,
}
@@ -591,6 +621,16 @@ pub async fn list_models(client: &dyn HttpClient, api_url: &str) -> Result<Vec<M
.map(|arch| arch.input_modalities.contains(&"image".to_string()))
.unwrap_or(false),
),
+ mode: if entry
+ .supported_parameters
+ .contains(&"reasoning".to_string())
+ {
+ ModelMode::Thinking {
+ budget_tokens: Some(4_096),
+ }
+ } else {
+ ModelMode::Default
+ },
})
.collect();
@@ -489,6 +489,49 @@ The OpenRouter API key will be saved in your keychain.
Zed will also use the `OPENROUTER_API_KEY` environment variable if it's defined.
+#### Custom Models {#openrouter-custom-models}
+
+You can add custom models to the OpenRouter provider by adding the following to your Zed `settings.json`:
+
+```json
+{
+ "language_models": {
+ "open_router": {
+ "api_url": "https://openrouter.ai/api/v1",
+ "available_models": [
+ {
+ "name": "google/gemini-2.0-flash-thinking-exp",
+ "display_name": "Gemini 2.0 Flash (Thinking)",
+ "max_tokens": 200000,
+ "max_output_tokens": 8192,
+ "supports_tools": true,
+ "supports_images": true,
+ "mode": {
+ "type": "thinking",
+ "budget_tokens": 8000
+ }
+ }
+ ]
+ }
+ }
+}
+```
+
+The available configuration options for each model are:
+
+- `name`: The model identifier used by OpenRouter (required)
+- `display_name`: A human-readable name shown in the UI (optional)
+- `max_tokens`: The model's context window size (required)
+- `max_output_tokens`: Maximum tokens the model can generate (optional)
+- `max_completion_tokens`: Maximum completion tokens (optional)
+- `supports_tools`: Whether the model supports tool/function calling (optional)
+- `supports_images`: Whether the model supports image inputs (optional)
+- `mode`: Thinking-mode configuration for reasoning models; set `{ "type": "thinking", "budget_tokens": N }` to enable it (optional)
+
+You can find available models and their specifications on the [OpenRouter models page](https://openrouter.ai/models).
+
+Custom models will be listed in the model dropdown in the Agent Panel.
+
## Advanced Configuration {#advanced-configuration}
### Custom Provider Endpoints {#custom-provider-endpoint}