diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs
index 1f2654dac5bf31481f79f00b03d9376f00bf6f03..6a20ad8f83dd984c74a001fb86ccd564b110ce24 100644
--- a/crates/agent/src/thread.rs
+++ b/crates/agent/src/thread.rs
@@ -1284,6 +1284,7 @@ impl Thread {
             tool_choice: None,
             stop: Vec::new(),
             temperature: AgentSettings::temperature_for_model(&model, cx),
+            thinking_allowed: true,
         };
 
         let available_tools = self.available_tools(cx, model.clone());
@@ -1449,6 +1450,7 @@ impl Thread {
             tool_choice: None,
             stop: Vec::new(),
             temperature: AgentSettings::temperature_for_model(model, cx),
+            thinking_allowed: false,
         };
 
         for message in &self.messages {
diff --git a/crates/agent_ui/src/active_thread.rs b/crates/agent_ui/src/active_thread.rs
index a4553fc9011b3f0bee51d08853200fac0a2950ee..0e0e3756e3792cd36a7a32663a0d2f7a10d290df 100644
--- a/crates/agent_ui/src/active_thread.rs
+++ b/crates/agent_ui/src/active_thread.rs
@@ -1461,6 +1461,7 @@ impl ActiveThread {
                     &configured_model.model,
                     cx,
                 ),
+                thinking_allowed: true,
             };
 
             Some(configured_model.model.count_tokens(request, cx))
diff --git a/crates/agent_ui/src/buffer_codegen.rs b/crates/agent_ui/src/buffer_codegen.rs
index 117dcf4f8e17bc99c4bd6ed75af070d84e5b1015..64498e928130d0debfd8a30bdcbcc010c0de48a1 100644
--- a/crates/agent_ui/src/buffer_codegen.rs
+++ b/crates/agent_ui/src/buffer_codegen.rs
@@ -475,6 +475,7 @@ impl CodegenAlternative {
                 stop: Vec::new(),
                 temperature,
                 messages: vec![request_message],
+                thinking_allowed: false,
             }
         }))
     }
diff --git a/crates/agent_ui/src/message_editor.rs b/crates/agent_ui/src/message_editor.rs
index d1eae02246834599cfbd4873894fd4e069d4e542..8bc93f0f5845b11b1e23fbba367dc9ff973d6020 100644
--- a/crates/agent_ui/src/message_editor.rs
+++ b/crates/agent_ui/src/message_editor.rs
@@ -1454,6 +1454,7 @@ impl MessageEditor {
             tool_choice: None,
             stop: vec![],
             temperature: AgentSettings::temperature_for_model(&model.model, cx),
+            thinking_allowed: true,
         };
 
         Some(model.model.count_tokens(request, cx))
diff --git a/crates/agent_ui/src/terminal_inline_assistant.rs b/crates/agent_ui/src/terminal_inline_assistant.rs
index 162b45413f3aeb4295aa7878e34919b4a0c73be9..91867957cdcd1b3cb2ff9c40d385737b74d969f1 100644
--- a/crates/agent_ui/src/terminal_inline_assistant.rs
+++ b/crates/agent_ui/src/terminal_inline_assistant.rs
@@ -297,6 +297,7 @@ impl TerminalInlineAssistant {
                 tool_choice: None,
                 stop: Vec::new(),
                 temperature,
+                thinking_allowed: false,
             }
         }))
     }
diff --git a/crates/assistant_context/src/assistant_context.rs b/crates/assistant_context/src/assistant_context.rs
index aaaef152503e477c0bff4e8036c6460d6e9fde46..136468e084593ef6b6475d29d8526d683b1bdc7b 100644
--- a/crates/assistant_context/src/assistant_context.rs
+++ b/crates/assistant_context/src/assistant_context.rs
@@ -2293,6 +2293,7 @@ impl AssistantContext {
             tool_choice: None,
             stop: Vec::new(),
             temperature: model.and_then(|model| AgentSettings::temperature_for_model(model, cx)),
+            thinking_allowed: true,
         };
         for message in self.messages(cx) {
             if message.status != MessageStatus::Done {
diff --git a/crates/assistant_tools/src/edit_agent.rs b/crates/assistant_tools/src/edit_agent.rs
index c2540633f76209343766ccc202d3b8abc614a107..af7dae2e2014c223f18ef8ac4451b6108adeedfb 100644
--- a/crates/assistant_tools/src/edit_agent.rs
+++ b/crates/assistant_tools/src/edit_agent.rs
@@ -719,6 +719,7 @@ impl EditAgent {
             tools,
             stop: Vec::new(),
             temperature: None,
+            thinking_allowed: false,
         };
 
         Ok(self.model.stream_completion_text(request, cx).await?.stream)
diff --git a/crates/assistant_tools/src/edit_agent/evals.rs b/crates/assistant_tools/src/edit_agent/evals.rs
index 8df8f677f20861c2cd5834bdcec6ac3ba414cdb0..d2ee03f08f142b024b69eeaea739ba121c35b375 100644
--- a/crates/assistant_tools/src/edit_agent/evals.rs
+++ b/crates/assistant_tools/src/edit_agent/evals.rs
@@ -1263,6 +1263,7 @@ impl EvalAssertion {
                 content: vec![prompt.into()],
                 cache: false,
             }],
+            thinking_allowed: true,
             ..Default::default()
         };
         let mut response = retry_on_rate_limit(async || {
@@ -1599,6 +1600,7 @@ impl EditAgentTest {
         let conversation = LanguageModelRequest {
             messages,
             tools,
+            thinking_allowed: true,
             ..Default::default()
         };
 
diff --git a/crates/eval/src/instance.rs b/crates/eval/src/instance.rs
index d17dc89d0ba9d3e0a301fd19c4c47ff6f5a531ad..0f2b4c18eade06060f9002615b6b995d9bfdde0d 100644
--- a/crates/eval/src/instance.rs
+++ b/crates/eval/src/instance.rs
@@ -594,6 +594,7 @@ impl ExampleInstance {
             tools: Vec::new(),
             tool_choice: None,
             stop: Vec::new(),
+            thinking_allowed: true,
         };
 
         let model = model.clone();
diff --git a/crates/git_ui/src/git_panel.rs b/crates/git_ui/src/git_panel.rs
index 84ce97a982652369036996261ae0d45e58d8d0ae..c50e2f8912ef5b4570a7141378f55701151f3f71 100644
--- a/crates/git_ui/src/git_panel.rs
+++ b/crates/git_ui/src/git_panel.rs
@@ -1830,6 +1830,7 @@ impl GitPanel {
             tool_choice: None,
             stop: Vec::new(),
             temperature,
+            thinking_allowed: false,
         };
 
         let stream = model.stream_completion_text(request, &cx);
diff --git a/crates/language_model/src/request.rs b/crates/language_model/src/request.rs
index 451a62775e6331b139ef5c4da57e4d7d930af6f8..6f3d420ad5ac1304daf1f3341b2fb05da8662a18 100644
--- a/crates/language_model/src/request.rs
+++ b/crates/language_model/src/request.rs
@@ -391,6 +391,7 @@ pub struct LanguageModelRequest {
     pub tool_choice: Option<LanguageModelToolChoice>,
     pub stop: Vec<String>,
     pub temperature: Option<f32>,
+    pub thinking_allowed: bool,
 }
 
 #[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
diff --git a/crates/language_models/src/provider/anthropic.rs b/crates/language_models/src/provider/anthropic.rs
index 6ddb1a438108bd6611d9139a042f297b3481549b..959cbccf39bcd4660d4336325cc9e5268c8e99c8 100644
--- a/crates/language_models/src/provider/anthropic.rs
+++ b/crates/language_models/src/provider/anthropic.rs
@@ -663,7 +663,9 @@ pub fn into_anthropic(
         } else {
             Some(anthropic::StringOrContents::String(system_message))
         },
-        thinking: if let AnthropicModelMode::Thinking { budget_tokens } = mode {
+        thinking: if request.thinking_allowed
+            && let AnthropicModelMode::Thinking { budget_tokens } = mode
+        {
             Some(anthropic::Thinking::Enabled { budget_tokens })
         } else {
             None
@@ -1108,6 +1110,7 @@ mod tests {
             temperature: None,
             tools: vec![],
             tool_choice: None,
+            thinking_allowed: true,
         };
 
         let anthropic_request = into_anthropic(
diff --git a/crates/language_models/src/provider/bedrock.rs b/crates/language_models/src/provider/bedrock.rs
index 9c0d48160701f82bde79c55ac4b3a3f168a99d3d..65ce1dbc4b61cb1d6432fa6e6011aadc4479613f 100644
--- a/crates/language_models/src/provider/bedrock.rs
+++ b/crates/language_models/src/provider/bedrock.rs
@@ -799,7 +799,9 @@ pub fn into_bedrock(
         max_tokens: max_output_tokens,
         system: Some(system_message),
         tools: Some(tool_config),
-        thinking: if let BedrockModelMode::Thinking { budget_tokens } = mode {
+        thinking: if request.thinking_allowed
+            && let BedrockModelMode::Thinking { budget_tokens } = mode
+        {
             Some(bedrock::Thinking::Enabled { budget_tokens })
         } else {
             None
diff --git a/crates/language_models/src/provider/cloud.rs b/crates/language_models/src/provider/cloud.rs
index 9b7fee228aa6139859cdb4b54b013223684b8048..aaaeb478c09bf688289220d8378c9b907843ae24 100644
--- a/crates/language_models/src/provider/cloud.rs
+++ b/crates/language_models/src/provider/cloud.rs
@@ -849,6 +849,7 @@ impl LanguageModel for CloudLanguageModel {
         let use_cloud = cx
             .update(|cx| cx.has_flag::())
             .unwrap_or(false);
+        let thinking_allowed = request.thinking_allowed;
         match self.model.provider {
             zed_llm_client::LanguageModelProvider::Anthropic => {
                 let request = into_anthropic(
@@ -856,7 +857,7 @@
                     self.model.id.to_string(),
                     1.0,
                     self.model.max_output_tokens as u64,
-                    if self.model.id.0.ends_with("-thinking") {
+                    if thinking_allowed && self.model.id.0.ends_with("-thinking") {
                         AnthropicModelMode::Thinking {
                             budget_tokens: Some(4_096),
                         }
diff --git a/crates/language_models/src/provider/google.rs b/crates/language_models/src/provider/google.rs
index bb19a3901a10416abc655ae21f0288bc1b6f436c..d1539dd22cfb64b4ed194830f3f9c5babc2a6cea 100644
--- a/crates/language_models/src/provider/google.rs
+++ b/crates/language_models/src/provider/google.rs
@@ -559,11 +559,11 @@ pub fn into_google(
         stop_sequences: Some(request.stop),
         max_output_tokens: None,
         temperature: request.temperature.map(|t| t as f64).or(Some(1.0)),
-        thinking_config: match mode {
-            GoogleModelMode::Thinking { budget_tokens } => {
+        thinking_config: match (request.thinking_allowed, mode) {
+            (true, GoogleModelMode::Thinking { budget_tokens }) => {
                 budget_tokens.map(|thinking_budget| ThinkingConfig { thinking_budget })
             }
-            GoogleModelMode::Default => None,
+            _ => None,
         },
         top_p: None,
         top_k: None,
diff --git a/crates/language_models/src/provider/mistral.rs b/crates/language_models/src/provider/mistral.rs
index c58622d4e0bddb30981d7edc519ca8c5b7c21513..11497fda350a02ec9433cb2311a28e1901dfeb4f 100644
--- a/crates/language_models/src/provider/mistral.rs
+++ b/crates/language_models/src/provider/mistral.rs
@@ -911,6 +911,7 @@ mod tests {
             intent: None,
             mode: None,
             stop: vec![],
+            thinking_allowed: true,
         };
 
         let mistral_request = into_mistral(request, "mistral-small-latest".into(), None);
@@ -943,6 +944,7 @@ mod tests {
             intent: None,
             mode: None,
             stop: vec![],
+            thinking_allowed: true,
         };
 
         let mistral_request = into_mistral(request, "pixtral-12b-latest".into(), None);
diff --git a/crates/language_models/src/provider/ollama.rs b/crates/language_models/src/provider/ollama.rs
index 0866cfa4c83f645a28b8052d86c244ed313cd74f..dc81e8be1897aa3ae51b8d2cb26b7cdec0e55cbf 100644
--- a/crates/language_models/src/provider/ollama.rs
+++ b/crates/language_models/src/provider/ollama.rs
@@ -334,7 +334,10 @@ impl OllamaLanguageModel {
                 temperature: request.temperature.or(Some(1.0)),
                 ..Default::default()
             }),
-            think: self.model.supports_thinking,
+            think: self
+                .model
+                .supports_thinking
+                .map(|supports_thinking| supports_thinking && request.thinking_allowed),
             tools: request.tools.into_iter().map(tool_into_ollama).collect(),
         }
     }
diff --git a/crates/language_models/src/provider/open_ai.rs b/crates/language_models/src/provider/open_ai.rs
index 476c1715ae2e65971227e86fb2087c99284cf969..76f2fbe303c4bed0cfeefbfca6358667420aed51 100644
--- a/crates/language_models/src/provider/open_ai.rs
+++ b/crates/language_models/src/provider/open_ai.rs
@@ -999,6 +999,7 @@ mod tests {
             tool_choice: None,
             stop: vec![],
             temperature: None,
+            thinking_allowed: true,
         };
 
         // Validate that all models are supported by tiktoken-rs
diff --git a/crates/language_models/src/provider/open_router.rs b/crates/language_models/src/provider/open_router.rs
index 5883da1e2f7871122e91ced23f41c8e9b75fc59f..c46135ff3eae704f5d54027457d8f86fbef4820a 100644
--- a/crates/language_models/src/provider/open_router.rs
+++ b/crates/language_models/src/provider/open_router.rs
@@ -523,7 +523,9 @@ pub fn into_open_router(
             None
         },
         usage: open_router::RequestUsage { include: true },
-        reasoning: if let OpenRouterModelMode::Thinking { budget_tokens } = model.mode {
+        reasoning: if request.thinking_allowed
+            && let OpenRouterModelMode::Thinking { budget_tokens } = model.mode
+        {
             Some(open_router::Reasoning {
                 effort: None,
                 max_tokens: budget_tokens,
diff --git a/crates/rules_library/src/rules_library.rs b/crates/rules_library/src/rules_library.rs
index 66f589bfd39cbb941cbc7ff693f13b87c8d06c83..f871416f391d844d324ee3a11d9c41465ea0dccd 100644
--- a/crates/rules_library/src/rules_library.rs
+++ b/crates/rules_library/src/rules_library.rs
@@ -981,6 +981,7 @@ impl RulesLibrary {
                     tool_choice: None,
                     stop: Vec::new(),
                     temperature: None,
+                    thinking_allowed: true,
                 },
                 cx,
             )
diff --git a/crates/semantic_index/src/summary_index.rs b/crates/semantic_index/src/summary_index.rs
index 108130ebc9883414284b736199fe0114def413dc..6e3aae1344d8873ef2ac602e6afd648ceff57384 100644
--- a/crates/semantic_index/src/summary_index.rs
+++ b/crates/semantic_index/src/summary_index.rs
@@ -570,6 +570,7 @@ impl SummaryIndex {
             tool_choice: None,
             stop: Vec::new(),
             temperature: None,
+            thinking_allowed: true,
         };
 
         let code_len = code.len();
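
For illustration, a minimal sketch of how a call site chooses a value for the new
field (hypothetical snippet, not part of the diff; the variable names are invented,
and all other fields come from the crate's `Default` implementation, which the
evals.rs hunks above show the struct derives):

    use language_model::LanguageModelRequest;

    // Interactive agent threads opt in, so thinking-capable models may
    // reason before answering.
    let chat_request = LanguageModelRequest {
        thinking_allowed: true,
        ..Default::default()
    };

    // Latency-sensitive paths (commit messages, inline and terminal
    // assists, the edit agent) opt out; the Anthropic, Bedrock, Cloud,
    // Google, Ollama, and OpenRouter adapters above gate their
    // thinking/reasoning modes on this flag.
    let commit_message_request = LanguageModelRequest {
        thinking_allowed: false,
        ..Default::default()
    };

Since `bool::default()` is `false`, a request built purely via `..Default::default()`
leaves thinking disabled unless the call site enables it explicitly, as the evals
above do.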