From 41fe2a2ab4600c41e94d0fa97b9fe1e76977ea33 Mon Sep 17 00:00:00 2001
From: Bennet Bo Fenner
Date: Wed, 9 Jul 2025 20:05:39 +0200
Subject: [PATCH] agent: Disable thinking when using inline assistant/edit
 file tool (#34141)

This introduces a new field `thinking_allowed` on `LanguageModelRequest`
which lets us control whether thinking should be enabled if the model
supports it.

We do not permit thinking in the Inline Assistant, the Edit File tool, or
the Git Commit message generator; this should make generation faster when
using a thinking model, e.g. `claude-sonnet-4-thinking`.

Release Notes:

- N/A
---
 crates/agent/src/thread.rs                         | 2 ++
 crates/agent_ui/src/active_thread.rs               | 1 +
 crates/agent_ui/src/buffer_codegen.rs              | 1 +
 crates/agent_ui/src/message_editor.rs              | 1 +
 crates/agent_ui/src/terminal_inline_assistant.rs   | 1 +
 crates/assistant_context/src/assistant_context.rs  | 1 +
 crates/assistant_tools/src/edit_agent.rs           | 1 +
 crates/assistant_tools/src/edit_agent/evals.rs     | 2 ++
 crates/eval/src/instance.rs                        | 1 +
 crates/git_ui/src/git_panel.rs                     | 1 +
 crates/language_model/src/request.rs               | 1 +
 crates/language_models/src/provider/anthropic.rs   | 5 ++++-
 crates/language_models/src/provider/bedrock.rs     | 4 +++-
 crates/language_models/src/provider/cloud.rs       | 3 ++-
 crates/language_models/src/provider/google.rs      | 6 +++---
 crates/language_models/src/provider/mistral.rs     | 2 ++
 crates/language_models/src/provider/ollama.rs      | 5 ++++-
 crates/language_models/src/provider/open_ai.rs     | 1 +
 crates/language_models/src/provider/open_router.rs | 4 +++-
 crates/rules_library/src/rules_library.rs          | 1 +
 crates/semantic_index/src/summary_index.rs         | 1 +
 21 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs
index 1f2654dac5bf31481f79f00b03d9376f00bf6f03..6a20ad8f83dd984c74a001fb86ccd564b110ce24 100644
--- a/crates/agent/src/thread.rs
+++ b/crates/agent/src/thread.rs
@@ -1284,6 +1284,7 @@ impl Thread {
             tool_choice: None,
             stop: Vec::new(),
             temperature: AgentSettings::temperature_for_model(&model, cx),
+            thinking_allowed: true,
         };

         let available_tools = self.available_tools(cx, model.clone());
@@ -1449,6 +1450,7 @@ impl Thread {
             tool_choice: None,
             stop: Vec::new(),
             temperature: AgentSettings::temperature_for_model(model, cx),
+            thinking_allowed: false,
         };

         for message in &self.messages {
diff --git a/crates/agent_ui/src/active_thread.rs b/crates/agent_ui/src/active_thread.rs
index a4553fc9011b3f0bee51d08853200fac0a2950ee..0e0e3756e3792cd36a7a32663a0d2f7a10d290df 100644
--- a/crates/agent_ui/src/active_thread.rs
+++ b/crates/agent_ui/src/active_thread.rs
@@ -1461,6 +1461,7 @@ impl ActiveThread {
                     &configured_model.model,
                     cx,
                 ),
+                thinking_allowed: true,
             };

             Some(configured_model.model.count_tokens(request, cx))
diff --git a/crates/agent_ui/src/buffer_codegen.rs b/crates/agent_ui/src/buffer_codegen.rs
index 117dcf4f8e17bc99c4bd6ed75af070d84e5b1015..64498e928130d0debfd8a30bdcbcc010c0de48a1 100644
--- a/crates/agent_ui/src/buffer_codegen.rs
+++ b/crates/agent_ui/src/buffer_codegen.rs
@@ -475,6 +475,7 @@ impl CodegenAlternative {
                 stop: Vec::new(),
                 temperature,
                 messages: vec![request_message],
+                thinking_allowed: false,
             }
         }))
     }
diff --git a/crates/agent_ui/src/message_editor.rs b/crates/agent_ui/src/message_editor.rs
index d1eae02246834599cfbd4873894fd4e069d4e542..8bc93f0f5845b11b1e23fbba367dc9ff973d6020 100644
--- a/crates/agent_ui/src/message_editor.rs
+++ b/crates/agent_ui/src/message_editor.rs
@@ -1454,6 +1454,7 @@ impl MessageEditor {
             tool_choice: None,
             stop: vec![],
             temperature: AgentSettings::temperature_for_model(&model.model, cx),
+            thinking_allowed: true,
         };

         Some(model.model.count_tokens(request, cx))
diff --git a/crates/agent_ui/src/terminal_inline_assistant.rs b/crates/agent_ui/src/terminal_inline_assistant.rs
index 162b45413f3aeb4295aa7878e34919b4a0c73be9..91867957cdcd1b3cb2ff9c40d385737b74d969f1 100644
--- a/crates/agent_ui/src/terminal_inline_assistant.rs
+++ b/crates/agent_ui/src/terminal_inline_assistant.rs
@@ -297,6 +297,7 @@ impl TerminalInlineAssistant {
                 tool_choice: None,
                 stop: Vec::new(),
                 temperature,
+                thinking_allowed: false,
             }
         }))
     }
diff --git a/crates/assistant_context/src/assistant_context.rs b/crates/assistant_context/src/assistant_context.rs
index aaaef152503e477c0bff4e8036c6460d6e9fde46..136468e084593ef6b6475d29d8526d683b1bdc7b 100644
--- a/crates/assistant_context/src/assistant_context.rs
+++ b/crates/assistant_context/src/assistant_context.rs
@@ -2293,6 +2293,7 @@ impl AssistantContext {
             tool_choice: None,
             stop: Vec::new(),
             temperature: model.and_then(|model| AgentSettings::temperature_for_model(model, cx)),
+            thinking_allowed: true,
         };
         for message in self.messages(cx) {
             if message.status != MessageStatus::Done {
diff --git a/crates/assistant_tools/src/edit_agent.rs b/crates/assistant_tools/src/edit_agent.rs
index c2540633f76209343766ccc202d3b8abc614a107..af7dae2e2014c223f18ef8ac4451b6108adeedfb 100644
--- a/crates/assistant_tools/src/edit_agent.rs
+++ b/crates/assistant_tools/src/edit_agent.rs
@@ -719,6 +719,7 @@ impl EditAgent {
             tools,
             stop: Vec::new(),
             temperature: None,
+            thinking_allowed: false,
         };

         Ok(self.model.stream_completion_text(request, cx).await?.stream)
diff --git a/crates/assistant_tools/src/edit_agent/evals.rs b/crates/assistant_tools/src/edit_agent/evals.rs
index 8df8f677f20861c2cd5834bdcec6ac3ba414cdb0..d2ee03f08f142b024b69eeaea739ba121c35b375 100644
--- a/crates/assistant_tools/src/edit_agent/evals.rs
+++ b/crates/assistant_tools/src/edit_agent/evals.rs
@@ -1263,6 +1263,7 @@ impl EvalAssertion {
                 content: vec![prompt.into()],
                 cache: false,
             }],
+            thinking_allowed: true,
             ..Default::default()
         };
         let mut response = retry_on_rate_limit(async || {
@@ -1599,6 +1600,7 @@ impl EditAgentTest {
         let conversation = LanguageModelRequest {
             messages,
             tools,
+            thinking_allowed: true,
             ..Default::default()
         };

diff --git a/crates/eval/src/instance.rs b/crates/eval/src/instance.rs
index d17dc89d0ba9d3e0a301fd19c4c47ff6f5a531ad..0f2b4c18eade06060f9002615b6b995d9bfdde0d 100644
--- a/crates/eval/src/instance.rs
+++ b/crates/eval/src/instance.rs
@@ -594,6 +594,7 @@ impl ExampleInstance {
             tools: Vec::new(),
             tool_choice: None,
             stop: Vec::new(),
+            thinking_allowed: true,
         };

         let model = model.clone();
diff --git a/crates/git_ui/src/git_panel.rs b/crates/git_ui/src/git_panel.rs
index 84ce97a982652369036996261ae0d45e58d8d0ae..c50e2f8912ef5b4570a7141378f55701151f3f71 100644
--- a/crates/git_ui/src/git_panel.rs
+++ b/crates/git_ui/src/git_panel.rs
@@ -1830,6 +1830,7 @@ impl GitPanel {
             tool_choice: None,
             stop: Vec::new(),
             temperature,
+            thinking_allowed: false,
         };

         let stream = model.stream_completion_text(request, &cx);
diff --git a/crates/language_model/src/request.rs b/crates/language_model/src/request.rs
index 451a62775e6331b139ef5c4da57e4d7d930af6f8..6f3d420ad5ac1304daf1f3341b2fb05da8662a18 100644
--- a/crates/language_model/src/request.rs
+++ b/crates/language_model/src/request.rs
@@ -391,6 +391,7 @@ pub struct LanguageModelRequest {
     pub tool_choice: Option<LanguageModelToolChoice>,
     pub stop: Vec<String>,
     pub temperature: Option<f32>,
+    pub thinking_allowed: bool,
 }

 #[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
diff --git a/crates/language_models/src/provider/anthropic.rs b/crates/language_models/src/provider/anthropic.rs
index 6ddb1a438108bd6611d9139a042f297b3481549b..959cbccf39bcd4660d4336325cc9e5268c8e99c8 100644
--- a/crates/language_models/src/provider/anthropic.rs
+++ b/crates/language_models/src/provider/anthropic.rs
@@ -663,7 +663,9 @@ pub fn into_anthropic(
         } else {
             Some(anthropic::StringOrContents::String(system_message))
         },
-        thinking: if let AnthropicModelMode::Thinking { budget_tokens } = mode {
+        thinking: if request.thinking_allowed
+            && let AnthropicModelMode::Thinking { budget_tokens } = mode
+        {
             Some(anthropic::Thinking::Enabled { budget_tokens })
         } else {
             None
@@ -1108,6 +1110,7 @@ mod tests {
             temperature: None,
             tools: vec![],
             tool_choice: None,
+            thinking_allowed: true,
         };

         let anthropic_request = into_anthropic(
diff --git a/crates/language_models/src/provider/bedrock.rs b/crates/language_models/src/provider/bedrock.rs
index 9c0d48160701f82bde79c55ac4b3a3f168a99d3d..65ce1dbc4b61cb1d6432fa6e6011aadc4479613f 100644
--- a/crates/language_models/src/provider/bedrock.rs
+++ b/crates/language_models/src/provider/bedrock.rs
@@ -799,7 +799,9 @@ pub fn into_bedrock(
         max_tokens: max_output_tokens,
         system: Some(system_message),
         tools: Some(tool_config),
-        thinking: if let BedrockModelMode::Thinking { budget_tokens } = mode {
+        thinking: if request.thinking_allowed
+            && let BedrockModelMode::Thinking { budget_tokens } = mode
+        {
             Some(bedrock::Thinking::Enabled { budget_tokens })
         } else {
             None
diff --git a/crates/language_models/src/provider/cloud.rs b/crates/language_models/src/provider/cloud.rs
index 9b7fee228aa6139859cdb4b54b013223684b8048..aaaeb478c09bf688289220d8378c9b907843ae24 100644
--- a/crates/language_models/src/provider/cloud.rs
+++ b/crates/language_models/src/provider/cloud.rs
@@ -849,6 +849,7 @@ impl LanguageModel for CloudLanguageModel {
         let use_cloud = cx
             .update(|cx| cx.has_flag::())
             .unwrap_or(false);
+        let thinking_allowed = request.thinking_allowed;
         match self.model.provider {
             zed_llm_client::LanguageModelProvider::Anthropic => {
                 let request = into_anthropic(
@@ -856,7 +857,7 @@ impl LanguageModel for CloudLanguageModel {
                     self.model.id.to_string(),
                     1.0,
                     self.model.max_output_tokens as u64,
-                    if self.model.id.0.ends_with("-thinking") {
+                    if thinking_allowed && self.model.id.0.ends_with("-thinking") {
                         AnthropicModelMode::Thinking {
                             budget_tokens: Some(4_096),
                         }
diff --git a/crates/language_models/src/provider/google.rs b/crates/language_models/src/provider/google.rs
index bb19a3901a10416abc655ae21f0288bc1b6f436c..d1539dd22cfb64b4ed194830f3f9c5babc2a6cea 100644
--- a/crates/language_models/src/provider/google.rs
+++ b/crates/language_models/src/provider/google.rs
@@ -559,11 +559,11 @@ pub fn into_google(
         stop_sequences: Some(request.stop),
         max_output_tokens: None,
         temperature: request.temperature.map(|t| t as f64).or(Some(1.0)),
-        thinking_config: match mode {
-            GoogleModelMode::Thinking { budget_tokens } => {
+        thinking_config: match (request.thinking_allowed, mode) {
+            (true, GoogleModelMode::Thinking { budget_tokens }) => {
                 budget_tokens.map(|thinking_budget| ThinkingConfig { thinking_budget })
             }
-            GoogleModelMode::Default => None,
+            _ => None,
         },
         top_p: None,
         top_k: None,
diff --git a/crates/language_models/src/provider/mistral.rs b/crates/language_models/src/provider/mistral.rs
index c58622d4e0bddb30981d7edc519ca8c5b7c21513..11497fda350a02ec9433cb2311a28e1901dfeb4f 100644
--- a/crates/language_models/src/provider/mistral.rs
+++ b/crates/language_models/src/provider/mistral.rs
@@ -911,6 +911,7 @@ mod tests {
             intent: None,
             mode: None,
             stop: vec![],
+            thinking_allowed: true,
         };

         let mistral_request = into_mistral(request, "mistral-small-latest".into(), None);
@@ -943,6 +944,7 @@ mod tests {
             intent: None,
             mode: None,
             stop: vec![],
+            thinking_allowed: true,
         };

         let mistral_request = into_mistral(request, "pixtral-12b-latest".into(), None);
diff --git a/crates/language_models/src/provider/ollama.rs b/crates/language_models/src/provider/ollama.rs
index 0866cfa4c83f645a28b8052d86c244ed313cd74f..dc81e8be1897aa3ae51b8d2cb26b7cdec0e55cbf 100644
--- a/crates/language_models/src/provider/ollama.rs
+++ b/crates/language_models/src/provider/ollama.rs
@@ -334,7 +334,10 @@ impl OllamaLanguageModel {
                 temperature: request.temperature.or(Some(1.0)),
                 ..Default::default()
             }),
-            think: self.model.supports_thinking,
+            think: self
+                .model
+                .supports_thinking
+                .map(|supports_thinking| supports_thinking && request.thinking_allowed),
             tools: request.tools.into_iter().map(tool_into_ollama).collect(),
         }
     }
diff --git a/crates/language_models/src/provider/open_ai.rs b/crates/language_models/src/provider/open_ai.rs
index 476c1715ae2e65971227e86fb2087c99284cf969..76f2fbe303c4bed0cfeefbfca6358667420aed51 100644
--- a/crates/language_models/src/provider/open_ai.rs
+++ b/crates/language_models/src/provider/open_ai.rs
@@ -999,6 +999,7 @@ mod tests {
             tool_choice: None,
             stop: vec![],
             temperature: None,
+            thinking_allowed: true,
         };

         // Validate that all models are supported by tiktoken-rs
diff --git a/crates/language_models/src/provider/open_router.rs b/crates/language_models/src/provider/open_router.rs
index 5883da1e2f7871122e91ced23f41c8e9b75fc59f..c46135ff3eae704f5d54027457d8f86fbef4820a 100644
--- a/crates/language_models/src/provider/open_router.rs
+++ b/crates/language_models/src/provider/open_router.rs
@@ -523,7 +523,9 @@ pub fn into_open_router(
             None
         },
         usage: open_router::RequestUsage { include: true },
-        reasoning: if let OpenRouterModelMode::Thinking { budget_tokens } = model.mode {
+        reasoning: if request.thinking_allowed
+            && let OpenRouterModelMode::Thinking { budget_tokens } = model.mode
+        {
             Some(open_router::Reasoning {
                 effort: None,
                 max_tokens: budget_tokens,
diff --git a/crates/rules_library/src/rules_library.rs b/crates/rules_library/src/rules_library.rs
index 66f589bfd39cbb941cbc7ff693f13b87c8d06c83..f871416f391d844d324ee3a11d9c41465ea0dccd 100644
--- a/crates/rules_library/src/rules_library.rs
+++ b/crates/rules_library/src/rules_library.rs
@@ -981,6 +981,7 @@ impl RulesLibrary {
                     tool_choice: None,
                     stop: Vec::new(),
                     temperature: None,
+                    thinking_allowed: true,
                 },
                 cx,
             )
diff --git a/crates/semantic_index/src/summary_index.rs b/crates/semantic_index/src/summary_index.rs
index 108130ebc9883414284b736199fe0114def413dc..6e3aae1344d8873ef2ac602e6afd648ceff57384 100644
--- a/crates/semantic_index/src/summary_index.rs
+++ b/crates/semantic_index/src/summary_index.rs
@@ -570,6 +570,7 @@ impl SummaryIndex {
             tool_choice: None,
             stop: Vec::new(),
             temperature: None,
+            thinking_allowed: true,
         };

         let code_len = code.len();
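
A minimal sketch of the gating pattern this patch introduces, for readers skimming the diff. The types below are simplified stand-ins, not Zed's actual definitions, and `thinking_budget` is a hypothetical helper; it only mirrors the `match (request.thinking_allowed, mode)` shape used in `into_google` (the `into_anthropic`, `into_bedrock`, and `into_open_router` hunks express the same condition with `if request.thinking_allowed && let ...`):

// Simplified stand-ins for illustration; not Zed's real types.
#[derive(Default)]
struct LanguageModelRequest {
    thinking_allowed: bool,
}

enum ModelMode {
    Default,
    Thinking { budget_tokens: Option<u64> },
}

// Hypothetical helper: thinking is enabled only when the request allows it
// AND the model runs in a thinking mode; every other combination yields None.
fn thinking_budget(request: &LanguageModelRequest, mode: ModelMode) -> Option<Option<u64>> {
    match (request.thinking_allowed, mode) {
        (true, ModelMode::Thinking { budget_tokens }) => Some(budget_tokens),
        _ => None,
    }
}

fn main() {
    // Inline assistant / edit file tool path: thinking suppressed for speed.
    let edit = LanguageModelRequest { thinking_allowed: false };
    assert!(thinking_budget(&edit, ModelMode::Thinking { budget_tokens: Some(4_096) }).is_none());

    // Agent thread path: thinking permitted when the model supports it.
    let chat = LanguageModelRequest { thinking_allowed: true };
    assert!(thinking_budget(&chat, ModelMode::Thinking { budget_tokens: Some(4_096) }).is_some());
    assert!(thinking_budget(&chat, ModelMode::Default).is_none());
}

Callers that want the previous behavior simply set `thinking_allowed: true`, which is what the agent thread, the evals, and the token-counting paths in the patch do.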