From 1115f768c9b714f397ea6b9bfebebd4655e707c5 Mon Sep 17 00:00:00 2001 From: Abhishek Tripathi Date: Mon, 20 Apr 2026 18:03:18 +0530 Subject: [PATCH] copilot: Wire up reasoning tokens for GPT models (#53313) Fix two issues with reasoning support in the Copilot provider: - Responses API path: use the user's thinking_effort setting instead of hardcoding Medium effort - Chat Completions path: compute and pass thinking_budget when thinking is enabled, instead of unconditionally setting it to None Self-Review Checklist: - [x] I've reviewed my own diff for quality, security, and reliability - [ ] Unsafe blocks (if any) have justifying comments - [ ] The content is consistent with the [UI/UX checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist) - [ ] Tests cover the new/changed behavior - [ ] Performance impact has been considered and is acceptable Closes #52140 Release Notes: - Fixed a bug where Copilot wouldn't use the thinking level the user has set --------- Co-authored-by: Bennet Bo Fenner Co-authored-by: Bennet Bo Fenner --- crates/copilot_chat/src/copilot_chat.rs | 12 +++---- .../src/provider/copilot_chat.rs | 33 ++++++++++++++----- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/crates/copilot_chat/src/copilot_chat.rs b/crates/copilot_chat/src/copilot_chat.rs index 850190701e526fe3fad896a17cdc704b89253fea..fb89c2e0853f737c1b29da4955b8161030199456 100644 --- a/crates/copilot_chat/src/copilot_chat.rs +++ b/crates/copilot_chat/src/copilot_chat.rs @@ -289,13 +289,8 @@ impl Model { } pub fn supports_response(&self) -> bool { - self.supported_endpoints.len() > 0 - && !self - .supported_endpoints - .contains(&ModelSupportedEndpoint::ChatCompletions) - && self - .supported_endpoints - .contains(&ModelSupportedEndpoint::Responses) + self.supported_endpoints .contains(&ModelSupportedEndpoint::Responses) } pub fn supports_messages(&self) -> bool { @@ -315,6 +310,7 @@ impl Model { self.supports_thinking() || 
self.supports_adaptive_thinking() || self.max_thinking_budget().is_some() + || !self.reasoning_effort_levels().is_empty() } pub fn max_thinking_budget(&self) -> Option { @@ -1731,7 +1727,7 @@ mod tests { assert!(!model_with_chat_completions.supports_response()); // Both endpoints (has /chat/completions) -> supports_response = true - assert!(!model_with_both.supports_response()); + assert!(model_with_both.supports_response()); // Only /v1/messages endpoint -> supports_response = false (doesn't have /responses) assert!(!model_with_messages.supports_response()); diff --git a/crates/language_models/src/provider/copilot_chat.rs b/crates/language_models/src/provider/copilot_chat.rs index 8b46c38f2524a048d60ba4b2892b200a824f69e8..0d7d03c8c754217c664238cfcb51134b36fb9ce5 100644 --- a/crates/language_models/src/provider/copilot_chat.rs +++ b/crates/language_models/src/provider/copilot_chat.rs @@ -272,6 +272,7 @@ impl LanguageModel for CopilotChatLanguageModel { "low" => "Low".into(), "medium" => "Medium".into(), "high" => "High".into(), + "xhigh" => "Extra High".into(), _ => language_model::SharedString::from(level.clone()), }; LanguageModelEffortLevel { @@ -382,7 +383,7 @@ impl LanguageModel for CopilotChatLanguageModel { AnthropicModelMode::Thinking { budget_tokens: None, } - } else if model.can_think() { + } else if model.supports_thinking() { AnthropicModelMode::Thinking { budget_tokens: compute_thinking_budget( model.min_thinking_budget(), @@ -412,11 +413,12 @@ impl LanguageModel for CopilotChatLanguageModel { } } - let anthropic_beta = if !model.supports_adaptive_thinking() && model.can_think() { - Some("interleaved-thinking-2025-05-14".to_string()) - } else { - None - }; + let anthropic_beta = + if !model.supports_adaptive_thinking() && model.supports_thinking() { + Some("interleaved-thinking-2025-05-14".to_string()) + } else { + None + }; let body = serde_json::to_string(&anthropic::StreamingRequest { base: anthropic_request, @@ -883,6 +885,7 @@ fn 
into_copilot_chat( ) -> Result { let temperature = request.temperature; let tool_choice = request.tool_choice; + let thinking_allowed = request.thinking_allowed; let mut request_messages: Vec = Vec::new(); for message in request.messages { @@ -1052,7 +1055,15 @@ fn into_copilot_chat( LanguageModelToolChoice::Any => ToolChoice::Required, LanguageModelToolChoice::None => ToolChoice::None, }), - thinking_budget: None, + thinking_budget: if thinking_allowed && model.supports_thinking() { + compute_thinking_budget( + model.min_thinking_budget(), + model.max_thinking_budget(), + model.max_output_tokens() as u32, + ) + } else { + None + }, }) } @@ -1104,7 +1115,7 @@ fn into_copilot_responses( stop: _, temperature, thinking_allowed, - thinking_effort: _, + thinking_effort, speed: _, } = request; @@ -1271,8 +1282,12 @@ fn into_copilot_responses( tools: converted_tools, tool_choice: mapped_tool_choice, reasoning: if thinking_allowed { + let effort = thinking_effort + .as_deref() + .and_then(|e| e.parse::().ok()) + .unwrap_or(copilot_responses::ReasoningEffort::Medium); Some(copilot_responses::ReasoningConfig { - effort: copilot_responses::ReasoningEffort::Medium, + effort, summary: Some(copilot_responses::ReasoningSummary::Detailed), }) } else {