copilot: Wire up reasoning tokens for GPT models (#53313)

Abhishek Tripathi, Bennet Bo Fenner, and Bennet Bo Fenner created

Fix two issues with reasoning support in the Copilot provider:

- Responses API path: use the user's thinking_effort setting instead of
hardcoding Medium effort
- Chat Completions path: compute and pass thinking_budget when thinking
is enabled, instead of unconditionally setting it to None

Self-Review Checklist:

- [x] I've reviewed my own diff for quality, security, and reliability
- [ ] Unsafe blocks (if any) have justifying comments
- [ ] The content is consistent with the [UI/UX
checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist)
- [ ] Tests cover the new/changed behavior
- [ ] Performance impact has been considered and is acceptable

Closes #52140

Release Notes:

- Fixed a bug where Copilot wouldn't use the thinking level the user has
set

---------

Co-authored-by: Bennet Bo Fenner <bennetbo@gmx.de>
Co-authored-by: Bennet Bo Fenner <bennet@zed.dev>

Change summary

crates/copilot_chat/src/copilot_chat.rs             | 12 +---
crates/language_models/src/provider/copilot_chat.rs | 33 ++++++++++----
2 files changed, 28 insertions(+), 17 deletions(-)

Detailed changes

crates/copilot_chat/src/copilot_chat.rs 🔗

@@ -289,13 +289,8 @@ impl Model {
     }
 
     pub fn supports_response(&self) -> bool {
-        self.supported_endpoints.len() > 0
-            && !self
-                .supported_endpoints
-                .contains(&ModelSupportedEndpoint::ChatCompletions)
-            && self
-                .supported_endpoints
-                .contains(&ModelSupportedEndpoint::Responses)
+        self.supported_endpoints
+            .contains(&ModelSupportedEndpoint::Responses)
     }
 
     pub fn supports_messages(&self) -> bool {
@@ -315,6 +310,7 @@ impl Model {
         self.supports_thinking()
             || self.supports_adaptive_thinking()
             || self.max_thinking_budget().is_some()
+            || !self.reasoning_effort_levels().is_empty()
     }
 
     pub fn max_thinking_budget(&self) -> Option<u32> {
@@ -1731,7 +1727,7 @@ mod tests {
         assert!(!model_with_chat_completions.supports_response());
 
        // Both endpoints (has /responses) -> supports_response = true
-        assert!(!model_with_both.supports_response());
+        assert!(model_with_both.supports_response());
 
         // Only /v1/messages endpoint -> supports_response = false (doesn't have /responses)
         assert!(!model_with_messages.supports_response());

crates/language_models/src/provider/copilot_chat.rs 🔗

@@ -272,6 +272,7 @@ impl LanguageModel for CopilotChatLanguageModel {
                     "low" => "Low".into(),
                     "medium" => "Medium".into(),
                     "high" => "High".into(),
+                    "xhigh" => "Extra High".into(),
                     _ => language_model::SharedString::from(level.clone()),
                 };
                 LanguageModelEffortLevel {
@@ -382,7 +383,7 @@ impl LanguageModel for CopilotChatLanguageModel {
                         AnthropicModelMode::Thinking {
                             budget_tokens: None,
                         }
-                    } else if model.can_think() {
+                    } else if model.supports_thinking() {
                         AnthropicModelMode::Thinking {
                             budget_tokens: compute_thinking_budget(
                                 model.min_thinking_budget(),
@@ -412,11 +413,12 @@ impl LanguageModel for CopilotChatLanguageModel {
                     }
                 }
 
-                let anthropic_beta = if !model.supports_adaptive_thinking() && model.can_think() {
-                    Some("interleaved-thinking-2025-05-14".to_string())
-                } else {
-                    None
-                };
+                let anthropic_beta =
+                    if !model.supports_adaptive_thinking() && model.supports_thinking() {
+                        Some("interleaved-thinking-2025-05-14".to_string())
+                    } else {
+                        None
+                    };
 
                 let body = serde_json::to_string(&anthropic::StreamingRequest {
                     base: anthropic_request,
@@ -883,6 +885,7 @@ fn into_copilot_chat(
 ) -> Result<CopilotChatRequest> {
     let temperature = request.temperature;
     let tool_choice = request.tool_choice;
+    let thinking_allowed = request.thinking_allowed;
 
     let mut request_messages: Vec<LanguageModelRequestMessage> = Vec::new();
     for message in request.messages {
@@ -1052,7 +1055,15 @@ fn into_copilot_chat(
             LanguageModelToolChoice::Any => ToolChoice::Required,
             LanguageModelToolChoice::None => ToolChoice::None,
         }),
-        thinking_budget: None,
+        thinking_budget: if thinking_allowed && model.supports_thinking() {
+            compute_thinking_budget(
+                model.min_thinking_budget(),
+                model.max_thinking_budget(),
+                model.max_output_tokens() as u32,
+            )
+        } else {
+            None
+        },
     })
 }
 
@@ -1104,7 +1115,7 @@ fn into_copilot_responses(
         stop: _,
         temperature,
         thinking_allowed,
-        thinking_effort: _,
+        thinking_effort,
         speed: _,
     } = request;
 
@@ -1271,8 +1282,12 @@ fn into_copilot_responses(
         tools: converted_tools,
         tool_choice: mapped_tool_choice,
         reasoning: if thinking_allowed {
+            let effort = thinking_effort
+                .as_deref()
+                .and_then(|e| e.parse::<copilot_responses::ReasoningEffort>().ok())
+                .unwrap_or(copilot_responses::ReasoningEffort::Medium);
             Some(copilot_responses::ReasoningConfig {
-                effort: copilot_responses::ReasoningEffort::Medium,
+                effort,
                 summary: Some(copilot_responses::ReasoningSummary::Detailed),
             })
         } else {