Enable extended reasoning for Anthropic models in Copilot (#46540)

John Tur created

Fixes https://github.com/zed-industries/zed/issues/45668

https://github.com/microsoft/vscode-copilot-chat used as a reference for
headers and properties we need to set

| Before | After | 
| --- | --- |
| <img width="300" src="https://github.com/user-attachments/assets/d112a9ef-52d2-42ff-a77b-4b4b15f950fe" /> | <img width="300" src="https://github.com/user-attachments/assets/0f1d7ae0-bee1-46f7-92ef-aea0fa6cde7a" /> |

Release Notes:

- Enabled thinking mode when using Anthropic models with Copilot

Change summary

Cargo.lock                                          |   1 
crates/anthropic/src/anthropic.rs                   |   2 
crates/copilot_chat/Cargo.toml                      |   1 
crates/copilot_chat/src/copilot_chat.rs             | 185 ++++++++++++++
crates/copilot_chat/src/responses.rs                |  18 
crates/language_models/src/provider/copilot_chat.rs | 194 +++++++++++++-
6 files changed, 370 insertions(+), 31 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -3696,6 +3696,7 @@ dependencies = [
 name = "copilot_chat"
 version = "0.1.0"
 dependencies = [
+ "anthropic",
  "anyhow",
  "collections",
  "dirs 4.0.0",

crates/anthropic/src/anthropic.rs 🔗

@@ -995,7 +995,7 @@ pub enum Speed {
 }
 
 #[derive(Debug, Serialize, Deserialize)]
-struct StreamingRequest {
+pub struct StreamingRequest {
     #[serde(flatten)]
     pub base: Request,
     pub stream: bool,

crates/copilot_chat/Cargo.toml 🔗

@@ -21,6 +21,7 @@ test-support = [
 ]
 
 [dependencies]
+anthropic.workspace = true
 anyhow.workspace = true
 collections.workspace = true
 dirs.workspace = true

crates/copilot_chat/src/copilot_chat.rs 🔗

@@ -52,6 +52,10 @@ impl CopilotChatConfiguration {
         format!("{}/responses", api_endpoint)
     }
 
+    pub fn messages_url(&self, api_endpoint: &str) -> String {
+        format!("{}/v1/messages", api_endpoint)
+    }
+
     pub fn models_url(&self, api_endpoint: &str) -> String {
         format!("{}/models", api_endpoint)
     }
@@ -77,6 +81,30 @@ pub enum Role {
     System,
 }
 
+#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
+pub enum ChatLocation {
+    #[default]
+    Panel,
+    Editor,
+    EditingSession,
+    Terminal,
+    Agent,
+    Other,
+}
+
+impl ChatLocation {
+    pub fn to_intent_string(self) -> &'static str {
+        match self {
+            ChatLocation::Panel => "conversation-panel",
+            ChatLocation::Editor => "conversation-inline",
+            ChatLocation::EditingSession => "conversation-edits",
+            ChatLocation::Terminal => "conversation-terminal",
+            ChatLocation::Agent => "conversation-agent",
+            ChatLocation::Other => "conversation-other",
+        }
+    }
+}
+
 #[derive(Deserialize, Serialize, Debug, Clone, PartialEq)]
 pub enum ModelSupportedEndpoint {
     #[serde(rename = "/chat/completions")]
@@ -179,6 +207,16 @@ struct ModelSupportedFeatures {
     parallel_tool_calls: bool,
     #[serde(default)]
     vision: bool,
+    #[serde(default)]
+    thinking: bool,
+    #[serde(default)]
+    adaptive_thinking: bool,
+    #[serde(default)]
+    max_thinking_budget: Option<u32>,
+    #[serde(default)]
+    min_thinking_budget: Option<u32>,
+    #[serde(default)]
+    reasoning_effort: Vec<String>,
 }
 
 #[derive(Clone, Copy, Serialize, Deserialize, Debug, Eq, PartialEq)]
@@ -226,6 +264,10 @@ impl Model {
         self.capabilities.limits.max_context_window_tokens as u64
     }
 
+    pub fn max_output_tokens(&self) -> usize {
+        self.capabilities.limits.max_output_tokens
+    }
+
     pub fn supports_tools(&self) -> bool {
         self.capabilities.supports.tool_calls
     }
@@ -256,6 +298,41 @@ impl Model {
                 .contains(&ModelSupportedEndpoint::Responses)
     }
 
+    pub fn supports_messages(&self) -> bool {
+        self.supported_endpoints
+            .contains(&ModelSupportedEndpoint::Messages)
+    }
+
+    pub fn supports_thinking(&self) -> bool {
+        self.capabilities.supports.thinking
+    }
+
+    pub fn supports_adaptive_thinking(&self) -> bool {
+        self.capabilities.supports.adaptive_thinking
+    }
+
+    pub fn can_think(&self) -> bool {
+        self.supports_thinking()
+            || self.supports_adaptive_thinking()
+            || self.max_thinking_budget().is_some()
+    }
+
+    pub fn max_thinking_budget(&self) -> Option<u32> {
+        self.capabilities.supports.max_thinking_budget
+    }
+
+    pub fn min_thinking_budget(&self) -> Option<u32> {
+        self.capabilities.supports.min_thinking_budget
+    }
+
+    pub fn reasoning_effort_levels(&self) -> &[String] {
+        &self.capabilities.supports.reasoning_effort
+    }
+
+    pub fn family(&self) -> &str {
+        &self.capabilities.family
+    }
+
     pub fn multiplier(&self) -> f64 {
         self.billing.multiplier
     }
@@ -263,7 +340,6 @@ impl Model {
 
 #[derive(Serialize, Deserialize)]
 pub struct Request {
-    pub intent: bool,
     pub n: usize,
     pub stream: bool,
     pub temperature: f32,
@@ -273,6 +349,8 @@ pub struct Request {
     pub tools: Vec<Tool>,
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub tool_choice: Option<ToolChoice>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub thinking_budget: Option<u32>,
 }
 
 #[derive(Serialize, Deserialize)]
@@ -550,6 +628,7 @@ impl CopilotChat {
 
     pub async fn stream_completion(
         request: Request,
+        location: ChatLocation,
         is_user_initiated: bool,
         mut cx: AsyncApp,
     ) -> Result<BoxStream<'static, Result<ResponseEvent>>> {
@@ -563,12 +642,14 @@ impl CopilotChat {
             api_url.into(),
             request,
             is_user_initiated,
+            location,
         )
         .await
     }
 
     pub async fn stream_response(
         request: responses::Request,
+        location: ChatLocation,
         is_user_initiated: bool,
         mut cx: AsyncApp,
     ) -> Result<BoxStream<'static, Result<responses::StreamEvent>>> {
@@ -582,6 +663,30 @@ impl CopilotChat {
             api_url,
             request,
             is_user_initiated,
+            location,
+        )
+        .await
+    }
+
+    pub async fn stream_messages(
+        body: String,
+        location: ChatLocation,
+        is_user_initiated: bool,
+        anthropic_beta: Option<String>,
+        mut cx: AsyncApp,
+    ) -> Result<BoxStream<'static, Result<anthropic::Event, anthropic::AnthropicError>>> {
+        let (client, oauth_token, api_endpoint, configuration) =
+            Self::get_auth_details(&mut cx).await?;
+
+        let api_url = configuration.messages_url(&api_endpoint);
+        stream_messages(
+            client.clone(),
+            oauth_token,
+            api_url,
+            body,
+            is_user_initiated,
+            location,
+            anthropic_beta,
         )
         .await
     }
@@ -755,6 +860,7 @@ pub(crate) fn copilot_request_headers(
     builder: http_client::Builder,
     oauth_token: &str,
     is_user_initiated: Option<bool>,
+    location: Option<ChatLocation>,
 ) -> http_client::Builder {
     builder
         .header("Authorization", format!("Bearer {}", oauth_token))
@@ -766,12 +872,19 @@ pub(crate) fn copilot_request_headers(
                 option_env!("CARGO_PKG_VERSION").unwrap_or("unknown")
             ),
         )
+        .header("X-GitHub-Api-Version", "2025-10-01")
         .when_some(is_user_initiated, |builder, is_user_initiated| {
             builder.header(
                 "X-Initiator",
                 if is_user_initiated { "user" } else { "agent" },
             )
         })
+        .when_some(location, |builder, loc| {
+            let interaction_type = loc.to_intent_string();
+            builder
+                .header("X-Interaction-Type", interaction_type)
+                .header("OpenAI-Intent", interaction_type)
+        })
 }
 
 async fn request_models(
@@ -785,8 +898,8 @@ async fn request_models(
             .uri(models_url.as_ref()),
         &oauth_token,
         None,
-    )
-    .header("x-github-api-version", "2025-05-01");
+        None,
+    );
 
     let request = request_builder.body(AsyncBody::empty())?;
 
@@ -830,6 +943,7 @@ async fn stream_completion(
     completion_url: Arc<str>,
     request: Request,
     is_user_initiated: bool,
+    location: ChatLocation,
 ) -> Result<BoxStream<'static, Result<ResponseEvent>>> {
     let is_vision_request = request.messages.iter().any(|message| match message {
         ChatMessage::User { content }
@@ -846,6 +960,7 @@ async fn stream_completion(
             .uri(completion_url.as_ref()),
         &oauth_token,
         Some(is_user_initiated),
+        Some(location),
     )
     .when(is_vision_request, |builder| {
         builder.header("Copilot-Vision-Request", is_vision_request.to_string())
@@ -905,6 +1020,65 @@ async fn stream_completion(
     }
 }
 
+async fn stream_messages(
+    client: Arc<dyn HttpClient>,
+    oauth_token: String,
+    api_url: String,
+    body: String,
+    is_user_initiated: bool,
+    location: ChatLocation,
+    anthropic_beta: Option<String>,
+) -> Result<BoxStream<'static, Result<anthropic::Event, anthropic::AnthropicError>>> {
+    let mut request_builder = copilot_request_headers(
+        HttpRequest::builder().method(Method::POST).uri(&api_url),
+        &oauth_token,
+        Some(is_user_initiated),
+        Some(location),
+    );
+
+    if let Some(beta) = &anthropic_beta {
+        request_builder = request_builder.header("anthropic-beta", beta.as_str());
+    }
+
+    let request = request_builder.body(AsyncBody::from(body))?;
+    let mut response = client.send(request).await?;
+
+    if !response.status().is_success() {
+        let mut body = String::new();
+        response.body_mut().read_to_string(&mut body).await?;
+        anyhow::bail!("Failed to connect to API: {} {}", response.status(), body);
+    }
+
+    let reader = BufReader::new(response.into_body());
+    Ok(reader
+        .lines()
+        .filter_map(|line| async move {
+            match line {
+                Ok(line) => {
+                    let line = line
+                        .strip_prefix("data: ")
+                        .or_else(|| line.strip_prefix("data:"))?;
+                    if line.starts_with("[DONE]") || line.is_empty() {
+                        return None;
+                    }
+                    match serde_json::from_str(line) {
+                        Ok(event) => Some(Ok(event)),
+                        Err(error) => {
+                            log::error!(
+                                "Failed to parse Copilot messages stream event: `{}`\nResponse: `{}`",
+                                error,
+                                line,
+                            );
+                            Some(Err(anthropic::AnthropicError::DeserializeResponse(error)))
+                        }
+                    }
+                }
+                Err(error) => Some(Err(anthropic::AnthropicError::ReadResponse(error))),
+            }
+        })
+        .boxed())
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -1513,6 +1687,11 @@ mod tests {
                     tool_calls: true,
                     parallel_tool_calls: false,
                     vision: false,
+                    thinking: false,
+                    adaptive_thinking: false,
+                    max_thinking_budget: None,
+                    min_thinking_budget: None,
+                    reasoning_effort: vec![],
                 },
                 model_type: "chat".to_string(),
                 tokenizer: None,

crates/copilot_chat/src/responses.rs 🔗

@@ -1,9 +1,9 @@
 use std::sync::Arc;
 
-use super::copilot_request_headers;
+use super::{ChatLocation, copilot_request_headers};
 use anyhow::{Result, anyhow};
 use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
-use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest};
+use http_client::{AsyncBody, HttpClient, HttpRequestExt, Method, Request as HttpRequest};
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
 pub use settings::OpenAiReasoningEffort as ReasoningEffort;
@@ -24,6 +24,7 @@ pub struct Request {
     pub reasoning: Option<ReasoningConfig>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub include: Option<Vec<ResponseIncludable>>,
+    pub store: bool,
 }
 
 #[derive(Serialize, Deserialize, Debug, Clone)]
@@ -280,6 +281,7 @@ pub async fn stream_response(
     api_url: String,
     request: Request,
     is_user_initiated: bool,
+    location: ChatLocation,
 ) -> Result<BoxStream<'static, Result<StreamEvent>>> {
     let is_vision_request = request.input.iter().any(|item| match item {
         ResponseInputItem::Message {
@@ -295,13 +297,11 @@ pub async fn stream_response(
         HttpRequest::builder().method(Method::POST).uri(&api_url),
         &oauth_token,
         Some(is_user_initiated),
-    );
-
-    let request_builder = if is_vision_request {
-        request_builder.header("Copilot-Vision-Request", "true")
-    } else {
-        request_builder
-    };
+        Some(location),
+    )
+    .when(is_vision_request, |builder| {
+        builder.header("Copilot-Vision-Request", "true")
+    });
 
     let is_streaming = request.stream;
     let json = serde_json::to_string(&request)?;

crates/language_models/src/provider/copilot_chat.rs 🔗

@@ -2,15 +2,17 @@ use std::pin::Pin;
 use std::str::FromStr as _;
 use std::sync::Arc;
 
+use anthropic::AnthropicModelMode;
 use anyhow::{Result, anyhow};
 use cloud_llm_client::CompletionIntent;
 use collections::HashMap;
 use copilot::{GlobalCopilotAuth, Status};
 use copilot_chat::responses as copilot_responses;
 use copilot_chat::{
-    ChatMessage, ChatMessageContent, ChatMessagePart, CopilotChat, CopilotChatConfiguration,
-    Function, FunctionContent, ImageUrl, Model as CopilotChatModel, ModelVendor,
-    Request as CopilotChatRequest, ResponseEvent, Tool, ToolCall, ToolCallContent, ToolChoice,
+    ChatLocation, ChatMessage, ChatMessageContent, ChatMessagePart, CopilotChat,
+    CopilotChatConfiguration, Function, FunctionContent, ImageUrl, Model as CopilotChatModel,
+    ModelVendor, Request as CopilotChatRequest, ResponseEvent, Tool, ToolCall, ToolCallContent,
+    ToolChoice,
 };
 use futures::future::BoxFuture;
 use futures::stream::BoxStream;
@@ -20,8 +22,8 @@ use http_client::StatusCode;
 use language::language_settings::all_language_settings;
 use language_model::{
     AuthenticateError, IconOrSvg, LanguageModel, LanguageModelCompletionError,
-    LanguageModelCompletionEvent, LanguageModelCostInfo, LanguageModelId, LanguageModelName,
-    LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName,
+    LanguageModelCompletionEvent, LanguageModelCostInfo, LanguageModelEffortLevel, LanguageModelId,
+    LanguageModelName, LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName,
     LanguageModelProviderState, LanguageModelRequest, LanguageModelRequestMessage,
     LanguageModelToolChoice, LanguageModelToolResultContent, LanguageModelToolSchemaFormat,
     LanguageModelToolUse, MessageContent, RateLimiter, Role, StopReason, TokenUsage,
@@ -30,6 +32,7 @@ use settings::SettingsStore;
 use ui::prelude::*;
 use util::debug_panic;
 
+use crate::provider::anthropic::{AnthropicEventMapper, into_anthropic};
 use crate::provider::util::parse_tool_arguments;
 
 const PROVIDER_ID: LanguageModelProviderId = LanguageModelProviderId::new("copilot_chat");
@@ -254,6 +257,33 @@ impl LanguageModel for CopilotChatLanguageModel {
         self.model.supports_vision()
     }
 
+    fn supports_thinking(&self) -> bool {
+        self.model.can_think()
+    }
+
+    fn supported_effort_levels(&self) -> Vec<LanguageModelEffortLevel> {
+        let levels = self.model.reasoning_effort_levels();
+        if levels.is_empty() {
+            return vec![];
+        }
+        levels
+            .iter()
+            .map(|level| {
+                let name: SharedString = match level.as_str() {
+                    "low" => "Low".into(),
+                    "medium" => "Medium".into(),
+                    "high" => "High".into(),
+                    _ => SharedString::from(level.clone()),
+                };
+                LanguageModelEffortLevel {
+                    name,
+                    value: SharedString::from(level.clone()),
+                    is_default: level == "high",
+                }
+            })
+            .collect()
+    }
+
     fn tool_input_format(&self) -> LanguageModelToolSchemaFormat {
         match self.model.vendor() {
             ModelVendor::OpenAI | ModelVendor::Anthropic => {
@@ -333,12 +363,94 @@ impl LanguageModel for CopilotChatLanguageModel {
             | CompletionIntent::EditFile => false,
         });
 
+        if self.model.supports_messages() {
+            let location = intent_to_chat_location(request.intent);
+            let model = self.model.clone();
+            let request_limiter = self.request_limiter.clone();
+            let future = cx.spawn(async move |cx| {
+                let effort = request
+                    .thinking_effort
+                    .as_ref()
+                    .and_then(|e| anthropic::Effort::from_str(e).ok());
+
+                let mut anthropic_request = into_anthropic(
+                    request,
+                    model.id().to_string(),
+                    0.0,
+                    model.max_output_tokens() as u64,
+                    if model.supports_adaptive_thinking() {
+                        AnthropicModelMode::Thinking {
+                            budget_tokens: None,
+                        }
+                    } else if model.can_think() {
+                        AnthropicModelMode::Thinking {
+                            budget_tokens: compute_thinking_budget(
+                                model.min_thinking_budget(),
+                                model.max_thinking_budget(),
+                                model.max_output_tokens() as u32,
+                            ),
+                        }
+                    } else {
+                        AnthropicModelMode::Default
+                    },
+                );
+
+                anthropic_request.temperature = None;
+
+                // The Copilot proxy doesn't support eager_input_streaming on tools.
+                for tool in &mut anthropic_request.tools {
+                    tool.eager_input_streaming = false;
+                }
+
+                if model.supports_adaptive_thinking() {
+                    if anthropic_request.thinking.is_some() {
+                        anthropic_request.thinking = Some(anthropic::Thinking::Adaptive);
+                        anthropic_request.output_config = Some(anthropic::OutputConfig { effort });
+                    }
+                }
+
+                let anthropic_beta = if !model.supports_adaptive_thinking() && model.can_think() {
+                    Some("interleaved-thinking-2025-05-14".to_string())
+                } else {
+                    None
+                };
+
+                let body = serde_json::to_string(&anthropic::StreamingRequest {
+                    base: anthropic_request,
+                    stream: true,
+                })
+                .map_err(|e| anyhow::anyhow!(e))?;
+
+                let stream = CopilotChat::stream_messages(
+                    body,
+                    location,
+                    is_user_initiated,
+                    anthropic_beta,
+                    cx.clone(),
+                );
+
+                request_limiter
+                    .stream(async move {
+                        let events = stream.await?;
+                        let mapper = AnthropicEventMapper::new();
+                        Ok(mapper.map_stream(events).boxed())
+                    })
+                    .await
+            });
+            return async move { Ok(future.await?.boxed()) }.boxed();
+        }
+
         if self.model.supports_response() {
+            let location = intent_to_chat_location(request.intent);
             let responses_request = into_copilot_responses(&self.model, request);
             let request_limiter = self.request_limiter.clone();
             let future = cx.spawn(async move |cx| {
-                let request =
-                    CopilotChat::stream_response(responses_request, is_user_initiated, cx.clone());
+                let request = CopilotChat::stream_response(
+                    responses_request,
+                    location,
+                    is_user_initiated,
+                    cx.clone(),
+                );
                 request_limiter
                     .stream(async move {
                         let stream = request.await?;
@@ -350,6 +462,7 @@ impl LanguageModel for CopilotChatLanguageModel {
             return async move { Ok(future.await?.boxed()) }.boxed();
         }
 
+        let location = intent_to_chat_location(request.intent);
         let copilot_request = match into_copilot_chat(&self.model, request) {
             Ok(request) => request,
             Err(err) => return futures::future::ready(Err(err.into())).boxed(),
@@ -358,8 +471,12 @@ impl LanguageModel for CopilotChatLanguageModel {
 
         let request_limiter = self.request_limiter.clone();
         let future = cx.spawn(async move |cx| {
-            let request =
-                CopilotChat::stream_completion(copilot_request, is_user_initiated, cx.clone());
+            let request = CopilotChat::stream_completion(
+                copilot_request,
+                location,
+                is_user_initiated,
+                cx.clone(),
+            );
             request_limiter
                 .stream(async move {
                     let response = request.await?;
@@ -761,6 +878,9 @@ fn into_copilot_chat(
     model: &CopilotChatModel,
     request: LanguageModelRequest,
 ) -> Result<CopilotChatRequest> {
+    let temperature = request.temperature;
+    let tool_choice = request.tool_choice;
+
     let mut request_messages: Vec<LanguageModelRequestMessage> = Vec::new();
     for message in request.messages {
         if let Some(last_message) = request_messages.last_mut() {
@@ -859,10 +979,9 @@ fn into_copilot_chat(
                 let text_content = {
                     let mut buffer = String::new();
                     for string in message.content.iter().filter_map(|content| match content {
-                        MessageContent::Text(text) | MessageContent::Thinking { text, .. } => {
-                            Some(text.as_str())
-                        }
-                        MessageContent::ToolUse(_)
+                        MessageContent::Text(text) => Some(text.as_str()),
+                        MessageContent::Thinking { .. }
+                        | MessageContent::ToolUse(_)
                         | MessageContent::RedactedThinking(_)
                         | MessageContent::ToolResult(_)
                         | MessageContent::Image(_) => None,
@@ -919,21 +1038,52 @@ fn into_copilot_chat(
         .collect::<Vec<_>>();
 
     Ok(CopilotChatRequest {
-        intent: true,
         n: 1,
         stream: model.uses_streaming(),
-        temperature: 0.1,
+        temperature: temperature.unwrap_or(0.1),
         model: model.id().to_string(),
         messages,
         tools,
-        tool_choice: request.tool_choice.map(|choice| match choice {
+        tool_choice: tool_choice.map(|choice| match choice {
             LanguageModelToolChoice::Auto => ToolChoice::Auto,
             LanguageModelToolChoice::Any => ToolChoice::Any,
             LanguageModelToolChoice::None => ToolChoice::None,
         }),
+        thinking_budget: None,
     })
 }
 
+fn compute_thinking_budget(
+    min_budget: Option<u32>,
+    max_budget: Option<u32>,
+    max_output_tokens: u32,
+) -> Option<u32> {
+    let configured_budget: u32 = 16000;
+    let min_budget = min_budget.unwrap_or(1024);
+    let max_budget = max_budget.unwrap_or(max_output_tokens.saturating_sub(1));
+    let normalized = configured_budget.max(min_budget);
+    Some(
+        normalized
+            .min(max_budget)
+            .min(max_output_tokens.saturating_sub(1)),
+    )
+}
+
+fn intent_to_chat_location(intent: Option<CompletionIntent>) -> ChatLocation {
+    match intent {
+        Some(CompletionIntent::UserPrompt) => ChatLocation::Agent,
+        Some(CompletionIntent::ToolResults) => ChatLocation::Agent,
+        Some(CompletionIntent::ThreadSummarization) => ChatLocation::Panel,
+        Some(CompletionIntent::ThreadContextSummarization) => ChatLocation::Panel,
+        Some(CompletionIntent::CreateFile) => ChatLocation::Agent,
+        Some(CompletionIntent::EditFile) => ChatLocation::Agent,
+        Some(CompletionIntent::InlineAssist) => ChatLocation::Editor,
+        Some(CompletionIntent::TerminalInlineAssist) => ChatLocation::Terminal,
+        Some(CompletionIntent::GenerateGitCommitMessage) => ChatLocation::Other,
+        None => ChatLocation::Panel,
+    }
+}
+
 fn into_copilot_responses(
     model: &CopilotChatModel,
     request: LanguageModelRequest,
@@ -949,7 +1099,7 @@ fn into_copilot_responses(
         tool_choice,
         stop: _,
         temperature,
-        thinking_allowed: _,
+        thinking_allowed,
         thinking_effort: _,
         speed: _,
     } = request;
@@ -1128,10 +1278,18 @@ fn into_copilot_responses(
         temperature,
         tools: converted_tools,
         tool_choice: mapped_tool_choice,
-        reasoning: None, // We would need to add support for setting from user settings.
+        reasoning: if thinking_allowed {
+            Some(copilot_responses::ReasoningConfig {
+                effort: copilot_responses::ReasoningEffort::Medium,
+                summary: Some(copilot_responses::ReasoningSummary::Detailed),
+            })
+        } else {
+            None
+        },
         include: Some(vec![
             copilot_responses::ResponseIncludable::ReasoningEncryptedContent,
         ]),
+        store: false,
     }
 }