Cargo.lock 🔗
@@ -3696,6 +3696,7 @@ dependencies = [
name = "copilot_chat"
version = "0.1.0"
dependencies = [
+ "anthropic",
"anyhow",
"collections",
"dirs 4.0.0",
Created by John Tur.
Fixes https://github.com/zed-industries/zed/issues/45668
The https://github.com/microsoft/vscode-copilot-chat repository was used as a
reference for the headers and properties we need to set.
| Before | After |
| --- | --- |
| <img width="300" src="https://github.com/user-attachments/assets/d112a9ef-52d2-42ff-a77b-4b4b15f950fe" /> | <img width="300" src="https://github.com/user-attachments/assets/0f1d7ae0-bee1-46f7-92ef-aea0fa6cde7a" /> |
Release Notes:
- Enabled thinking mode when using Anthropic models with Copilot
Cargo.lock | 1
crates/anthropic/src/anthropic.rs | 2
crates/copilot_chat/Cargo.toml | 1
crates/copilot_chat/src/copilot_chat.rs | 185 ++++++++++++++
crates/copilot_chat/src/responses.rs | 18
crates/language_models/src/provider/copilot_chat.rs | 194 +++++++++++++-
6 files changed, 370 insertions(+), 31 deletions(-)
@@ -3696,6 +3696,7 @@ dependencies = [
name = "copilot_chat"
version = "0.1.0"
dependencies = [
+ "anthropic",
"anyhow",
"collections",
"dirs 4.0.0",
@@ -995,7 +995,7 @@ pub enum Speed {
}
#[derive(Debug, Serialize, Deserialize)]
-struct StreamingRequest {
+pub struct StreamingRequest {
#[serde(flatten)]
pub base: Request,
pub stream: bool,
@@ -21,6 +21,7 @@ test-support = [
]
[dependencies]
+anthropic.workspace = true
anyhow.workspace = true
collections.workspace = true
dirs.workspace = true
@@ -52,6 +52,10 @@ impl CopilotChatConfiguration {
format!("{}/responses", api_endpoint)
}
+ pub fn messages_url(&self, api_endpoint: &str) -> String {
+ format!("{}/v1/messages", api_endpoint)
+ }
+
pub fn models_url(&self, api_endpoint: &str) -> String {
format!("{}/models", api_endpoint)
}
@@ -77,6 +81,30 @@ pub enum Role {
System,
}
+#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
+pub enum ChatLocation {
+ #[default]
+ Panel,
+ Editor,
+ EditingSession,
+ Terminal,
+ Agent,
+ Other,
+}
+
+impl ChatLocation {
+ pub fn to_intent_string(self) -> &'static str {
+ match self {
+ ChatLocation::Panel => "conversation-panel",
+ ChatLocation::Editor => "conversation-inline",
+ ChatLocation::EditingSession => "conversation-edits",
+ ChatLocation::Terminal => "conversation-terminal",
+ ChatLocation::Agent => "conversation-agent",
+ ChatLocation::Other => "conversation-other",
+ }
+ }
+}
+
#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)]
pub enum ModelSupportedEndpoint {
#[serde(rename = "/chat/completions")]
@@ -179,6 +207,16 @@ struct ModelSupportedFeatures {
parallel_tool_calls: bool,
#[serde(default)]
vision: bool,
+ #[serde(default)]
+ thinking: bool,
+ #[serde(default)]
+ adaptive_thinking: bool,
+ #[serde(default)]
+ max_thinking_budget: Option<u32>,
+ #[serde(default)]
+ min_thinking_budget: Option<u32>,
+ #[serde(default)]
+ reasoning_effort: Vec<String>,
}
#[derive(Clone, Copy, Serialize, Deserialize, Debug, Eq, PartialEq)]
@@ -226,6 +264,10 @@ impl Model {
self.capabilities.limits.max_context_window_tokens as u64
}
+ pub fn max_output_tokens(&self) -> usize {
+ self.capabilities.limits.max_output_tokens
+ }
+
pub fn supports_tools(&self) -> bool {
self.capabilities.supports.tool_calls
}
@@ -256,6 +298,41 @@ impl Model {
.contains(&ModelSupportedEndpoint::Responses)
}
+ pub fn supports_messages(&self) -> bool {
+ self.supported_endpoints
+ .contains(&ModelSupportedEndpoint::Messages)
+ }
+
+ pub fn supports_thinking(&self) -> bool {
+ self.capabilities.supports.thinking
+ }
+
+ pub fn supports_adaptive_thinking(&self) -> bool {
+ self.capabilities.supports.adaptive_thinking
+ }
+
+ pub fn can_think(&self) -> bool {
+ self.supports_thinking()
+ || self.supports_adaptive_thinking()
+ || self.max_thinking_budget().is_some()
+ }
+
+ pub fn max_thinking_budget(&self) -> Option<u32> {
+ self.capabilities.supports.max_thinking_budget
+ }
+
+ pub fn min_thinking_budget(&self) -> Option<u32> {
+ self.capabilities.supports.min_thinking_budget
+ }
+
+ pub fn reasoning_effort_levels(&self) -> &[String] {
+ &self.capabilities.supports.reasoning_effort
+ }
+
+ pub fn family(&self) -> &str {
+ &self.capabilities.family
+ }
+
pub fn multiplier(&self) -> f64 {
self.billing.multiplier
}
@@ -263,7 +340,6 @@ impl Model {
#[derive(Serialize, Deserialize)]
pub struct Request {
- pub intent: bool,
pub n: usize,
pub stream: bool,
pub temperature: f32,
@@ -273,6 +349,8 @@ pub struct Request {
pub tools: Vec<Tool>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub tool_choice: Option<ToolChoice>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub thinking_budget: Option<u32>,
}
#[derive(Serialize, Deserialize)]
@@ -550,6 +628,7 @@ impl CopilotChat {
pub async fn stream_completion(
request: Request,
+ location: ChatLocation,
is_user_initiated: bool,
mut cx: AsyncApp,
) -> Result<BoxStream<'static, Result<ResponseEvent>>> {
@@ -563,12 +642,14 @@ impl CopilotChat {
api_url.into(),
request,
is_user_initiated,
+ location,
)
.await
}
pub async fn stream_response(
request: responses::Request,
+ location: ChatLocation,
is_user_initiated: bool,
mut cx: AsyncApp,
) -> Result<BoxStream<'static, Result<responses::StreamEvent>>> {
@@ -582,6 +663,30 @@ impl CopilotChat {
api_url,
request,
is_user_initiated,
+ location,
+ )
+ .await
+ }
+
+ pub async fn stream_messages(
+ body: String,
+ location: ChatLocation,
+ is_user_initiated: bool,
+ anthropic_beta: Option<String>,
+ mut cx: AsyncApp,
+ ) -> Result<BoxStream<'static, Result<anthropic::Event, anthropic::AnthropicError>>> {
+ let (client, oauth_token, api_endpoint, configuration) =
+ Self::get_auth_details(&mut cx).await?;
+
+ let api_url = configuration.messages_url(&api_endpoint);
+ stream_messages(
+ client.clone(),
+ oauth_token,
+ api_url,
+ body,
+ is_user_initiated,
+ location,
+ anthropic_beta,
)
.await
}
@@ -755,6 +860,7 @@ pub(crate) fn copilot_request_headers(
builder: http_client::Builder,
oauth_token: &str,
is_user_initiated: Option<bool>,
+ location: Option<ChatLocation>,
) -> http_client::Builder {
builder
.header("Authorization", format!("Bearer {}", oauth_token))
@@ -766,12 +872,19 @@ pub(crate) fn copilot_request_headers(
option_env!("CARGO_PKG_VERSION").unwrap_or("unknown")
),
)
+ .header("X-GitHub-Api-Version", "2025-10-01")
.when_some(is_user_initiated, |builder, is_user_initiated| {
builder.header(
"X-Initiator",
if is_user_initiated { "user" } else { "agent" },
)
})
+ .when_some(location, |builder, loc| {
+ let interaction_type = loc.to_intent_string();
+ builder
+ .header("X-Interaction-Type", interaction_type)
+ .header("OpenAI-Intent", interaction_type)
+ })
}
async fn request_models(
@@ -785,8 +898,8 @@ async fn request_models(
.uri(models_url.as_ref()),
&oauth_token,
None,
- )
- .header("x-github-api-version", "2025-05-01");
+ None,
+ );
let request = request_builder.body(AsyncBody::empty())?;
@@ -830,6 +943,7 @@ async fn stream_completion(
completion_url: Arc<str>,
request: Request,
is_user_initiated: bool,
+ location: ChatLocation,
) -> Result<BoxStream<'static, Result<ResponseEvent>>> {
let is_vision_request = request.messages.iter().any(|message| match message {
ChatMessage::User { content }
@@ -846,6 +960,7 @@ async fn stream_completion(
.uri(completion_url.as_ref()),
&oauth_token,
Some(is_user_initiated),
+ Some(location),
)
.when(is_vision_request, |builder| {
builder.header("Copilot-Vision-Request", is_vision_request.to_string())
@@ -905,6 +1020,65 @@ async fn stream_completion(
}
}
+async fn stream_messages(
+ client: Arc<dyn HttpClient>,
+ oauth_token: String,
+ api_url: String,
+ body: String,
+ is_user_initiated: bool,
+ location: ChatLocation,
+ anthropic_beta: Option<String>,
+) -> Result<BoxStream<'static, Result<anthropic::Event, anthropic::AnthropicError>>> {
+ let mut request_builder = copilot_request_headers(
+ HttpRequest::builder().method(Method::POST).uri(&api_url),
+ &oauth_token,
+ Some(is_user_initiated),
+ Some(location),
+ );
+
+ if let Some(beta) = &anthropic_beta {
+ request_builder = request_builder.header("anthropic-beta", beta.as_str());
+ }
+
+ let request = request_builder.body(AsyncBody::from(body))?;
+ let mut response = client.send(request).await?;
+
+ if !response.status().is_success() {
+ let mut body = String::new();
+ response.body_mut().read_to_string(&mut body).await?;
+ anyhow::bail!("Failed to connect to API: {} {}", response.status(), body);
+ }
+
+ let reader = BufReader::new(response.into_body());
+ Ok(reader
+ .lines()
+ .filter_map(|line| async move {
+ match line {
+ Ok(line) => {
+ let line = line
+ .strip_prefix("data: ")
+ .or_else(|| line.strip_prefix("data:"))?;
+ if line.starts_with("[DONE]") || line.is_empty() {
+ return None;
+ }
+ match serde_json::from_str(line) {
+ Ok(event) => Some(Ok(event)),
+ Err(error) => {
+ log::error!(
+ "Failed to parse Copilot messages stream event: `{}`\nResponse: `{}`",
+ error,
+ line,
+ );
+ Some(Err(anthropic::AnthropicError::DeserializeResponse(error)))
+ }
+ }
+ }
+ Err(error) => Some(Err(anthropic::AnthropicError::ReadResponse(error))),
+ }
+ })
+ .boxed())
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -1513,6 +1687,11 @@ mod tests {
tool_calls: true,
parallel_tool_calls: false,
vision: false,
+ thinking: false,
+ adaptive_thinking: false,
+ max_thinking_budget: None,
+ min_thinking_budget: None,
+ reasoning_effort: vec![],
},
model_type: "chat".to_string(),
tokenizer: None,
@@ -1,9 +1,9 @@
use std::sync::Arc;
-use super::copilot_request_headers;
+use super::{ChatLocation, copilot_request_headers};
use anyhow::{Result, anyhow};
use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
-use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest};
+use http_client::{AsyncBody, HttpClient, HttpRequestExt, Method, Request as HttpRequest};
use serde::{Deserialize, Serialize};
use serde_json::Value;
pub use settings::OpenAiReasoningEffort as ReasoningEffort;
@@ -24,6 +24,7 @@ pub struct Request {
pub reasoning: Option<ReasoningConfig>,
#[serde(skip_serializing_if = "Option::is_none")]
pub include: Option<Vec<ResponseIncludable>>,
+ pub store: bool,
}
#[derive(Serialize, Deserialize, Debug, Clone)]
@@ -280,6 +281,7 @@ pub async fn stream_response(
api_url: String,
request: Request,
is_user_initiated: bool,
+ location: ChatLocation,
) -> Result<BoxStream<'static, Result<StreamEvent>>> {
let is_vision_request = request.input.iter().any(|item| match item {
ResponseInputItem::Message {
@@ -295,13 +297,11 @@ pub async fn stream_response(
HttpRequest::builder().method(Method::POST).uri(&api_url),
&oauth_token,
Some(is_user_initiated),
- );
-
- let request_builder = if is_vision_request {
- request_builder.header("Copilot-Vision-Request", "true")
- } else {
- request_builder
- };
+ Some(location),
+ )
+ .when(is_vision_request, |builder| {
+ builder.header("Copilot-Vision-Request", "true")
+ });
let is_streaming = request.stream;
let json = serde_json::to_string(&request)?;
@@ -2,15 +2,17 @@ use std::pin::Pin;
use std::str::FromStr as _;
use std::sync::Arc;
+use anthropic::AnthropicModelMode;
use anyhow::{Result, anyhow};
use cloud_llm_client::CompletionIntent;
use collections::HashMap;
use copilot::{GlobalCopilotAuth, Status};
use copilot_chat::responses as copilot_responses;
use copilot_chat::{
- ChatMessage, ChatMessageContent, ChatMessagePart, CopilotChat, CopilotChatConfiguration,
- Function, FunctionContent, ImageUrl, Model as CopilotChatModel, ModelVendor,
- Request as CopilotChatRequest, ResponseEvent, Tool, ToolCall, ToolCallContent, ToolChoice,
+ ChatLocation, ChatMessage, ChatMessageContent, ChatMessagePart, CopilotChat,
+ CopilotChatConfiguration, Function, FunctionContent, ImageUrl, Model as CopilotChatModel,
+ ModelVendor, Request as CopilotChatRequest, ResponseEvent, Tool, ToolCall, ToolCallContent,
+ ToolChoice,
};
use futures::future::BoxFuture;
use futures::stream::BoxStream;
@@ -20,8 +22,8 @@ use http_client::StatusCode;
use language::language_settings::all_language_settings;
use language_model::{
AuthenticateError, IconOrSvg, LanguageModel, LanguageModelCompletionError,
- LanguageModelCompletionEvent, LanguageModelCostInfo, LanguageModelId, LanguageModelName,
- LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName,
+ LanguageModelCompletionEvent, LanguageModelCostInfo, LanguageModelEffortLevel, LanguageModelId,
+ LanguageModelName, LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName,
LanguageModelProviderState, LanguageModelRequest, LanguageModelRequestMessage,
LanguageModelToolChoice, LanguageModelToolResultContent, LanguageModelToolSchemaFormat,
LanguageModelToolUse, MessageContent, RateLimiter, Role, StopReason, TokenUsage,
@@ -30,6 +32,7 @@ use settings::SettingsStore;
use ui::prelude::*;
use util::debug_panic;
+use crate::provider::anthropic::{AnthropicEventMapper, into_anthropic};
use crate::provider::util::parse_tool_arguments;
const PROVIDER_ID: LanguageModelProviderId = LanguageModelProviderId::new("copilot_chat");
@@ -254,6 +257,33 @@ impl LanguageModel for CopilotChatLanguageModel {
self.model.supports_vision()
}
+ fn supports_thinking(&self) -> bool {
+ self.model.can_think()
+ }
+
+ fn supported_effort_levels(&self) -> Vec<LanguageModelEffortLevel> {
+ let levels = self.model.reasoning_effort_levels();
+ if levels.is_empty() {
+ return vec![];
+ }
+ levels
+ .iter()
+ .map(|level| {
+ let name: SharedString = match level.as_str() {
+ "low" => "Low".into(),
+ "medium" => "Medium".into(),
+ "high" => "High".into(),
+ _ => SharedString::from(level.clone()),
+ };
+ LanguageModelEffortLevel {
+ name,
+ value: SharedString::from(level.clone()),
+ is_default: level == "high",
+ }
+ })
+ .collect()
+ }
+
fn tool_input_format(&self) -> LanguageModelToolSchemaFormat {
match self.model.vendor() {
ModelVendor::OpenAI | ModelVendor::Anthropic => {
@@ -333,12 +363,94 @@ impl LanguageModel for CopilotChatLanguageModel {
| CompletionIntent::EditFile => false,
});
+ if self.model.supports_messages() {
+ let location = intent_to_chat_location(request.intent);
+ let model = self.model.clone();
+ let request_limiter = self.request_limiter.clone();
+ let future = cx.spawn(async move |cx| {
+ let effort = request
+ .thinking_effort
+ .as_ref()
+ .and_then(|e| anthropic::Effort::from_str(e).ok());
+
+ let mut anthropic_request = into_anthropic(
+ request,
+ model.id().to_string(),
+ 0.0,
+ model.max_output_tokens() as u64,
+ if model.supports_adaptive_thinking() {
+ AnthropicModelMode::Thinking {
+ budget_tokens: None,
+ }
+ } else if model.can_think() {
+ AnthropicModelMode::Thinking {
+ budget_tokens: compute_thinking_budget(
+ model.min_thinking_budget(),
+ model.max_thinking_budget(),
+ model.max_output_tokens() as u32,
+ ),
+ }
+ } else {
+ AnthropicModelMode::Default
+ },
+ );
+
+ anthropic_request.temperature = None;
+
+ // The Copilot proxy doesn't support eager_input_streaming on tools.
+ for tool in &mut anthropic_request.tools {
+ tool.eager_input_streaming = false;
+ }
+
+ if model.supports_adaptive_thinking() {
+ if anthropic_request.thinking.is_some() {
+ anthropic_request.thinking = Some(anthropic::Thinking::Adaptive);
+ anthropic_request.output_config = Some(anthropic::OutputConfig { effort });
+ }
+ }
+
+ let anthropic_beta = if !model.supports_adaptive_thinking() && model.can_think() {
+ Some("interleaved-thinking-2025-05-14".to_string())
+ } else {
+ None
+ };
+
+ let body = serde_json::to_string(&anthropic::StreamingRequest {
+ base: anthropic_request,
+ stream: true,
+ })
+ .map_err(|e| anyhow::anyhow!(e))?;
+
+ let stream = CopilotChat::stream_messages(
+ body,
+ location,
+ is_user_initiated,
+ anthropic_beta,
+ cx.clone(),
+ );
+
+ request_limiter
+ .stream(async move {
+ let events = stream.await?;
+ let mapper = AnthropicEventMapper::new();
+ Ok(mapper.map_stream(events).boxed())
+ })
+ .await
+ });
+ return async move { Ok(future.await?.boxed()) }.boxed();
+ }
+
if self.model.supports_response() {
+ let location = intent_to_chat_location(request.intent);
let responses_request = into_copilot_responses(&self.model, request);
let request_limiter = self.request_limiter.clone();
let future = cx.spawn(async move |cx| {
- let request =
- CopilotChat::stream_response(responses_request, is_user_initiated, cx.clone());
+ let request = CopilotChat::stream_response(
+ responses_request,
+ location,
+ is_user_initiated,
+ cx.clone(),
+ );
request_limiter
.stream(async move {
let stream = request.await?;
@@ -350,6 +462,7 @@ impl LanguageModel for CopilotChatLanguageModel {
return async move { Ok(future.await?.boxed()) }.boxed();
}
+ let location = intent_to_chat_location(request.intent);
let copilot_request = match into_copilot_chat(&self.model, request) {
Ok(request) => request,
Err(err) => return futures::future::ready(Err(err.into())).boxed(),
@@ -358,8 +471,12 @@ impl LanguageModel for CopilotChatLanguageModel {
let request_limiter = self.request_limiter.clone();
let future = cx.spawn(async move |cx| {
- let request =
- CopilotChat::stream_completion(copilot_request, is_user_initiated, cx.clone());
+ let request = CopilotChat::stream_completion(
+ copilot_request,
+ location,
+ is_user_initiated,
+ cx.clone(),
+ );
request_limiter
.stream(async move {
let response = request.await?;
@@ -761,6 +878,9 @@ fn into_copilot_chat(
model: &CopilotChatModel,
request: LanguageModelRequest,
) -> Result<CopilotChatRequest> {
+ let temperature = request.temperature;
+ let tool_choice = request.tool_choice;
+
let mut request_messages: Vec<LanguageModelRequestMessage> = Vec::new();
for message in request.messages {
if let Some(last_message) = request_messages.last_mut() {
@@ -859,10 +979,9 @@ fn into_copilot_chat(
let text_content = {
let mut buffer = String::new();
for string in message.content.iter().filter_map(|content| match content {
- MessageContent::Text(text) | MessageContent::Thinking { text, .. } => {
- Some(text.as_str())
- }
- MessageContent::ToolUse(_)
+ MessageContent::Text(text) => Some(text.as_str()),
+ MessageContent::Thinking { .. }
+ | MessageContent::ToolUse(_)
| MessageContent::RedactedThinking(_)
| MessageContent::ToolResult(_)
| MessageContent::Image(_) => None,
@@ -919,21 +1038,52 @@ fn into_copilot_chat(
.collect::<Vec<_>>();
Ok(CopilotChatRequest {
- intent: true,
n: 1,
stream: model.uses_streaming(),
- temperature: 0.1,
+ temperature: temperature.unwrap_or(0.1),
model: model.id().to_string(),
messages,
tools,
- tool_choice: request.tool_choice.map(|choice| match choice {
+ tool_choice: tool_choice.map(|choice| match choice {
LanguageModelToolChoice::Auto => ToolChoice::Auto,
LanguageModelToolChoice::Any => ToolChoice::Any,
LanguageModelToolChoice::None => ToolChoice::None,
}),
+ thinking_budget: None,
})
}
+fn compute_thinking_budget(
+ min_budget: Option<u32>,
+ max_budget: Option<u32>,
+ max_output_tokens: u32,
+) -> Option<u32> {
+ let configured_budget: u32 = 16000;
+ let min_budget = min_budget.unwrap_or(1024);
+ let max_budget = max_budget.unwrap_or(max_output_tokens.saturating_sub(1));
+ let normalized = configured_budget.max(min_budget);
+ Some(
+ normalized
+ .min(max_budget)
+ .min(max_output_tokens.saturating_sub(1)),
+ )
+}
+
+fn intent_to_chat_location(intent: Option<CompletionIntent>) -> ChatLocation {
+ match intent {
+ Some(CompletionIntent::UserPrompt) => ChatLocation::Agent,
+ Some(CompletionIntent::ToolResults) => ChatLocation::Agent,
+ Some(CompletionIntent::ThreadSummarization) => ChatLocation::Panel,
+ Some(CompletionIntent::ThreadContextSummarization) => ChatLocation::Panel,
+ Some(CompletionIntent::CreateFile) => ChatLocation::Agent,
+ Some(CompletionIntent::EditFile) => ChatLocation::Agent,
+ Some(CompletionIntent::InlineAssist) => ChatLocation::Editor,
+ Some(CompletionIntent::TerminalInlineAssist) => ChatLocation::Terminal,
+ Some(CompletionIntent::GenerateGitCommitMessage) => ChatLocation::Other,
+ None => ChatLocation::Panel,
+ }
+}
+
fn into_copilot_responses(
model: &CopilotChatModel,
request: LanguageModelRequest,
@@ -949,7 +1099,7 @@ fn into_copilot_responses(
tool_choice,
stop: _,
temperature,
- thinking_allowed: _,
+ thinking_allowed,
thinking_effort: _,
speed: _,
} = request;
@@ -1128,10 +1278,18 @@ fn into_copilot_responses(
temperature,
tools: converted_tools,
tool_choice: mapped_tool_choice,
- reasoning: None, // We would need to add support for setting from user settings.
+ reasoning: if thinking_allowed {
+ Some(copilot_responses::ReasoningConfig {
+ effort: copilot_responses::ReasoningEffort::Medium,
+ summary: Some(copilot_responses::ReasoningSummary::Detailed),
+ })
+ } else {
+ None
+ },
include: Some(vec![
copilot_responses::ResponseIncludable::ReasoningEncryptedContent,
]),
+ store: false,
}
}