diff --git a/Cargo.lock b/Cargo.lock index 3b5ad9a7b35b8e9acd37b5e40efd8a32e65bdc21..2436baad07e78670837490cf8e9bc897ba0b6716 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3696,6 +3696,7 @@ dependencies = [ name = "copilot_chat" version = "0.1.0" dependencies = [ + "anthropic", "anyhow", "collections", "dirs 4.0.0", diff --git a/crates/anthropic/src/anthropic.rs b/crates/anthropic/src/anthropic.rs index 6bff2be4c15841de597309b626e768bbf79e880a..a6509c81fa1ecabac32ff9e8bb0fafdddd9e7414 100644 --- a/crates/anthropic/src/anthropic.rs +++ b/crates/anthropic/src/anthropic.rs @@ -995,7 +995,7 @@ pub enum Speed { } #[derive(Debug, Serialize, Deserialize)] -struct StreamingRequest { +pub struct StreamingRequest { #[serde(flatten)] pub base: Request, pub stream: bool, diff --git a/crates/copilot_chat/Cargo.toml b/crates/copilot_chat/Cargo.toml index 991a58ac85227ebc84fad5a6d631fe17811fabd4..79159d59cc05aecd5d4298831a33698762d9a743 100644 --- a/crates/copilot_chat/Cargo.toml +++ b/crates/copilot_chat/Cargo.toml @@ -21,6 +21,7 @@ test-support = [ ] [dependencies] +anthropic.workspace = true anyhow.workspace = true collections.workspace = true dirs.workspace = true diff --git a/crates/copilot_chat/src/copilot_chat.rs b/crates/copilot_chat/src/copilot_chat.rs index 6ac7167c94f0b85e6470b2a20bbf3a17fe190b43..d1f339f89a01d1ed0d17e03b8712b42232177db8 100644 --- a/crates/copilot_chat/src/copilot_chat.rs +++ b/crates/copilot_chat/src/copilot_chat.rs @@ -52,6 +52,10 @@ impl CopilotChatConfiguration { format!("{}/responses", api_endpoint) } + pub fn messages_url(&self, api_endpoint: &str) -> String { + format!("{}/v1/messages", api_endpoint) + } + pub fn models_url(&self, api_endpoint: &str) -> String { format!("{}/models", api_endpoint) } @@ -77,6 +81,30 @@ pub enum Role { System, } +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] +pub enum ChatLocation { + #[default] + Panel, + Editor, + EditingSession, + Terminal, + Agent, + Other, +} + +impl ChatLocation { + pub fn to_intent_string(self) -> &'static str { + match self { + ChatLocation::Panel => "conversation-panel", + ChatLocation::Editor => "conversation-inline", + ChatLocation::EditingSession => "conversation-edits", + ChatLocation::Terminal => "conversation-terminal", + ChatLocation::Agent => "conversation-agent", + ChatLocation::Other => "conversation-other", + } + } +} + #[derive(Deserialize, Serialize, Debug, Clone, PartialEq)] pub enum ModelSupportedEndpoint { #[serde(rename = "/chat/completions")] @@ -179,6 +207,16 @@ struct ModelSupportedFeatures { parallel_tool_calls: bool, #[serde(default)] vision: bool, + #[serde(default)] + thinking: bool, + #[serde(default)] + adaptive_thinking: bool, + #[serde(default)] + max_thinking_budget: Option, + #[serde(default)] + min_thinking_budget: Option, + #[serde(default)] + reasoning_effort: Vec, } #[derive(Clone, Copy, Serialize, Deserialize, Debug, Eq, PartialEq)] @@ -226,6 +264,10 @@ impl Model { self.capabilities.limits.max_context_window_tokens as u64 } + pub fn max_output_tokens(&self) -> usize { + self.capabilities.limits.max_output_tokens + } + pub fn supports_tools(&self) -> bool { self.capabilities.supports.tool_calls } @@ -256,6 +298,41 @@ impl Model { .contains(&ModelSupportedEndpoint::Responses) } + pub fn supports_messages(&self) -> bool { + self.supported_endpoints + .contains(&ModelSupportedEndpoint::Messages) + } + + pub fn supports_thinking(&self) -> bool { + self.capabilities.supports.thinking + } + + pub fn supports_adaptive_thinking(&self) -> bool { + self.capabilities.supports.adaptive_thinking + } + + pub fn can_think(&self) -> bool { + self.supports_thinking() + || self.supports_adaptive_thinking() + || self.max_thinking_budget().is_some() + } + + pub fn max_thinking_budget(&self) -> Option { + self.capabilities.supports.max_thinking_budget + } + + pub fn min_thinking_budget(&self) -> Option { + self.capabilities.supports.min_thinking_budget + } + + pub fn reasoning_effort_levels(&self) -> &[String] { + &self.capabilities.supports.reasoning_effort + } + + pub fn family(&self) -> &str { + &self.capabilities.family + } + pub fn multiplier(&self) -> f64 { self.billing.multiplier } @@ -263,7 +340,6 @@ impl Model { #[derive(Serialize, Deserialize)] pub struct Request { - pub intent: bool, pub n: usize, pub stream: bool, pub temperature: f32, @@ -273,6 +349,8 @@ pub struct Request { pub tools: Vec, #[serde(default, skip_serializing_if = "Option::is_none")] pub tool_choice: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub thinking_budget: Option, } #[derive(Serialize, Deserialize)] @@ -550,6 +628,7 @@ impl CopilotChat { pub async fn stream_completion( request: Request, + location: ChatLocation, is_user_initiated: bool, mut cx: AsyncApp, ) -> Result>> { @@ -563,12 +642,14 @@ impl CopilotChat { api_url.into(), request, is_user_initiated, + location, ) .await } pub async fn stream_response( request: responses::Request, + location: ChatLocation, is_user_initiated: bool, mut cx: AsyncApp, ) -> Result>> { @@ -582,6 +663,30 @@ impl CopilotChat { api_url, request, is_user_initiated, + location, + ) + .await + } + + pub async fn stream_messages( + body: String, + location: ChatLocation, + is_user_initiated: bool, + anthropic_beta: Option, + mut cx: AsyncApp, + ) -> Result>> { + let (client, oauth_token, api_endpoint, configuration) = + Self::get_auth_details(&mut cx).await?; + + let api_url = configuration.messages_url(&api_endpoint); + stream_messages( + client.clone(), + oauth_token, + api_url, + body, + is_user_initiated, + location, + anthropic_beta, ) .await } @@ -755,6 +860,7 @@ pub(crate) fn copilot_request_headers( builder: http_client::Builder, oauth_token: &str, is_user_initiated: Option, + location: Option, ) -> http_client::Builder { builder .header("Authorization", format!("Bearer {}", oauth_token)) @@ -766,12 +872,19 @@ pub(crate) fn copilot_request_headers( option_env!("CARGO_PKG_VERSION").unwrap_or("unknown") ), ) + .header("X-GitHub-Api-Version", "2025-10-01") .when_some(is_user_initiated, |builder, is_user_initiated| { builder.header( "X-Initiator", if is_user_initiated { "user" } else { "agent" }, ) }) + .when_some(location, |builder, loc| { + let interaction_type = loc.to_intent_string(); + builder + .header("X-Interaction-Type", interaction_type) + .header("OpenAI-Intent", interaction_type) + }) } async fn request_models( @@ -785,8 +898,8 @@ async fn request_models( .uri(models_url.as_ref()), &oauth_token, None, - ) - .header("x-github-api-version", "2025-05-01"); + None, + ); let request = request_builder.body(AsyncBody::empty())?; @@ -830,6 +943,7 @@ async fn stream_completion( completion_url: Arc, request: Request, is_user_initiated: bool, + location: ChatLocation, ) -> Result>> { let is_vision_request = request.messages.iter().any(|message| match message { ChatMessage::User { content } @@ -846,6 +960,7 @@ async fn stream_completion( .uri(completion_url.as_ref()), &oauth_token, Some(is_user_initiated), + Some(location), ) .when(is_vision_request, |builder| { builder.header("Copilot-Vision-Request", is_vision_request.to_string()) @@ -905,6 +1020,65 @@ async fn stream_completion( } } +async fn stream_messages( + client: Arc, + oauth_token: String, + api_url: String, + body: String, + is_user_initiated: bool, + location: ChatLocation, + anthropic_beta: Option, +) -> Result>> { + let mut request_builder = copilot_request_headers( + HttpRequest::builder().method(Method::POST).uri(&api_url), + &oauth_token, + Some(is_user_initiated), + Some(location), + ); + + if let Some(beta) = &anthropic_beta { + request_builder = request_builder.header("anthropic-beta", beta.as_str()); + } + + let request = request_builder.body(AsyncBody::from(body))?; + let mut response = client.send(request).await?; + + if !response.status().is_success() { + let mut body = String::new(); + response.body_mut().read_to_string(&mut body).await?; + anyhow::bail!("Failed to connect to API: {} {}", response.status(), body); + } + + let reader = BufReader::new(response.into_body()); + Ok(reader + .lines() + .filter_map(|line| async move { + match line { + Ok(line) => { + let line = line + .strip_prefix("data: ") + .or_else(|| line.strip_prefix("data:"))?; + if line.starts_with("[DONE]") || line.is_empty() { + return None; + } + match serde_json::from_str(line) { + Ok(event) => Some(Ok(event)), + Err(error) => { + log::error!( + "Failed to parse Copilot messages stream event: `{}`\nResponse: `{}`", + error, + line, + ); + Some(Err(anthropic::AnthropicError::DeserializeResponse(error))) + } + } + } + Err(error) => Some(Err(anthropic::AnthropicError::ReadResponse(error))), + } + }) + .boxed()) +} + #[cfg(test)] mod tests { use super::*; @@ -1513,6 +1687,11 @@ mod tests { tool_calls: true, parallel_tool_calls: false, vision: false, + thinking: false, + adaptive_thinking: false, + max_thinking_budget: None, + min_thinking_budget: None, + reasoning_effort: vec![], }, model_type: "chat".to_string(), tokenizer: None, diff --git a/crates/copilot_chat/src/responses.rs b/crates/copilot_chat/src/responses.rs index 473e583027bf77f3f7dc43d7914f6d2afff743a0..4f30ba1eb083c8a70c9a91853c7df37e65783ce3 100644 --- a/crates/copilot_chat/src/responses.rs +++ b/crates/copilot_chat/src/responses.rs @@ -1,9 +1,9 @@ use std::sync::Arc; -use super::copilot_request_headers; +use super::{ChatLocation, copilot_request_headers}; use anyhow::{Result, anyhow}; use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream}; -use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest}; +use http_client::{AsyncBody, HttpClient, HttpRequestExt, Method, Request as HttpRequest}; use serde::{Deserialize, Serialize}; use serde_json::Value; pub use settings::OpenAiReasoningEffort as ReasoningEffort; @@ -24,6 +24,7 @@ pub struct Request { pub reasoning: Option, #[serde(skip_serializing_if = "Option::is_none")] pub include: Option>, + pub store: bool, } #[derive(Serialize, Deserialize, Debug, Clone)] @@ -280,6 +281,7 @@ pub async fn stream_response( api_url: String, request: Request, is_user_initiated: bool, + location: ChatLocation, ) -> Result>> { let is_vision_request = request.input.iter().any(|item| match item { ResponseInputItem::Message { @@ -295,13 +297,11 @@ pub async fn stream_response( HttpRequest::builder().method(Method::POST).uri(&api_url), &oauth_token, Some(is_user_initiated), - ); - - let request_builder = if is_vision_request { - request_builder.header("Copilot-Vision-Request", "true") - } else { - request_builder - }; + Some(location), + ) + .when(is_vision_request, |builder| { + builder.header("Copilot-Vision-Request", "true") + }); let is_streaming = request.stream; let json = serde_json::to_string(&request)?; diff --git a/crates/language_models/src/provider/copilot_chat.rs b/crates/language_models/src/provider/copilot_chat.rs index 599dd8ac51fd6591987d4ee564b854fcf018d88f..47d1b316a581c8013843940ecb3e55ed29bc4500 100644 --- a/crates/language_models/src/provider/copilot_chat.rs +++ b/crates/language_models/src/provider/copilot_chat.rs @@ -2,15 +2,17 @@ use std::pin::Pin; use std::str::FromStr as _; use std::sync::Arc; +use anthropic::AnthropicModelMode; use anyhow::{Result, anyhow}; use cloud_llm_client::CompletionIntent; use collections::HashMap; use copilot::{GlobalCopilotAuth, Status}; use copilot_chat::responses as copilot_responses; use copilot_chat::{ - ChatMessage, ChatMessageContent, ChatMessagePart, CopilotChat, CopilotChatConfiguration, - Function, FunctionContent, ImageUrl, Model as CopilotChatModel, ModelVendor, - Request as CopilotChatRequest, ResponseEvent, Tool, ToolCall, ToolCallContent, ToolChoice, + ChatLocation, ChatMessage, ChatMessageContent, ChatMessagePart, CopilotChat, + CopilotChatConfiguration, Function, FunctionContent, ImageUrl, Model as CopilotChatModel, + ModelVendor, Request as CopilotChatRequest, ResponseEvent, Tool, ToolCall, ToolCallContent, + ToolChoice, }; use futures::future::BoxFuture; use futures::stream::BoxStream; @@ -20,8 +22,8 @@ use http_client::StatusCode; use language::language_settings::all_language_settings; use language_model::{ AuthenticateError, IconOrSvg, LanguageModel, LanguageModelCompletionError, - LanguageModelCompletionEvent, LanguageModelCostInfo, LanguageModelId, LanguageModelName, - LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName, + LanguageModelCompletionEvent, LanguageModelCostInfo, LanguageModelEffortLevel, LanguageModelId, + LanguageModelName, LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName, LanguageModelProviderState, LanguageModelRequest, LanguageModelRequestMessage, LanguageModelToolChoice, LanguageModelToolResultContent, LanguageModelToolSchemaFormat, LanguageModelToolUse, MessageContent, RateLimiter, Role, StopReason, TokenUsage, @@ -30,6 +32,7 @@ use settings::SettingsStore; use ui::prelude::*; use util::debug_panic; +use crate::provider::anthropic::{AnthropicEventMapper, into_anthropic}; use crate::provider::util::parse_tool_arguments; const PROVIDER_ID: LanguageModelProviderId = LanguageModelProviderId::new("copilot_chat"); @@ -254,6 +257,33 @@ impl LanguageModel for CopilotChatLanguageModel { self.model.supports_vision() } + fn supports_thinking(&self) -> bool { + self.model.can_think() + } + + fn supported_effort_levels(&self) -> Vec { + let levels = self.model.reasoning_effort_levels(); + if levels.is_empty() { + return vec![]; + } + levels + .iter() + .map(|level| { + let name: SharedString = match level.as_str() { + "low" => "Low".into(), + "medium" => "Medium".into(), + "high" => "High".into(), + _ => SharedString::from(level.clone()), + }; + LanguageModelEffortLevel { + name, + value: SharedString::from(level.clone()), + is_default: level == "high", + } + }) + .collect() + } + fn tool_input_format(&self) -> LanguageModelToolSchemaFormat { match self.model.vendor() { ModelVendor::OpenAI | ModelVendor::Anthropic => { @@ -333,12 +363,94 @@ impl LanguageModel for CopilotChatLanguageModel { | CompletionIntent::EditFile => false, }); + if self.model.supports_messages() { + let location = intent_to_chat_location(request.intent); + let model = self.model.clone(); + let request_limiter = self.request_limiter.clone(); + let future = cx.spawn(async move |cx| { + let effort = request + .thinking_effort + .as_ref() + .and_then(|e| anthropic::Effort::from_str(e).ok()); + + let mut anthropic_request = into_anthropic( + request, + model.id().to_string(), + 0.0, + model.max_output_tokens() as u64, + if model.supports_adaptive_thinking() { + AnthropicModelMode::Thinking { + budget_tokens: None, + } + } else if model.can_think() { + AnthropicModelMode::Thinking { + budget_tokens: compute_thinking_budget( + model.min_thinking_budget(), + model.max_thinking_budget(), + model.max_output_tokens() as u32, + ), + } + } else { + AnthropicModelMode::Default + }, + ); + + anthropic_request.temperature = None; + + // The Copilot proxy doesn't support eager_input_streaming on tools. + for tool in &mut anthropic_request.tools { + tool.eager_input_streaming = false; + } + + if model.supports_adaptive_thinking() { + if anthropic_request.thinking.is_some() { + anthropic_request.thinking = Some(anthropic::Thinking::Adaptive); + anthropic_request.output_config = Some(anthropic::OutputConfig { effort }); + } + } + + let anthropic_beta = if !model.supports_adaptive_thinking() && model.can_think() { + Some("interleaved-thinking-2025-05-14".to_string()) + } else { + None + }; + + let body = serde_json::to_string(&anthropic::StreamingRequest { + base: anthropic_request, + stream: true, + }) + .map_err(|e| anyhow::anyhow!(e))?; + + let stream = CopilotChat::stream_messages( + body, + location, + is_user_initiated, + anthropic_beta, + cx.clone(), + ); + + request_limiter + .stream(async move { + let events = stream.await?; + let mapper = AnthropicEventMapper::new(); + Ok(mapper.map_stream(events).boxed()) + }) + .await + }); + return async move { Ok(future.await?.boxed()) }.boxed(); + } + if self.model.supports_response() { + let location = intent_to_chat_location(request.intent); let responses_request = into_copilot_responses(&self.model, request); let request_limiter = self.request_limiter.clone(); let future = cx.spawn(async move |cx| { - let request = - CopilotChat::stream_response(responses_request, is_user_initiated, cx.clone()); + let request = CopilotChat::stream_response( + responses_request, + location, + is_user_initiated, + cx.clone(), + ); request_limiter .stream(async move { let stream = request.await?; @@ -350,6 +462,7 @@ impl LanguageModel for CopilotChatLanguageModel { return async move { Ok(future.await?.boxed()) }.boxed(); } + let location = intent_to_chat_location(request.intent); let copilot_request = match into_copilot_chat(&self.model, request) { Ok(request) => request, Err(err) => return futures::future::ready(Err(err.into())).boxed(), @@ -358,8 +471,12 @@ impl LanguageModel for CopilotChatLanguageModel { let request_limiter = self.request_limiter.clone(); let future = cx.spawn(async move |cx| { - let request = - CopilotChat::stream_completion(copilot_request, is_user_initiated, cx.clone()); + let request = CopilotChat::stream_completion( + copilot_request, + location, + is_user_initiated, + cx.clone(), + ); request_limiter .stream(async move { let response = request.await?; @@ -761,6 +878,9 @@ fn into_copilot_chat( model: &CopilotChatModel, request: LanguageModelRequest, ) -> Result { + let temperature = request.temperature; + let tool_choice = request.tool_choice; + let mut request_messages: Vec = Vec::new(); for message in request.messages { if let Some(last_message) = request_messages.last_mut() { @@ -859,10 +979,9 @@ fn into_copilot_chat( let text_content = { let mut buffer = String::new(); for string in message.content.iter().filter_map(|content| match content { - MessageContent::Text(text) | MessageContent::Thinking { text, .. } => { - Some(text.as_str()) - } - MessageContent::ToolUse(_) + MessageContent::Text(text) => Some(text.as_str()), + MessageContent::Thinking { .. } + | MessageContent::ToolUse(_) | MessageContent::RedactedThinking(_) | MessageContent::ToolResult(_) | MessageContent::Image(_) => None, @@ -919,21 +1038,52 @@ fn into_copilot_chat( .collect::>(); Ok(CopilotChatRequest { - intent: true, n: 1, stream: model.uses_streaming(), - temperature: 0.1, + temperature: temperature.unwrap_or(0.1), model: model.id().to_string(), messages, tools, - tool_choice: request.tool_choice.map(|choice| match choice { + tool_choice: tool_choice.map(|choice| match choice { LanguageModelToolChoice::Auto => ToolChoice::Auto, LanguageModelToolChoice::Any => ToolChoice::Any, LanguageModelToolChoice::None => ToolChoice::None, }), + thinking_budget: None, }) } +fn compute_thinking_budget( + min_budget: Option, + max_budget: Option, + max_output_tokens: u32, +) -> Option { + let configured_budget: u32 = 16000; + let min_budget = min_budget.unwrap_or(1024); + let max_budget = max_budget.unwrap_or(max_output_tokens.saturating_sub(1)); + let normalized = configured_budget.max(min_budget); + Some( + normalized + .min(max_budget) + .min(max_output_tokens.saturating_sub(1)), + ) +} + +fn intent_to_chat_location(intent: Option) -> ChatLocation { + match intent { + Some(CompletionIntent::UserPrompt) => ChatLocation::Agent, + Some(CompletionIntent::ToolResults) => ChatLocation::Agent, + Some(CompletionIntent::ThreadSummarization) => ChatLocation::Panel, + Some(CompletionIntent::ThreadContextSummarization) => ChatLocation::Panel, + Some(CompletionIntent::CreateFile) => ChatLocation::Agent, + Some(CompletionIntent::EditFile) => ChatLocation::Agent, + Some(CompletionIntent::InlineAssist) => ChatLocation::Editor, + Some(CompletionIntent::TerminalInlineAssist) => ChatLocation::Terminal, + Some(CompletionIntent::GenerateGitCommitMessage) => ChatLocation::Other, + None => ChatLocation::Panel, + } +} + fn into_copilot_responses( model: &CopilotChatModel, request: LanguageModelRequest, @@ -949,7 +1099,7 @@ fn into_copilot_responses( tool_choice, stop: _, temperature, - thinking_allowed: _, + thinking_allowed, thinking_effort: _, speed: _, } = request; @@ -1128,10 +1278,18 @@ fn into_copilot_responses( temperature, tools: converted_tools, tool_choice: mapped_tool_choice, - reasoning: None, // We would need to add support for setting from user settings. + reasoning: if thinking_allowed { + Some(copilot_responses::ReasoningConfig { + effort: copilot_responses::ReasoningEffort::Medium, + summary: Some(copilot_responses::ReasoningSummary::Detailed), + }) + } else { + None + }, include: Some(vec![ copilot_responses::ResponseIncludable::ReasoningEncryptedContent, ]), + store: false, } }