diff --git a/crates/anthropic/src/anthropic.rs b/crates/anthropic/src/anthropic.rs
index bdbb5e465eb07e32c2cf7224ac2c98b0257bc622..e9f0ea51a99562a149960806573cbbd17678e827 100644
--- a/crates/anthropic/src/anthropic.rs
+++ b/crates/anthropic/src/anthropic.rs
@@ -42,6 +42,7 @@ pub enum Model {
         tool_override: Option<String>,
         /// Indicates whether this custom model supports caching.
         cache_configuration: Option<AnthropicModelCacheConfiguration>,
+        max_output_tokens: Option<u32>,
     },
 }
 
@@ -105,6 +106,16 @@ impl Model {
         }
     }
 
+    pub fn max_output_tokens(&self) -> u32 {
+        match self {
+            Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
+            Self::Claude3_5Sonnet => 8_192,
+            Self::Custom {
+                max_output_tokens, ..
+            } => max_output_tokens.unwrap_or(4_096),
+        }
+    }
+
     pub fn tool_model_id(&self) -> &str {
         if let Self::Custom {
             tool_override: Some(tool_override),
@@ -131,7 +142,7 @@ pub async fn complete(
         .header("Anthropic-Version", "2023-06-01")
         .header(
             "Anthropic-Beta",
-            "tools-2024-04-04,prompt-caching-2024-07-31",
+            "tools-2024-04-04,prompt-caching-2024-07-31,max-tokens-3-5-sonnet-2024-07-15",
         )
         .header("X-Api-Key", api_key)
         .header("Content-Type", "application/json");
@@ -191,7 +202,7 @@ pub async fn stream_completion(
         .header("Anthropic-Version", "2023-06-01")
         .header(
             "Anthropic-Beta",
-            "tools-2024-04-04,prompt-caching-2024-07-31",
+            "tools-2024-04-04,prompt-caching-2024-07-31,max-tokens-3-5-sonnet-2024-07-15",
         )
         .header("X-Api-Key", api_key)
         .header("Content-Type", "application/json");
diff --git a/crates/language_model/src/language_model.rs b/crates/language_model/src/language_model.rs
index 1df651ad9ed4090414157da894e3fdb693d75db5..0d23023c2a47700b8e2ee1fbb3ccd85fb3422409 100644
--- a/crates/language_model/src/language_model.rs
+++ b/crates/language_model/src/language_model.rs
@@ -64,6 +64,9 @@ pub trait LanguageModel: Send + Sync {
     }
 
     fn max_token_count(&self) -> usize;
+    fn max_output_tokens(&self) -> Option<u32> {
+        None
+    }
 
     fn count_tokens(
         &self,
diff --git a/crates/language_model/src/provider/anthropic.rs b/crates/language_model/src/provider/anthropic.rs
index fa5401a38fe39f1ce8bd49fa54ed110a78314a8b..3b4485669756464930b1fc46abd231977768b435 100644
--- a/crates/language_model/src/provider/anthropic.rs
+++ b/crates/language_model/src/provider/anthropic.rs
@@ -39,6 +39,7 @@ pub struct AvailableModel {
     pub max_tokens: usize,
     pub tool_override: Option<String>,
     pub cache_configuration: Option<LanguageModelCacheConfiguration>,
+    pub max_output_tokens: Option<u32>,
 }
 
 pub struct AnthropicLanguageModelProvider {
@@ -179,6 +180,7 @@ impl LanguageModelProvider for AnthropicLanguageModelProvider {
                             min_total_token: config.min_total_token,
                         }
                     }),
+                    max_output_tokens: model.max_output_tokens,
                 },
             );
         }
@@ -331,6 +333,10 @@ impl LanguageModel for AnthropicModel {
         self.model.max_token_count()
     }
 
+    fn max_output_tokens(&self) -> Option<u32> {
+        Some(self.model.max_output_tokens())
+    }
+
     fn count_tokens(
         &self,
         request: LanguageModelRequest,
@@ -344,7 +350,8 @@ impl LanguageModel for AnthropicModel {
         request: LanguageModelRequest,
         cx: &AsyncAppContext,
     ) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
-        let request = request.into_anthropic(self.model.id().into());
+        let request =
+            request.into_anthropic(self.model.id().into(), self.model.max_output_tokens());
         let request = self.stream_completion(request, cx);
         let future = self.request_limiter.stream(async move {
             let response = request.await.map_err(|err| anyhow!(err))?;
@@ -377,7 +384,10 @@ impl LanguageModel for AnthropicModel {
         input_schema: serde_json::Value,
         cx: &AsyncAppContext,
     ) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
-        let mut request = request.into_anthropic(self.model.tool_model_id().into());
+        let mut request = request.into_anthropic(
+            self.model.tool_model_id().into(),
+            self.model.max_output_tokens(),
+        );
         request.tool_choice = Some(anthropic::ToolChoice::Tool {
             name: tool_name.clone(),
         });
diff --git a/crates/language_model/src/provider/cloud.rs b/crates/language_model/src/provider/cloud.rs
index 517cb13342859c9e62667b6768c7d74b7b1dfb92..38478e4de35cf381eff02bcdf4d8ca0a4bba3c65 100644
--- a/crates/language_model/src/provider/cloud.rs
+++ b/crates/language_model/src/provider/cloud.rs
@@ -57,6 +57,7 @@ pub struct AvailableModel {
     max_tokens: usize,
     tool_override: Option<String>,
     cache_configuration: Option<LanguageModelCacheConfiguration>,
+    max_output_tokens: Option<u32>,
 }
 
 pub struct CloudLanguageModelProvider {
@@ -210,6 +211,7 @@ impl LanguageModelProvider for CloudLanguageModelProvider {
                         min_total_token: config.min_total_token,
                     }
                 }),
+                max_output_tokens: model.max_output_tokens,
             })
         }
         AvailableProvider::OpenAi => CloudModel::OpenAi(open_ai::Model::Custom {
@@ -446,7 +448,7 @@ impl LanguageModel for CloudLanguageModel {
     ) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
         match &self.model {
             CloudModel::Anthropic(model) => {
-                let request = request.into_anthropic(model.id().into());
+                let request = request.into_anthropic(model.id().into(), model.max_output_tokens());
                 let client = self.client.clone();
                 let llm_api_token = self.llm_api_token.clone();
                 let future = self.request_limiter.stream(async move {
@@ -556,7 +558,8 @@ impl LanguageModel for CloudLanguageModel {
 
         match &self.model {
             CloudModel::Anthropic(model) => {
-                let mut request = request.into_anthropic(model.tool_model_id().into());
+                let mut request =
+                    request.into_anthropic(model.tool_model_id().into(), model.max_output_tokens());
                 request.tool_choice = Some(anthropic::ToolChoice::Tool {
                     name: tool_name.clone(),
                 });
diff --git a/crates/language_model/src/request.rs b/crates/language_model/src/request.rs
index 6c4f1bb50b86a08505322e4411f02adc1af87c81..ecebc5e86802cd42b30b6fc2b98212b68db86573 100644
--- a/crates/language_model/src/request.rs
+++ b/crates/language_model/src/request.rs
@@ -286,7 +286,7 @@ impl LanguageModelRequest {
         }
     }
 
-    pub fn into_anthropic(self, model: String) -> anthropic::Request {
+    pub fn into_anthropic(self, model: String, max_output_tokens: u32) -> anthropic::Request {
         let mut new_messages: Vec<anthropic::Message> = Vec::new();
         let mut system_message = String::new();
 
@@ -353,7 +353,7 @@ impl LanguageModelRequest {
         anthropic::Request {
             model,
             messages: new_messages,
-            max_tokens: 4092,
+            max_tokens: max_output_tokens,
             system: Some(system_message),
             tools: Vec::new(),
             tool_choice: None,
diff --git a/crates/language_model/src/settings.rs b/crates/language_model/src/settings.rs
index ded797e1e554f04ee53879bf0674d9824fd8c70f..85dce2e121ca2faa25851e0df076aca47462b9b6 100644
--- a/crates/language_model/src/settings.rs
+++ b/crates/language_model/src/settings.rs
@@ -97,6 +97,7 @@ impl AnthropicSettingsContent {
                                     max_tokens,
                                     tool_override,
                                     cache_configuration,
+                                    max_output_tokens,
                                 } => Some(provider::anthropic::AvailableModel {
                                     name,
                                     max_tokens,
@@ -108,6 +109,7 @@ impl AnthropicSettingsContent {
                                             min_total_token: config.min_total_token,
                                         },
                                     ),
+                                    max_output_tokens,
                                 }),
                                 _ => None,
                             })