From 6f8023530c4099a39d29b87568810d7f43836f06 Mon Sep 17 00:00:00 2001 From: Shardul Vaidya <31039336+5herlocked@users.noreply.github.com> Date: Tue, 17 Feb 2026 04:22:25 -0500 Subject: [PATCH] bedrock: Model streamlining and cleanup (#49287) Release Notes: - Improved Bedrock error messages: region-locked models ask the user to try a different region, rate limits and access errors are reported cleanly instead of as raw API responses - Streamlined Bedrock model list to 39 curated models - Fixed API errors when using non-tool models in agent threads --------- Co-authored-by: Ona --- crates/bedrock/src/bedrock.rs | 50 +- crates/bedrock/src/models.rs | 1188 ++++++++--------- .../language_models/src/provider/bedrock.rs | 123 +- 3 files changed, 681 insertions(+), 680 deletions(-) diff --git a/crates/bedrock/src/bedrock.rs b/crates/bedrock/src/bedrock.rs index d9e3c0984687ab9d7843f912dafa0853b9677811..12db814d3ae2b8239ca6e662aa9c549c58b649e0 100644 --- a/crates/bedrock/src/bedrock.rs +++ b/crates/bedrock/src/bedrock.rs @@ -1,6 +1,6 @@ mod models; -use anyhow::{Context, Error, Result, anyhow}; +use anyhow::{Result, anyhow}; use aws_sdk_bedrockruntime as bedrock; pub use aws_sdk_bedrockruntime as bedrock_client; use aws_sdk_bedrockruntime::types::InferenceConfiguration; @@ -37,7 +37,7 @@ pub const CONTEXT_1M_BETA_HEADER: &str = "context-1m-2025-08-07"; pub async fn stream_completion( client: bedrock::Client, request: Request, -) -> Result>, Error> { +) -> Result>, BedrockError> { let mut response = bedrock::Client::converse_stream(&client) .model_id(request.model.clone()) .set_messages(request.messages.into()); @@ -94,10 +94,30 @@ pub async fn stream_completion( } } - let output = response - .send() - .await - .context("Failed to send API request to Bedrock"); + let output = response.send().await.map_err(|err| match err { + bedrock::error::SdkError::ServiceError(ctx) => { + use bedrock::operation::converse_stream::ConverseStreamError; + let err = ctx.into_err(); + match 
&err { + ConverseStreamError::ValidationException(e) => { + BedrockError::Validation(e.message().unwrap_or("validation error").to_string()) + } + ConverseStreamError::ThrottlingException(_) => BedrockError::RateLimited, + ConverseStreamError::ServiceUnavailableException(_) + | ConverseStreamError::ModelNotReadyException(_) => { + BedrockError::ServiceUnavailable + } + ConverseStreamError::AccessDeniedException(e) => { + BedrockError::AccessDenied(e.message().unwrap_or("access denied").to_string()) + } + ConverseStreamError::InternalServerException(e) => BedrockError::InternalServer( + e.message().unwrap_or("internal server error").to_string(), + ), + _ => BedrockError::Other(err.into()), + } + } + other => BedrockError::Other(other.into()), + }); let stream = Box::pin(stream::unfold( output?.stream, @@ -106,10 +126,10 @@ pub async fn stream_completion( Ok(Some(output)) => Some((Ok(output), stream)), Ok(None) => None, Err(err) => Some(( - Err(BedrockError::ClientError(anyhow!( + Err(anyhow!( "{}", aws_sdk_bedrockruntime::error::DisplayErrorContext(err) - ))), + )), stream, )), } @@ -196,10 +216,16 @@ pub struct Metadata { #[derive(Error, Debug)] pub enum BedrockError { - #[error("client error: {0}")] - ClientError(anyhow::Error), - #[error("extension error: {0}")] - ExtensionError(anyhow::Error), + #[error("{0}")] + Validation(String), + #[error("rate limited")] + RateLimited, + #[error("service unavailable")] + ServiceUnavailable, + #[error("{0}")] + AccessDenied(String), + #[error("{0}")] + InternalServer(String), #[error(transparent)] Other(#[from] anyhow::Error), } diff --git a/crates/bedrock/src/models.rs b/crates/bedrock/src/models.rs index 1efcf28d129499086d30bf51327c7be9c430f644..6c80867b8f93956189de78ed6ec84422b93626e0 100644 --- a/crates/bedrock/src/models.rs +++ b/crates/bedrock/src/models.rs @@ -45,7 +45,9 @@ pub struct BedrockModelCacheConfiguration { #[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] #[derive(Clone, Debug, Default, 
Serialize, Deserialize, PartialEq, EnumIter)] pub enum Model { - // Anthropic models (already included) + // Anthropic Claude 4+ models + #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")] + ClaudeHaiku4_5, #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")] ClaudeSonnet4, #[serde( @@ -61,15 +63,8 @@ pub enum Model { alias = "claude-sonnet-4-5-thinking-latest" )] ClaudeSonnet4_5Thinking, - #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")] - ClaudeOpus4, #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")] ClaudeOpus4_1, - #[serde( - rename = "claude-opus-4-thinking", - alias = "claude-opus-4-thinking-latest" - )] - ClaudeOpus4Thinking, #[serde( rename = "claude-opus-4-1-thinking", alias = "claude-opus-4-1-thinking-latest" @@ -89,79 +84,83 @@ pub enum Model { alias = "claude-opus-4-6-thinking-latest" )] ClaudeOpus4_6Thinking, - #[serde(rename = "claude-3-5-sonnet-v2", alias = "claude-3-5-sonnet-latest")] - Claude3_5SonnetV2, - #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")] - Claude3_7Sonnet, - #[serde( - rename = "claude-3-7-sonnet-thinking", - alias = "claude-3-7-sonnet-thinking-latest" - )] - Claude3_7SonnetThinking, - #[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")] - Claude3Opus, - #[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")] - Claude3Sonnet, - #[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")] - Claude3_5Haiku, - #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")] - ClaudeHaiku4_5, - Claude3_5Sonnet, - Claude3Haiku, - // Amazon Nova Models - AmazonNovaLite, - AmazonNovaMicro, - AmazonNovaPro, - AmazonNovaPremier, - // AI21 models - AI21J2GrandeInstruct, - AI21J2JumboInstruct, - AI21J2Mid, - AI21J2MidV1, - AI21J2Ultra, - AI21J2UltraV1_8k, - AI21J2UltraV1, - AI21JambaInstructV1, - AI21Jamba15LargeV1, - AI21Jamba15MiniV1, - // Cohere models - CohereCommandTextV14_4k, - 
CohereCommandRV1, - CohereCommandRPlusV1, - CohereCommandLightTextV14_4k, - // DeepSeek - DeepSeekR1, - // Meta models - MetaLlama38BInstructV1, - MetaLlama370BInstructV1, - MetaLlama318BInstructV1_128k, - MetaLlama318BInstructV1, - MetaLlama3170BInstructV1_128k, - MetaLlama3170BInstructV1, - MetaLlama31405BInstructV1, - MetaLlama321BInstructV1, - MetaLlama323BInstructV1, - MetaLlama3211BInstructV1, - MetaLlama3290BInstructV1, - MetaLlama3370BInstructV1, - #[allow(non_camel_case_types)] - MetaLlama4Scout17BInstructV1, - #[allow(non_camel_case_types)] - MetaLlama4Maverick17BInstructV1, + + // Meta Llama 4 models + #[serde(rename = "llama-4-scout-17b")] + Llama4Scout17B, + #[serde(rename = "llama-4-maverick-17b")] + Llama4Maverick17B, + + // Google Gemma 3 models + #[serde(rename = "gemma-3-4b")] + Gemma3_4B, + #[serde(rename = "gemma-3-12b")] + Gemma3_12B, + #[serde(rename = "gemma-3-27b")] + Gemma3_27B, + // Mistral models - MistralMistral7BInstructV0, - MistralMixtral8x7BInstructV0, - MistralMistralLarge2402V1, - MistralMistralSmall2402V1, - MistralPixtralLarge2502V1, - // Writer models - PalmyraWriterX5, - PalmyraWriterX4, + #[serde(rename = "magistral-small")] + MagistralSmall, + #[serde(rename = "mistral-large-3")] + MistralLarge3, + #[serde(rename = "pixtral-large")] + PixtralLarge, + + // Qwen models + #[serde(rename = "qwen3-32b")] + Qwen3_32B, + #[serde(rename = "qwen3-vl-235b")] + Qwen3VL235B, + #[serde(rename = "qwen3-235b")] + Qwen3_235B, + #[serde(rename = "qwen3-next-80b")] + Qwen3Next80B, + #[serde(rename = "qwen3-coder-30b")] + Qwen3Coder30B, + #[serde(rename = "qwen3-coder-next")] + Qwen3CoderNext, + #[serde(rename = "qwen3-coder-480b")] + Qwen3Coder480B, + + // Amazon Nova models + #[serde(rename = "nova-lite")] + NovaLite, + #[serde(rename = "nova-pro")] + NovaPro, + #[serde(rename = "nova-premier")] + NovaPremier, + #[serde(rename = "nova-2-lite")] + Nova2Lite, + + // OpenAI GPT OSS models + #[serde(rename = "gpt-oss-20b")] + GptOss20B, + 
#[serde(rename = "gpt-oss-120b")] + GptOss120B, + + // MiniMax models + #[serde(rename = "minimax-m2")] + MiniMaxM2, + + // Moonshot models + #[serde(rename = "kimi-k2-thinking")] + KimiK2Thinking, + #[serde(rename = "kimi-k2-5")] + KimiK2_5, + + // DeepSeek models + #[serde(rename = "deepseek-r1")] + DeepSeekR1, + #[serde(rename = "deepseek-v3")] + DeepSeekV3_1, + #[serde(rename = "deepseek-v3-2")] + DeepSeekV3_2, + #[serde(rename = "custom")] Custom { name: String, max_tokens: u64, - /// The name displayed in the UI, such as in the assistant panel model dropdown menu. display_name: Option, max_output_tokens: Option, default_temperature: Option, @@ -170,12 +169,8 @@ pub enum Model { } impl Model { - pub fn default_fast(region: &str) -> Self { - if region.starts_with("us-") { - Self::Claude3_5Haiku - } else { - Self::Claude3Haiku - } + pub fn default_fast(_region: &str) -> Self { + Self::ClaudeHaiku4_5 } pub fn from_id(id: &str) -> anyhow::Result { @@ -191,24 +186,6 @@ impl Model { Ok(Self::ClaudeOpus4_1Thinking) } else if id.starts_with("claude-opus-4-1") { Ok(Self::ClaudeOpus4_1) - } else if id.starts_with("claude-opus-4-thinking") { - Ok(Self::ClaudeOpus4Thinking) - } else if id.starts_with("claude-opus-4") { - Ok(Self::ClaudeOpus4) - } else if id.starts_with("claude-3-5-sonnet-v2") { - Ok(Self::Claude3_5SonnetV2) - } else if id.starts_with("claude-3-opus") { - Ok(Self::Claude3Opus) - } else if id.starts_with("claude-3-sonnet") { - Ok(Self::Claude3Sonnet) - } else if id.starts_with("claude-3-5-haiku") { - Ok(Self::Claude3_5Haiku) - } else if id.starts_with("claude-haiku-4-5") { - Ok(Self::ClaudeHaiku4_5) - } else if id.starts_with("claude-3-7-sonnet") { - Ok(Self::Claude3_7Sonnet) - } else if id.starts_with("claude-3-7-sonnet-thinking") { - Ok(Self::Claude3_7SonnetThinking) } else if id.starts_with("claude-sonnet-4-5-thinking") { Ok(Self::ClaudeSonnet4_5Thinking) } else if id.starts_with("claude-sonnet-4-5") { @@ -217,6 +194,8 @@ impl Model { 
Ok(Self::ClaudeSonnet4Thinking) } else if id.starts_with("claude-sonnet-4") { Ok(Self::ClaudeSonnet4) + } else if id.starts_with("claude-haiku-4-5") { + Ok(Self::ClaudeHaiku4_5) } else { anyhow::bail!("invalid model id {id}"); } @@ -224,283 +203,228 @@ impl Model { pub fn id(&self) -> &str { match self { - Model::ClaudeSonnet4 => "claude-sonnet-4", - Model::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking", - Model::ClaudeSonnet4_5 => "claude-sonnet-4-5", - Model::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking", - Model::ClaudeOpus4 => "claude-opus-4", - Model::ClaudeOpus4_1 => "claude-opus-4-1", - Model::ClaudeOpus4Thinking => "claude-opus-4-thinking", - Model::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking", - Model::ClaudeOpus4_5 => "claude-opus-4-5", - Model::ClaudeOpus4_5Thinking => "claude-opus-4-5-thinking", - Model::ClaudeOpus4_6 => "claude-opus-4-6", - Model::ClaudeOpus4_6Thinking => "claude-opus-4-6-thinking", - Model::Claude3_5SonnetV2 => "claude-3-5-sonnet-v2", - Model::Claude3_5Sonnet => "claude-3-5-sonnet", - Model::Claude3Opus => "claude-3-opus", - Model::Claude3Sonnet => "claude-3-sonnet", - Model::Claude3Haiku => "claude-3-haiku", - Model::Claude3_5Haiku => "claude-3-5-haiku", - Model::ClaudeHaiku4_5 => "claude-haiku-4-5", - Model::Claude3_7Sonnet => "claude-3-7-sonnet", - Model::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking", - Model::AmazonNovaLite => "amazon-nova-lite", - Model::AmazonNovaMicro => "amazon-nova-micro", - Model::AmazonNovaPro => "amazon-nova-pro", - Model::AmazonNovaPremier => "amazon-nova-premier", - Model::DeepSeekR1 => "deepseek-r1", - Model::AI21J2GrandeInstruct => "ai21-j2-grande-instruct", - Model::AI21J2JumboInstruct => "ai21-j2-jumbo-instruct", - Model::AI21J2Mid => "ai21-j2-mid", - Model::AI21J2MidV1 => "ai21-j2-mid-v1", - Model::AI21J2Ultra => "ai21-j2-ultra", - Model::AI21J2UltraV1_8k => "ai21-j2-ultra-v1-8k", - Model::AI21J2UltraV1 => "ai21-j2-ultra-v1", - Model::AI21JambaInstructV1 => 
"ai21-jamba-instruct-v1", - Model::AI21Jamba15LargeV1 => "ai21-jamba-1-5-large-v1", - Model::AI21Jamba15MiniV1 => "ai21-jamba-1-5-mini-v1", - Model::CohereCommandTextV14_4k => "cohere-command-text-v14-4k", - Model::CohereCommandRV1 => "cohere-command-r-v1", - Model::CohereCommandRPlusV1 => "cohere-command-r-plus-v1", - Model::CohereCommandLightTextV14_4k => "cohere-command-light-text-v14-4k", - Model::MetaLlama38BInstructV1 => "meta-llama3-8b-instruct-v1", - Model::MetaLlama370BInstructV1 => "meta-llama3-70b-instruct-v1", - Model::MetaLlama318BInstructV1_128k => "meta-llama3-1-8b-instruct-v1-128k", - Model::MetaLlama318BInstructV1 => "meta-llama3-1-8b-instruct-v1", - Model::MetaLlama3170BInstructV1_128k => "meta-llama3-1-70b-instruct-v1-128k", - Model::MetaLlama3170BInstructV1 => "meta-llama3-1-70b-instruct-v1", - Model::MetaLlama31405BInstructV1 => "meta-llama3-1-405b-instruct-v1", - Model::MetaLlama321BInstructV1 => "meta-llama3-2-1b-instruct-v1", - Model::MetaLlama323BInstructV1 => "meta-llama3-2-3b-instruct-v1", - Model::MetaLlama3211BInstructV1 => "meta-llama3-2-11b-instruct-v1", - Model::MetaLlama3290BInstructV1 => "meta-llama3-2-90b-instruct-v1", - Model::MetaLlama3370BInstructV1 => "meta-llama3-3-70b-instruct-v1", - Model::MetaLlama4Scout17BInstructV1 => "meta-llama4-scout-17b-instruct-v1", - Model::MetaLlama4Maverick17BInstructV1 => "meta-llama4-maverick-17b-instruct-v1", - Model::MistralMistral7BInstructV0 => "mistral-7b-instruct-v0", - Model::MistralMixtral8x7BInstructV0 => "mistral-mixtral-8x7b-instruct-v0", - Model::MistralMistralLarge2402V1 => "mistral-large-2402-v1", - Model::MistralMistralSmall2402V1 => "mistral-small-2402-v1", - Model::MistralPixtralLarge2502V1 => "mistral-pixtral-large-2502-v1", - Model::PalmyraWriterX4 => "palmyra-writer-x4", - Model::PalmyraWriterX5 => "palmyra-writer-x5", + Self::ClaudeHaiku4_5 => "claude-haiku-4-5", + Self::ClaudeSonnet4 => "claude-sonnet-4", + Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking", + 
Self::ClaudeSonnet4_5 => "claude-sonnet-4-5", + Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking", + Self::ClaudeOpus4_1 => "claude-opus-4-1", + Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking", + Self::ClaudeOpus4_5 => "claude-opus-4-5", + Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-thinking", + Self::ClaudeOpus4_6 => "claude-opus-4-6", + Self::ClaudeOpus4_6Thinking => "claude-opus-4-6-thinking", + Self::Llama4Scout17B => "llama-4-scout-17b", + Self::Llama4Maverick17B => "llama-4-maverick-17b", + Self::Gemma3_4B => "gemma-3-4b", + Self::Gemma3_12B => "gemma-3-12b", + Self::Gemma3_27B => "gemma-3-27b", + Self::MagistralSmall => "magistral-small", + Self::MistralLarge3 => "mistral-large-3", + Self::PixtralLarge => "pixtral-large", + Self::Qwen3_32B => "qwen3-32b", + Self::Qwen3VL235B => "qwen3-vl-235b", + Self::Qwen3_235B => "qwen3-235b", + Self::Qwen3Next80B => "qwen3-next-80b", + Self::Qwen3Coder30B => "qwen3-coder-30b", + Self::Qwen3CoderNext => "qwen3-coder-next", + Self::Qwen3Coder480B => "qwen3-coder-480b", + Self::NovaLite => "nova-lite", + Self::NovaPro => "nova-pro", + Self::NovaPremier => "nova-premier", + Self::Nova2Lite => "nova-2-lite", + Self::GptOss20B => "gpt-oss-20b", + Self::GptOss120B => "gpt-oss-120b", + Self::MiniMaxM2 => "minimax-m2", + Self::KimiK2Thinking => "kimi-k2-thinking", + Self::KimiK2_5 => "kimi-k2-5", + Self::DeepSeekR1 => "deepseek-r1", + Self::DeepSeekV3_1 => "deepseek-v3", + Self::DeepSeekV3_2 => "deepseek-v3-2", Self::Custom { name, .. 
} => name, } } pub fn request_id(&self) -> &str { match self { - Model::ClaudeSonnet4 | Model::ClaudeSonnet4Thinking => { + Self::ClaudeHaiku4_5 => "anthropic.claude-haiku-4-5-20251001-v1:0", + Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => { "anthropic.claude-sonnet-4-20250514-v1:0" } - Model::ClaudeSonnet4_5 | Model::ClaudeSonnet4_5Thinking => { + Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5Thinking => { "anthropic.claude-sonnet-4-5-20250929-v1:0" } - Model::ClaudeOpus4 | Model::ClaudeOpus4Thinking => { - "anthropic.claude-opus-4-20250514-v1:0" - } - Model::ClaudeOpus4_1 | Model::ClaudeOpus4_1Thinking => { + Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => { "anthropic.claude-opus-4-1-20250805-v1:0" } - Model::ClaudeOpus4_5 | Model::ClaudeOpus4_5Thinking => { + Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking => { "anthropic.claude-opus-4-5-20251101-v1:0" } - Model::ClaudeOpus4_6 | Model::ClaudeOpus4_6Thinking => "anthropic.claude-opus-4-6-v1", - Model::Claude3_5SonnetV2 => "anthropic.claude-3-5-sonnet-20241022-v2:0", - Model::Claude3_5Sonnet => "anthropic.claude-3-5-sonnet-20240620-v1:0", - Model::Claude3Opus => "anthropic.claude-3-opus-20240229-v1:0", - Model::Claude3Sonnet => "anthropic.claude-3-sonnet-20240229-v1:0", - Model::Claude3Haiku => "anthropic.claude-3-haiku-20240307-v1:0", - Model::Claude3_5Haiku => "anthropic.claude-3-5-haiku-20241022-v1:0", - Model::ClaudeHaiku4_5 => "anthropic.claude-haiku-4-5-20251001-v1:0", - Model::Claude3_7Sonnet | Model::Claude3_7SonnetThinking => { - "anthropic.claude-3-7-sonnet-20250219-v1:0" - } - Model::AmazonNovaLite => "amazon.nova-lite-v1:0", - Model::AmazonNovaMicro => "amazon.nova-micro-v1:0", - Model::AmazonNovaPro => "amazon.nova-pro-v1:0", - Model::AmazonNovaPremier => "amazon.nova-premier-v1:0", - Model::DeepSeekR1 => "deepseek.r1-v1:0", - Model::AI21J2GrandeInstruct => "ai21.j2-grande-instruct", - Model::AI21J2JumboInstruct => "ai21.j2-jumbo-instruct", - Model::AI21J2Mid => "ai21.j2-mid", - 
Model::AI21J2MidV1 => "ai21.j2-mid-v1", - Model::AI21J2Ultra => "ai21.j2-ultra", - Model::AI21J2UltraV1_8k => "ai21.j2-ultra-v1:0:8k", - Model::AI21J2UltraV1 => "ai21.j2-ultra-v1", - Model::AI21JambaInstructV1 => "ai21.jamba-instruct-v1:0", - Model::AI21Jamba15LargeV1 => "ai21.jamba-1-5-large-v1:0", - Model::AI21Jamba15MiniV1 => "ai21.jamba-1-5-mini-v1:0", - Model::CohereCommandTextV14_4k => "cohere.command-text-v14:7:4k", - Model::CohereCommandRV1 => "cohere.command-r-v1:0", - Model::CohereCommandRPlusV1 => "cohere.command-r-plus-v1:0", - Model::CohereCommandLightTextV14_4k => "cohere.command-light-text-v14:7:4k", - Model::MetaLlama38BInstructV1 => "meta.llama3-8b-instruct-v1:0", - Model::MetaLlama370BInstructV1 => "meta.llama3-70b-instruct-v1:0", - Model::MetaLlama318BInstructV1_128k => "meta.llama3-1-8b-instruct-v1:0", - Model::MetaLlama318BInstructV1 => "meta.llama3-1-8b-instruct-v1:0", - Model::MetaLlama3170BInstructV1_128k => "meta.llama3-1-70b-instruct-v1:0", - Model::MetaLlama3170BInstructV1 => "meta.llama3-1-70b-instruct-v1:0", - Model::MetaLlama31405BInstructV1 => "meta.llama3-1-405b-instruct-v1:0", - Model::MetaLlama3211BInstructV1 => "meta.llama3-2-11b-instruct-v1:0", - Model::MetaLlama3290BInstructV1 => "meta.llama3-2-90b-instruct-v1:0", - Model::MetaLlama321BInstructV1 => "meta.llama3-2-1b-instruct-v1:0", - Model::MetaLlama323BInstructV1 => "meta.llama3-2-3b-instruct-v1:0", - Model::MetaLlama3370BInstructV1 => "meta.llama3-3-70b-instruct-v1:0", - Model::MetaLlama4Scout17BInstructV1 => "meta.llama4-scout-17b-instruct-v1:0", - Model::MetaLlama4Maverick17BInstructV1 => "meta.llama4-maverick-17b-instruct-v1:0", - Model::MistralMistral7BInstructV0 => "mistral.mistral-7b-instruct-v0:2", - Model::MistralMixtral8x7BInstructV0 => "mistral.mixtral-8x7b-instruct-v0:1", - Model::MistralMistralLarge2402V1 => "mistral.mistral-large-2402-v1:0", - Model::MistralMistralSmall2402V1 => "mistral.mistral-small-2402-v1:0", - Model::MistralPixtralLarge2502V1 => 
"mistral.pixtral-large-2502-v1:0", - Model::PalmyraWriterX4 => "writer.palmyra-x4-v1:0", - Model::PalmyraWriterX5 => "writer.palmyra-x5-v1:0", + Self::ClaudeOpus4_6 | Self::ClaudeOpus4_6Thinking => "anthropic.claude-opus-4-6-v1", + Self::Llama4Scout17B => "meta.llama4-scout-17b-instruct-v1:0", + Self::Llama4Maverick17B => "meta.llama4-maverick-17b-instruct-v1:0", + Self::Gemma3_4B => "google.gemma-3-4b-it", + Self::Gemma3_12B => "google.gemma-3-12b-it", + Self::Gemma3_27B => "google.gemma-3-27b-it", + Self::MagistralSmall => "mistral.magistral-small-2509", + Self::MistralLarge3 => "mistral.mistral-large-3-675b-instruct", + Self::PixtralLarge => "mistral.pixtral-large-2502-v1:0", + Self::Qwen3VL235B => "qwen.qwen3-vl-235b-a22b", + Self::Qwen3_32B => "qwen.qwen3-32b-v1:0", + Self::Qwen3_235B => "qwen.qwen3-235b-a22b-2507-v1:0", + Self::Qwen3Next80B => "qwen.qwen3-next-80b-a3b", + Self::Qwen3Coder30B => "qwen.qwen3-coder-30b-a3b-v1:0", + Self::Qwen3CoderNext => "qwen.qwen3-coder-next", + Self::Qwen3Coder480B => "qwen.qwen3-coder-480b-a35b-v1:0", + Self::NovaLite => "amazon.nova-lite-v1:0", + Self::NovaPro => "amazon.nova-pro-v1:0", + Self::NovaPremier => "amazon.nova-premier-v1:0", + Self::Nova2Lite => "amazon.nova-2-lite-v1:0", + Self::GptOss20B => "openai.gpt-oss-20b-1:0", + Self::GptOss120B => "openai.gpt-oss-120b-1:0", + Self::MiniMaxM2 => "minimax.minimax-m2", + Self::KimiK2Thinking => "moonshot.kimi-k2-thinking", + Self::KimiK2_5 => "moonshotai.kimi-k2.5", + Self::DeepSeekR1 => "deepseek.r1-v1:0", + Self::DeepSeekV3_1 => "deepseek.v3-v1:0", + Self::DeepSeekV3_2 => "deepseek.v3.2", Self::Custom { name, .. 
} => name, } } pub fn display_name(&self) -> &str { match self { + Self::ClaudeHaiku4_5 => "Claude Haiku 4.5", Self::ClaudeSonnet4 => "Claude Sonnet 4", Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking", Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5", Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking", - Self::ClaudeOpus4 => "Claude Opus 4", Self::ClaudeOpus4_1 => "Claude Opus 4.1", - Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking", Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking", Self::ClaudeOpus4_5 => "Claude Opus 4.5", Self::ClaudeOpus4_5Thinking => "Claude Opus 4.5 Thinking", Self::ClaudeOpus4_6 => "Claude Opus 4.6", Self::ClaudeOpus4_6Thinking => "Claude Opus 4.6 Thinking", - Self::Claude3_5SonnetV2 => "Claude 3.5 Sonnet v2", - Self::Claude3_5Sonnet => "Claude 3.5 Sonnet", - Self::Claude3Opus => "Claude 3 Opus", - Self::Claude3Sonnet => "Claude 3 Sonnet", - Self::Claude3Haiku => "Claude 3 Haiku", - Self::Claude3_5Haiku => "Claude 3.5 Haiku", - Self::ClaudeHaiku4_5 => "Claude Haiku 4.5", - Self::Claude3_7Sonnet => "Claude 3.7 Sonnet", - Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking", - Self::AmazonNovaLite => "Amazon Nova Lite", - Self::AmazonNovaMicro => "Amazon Nova Micro", - Self::AmazonNovaPro => "Amazon Nova Pro", - Self::AmazonNovaPremier => "Amazon Nova Premier", + Self::Llama4Scout17B => "Llama 4 Scout 17B", + Self::Llama4Maverick17B => "Llama 4 Maverick 17B", + Self::Gemma3_4B => "Gemma 3 4B", + Self::Gemma3_12B => "Gemma 3 12B", + Self::Gemma3_27B => "Gemma 3 27B", + Self::MagistralSmall => "Magistral Small", + Self::MistralLarge3 => "Mistral Large 3", + Self::PixtralLarge => "Pixtral Large", + Self::Qwen3VL235B => "Qwen3 VL 235B", + Self::Qwen3_32B => "Qwen3 32B", + Self::Qwen3_235B => "Qwen3 235B", + Self::Qwen3Next80B => "Qwen3 Next 80B", + Self::Qwen3Coder30B => "Qwen3 Coder 30B", + Self::Qwen3CoderNext => "Qwen3 Coder Next", + Self::Qwen3Coder480B => "Qwen3 Coder 480B", + Self::NovaLite => "Amazon 
Nova Lite", + Self::NovaPro => "Amazon Nova Pro", + Self::NovaPremier => "Amazon Nova Premier", + Self::Nova2Lite => "Amazon Nova 2 Lite", + Self::GptOss20B => "GPT OSS 20B", + Self::GptOss120B => "GPT OSS 120B", + Self::MiniMaxM2 => "MiniMax M2", + Self::KimiK2Thinking => "Kimi K2 Thinking", + Self::KimiK2_5 => "Kimi K2.5", Self::DeepSeekR1 => "DeepSeek R1", - Self::AI21J2GrandeInstruct => "AI21 Jurassic2 Grande Instruct", - Self::AI21J2JumboInstruct => "AI21 Jurassic2 Jumbo Instruct", - Self::AI21J2Mid => "AI21 Jurassic2 Mid", - Self::AI21J2MidV1 => "AI21 Jurassic2 Mid V1", - Self::AI21J2Ultra => "AI21 Jurassic2 Ultra", - Self::AI21J2UltraV1_8k => "AI21 Jurassic2 Ultra V1 8K", - Self::AI21J2UltraV1 => "AI21 Jurassic2 Ultra V1", - Self::AI21JambaInstructV1 => "AI21 Jamba Instruct", - Self::AI21Jamba15LargeV1 => "AI21 Jamba 1.5 Large", - Self::AI21Jamba15MiniV1 => "AI21 Jamba 1.5 Mini", - Self::CohereCommandTextV14_4k => "Cohere Command Text V14 4K", - Self::CohereCommandRV1 => "Cohere Command R V1", - Self::CohereCommandRPlusV1 => "Cohere Command R Plus V1", - Self::CohereCommandLightTextV14_4k => "Cohere Command Light Text V14 4K", - Self::MetaLlama38BInstructV1 => "Meta Llama 3 8B Instruct", - Self::MetaLlama370BInstructV1 => "Meta Llama 3 70B Instruct", - Self::MetaLlama318BInstructV1_128k => "Meta Llama 3.1 8B Instruct 128K", - Self::MetaLlama318BInstructV1 => "Meta Llama 3.1 8B Instruct", - Self::MetaLlama3170BInstructV1_128k => "Meta Llama 3.1 70B Instruct 128K", - Self::MetaLlama3170BInstructV1 => "Meta Llama 3.1 70B Instruct", - Self::MetaLlama31405BInstructV1 => "Meta Llama 3.1 405B Instruct", - Self::MetaLlama3211BInstructV1 => "Meta Llama 3.2 11B Instruct", - Self::MetaLlama3290BInstructV1 => "Meta Llama 3.2 90B Instruct", - Self::MetaLlama321BInstructV1 => "Meta Llama 3.2 1B Instruct", - Self::MetaLlama323BInstructV1 => "Meta Llama 3.2 3B Instruct", - Self::MetaLlama3370BInstructV1 => "Meta Llama 3.3 70B Instruct", - Self::MetaLlama4Scout17BInstructV1 
=> "Meta Llama 4 Scout 17B Instruct", - Self::MetaLlama4Maverick17BInstructV1 => "Meta Llama 4 Maverick 17B Instruct", - Self::MistralMistral7BInstructV0 => "Mistral 7B Instruct V0", - Self::MistralMixtral8x7BInstructV0 => "Mistral Mixtral 8x7B Instruct V0", - Self::MistralMistralLarge2402V1 => "Mistral Large 2402 V1", - Self::MistralMistralSmall2402V1 => "Mistral Small 2402 V1", - Self::MistralPixtralLarge2502V1 => "Pixtral Large 25.02 V1", - Self::PalmyraWriterX5 => "Writer Palmyra X5", - Self::PalmyraWriterX4 => "Writer Palmyra X4", + Self::DeepSeekV3_1 => "DeepSeek V3.1", + Self::DeepSeekV3_2 => "DeepSeek V3.2", Self::Custom { display_name, name, .. - } => display_name.as_deref().unwrap_or(name), + } => display_name.as_deref().unwrap_or(name.as_str()), } } pub fn max_token_count(&self) -> u64 { + self.max_tokens() + } + + pub fn max_tokens(&self) -> u64 { match self { - Self::Claude3_5SonnetV2 - | Self::Claude3Opus - | Self::Claude3Sonnet - | Self::Claude3_5Haiku - | Self::ClaudeHaiku4_5 - | Self::Claude3_7Sonnet + Self::ClaudeHaiku4_5 | Self::ClaudeSonnet4 - | Self::ClaudeOpus4 - | Self::ClaudeOpus4_1 | Self::ClaudeSonnet4Thinking | Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5Thinking - | Self::ClaudeOpus4Thinking + | Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking | Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking | Self::ClaudeOpus4_6 | Self::ClaudeOpus4_6Thinking => 200_000, - Self::AmazonNovaPremier => 1_000_000, - Self::PalmyraWriterX5 => 1_000_000, - Self::PalmyraWriterX4 => 128_000, + Self::Llama4Scout17B | Self::Llama4Maverick17B => 128_000, + Self::Gemma3_4B | Self::Gemma3_12B | Self::Gemma3_27B => 128_000, + Self::MagistralSmall | Self::MistralLarge3 | Self::PixtralLarge => 128_000, + Self::Qwen3_32B + | Self::Qwen3VL235B + | Self::Qwen3_235B + | Self::Qwen3Next80B + | Self::Qwen3Coder30B + | Self::Qwen3CoderNext + | Self::Qwen3Coder480B => 128_000, + Self::NovaLite | Self::NovaPro => 300_000, + Self::NovaPremier => 1_000_000, + Self::Nova2Lite 
=> 300_000, + Self::GptOss20B | Self::GptOss120B => 128_000, + Self::MiniMaxM2 => 128_000, + Self::KimiK2Thinking | Self::KimiK2_5 => 128_000, + Self::DeepSeekR1 | Self::DeepSeekV3_1 | Self::DeepSeekV3_2 => 128_000, Self::Custom { max_tokens, .. } => *max_tokens, - _ => 128_000, } } pub fn max_output_tokens(&self) -> u64 { match self { - Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3_5Haiku => 4_096, - Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => 128_000, - Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => 64_000, - Self::ClaudeSonnet4_5 + Self::ClaudeHaiku4_5 + | Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5Thinking - | Self::ClaudeHaiku4_5 | Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking => 64_000, + Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => 64_000, + Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => 32_000, Self::ClaudeOpus4_6 | Self::ClaudeOpus4_6Thinking => 128_000, - Self::ClaudeOpus4 - | Self::ClaudeOpus4Thinking - | Self::ClaudeOpus4_1 - | Self::ClaudeOpus4_1Thinking => 32_000, - Self::Claude3_5SonnetV2 | Self::PalmyraWriterX4 | Self::PalmyraWriterX5 => 8_192, + Self::Llama4Scout17B + | Self::Llama4Maverick17B + | Self::Gemma3_4B + | Self::Gemma3_12B + | Self::Gemma3_27B + | Self::MagistralSmall + | Self::MistralLarge3 + | Self::PixtralLarge => 8_192, + Self::Qwen3_32B + | Self::Qwen3VL235B + | Self::Qwen3_235B + | Self::Qwen3Next80B + | Self::Qwen3Coder30B + | Self::Qwen3CoderNext + | Self::Qwen3Coder480B => 8_192, + Self::NovaLite | Self::NovaPro | Self::NovaPremier | Self::Nova2Lite => 5_000, + Self::GptOss20B | Self::GptOss120B => 16_000, + Self::MiniMaxM2 => 16_000, + Self::KimiK2Thinking | Self::KimiK2_5 => 16_000, + Self::DeepSeekR1 | Self::DeepSeekV3_1 | Self::DeepSeekV3_2 => 16_000, Self::Custom { max_output_tokens, .. 
} => max_output_tokens.unwrap_or(4_096), - _ => 4_096, } } pub fn default_temperature(&self) -> f32 { match self { - Self::Claude3_5SonnetV2 - | Self::Claude3Opus - | Self::Claude3Sonnet - | Self::Claude3_5Haiku - | Self::ClaudeHaiku4_5 - | Self::Claude3_7Sonnet - | Self::ClaudeOpus4 - | Self::ClaudeOpus4Thinking + Self::ClaudeHaiku4_5 + | Self::ClaudeSonnet4 + | Self::ClaudeSonnet4Thinking + | Self::ClaudeSonnet4_5 + | Self::ClaudeSonnet4_5Thinking | Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking | Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking | Self::ClaudeOpus4_6 - | Self::ClaudeOpus4_6Thinking - | Self::ClaudeSonnet4 - | Self::ClaudeSonnet4Thinking - | Self::ClaudeSonnet4_5 - | Self::ClaudeSonnet4_5Thinking => 1.0, + | Self::ClaudeOpus4_6Thinking => 1.0, Self::Custom { default_temperature, .. @@ -511,127 +435,97 @@ impl Model { pub fn supports_tool_use(&self) -> bool { match self { - // Anthropic Claude 3 models (all support tool use) - Self::Claude3Opus - | Self::Claude3Sonnet - | Self::Claude3_5Sonnet - | Self::Claude3_5SonnetV2 - | Self::Claude3_7Sonnet - | Self::Claude3_7SonnetThinking - | Self::ClaudeOpus4 - | Self::ClaudeOpus4Thinking + Self::ClaudeHaiku4_5 + | Self::ClaudeSonnet4 + | Self::ClaudeSonnet4Thinking + | Self::ClaudeSonnet4_5 + | Self::ClaudeSonnet4_5Thinking | Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking | Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking | Self::ClaudeOpus4_6 - | Self::ClaudeOpus4_6Thinking - | Self::ClaudeSonnet4 - | Self::ClaudeSonnet4Thinking - | Self::ClaudeSonnet4_5 - | Self::ClaudeSonnet4_5Thinking - | Self::Claude3_5Haiku - | Self::ClaudeHaiku4_5 => true, - - // Amazon Nova models (all support tool use) - Self::AmazonNovaPremier - | Self::AmazonNovaPro - | Self::AmazonNovaLite - | Self::AmazonNovaMicro => true, - - // AI21 Jamba 1.5 models support tool use - Self::AI21Jamba15LargeV1 | Self::AI21Jamba15MiniV1 => true, - - // Cohere Command R models support tool use - Self::CohereCommandRV1 | 
Self::CohereCommandRPlusV1 => true, - - // All other models don't support tool use - // Including Meta Llama 3.2, AI21 Jurassic, and others + | Self::ClaudeOpus4_6Thinking => true, + Self::NovaLite | Self::NovaPro | Self::NovaPremier | Self::Nova2Lite => true, + Self::MistralLarge3 | Self::PixtralLarge | Self::MagistralSmall => true, + // Gemma is deliberately omitted (falls through to `false`): it accepts toolConfig without error + // but produces unreliable tool calls -- malformed JSON args, hallucinated tool names, dropped calls. + Self::Qwen3_32B + | Self::Qwen3VL235B + | Self::Qwen3_235B + | Self::Qwen3Next80B + | Self::Qwen3Coder30B + | Self::Qwen3CoderNext + | Self::Qwen3Coder480B => true, + Self::MiniMaxM2 => true, + Self::KimiK2Thinking | Self::KimiK2_5 => true, + Self::DeepSeekR1 | Self::DeepSeekV3_1 | Self::DeepSeekV3_2 => true, _ => false, } } pub fn supports_images(&self) -> bool { match self { - // Anthropic Claude 3+ models (all support vision) - Self::Claude3Opus - | Self::Claude3Sonnet - | Self::Claude3_5Sonnet - | Self::Claude3_5SonnetV2 - | Self::Claude3_7Sonnet - | Self::Claude3_7SonnetThinking - | Self::ClaudeOpus4 - | Self::ClaudeOpus4Thinking - | Self::ClaudeOpus4_1 - | Self::ClaudeOpus4_1Thinking - | Self::ClaudeOpus4_5 - | Self::ClaudeOpus4_5Thinking + Self::ClaudeHaiku4_5 | Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking | Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5Thinking - | Self::Claude3_5Haiku - | Self::ClaudeHaiku4_5 - | Self::Claude3Haiku => true, - - // Amazon Nova visual models - Self::AmazonNovaPro | Self::AmazonNovaLite => true, - - // Meta Llama 3.2 Vision models - Self::MetaLlama3211BInstructV1 | Self::MetaLlama3290BInstructV1 => true, - - // Mistral Pixtral (visual model) - Self::MistralPixtralLarge2502V1 => true, - - // Custom models default to no image support - Self::Custom { ..
} => false, - - // All other models don't support images + | Self::ClaudeOpus4_1 + | Self::ClaudeOpus4_1Thinking + | Self::ClaudeOpus4_5 + | Self::ClaudeOpus4_5Thinking + | Self::ClaudeOpus4_6 + | Self::ClaudeOpus4_6Thinking => true, + Self::NovaLite | Self::NovaPro => true, + Self::PixtralLarge => true, + Self::Qwen3VL235B => true, + Self::KimiK2_5 => true, _ => false, } } + pub fn supports_extended_context(&self) -> bool { + matches!( + self, + Self::ClaudeSonnet4 + | Self::ClaudeSonnet4Thinking + | Self::ClaudeSonnet4_5 + | Self::ClaudeSonnet4_5Thinking + | Self::ClaudeOpus4_5 + | Self::ClaudeOpus4_5Thinking + | Self::ClaudeOpus4_6 + | Self::ClaudeOpus4_6Thinking + ) + } + pub fn supports_caching(&self) -> bool { match self { - // Only Claude models on Bedrock support caching - // Nova models support only text caching - // https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html#prompt-caching-models - Self::Claude3_5Haiku - | Self::ClaudeHaiku4_5 - | Self::Claude3_7Sonnet - | Self::Claude3_7SonnetThinking + Self::ClaudeHaiku4_5 | Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking | Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5Thinking - | Self::ClaudeOpus4 - | Self::ClaudeOpus4Thinking | Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking | Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking | Self::ClaudeOpus4_6 | Self::ClaudeOpus4_6Thinking => true, - - // Custom models - check if they have cache configuration Self::Custom { cache_configuration, .. 
} => cache_configuration.is_some(), - - // All other models don't support caching _ => false, } } pub fn cache_configuration(&self) -> Option { match self { - Self::Claude3_7Sonnet - | Self::Claude3_7SonnetThinking - | Self::ClaudeSonnet4 + Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking - | Self::ClaudeOpus4 - | Self::ClaudeOpus4Thinking + | Self::ClaudeSonnet4_5 + | Self::ClaudeSonnet4_5Thinking | Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking | Self::ClaudeOpus4_5 @@ -641,97 +535,94 @@ impl Model { max_cache_anchors: 4, min_total_token: 1024, }), - - Self::Claude3_5Haiku | Self::ClaudeHaiku4_5 => Some(BedrockModelCacheConfiguration { + Self::ClaudeHaiku4_5 => Some(BedrockModelCacheConfiguration { max_cache_anchors: 4, min_total_token: 2048, }), - Self::Custom { cache_configuration, .. } => cache_configuration.clone(), - _ => None, } } pub fn mode(&self) -> BedrockModelMode { match self { - Model::Claude3_7SonnetThinking => BedrockModelMode::Thinking { - budget_tokens: Some(4096), - }, - Model::ClaudeSonnet4Thinking | Model::ClaudeSonnet4_5Thinking => { + Self::ClaudeSonnet4Thinking | Self::ClaudeSonnet4_5Thinking => { BedrockModelMode::Thinking { budget_tokens: Some(4096), } } - Model::ClaudeOpus4Thinking - | Model::ClaudeOpus4_1Thinking - | Model::ClaudeOpus4_5Thinking => BedrockModelMode::Thinking { - budget_tokens: Some(4096), - }, - Model::ClaudeOpus4_6Thinking => BedrockModelMode::AdaptiveThinking { + Self::ClaudeOpus4_1Thinking | Self::ClaudeOpus4_5Thinking => { + BedrockModelMode::Thinking { + budget_tokens: Some(4096), + } + } + Self::ClaudeOpus4_6Thinking => BedrockModelMode::AdaptiveThinking { effort: BedrockAdaptiveThinkingEffort::default(), }, _ => BedrockModelMode::Default, } } - pub fn supports_extended_context(&self) -> bool { - matches!( - self, - Model::ClaudeSonnet4 - | Model::ClaudeSonnet4Thinking - | Model::ClaudeSonnet4_5 - | Model::ClaudeSonnet4_5Thinking - | Model::ClaudeOpus4_5 - | Model::ClaudeOpus4_5Thinking - | 
Model::ClaudeOpus4_6 - | Model::ClaudeOpus4_6Thinking - ) - } - pub fn cross_region_inference_id( &self, region: &str, allow_global: bool, ) -> anyhow::Result { - // List derived from here: - // https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html#inference-profiles-support-system let model_id = self.request_id(); let supports_global = matches!( self, - Model::ClaudeOpus4_5 - | Model::ClaudeOpus4_5Thinking - | Model::ClaudeOpus4_6 - | Model::ClaudeOpus4_6Thinking - | Model::ClaudeHaiku4_5 - | Model::ClaudeSonnet4 - | Model::ClaudeSonnet4Thinking - | Model::ClaudeSonnet4_5 - | Model::ClaudeSonnet4_5Thinking + Self::ClaudeHaiku4_5 + | Self::ClaudeSonnet4 + | Self::ClaudeSonnet4Thinking + | Self::ClaudeSonnet4_5 + | Self::ClaudeSonnet4_5Thinking + | Self::ClaudeOpus4_5 + | Self::ClaudeOpus4_5Thinking + | Self::ClaudeOpus4_6 + | Self::ClaudeOpus4_6Thinking + | Self::Nova2Lite ); + // Determine region group based on AWS region let region_group = if region.starts_with("us-gov-") { "us-gov" - } else if region.starts_with("us-") - || region.starts_with("ca-") - || region.starts_with("sa-") - { + } else if region.starts_with("us-") || region.starts_with("sa-") { if allow_global && supports_global { "global" } else { "us" } + } else if region.starts_with("ca-") { + if allow_global && supports_global { + "global" + } else { + "ca" + } } else if region.starts_with("eu-") { if allow_global && supports_global { "global" } else { "eu" } - } else if region.starts_with("ap-") || region == "me-central-1" || region == "me-south-1" { + } else if region == "ap-southeast-2" || region == "ap-southeast-4" { + // Australia + if allow_global && supports_global { + "global" + } else { + "au" + } + } else if region == "ap-northeast-1" || region == "ap-northeast-3" { + // Japan + if allow_global && supports_global { + "global" + } else { + "jp" + } + } else if region.starts_with("ap-") || region.starts_with("me-") { if allow_global && supports_global { "global" 
} else { @@ -741,134 +632,102 @@ impl Model { anyhow::bail!("Unsupported Region {region}"); }; - match (self, region_group, region) { - (Model::Custom { .. }, _, _) => Ok(self.request_id().into()), + match (self, region_group) { + (Self::Custom { .. }, _) => Ok(model_id.into()), + // Global inference profiles ( - Model::ClaudeOpus4_5 - | Model::ClaudeOpus4_5Thinking - | Model::ClaudeOpus4_6 - | Model::ClaudeOpus4_6Thinking - | Model::ClaudeHaiku4_5 - | Model::ClaudeSonnet4 - | Model::ClaudeSonnet4Thinking - | Model::ClaudeSonnet4_5 - | Model::ClaudeSonnet4_5Thinking, + Self::ClaudeHaiku4_5 + | Self::ClaudeSonnet4 + | Self::ClaudeSonnet4Thinking + | Self::ClaudeSonnet4_5 + | Self::ClaudeSonnet4_5Thinking + | Self::ClaudeOpus4_5 + | Self::ClaudeOpus4_5Thinking + | Self::ClaudeOpus4_6 + | Self::ClaudeOpus4_6Thinking + | Self::Nova2Lite, "global", - _, ) => Ok(format!("{}.{}", region_group, model_id)), + // US Government region inference profiles + (Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5Thinking, "us-gov") => { + Ok(format!("{}.{}", region_group, model_id)) + } + + // US region inference profiles ( - Model::Claude3Haiku - | Model::Claude3_5Sonnet - | Model::Claude3_7Sonnet - | Model::Claude3_7SonnetThinking - | Model::ClaudeSonnet4_5 - | Model::ClaudeSonnet4_5Thinking, - "us-gov", - _, + Self::ClaudeHaiku4_5 + | Self::ClaudeSonnet4 + | Self::ClaudeSonnet4Thinking + | Self::ClaudeSonnet4_5 + | Self::ClaudeSonnet4_5Thinking + | Self::ClaudeOpus4_1 + | Self::ClaudeOpus4_1Thinking + | Self::ClaudeOpus4_5 + | Self::ClaudeOpus4_5Thinking + | Self::ClaudeOpus4_6 + | Self::ClaudeOpus4_6Thinking + | Self::Llama4Scout17B + | Self::Llama4Maverick17B + | Self::NovaLite + | Self::NovaPro + | Self::NovaPremier + | Self::Nova2Lite + | Self::PixtralLarge + | Self::DeepSeekR1, + "us", ) => Ok(format!("{}.{}", region_group, model_id)), - ( - Model::ClaudeHaiku4_5 | Model::ClaudeSonnet4_5 | Model::ClaudeSonnet4_5Thinking, - "apac", - "ap-southeast-2" | "ap-southeast-4", - ) => 
Ok(format!("au.{}", model_id)), + // Canada region inference profiles + (Self::NovaLite, "ca") => Ok(format!("{}.{}", region_group, model_id)), + // EU region inference profiles ( - Model::ClaudeHaiku4_5 | Model::ClaudeSonnet4_5 | Model::ClaudeSonnet4_5Thinking, - "apac", - "ap-northeast-1" | "ap-northeast-3", - ) => Ok(format!("jp.{}", model_id)), - - (Model::AmazonNovaLite, "us", r) if r.starts_with("ca-") => { - Ok(format!("ca.{}", model_id)) - } + Self::ClaudeHaiku4_5 + | Self::ClaudeSonnet4 + | Self::ClaudeSonnet4_5 + | Self::ClaudeSonnet4_5Thinking + | Self::ClaudeOpus4_6 + | Self::ClaudeOpus4_6Thinking + | Self::NovaLite + | Self::NovaPro + | Self::Nova2Lite, + "eu", + ) => Ok(format!("{}.{}", region_group, model_id)), + // Australia region inference profiles ( - Model::AmazonNovaPremier - | Model::AmazonNovaLite - | Model::AmazonNovaMicro - | Model::AmazonNovaPro - | Model::Claude3_5Haiku - | Model::ClaudeHaiku4_5 - | Model::Claude3_5Sonnet - | Model::Claude3_5SonnetV2 - | Model::Claude3_7Sonnet - | Model::Claude3_7SonnetThinking - | Model::ClaudeSonnet4 - | Model::ClaudeSonnet4Thinking - | Model::ClaudeSonnet4_5 - | Model::ClaudeSonnet4_5Thinking - | Model::ClaudeOpus4 - | Model::ClaudeOpus4Thinking - | Model::ClaudeOpus4_1 - | Model::ClaudeOpus4_1Thinking - | Model::ClaudeOpus4_5 - | Model::ClaudeOpus4_5Thinking - | Model::ClaudeOpus4_6 - | Model::ClaudeOpus4_6Thinking - | Model::Claude3Haiku - | Model::Claude3Opus - | Model::Claude3Sonnet - | Model::DeepSeekR1 - | Model::MetaLlama31405BInstructV1 - | Model::MetaLlama3170BInstructV1_128k - | Model::MetaLlama3170BInstructV1 - | Model::MetaLlama318BInstructV1_128k - | Model::MetaLlama318BInstructV1 - | Model::MetaLlama3211BInstructV1 - | Model::MetaLlama321BInstructV1 - | Model::MetaLlama323BInstructV1 - | Model::MetaLlama3290BInstructV1 - | Model::MetaLlama3370BInstructV1 - | Model::MetaLlama4Maverick17BInstructV1 - | Model::MetaLlama4Scout17BInstructV1 - | Model::MistralPixtralLarge2502V1 - | 
Model::PalmyraWriterX4 - | Model::PalmyraWriterX5, - "us", - _, + Self::ClaudeHaiku4_5 + | Self::ClaudeSonnet4_5 + | Self::ClaudeSonnet4_5Thinking + | Self::ClaudeOpus4_6 + | Self::ClaudeOpus4_6Thinking, + "au", ) => Ok(format!("{}.{}", region_group, model_id)), + // Japan region inference profiles ( - Model::AmazonNovaLite - | Model::AmazonNovaMicro - | Model::AmazonNovaPro - | Model::Claude3_5Sonnet - | Model::ClaudeHaiku4_5 - | Model::Claude3_7Sonnet - | Model::Claude3_7SonnetThinking - | Model::ClaudeSonnet4 - | Model::ClaudeSonnet4_5 - | Model::ClaudeSonnet4_5Thinking - | Model::ClaudeOpus4_6 - | Model::ClaudeOpus4_6Thinking - | Model::Claude3Haiku - | Model::Claude3Sonnet - | Model::MetaLlama321BInstructV1 - | Model::MetaLlama323BInstructV1 - | Model::MistralPixtralLarge2502V1, - "eu", - _, + Self::ClaudeHaiku4_5 + | Self::ClaudeSonnet4_5 + | Self::ClaudeSonnet4_5Thinking + | Self::Nova2Lite, + "jp", ) => Ok(format!("{}.{}", region_group, model_id)), + // APAC region inference profiles (other than AU/JP) ( - Model::AmazonNovaLite - | Model::AmazonNovaMicro - | Model::AmazonNovaPro - | Model::Claude3_5Sonnet - | Model::Claude3_5SonnetV2 - | Model::ClaudeHaiku4_5 - | Model::Claude3_7Sonnet - | Model::Claude3_7SonnetThinking - | Model::ClaudeSonnet4 - | Model::Claude3Haiku - | Model::Claude3Sonnet, + Self::ClaudeHaiku4_5 + | Self::ClaudeSonnet4 + | Self::ClaudeSonnet4_5 + | Self::ClaudeSonnet4_5Thinking + | Self::NovaLite + | Self::NovaPro + | Self::Nova2Lite, "apac", - _, ) => Ok(format!("{}.{}", region_group, model_id)), + // Default: use model ID directly _ => Ok(model_id.into()), } } @@ -880,25 +739,27 @@ mod tests { #[test] fn test_us_region_inference_ids() -> anyhow::Result<()> { - // Test US regions assert_eq!( - Model::Claude3_5SonnetV2.cross_region_inference_id("us-east-1", false)?, - "us.anthropic.claude-3-5-sonnet-20241022-v2:0" + Model::ClaudeSonnet4_5.cross_region_inference_id("us-east-1", false)?, + "us.anthropic.claude-sonnet-4-5-20250929-v1:0" ); 
assert_eq!( - Model::Claude3_5SonnetV2.cross_region_inference_id("us-west-2", false)?, - "us.anthropic.claude-3-5-sonnet-20241022-v2:0" + Model::ClaudeSonnet4.cross_region_inference_id("us-west-2", false)?, + "us.anthropic.claude-sonnet-4-20250514-v1:0" ); assert_eq!( - Model::AmazonNovaPro.cross_region_inference_id("us-east-2", false)?, + Model::NovaPro.cross_region_inference_id("us-east-2", false)?, "us.amazon.nova-pro-v1:0" ); + assert_eq!( + Model::DeepSeekR1.cross_region_inference_id("us-east-1", false)?, + "us.deepseek.r1-v1:0" + ); Ok(()) } #[test] fn test_eu_region_inference_ids() -> anyhow::Result<()> { - // Test European regions assert_eq!( Model::ClaudeSonnet4.cross_region_inference_id("eu-west-1", false)?, "eu.anthropic.claude-sonnet-4-20250514-v1:0" @@ -908,114 +769,148 @@ mod tests { "eu.anthropic.claude-sonnet-4-5-20250929-v1:0" ); assert_eq!( - Model::Claude3Sonnet.cross_region_inference_id("eu-west-1", false)?, - "eu.anthropic.claude-3-sonnet-20240229-v1:0" + Model::NovaLite.cross_region_inference_id("eu-north-1", false)?, + "eu.amazon.nova-lite-v1:0" ); assert_eq!( - Model::AmazonNovaMicro.cross_region_inference_id("eu-north-1", false)?, - "eu.amazon.nova-micro-v1:0" + Model::ClaudeOpus4_6.cross_region_inference_id("eu-west-1", false)?, + "eu.anthropic.claude-opus-4-6-v1" ); Ok(()) } #[test] fn test_apac_region_inference_ids() -> anyhow::Result<()> { - // Test Asia-Pacific regions - assert_eq!( - Model::Claude3_5SonnetV2.cross_region_inference_id("ap-northeast-1", false)?, - "apac.anthropic.claude-3-5-sonnet-20241022-v2:0" - ); assert_eq!( - Model::Claude3_5SonnetV2.cross_region_inference_id("ap-southeast-2", false)?, - "apac.anthropic.claude-3-5-sonnet-20241022-v2:0" + Model::ClaudeSonnet4_5.cross_region_inference_id("ap-south-1", false)?, + "apac.anthropic.claude-sonnet-4-5-20250929-v1:0" ); assert_eq!( - Model::AmazonNovaLite.cross_region_inference_id("ap-south-1", false)?, + Model::NovaLite.cross_region_inference_id("ap-south-1", false)?, 
"apac.amazon.nova-lite-v1:0" ); Ok(()) } #[test] - fn test_gov_region_inference_ids() -> anyhow::Result<()> { - // Test Government regions + fn test_au_region_inference_ids() -> anyhow::Result<()> { + assert_eq!( + Model::ClaudeHaiku4_5.cross_region_inference_id("ap-southeast-2", false)?, + "au.anthropic.claude-haiku-4-5-20251001-v1:0" + ); assert_eq!( - Model::Claude3_5Sonnet.cross_region_inference_id("us-gov-east-1", false)?, - "us-gov.anthropic.claude-3-5-sonnet-20240620-v1:0" + Model::ClaudeSonnet4_5.cross_region_inference_id("ap-southeast-4", false)?, + "au.anthropic.claude-sonnet-4-5-20250929-v1:0" ); assert_eq!( - Model::Claude3Haiku.cross_region_inference_id("us-gov-west-1", false)?, - "us-gov.anthropic.claude-3-haiku-20240307-v1:0" + Model::ClaudeOpus4_6.cross_region_inference_id("ap-southeast-2", false)?, + "au.anthropic.claude-opus-4-6-v1" ); Ok(()) } #[test] - fn test_meta_models_inference_ids() -> anyhow::Result<()> { - // Test Meta models + fn test_jp_region_inference_ids() -> anyhow::Result<()> { assert_eq!( - Model::MetaLlama370BInstructV1.cross_region_inference_id("us-east-1", false)?, - "meta.llama3-70b-instruct-v1:0" + Model::ClaudeHaiku4_5.cross_region_inference_id("ap-northeast-1", false)?, + "jp.anthropic.claude-haiku-4-5-20251001-v1:0" ); assert_eq!( - Model::MetaLlama3170BInstructV1.cross_region_inference_id("us-east-1", false)?, - "us.meta.llama3-1-70b-instruct-v1:0" + Model::ClaudeSonnet4_5.cross_region_inference_id("ap-northeast-3", false)?, + "jp.anthropic.claude-sonnet-4-5-20250929-v1:0" ); assert_eq!( - Model::MetaLlama321BInstructV1.cross_region_inference_id("eu-west-1", false)?, - "eu.meta.llama3-2-1b-instruct-v1:0" + Model::Nova2Lite.cross_region_inference_id("ap-northeast-1", false)?, + "jp.amazon.nova-2-lite-v1:0" ); Ok(()) } #[test] - fn test_mistral_models_inference_ids() -> anyhow::Result<()> { - // Mistral models don't follow the regional prefix pattern, - // so they should return their original IDs + fn 
test_ca_region_inference_ids() -> anyhow::Result<()> { assert_eq!( - Model::MistralMistralLarge2402V1.cross_region_inference_id("us-east-1", false)?, - "mistral.mistral-large-2402-v1:0" + Model::NovaLite.cross_region_inference_id("ca-central-1", false)?, + "ca.amazon.nova-lite-v1:0" ); + Ok(()) + } + + #[test] + fn test_gov_region_inference_ids() -> anyhow::Result<()> { assert_eq!( - Model::MistralMixtral8x7BInstructV0.cross_region_inference_id("eu-west-1", false)?, - "mistral.mixtral-8x7b-instruct-v0:1" + Model::ClaudeSonnet4_5.cross_region_inference_id("us-gov-east-1", false)?, + "us-gov.anthropic.claude-sonnet-4-5-20250929-v1:0" + ); + assert_eq!( + Model::ClaudeSonnet4_5Thinking.cross_region_inference_id("us-gov-west-1", false)?, + "us-gov.anthropic.claude-sonnet-4-5-20250929-v1:0" ); Ok(()) } #[test] - fn test_ai21_models_inference_ids() -> anyhow::Result<()> { - // AI21 models don't follow the regional prefix pattern, - // so they should return their original IDs + fn test_global_inference_ids() -> anyhow::Result<()> { assert_eq!( - Model::AI21J2UltraV1.cross_region_inference_id("us-east-1", false)?, - "ai21.j2-ultra-v1" + Model::ClaudeSonnet4.cross_region_inference_id("us-east-1", true)?, + "global.anthropic.claude-sonnet-4-20250514-v1:0" ); assert_eq!( - Model::AI21JambaInstructV1.cross_region_inference_id("eu-west-1", false)?, - "ai21.jamba-instruct-v1:0" + Model::ClaudeSonnet4_5.cross_region_inference_id("eu-west-1", true)?, + "global.anthropic.claude-sonnet-4-5-20250929-v1:0" + ); + assert_eq!( + Model::ClaudeHaiku4_5.cross_region_inference_id("ap-south-1", true)?, + "global.anthropic.claude-haiku-4-5-20251001-v1:0" + ); + assert_eq!( + Model::ClaudeOpus4_6.cross_region_inference_id("us-east-1", true)?, + "global.anthropic.claude-opus-4-6-v1" + ); + assert_eq!( + Model::Nova2Lite.cross_region_inference_id("us-east-1", true)?, + "global.amazon.nova-2-lite-v1:0" + ); + + // Models without global support fall back to regional + assert_eq!( + 
Model::NovaPro.cross_region_inference_id("us-east-1", true)?, + "us.amazon.nova-pro-v1:0" ); Ok(()) } #[test] - fn test_cohere_models_inference_ids() -> anyhow::Result<()> { - // Cohere models don't follow the regional prefix pattern, - // so they should return their original IDs + fn test_models_without_cross_region() -> anyhow::Result<()> { + // Models without cross-region support return their request_id directly + assert_eq!( + Model::Gemma3_4B.cross_region_inference_id("us-east-1", false)?, + "google.gemma-3-4b-it" + ); assert_eq!( - Model::CohereCommandRV1.cross_region_inference_id("us-east-1", false)?, - "cohere.command-r-v1:0" + Model::MistralLarge3.cross_region_inference_id("eu-west-1", false)?, + "mistral.mistral-large-3-675b-instruct" ); assert_eq!( - Model::CohereCommandTextV14_4k.cross_region_inference_id("ap-southeast-1", false)?, - "cohere.command-text-v14:7:4k" + Model::Qwen3VL235B.cross_region_inference_id("ap-south-1", false)?, + "qwen.qwen3-vl-235b-a22b" + ); + assert_eq!( + Model::GptOss120B.cross_region_inference_id("us-east-1", false)?, + "openai.gpt-oss-120b-1:0" + ); + assert_eq!( + Model::MiniMaxM2.cross_region_inference_id("us-east-1", false)?, + "minimax.minimax-m2" + ); + assert_eq!( + Model::KimiK2Thinking.cross_region_inference_id("us-east-1", false)?, + "moonshot.kimi-k2-thinking" ); Ok(()) } #[test] fn test_custom_model_inference_ids() -> anyhow::Result<()> { - // Test custom models let custom_model = Model::Custom { name: "custom.my-model-v1:0".to_string(), max_tokens: 100000, @@ -1025,46 +920,36 @@ mod tests { cache_configuration: None, }; - // Custom model should return its name unchanged assert_eq!( custom_model.cross_region_inference_id("us-east-1", false)?, "custom.my-model-v1:0" ); - - // Test that models without global support fall back to regional when allow_global is true assert_eq!( - Model::AmazonNovaPro.cross_region_inference_id("us-east-1", true)?, - "us.amazon.nova-pro-v1:0", - "Nova Pro should fall back to regional 
profile even when allow_global is true" + custom_model.cross_region_inference_id("eu-west-1", true)?, + "custom.my-model-v1:0" ); - Ok(()) } #[test] fn test_friendly_id_vs_request_id() { - // Test that id() returns friendly identifiers - assert_eq!(Model::Claude3_5SonnetV2.id(), "claude-3-5-sonnet-v2"); - assert_eq!(Model::AmazonNovaLite.id(), "amazon-nova-lite"); + assert_eq!(Model::ClaudeSonnet4_5.id(), "claude-sonnet-4-5"); + assert_eq!(Model::NovaLite.id(), "nova-lite"); assert_eq!(Model::DeepSeekR1.id(), "deepseek-r1"); - assert_eq!( - Model::MetaLlama38BInstructV1.id(), - "meta-llama3-8b-instruct-v1" - ); + assert_eq!(Model::Llama4Scout17B.id(), "llama-4-scout-17b"); - // Test that request_id() returns actual backend model IDs assert_eq!( - Model::Claude3_5SonnetV2.request_id(), - "anthropic.claude-3-5-sonnet-20241022-v2:0" + Model::ClaudeSonnet4_5.request_id(), + "anthropic.claude-sonnet-4-5-20250929-v1:0" ); - assert_eq!(Model::AmazonNovaLite.request_id(), "amazon.nova-lite-v1:0"); + assert_eq!(Model::NovaLite.request_id(), "amazon.nova-lite-v1:0"); assert_eq!(Model::DeepSeekR1.request_id(), "deepseek.r1-v1:0"); assert_eq!( - Model::MetaLlama38BInstructV1.request_id(), - "meta.llama3-8b-instruct-v1:0" + Model::Llama4Scout17B.request_id(), + "meta.llama4-scout-17b-instruct-v1:0" ); - // Test thinking models have different friendly IDs but same request IDs + // Thinking models have different friendly IDs but same request IDs assert_eq!(Model::ClaudeSonnet4.id(), "claude-sonnet-4"); assert_eq!( Model::ClaudeSonnet4Thinking.id(), @@ -1075,29 +960,58 @@ mod tests { Model::ClaudeSonnet4Thinking.request_id() ); } -} -#[test] -fn test_global_inference_ids() -> anyhow::Result<()> { - // Test global inference for models that support it when allow_global is true - assert_eq!( - Model::ClaudeSonnet4.cross_region_inference_id("us-east-1", true)?, - "global.anthropic.claude-sonnet-4-20250514-v1:0" - ); - assert_eq!( - 
Model::ClaudeSonnet4_5.cross_region_inference_id("eu-west-1", true)?, - "global.anthropic.claude-sonnet-4-5-20250929-v1:0" - ); - assert_eq!( - Model::ClaudeHaiku4_5.cross_region_inference_id("ap-south-1", true)?, - "global.anthropic.claude-haiku-4-5-20251001-v1:0" - ); - - // Test that regional prefix is used when allow_global is false - assert_eq!( - Model::ClaudeSonnet4.cross_region_inference_id("us-east-1", false)?, - "us.anthropic.claude-sonnet-4-20250514-v1:0" - ); - - Ok(()) + #[test] + fn test_model_modes() { + assert_eq!(Model::ClaudeSonnet4.mode(), BedrockModelMode::Default); + assert_eq!( + Model::ClaudeSonnet4Thinking.mode(), + BedrockModelMode::Thinking { + budget_tokens: Some(4096) + } + ); + assert_eq!( + Model::ClaudeOpus4_6Thinking.mode(), + BedrockModelMode::AdaptiveThinking { + effort: BedrockAdaptiveThinkingEffort::High + } + ); + } + + #[test] + fn test_max_tokens() { + assert_eq!(Model::ClaudeSonnet4_5.max_tokens(), 200_000); + assert_eq!(Model::ClaudeOpus4_6.max_tokens(), 200_000); + assert_eq!(Model::Llama4Scout17B.max_tokens(), 128_000); + assert_eq!(Model::NovaPremier.max_tokens(), 1_000_000); + } + + #[test] + fn test_max_output_tokens() { + assert_eq!(Model::ClaudeSonnet4_5.max_output_tokens(), 64_000); + assert_eq!(Model::ClaudeOpus4_6.max_output_tokens(), 128_000); + assert_eq!(Model::ClaudeOpus4_1.max_output_tokens(), 32_000); + assert_eq!(Model::Gemma3_4B.max_output_tokens(), 8_192); + } + + #[test] + fn test_supports_tool_use() { + assert!(Model::ClaudeSonnet4_5.supports_tool_use()); + assert!(Model::NovaPro.supports_tool_use()); + assert!(Model::MistralLarge3.supports_tool_use()); + assert!(!Model::Gemma3_4B.supports_tool_use()); + assert!(Model::Qwen3_32B.supports_tool_use()); + assert!(Model::MiniMaxM2.supports_tool_use()); + assert!(Model::KimiK2_5.supports_tool_use()); + assert!(Model::DeepSeekR1.supports_tool_use()); + assert!(!Model::Llama4Scout17B.supports_tool_use()); + } + + #[test] + fn test_supports_caching() { + 
assert!(Model::ClaudeSonnet4_5.supports_caching()); + assert!(Model::ClaudeOpus4_6.supports_caching()); + assert!(!Model::Llama4Scout17B.supports_caching()); + assert!(!Model::NovaPro.supports_caching()); + } } diff --git a/crates/language_models/src/provider/bedrock.rs b/crates/language_models/src/provider/bedrock.rs index 677764d32c16d7f8456e45e0e53f1d5d67691599..bcf8401c1c14ae1a74bb7136141d0b35509cdd40 100644 --- a/crates/language_models/src/provider/bedrock.rs +++ b/crates/language_models/src/provider/bedrock.rs @@ -600,18 +600,19 @@ impl BedrockModel { cx: &AsyncApp, ) -> BoxFuture< 'static, - Result>>, + Result>, BedrockError>, > { let Ok(runtime_client) = self .get_or_init_client(cx) .cloned() .context("Bedrock client not initialized") else { - return futures::future::ready(Err(anyhow!("App state dropped"))).boxed(); + return futures::future::ready(Err(BedrockError::Other(anyhow!("App state dropped")))) + .boxed(); }; let task = Tokio::spawn(cx, bedrock::stream_completion(runtime_client, request)); - async move { task.await.map_err(|err| anyhow!(err))? }.boxed() + async move { task.await.map_err(|e| BedrockError::Other(e.into()))? }.boxed() } } @@ -715,6 +716,7 @@ impl LanguageModel for BedrockModel { self.model.max_output_tokens(), self.model.mode(), self.model.supports_caching(), + self.model.supports_tool_use(), use_extended_context, ) { Ok(request) => request, @@ -722,8 +724,44 @@ impl LanguageModel for BedrockModel { }; let request = self.stream_completion(request, cx); + let display_name = self.model.display_name().to_string(); let future = self.request_limiter.stream(async move { - let response = request.await.map_err(|err| anyhow!(err))?; + let response = request.await.map_err(|err| match err { + BedrockError::Validation(ref msg) => { + if msg.contains("model identifier is invalid") { + LanguageModelCompletionError::Other(anyhow!( + "{display_name} is not available in {region}. \ + Try switching to a region where this model is supported." 
+ )) + } else { + LanguageModelCompletionError::BadRequestFormat { + provider: PROVIDER_NAME, + message: msg.clone(), + } + } + } + BedrockError::RateLimited => LanguageModelCompletionError::RateLimitExceeded { + provider: PROVIDER_NAME, + retry_after: None, + }, + BedrockError::ServiceUnavailable => { + LanguageModelCompletionError::ServerOverloaded { + provider: PROVIDER_NAME, + retry_after: None, + } + } + BedrockError::AccessDenied(msg) => LanguageModelCompletionError::PermissionError { + provider: PROVIDER_NAME, + message: msg, + }, + BedrockError::InternalServer(msg) => { + LanguageModelCompletionError::ApiInternalServerError { + provider: PROVIDER_NAME, + message: msg, + } + } + other => LanguageModelCompletionError::Other(anyhow!(other)), + })?; let events = map_to_language_model_completion_events(response); if deny_tool_calls { @@ -771,6 +809,7 @@ pub fn into_bedrock( max_output_tokens: u64, mode: BedrockModelMode, supports_caching: bool, + supports_tool_use: bool, allow_extended_context: bool, ) -> Result { let mut new_messages: Vec = Vec::new(); @@ -965,28 +1004,32 @@ pub fn into_bedrock( } } - let mut tool_spec: Vec = request - .tools - .iter() - .filter_map(|tool| { - Some(BedrockTool::ToolSpec( - BedrockToolSpec::builder() - .name(tool.name.clone()) - .description(tool.description.clone()) - .input_schema(BedrockToolInputSchema::Json(value_to_aws_document( - &tool.input_schema, - ))) - .build() - .log_err()?, - )) - }) - .collect(); + let mut tool_spec: Vec = if supports_tool_use { + request + .tools + .iter() + .filter_map(|tool| { + Some(BedrockTool::ToolSpec( + BedrockToolSpec::builder() + .name(tool.name.clone()) + .description(tool.description.clone()) + .input_schema(BedrockToolInputSchema::Json(value_to_aws_document( + &tool.input_schema, + ))) + .build() + .log_err()?, + )) + }) + .collect() + } else { + Vec::new() + }; // Bedrock requires toolConfig when messages contain tool use/result blocks. 
// If no tools are defined but messages contain tool content (e.g., when // summarising a conversation that used tools), add a dummy tool to satisfy // the API requirement. - if tool_spec.is_empty() && messages_contain_tool_content { + if supports_tool_use && tool_spec.is_empty() && messages_contain_tool_content { tool_spec.push(BedrockTool::ToolSpec( BedrockToolSpec::builder() .name("_placeholder") @@ -1020,17 +1063,23 @@ pub fn into_bedrock( BedrockToolChoice::Auto(BedrockAutoToolChoice::builder().build()) } }; - let tool_config: BedrockToolConfig = BedrockToolConfig::builder() - .set_tools(Some(tool_spec)) - .tool_choice(tool_choice) - .build()?; + let tool_config = if tool_spec.is_empty() { + None + } else { + Some( + BedrockToolConfig::builder() + .set_tools(Some(tool_spec)) + .tool_choice(tool_choice) + .build()?, + ) + }; Ok(bedrock::Request { model, messages: new_messages, max_tokens: max_output_tokens, system: Some(system_message), - tools: Some(tool_config), + tools: tool_config, thinking: if request.thinking_allowed { match mode { BedrockModelMode::Thinking { budget_tokens } => { @@ -1116,7 +1165,7 @@ pub fn get_bedrock_tokens( } pub fn map_to_language_model_completion_events( - events: Pin>>>, + events: Pin>>>, ) -> impl Stream> { struct RawToolUse { id: String, @@ -1125,13 +1174,15 @@ pub fn map_to_language_model_completion_events( } struct State { - events: Pin>>>, + events: Pin>>>, tool_uses_by_index: HashMap, + emitted_tool_use: bool, } let initial_state = State { events, tool_uses_by_index: HashMap::default(), + emitted_tool_use: false, }; futures::stream::unfold(initial_state, |mut state| async move { @@ -1190,10 +1241,13 @@ pub fn map_to_language_model_completion_events( } None } + ConverseStreamOutput::MessageStart(_) => None, ConverseStreamOutput::ContentBlockStop(cb_stop) => state .tool_uses_by_index .remove(&cb_stop.content_block_index) .map(|tool_use| { + state.emitted_tool_use = true; + let input = parse_tool_arguments(&tool_use.input_json) 
.unwrap_or_else(|_| Value::Object(Default::default())); @@ -1223,9 +1277,16 @@ pub fn map_to_language_model_completion_events( })) }), ConverseStreamOutput::MessageStop(message_stop) => { - let stop_reason = match message_stop.stop_reason { - StopReason::ToolUse => language_model::StopReason::ToolUse, - _ => language_model::StopReason::EndTurn, + let stop_reason = if state.emitted_tool_use { + // Some models (e.g. Kimi) send EndTurn even when + // they've made tool calls. Trust the content over + // the stop reason. + language_model::StopReason::ToolUse + } else { + match message_stop.stop_reason { + StopReason::ToolUse => language_model::StopReason::ToolUse, + _ => language_model::StopReason::EndTurn, + } }; Some(Ok(LanguageModelCompletionEvent::Stop(stop_reason))) }