From 0f0017dc8e7dce1ab8cfbbac337e83d79b612289 Mon Sep 17 00:00:00 2001 From: Shardul Vaidya <31039336+5herlocked@users.noreply.github.com> Date: Thu, 4 Dec 2025 06:14:31 -0500 Subject: [PATCH] bedrock: Support global endpoints and new regional endpoints (#44103) Closes #43598 Release Notes: - bedrock: Added opt-in `allow_global` which enables global endpoints - bedrock: Updated cross-region-inference endpoint and model list - bedrock: Fixed Opus 4.5 access on Bedrock, now only accessible through the `allow_global` setting --- crates/bedrock/src/models.rs | 201 +++++++++++++----- .../language_models/src/provider/bedrock.rs | 14 +- crates/language_models/src/settings.rs | 1 + .../src/settings_content/language_model.rs | 1 + docs/src/ai/llm-providers.md | 28 ++- 5 files changed, 185 insertions(+), 60 deletions(-) diff --git a/crates/bedrock/src/models.rs b/crates/bedrock/src/models.rs index f3b276a8d2f30e8062931e76608bbc3a302ad734..51e1b29f9ad3cf953605c5c59090785f3ab45eac 100644 --- a/crates/bedrock/src/models.rs +++ b/crates/bedrock/src/models.rs @@ -584,41 +584,100 @@ impl Model { } } - pub fn cross_region_inference_id(&self, region: &str) -> anyhow::Result { + pub fn cross_region_inference_id( + &self, + region: &str, + allow_global: bool, + ) -> anyhow::Result { + // List derived from here: + // https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html#inference-profiles-support-system + let model_id = self.request_id(); + + let supports_global = matches!( + self, + Model::ClaudeOpus4_5 + | Model::ClaudeOpus4_5Thinking + | Model::ClaudeHaiku4_5 + | Model::ClaudeSonnet4 + | Model::ClaudeSonnet4Thinking + | Model::ClaudeSonnet4_5 + | Model::ClaudeSonnet4_5Thinking + ); + let region_group = if region.starts_with("us-gov-") { "us-gov" - } else if region.starts_with("us-") { - "us" + } else if region.starts_with("us-") + || region.starts_with("ca-") + || region.starts_with("sa-") + { + if allow_global && supports_global { + "global" + } else { + "us" + } } else if region.starts_with("eu-") { - "eu" + if allow_global && supports_global { + "global" + } else { + "eu" + } } else if region.starts_with("ap-") || region == "me-central-1" || region == "me-south-1" { - "apac" - } else if region.starts_with("ca-") || region.starts_with("sa-") { - // Canada and South America regions - default to US profiles - "us" + if allow_global && supports_global { + "global" + } else { + "apac" + } } else { anyhow::bail!("Unsupported Region {region}"); }; - let model_id = self.request_id(); + match (self, region_group, region) { + (Model::Custom { .. }, _, _) => Ok(self.request_id().into()), - match (self, region_group) { - // Custom models can't have CRI IDs - (Model::Custom { .. }, _) => Ok(self.request_id().into()), + ( + Model::ClaudeOpus4_5 + | Model::ClaudeOpus4_5Thinking + | Model::ClaudeHaiku4_5 + | Model::ClaudeSonnet4 + | Model::ClaudeSonnet4Thinking + | Model::ClaudeSonnet4_5 + | Model::ClaudeSonnet4_5Thinking, + "global", + _, + ) => Ok(format!("{}.{}", region_group, model_id)), - // Models with US Gov only - (Model::Claude3_5Sonnet, "us-gov") | (Model::Claude3Haiku, "us-gov") => { - Ok(format!("{}.{}", region_group, model_id)) - } + ( + Model::Claude3Haiku + | Model::Claude3_5Sonnet + | Model::Claude3_7Sonnet + | Model::Claude3_7SonnetThinking + | Model::ClaudeSonnet4_5 + | Model::ClaudeSonnet4_5Thinking, + "us-gov", + _, + ) => Ok(format!("{}.{}", region_group, model_id)), - // Available everywhere - (Model::AmazonNovaLite | Model::AmazonNovaMicro | Model::AmazonNovaPro, _) => { - Ok(format!("{}.{}", region_group, model_id)) + ( + Model::ClaudeHaiku4_5 | Model::ClaudeSonnet4_5 | Model::ClaudeSonnet4_5Thinking, + "apac", + "ap-southeast-2" | "ap-southeast-4", + ) => Ok(format!("au.{}", model_id)), + + ( + Model::ClaudeHaiku4_5 | Model::ClaudeSonnet4_5 | Model::ClaudeSonnet4_5Thinking, + "apac", + "ap-northeast-1" | "ap-northeast-3", + ) => Ok(format!("jp.{}", model_id)), + + (Model::AmazonNovaLite, "us", r) if r.starts_with("ca-") => { + Ok(format!("ca.{}", model_id)) } - // Models in US ( Model::AmazonNovaPremier + | Model::AmazonNovaLite + | Model::AmazonNovaMicro + | Model::AmazonNovaPro | Model::Claude3_5Haiku | Model::ClaudeHaiku4_5 | Model::Claude3_5Sonnet @@ -655,16 +714,18 @@ impl Model { | Model::PalmyraWriterX4 | Model::PalmyraWriterX5, "us", + _, ) => Ok(format!("{}.{}", region_group, model_id)), - // Models available in EU ( - Model::Claude3_5Sonnet + Model::AmazonNovaLite + | Model::AmazonNovaMicro + | Model::AmazonNovaPro + | Model::Claude3_5Sonnet | Model::ClaudeHaiku4_5 | Model::Claude3_7Sonnet | Model::Claude3_7SonnetThinking | Model::ClaudeSonnet4 - | Model::ClaudeSonnet4Thinking | Model::ClaudeSonnet4_5 | Model::ClaudeSonnet4_5Thinking | Model::Claude3Haiku @@ -673,26 +734,26 @@ impl Model { | Model::MetaLlama323BInstructV1 | Model::MistralPixtralLarge2502V1, "eu", + _, ) => Ok(format!("{}.{}", region_group, model_id)), - // Models available in APAC ( - Model::Claude3_5Sonnet + Model::AmazonNovaLite + | Model::AmazonNovaMicro + | Model::AmazonNovaPro + | Model::Claude3_5Sonnet | Model::Claude3_5SonnetV2 | Model::ClaudeHaiku4_5 - | Model::Claude3Haiku - | Model::Claude3Sonnet | Model::Claude3_7Sonnet | Model::Claude3_7SonnetThinking | Model::ClaudeSonnet4 - | Model::ClaudeSonnet4Thinking - | Model::ClaudeSonnet4_5 - | Model::ClaudeSonnet4_5Thinking, + | Model::Claude3Haiku + | Model::Claude3Sonnet, "apac", + _, ) => Ok(format!("{}.{}", region_group, model_id)), - // Any other combination is not supported - _ => Ok(self.request_id().into()), + _ => Ok(model_id.into()), } } } @@ -705,15 +766,15 @@ mod tests { fn test_us_region_inference_ids() -> anyhow::Result<()> { // Test US regions assert_eq!( - Model::Claude3_5SonnetV2.cross_region_inference_id("us-east-1")?, + Model::Claude3_5SonnetV2.cross_region_inference_id("us-east-1", false)?, "us.anthropic.claude-3-5-sonnet-20241022-v2:0" ); assert_eq!( - Model::Claude3_5SonnetV2.cross_region_inference_id("us-west-2")?, + Model::Claude3_5SonnetV2.cross_region_inference_id("us-west-2", false)?, "us.anthropic.claude-3-5-sonnet-20241022-v2:0" ); assert_eq!( - Model::AmazonNovaPro.cross_region_inference_id("us-east-2")?, + Model::AmazonNovaPro.cross_region_inference_id("us-east-2", false)?, "us.amazon.nova-pro-v1:0" ); Ok(()) @@ -723,19 +784,19 @@ mod tests { fn test_eu_region_inference_ids() -> anyhow::Result<()> { // Test European regions assert_eq!( - Model::ClaudeSonnet4.cross_region_inference_id("eu-west-1")?, + Model::ClaudeSonnet4.cross_region_inference_id("eu-west-1", false)?, "eu.anthropic.claude-sonnet-4-20250514-v1:0" ); assert_eq!( - Model::ClaudeSonnet4_5.cross_region_inference_id("eu-west-1")?, + Model::ClaudeSonnet4_5.cross_region_inference_id("eu-west-1", false)?, "eu.anthropic.claude-sonnet-4-5-20250929-v1:0" ); assert_eq!( - Model::Claude3Sonnet.cross_region_inference_id("eu-west-1")?, + Model::Claude3Sonnet.cross_region_inference_id("eu-west-1", false)?, "eu.anthropic.claude-3-sonnet-20240229-v1:0" ); assert_eq!( - Model::AmazonNovaMicro.cross_region_inference_id("eu-north-1")?, + Model::AmazonNovaMicro.cross_region_inference_id("eu-north-1", false)?, "eu.amazon.nova-micro-v1:0" ); Ok(()) @@ -745,15 +806,15 @@ mod tests { fn test_apac_region_inference_ids() -> anyhow::Result<()> { // Test Asia-Pacific regions assert_eq!( - Model::Claude3_5SonnetV2.cross_region_inference_id("ap-northeast-1")?, + Model::Claude3_5SonnetV2.cross_region_inference_id("ap-northeast-1", false)?, "apac.anthropic.claude-3-5-sonnet-20241022-v2:0" ); assert_eq!( - Model::Claude3_5SonnetV2.cross_region_inference_id("ap-southeast-2")?, + Model::Claude3_5SonnetV2.cross_region_inference_id("ap-southeast-2", false)?, "apac.anthropic.claude-3-5-sonnet-20241022-v2:0" ); assert_eq!( - Model::AmazonNovaLite.cross_region_inference_id("ap-south-1")?, + Model::AmazonNovaLite.cross_region_inference_id("ap-south-1", false)?, "apac.amazon.nova-lite-v1:0" ); Ok(()) @@ -763,11 +824,11 @@ mod tests { fn test_gov_region_inference_ids() -> anyhow::Result<()> { // Test Government regions assert_eq!( - Model::Claude3_5Sonnet.cross_region_inference_id("us-gov-east-1")?, + Model::Claude3_5Sonnet.cross_region_inference_id("us-gov-east-1", false)?, "us-gov.anthropic.claude-3-5-sonnet-20240620-v1:0" ); assert_eq!( - Model::Claude3Haiku.cross_region_inference_id("us-gov-west-1")?, + Model::Claude3Haiku.cross_region_inference_id("us-gov-west-1", false)?, "us-gov.anthropic.claude-3-haiku-20240307-v1:0" ); Ok(()) @@ -777,15 +838,15 @@ mod tests { fn test_meta_models_inference_ids() -> anyhow::Result<()> { // Test Meta models assert_eq!( - Model::MetaLlama370BInstructV1.cross_region_inference_id("us-east-1")?, + Model::MetaLlama370BInstructV1.cross_region_inference_id("us-east-1", false)?, "meta.llama3-70b-instruct-v1:0" ); assert_eq!( - Model::MetaLlama3170BInstructV1.cross_region_inference_id("us-east-1")?, + Model::MetaLlama3170BInstructV1.cross_region_inference_id("us-east-1", false)?, "us.meta.llama3-1-70b-instruct-v1:0" ); assert_eq!( - Model::MetaLlama321BInstructV1.cross_region_inference_id("eu-west-1")?, + Model::MetaLlama321BInstructV1.cross_region_inference_id("eu-west-1", false)?, "eu.meta.llama3-2-1b-instruct-v1:0" ); Ok(()) @@ -796,11 +857,11 @@ mod tests { // Mistral models don't follow the regional prefix pattern, // so they should return their original IDs assert_eq!( - Model::MistralMistralLarge2402V1.cross_region_inference_id("us-east-1")?, + Model::MistralMistralLarge2402V1.cross_region_inference_id("us-east-1", false)?, "mistral.mistral-large-2402-v1:0" ); assert_eq!( - Model::MistralMixtral8x7BInstructV0.cross_region_inference_id("eu-west-1")?, + Model::MistralMixtral8x7BInstructV0.cross_region_inference_id("eu-west-1", false)?, "mistral.mixtral-8x7b-instruct-v0:1" ); Ok(()) @@ -811,11 +872,11 @@ mod tests { // AI21 models don't follow the regional prefix pattern, // so they should return their original IDs assert_eq!( - Model::AI21J2UltraV1.cross_region_inference_id("us-east-1")?, + Model::AI21J2UltraV1.cross_region_inference_id("us-east-1", false)?, "ai21.j2-ultra-v1" ); assert_eq!( - Model::AI21JambaInstructV1.cross_region_inference_id("eu-west-1")?, + Model::AI21JambaInstructV1.cross_region_inference_id("eu-west-1", false)?, "ai21.jamba-instruct-v1:0" ); Ok(()) @@ -826,11 +887,11 @@ mod tests { // Cohere models don't follow the regional prefix pattern, // so they should return their original IDs assert_eq!( - Model::CohereCommandRV1.cross_region_inference_id("us-east-1")?, + Model::CohereCommandRV1.cross_region_inference_id("us-east-1", false)?, "cohere.command-r-v1:0" ); assert_eq!( - Model::CohereCommandTextV14_4k.cross_region_inference_id("ap-southeast-1")?, + Model::CohereCommandTextV14_4k.cross_region_inference_id("ap-southeast-1", false)?, "cohere.command-text-v14:7:4k" ); Ok(()) @@ -850,10 +911,17 @@ mod tests { // Custom model should return its name unchanged assert_eq!( - custom_model.cross_region_inference_id("us-east-1")?, + custom_model.cross_region_inference_id("us-east-1", false)?, "custom.my-model-v1:0" ); + // Test that models without global support fall back to regional when allow_global is true + assert_eq!( + Model::AmazonNovaPro.cross_region_inference_id("us-east-1", true)?, + "us.amazon.nova-pro-v1:0", + "Nova Pro should fall back to regional profile even when allow_global is true" + ); + Ok(()) } @@ -892,3 +960,28 @@ mod tests { ); } } + +#[test] +fn test_global_inference_ids() -> anyhow::Result<()> { + // Test global inference for models that support it when allow_global is true + assert_eq!( + Model::ClaudeSonnet4.cross_region_inference_id("us-east-1", true)?, + "global.anthropic.claude-sonnet-4-20250514-v1:0" + ); + assert_eq!( + Model::ClaudeSonnet4_5.cross_region_inference_id("eu-west-1", true)?, + "global.anthropic.claude-sonnet-4-5-20250929-v1:0" + ); + assert_eq!( + Model::ClaudeHaiku4_5.cross_region_inference_id("ap-south-1", true)?, + "global.anthropic.claude-haiku-4-5-20251001-v1:0" + ); + + // Test that regional prefix is used when allow_global is false + assert_eq!( + Model::ClaudeSonnet4.cross_region_inference_id("us-east-1", false)?, + "us.anthropic.claude-sonnet-4-20250514-v1:0" + ); + + Ok(()) +} diff --git a/crates/language_models/src/provider/bedrock.rs b/crates/language_models/src/provider/bedrock.rs index 9672d61f90512be62ea58e77682d63cc8553710f..e478c193a27a9e30301ae9233ea666c8160b25f5 100644 --- a/crates/language_models/src/provider/bedrock.rs +++ b/crates/language_models/src/provider/bedrock.rs @@ -71,6 +71,7 @@ pub struct AmazonBedrockSettings { pub profile_name: Option, pub role_arn: Option, pub authentication_method: Option, + pub allow_global: Option, } #[derive(Clone, Debug, PartialEq, Serialize, Deserialize, EnumIter, IntoStaticStr, JsonSchema)] @@ -239,6 +240,13 @@ impl State { .or(settings_region) .unwrap_or(String::from("us-east-1")) } + + fn get_allow_global(&self) -> bool { + self.settings + .as_ref() + .and_then(|s| s.allow_global) + .unwrap_or(false) + } } pub struct BedrockLanguageModelProvider { @@ -545,11 +553,13 @@ impl LanguageModel for BedrockModel { LanguageModelCompletionError, >, > { - let Ok(region) = cx.read_entity(&self.state, |state, _cx| state.get_region()) else { + let Ok((region, allow_global)) = cx.read_entity(&self.state, |state, _cx| { + (state.get_region(), state.get_allow_global()) + }) else { return async move { Err(anyhow::anyhow!("App State Dropped").into()) }.boxed(); }; - let model_id = match self.model.cross_region_inference_id(®ion) { + let model_id = match self.model.cross_region_inference_id(®ion, allow_global) { Ok(s) => s, Err(e) => { return async move { Err(e.into()) }.boxed(); diff --git a/crates/language_models/src/settings.rs b/crates/language_models/src/settings.rs index edff1f768e9fc6d2ad9333133b20d88c7676c24d..43a8e7334a744c84d6edfae3ffc97115eb8f51b2 100644 --- a/crates/language_models/src/settings.rs +++ b/crates/language_models/src/settings.rs @@ -58,6 +58,7 @@ impl settings::Settings for AllLanguageModelSettings { profile_name: bedrock.profile, role_arn: None, // todo(was never a setting for this...) authentication_method: bedrock.authentication_method.map(Into::into), + allow_global: bedrock.allow_global, }, deepseek: DeepSeekSettings { api_url: deepseek.api_url.unwrap(), diff --git a/crates/settings/src/settings_content/language_model.rs b/crates/settings/src/settings_content/language_model.rs index 0a746c1284c1d981fdf95745952baacc74548d04..48f5a463a4b8d896885d9ba5b7d804d16ecb5b6b 100644 --- a/crates/settings/src/settings_content/language_model.rs +++ b/crates/settings/src/settings_content/language_model.rs @@ -61,6 +61,7 @@ pub struct AmazonBedrockSettingsContent { pub region: Option, pub profile: Option, pub authentication_method: Option, + pub allow_global: Option, } #[with_fallible_options] diff --git a/docs/src/ai/llm-providers.md b/docs/src/ai/llm-providers.md index 3e40d7ae0283b3dbd1c50ba1bef5ae410d969305..f13ece5d3eb6aac3af38a0046abddc474649f503 100644 --- a/docs/src/ai/llm-providers.md +++ b/docs/src/ai/llm-providers.md @@ -89,12 +89,32 @@ To do this: #### Cross-Region Inference -The Zed implementation of Amazon Bedrock uses [Cross-Region inference](https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference.html) for all the models and region combinations that support it. +The Zed implementation of Amazon Bedrock uses [Cross-Region inference](https://docs.aws.amazon.com/bedrock/latest/userguide/cross-region-inference.html) to improve availability and throughput. With Cross-Region inference, you can distribute traffic across multiple AWS Regions, enabling higher throughput. -For example, if you use `Claude Sonnet 3.7 Thinking` from `us-east-1`, it may be processed across the US regions, namely: `us-east-1`, `us-east-2`, or `us-west-2`. -Cross-Region inference requests are kept within the AWS Regions that are part of the geography where the data originally resides. -For example, a request made within the US is kept within the AWS Regions in the US. +##### Regional vs Global Inference Profiles + +Bedrock supports two types of cross-region inference profiles: + +- **Regional profiles** (default): Route requests within a specific geography (US, EU, APAC). For example, `us-east-1` uses the `us.*` profile which routes across `us-east-1`, `us-east-2`, and `us-west-2`. +- **Global profiles**: Route requests across all commercial AWS Regions for maximum availability and performance. + +By default, Zed uses **regional profiles** which keep your data within the same geography. You can opt into global profiles by adding `"allow_global": true` to your Bedrock configuration: + +```json [settings] +{ + "language_models": { + "bedrock": { + "authentication_method": "named_profile", + "region": "your-aws-region", + "profile": "your-profile-name", + "allow_global": true + } + } +} +``` + +**Note:** Only select newer models support global inference profiles. See the [AWS Bedrock supported models documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html#inference-profiles-support-system) for the current list of models that support global inference. If you encounter availability issues with a model in your region, enabling `allow_global` may resolve them. Although the data remains stored only in the source Region, your input prompts and output results might move outside of your source Region during cross-Region inference. All data will be transmitted encrypted across Amazon's secure network.