bedrock: Always use 1M context window for anthropic models (#56195)

Bennet Bo Fenner created

Closes #49617

Release Notes:

- bedrock: Always use 1M context window for Anthropic models

Change summary

crates/bedrock/src/bedrock.rs                  | 10 ------
crates/bedrock/src/models.rs                   | 30 ++++---------------
crates/language_models/src/provider/bedrock.rs | 24 ++--------------
crates/language_models/src/settings.rs         |  1 -
crates/settings_content/src/language_model.rs  |  2 -
docs/src/ai/llm-providers.md                   | 19 ------------
6 files changed, 10 insertions(+), 76 deletions(-)

Detailed changes

crates/bedrock/src/bedrock.rs 🔗

@@ -32,8 +32,6 @@ use thiserror::Error;
 
 pub use crate::models::*;
 
-pub const CONTEXT_1M_BETA_HEADER: &str = "context-1m-2025-08-07";
-
 pub async fn stream_completion(
     client: bedrock::Client,
     request: Request,
@@ -70,13 +68,6 @@ pub async fn stream_completion(
         _ => {}
     }
 
-    if request.allow_extended_context {
-        additional_fields.insert(
-            "anthropic_beta".to_string(),
-            Document::Array(vec![Document::String(CONTEXT_1M_BETA_HEADER.to_string())]),
-        );
-    }
-
     if !additional_fields.is_empty() {
         response = response.additional_model_request_fields(Document::Object(additional_fields));
     }
@@ -211,7 +202,6 @@ pub struct Request {
     pub temperature: Option<f32>,
     pub top_k: Option<u32>,
     pub top_p: Option<f32>,
-    pub allow_extended_context: bool,
 }
 
 #[derive(Debug, Serialize, Deserialize)]

crates/bedrock/src/models.rs 🔗

@@ -384,19 +384,15 @@ impl Model {
     }
 
     pub fn max_token_count(&self) -> u64 {
-        self.max_tokens()
-    }
-
-    pub fn max_tokens(&self) -> u64 {
         match self {
             Self::ClaudeHaiku4_5
             | Self::ClaudeSonnet4
             | Self::ClaudeSonnet4_5
-            | Self::ClaudeOpus4_1
             | Self::ClaudeOpus4_5
             | Self::ClaudeOpus4_6
             | Self::ClaudeOpus4_7
-            | Self::ClaudeSonnet4_6 => 200_000,
+            | Self::ClaudeSonnet4_6 => 1_000_000,
+            Self::ClaudeOpus4_1 => 200_000,
             Self::Llama4Scout17B | Self::Llama4Maverick17B => 128_000,
             Self::Gemma3_4B | Self::Gemma3_12B | Self::Gemma3_27B => 128_000,
             Self::MagistralSmall | Self::MistralLarge3 | Self::PixtralLarge => 128_000,
@@ -526,18 +522,6 @@ impl Model {
         }
     }
 
-    pub fn supports_extended_context(&self) -> bool {
-        matches!(
-            self,
-            Self::ClaudeSonnet4
-                | Self::ClaudeSonnet4_5
-                | Self::ClaudeOpus4_5
-                | Self::ClaudeOpus4_6
-                | Self::ClaudeOpus4_7
-                | Self::ClaudeSonnet4_6
-        )
-    }
-
     pub fn supports_caching(&self) -> bool {
         match self {
             Self::ClaudeHaiku4_5
@@ -1040,11 +1024,11 @@ mod tests {
     }
 
     #[test]
-    fn test_max_tokens() {
-        assert_eq!(Model::ClaudeSonnet4_5.max_tokens(), 200_000);
-        assert_eq!(Model::ClaudeOpus4_6.max_tokens(), 200_000);
-        assert_eq!(Model::Llama4Scout17B.max_tokens(), 128_000);
-        assert_eq!(Model::NovaPremier.max_tokens(), 1_000_000);
+    fn test_max_token_count() {
+        assert_eq!(Model::ClaudeSonnet4_5.max_token_count(), 1_000_000);
+        assert_eq!(Model::ClaudeOpus4_6.max_token_count(), 1_000_000);
+        assert_eq!(Model::Llama4Scout17B.max_token_count(), 128_000);
+        assert_eq!(Model::NovaPremier.max_token_count(), 1_000_000);
     }
 
     #[test]

crates/language_models/src/provider/bedrock.rs 🔗

@@ -113,7 +113,6 @@ pub struct AmazonBedrockSettings {
     pub role_arn: Option<String>,
     pub authentication_method: Option<BedrockAuthMethod>,
     pub allow_global: Option<bool>,
-    pub allow_extended_context: Option<bool>,
 }
 
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize, EnumIter, IntoStaticStr, JsonSchema)]
@@ -386,13 +385,6 @@ impl State {
             .and_then(|s| s.allow_global)
             .unwrap_or(false)
     }
-
-    fn get_allow_extended_context(&self) -> bool {
-        self.settings
-            .as_ref()
-            .and_then(|s| s.allow_extended_context)
-            .unwrap_or(false)
-    }
 }
 
 pub struct BedrockLanguageModelProvider {
@@ -718,14 +710,9 @@ impl LanguageModel for BedrockModel {
             LanguageModelCompletionError,
         >,
     > {
-        let (region, allow_global, allow_extended_context) =
-            cx.read_entity(&self.state, |state, _cx| {
-                (
-                    state.get_region(),
-                    state.get_allow_global(),
-                    state.get_allow_extended_context(),
-                )
-            });
+        let (region, allow_global) = cx.read_entity(&self.state, |state, _cx| {
+            (state.get_region(), state.get_allow_global())
+        });
 
         let model_id = match self.model.cross_region_inference_id(&region, allow_global) {
             Ok(s) => s,
@@ -736,8 +723,6 @@ impl LanguageModel for BedrockModel {
 
         let deny_tool_calls = request.tool_choice == Some(LanguageModelToolChoice::None);
 
-        let use_extended_context = allow_extended_context && self.model.supports_extended_context();
-
         let request = match into_bedrock(
             request,
             model_id,
@@ -746,7 +731,6 @@ impl LanguageModel for BedrockModel {
             self.model.thinking_mode(),
             self.model.supports_caching(),
             self.model.supports_tool_use(),
-            use_extended_context,
         ) {
             Ok(request) => request,
             Err(err) => return futures::future::ready(Err(err.into())).boxed(),
@@ -839,7 +823,6 @@ pub fn into_bedrock(
     thinking_mode: BedrockModelMode,
     supports_caching: bool,
     supports_tool_use: bool,
-    allow_extended_context: bool,
 ) -> Result<bedrock::Request> {
     let mut new_messages: Vec<BedrockMessage> = Vec::new();
     let mut system_message = String::new();
@@ -1144,7 +1127,6 @@ pub fn into_bedrock(
         temperature: request.temperature.or(Some(default_temperature)),
         top_k: None,
         top_p: None,
-        allow_extended_context,
     })
 }
 

crates/language_models/src/settings.rs 🔗

@@ -61,7 +61,6 @@ impl settings::Settings for AllLanguageModelSettings {
                 role_arn: None, // todo(was never a setting for this...)
                 authentication_method: bedrock.authentication_method.map(Into::into),
                 allow_global: bedrock.allow_global,
-                allow_extended_context: bedrock.allow_extended_context,
             },
             deepseek: DeepSeekSettings {
                 api_url: deepseek.api_url.unwrap(),

crates/settings_content/src/language_model.rs 🔗

@@ -65,8 +65,6 @@ pub struct AmazonBedrockSettingsContent {
     pub profile: Option<String>,
     pub authentication_method: Option<BedrockAuthMethodContent>,
     pub allow_global: Option<bool>,
-    /// Enable the 1M token extended context window beta for supported Anthropic models.
-    pub allow_extended_context: Option<bool>,
 }
 
 #[with_fallible_options]

docs/src/ai/llm-providers.md 🔗

@@ -150,25 +150,6 @@ We will support Cross-Region inference for each of the models on a best-effort b
 
 For the most up-to-date supported regions and models, refer to the [Supported Models and Regions for Cross Region inference](https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html).
 
-#### Extended Context Window {#bedrock-extended-context}
-
-Anthropic models on Bedrock support a 1M token extended context window through the `anthropic_beta` API parameter. To enable this feature, set `"allow_extended_context": true` in your Bedrock configuration:
-
-```json [settings]
-{
-  "language_models": {
-    "bedrock": {
-      "authentication_method": "named_profile",
-      "region": "your-aws-region",
-      "profile": "your-profile-name",
-      "allow_extended_context": true
-    }
-  }
-}
-```
-
-Zed enables extended context for supported models (Claude Sonnet 4.5, Claude Opus 4.6, and Claude Opus 4.7). Extended context usage may increase API costs—refer to AWS Bedrock pricing for details.
-
 #### Image Support {#bedrock-image-support}
 
 Bedrock models that support vision (Claude 3 and later, Amazon Nova Pro and Lite, Meta Llama 3.2 Vision models, Mistral Pixtral) can receive images in conversations and tool results.