diff --git a/crates/anthropic/src/anthropic.rs b/crates/anthropic/src/anthropic.rs
index bdbb5e465eb07e32c2cf7224ac2c98b0257bc622..e9f0ea51a99562a149960806573cbbd17678e827 100644
--- a/crates/anthropic/src/anthropic.rs
+++ b/crates/anthropic/src/anthropic.rs
@@ -42,6 +42,7 @@ pub enum Model {
         tool_override: Option<String>,
         /// Indicates whether this custom model supports caching.
         cache_configuration: Option<AnthropicModelCacheConfiguration>,
+        max_output_tokens: Option<u32>,
     },
 }
 
@@ -105,6 +106,16 @@ impl Model {
         }
     }
 
+    pub fn max_output_tokens(&self) -> u32 {
+        match self {
+            Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
+            Self::Claude3_5Sonnet => 8_192,
+            Self::Custom {
+                max_output_tokens, ..
+            } => max_output_tokens.unwrap_or(4_096),
+        }
+    }
+
     pub fn tool_model_id(&self) -> &str {
         if let Self::Custom {
             tool_override: Some(tool_override),
@@ -131,7 +142,7 @@ pub async fn complete(
         .header("Anthropic-Version", "2023-06-01")
         .header(
             "Anthropic-Beta",
-            "tools-2024-04-04,prompt-caching-2024-07-31",
+            "tools-2024-04-04,prompt-caching-2024-07-31,max-tokens-3-5-sonnet-2024-07-15",
         )
         .header("X-Api-Key", api_key)
         .header("Content-Type", "application/json");
@@ -191,7 +202,7 @@ pub async fn stream_completion(
         .header("Anthropic-Version", "2023-06-01")
         .header(
             "Anthropic-Beta",
-            "tools-2024-04-04,prompt-caching-2024-07-31",
+            "tools-2024-04-04,prompt-caching-2024-07-31,max-tokens-3-5-sonnet-2024-07-15",
         )
         .header("X-Api-Key", api_key)
         .header("Content-Type", "application/json");
diff --git a/crates/language_model/src/language_model.rs b/crates/language_model/src/language_model.rs
index 1df651ad9ed4090414157da894e3fdb693d75db5..0d23023c2a47700b8e2ee1fbb3ccd85fb3422409 100644
--- a/crates/language_model/src/language_model.rs
+++ b/crates/language_model/src/language_model.rs
@@ -64,6 +64,9 @@ pub trait LanguageModel: Send + Sync {
     }
 
     fn max_token_count(&self) -> usize;
+    fn max_output_tokens(&self) -> Option<u32> {
+        None
+    }
 
     fn count_tokens(
         &self,
diff --git a/crates/language_model/src/provider/anthropic.rs b/crates/language_model/src/provider/anthropic.rs
index fa5401a38fe39f1ce8bd49fa54ed110a78314a8b..3b4485669756464930b1fc46abd231977768b435 100644
--- a/crates/language_model/src/provider/anthropic.rs
+++ b/crates/language_model/src/provider/anthropic.rs
@@ -39,6 +39,7 @@ pub struct AvailableModel {
     pub max_tokens: usize,
     pub tool_override: Option<String>,
     pub cache_configuration: Option<LanguageModelCacheConfiguration>,
+    pub max_output_tokens: Option<u32>,
 }
 
 pub struct AnthropicLanguageModelProvider {
@@ -179,6 +180,7 @@ impl LanguageModelProvider for AnthropicLanguageModelProvider {
                             min_total_token: config.min_total_token,
                         }
                     }),
+                    max_output_tokens: model.max_output_tokens,
                 },
             );
         }
@@ -331,6 +333,10 @@ impl LanguageModel for AnthropicModel {
         self.model.max_token_count()
     }
 
+    fn max_output_tokens(&self) -> Option<u32> {
+        Some(self.model.max_output_tokens())
+    }
+
     fn count_tokens(
         &self,
         request: LanguageModelRequest,
@@ -344,7 +350,8 @@ impl LanguageModel for AnthropicModel {
         request: LanguageModelRequest,
         cx: &AsyncAppContext,
     ) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
-        let request = request.into_anthropic(self.model.id().into());
+        let request =
+            request.into_anthropic(self.model.id().into(), self.model.max_output_tokens());
         let request = self.stream_completion(request, cx);
         let future = self.request_limiter.stream(async move {
             let response = request.await.map_err(|err| anyhow!(err))?;
@@ -377,7 +384,10 @@ impl LanguageModel for AnthropicModel {
         input_schema: serde_json::Value,
         cx: &AsyncAppContext,
     ) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
-        let mut request = request.into_anthropic(self.model.tool_model_id().into());
+        let mut request = request.into_anthropic(
+            self.model.tool_model_id().into(),
+            self.model.max_output_tokens(),
+        );
         request.tool_choice = Some(anthropic::ToolChoice::Tool {
             name: tool_name.clone(),
         });
diff --git a/crates/language_model/src/provider/cloud.rs b/crates/language_model/src/provider/cloud.rs
index 517cb13342859c9e62667b6768c7d74b7b1dfb92..38478e4de35cf381eff02bcdf4d8ca0a4bba3c65 100644
--- a/crates/language_model/src/provider/cloud.rs
+++ b/crates/language_model/src/provider/cloud.rs
@@ -57,6 +57,7 @@ pub struct AvailableModel {
     max_tokens: usize,
     tool_override: Option<String>,
     cache_configuration: Option<LanguageModelCacheConfiguration>,
+    max_output_tokens: Option<u32>,
 }
 
 pub struct CloudLanguageModelProvider {
@@ -210,6 +211,7 @@ impl LanguageModelProvider for CloudLanguageModelProvider {
                         min_total_token: config.min_total_token,
                     }
                 }),
+                max_output_tokens: model.max_output_tokens,
             })
         }
         AvailableProvider::OpenAi => CloudModel::OpenAi(open_ai::Model::Custom {
@@ -446,7 +448,7 @@ impl LanguageModel for CloudLanguageModel {
     ) -> BoxFuture<'static, Result<BoxStream<'static, Result<String>>>> {
         match &self.model {
             CloudModel::Anthropic(model) => {
-                let request = request.into_anthropic(model.id().into());
+                let request = request.into_anthropic(model.id().into(), model.max_output_tokens());
                 let client = self.client.clone();
                 let llm_api_token = self.llm_api_token.clone();
                 let future = self.request_limiter.stream(async move {
@@ -556,7 +558,8 @@ impl LanguageModel for CloudLanguageModel {
 
         match &self.model {
             CloudModel::Anthropic(model) => {
-                let mut request = request.into_anthropic(model.tool_model_id().into());
+                let mut request =
+                    request.into_anthropic(model.tool_model_id().into(), model.max_output_tokens());
                 request.tool_choice = Some(anthropic::ToolChoice::Tool {
                     name: tool_name.clone(),
                 });
diff --git a/crates/language_model/src/request.rs b/crates/language_model/src/request.rs
index 6c4f1bb50b86a08505322e4411f02adc1af87c81..ecebc5e86802cd42b30b6fc2b98212b68db86573 100644
--- a/crates/language_model/src/request.rs
+++ b/crates/language_model/src/request.rs
@@ -286,7 +286,7 @@ impl LanguageModelRequest {
         }
     }
 
-    pub fn into_anthropic(self, model: String) -> anthropic::Request {
+    pub fn into_anthropic(self, model: String, max_output_tokens: u32) -> anthropic::Request {
         let mut new_messages: Vec<anthropic::Message> = Vec::new();
         let mut system_message = String::new();
 
@@ -353,7 +353,7 @@ impl LanguageModelRequest {
         anthropic::Request {
             model,
             messages: new_messages,
-            max_tokens: 4092,
+            max_tokens: max_output_tokens,
             system: Some(system_message),
             tools: Vec::new(),
             tool_choice: None,
diff --git a/crates/language_model/src/settings.rs b/crates/language_model/src/settings.rs
index ded797e1e554f04ee53879bf0674d9824fd8c70f..85dce2e121ca2faa25851e0df076aca47462b9b6 100644
--- a/crates/language_model/src/settings.rs
+++ b/crates/language_model/src/settings.rs
@@ -97,6 +97,7 @@ impl AnthropicSettingsContent {
                                     max_tokens,
                                     tool_override,
                                     cache_configuration,
+                                    max_output_tokens,
                                 } => Some(provider::anthropic::AvailableModel {
                                     name,
                                     max_tokens,
@@ -108,6 +109,7 @@ impl AnthropicSettingsContent {
                                             min_total_token: config.min_total_token,
                                         },
                                     ),
+                                    max_output_tokens,
                                 }),
                                 _ => None,
                             })