From a70cf3f1d432462f164fbc4b4de187bc7b52e31d Mon Sep 17 00:00:00 2001 From: Shardul Vaidya <31039336+5herlocked@users.noreply.github.com> Date: Fri, 29 Aug 2025 18:13:06 -0400 Subject: [PATCH] bedrock: Inference Config updates (#35808) Fixes #36866 - Updated internal naming for Claude 4 models to be consistent. - Corrected max output tokens for Anthropic Bedrock models to match docs Shoutout to @tlehn for noticing the bug, and finding the resolution. Release Notes: - bedrock: Fixed inference config errors causing Opus 4 Thinking and Opus 4.1 Thinking to fail (thanks [@tlehn](https://github.com/tlehn) and [@5herlocked](https://github.com/5herlocked)) - bedrock: Fixed an issue which prevented Rules / System prompts from functioning with Bedrock models (thanks [@tlehn](https://github.com/tlehn) and [@5herlocked](https://github.com/5herlocked)) --- crates/bedrock/src/bedrock.rs | 18 +++++++++++++++++- crates/bedrock/src/models.rs | 28 +++++++++++++--------------- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/crates/bedrock/src/bedrock.rs b/crates/bedrock/src/bedrock.rs index c8315d4201a46d5ac47825ff40aed3829f191d87..ec0b4070906fdfd31195668312b3e7b425cd28ee 100644 --- a/crates/bedrock/src/bedrock.rs +++ b/crates/bedrock/src/bedrock.rs @@ -3,6 +3,7 @@ mod models; use anyhow::{Context, Error, Result, anyhow}; use aws_sdk_bedrockruntime as bedrock; pub use aws_sdk_bedrockruntime as bedrock_client; +use aws_sdk_bedrockruntime::types::InferenceConfiguration; pub use aws_sdk_bedrockruntime::types::{ AnyToolChoice as BedrockAnyToolChoice, AutoToolChoice as BedrockAutoToolChoice, ContentBlock as BedrockInnerContent, Tool as BedrockTool, ToolChoice as BedrockToolChoice, @@ -17,7 +18,8 @@ pub use bedrock::types::{ ConverseOutput as BedrockResponse, ConverseStreamOutput as BedrockStreamingResponse, ImageBlock as BedrockImageBlock, Message as BedrockMessage, ReasoningContentBlock as BedrockThinkingBlock, ReasoningTextBlock as BedrockThinkingTextBlock, - 
ResponseStream as BedrockResponseStream, ToolResultBlock as BedrockToolResultBlock, + ResponseStream as BedrockResponseStream, SystemContentBlock as BedrockSystemContentBlock, + ToolResultBlock as BedrockToolResultBlock, ToolResultContentBlock as BedrockToolResultContentBlock, ToolResultStatus as BedrockToolResultStatus, ToolUseBlock as BedrockToolUseBlock, }; @@ -58,6 +60,20 @@ pub async fn stream_completion( response = response.set_tool_config(request.tools); } + let inference_config = InferenceConfiguration::builder() + .max_tokens(request.max_tokens as i32) + .set_temperature(request.temperature) + .set_top_p(request.top_p) + .build(); + + response = response.inference_config(inference_config); + + if let Some(system) = request.system { + if !system.is_empty() { + response = response.system(BedrockSystemContentBlock::Text(system)); + } + } + let output = response .send() .await diff --git a/crates/bedrock/src/models.rs b/crates/bedrock/src/models.rs index 69d2ffb84569ef848f88de47f5394a6b25b18e02..c3a793d69d086a8a8c607d34debc5a7034f33f32 100644 --- a/crates/bedrock/src/models.rs +++ b/crates/bedrock/src/models.rs @@ -151,12 +151,12 @@ impl Model { pub fn id(&self) -> &str { match self { - Model::ClaudeSonnet4 => "claude-4-sonnet", - Model::ClaudeSonnet4Thinking => "claude-4-sonnet-thinking", - Model::ClaudeOpus4 => "claude-4-opus", - Model::ClaudeOpus4_1 => "claude-4-opus-1", - Model::ClaudeOpus4Thinking => "claude-4-opus-thinking", - Model::ClaudeOpus4_1Thinking => "claude-4-opus-1-thinking", + Model::ClaudeSonnet4 => "claude-sonnet-4", + Model::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking", + Model::ClaudeOpus4 => "claude-opus-4", + Model::ClaudeOpus4_1 => "claude-opus-4-1", + Model::ClaudeOpus4Thinking => "claude-opus-4-thinking", + Model::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking", Model::Claude3_5SonnetV2 => "claude-3-5-sonnet-v2", Model::Claude3_5Sonnet => "claude-3-5-sonnet", Model::Claude3Opus => "claude-3-opus", @@ -359,14 +359,12 @@ impl 
Model { pub fn max_output_tokens(&self) -> u64 { match self { Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3_5Haiku => 4_096, - Self::Claude3_7Sonnet - | Self::Claude3_7SonnetThinking - | Self::ClaudeSonnet4 - | Self::ClaudeSonnet4Thinking - | Self::ClaudeOpus4 - | Model::ClaudeOpus4Thinking + Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => 128_000, + Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => 64_000, + Self::ClaudeOpus4 + | Self::ClaudeOpus4Thinking | Self::ClaudeOpus4_1 - | Model::ClaudeOpus4_1Thinking => 128_000, + | Self::ClaudeOpus4_1Thinking => 32_000, Self::Claude3_5SonnetV2 | Self::PalmyraWriterX4 | Self::PalmyraWriterX5 => 8_192, Self::Custom { max_output_tokens, .. @@ -784,10 +782,10 @@ mod tests { ); // Test thinking models have different friendly IDs but same request IDs - assert_eq!(Model::ClaudeSonnet4.id(), "claude-4-sonnet"); + assert_eq!(Model::ClaudeSonnet4.id(), "claude-sonnet-4"); assert_eq!( Model::ClaudeSonnet4Thinking.id(), - "claude-4-sonnet-thinking" + "claude-sonnet-4-thinking" ); assert_eq!( Model::ClaudeSonnet4.request_id(),