diff --git a/Cargo.lock b/Cargo.lock index a520dec990c0ce2f361be6f7a4adc3de4366a0d4..c53b8b89a45b7823a4f2bca256a76ed6536e3606 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9071,6 +9071,7 @@ dependencies = [ "aws-config", "aws-credential-types", "aws_http_client", + "base64 0.22.1", "bedrock", "chrono", "client", diff --git a/crates/bedrock/src/bedrock.rs b/crates/bedrock/src/bedrock.rs index 92ab097c4925326813a539d986f4fa1d89ca096a..d9e3c0984687ab9d7843f912dafa0853b9677811 100644 --- a/crates/bedrock/src/bedrock.rs +++ b/crates/bedrock/src/bedrock.rs @@ -16,7 +16,8 @@ pub use bedrock::operation::converse_stream::ConverseStreamInput as BedrockStrea pub use bedrock::types::{ ContentBlock as BedrockRequestContent, ConversationRole as BedrockRole, ConverseOutput as BedrockResponse, ConverseStreamOutput as BedrockStreamingResponse, - ImageBlock as BedrockImageBlock, Message as BedrockMessage, + ImageBlock as BedrockImageBlock, ImageFormat as BedrockImageFormat, + ImageSource as BedrockImageSource, Message as BedrockMessage, ReasoningContentBlock as BedrockThinkingBlock, ReasoningTextBlock as BedrockThinkingTextBlock, ResponseStream as BedrockResponseStream, SystemContentBlock as BedrockSystemContentBlock, ToolResultBlock as BedrockToolResultBlock, diff --git a/crates/bedrock/src/models.rs b/crates/bedrock/src/models.rs index e073f5f17aa09505d45fd5bffc99c442e22f54bf..1efcf28d129499086d30bf51327c7be9c430f644 100644 --- a/crates/bedrock/src/models.rs +++ b/crates/bedrock/src/models.rs @@ -551,6 +551,46 @@ impl Model { } } + pub fn supports_images(&self) -> bool { + match self { + // Anthropic Claude 3+ models (all support vision) + Self::Claude3Opus + | Self::Claude3Sonnet + | Self::Claude3_5Sonnet + | Self::Claude3_5SonnetV2 + | Self::Claude3_7Sonnet + | Self::Claude3_7SonnetThinking + | Self::ClaudeOpus4 + | Self::ClaudeOpus4Thinking + | Self::ClaudeOpus4_1 + | Self::ClaudeOpus4_1Thinking + | Self::ClaudeOpus4_5 + | Self::ClaudeOpus4_5Thinking + | Self::ClaudeSonnet4 + | Self::ClaudeSonnet4Thinking + | Self::ClaudeSonnet4_5 + | Self::ClaudeSonnet4_5Thinking + | Self::Claude3_5Haiku + | Self::ClaudeHaiku4_5 + | Self::Claude3Haiku => true, + + // Amazon Nova visual models + Self::AmazonNovaPro | Self::AmazonNovaLite => true, + + // Meta Llama 3.2 Vision models + Self::MetaLlama3211BInstructV1 | Self::MetaLlama3290BInstructV1 => true, + + // Mistral Pixtral (visual model) + Self::MistralPixtralLarge2502V1 => true, + + // Custom models default to no image support + Self::Custom { .. } => false, + + // All other models don't support images + _ => false, + } + } + pub fn supports_caching(&self) -> bool { match self { // Only Claude models on Bedrock support caching diff --git a/crates/language_models/Cargo.toml b/crates/language_models/Cargo.toml index 751a568da0b8739c1b83d2afb7c24b7b38ea5773..9685e24085495d7b028951367b1a2b4f0808c094 100644 --- a/crates/language_models/Cargo.toml +++ b/crates/language_models/Cargo.toml @@ -18,6 +18,7 @@ anyhow.workspace = true aws-config = { workspace = true, features = ["behavior-version-latest"] } aws-credential-types = { workspace = true, features = ["hardcoded-credentials"] } aws_http_client.workspace = true +base64.workspace = true bedrock = { workspace = true, features = ["schemars"] } chrono.workspace = true client.workspace = true diff --git a/crates/language_models/src/provider/bedrock.rs b/crates/language_models/src/provider/bedrock.rs index f16af99b50c578b45cb012d0334267fd5b91fe5c..1e6596fa318115d40bab2d6151f1ae51d8be537b 100644 --- a/crates/language_models/src/provider/bedrock.rs +++ b/crates/language_models/src/provider/bedrock.rs @@ -13,11 +13,12 @@ use bedrock::bedrock_client::types::{ ReasoningContentBlockDelta, StopReason, }; use bedrock::{ - BedrockAnyToolChoice, BedrockAutoToolChoice, BedrockBlob, BedrockError, BedrockInnerContent, - BedrockMessage, BedrockModelMode, BedrockStreamingResponse, BedrockThinkingBlock, - BedrockThinkingTextBlock, BedrockTool, BedrockToolChoice, BedrockToolConfig, - BedrockToolInputSchema, BedrockToolResultBlock, BedrockToolResultContentBlock, - BedrockToolResultStatus, BedrockToolSpec, BedrockToolUseBlock, Model, value_to_aws_document, + BedrockAnyToolChoice, BedrockAutoToolChoice, BedrockBlob, BedrockError, BedrockImageBlock, + BedrockImageFormat, BedrockImageSource, BedrockInnerContent, BedrockMessage, BedrockModelMode, + BedrockStreamingResponse, BedrockThinkingBlock, BedrockThinkingTextBlock, BedrockTool, + BedrockToolChoice, BedrockToolConfig, BedrockToolInputSchema, BedrockToolResultBlock, + BedrockToolResultContentBlock, BedrockToolResultStatus, BedrockToolSpec, BedrockToolUseBlock, + Model, value_to_aws_document, }; use collections::{BTreeMap, HashMap}; use credentials_provider::CredentialsProvider; @@ -636,7 +637,7 @@ impl LanguageModel for BedrockModel { } fn supports_images(&self) -> bool { - false + self.model.supports_images() } fn supports_tool_choice(&self, choice: LanguageModelToolChoice) -> bool { @@ -835,7 +836,7 @@ pub fn into_bedrock( .context("failed to build Bedrock tool use block") .log_err() .map(BedrockInnerContent::ToolUse) - }, + } MessageContent::ToolResult(tool_result) => { BedrockToolResultBlock::builder() .tool_use_id(tool_result.tool_use_id.to_string()) @@ -843,11 +844,42 @@ pub fn into_bedrock( LanguageModelToolResultContent::Text(text) => { BedrockToolResultContentBlock::Text(text.to_string()) } - LanguageModelToolResultContent::Image(_) => { - BedrockToolResultContentBlock::Text( - // TODO: Bedrock image support - "[Tool responded with an image, but Zed doesn't support these in Bedrock models yet]".to_string() - ) + LanguageModelToolResultContent::Image(image) => { + use base64::Engine; + + match base64::engine::general_purpose::STANDARD + .decode(image.source.as_bytes()) + { + Ok(image_bytes) => { + match BedrockImageBlock::builder() + .format(BedrockImageFormat::Png) + .source(BedrockImageSource::Bytes( + BedrockBlob::new(image_bytes), + )) + .build() + { + Ok(image_block) => { + BedrockToolResultContentBlock::Image( + image_block, + ) + } + Err(err) => { + BedrockToolResultContentBlock::Text( + format!( + "[Failed to build image block: {}]", + err + ), + ) + } + } + } + Err(err) => { + BedrockToolResultContentBlock::Text(format!( + "[Failed to decode tool result image: {}]", + err + )) + } + } } }) .status({ @@ -862,7 +894,22 @@ pub fn into_bedrock( .log_err() .map(BedrockInnerContent::ToolResult) } - _ => None, + MessageContent::Image(image) => { + use base64::Engine; + + let image_bytes = base64::engine::general_purpose::STANDARD + .decode(image.source.as_bytes()) + .context("failed to decode base64 image data") + .log_err()?; + + BedrockImageBlock::builder() + .format(BedrockImageFormat::Png) + .source(BedrockImageSource::Bytes(BedrockBlob::new(image_bytes))) + .build() + .context("failed to build Bedrock image block") + .log_err() + .map(BedrockInnerContent::Image) + } }) .collect(); if message.cache && supports_caching {