From a176a8c47efd8312875585349ab4228fa72d7ea4 Mon Sep 17 00:00:00 2001 From: Xiaobo Liu Date: Tue, 16 Dec 2025 16:50:40 +0800 Subject: [PATCH] agent: Allow LanguageModelImage size to be optional (#44956) Release Notes: - Allowed LanguageModelImage size to be optional Signed-off-by: Xiaobo Liu --- crates/agent/src/thread.rs | 3 +- crates/language_model/src/request.rs | 36 +++++++++++-------- .../language_models/src/provider/mistral.rs | 2 +- 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs index b61c0ad0840475c3b5f6d4c0a7082a26d4d44a58..837bf454a2431c4a1efa81679adc6ed9ef355908 100644 --- a/crates/agent/src/thread.rs +++ b/crates/agent/src/thread.rs @@ -2662,7 +2662,6 @@ impl From for acp::ContentBlock { fn convert_image(image_content: acp::ImageContent) -> LanguageModelImage { LanguageModelImage { source: image_content.data.into(), - // TODO: make this optional? - size: gpui::Size::new(0.into(), 0.into()), + size: None, } } diff --git a/crates/language_model/src/request.rs b/crates/language_model/src/request.rs index d97d87bdc95c443aeaf3f2b5578bf7f0c1ef322a..5e99cca4f9d6e61672c541cb90a3a1ca7da91203 100644 --- a/crates/language_model/src/request.rs +++ b/crates/language_model/src/request.rs @@ -19,7 +19,8 @@ use crate::{LanguageModelToolUse, LanguageModelToolUseId}; pub struct LanguageModelImage { /// A base64-encoded PNG image. 
pub source: SharedString, - pub size: Size, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub size: Option>, } impl LanguageModelImage { @@ -61,7 +62,7 @@ impl LanguageModelImage { } Some(Self { - size: size(DevicePixels(width?), DevicePixels(height?)), + size: Some(size(DevicePixels(width?), DevicePixels(height?))), source: SharedString::from(source.to_string()), }) } @@ -83,7 +84,7 @@ impl LanguageModelImage { pub fn empty() -> Self { Self { source: "".into(), - size: size(DevicePixels(0), DevicePixels(0)), + size: None, } } @@ -139,15 +140,18 @@ impl LanguageModelImage { let source = unsafe { String::from_utf8_unchecked(base64_image) }; Some(LanguageModelImage { - size: image_size, + size: Some(image_size), source: source.into(), }) }) } pub fn estimate_tokens(&self) -> usize { - let width = self.size.width.0.unsigned_abs() as usize; - let height = self.size.height.0.unsigned_abs() as usize; + let Some(size) = self.size.as_ref() else { + return 0; + }; + let width = size.width.0.unsigned_abs() as usize; + let height = size.height.0.unsigned_abs() as usize; // From: https://docs.anthropic.com/en/docs/build-with-claude/vision#calculate-image-costs // Note that are a lot of conditions on Anthropic's API, and OpenAI doesn't use this, @@ -463,8 +467,9 @@ mod tests { match result { LanguageModelToolResultContent::Image(image) => { assert_eq!(image.source.as_ref(), "base64encodedimagedata"); - assert_eq!(image.size.width.0, 100); - assert_eq!(image.size.height.0, 200); + let size = image.size.expect("size"); + assert_eq!(size.width.0, 100); + assert_eq!(size.height.0, 200); } _ => panic!("Expected Image variant"), } @@ -483,8 +488,9 @@ mod tests { match result { LanguageModelToolResultContent::Image(image) => { assert_eq!(image.source.as_ref(), "wrappedimagedata"); - assert_eq!(image.size.width.0, 50); - assert_eq!(image.size.height.0, 75); + let size = image.size.expect("size"); + assert_eq!(size.width.0, 50); + assert_eq!(size.height.0, 75); } _ => 
panic!("Expected Image variant"), } @@ -503,8 +509,9 @@ mod tests { match result { LanguageModelToolResultContent::Image(image) => { assert_eq!(image.source.as_ref(), "caseinsensitive"); - assert_eq!(image.size.width.0, 30); - assert_eq!(image.size.height.0, 40); + let size = image.size.expect("size"); + assert_eq!(size.width.0, 30); + assert_eq!(size.height.0, 40); } _ => panic!("Expected Image variant"), } @@ -541,8 +548,9 @@ mod tests { match result { LanguageModelToolResultContent::Image(image) => { assert_eq!(image.source.as_ref(), "directimage"); - assert_eq!(image.size.width.0, 200); - assert_eq!(image.size.height.0, 300); + let size = image.size.expect("size"); + assert_eq!(size.width.0, 200); + assert_eq!(size.height.0, 300); } _ => panic!("Expected Image variant"), } diff --git a/crates/language_models/src/provider/mistral.rs b/crates/language_models/src/provider/mistral.rs index 3e99f32be8224bb2b9973feccb0ce973b58eaaed..64f3999e3aa96b2611e265a6eaf5df8063332c2a 100644 --- a/crates/language_models/src/provider/mistral.rs +++ b/crates/language_models/src/provider/mistral.rs @@ -927,7 +927,7 @@ mod tests { MessageContent::Text("What's in this image?".into()), MessageContent::Image(LanguageModelImage { source: "base64data".into(), - size: Default::default(), + size: None, }), ], cache: false,