request.rs

use std::io::{Cursor, Write};
use std::sync::Arc;

use anyhow::Result;
use base64::write::EncoderWriter;
use cloud_llm_client::{CompletionIntent, CompletionMode};
use gpui::{
    App, AppContext as _, DevicePixels, Image, ImageFormat, ObjectFit, SharedString, Size, Task,
    point, px, size,
};
use image::GenericImageView as _;
use image::codecs::png::PngEncoder;
use serde::{Deserialize, Serialize};
use util::ResultExt;

use crate::role::Role;
use crate::{LanguageModelToolUse, LanguageModelToolUseId};

#[derive(Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
pub struct LanguageModelImage {
    /// A base64-encoded PNG image.
    pub source: SharedString,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub size: Option<Size<DevicePixels>>,
}

impl LanguageModelImage {
    pub fn len(&self) -> usize {
        self.source.len()
    }

    pub fn is_empty(&self) -> bool {
        self.source.is_empty()
    }

    /// Parses `Self` from a JSON object with case-insensitive field names
    /// (exercised directly by the `from_json` test in the `tests` module below).
    pub fn from_json(obj: &serde_json::Map<String, serde_json::Value>) -> Option<Self> {
        let mut source = None;
        let mut size_obj = None;

        // Find source and size fields (case-insensitive)
        for (k, v) in obj.iter() {
            match k.to_lowercase().as_str() {
                "source" => source = v.as_str(),
                "size" => size_obj = v.as_object(),
                _ => {}
            }
        }

        let source = source?;
        let size_obj = size_obj?;

        let mut width = None;
        let mut height = None;

        // Find width and height in size object (case-insensitive)
        for (k, v) in size_obj.iter() {
            match k.to_lowercase().as_str() {
                "width" => width = v.as_i64().map(|w| w as i32),
                "height" => height = v.as_i64().map(|h| h as i32),
                _ => {}
            }
        }

        Some(Self {
            size: Some(size(DevicePixels(width?), DevicePixels(height?))),
            source: SharedString::from(source.to_string()),
        })
    }
}

impl std::fmt::Debug for LanguageModelImage {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("LanguageModelImage")
            .field("source", &format!("<{} bytes>", self.source.len()))
            .field("size", &self.size)
            .finish()
    }
}

/// Anthropic wants uploaded images to be smaller than this in both dimensions.
const ANTHROPIC_SIZE_LIMIT: f32 = 1568.;

/// Default per-image hard limit (in bytes) for the encoded image payload we send upstream.
///
/// NOTE: `LanguageModelImage.source` is base64-encoded PNG bytes (without the `data:` prefix).
/// This limit is enforced on the encoded PNG bytes *before* base64 encoding.
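/// Since base64 expands its input by roughly 4/3, the string stored in `source`
/// can be up to ~33% larger than this limit.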
const DEFAULT_IMAGE_MAX_BYTES: usize = 5 * 1024 * 1024;

/// Conservative cap on how many times we'll attempt to shrink/re-encode an image to fit
/// `DEFAULT_IMAGE_MAX_BYTES`.
const MAX_IMAGE_DOWNSCALE_PASSES: usize = 8;

impl LanguageModelImage {
    // All language model images are encoded as PNGs.
    pub const FORMAT: ImageFormat = ImageFormat::Png;

    pub fn empty() -> Self {
        Self {
            source: "".into(),
            size: None,
        }
    }

    pub fn from_image(data: Arc<Image>, cx: &mut App) -> Task<Option<Self>> {
        cx.background_spawn(async move {
            let image_bytes = Cursor::new(data.bytes());
            let dynamic_image = match data.format() {
                ImageFormat::Png => image::codecs::png::PngDecoder::new(image_bytes)
                    .and_then(image::DynamicImage::from_decoder),
                ImageFormat::Jpeg => image::codecs::jpeg::JpegDecoder::new(image_bytes)
                    .and_then(image::DynamicImage::from_decoder),
                ImageFormat::Webp => image::codecs::webp::WebPDecoder::new(image_bytes)
                    .and_then(image::DynamicImage::from_decoder),
                ImageFormat::Gif => image::codecs::gif::GifDecoder::new(image_bytes)
                    .and_then(image::DynamicImage::from_decoder),
                ImageFormat::Bmp => image::codecs::bmp::BmpDecoder::new(image_bytes)
                    .and_then(image::DynamicImage::from_decoder),
                ImageFormat::Tiff => image::codecs::tiff::TiffDecoder::new(image_bytes)
                    .and_then(image::DynamicImage::from_decoder),
                _ => return None,
            }
            .log_err()?;

            let width = dynamic_image.width();
            let height = dynamic_image.height();
            let image_size = size(DevicePixels(width as i32), DevicePixels(height as i32));

            // First apply any provider-specific dimension constraints we know about (Anthropic).
            let mut processed_image = if image_size.width.0 > ANTHROPIC_SIZE_LIMIT as i32
                || image_size.height.0 > ANTHROPIC_SIZE_LIMIT as i32
            {
                let new_bounds = ObjectFit::ScaleDown.get_bounds(
                    gpui::Bounds {
                        origin: point(px(0.0), px(0.0)),
                        size: size(px(ANTHROPIC_SIZE_LIMIT), px(ANTHROPIC_SIZE_LIMIT)),
                    },
                    image_size,
                );
                dynamic_image.resize(
                    new_bounds.size.width.into(),
                    new_bounds.size.height.into(),
                    image::imageops::FilterType::Triangle,
                )
            } else {
                dynamic_image
            };

            // Then enforce a default per-image size cap on the encoded PNG bytes.
            //
            // We always send PNG bytes (either original PNG bytes, or re-encoded PNG) base64'd.
            // The upstream provider limit we want to respect is effectively on the binary image
            // payload size, so we enforce against the encoded PNG bytes before base64 encoding.
            let mut encoded_png = encode_png_bytes(&processed_image).log_err()?;
            for _pass in 0..MAX_IMAGE_DOWNSCALE_PASSES {
                if encoded_png.len() <= DEFAULT_IMAGE_MAX_BYTES {
                    break;
                }

                // Scale down geometrically to converge quickly. We don't know the final PNG size
                // as a function of pixels, so we iteratively shrink.
                let (w, h) = processed_image.dimensions();
                if w <= 1 || h <= 1 {
                    break;
                }

                // Shrink by ~15% each pass (0.85). This is a compromise between speed and
                // preserving image detail.
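                // After n passes each dimension is scaled by 0.85^n; each pass multiplies the
                // pixel area by ~0.72 (0.85^2), so all eight passes together bring the area
                // down to roughly 7% of where it started.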
                let new_w = ((w as f32) * 0.85).round().max(1.0) as u32;
                let new_h = ((h as f32) * 0.85).round().max(1.0) as u32;

                processed_image =
                    processed_image.resize(new_w, new_h, image::imageops::FilterType::Triangle);
                encoded_png = encode_png_bytes(&processed_image).log_err()?;
            }

            if encoded_png.len() > DEFAULT_IMAGE_MAX_BYTES {
                // Still too large after multiple passes; treat as non-convertible for now.
                // (Provider-specific handling can be introduced later.)
                return None;
            }

            // Now base64 encode the PNG bytes.
            let base64_image = encode_bytes_as_base64(encoded_png.as_slice()).log_err()?;

            // SAFETY: base64 output uses only the standard alphabet (plus `=` padding),
            // which is ASCII and therefore always valid UTF-8.
            let source = unsafe { String::from_utf8_unchecked(base64_image) };

            Some(LanguageModelImage {
                size: Some(image_size),
                source: source.into(),
            })
        })
    }

    pub fn estimate_tokens(&self) -> usize {
        let Some(size) = self.size.as_ref() else {
            return 0;
        };
        let width = size.width.0.unsigned_abs() as usize;
        let height = size.height.0.unsigned_abs() as usize;

        // From: https://docs.anthropic.com/en/docs/build-with-claude/vision#calculate-image-costs
        // Note that there are a lot of conditions on Anthropic's API, and OpenAI doesn't use
        // this formula, so this method is more of a rough guess.
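        // As a worked example, a 1092x1092 image estimates to
        // (1092 * 1092) / 750 = 1589 tokens (integer division).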
        (width * height) / 750
    }

    pub fn to_base64_url(&self) -> String {
        format!("data:image/png;base64,{}", self.source)
    }
}

fn encode_png_bytes(image: &image::DynamicImage) -> Result<Vec<u8>> {
    let mut png = Vec::new();
    image.write_with_encoder(PngEncoder::new(&mut png))?;
    Ok(png)
}

fn encode_bytes_as_base64(bytes: &[u8]) -> Result<Vec<u8>> {
    let mut base64_image = Vec::new();
    {
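        // Scope the encoder so it is dropped (flushing any buffered partial chunk
        // and padding) before we read `base64_image` back out.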
        let mut base64_encoder = EncoderWriter::new(
            Cursor::new(&mut base64_image),
            &base64::engine::general_purpose::STANDARD,
        );
        base64_encoder.write_all(bytes)?;
    }
    Ok(base64_image)
}

#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
pub struct LanguageModelToolResult {
    pub tool_use_id: LanguageModelToolUseId,
    pub tool_name: Arc<str>,
    pub is_error: bool,
    pub content: LanguageModelToolResultContent,
    pub output: Option<serde_json::Value>,
}

#[derive(Debug, Clone, Serialize, Eq, PartialEq, Hash)]
pub enum LanguageModelToolResultContent {
    Text(Arc<str>),
    Image(LanguageModelImage),
}

impl<'de> Deserialize<'de> for LanguageModelToolResultContent {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::Error;

        let value = serde_json::Value::deserialize(deserializer)?;

        // Models can provide these responses in several styles. Try each in order.

        // 1. Try as plain string
        if let Ok(text) = serde_json::from_value::<String>(value.clone()) {
            return Ok(Self::Text(Arc::from(text)));
        }

        // 2. Try as object
        if let Some(obj) = value.as_object() {
            // get a JSON field case-insensitively
            fn get_field<'a>(
                obj: &'a serde_json::Map<String, serde_json::Value>,
                field: &str,
            ) -> Option<&'a serde_json::Value> {
                obj.iter()
                    .find(|(k, _)| k.to_lowercase() == field.to_lowercase())
                    .map(|(_, v)| v)
            }

            // Accept wrapped text format: { "type": "text", "text": "..." }
            if let (Some(type_value), Some(text_value)) =
                (get_field(obj, "type"), get_field(obj, "text"))
                && let Some(type_str) = type_value.as_str()
                && type_str.to_lowercase() == "text"
                && let Some(text) = text_value.as_str()
            {
                return Ok(Self::Text(Arc::from(text)));
            }

            // Check for wrapped Text variant: { "text": "..." }
            if let Some((_key, value)) = obj.iter().find(|(k, _)| k.to_lowercase() == "text")
                && obj.len() == 1
            {
                // Only one field, and it's "text" (case-insensitive)
                if let Some(text) = value.as_str() {
                    return Ok(Self::Text(Arc::from(text)));
                }
            }

            // Check for wrapped Image variant: { "image": { "source": "...", "size": ... } }
            if let Some((_key, value)) = obj.iter().find(|(k, _)| k.to_lowercase() == "image")
                && obj.len() == 1
            {
                // Only one field, and it's "image" (case-insensitive)
                // Try to parse the nested image object
                if let Some(image_obj) = value.as_object()
                    && let Some(image) = LanguageModelImage::from_json(image_obj)
                {
                    return Ok(Self::Image(image));
                }
            }

            // Try as direct Image (object with "source" and "size" fields)
            if let Some(image) = LanguageModelImage::from_json(obj) {
                return Ok(Self::Image(image));
            }
        }

        // If none of the variants match, return an error with the problematic JSON
        Err(D::Error::custom(format!(
            "data did not match any variant of LanguageModelToolResultContent. Expected either a string, \
             an object with 'type': 'text', a wrapped variant like {{\"Text\": \"...\"}}, or an image object. Got: {}",
            serde_json::to_string_pretty(&value).unwrap_or_else(|_| value.to_string())
        )))
    }
}

impl LanguageModelToolResultContent {
    pub fn to_str(&self) -> Option<&str> {
        match self {
            Self::Text(text) => Some(text),
            Self::Image(_) => None,
        }
    }

    pub fn is_empty(&self) -> bool {
        match self {
            Self::Text(text) => text.chars().all(|c| c.is_whitespace()),
            Self::Image(_) => false,
        }
    }
}

impl From<&str> for LanguageModelToolResultContent {
    fn from(value: &str) -> Self {
        Self::Text(Arc::from(value))
    }
}

impl From<String> for LanguageModelToolResultContent {
    fn from(value: String) -> Self {
        Self::Text(Arc::from(value))
    }
}

impl From<LanguageModelImage> for LanguageModelToolResultContent {
    fn from(image: LanguageModelImage) -> Self {
        Self::Image(image)
    }
}

#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
pub enum MessageContent {
    Text(String),
    Thinking {
        text: String,
        signature: Option<String>,
    },
    RedactedThinking(String),
    Image(LanguageModelImage),
    ToolUse(LanguageModelToolUse),
    ToolResult(LanguageModelToolResult),
}

impl MessageContent {
    pub fn to_str(&self) -> Option<&str> {
        match self {
            MessageContent::Text(text) => Some(text.as_str()),
            MessageContent::Thinking { text, .. } => Some(text.as_str()),
            MessageContent::RedactedThinking(_) => None,
            MessageContent::ToolResult(tool_result) => tool_result.content.to_str(),
            MessageContent::ToolUse(_) | MessageContent::Image(_) => None,
        }
    }

    pub fn is_empty(&self) -> bool {
        match self {
            MessageContent::Text(text) => text.chars().all(|c| c.is_whitespace()),
            MessageContent::Thinking { text, .. } => text.chars().all(|c| c.is_whitespace()),
            MessageContent::ToolResult(tool_result) => tool_result.content.is_empty(),
            MessageContent::RedactedThinking(_)
            | MessageContent::ToolUse(_)
            | MessageContent::Image(_) => false,
        }
    }
}

impl From<String> for MessageContent {
    fn from(value: String) -> Self {
        MessageContent::Text(value)
    }
}

impl From<&str> for MessageContent {
    fn from(value: &str) -> Self {
        MessageContent::Text(value.to_string())
    }
}

#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Hash)]
pub struct LanguageModelRequestMessage {
    pub role: Role,
    pub content: Vec<MessageContent>,
    pub cache: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub reasoning_details: Option<serde_json::Value>,
}

impl LanguageModelRequestMessage {
    pub fn string_contents(&self) -> String {
        let mut buffer = String::new();
        for string in self.content.iter().filter_map(|content| content.to_str()) {
            buffer.push_str(string);
        }

        buffer
    }

    pub fn contents_empty(&self) -> bool {
        self.content.iter().all(|content| content.is_empty())
    }
}

#[derive(Debug, PartialEq, Hash, Clone, Serialize, Deserialize)]
pub struct LanguageModelRequestTool {
    pub name: String,
    pub description: String,
    pub input_schema: serde_json::Value,
}

#[derive(Debug, PartialEq, Hash, Clone, Serialize, Deserialize)]
pub enum LanguageModelToolChoice {
    Auto,
    Any,
    None,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct LanguageModelRequest {
    pub thread_id: Option<String>,
    pub prompt_id: Option<String>,
    pub intent: Option<CompletionIntent>,
    pub mode: Option<CompletionMode>,
    pub messages: Vec<LanguageModelRequestMessage>,
    pub tools: Vec<LanguageModelRequestTool>,
    pub tool_choice: Option<LanguageModelToolChoice>,
    pub stop: Vec<String>,
    pub temperature: Option<f32>,
    pub thinking_allowed: bool,
}

#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
pub struct LanguageModelResponseMessage {
    pub role: Option<Role>,
    pub content: Option<String>,
}

#[cfg(test)]
mod tests {
    use super::*;
    use base64::Engine as _;
    use gpui::TestAppContext;
    use image::ImageDecoder as _;

    fn base64_to_png_bytes(base64_png: &str) -> Vec<u8> {
        base64::engine::general_purpose::STANDARD
            .decode(base64_png.as_bytes())
            .expect("base64 should decode")
    }

    fn png_dimensions(png_bytes: &[u8]) -> (u32, u32) {
        let decoder =
            image::codecs::png::PngDecoder::new(Cursor::new(png_bytes)).expect("png should decode");
        decoder.dimensions()
    }

    fn make_noisy_png_bytes(width: u32, height: u32) -> Vec<u8> {
        // Create an RGBA image with per-pixel variance to avoid PNG compressing too well.
        let mut img = image::RgbaImage::new(width, height);
        for y in 0..height {
            for x in 0..width {
                let r = ((x ^ y) & 0xFF) as u8;
                let g = ((x.wrapping_mul(31) ^ y.wrapping_mul(17)) & 0xFF) as u8;
                let b = ((x.wrapping_mul(131) ^ y.wrapping_mul(7)) & 0xFF) as u8;
                img.put_pixel(x, y, image::Rgba([r, g, b, 0xFF]));
            }
        }

        let mut out = Vec::new();
        image::DynamicImage::ImageRgba8(img)
            .write_with_encoder(PngEncoder::new(&mut out))
            .expect("png encoding should succeed");
        out
    }

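    // A minimal round-trip sketch for `encode_bytes_as_base64`: encode with the
    // private helper above, then decode with the `base64` crate directly. The
    // input bytes are arbitrary and chosen here only for illustration.
    #[test]
    fn test_encode_bytes_as_base64_round_trips() {
        let bytes: Vec<u8> = (0u8..=255).collect();
        let encoded = encode_bytes_as_base64(&bytes).expect("encoding should succeed");
        let decoded = base64::engine::general_purpose::STANDARD
            .decode(&encoded)
            .expect("base64 should decode");
        assert_eq!(decoded, bytes);
    }
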
    #[gpui::test]
    async fn test_from_image_downscales_to_default_5mb_limit(cx: &mut TestAppContext) {
        // Pick a size that reliably produces a PNG > 5MB when filled with noise.
        // If this fails (image is too small), bump dimensions.
        let original_png = make_noisy_png_bytes(4096, 4096);
        assert!(
            original_png.len() > DEFAULT_IMAGE_MAX_BYTES,
            "precondition failed: noisy PNG must exceed DEFAULT_IMAGE_MAX_BYTES"
        );

        let image = gpui::Image::from_bytes(ImageFormat::Png, original_png);
        let lm_image = cx
            .update(|cx| LanguageModelImage::from_image(Arc::new(image), cx))
            .await
            .expect("image conversion should succeed");

        let encoded_png = base64_to_png_bytes(lm_image.source.as_ref());
        assert!(
            encoded_png.len() <= DEFAULT_IMAGE_MAX_BYTES,
            "expected encoded PNG <= DEFAULT_IMAGE_MAX_BYTES, got {} bytes",
            encoded_png.len()
        );

        // Ensure we actually downscaled in pixels (not just re-encoded).
        let (w, h) = png_dimensions(&encoded_png);
        assert!(
            w < 4096 || h < 4096,
            "expected image to be downscaled in at least one dimension; got {w}x{h}"
        );
    }
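
    // A worked example of the `(width * height) / 750` heuristic in
    // `estimate_tokens`: 1092 * 1092 / 750 truncates to 1589. Also checks the
    // data-URL prefix emitted by `to_base64_url`. The `source` payload is an
    // arbitrary base64 string; neither method validates it.
    #[test]
    fn test_estimate_tokens_and_base64_url() {
        let image = LanguageModelImage {
            source: "aGVsbG8=".into(),
            size: Some(size(DevicePixels(1092), DevicePixels(1092))),
        };
        assert_eq!(image.estimate_tokens(), 1589);
        assert_eq!(image.to_base64_url(), "data:image/png;base64,aGVsbG8=");

        // Images without a recorded size estimate to zero tokens.
        assert_eq!(LanguageModelImage::empty().estimate_tokens(), 0);
    }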

    #[test]
    fn test_language_model_tool_result_content_deserialization() {
        let json = r#""This is plain text""#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("This is plain text".into())
        );

        let json = r#"{"type": "text", "text": "This is wrapped text"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("This is wrapped text".into())
        );

        let json = r#"{"Type": "TEXT", "TEXT": "Case insensitive"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Case insensitive".into())
        );

        let json = r#"{"Text": "Wrapped variant"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Wrapped variant".into())
        );

        let json = r#"{"text": "Lowercase wrapped"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Lowercase wrapped".into())
        );

        // Test image deserialization
        let json = r#"{
            "source": "base64encodedimagedata",
            "size": {
                "width": 100,
                "height": 200
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "base64encodedimagedata");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 100);
                assert_eq!(size.height.0, 200);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test wrapped Image variant
        let json = r#"{
            "Image": {
                "source": "wrappedimagedata",
                "size": {
                    "width": 50,
                    "height": 75
                }
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "wrappedimagedata");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 50);
                assert_eq!(size.height.0, 75);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test wrapped Image variant with case insensitive
        let json = r#"{
            "image": {
                "Source": "caseinsensitive",
                "SIZE": {
                    "width": 30,
                    "height": 40
                }
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "caseinsensitive");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 30);
                assert_eq!(size.height.0, 40);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test that wrapped text with wrong type fails
        let json = r#"{"type": "blahblah", "text": "This should fail"}"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        // Test that malformed JSON fails
        let json = r#"{"invalid": "structure"}"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        // Test edge cases
        let json = r#""""#; // Empty string
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(result, LanguageModelToolResultContent::Text("".into()));

        // Test with extra fields in wrapped text (should be ignored)
        let json = r#"{"type": "text", "text": "Hello", "extra": "field"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(result, LanguageModelToolResultContent::Text("Hello".into()));

        // Test direct image with case-insensitive fields
        let json = r#"{
            "SOURCE": "directimage",
            "Size": {
                "width": 200,
                "height": 300
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "directimage");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 200);
                assert_eq!(size.height.0, 300);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test that multiple fields prevent wrapped variant interpretation
        let json = r#"{"Text": "not wrapped", "extra": "field"}"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        // Test wrapped text with uppercase TEXT variant
        let json = r#"{"TEXT": "Uppercase variant"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Uppercase variant".into())
        );

        // Test that numbers and other JSON values fail gracefully
        let json = r#"123"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        let json = r#"null"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        let json = r#"[1, 2, 3]"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());
    }
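
    // A minimal direct exercise of `LanguageModelImage::from_json`, matching its
    // documented behavior: field names are compared case-insensitively, and a
    // missing `size` object makes parsing fail. Values are arbitrary test data.
    #[test]
    fn test_language_model_image_from_json() {
        let value = serde_json::json!({
            "SOURCE": "somedata",
            "Size": { "WIDTH": 12, "Height": 34 }
        });
        let image = LanguageModelImage::from_json(value.as_object().unwrap())
            .expect("mixed-case fields should parse");
        assert_eq!(image.source.as_ref(), "somedata");
        let size = image.size.expect("size");
        assert_eq!(size.width.0, 12);
        assert_eq!(size.height.0, 34);

        // Without a `size` object, `from_json` returns None.
        let value = serde_json::json!({ "source": "somedata" });
        assert!(LanguageModelImage::from_json(value.as_object().unwrap()).is_none());
    }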
}