request.rs

use std::io::{Cursor, Write};
use std::sync::Arc;

use anyhow::Result;
use base64::write::EncoderWriter;
use cloud_llm_client::{CompletionIntent, CompletionMode};
use gpui::{
    App, AppContext as _, DevicePixels, Image, ImageFormat, ObjectFit, SharedString, Size, Task,
    point, px, size,
};
use image::GenericImageView as _;
use image::codecs::png::PngEncoder;
use serde::{Deserialize, Serialize};
use util::ResultExt;

use crate::role::Role;
use crate::{LanguageModelToolUse, LanguageModelToolUseId};

#[derive(Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
pub struct LanguageModelImage {
    /// A base64-encoded PNG image.
    pub source: SharedString,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub size: Option<Size<DevicePixels>>,
}

impl LanguageModelImage {
    pub fn len(&self) -> usize {
        self.source.len()
    }

    pub fn is_empty(&self) -> bool {
        self.source.is_empty()
    }

    /// Parses `Self` from a JSON object, matching field names case-insensitively.
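    ///
    /// For example, these parse identically:
    /// `{"source": "...", "size": {"width": 100, "height": 200}}` and
    /// `{"SOURCE": "...", "Size": {"Width": 100, "Height": 200}}`.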
    pub fn from_json(obj: &serde_json::Map<String, serde_json::Value>) -> Option<Self> {
        let mut source = None;
        let mut size_obj = None;

        // Find source and size fields (case-insensitive)
        for (k, v) in obj.iter() {
            match k.to_lowercase().as_str() {
                "source" => source = v.as_str(),
                "size" => size_obj = v.as_object(),
                _ => {}
            }
        }

        let source = source?;
        let size_obj = size_obj?;

        let mut width = None;
        let mut height = None;

        // Find width and height in size object (case-insensitive)
        for (k, v) in size_obj.iter() {
            match k.to_lowercase().as_str() {
                "width" => width = v.as_i64().map(|w| w as i32),
                "height" => height = v.as_i64().map(|h| h as i32),
                _ => {}
            }
        }

        Some(Self {
            size: Some(size(DevicePixels(width?), DevicePixels(height?))),
            source: SharedString::from(source.to_string()),
        })
    }
}

impl std::fmt::Debug for LanguageModelImage {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("LanguageModelImage")
            .field("source", &format!("<{} bytes>", self.source.len()))
            .field("size", &self.size)
            .finish()
    }
}

/// Anthropic wants uploaded images to be no larger than this in either dimension.
const ANTHROPIC_SIZE_LIMIT: f32 = 1568.;

/// Default per-image hard limit (in bytes) for the encoded image payload we send upstream.
///
/// NOTE: `LanguageModelImage::source` holds base64-encoded PNG bytes (without the `data:` prefix).
/// This limit is enforced on the encoded PNG bytes *before* base64 encoding.
const DEFAULT_IMAGE_MAX_BYTES: usize = 5 * 1024 * 1024;
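// For scale: base64 expands its input by 4/3, so a PNG right at this 5 MiB cap becomes
// roughly 6.7 MiB of base64 text in `LanguageModelImage::source`.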

/// Conservative cap on how many times we'll attempt to shrink/re-encode an image to fit
/// `DEFAULT_IMAGE_MAX_BYTES`.
const MAX_IMAGE_DOWNSCALE_PASSES: usize = 8;
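// Each pass scales both dimensions by 0.85, i.e. pixel area by ~0.72, so eight passes
// can cut the area to roughly 0.72^8 ≈ 7% of the original. PNG output size only loosely
// tracks pixel area, but this typically leaves enough headroom to get under the cap.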

impl LanguageModelImage {
    pub fn empty() -> Self {
        Self {
            source: "".into(),
            size: None,
        }
    }

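    /// Decodes `data` on a background thread, scales it down if it exceeds Anthropic's
    /// dimension limit, then re-encodes it as PNG, downscaling further until the encoded
    /// bytes fit `DEFAULT_IMAGE_MAX_BYTES`. Resolves to `None` if the image format is
    /// unsupported, decoding fails, or the image cannot be shrunk enough to fit.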
    pub fn from_image(data: Arc<Image>, cx: &mut App) -> Task<Option<Self>> {
        cx.background_spawn(async move {
            let image_bytes = Cursor::new(data.bytes());
            let dynamic_image = match data.format() {
                ImageFormat::Png => image::codecs::png::PngDecoder::new(image_bytes)
                    .and_then(image::DynamicImage::from_decoder),
                ImageFormat::Jpeg => image::codecs::jpeg::JpegDecoder::new(image_bytes)
                    .and_then(image::DynamicImage::from_decoder),
                ImageFormat::Webp => image::codecs::webp::WebPDecoder::new(image_bytes)
                    .and_then(image::DynamicImage::from_decoder),
                ImageFormat::Gif => image::codecs::gif::GifDecoder::new(image_bytes)
                    .and_then(image::DynamicImage::from_decoder),
                ImageFormat::Bmp => image::codecs::bmp::BmpDecoder::new(image_bytes)
                    .and_then(image::DynamicImage::from_decoder),
                ImageFormat::Tiff => image::codecs::tiff::TiffDecoder::new(image_bytes)
                    .and_then(image::DynamicImage::from_decoder),
                _ => return None,
            }
            .log_err()?;

            let width = dynamic_image.width();
            let height = dynamic_image.height();
            let image_size = size(DevicePixels(width as i32), DevicePixels(height as i32));

            // First apply any provider-specific dimension constraints we know about (Anthropic).
            let mut processed_image = if image_size.width.0 > ANTHROPIC_SIZE_LIMIT as i32
                || image_size.height.0 > ANTHROPIC_SIZE_LIMIT as i32
            {
                let new_bounds = ObjectFit::ScaleDown.get_bounds(
                    gpui::Bounds {
                        origin: point(px(0.0), px(0.0)),
                        size: size(px(ANTHROPIC_SIZE_LIMIT), px(ANTHROPIC_SIZE_LIMIT)),
                    },
                    image_size,
                );
                dynamic_image.resize(
                    new_bounds.size.width.into(),
                    new_bounds.size.height.into(),
                    image::imageops::FilterType::Triangle,
                )
            } else {
                dynamic_image
            };

            // Then enforce a default per-image size cap on the encoded PNG bytes.
            //
            // We always send PNG bytes (either original PNG bytes, or re-encoded PNG) base64'd.
            // The upstream provider limit we want to respect is effectively on the binary image
            // payload size, so we enforce against the encoded PNG bytes before base64 encoding.
            let mut encoded_png = encode_png_bytes(&processed_image).log_err()?;
            for _pass in 0..MAX_IMAGE_DOWNSCALE_PASSES {
                if encoded_png.len() <= DEFAULT_IMAGE_MAX_BYTES {
                    break;
                }

                // Scale down geometrically to converge quickly. We don't know the final PNG size
                // as a function of pixels, so we iteratively shrink.
                let (w, h) = processed_image.dimensions();
                if w <= 1 || h <= 1 {
                    break;
                }

                // Shrink by ~15% each pass (0.85). This is a compromise between speed and
                // preserving image detail.
                let new_w = ((w as f32) * 0.85).round().max(1.0) as u32;
                let new_h = ((h as f32) * 0.85).round().max(1.0) as u32;

                processed_image =
                    processed_image.resize(new_w, new_h, image::imageops::FilterType::Triangle);
                encoded_png = encode_png_bytes(&processed_image).log_err()?;
            }


            if encoded_png.len() > DEFAULT_IMAGE_MAX_BYTES {
                // Still too large after multiple passes; treat as non-convertible for now.
                // (Provider-specific handling can be introduced later.)
                return None;
            }

            // Now base64 encode the PNG bytes.
            let base64_image = encode_bytes_as_base64(encoded_png.as_slice()).log_err()?;

            // SAFETY: base64 output consists solely of ASCII characters, which are always
            // valid UTF-8.
            let source = unsafe { String::from_utf8_unchecked(base64_image) };

            // Note: `size` holds the image's original (pre-downscale) dimensions.
            Some(LanguageModelImage {
                size: Some(image_size),
                source: source.into(),
            })
        })
    }

    pub fn estimate_tokens(&self) -> usize {
        let Some(size) = self.size.as_ref() else {
            return 0;
        };
        let width = size.width.0.unsigned_abs() as usize;
        let height = size.height.0.unsigned_abs() as usize;

        // From: https://docs.anthropic.com/en/docs/build-with-claude/vision#calculate-image-costs
        // Note that there are a lot of conditions on Anthropic's API, and OpenAI doesn't use
        // this formula, so this method is more of a rough guess.
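        // E.g., a 1092x1092 image estimates to (1092 * 1092) / 750 ≈ 1590 tokens.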
        (width * height) / 750
    }

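    /// Formats the image as a `data:` URL. Since `source` is base64-encoded PNG data, the
    /// result looks like `data:image/png;base64,iVBORw0KGgo...`.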
    pub fn to_base64_url(&self) -> String {
        format!("data:image/png;base64,{}", self.source)
    }
}

fn encode_png_bytes(image: &image::DynamicImage) -> Result<Vec<u8>> {
    let mut png = Vec::new();
    image.write_with_encoder(PngEncoder::new(&mut png))?;
    Ok(png)
}

fn encode_bytes_as_base64(bytes: &[u8]) -> Result<Vec<u8>> {
    let mut base64_image = Vec::new();
    {
        let mut base64_encoder = EncoderWriter::new(
            Cursor::new(&mut base64_image),
            &base64::engine::general_purpose::STANDARD,
        );
        base64_encoder.write_all(bytes)?;
        // Dropping the encoder at the end of this scope flushes the final base64 chunk.
    }
    Ok(base64_image)
}

#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
pub struct LanguageModelToolResult {
    pub tool_use_id: LanguageModelToolUseId,
    pub tool_name: Arc<str>,
    pub is_error: bool,
    pub content: LanguageModelToolResultContent,
    pub output: Option<serde_json::Value>,
}

#[derive(Debug, Clone, Serialize, Eq, PartialEq, Hash)]
pub enum LanguageModelToolResultContent {
    Text(Arc<str>),
    Image(LanguageModelImage),
}

impl<'de> Deserialize<'de> for LanguageModelToolResultContent {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::Error;

        let value = serde_json::Value::deserialize(deserializer)?;

        // Models can provide these responses in several styles. Try each in order.
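        //
        // Accepted shapes, in the order they're tried:
        //   1. "plain text"
        //   2. {"type": "text", "text": "..."} (keys and the type value are case-insensitive)
        //   3. {"Text": "..."} (wrapped variant; must be the object's only field)
        //   4. {"Image": {"source": ..., "size": ...}} (wrapped variant; only field)
        //   5. {"source": ..., "size": ...} (a bare image object)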

        // 1. Try as plain string
        if let Ok(text) = serde_json::from_value::<String>(value.clone()) {
            return Ok(Self::Text(Arc::from(text)));
        }

        // 2. Try as object
        if let Some(obj) = value.as_object() {
            // get a JSON field case-insensitively
            fn get_field<'a>(
                obj: &'a serde_json::Map<String, serde_json::Value>,
                field: &str,
            ) -> Option<&'a serde_json::Value> {
                obj.iter()
                    .find(|(k, _)| k.to_lowercase() == field.to_lowercase())
                    .map(|(_, v)| v)
            }

            // Accept wrapped text format: { "type": "text", "text": "..." }
            if let (Some(type_value), Some(text_value)) =
                (get_field(obj, "type"), get_field(obj, "text"))
                && let Some(type_str) = type_value.as_str()
                && type_str.to_lowercase() == "text"
                && let Some(text) = text_value.as_str()
            {
                return Ok(Self::Text(Arc::from(text)));
            }

            // Check for wrapped Text variant: { "text": "..." }
            if let Some((_key, value)) = obj.iter().find(|(k, _)| k.to_lowercase() == "text")
                && obj.len() == 1
            {
                // Only one field, and it's "text" (case-insensitive)
                if let Some(text) = value.as_str() {
                    return Ok(Self::Text(Arc::from(text)));
                }
            }

            // Check for wrapped Image variant: { "image": { "source": "...", "size": ... } }
            if let Some((_key, value)) = obj.iter().find(|(k, _)| k.to_lowercase() == "image")
                && obj.len() == 1
            {
                // Only one field, and it's "image" (case-insensitive)
                // Try to parse the nested image object
                if let Some(image_obj) = value.as_object()
                    && let Some(image) = LanguageModelImage::from_json(image_obj)
                {
                    return Ok(Self::Image(image));
                }
            }

            // Try as direct Image (object with "source" and "size" fields)
            if let Some(image) = LanguageModelImage::from_json(obj) {
                return Ok(Self::Image(image));
            }
        }

        // If none of the variants match, return an error with the problematic JSON
        Err(D::Error::custom(format!(
            "data did not match any variant of LanguageModelToolResultContent. Expected either a string, \
             an object with 'type': 'text', a wrapped variant like {{\"Text\": \"...\"}}, or an image object. Got: {}",
            serde_json::to_string_pretty(&value).unwrap_or_else(|_| value.to_string())
        )))
    }
}

impl LanguageModelToolResultContent {
    pub fn to_str(&self) -> Option<&str> {
        match self {
            Self::Text(text) => Some(text),
            Self::Image(_) => None,
        }
    }

    pub fn is_empty(&self) -> bool {
        match self {
            Self::Text(text) => text.chars().all(|c| c.is_whitespace()),
            Self::Image(_) => false,
        }
    }
}

impl From<&str> for LanguageModelToolResultContent {
    fn from(value: &str) -> Self {
        Self::Text(Arc::from(value))
    }
}

impl From<String> for LanguageModelToolResultContent {
    fn from(value: String) -> Self {
        Self::Text(Arc::from(value))
    }
}

impl From<LanguageModelImage> for LanguageModelToolResultContent {
    fn from(image: LanguageModelImage) -> Self {
        Self::Image(image)
    }
}

#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
pub enum MessageContent {
    Text(String),
    Thinking {
        text: String,
        signature: Option<String>,
    },
    RedactedThinking(String),
    Image(LanguageModelImage),
    ToolUse(LanguageModelToolUse),
    ToolResult(LanguageModelToolResult),
}

impl MessageContent {
    pub fn to_str(&self) -> Option<&str> {
        match self {
            MessageContent::Text(text) => Some(text.as_str()),
            MessageContent::Thinking { text, .. } => Some(text.as_str()),
            MessageContent::RedactedThinking(_) => None,
            MessageContent::ToolResult(tool_result) => tool_result.content.to_str(),
            MessageContent::ToolUse(_) | MessageContent::Image(_) => None,
        }
    }

    pub fn is_empty(&self) -> bool {
        match self {
            MessageContent::Text(text) => text.chars().all(|c| c.is_whitespace()),
            MessageContent::Thinking { text, .. } => text.chars().all(|c| c.is_whitespace()),
            MessageContent::ToolResult(tool_result) => tool_result.content.is_empty(),
            MessageContent::RedactedThinking(_)
            | MessageContent::ToolUse(_)
            | MessageContent::Image(_) => false,
        }
    }
}

impl From<String> for MessageContent {
    fn from(value: String) -> Self {
        MessageContent::Text(value)
    }
}

impl From<&str> for MessageContent {
    fn from(value: &str) -> Self {
        MessageContent::Text(value.to_string())
    }
}

#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Hash)]
pub struct LanguageModelRequestMessage {
    pub role: Role,
    pub content: Vec<MessageContent>,
    pub cache: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub reasoning_details: Option<serde_json::Value>,
}

impl LanguageModelRequestMessage {
    pub fn string_contents(&self) -> String {
        let mut buffer = String::new();
        for string in self.content.iter().filter_map(|content| content.to_str()) {
            buffer.push_str(string);
        }

        buffer
    }

    pub fn contents_empty(&self) -> bool {
        self.content.iter().all(|content| content.is_empty())
    }
}

#[derive(Debug, PartialEq, Hash, Clone, Serialize, Deserialize)]
pub struct LanguageModelRequestTool {
    pub name: String,
    pub description: String,
    pub input_schema: serde_json::Value,
}

#[derive(Debug, PartialEq, Hash, Clone, Serialize, Deserialize)]
pub enum LanguageModelToolChoice {
    Auto,
    Any,
    None,
}

#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct LanguageModelRequest {
    pub thread_id: Option<String>,
    pub prompt_id: Option<String>,
    pub intent: Option<CompletionIntent>,
    pub mode: Option<CompletionMode>,
    pub messages: Vec<LanguageModelRequestMessage>,
    pub tools: Vec<LanguageModelRequestTool>,
    pub tool_choice: Option<LanguageModelToolChoice>,
    pub stop: Vec<String>,
    pub temperature: Option<f32>,
    pub thinking_allowed: bool,
}

#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
pub struct LanguageModelResponseMessage {
    pub role: Option<Role>,
    pub content: Option<String>,
}

#[cfg(test)]
mod tests {
    use super::*;
    use base64::Engine as _;
    use gpui::TestAppContext;
    use image::ImageDecoder as _;

    fn base64_to_png_bytes(base64_png: &str) -> Vec<u8> {
        base64::engine::general_purpose::STANDARD
            .decode(base64_png.as_bytes())
            .expect("base64 should decode")
    }

    fn png_dimensions(png_bytes: &[u8]) -> (u32, u32) {
        let decoder =
            image::codecs::png::PngDecoder::new(Cursor::new(png_bytes)).expect("png should decode");
        decoder.dimensions()
    }

    fn make_noisy_png_bytes(width: u32, height: u32) -> Vec<u8> {
        // Create an RGBA image with per-pixel variance to avoid PNG compressing too well.
        let mut img = image::RgbaImage::new(width, height);
        for y in 0..height {
            for x in 0..width {
                let r = ((x ^ y) & 0xFF) as u8;
                let g = ((x.wrapping_mul(31) ^ y.wrapping_mul(17)) & 0xFF) as u8;
                let b = ((x.wrapping_mul(131) ^ y.wrapping_mul(7)) & 0xFF) as u8;
                img.put_pixel(x, y, image::Rgba([r, g, b, 0xFF]));
            }
        }

        let mut out = Vec::new();
        image::DynamicImage::ImageRgba8(img)
            .write_with_encoder(PngEncoder::new(&mut out))
            .expect("png encoding should succeed");
        out
    }

    #[gpui::test]
    async fn test_from_image_downscales_to_default_5mb_limit(cx: &mut TestAppContext) {
        // Pick a size that reliably produces a PNG > 5MB when filled with noise.
        // If this fails (image is too small), bump dimensions.
        let original_png = make_noisy_png_bytes(4096, 4096);
        assert!(
            original_png.len() > DEFAULT_IMAGE_MAX_BYTES,
            "precondition failed: noisy PNG must exceed DEFAULT_IMAGE_MAX_BYTES"
        );

        let image = gpui::Image::from_bytes(ImageFormat::Png, original_png);
        let lm_image = cx
            .update(|cx| LanguageModelImage::from_image(Arc::new(image), cx))
            .await
            .expect("image conversion should succeed");

        let encoded_png = base64_to_png_bytes(lm_image.source.as_ref());
        assert!(
            encoded_png.len() <= DEFAULT_IMAGE_MAX_BYTES,
            "expected encoded PNG <= DEFAULT_IMAGE_MAX_BYTES, got {} bytes",
            encoded_png.len()
        );

        // Ensure we actually downscaled in pixels (not just re-encoded).
        let (w, h) = png_dimensions(&encoded_png);
        assert!(
            w < 4096 || h < 4096,
            "expected image to be downscaled in at least one dimension; got {w}x{h}"
        );
    }

    #[test]
    fn test_language_model_tool_result_content_deserialization() {
        let json = r#""This is plain text""#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("This is plain text".into())
        );

        let json = r#"{"type": "text", "text": "This is wrapped text"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("This is wrapped text".into())
        );

        let json = r#"{"Type": "TEXT", "TEXT": "Case insensitive"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Case insensitive".into())
        );

        let json = r#"{"Text": "Wrapped variant"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Wrapped variant".into())
        );

        let json = r#"{"text": "Lowercase wrapped"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Lowercase wrapped".into())
        );

        // Test image deserialization
        let json = r#"{
            "source": "base64encodedimagedata",
            "size": {
                "width": 100,
                "height": 200
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "base64encodedimagedata");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 100);
                assert_eq!(size.height.0, 200);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test wrapped Image variant
        let json = r#"{
            "Image": {
                "source": "wrappedimagedata",
                "size": {
                    "width": 50,
                    "height": 75
                }
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "wrappedimagedata");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 50);
                assert_eq!(size.height.0, 75);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test wrapped Image variant with case-insensitive fields
        let json = r#"{
            "image": {
                "Source": "caseinsensitive",
                "SIZE": {
                    "width": 30,
                    "height": 40
                }
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "caseinsensitive");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 30);
                assert_eq!(size.height.0, 40);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test that wrapped text with wrong type fails
        let json = r#"{"type": "blahblah", "text": "This should fail"}"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        // Test that an unrecognized object shape fails
        let json = r#"{"invalid": "structure"}"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        // Test edge cases
        let json = r#""""#; // Empty string
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(result, LanguageModelToolResultContent::Text("".into()));

        // Test with extra fields in wrapped text (should be ignored)
        let json = r#"{"type": "text", "text": "Hello", "extra": "field"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(result, LanguageModelToolResultContent::Text("Hello".into()));

        // Test direct image with case-insensitive fields
        let json = r#"{
            "SOURCE": "directimage",
            "Size": {
                "width": 200,
                "height": 300
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "directimage");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 200);
                assert_eq!(size.height.0, 300);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test that multiple fields prevent wrapped variant interpretation
        let json = r#"{"Text": "not wrapped", "extra": "field"}"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        // Test wrapped text with uppercase TEXT variant
        let json = r#"{"TEXT": "Uppercase variant"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Uppercase variant".into())
        );

        // Test that numbers and other JSON values fail gracefully
        let json = r#"123"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        let json = r#"null"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        let json = r#"[1, 2, 3]"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());
    }
}
692}