request.rs

  1use std::io::{Cursor, Write};
  2use std::sync::Arc;
  3
  4use anyhow::Result;
  5use base64::write::EncoderWriter;
  6use cloud_llm_client::CompletionIntent;
  7use gpui::{
  8    App, AppContext as _, DevicePixels, Image, ImageFormat, ObjectFit, SharedString, Size, Task,
  9    point, px, size,
 10};
 11use image::GenericImageView as _;
 12use image::codecs::png::PngEncoder;
 13use serde::{Deserialize, Serialize};
 14use util::ResultExt;
 15
 16use crate::role::Role;
 17use crate::{LanguageModelToolUse, LanguageModelToolUseId};
 18
/// An image in the form language-model APIs consume here: base64-encoded PNG
/// data plus, when known, its pixel dimensions.
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
pub struct LanguageModelImage {
    /// A base64-encoded PNG image.
    pub source: SharedString,
    /// Pixel dimensions of the image, when known; omitted from serialized
    /// output if absent.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub size: Option<Size<DevicePixels>>,
}
 26
 27impl LanguageModelImage {
 28    pub fn len(&self) -> usize {
 29        self.source.len()
 30    }
 31
 32    pub fn is_empty(&self) -> bool {
 33        self.source.is_empty()
 34    }
 35
 36    // Parse Self from a JSON object with case-insensitive field names
 37    pub fn from_json(obj: &serde_json::Map<String, serde_json::Value>) -> Option<Self> {
 38        let mut source = None;
 39        let mut size_obj = None;
 40
 41        // Find source and size fields (case-insensitive)
 42        for (k, v) in obj.iter() {
 43            match k.to_lowercase().as_str() {
 44                "source" => source = v.as_str(),
 45                "size" => size_obj = v.as_object(),
 46                _ => {}
 47            }
 48        }
 49
 50        let source = source?;
 51        let size_obj = size_obj?;
 52
 53        let mut width = None;
 54        let mut height = None;
 55
 56        // Find width and height in size object (case-insensitive)
 57        for (k, v) in size_obj.iter() {
 58            match k.to_lowercase().as_str() {
 59                "width" => width = v.as_i64().map(|w| w as i32),
 60                "height" => height = v.as_i64().map(|h| h as i32),
 61                _ => {}
 62            }
 63        }
 64
 65        Some(Self {
 66            size: Some(size(DevicePixels(width?), DevicePixels(height?))),
 67            source: SharedString::from(source.to_string()),
 68        })
 69    }
 70}
 71
 72impl std::fmt::Debug for LanguageModelImage {
 73    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 74        f.debug_struct("LanguageModelImage")
 75            .field("source", &format!("<{} bytes>", self.source.len()))
 76            .field("size", &self.size)
 77            .finish()
 78    }
 79}
 80
/// Anthropic wants uploaded images to be smaller than this in both dimensions.
/// Images exceeding this in either dimension are scale-down resized first.
const ANTHROPIC_SIZE_LIMIT: f32 = 1568.;

/// Default per-image hard limit (in bytes) for the encoded image payload we send upstream.
///
/// NOTE: `LanguageModelImage.source` is base64-encoded PNG bytes (without the `data:` prefix).
/// This limit is enforced on the encoded PNG bytes *before* base64 encoding.
const DEFAULT_IMAGE_MAX_BYTES: usize = 5 * 1024 * 1024;

/// Conservative cap on how many times we'll attempt to shrink/re-encode an image to fit
/// `DEFAULT_IMAGE_MAX_BYTES`.
const MAX_IMAGE_DOWNSCALE_PASSES: usize = 8;
 93
impl LanguageModelImage {
    // All language model images are encoded as PNGs.
    pub const FORMAT: ImageFormat = ImageFormat::Png;

    /// Returns an image with empty source data and no size.
    pub fn empty() -> Self {
        Self {
            source: "".into(),
            size: None,
        }
    }

    /// Converts a gpui [`Image`] into a `LanguageModelImage` on a background
    /// task.
    ///
    /// The image is decoded (PNG/JPEG/WebP/GIF/BMP/TIFF), scaled down to
    /// Anthropic's dimension limit if needed, then iteratively shrunk until
    /// its PNG encoding fits under `DEFAULT_IMAGE_MAX_BYTES`, and finally
    /// base64-encoded. Resolves to `None` for unsupported formats, decode or
    /// encode failures, or images that cannot be shrunk under the byte limit.
    ///
    /// NOTE(review): the returned `size` is the dimensions of the *decoded*
    /// image, not of the downscaled one actually sent — confirm that callers
    /// such as `estimate_tokens` intend this.
    pub fn from_image(data: Arc<Image>, cx: &mut App) -> Task<Option<Self>> {
        cx.background_spawn(async move {
            let image_bytes = Cursor::new(data.bytes());
            // Dispatch on the declared format; any format not listed here
            // yields None immediately.
            let dynamic_image = match data.format() {
                ImageFormat::Png => image::codecs::png::PngDecoder::new(image_bytes)
                    .and_then(image::DynamicImage::from_decoder),
                ImageFormat::Jpeg => image::codecs::jpeg::JpegDecoder::new(image_bytes)
                    .and_then(image::DynamicImage::from_decoder),
                ImageFormat::Webp => image::codecs::webp::WebPDecoder::new(image_bytes)
                    .and_then(image::DynamicImage::from_decoder),
                ImageFormat::Gif => image::codecs::gif::GifDecoder::new(image_bytes)
                    .and_then(image::DynamicImage::from_decoder),
                ImageFormat::Bmp => image::codecs::bmp::BmpDecoder::new(image_bytes)
                    .and_then(image::DynamicImage::from_decoder),
                ImageFormat::Tiff => image::codecs::tiff::TiffDecoder::new(image_bytes)
                    .and_then(image::DynamicImage::from_decoder),
                _ => return None,
            }
            .log_err()?;

            let width = dynamic_image.width();
            let height = dynamic_image.height();
            let image_size = size(DevicePixels(width as i32), DevicePixels(height as i32));

            // First apply any provider-specific dimension constraints we know about (Anthropic).
            let mut processed_image = if image_size.width.0 > ANTHROPIC_SIZE_LIMIT as i32
                || image_size.height.0 > ANTHROPIC_SIZE_LIMIT as i32
            {
                // ScaleDown fits both dimensions inside the limit while
                // preserving the aspect ratio.
                let new_bounds = ObjectFit::ScaleDown.get_bounds(
                    gpui::Bounds {
                        origin: point(px(0.0), px(0.0)),
                        size: size(px(ANTHROPIC_SIZE_LIMIT), px(ANTHROPIC_SIZE_LIMIT)),
                    },
                    image_size,
                );
                dynamic_image.resize(
                    new_bounds.size.width.into(),
                    new_bounds.size.height.into(),
                    image::imageops::FilterType::Triangle,
                )
            } else {
                dynamic_image
            };

            // Then enforce a default per-image size cap on the encoded PNG bytes.
            //
            // We always send PNG bytes (either original PNG bytes, or re-encoded PNG) base64'd.
            // The upstream provider limit we want to respect is effectively on the binary image
            // payload size, so we enforce against the encoded PNG bytes before base64 encoding.
            let mut encoded_png = encode_png_bytes(&processed_image).log_err()?;
            for _pass in 0..MAX_IMAGE_DOWNSCALE_PASSES {
                if encoded_png.len() <= DEFAULT_IMAGE_MAX_BYTES {
                    break;
                }

                // Scale down geometrically to converge quickly. We don't know the final PNG size
                // as a function of pixels, so we iteratively shrink.
                let (w, h) = processed_image.dimensions();
                if w <= 1 || h <= 1 {
                    break;
                }

                // Shrink by ~15% each pass (0.85). This is a compromise between speed and
                // preserving image detail.
                let new_w = ((w as f32) * 0.85).round().max(1.0) as u32;
                let new_h = ((h as f32) * 0.85).round().max(1.0) as u32;

                processed_image =
                    processed_image.resize(new_w, new_h, image::imageops::FilterType::Triangle);
                encoded_png = encode_png_bytes(&processed_image).log_err()?;
            }

            if encoded_png.len() > DEFAULT_IMAGE_MAX_BYTES {
                // Still too large after multiple passes; treat as non-convertible for now.
                // (Provider-specific handling can be introduced later.)
                return None;
            }

            // Now base64 encode the PNG bytes.
            let base64_image = encode_bytes_as_base64(encoded_png.as_slice()).log_err()?;

            // SAFETY: The base64 encoder should not produce non-UTF8.
            let source = unsafe { String::from_utf8_unchecked(base64_image) };

            Some(LanguageModelImage {
                size: Some(image_size),
                source: source.into(),
            })
        })
    }

    /// Rough token-count estimate for this image using Anthropic's
    /// pixels/750 formula. Returns 0 when the size is unknown.
    pub fn estimate_tokens(&self) -> usize {
        let Some(size) = self.size.as_ref() else {
            return 0;
        };
        let width = size.width.0.unsigned_abs() as usize;
        let height = size.height.0.unsigned_abs() as usize;

        // From: https://docs.anthropic.com/en/docs/build-with-claude/vision#calculate-image-costs
        // Note that are a lot of conditions on Anthropic's API, and OpenAI doesn't use this,
        // so this method is more of a rough guess.
        (width * height) / 750
    }

    /// Formats the image as a `data:image/png;base64,...` URL.
    pub fn to_base64_url(&self) -> String {
        format!("data:image/png;base64,{}", self.source)
    }
}
213
214fn encode_png_bytes(image: &image::DynamicImage) -> Result<Vec<u8>> {
215    let mut png = Vec::new();
216    image.write_with_encoder(PngEncoder::new(&mut png))?;
217    Ok(png)
218}
219
220fn encode_bytes_as_base64(bytes: &[u8]) -> Result<Vec<u8>> {
221    let mut base64_image = Vec::new();
222    {
223        let mut base64_encoder = EncoderWriter::new(
224            Cursor::new(&mut base64_image),
225            &base64::engine::general_purpose::STANDARD,
226        );
227        base64_encoder.write_all(bytes)?;
228    }
229    Ok(base64_image)
230}
231
/// The outcome of a single tool invocation, sent back to the model.
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
pub struct LanguageModelToolResult {
    /// Identifier of the tool-use request this result answers.
    pub tool_use_id: LanguageModelToolUseId,
    /// Name of the tool that produced this result.
    pub tool_name: Arc<str>,
    /// Whether the tool invocation failed.
    pub is_error: bool,
    /// The tool output formatted for presenting to the model
    pub content: LanguageModelToolResultContent,
    /// The raw tool output, if available, often for debugging or extra state for replay
    pub output: Option<serde_json::Value>,
}
242
/// Content a tool result can carry: either text or an image.
///
/// `Deserialize` is implemented manually (below) to tolerate the several JSON
/// shapes models produce in practice.
#[derive(Debug, Clone, Serialize, Eq, PartialEq, Hash)]
pub enum LanguageModelToolResultContent {
    Text(Arc<str>),
    Image(LanguageModelImage),
}
248
249impl<'de> Deserialize<'de> for LanguageModelToolResultContent {
250    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
251    where
252        D: serde::Deserializer<'de>,
253    {
254        use serde::de::Error;
255
256        let value = serde_json::Value::deserialize(deserializer)?;
257
258        // Models can provide these responses in several styles. Try each in order.
259
260        // 1. Try as plain string
261        if let Ok(text) = serde_json::from_value::<String>(value.clone()) {
262            return Ok(Self::Text(Arc::from(text)));
263        }
264
265        // 2. Try as object
266        if let Some(obj) = value.as_object() {
267            // get a JSON field case-insensitively
268            fn get_field<'a>(
269                obj: &'a serde_json::Map<String, serde_json::Value>,
270                field: &str,
271            ) -> Option<&'a serde_json::Value> {
272                obj.iter()
273                    .find(|(k, _)| k.to_lowercase() == field.to_lowercase())
274                    .map(|(_, v)| v)
275            }
276
277            // Accept wrapped text format: { "type": "text", "text": "..." }
278            if let (Some(type_value), Some(text_value)) =
279                (get_field(obj, "type"), get_field(obj, "text"))
280                && let Some(type_str) = type_value.as_str()
281                && type_str.to_lowercase() == "text"
282                && let Some(text) = text_value.as_str()
283            {
284                return Ok(Self::Text(Arc::from(text)));
285            }
286
287            // Check for wrapped Text variant: { "text": "..." }
288            if let Some((_key, value)) = obj.iter().find(|(k, _)| k.to_lowercase() == "text")
289                && obj.len() == 1
290            {
291                // Only one field, and it's "text" (case-insensitive)
292                if let Some(text) = value.as_str() {
293                    return Ok(Self::Text(Arc::from(text)));
294                }
295            }
296
297            // Check for wrapped Image variant: { "image": { "source": "...", "size": ... } }
298            if let Some((_key, value)) = obj.iter().find(|(k, _)| k.to_lowercase() == "image")
299                && obj.len() == 1
300            {
301                // Only one field, and it's "image" (case-insensitive)
302                // Try to parse the nested image object
303                if let Some(image_obj) = value.as_object()
304                    && let Some(image) = LanguageModelImage::from_json(image_obj)
305                {
306                    return Ok(Self::Image(image));
307                }
308            }
309
310            // Try as direct Image (object with "source" and "size" fields)
311            if let Some(image) = LanguageModelImage::from_json(obj) {
312                return Ok(Self::Image(image));
313            }
314        }
315
316        // If none of the variants match, return an error with the problematic JSON
317        Err(D::Error::custom(format!(
318            "data did not match any variant of LanguageModelToolResultContent. Expected either a string, \
319             an object with 'type': 'text', a wrapped variant like {{\"Text\": \"...\"}}, or an image object. Got: {}",
320            serde_json::to_string_pretty(&value).unwrap_or_else(|_| value.to_string())
321        )))
322    }
323}
324
325impl LanguageModelToolResultContent {
326    pub fn to_str(&self) -> Option<&str> {
327        match self {
328            Self::Text(text) => Some(text),
329            Self::Image(_) => None,
330        }
331    }
332
333    pub fn is_empty(&self) -> bool {
334        match self {
335            Self::Text(text) => text.chars().all(|c| c.is_whitespace()),
336            Self::Image(_) => false,
337        }
338    }
339}
340
341impl From<&str> for LanguageModelToolResultContent {
342    fn from(value: &str) -> Self {
343        Self::Text(Arc::from(value))
344    }
345}
346
347impl From<String> for LanguageModelToolResultContent {
348    fn from(value: String) -> Self {
349        Self::Text(Arc::from(value))
350    }
351}
352
353impl From<LanguageModelImage> for LanguageModelToolResultContent {
354    fn from(image: LanguageModelImage) -> Self {
355        Self::Image(image)
356    }
357}
358
/// A single piece of content within a request message.
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
pub enum MessageContent {
    /// Plain text.
    Text(String),
    /// A model "thinking" block, with an optional provider signature.
    Thinking {
        text: String,
        signature: Option<String>,
    },
    /// Thinking content returned by the provider in redacted (opaque) form.
    RedactedThinking(String),
    /// An inline image.
    Image(LanguageModelImage),
    /// A tool invocation requested by the model.
    ToolUse(LanguageModelToolUse),
    /// The result of a tool invocation.
    ToolResult(LanguageModelToolResult),
}
371
372impl MessageContent {
373    pub fn to_str(&self) -> Option<&str> {
374        match self {
375            MessageContent::Text(text) => Some(text.as_str()),
376            MessageContent::Thinking { text, .. } => Some(text.as_str()),
377            MessageContent::RedactedThinking(_) => None,
378            MessageContent::ToolResult(tool_result) => tool_result.content.to_str(),
379            MessageContent::ToolUse(_) | MessageContent::Image(_) => None,
380        }
381    }
382
383    pub fn is_empty(&self) -> bool {
384        match self {
385            MessageContent::Text(text) => text.chars().all(|c| c.is_whitespace()),
386            MessageContent::Thinking { text, .. } => text.chars().all(|c| c.is_whitespace()),
387            MessageContent::ToolResult(tool_result) => tool_result.content.is_empty(),
388            MessageContent::RedactedThinking(_)
389            | MessageContent::ToolUse(_)
390            | MessageContent::Image(_) => false,
391        }
392    }
393}
394
395impl From<String> for MessageContent {
396    fn from(value: String) -> Self {
397        MessageContent::Text(value)
398    }
399}
400
401impl From<&str> for MessageContent {
402    fn from(value: &str) -> Self {
403        MessageContent::Text(value.to_string())
404    }
405}
406
/// One message in a language-model request: a role plus ordered content items.
#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Hash)]
pub struct LanguageModelRequestMessage {
    pub role: Role,
    pub content: Vec<MessageContent>,
    // NOTE(review): presumably marks this message as a prompt-cache
    // breakpoint for providers that support caching — confirm with callers.
    pub cache: bool,
    /// Provider-specific reasoning metadata, if any; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub reasoning_details: Option<serde_json::Value>,
}
415
416impl LanguageModelRequestMessage {
417    pub fn string_contents(&self) -> String {
418        let mut buffer = String::new();
419        for string in self.content.iter().filter_map(|content| content.to_str()) {
420            buffer.push_str(string);
421        }
422
423        buffer
424    }
425
426    pub fn contents_empty(&self) -> bool {
427        self.content.iter().all(|content| content.is_empty())
428    }
429}
430
/// A tool the model is allowed to call, described to the provider by name,
/// description, and a JSON schema for its input.
#[derive(Debug, PartialEq, Hash, Clone, Serialize, Deserialize)]
pub struct LanguageModelRequestTool {
    pub name: String,
    pub description: String,
    /// JSON schema describing the tool's input payload.
    pub input_schema: serde_json::Value,
    // NOTE(review): presumably asks the provider to stream partial tool input
    // as it is produced — confirm against provider integrations.
    pub use_input_streaming: bool,
}
438
/// How the model may choose tools for a turn (mirrors the providers'
/// `tool_choice` options).
#[derive(Debug, PartialEq, Hash, Clone, Serialize, Deserialize)]
pub enum LanguageModelToolChoice {
    /// The model decides whether (and which) tool to call.
    Auto,
    /// The model must call some tool.
    Any,
    /// The model must not call any tool.
    None,
}
445
/// A complete request to a language model: conversation, tools, and
/// generation parameters.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct LanguageModelRequest {
    /// Identifier of the thread this request belongs to, if any.
    pub thread_id: Option<String>,
    /// Identifier of the prompt that produced this request, if any.
    pub prompt_id: Option<String>,
    /// What the completion is for (e.g. chat, edit), when known.
    pub intent: Option<CompletionIntent>,
    pub messages: Vec<LanguageModelRequestMessage>,
    pub tools: Vec<LanguageModelRequestTool>,
    pub tool_choice: Option<LanguageModelToolChoice>,
    /// Stop sequences that end generation when emitted.
    pub stop: Vec<String>,
    pub temperature: Option<f32>,
    /// Whether the model may emit thinking content for this request.
    pub thinking_allowed: bool,
    // NOTE(review): presumably a provider-specific reasoning-effort level
    // (e.g. "low"/"medium"/"high") — confirm against provider integrations.
    pub thinking_effort: Option<String>,
    /// Requested speed tier; see [`Speed`].
    pub speed: Option<Speed>,
}
460
/// Speed tier requested for a completion; serialized in snake_case
/// ("standard" / "fast"). Defaults to [`Speed::Standard`].
#[derive(Clone, Copy, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum Speed {
    #[default]
    Standard,
    Fast,
}
468
469impl Speed {
470    pub fn toggle(self) -> Self {
471        match self {
472            Speed::Standard => Speed::Fast,
473            Speed::Fast => Speed::Standard,
474        }
475    }
476}
477
478impl From<Speed> for anthropic::Speed {
479    fn from(speed: Speed) -> Self {
480        match speed {
481            Speed::Standard => anthropic::Speed::Standard,
482            Speed::Fast => anthropic::Speed::Fast,
483        }
484    }
485}
486
/// A message returned by a model; both fields are optional because providers
/// may send partial messages.
#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
pub struct LanguageModelResponseMessage {
    pub role: Option<Role>,
    pub content: Option<String>,
}
492
#[cfg(test)]
mod tests {
    use super::*;
    use base64::Engine as _;
    use gpui::TestAppContext;
    use image::ImageDecoder as _;

    // Decodes a base64 string (as stored in `LanguageModelImage.source`)
    // back into raw PNG bytes.
    fn base64_to_png_bytes(base64_png: &str) -> Vec<u8> {
        base64::engine::general_purpose::STANDARD
            .decode(base64_png.as_bytes())
            .expect("base64 should decode")
    }

    // Reads the pixel dimensions out of encoded PNG bytes.
    fn png_dimensions(png_bytes: &[u8]) -> (u32, u32) {
        let decoder =
            image::codecs::png::PngDecoder::new(Cursor::new(png_bytes)).expect("png should decode");
        decoder.dimensions()
    }

    // Builds a PNG that compresses poorly, so its encoded size tracks its
    // pixel count closely.
    fn make_noisy_png_bytes(width: u32, height: u32) -> Vec<u8> {
        // Create an RGBA image with per-pixel variance to avoid PNG compressing too well.
        let mut img = image::RgbaImage::new(width, height);
        for y in 0..height {
            for x in 0..width {
                let r = ((x ^ y) & 0xFF) as u8;
                let g = ((x.wrapping_mul(31) ^ y.wrapping_mul(17)) & 0xFF) as u8;
                let b = ((x.wrapping_mul(131) ^ y.wrapping_mul(7)) & 0xFF) as u8;
                img.put_pixel(x, y, image::Rgba([r, g, b, 0xFF]));
            }
        }

        let mut out = Vec::new();
        image::DynamicImage::ImageRgba8(img)
            .write_with_encoder(PngEncoder::new(&mut out))
            .expect("png encoding should succeed");
        out
    }

    // Verifies `from_image` iteratively downscales oversized images until the
    // encoded PNG fits `DEFAULT_IMAGE_MAX_BYTES`.
    #[gpui::test]
    async fn test_from_image_downscales_to_default_5mb_limit(cx: &mut TestAppContext) {
        // Pick a size that reliably produces a PNG > 5MB when filled with noise.
        // If this fails (image is too small), bump dimensions.
        let original_png = make_noisy_png_bytes(4096, 4096);
        assert!(
            original_png.len() > DEFAULT_IMAGE_MAX_BYTES,
            "precondition failed: noisy PNG must exceed DEFAULT_IMAGE_MAX_BYTES"
        );

        let image = gpui::Image::from_bytes(ImageFormat::Png, original_png);
        let lm_image = cx
            .update(|cx| LanguageModelImage::from_image(Arc::new(image), cx))
            .await
            .expect("image conversion should succeed");

        let encoded_png = base64_to_png_bytes(lm_image.source.as_ref());
        assert!(
            encoded_png.len() <= DEFAULT_IMAGE_MAX_BYTES,
            "expected encoded PNG <= DEFAULT_IMAGE_MAX_BYTES, got {} bytes",
            encoded_png.len()
        );

        // Ensure we actually downscaled in pixels (not just re-encoded).
        let (w, h) = png_dimensions(&encoded_png);
        assert!(
            w < 4096 || h < 4096,
            "expected image to be downscaled in at least one dimension; got {w}x{h}"
        );
    }

    // Exercises every accepted (and several rejected) JSON shapes of the
    // manual `Deserialize` impl for `LanguageModelToolResultContent`.
    #[test]
    fn test_language_model_tool_result_content_deserialization() {
        let json = r#""This is plain text""#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("This is plain text".into())
        );

        let json = r#"{"type": "text", "text": "This is wrapped text"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("This is wrapped text".into())
        );

        let json = r#"{"Type": "TEXT", "TEXT": "Case insensitive"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Case insensitive".into())
        );

        let json = r#"{"Text": "Wrapped variant"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Wrapped variant".into())
        );

        let json = r#"{"text": "Lowercase wrapped"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Lowercase wrapped".into())
        );

        // Test image deserialization
        let json = r#"{
            "source": "base64encodedimagedata",
            "size": {
                "width": 100,
                "height": 200
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "base64encodedimagedata");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 100);
                assert_eq!(size.height.0, 200);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test wrapped Image variant
        let json = r#"{
            "Image": {
                "source": "wrappedimagedata",
                "size": {
                    "width": 50,
                    "height": 75
                }
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "wrappedimagedata");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 50);
                assert_eq!(size.height.0, 75);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test wrapped Image variant with case insensitive
        let json = r#"{
            "image": {
                "Source": "caseinsensitive",
                "SIZE": {
                    "width": 30,
                    "height": 40
                }
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "caseinsensitive");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 30);
                assert_eq!(size.height.0, 40);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test that wrapped text with wrong type fails
        let json = r#"{"type": "blahblah", "text": "This should fail"}"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        // Test that malformed JSON fails
        let json = r#"{"invalid": "structure"}"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        // Test edge cases
        let json = r#""""#; // Empty string
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(result, LanguageModelToolResultContent::Text("".into()));

        // Test with extra fields in wrapped text (should be ignored)
        let json = r#"{"type": "text", "text": "Hello", "extra": "field"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(result, LanguageModelToolResultContent::Text("Hello".into()));

        // Test direct image with case-insensitive fields
        let json = r#"{
            "SOURCE": "directimage",
            "Size": {
                "width": 200,
                "height": 300
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "directimage");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 200);
                assert_eq!(size.height.0, 300);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test that multiple fields prevent wrapped variant interpretation
        let json = r#"{"Text": "not wrapped", "extra": "field"}"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        // Test wrapped text with uppercase TEXT variant
        let json = r#"{"TEXT": "Uppercase variant"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Uppercase variant".into())
        );

        // Test that numbers and other JSON values fail gracefully
        let json = r#"123"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        let json = r#"null"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        let json = r#"[1, 2, 3]"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());
    }
}