request.rs

  1use std::io::{Cursor, Write};
  2use std::sync::Arc;
  3
  4use anyhow::Result;
  5use base64::write::EncoderWriter;
  6use gpui::{
  7    App, AppContext as _, DevicePixels, Image, ImageFormat, ObjectFit, SharedString, Size, Task,
  8    point, px, size,
  9};
 10use image::GenericImageView as _;
 11use image::codecs::png::PngEncoder;
 12use serde::{Deserialize, Serialize};
 13use util::ResultExt;
 14
 15use crate::role::Role;
 16use crate::{LanguageModelToolUse, LanguageModelToolUseId};
 17
/// An image prepared for sending to a language model, stored as base64-encoded PNG.
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
pub struct LanguageModelImage {
    /// A base64-encoded PNG image.
    pub source: SharedString,
    /// Pixel dimensions of the image, when known. Omitted from serialized
    /// output when absent.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub size: Option<Size<DevicePixels>>,
}
 25
 26impl LanguageModelImage {
 27    pub fn len(&self) -> usize {
 28        self.source.len()
 29    }
 30
 31    pub fn is_empty(&self) -> bool {
 32        self.source.is_empty()
 33    }
 34
 35    // Parse Self from a JSON object with case-insensitive field names
 36    pub fn from_json(obj: &serde_json::Map<String, serde_json::Value>) -> Option<Self> {
 37        let mut source = None;
 38        let mut size_obj = None;
 39
 40        // Find source and size fields (case-insensitive)
 41        for (k, v) in obj.iter() {
 42            match k.to_lowercase().as_str() {
 43                "source" => source = v.as_str(),
 44                "size" => size_obj = v.as_object(),
 45                _ => {}
 46            }
 47        }
 48
 49        let source = source?;
 50        let size_obj = size_obj?;
 51
 52        let mut width = None;
 53        let mut height = None;
 54
 55        // Find width and height in size object (case-insensitive)
 56        for (k, v) in size_obj.iter() {
 57            match k.to_lowercase().as_str() {
 58                "width" => width = v.as_i64().map(|w| w as i32),
 59                "height" => height = v.as_i64().map(|h| h as i32),
 60                _ => {}
 61            }
 62        }
 63
 64        Some(Self {
 65            size: Some(size(DevicePixels(width?), DevicePixels(height?))),
 66            source: SharedString::from(source.to_string()),
 67        })
 68    }
 69}
 70
 71impl std::fmt::Debug for LanguageModelImage {
 72    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 73        f.debug_struct("LanguageModelImage")
 74            .field("source", &format!("<{} bytes>", self.source.len()))
 75            .field("size", &self.size)
 76            .finish()
 77    }
 78}
 79
/// Anthropic wants uploaded images to be smaller than this in both dimensions.
// NOTE(review): 1568px matches Anthropic's documented max useful dimension — confirm
// it is still current before bumping.
const ANTHROPIC_SIZE_LIMIT: f32 = 1568.;

/// Default per-image hard limit (in bytes) for the encoded image payload we send upstream.
///
/// NOTE: `LanguageModelImage.source` is base64-encoded PNG bytes (without the `data:` prefix).
/// This limit is enforced on the encoded PNG bytes *before* base64 encoding.
// NOTE(review): presumably chosen to match Anthropic's 5 MB per-image API limit —
// verify against other providers before relying on it generally.
const DEFAULT_IMAGE_MAX_BYTES: usize = 5 * 1024 * 1024;

/// Conservative cap on how many times we'll attempt to shrink/re-encode an image to fit
/// `DEFAULT_IMAGE_MAX_BYTES`.
const MAX_IMAGE_DOWNSCALE_PASSES: usize = 8;
 92
 93impl LanguageModelImage {
 94    // All language model images are encoded as PNGs.
 95    pub const FORMAT: ImageFormat = ImageFormat::Png;
 96
 97    pub fn empty() -> Self {
 98        Self {
 99            source: "".into(),
100            size: None,
101        }
102    }
103
104    pub fn from_image(data: Arc<Image>, cx: &mut App) -> Task<Option<Self>> {
105        cx.background_spawn(async move {
106            let image_bytes = Cursor::new(data.bytes());
107            let dynamic_image = match data.format() {
108                ImageFormat::Png => image::codecs::png::PngDecoder::new(image_bytes)
109                    .and_then(image::DynamicImage::from_decoder),
110                ImageFormat::Jpeg => image::codecs::jpeg::JpegDecoder::new(image_bytes)
111                    .and_then(image::DynamicImage::from_decoder),
112                ImageFormat::Webp => image::codecs::webp::WebPDecoder::new(image_bytes)
113                    .and_then(image::DynamicImage::from_decoder),
114                ImageFormat::Gif => image::codecs::gif::GifDecoder::new(image_bytes)
115                    .and_then(image::DynamicImage::from_decoder),
116                ImageFormat::Bmp => image::codecs::bmp::BmpDecoder::new(image_bytes)
117                    .and_then(image::DynamicImage::from_decoder),
118                ImageFormat::Tiff => image::codecs::tiff::TiffDecoder::new(image_bytes)
119                    .and_then(image::DynamicImage::from_decoder),
120                _ => return None,
121            }
122            .log_err()?;
123
124            let width = dynamic_image.width();
125            let height = dynamic_image.height();
126            let image_size = size(DevicePixels(width as i32), DevicePixels(height as i32));
127
128            // First apply any provider-specific dimension constraints we know about (Anthropic).
129            let mut processed_image = if image_size.width.0 > ANTHROPIC_SIZE_LIMIT as i32
130                || image_size.height.0 > ANTHROPIC_SIZE_LIMIT as i32
131            {
132                let new_bounds = ObjectFit::ScaleDown.get_bounds(
133                    gpui::Bounds {
134                        origin: point(px(0.0), px(0.0)),
135                        size: size(px(ANTHROPIC_SIZE_LIMIT), px(ANTHROPIC_SIZE_LIMIT)),
136                    },
137                    image_size,
138                );
139                dynamic_image.resize(
140                    new_bounds.size.width.into(),
141                    new_bounds.size.height.into(),
142                    image::imageops::FilterType::Triangle,
143                )
144            } else {
145                dynamic_image
146            };
147
148            // Then enforce a default per-image size cap on the encoded PNG bytes.
149            //
150            // We always send PNG bytes (either original PNG bytes, or re-encoded PNG) base64'd.
151            // The upstream provider limit we want to respect is effectively on the binary image
152            // payload size, so we enforce against the encoded PNG bytes before base64 encoding.
153            let mut encoded_png = encode_png_bytes(&processed_image).log_err()?;
154            for _pass in 0..MAX_IMAGE_DOWNSCALE_PASSES {
155                if encoded_png.len() <= DEFAULT_IMAGE_MAX_BYTES {
156                    break;
157                }
158
159                // Scale down geometrically to converge quickly. We don't know the final PNG size
160                // as a function of pixels, so we iteratively shrink.
161                let (w, h) = processed_image.dimensions();
162                if w <= 1 || h <= 1 {
163                    break;
164                }
165
166                // Shrink by ~15% each pass (0.85). This is a compromise between speed and
167                // preserving image detail.
168                let new_w = ((w as f32) * 0.85).round().max(1.0) as u32;
169                let new_h = ((h as f32) * 0.85).round().max(1.0) as u32;
170
171                processed_image =
172                    processed_image.resize(new_w, new_h, image::imageops::FilterType::Triangle);
173                encoded_png = encode_png_bytes(&processed_image).log_err()?;
174            }
175
176            if encoded_png.len() > DEFAULT_IMAGE_MAX_BYTES {
177                // Still too large after multiple passes; treat as non-convertible for now.
178                // (Provider-specific handling can be introduced later.)
179                return None;
180            }
181
182            // Now base64 encode the PNG bytes.
183            let base64_image = encode_bytes_as_base64(encoded_png.as_slice()).log_err()?;
184
185            // SAFETY: The base64 encoder should not produce non-UTF8.
186            let source = unsafe { String::from_utf8_unchecked(base64_image) };
187
188            Some(LanguageModelImage {
189                size: Some(image_size),
190                source: source.into(),
191            })
192        })
193    }
194
195    pub fn estimate_tokens(&self) -> usize {
196        let Some(size) = self.size.as_ref() else {
197            return 0;
198        };
199        let width = size.width.0.unsigned_abs() as usize;
200        let height = size.height.0.unsigned_abs() as usize;
201
202        // From: https://docs.anthropic.com/en/docs/build-with-claude/vision#calculate-image-costs
203        // Note that are a lot of conditions on Anthropic's API, and OpenAI doesn't use this,
204        // so this method is more of a rough guess.
205        (width * height) / 750
206    }
207
208    pub fn to_base64_url(&self) -> String {
209        format!("data:image/png;base64,{}", self.source)
210    }
211}
212
213fn encode_png_bytes(image: &image::DynamicImage) -> Result<Vec<u8>> {
214    let mut png = Vec::new();
215    image.write_with_encoder(PngEncoder::new(&mut png))?;
216    Ok(png)
217}
218
219fn encode_bytes_as_base64(bytes: &[u8]) -> Result<Vec<u8>> {
220    let mut base64_image = Vec::new();
221    {
222        let mut base64_encoder = EncoderWriter::new(
223            Cursor::new(&mut base64_image),
224            &base64::engine::general_purpose::STANDARD,
225        );
226        base64_encoder.write_all(bytes)?;
227    }
228    Ok(base64_image)
229}
230
/// The result of a tool invocation, to be sent back to the model.
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
pub struct LanguageModelToolResult {
    /// Identifier of the tool-use request this result answers.
    pub tool_use_id: LanguageModelToolUseId,
    /// Name of the tool that produced this result.
    pub tool_name: Arc<str>,
    /// Whether the tool run failed; providers typically surface this to the model.
    pub is_error: bool,
    /// The tool output formatted for presenting to the model
    pub content: LanguageModelToolResultContent,
    /// The raw tool output, if available, often for debugging or extra state for replay
    pub output: Option<serde_json::Value>,
}
241
/// Tool output as shown to the model: either plain text or an image.
///
/// Deserialization is intentionally lenient — see the hand-written
/// `Deserialize` impl below.
#[derive(Debug, Clone, Serialize, Eq, PartialEq, Hash)]
pub enum LanguageModelToolResultContent {
    Text(Arc<str>),
    Image(LanguageModelImage),
}
247
impl<'de> Deserialize<'de> for LanguageModelToolResultContent {
    /// Lenient deserialization: tool results arrive from different providers
    /// and models in several shapes, so the known shapes are probed in order
    /// rather than relying on serde's derived enum representation. The order
    /// matters — earlier, more specific shapes win over the generic ones.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::Error;

        // Buffer the input as a `serde_json::Value` so multiple shapes can be
        // tried against the same data.
        let value = serde_json::Value::deserialize(deserializer)?;

        // Models can provide these responses in several styles. Try each in order.

        // 1. Try as plain string
        if let Ok(text) = serde_json::from_value::<String>(value.clone()) {
            return Ok(Self::Text(Arc::from(text)));
        }

        // 2. Try as object
        if let Some(obj) = value.as_object() {
            // get a JSON field case-insensitively
            fn get_field<'a>(
                obj: &'a serde_json::Map<String, serde_json::Value>,
                field: &str,
            ) -> Option<&'a serde_json::Value> {
                obj.iter()
                    .find(|(k, _)| k.to_lowercase() == field.to_lowercase())
                    .map(|(_, v)| v)
            }

            // Accept wrapped text format: { "type": "text", "text": "..." }
            // Extra fields are tolerated here; only `type` and `text` matter.
            if let (Some(type_value), Some(text_value)) =
                (get_field(obj, "type"), get_field(obj, "text"))
                && let Some(type_str) = type_value.as_str()
                && type_str.to_lowercase() == "text"
                && let Some(text) = text_value.as_str()
            {
                return Ok(Self::Text(Arc::from(text)));
            }

            // Check for wrapped Text variant: { "text": "..." }
            // This only applies when "text" is the object's sole field; extra
            // fields mean it is not a serialized enum variant.
            if let Some((_key, value)) = obj.iter().find(|(k, _)| k.to_lowercase() == "text")
                && obj.len() == 1
            {
                // Only one field, and it's "text" (case-insensitive)
                if let Some(text) = value.as_str() {
                    return Ok(Self::Text(Arc::from(text)));
                }
            }

            // Check for wrapped Image variant: { "image": { "source": "...", "size": ... } }
            if let Some((_key, value)) = obj.iter().find(|(k, _)| k.to_lowercase() == "image")
                && obj.len() == 1
            {
                // Only one field, and it's "image" (case-insensitive)
                // Try to parse the nested image object
                if let Some(image_obj) = value.as_object()
                    && let Some(image) = LanguageModelImage::from_json(image_obj)
                {
                    return Ok(Self::Image(image));
                }
            }

            // Try as direct Image (object with "source" and "size" fields)
            if let Some(image) = LanguageModelImage::from_json(obj) {
                return Ok(Self::Image(image));
            }
        }

        // If none of the variants match, return an error with the problematic JSON
        Err(D::Error::custom(format!(
            "data did not match any variant of LanguageModelToolResultContent. Expected either a string, \
             an object with 'type': 'text', a wrapped variant like {{\"Text\": \"...\"}}, or an image object. Got: {}",
            serde_json::to_string_pretty(&value).unwrap_or_else(|_| value.to_string())
        )))
    }
}
323
324impl LanguageModelToolResultContent {
325    pub fn to_str(&self) -> Option<&str> {
326        match self {
327            Self::Text(text) => Some(text),
328            Self::Image(_) => None,
329        }
330    }
331
332    pub fn is_empty(&self) -> bool {
333        match self {
334            Self::Text(text) => text.chars().all(|c| c.is_whitespace()),
335            Self::Image(_) => false,
336        }
337    }
338}
339
340impl From<&str> for LanguageModelToolResultContent {
341    fn from(value: &str) -> Self {
342        Self::Text(Arc::from(value))
343    }
344}
345
346impl From<String> for LanguageModelToolResultContent {
347    fn from(value: String) -> Self {
348        Self::Text(Arc::from(value))
349    }
350}
351
352impl From<LanguageModelImage> for LanguageModelToolResultContent {
353    fn from(image: LanguageModelImage) -> Self {
354        Self::Image(image)
355    }
356}
357
/// One piece of a request message's content.
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
pub enum MessageContent {
    /// Plain text.
    Text(String),
    /// Model "thinking" output, with an optional provider signature.
    Thinking {
        text: String,
        signature: Option<String>,
    },
    /// Provider-redacted thinking payload (opaque to us).
    RedactedThinking(String),
    /// An inline image.
    Image(LanguageModelImage),
    /// A tool invocation requested by the model.
    ToolUse(LanguageModelToolUse),
    /// The result of a tool invocation.
    ToolResult(LanguageModelToolResult),
}
370
371impl MessageContent {
372    pub fn to_str(&self) -> Option<&str> {
373        match self {
374            MessageContent::Text(text) => Some(text.as_str()),
375            MessageContent::Thinking { text, .. } => Some(text.as_str()),
376            MessageContent::RedactedThinking(_) => None,
377            MessageContent::ToolResult(tool_result) => tool_result.content.to_str(),
378            MessageContent::ToolUse(_) | MessageContent::Image(_) => None,
379        }
380    }
381
382    pub fn is_empty(&self) -> bool {
383        match self {
384            MessageContent::Text(text) => text.chars().all(|c| c.is_whitespace()),
385            MessageContent::Thinking { text, .. } => text.chars().all(|c| c.is_whitespace()),
386            MessageContent::ToolResult(tool_result) => tool_result.content.is_empty(),
387            MessageContent::RedactedThinking(_)
388            | MessageContent::ToolUse(_)
389            | MessageContent::Image(_) => false,
390        }
391    }
392}
393
394impl From<String> for MessageContent {
395    fn from(value: String) -> Self {
396        MessageContent::Text(value)
397    }
398}
399
400impl From<&str> for MessageContent {
401    fn from(value: &str) -> Self {
402        MessageContent::Text(value.to_string())
403    }
404}
405
/// A single message within a [`LanguageModelRequest`].
#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Hash)]
pub struct LanguageModelRequestMessage {
    /// Who authored this message (user, assistant, system).
    pub role: Role,
    /// Ordered content parts making up the message body.
    pub content: Vec<MessageContent>,
    // NOTE(review): presumably marks this message as a prompt-cache breakpoint
    // for providers that support caching — confirm against provider adapters.
    pub cache: bool,
    /// Provider-specific reasoning metadata, passed through opaquely.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub reasoning_details: Option<serde_json::Value>,
}
414
415impl LanguageModelRequestMessage {
416    pub fn string_contents(&self) -> String {
417        let mut buffer = String::new();
418        for string in self.content.iter().filter_map(|content| content.to_str()) {
419            buffer.push_str(string);
420        }
421
422        buffer
423    }
424
425    pub fn contents_empty(&self) -> bool {
426        self.content.iter().all(|content| content.is_empty())
427    }
428}
429
/// A tool definition offered to the model as part of a request.
#[derive(Debug, PartialEq, Hash, Clone, Serialize, Deserialize)]
pub struct LanguageModelRequestTool {
    /// Tool name as referenced by the model.
    pub name: String,
    /// Natural-language description telling the model when/how to use the tool.
    pub description: String,
    /// JSON schema describing the tool's expected input object.
    pub input_schema: serde_json::Value,
    // NOTE(review): presumably opts this tool into streaming partial input
    // chunks — confirm against the provider adapters that read it.
    pub use_input_streaming: bool,
}
437
/// Constraint on the model's tool usage for a request.
#[derive(Debug, PartialEq, Hash, Clone, Serialize, Deserialize)]
pub enum LanguageModelToolChoice {
    /// The model decides whether to call a tool.
    Auto,
    /// The model must call some tool.
    Any,
    /// The model must not call any tool.
    None,
}
444
/// Why a completion is being requested; lets consumers tailor behavior
/// (e.g. prompt, temperature) per use case. Serialized in `snake_case`.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CompletionIntent {
    UserPrompt,
    Subagent,
    ToolResults,
    ThreadSummarization,
    ThreadContextSummarization,
    CreateFile,
    EditFile,
    InlineAssist,
    TerminalInlineAssist,
    GenerateGitCommitMessage,
}
459
/// A provider-agnostic completion request.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct LanguageModelRequest {
    /// Identifier of the originating thread, if any.
    pub thread_id: Option<String>,
    /// Identifier of the originating prompt, if any.
    pub prompt_id: Option<String>,
    /// Why this completion is being requested.
    pub intent: Option<CompletionIntent>,
    /// Conversation history, oldest first.
    pub messages: Vec<LanguageModelRequestMessage>,
    /// Tools the model may invoke.
    pub tools: Vec<LanguageModelRequestTool>,
    /// Constraint on tool usage; `None` leaves it to the provider default.
    pub tool_choice: Option<LanguageModelToolChoice>,
    /// Stop sequences that end generation.
    pub stop: Vec<String>,
    /// Sampling temperature; `None` uses the provider default.
    pub temperature: Option<f32>,
    /// Whether the model is permitted to emit "thinking" content.
    pub thinking_allowed: bool,
    // NOTE(review): presumably a provider-specific effort level (e.g. "low"/
    // "medium"/"high") — confirm the accepted values at the call sites.
    pub thinking_effort: Option<String>,
    /// Requested speed tier, for providers that support one.
    pub speed: Option<Speed>,
}
474
/// Speed tier for a completion request. Serialized in `snake_case`.
#[derive(Clone, Copy, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum Speed {
    /// Normal processing (the default).
    #[default]
    Standard,
    /// Faster processing, for providers that offer it.
    Fast,
}
482
483impl Speed {
484    pub fn toggle(self) -> Self {
485        match self {
486            Speed::Standard => Speed::Fast,
487            Speed::Fast => Speed::Standard,
488        }
489    }
490}
491
492impl From<Speed> for anthropic::Speed {
493    fn from(speed: Speed) -> Self {
494        match speed {
495            Speed::Standard => anthropic::Speed::Standard,
496            Speed::Fast => anthropic::Speed::Fast,
497        }
498    }
499}
500
/// A (possibly partial) message returned by a model provider.
#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
pub struct LanguageModelResponseMessage {
    /// Role of the responder, when reported.
    pub role: Option<Role>,
    /// Text content of the response, when present.
    pub content: Option<String>,
}
506
#[cfg(test)]
mod tests {
    use super::*;
    use base64::Engine as _;
    use gpui::TestAppContext;
    use image::ImageDecoder as _;

    /// Decode a base64 string (as stored in `LanguageModelImage::source`)
    /// back into raw PNG bytes.
    fn base64_to_png_bytes(base64_png: &str) -> Vec<u8> {
        base64::engine::general_purpose::STANDARD
            .decode(base64_png.as_bytes())
            .expect("base64 should decode")
    }

    /// Read the pixel dimensions out of an encoded PNG.
    fn png_dimensions(png_bytes: &[u8]) -> (u32, u32) {
        let decoder =
            image::codecs::png::PngDecoder::new(Cursor::new(png_bytes)).expect("png should decode");
        decoder.dimensions()
    }

    /// Build a PNG whose bytes resist compression, so its encoded size tracks
    /// its pixel count (needed to reliably exceed the byte limit under test).
    fn make_noisy_png_bytes(width: u32, height: u32) -> Vec<u8> {
        // Create an RGBA image with per-pixel variance to avoid PNG compressing too well.
        let mut img = image::RgbaImage::new(width, height);
        for y in 0..height {
            for x in 0..width {
                let r = ((x ^ y) & 0xFF) as u8;
                let g = ((x.wrapping_mul(31) ^ y.wrapping_mul(17)) & 0xFF) as u8;
                let b = ((x.wrapping_mul(131) ^ y.wrapping_mul(7)) & 0xFF) as u8;
                img.put_pixel(x, y, image::Rgba([r, g, b, 0xFF]));
            }
        }

        let mut out = Vec::new();
        image::DynamicImage::ImageRgba8(img)
            .write_with_encoder(PngEncoder::new(&mut out))
            .expect("png encoding should succeed");
        out
    }

    /// End-to-end check that `from_image` shrinks an oversized PNG until the
    /// encoded payload fits `DEFAULT_IMAGE_MAX_BYTES`.
    #[gpui::test]
    async fn test_from_image_downscales_to_default_5mb_limit(cx: &mut TestAppContext) {
        // Pick a size that reliably produces a PNG > 5MB when filled with noise.
        // If this fails (image is too small), bump dimensions.
        let original_png = make_noisy_png_bytes(4096, 4096);
        assert!(
            original_png.len() > DEFAULT_IMAGE_MAX_BYTES,
            "precondition failed: noisy PNG must exceed DEFAULT_IMAGE_MAX_BYTES"
        );

        let image = gpui::Image::from_bytes(ImageFormat::Png, original_png);
        let lm_image = cx
            .update(|cx| LanguageModelImage::from_image(Arc::new(image), cx))
            .await
            .expect("image conversion should succeed");

        let encoded_png = base64_to_png_bytes(lm_image.source.as_ref());
        assert!(
            encoded_png.len() <= DEFAULT_IMAGE_MAX_BYTES,
            "expected encoded PNG <= DEFAULT_IMAGE_MAX_BYTES, got {} bytes",
            encoded_png.len()
        );

        // Ensure we actually downscaled in pixels (not just re-encoded).
        let (w, h) = png_dimensions(&encoded_png);
        assert!(
            w < 4096 || h < 4096,
            "expected image to be downscaled in at least one dimension; got {w}x{h}"
        );
    }

    /// Exercises every accepted (and rejected) input shape of the lenient
    /// `LanguageModelToolResultContent` deserializer, in the same order the
    /// impl probes them.
    #[test]
    fn test_language_model_tool_result_content_deserialization() {
        // Plain string input.
        let json = r#""This is plain text""#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("This is plain text".into())
        );

        // Wrapped text format: { "type": "text", "text": ... }.
        let json = r#"{"type": "text", "text": "This is wrapped text"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("This is wrapped text".into())
        );

        // Field names and the "text" type tag are matched case-insensitively.
        let json = r#"{"Type": "TEXT", "TEXT": "Case insensitive"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Case insensitive".into())
        );

        // Wrapped enum-variant style: { "Text": ... } with a single field.
        let json = r#"{"Text": "Wrapped variant"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Wrapped variant".into())
        );

        // Same, lowercase key.
        let json = r#"{"text": "Lowercase wrapped"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Lowercase wrapped".into())
        );

        // Test image deserialization
        let json = r#"{
            "source": "base64encodedimagedata",
            "size": {
                "width": 100,
                "height": 200
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "base64encodedimagedata");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 100);
                assert_eq!(size.height.0, 200);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test wrapped Image variant
        let json = r#"{
            "Image": {
                "source": "wrappedimagedata",
                "size": {
                    "width": 50,
                    "height": 75
                }
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "wrappedimagedata");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 50);
                assert_eq!(size.height.0, 75);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test wrapped Image variant with case insensitive
        let json = r#"{
            "image": {
                "Source": "caseinsensitive",
                "SIZE": {
                    "width": 30,
                    "height": 40
                }
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "caseinsensitive");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 30);
                assert_eq!(size.height.0, 40);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test that wrapped text with wrong type fails
        let json = r#"{"type": "blahblah", "text": "This should fail"}"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        // Test that malformed JSON fails
        let json = r#"{"invalid": "structure"}"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        // Test edge cases
        let json = r#""""#; // Empty string
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(result, LanguageModelToolResultContent::Text("".into()));

        // Test with extra fields in wrapped text (should be ignored)
        let json = r#"{"type": "text", "text": "Hello", "extra": "field"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(result, LanguageModelToolResultContent::Text("Hello".into()));

        // Test direct image with case-insensitive fields
        let json = r#"{
            "SOURCE": "directimage",
            "Size": {
                "width": 200,
                "height": 300
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "directimage");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 200);
                assert_eq!(size.height.0, 300);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test that multiple fields prevent wrapped variant interpretation
        let json = r#"{"Text": "not wrapped", "extra": "field"}"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        // Test wrapped text with uppercase TEXT variant
        let json = r#"{"TEXT": "Uppercase variant"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Uppercase variant".into())
        );

        // Test that numbers and other JSON values fail gracefully
        let json = r#"123"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        let json = r#"null"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        let json = r#"[1, 2, 3]"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());
    }
}