1use std::io::{Cursor, Write};
2use std::sync::Arc;
3
4use anyhow::Result;
5use base64::write::EncoderWriter;
6use cloud_llm_client::CompletionIntent;
7use gpui::{
8 App, AppContext as _, DevicePixels, Image, ImageFormat, ObjectFit, SharedString, Size, Task,
9 point, px, size,
10};
11use image::GenericImageView as _;
12use image::codecs::png::PngEncoder;
13use serde::{Deserialize, Serialize};
14use util::ResultExt;
15
16use crate::role::Role;
17use crate::{LanguageModelToolUse, LanguageModelToolUseId};
18
/// An image attachment for a language-model request.
///
/// The payload is always PNG data, base64-encoded, without a `data:` URL prefix
/// (see `Self::FORMAT` and `to_base64_url`).
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
pub struct LanguageModelImage {
    /// A base64-encoded PNG image.
    pub source: SharedString,
    /// Pixel dimensions of the image, when known. Skipped during
    /// serialization when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub size: Option<Size<DevicePixels>>,
}
26
27impl LanguageModelImage {
28 pub fn len(&self) -> usize {
29 self.source.len()
30 }
31
32 pub fn is_empty(&self) -> bool {
33 self.source.is_empty()
34 }
35
36 // Parse Self from a JSON object with case-insensitive field names
37 pub fn from_json(obj: &serde_json::Map<String, serde_json::Value>) -> Option<Self> {
38 let mut source = None;
39 let mut size_obj = None;
40
41 // Find source and size fields (case-insensitive)
42 for (k, v) in obj.iter() {
43 match k.to_lowercase().as_str() {
44 "source" => source = v.as_str(),
45 "size" => size_obj = v.as_object(),
46 _ => {}
47 }
48 }
49
50 let source = source?;
51 let size_obj = size_obj?;
52
53 let mut width = None;
54 let mut height = None;
55
56 // Find width and height in size object (case-insensitive)
57 for (k, v) in size_obj.iter() {
58 match k.to_lowercase().as_str() {
59 "width" => width = v.as_i64().map(|w| w as i32),
60 "height" => height = v.as_i64().map(|h| h as i32),
61 _ => {}
62 }
63 }
64
65 Some(Self {
66 size: Some(size(DevicePixels(width?), DevicePixels(height?))),
67 source: SharedString::from(source.to_string()),
68 })
69 }
70}
71
72impl std::fmt::Debug for LanguageModelImage {
73 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
74 f.debug_struct("LanguageModelImage")
75 .field("source", &format!("<{} bytes>", self.source.len()))
76 .field("size", &self.size)
77 .finish()
78 }
79}
80
/// Anthropic wants uploaded images to be smaller than this in both dimensions.
const ANTHROPIC_SIZE_LIMIT: f32 = 1568.;

/// Default per-image hard limit (in bytes) for the encoded image payload we send upstream.
///
/// NOTE: `LanguageModelImage.source` is base64-encoded PNG bytes (without the `data:` prefix).
/// This limit is enforced on the encoded PNG bytes *before* base64 encoding.
const DEFAULT_IMAGE_MAX_BYTES: usize = 5 * 1024 * 1024;

/// Conservative cap on how many times we'll attempt to shrink/re-encode an image to fit
/// `DEFAULT_IMAGE_MAX_BYTES`.
/// Each pass scales both dimensions by ~0.85, so eight passes reduce pixel area by
/// roughly an order of magnitude before we give up.
const MAX_IMAGE_DOWNSCALE_PASSES: usize = 8;
93
94impl LanguageModelImage {
95 // All language model images are encoded as PNGs.
96 pub const FORMAT: ImageFormat = ImageFormat::Png;
97
98 pub fn empty() -> Self {
99 Self {
100 source: "".into(),
101 size: None,
102 }
103 }
104
105 pub fn from_image(data: Arc<Image>, cx: &mut App) -> Task<Option<Self>> {
106 cx.background_spawn(async move {
107 let image_bytes = Cursor::new(data.bytes());
108 let dynamic_image = match data.format() {
109 ImageFormat::Png => image::codecs::png::PngDecoder::new(image_bytes)
110 .and_then(image::DynamicImage::from_decoder),
111 ImageFormat::Jpeg => image::codecs::jpeg::JpegDecoder::new(image_bytes)
112 .and_then(image::DynamicImage::from_decoder),
113 ImageFormat::Webp => image::codecs::webp::WebPDecoder::new(image_bytes)
114 .and_then(image::DynamicImage::from_decoder),
115 ImageFormat::Gif => image::codecs::gif::GifDecoder::new(image_bytes)
116 .and_then(image::DynamicImage::from_decoder),
117 ImageFormat::Bmp => image::codecs::bmp::BmpDecoder::new(image_bytes)
118 .and_then(image::DynamicImage::from_decoder),
119 ImageFormat::Tiff => image::codecs::tiff::TiffDecoder::new(image_bytes)
120 .and_then(image::DynamicImage::from_decoder),
121 _ => return None,
122 }
123 .log_err()?;
124
125 let width = dynamic_image.width();
126 let height = dynamic_image.height();
127 let image_size = size(DevicePixels(width as i32), DevicePixels(height as i32));
128
129 // First apply any provider-specific dimension constraints we know about (Anthropic).
130 let mut processed_image = if image_size.width.0 > ANTHROPIC_SIZE_LIMIT as i32
131 || image_size.height.0 > ANTHROPIC_SIZE_LIMIT as i32
132 {
133 let new_bounds = ObjectFit::ScaleDown.get_bounds(
134 gpui::Bounds {
135 origin: point(px(0.0), px(0.0)),
136 size: size(px(ANTHROPIC_SIZE_LIMIT), px(ANTHROPIC_SIZE_LIMIT)),
137 },
138 image_size,
139 );
140 dynamic_image.resize(
141 new_bounds.size.width.into(),
142 new_bounds.size.height.into(),
143 image::imageops::FilterType::Triangle,
144 )
145 } else {
146 dynamic_image
147 };
148
149 // Then enforce a default per-image size cap on the encoded PNG bytes.
150 //
151 // We always send PNG bytes (either original PNG bytes, or re-encoded PNG) base64'd.
152 // The upstream provider limit we want to respect is effectively on the binary image
153 // payload size, so we enforce against the encoded PNG bytes before base64 encoding.
154 let mut encoded_png = encode_png_bytes(&processed_image).log_err()?;
155 for _pass in 0..MAX_IMAGE_DOWNSCALE_PASSES {
156 if encoded_png.len() <= DEFAULT_IMAGE_MAX_BYTES {
157 break;
158 }
159
160 // Scale down geometrically to converge quickly. We don't know the final PNG size
161 // as a function of pixels, so we iteratively shrink.
162 let (w, h) = processed_image.dimensions();
163 if w <= 1 || h <= 1 {
164 break;
165 }
166
167 // Shrink by ~15% each pass (0.85). This is a compromise between speed and
168 // preserving image detail.
169 let new_w = ((w as f32) * 0.85).round().max(1.0) as u32;
170 let new_h = ((h as f32) * 0.85).round().max(1.0) as u32;
171
172 processed_image =
173 processed_image.resize(new_w, new_h, image::imageops::FilterType::Triangle);
174 encoded_png = encode_png_bytes(&processed_image).log_err()?;
175 }
176
177 if encoded_png.len() > DEFAULT_IMAGE_MAX_BYTES {
178 // Still too large after multiple passes; treat as non-convertible for now.
179 // (Provider-specific handling can be introduced later.)
180 return None;
181 }
182
183 // Now base64 encode the PNG bytes.
184 let base64_image = encode_bytes_as_base64(encoded_png.as_slice()).log_err()?;
185
186 // SAFETY: The base64 encoder should not produce non-UTF8.
187 let source = unsafe { String::from_utf8_unchecked(base64_image) };
188
189 Some(LanguageModelImage {
190 size: Some(image_size),
191 source: source.into(),
192 })
193 })
194 }
195
196 pub fn estimate_tokens(&self) -> usize {
197 let Some(size) = self.size.as_ref() else {
198 return 0;
199 };
200 let width = size.width.0.unsigned_abs() as usize;
201 let height = size.height.0.unsigned_abs() as usize;
202
203 // From: https://docs.anthropic.com/en/docs/build-with-claude/vision#calculate-image-costs
204 // Note that are a lot of conditions on Anthropic's API, and OpenAI doesn't use this,
205 // so this method is more of a rough guess.
206 (width * height) / 750
207 }
208
209 pub fn to_base64_url(&self) -> String {
210 format!("data:image/png;base64,{}", self.source)
211 }
212}
213
214fn encode_png_bytes(image: &image::DynamicImage) -> Result<Vec<u8>> {
215 let mut png = Vec::new();
216 image.write_with_encoder(PngEncoder::new(&mut png))?;
217 Ok(png)
218}
219
220fn encode_bytes_as_base64(bytes: &[u8]) -> Result<Vec<u8>> {
221 let mut base64_image = Vec::new();
222 {
223 let mut base64_encoder = EncoderWriter::new(
224 Cursor::new(&mut base64_image),
225 &base64::engine::general_purpose::STANDARD,
226 );
227 base64_encoder.write_all(bytes)?;
228 }
229 Ok(base64_image)
230}
231
/// The result of a tool invocation, sent back to the model.
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
pub struct LanguageModelToolResult {
    /// Identifier of the tool-use request this result answers.
    pub tool_use_id: LanguageModelToolUseId,
    /// Name of the tool that produced this result.
    pub tool_name: Arc<str>,
    /// Whether this result represents a failed tool run.
    pub is_error: bool,
    /// The tool output formatted for presenting to the model
    pub content: LanguageModelToolResultContent,
    /// The raw tool output, if available, often for debugging or extra state for replay
    pub output: Option<serde_json::Value>,
}
242
/// Content of a tool result: either plain text or an image.
///
/// `Serialize` uses the derived representation; `Deserialize` is implemented by
/// hand (below) to tolerate the several JSON shapes models produce.
#[derive(Debug, Clone, Serialize, Eq, PartialEq, Hash)]
pub enum LanguageModelToolResultContent {
    Text(Arc<str>),
    Image(LanguageModelImage),
}
248
249impl<'de> Deserialize<'de> for LanguageModelToolResultContent {
250 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
251 where
252 D: serde::Deserializer<'de>,
253 {
254 use serde::de::Error;
255
256 let value = serde_json::Value::deserialize(deserializer)?;
257
258 // Models can provide these responses in several styles. Try each in order.
259
260 // 1. Try as plain string
261 if let Ok(text) = serde_json::from_value::<String>(value.clone()) {
262 return Ok(Self::Text(Arc::from(text)));
263 }
264
265 // 2. Try as object
266 if let Some(obj) = value.as_object() {
267 // get a JSON field case-insensitively
268 fn get_field<'a>(
269 obj: &'a serde_json::Map<String, serde_json::Value>,
270 field: &str,
271 ) -> Option<&'a serde_json::Value> {
272 obj.iter()
273 .find(|(k, _)| k.to_lowercase() == field.to_lowercase())
274 .map(|(_, v)| v)
275 }
276
277 // Accept wrapped text format: { "type": "text", "text": "..." }
278 if let (Some(type_value), Some(text_value)) =
279 (get_field(obj, "type"), get_field(obj, "text"))
280 && let Some(type_str) = type_value.as_str()
281 && type_str.to_lowercase() == "text"
282 && let Some(text) = text_value.as_str()
283 {
284 return Ok(Self::Text(Arc::from(text)));
285 }
286
287 // Check for wrapped Text variant: { "text": "..." }
288 if let Some((_key, value)) = obj.iter().find(|(k, _)| k.to_lowercase() == "text")
289 && obj.len() == 1
290 {
291 // Only one field, and it's "text" (case-insensitive)
292 if let Some(text) = value.as_str() {
293 return Ok(Self::Text(Arc::from(text)));
294 }
295 }
296
297 // Check for wrapped Image variant: { "image": { "source": "...", "size": ... } }
298 if let Some((_key, value)) = obj.iter().find(|(k, _)| k.to_lowercase() == "image")
299 && obj.len() == 1
300 {
301 // Only one field, and it's "image" (case-insensitive)
302 // Try to parse the nested image object
303 if let Some(image_obj) = value.as_object()
304 && let Some(image) = LanguageModelImage::from_json(image_obj)
305 {
306 return Ok(Self::Image(image));
307 }
308 }
309
310 // Try as direct Image (object with "source" and "size" fields)
311 if let Some(image) = LanguageModelImage::from_json(obj) {
312 return Ok(Self::Image(image));
313 }
314 }
315
316 // If none of the variants match, return an error with the problematic JSON
317 Err(D::Error::custom(format!(
318 "data did not match any variant of LanguageModelToolResultContent. Expected either a string, \
319 an object with 'type': 'text', a wrapped variant like {{\"Text\": \"...\"}}, or an image object. Got: {}",
320 serde_json::to_string_pretty(&value).unwrap_or_else(|_| value.to_string())
321 )))
322 }
323}
324
325impl LanguageModelToolResultContent {
326 pub fn to_str(&self) -> Option<&str> {
327 match self {
328 Self::Text(text) => Some(text),
329 Self::Image(_) => None,
330 }
331 }
332
333 pub fn is_empty(&self) -> bool {
334 match self {
335 Self::Text(text) => text.chars().all(|c| c.is_whitespace()),
336 Self::Image(_) => false,
337 }
338 }
339}
340
341impl From<&str> for LanguageModelToolResultContent {
342 fn from(value: &str) -> Self {
343 Self::Text(Arc::from(value))
344 }
345}
346
347impl From<String> for LanguageModelToolResultContent {
348 fn from(value: String) -> Self {
349 Self::Text(Arc::from(value))
350 }
351}
352
/// Wrap an image as tool-result content.
impl From<LanguageModelImage> for LanguageModelToolResultContent {
    fn from(image: LanguageModelImage) -> Self {
        Self::Image(image)
    }
}
358
/// A single piece of content within a request message.
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
pub enum MessageContent {
    /// Plain text.
    Text(String),
    /// Model "thinking" output. `signature` appears to be an opaque
    /// provider-attached token — not used within this file; confirm semantics
    /// with the providers that produce it.
    Thinking {
        text: String,
        signature: Option<String>,
    },
    /// Thinking content carried as an opaque (redacted) string.
    RedactedThinking(String),
    /// An attached image.
    Image(LanguageModelImage),
    /// A tool invocation requested by the model.
    ToolUse(LanguageModelToolUse),
    /// The result of a tool invocation.
    ToolResult(LanguageModelToolResult),
}
371
372impl MessageContent {
373 pub fn to_str(&self) -> Option<&str> {
374 match self {
375 MessageContent::Text(text) => Some(text.as_str()),
376 MessageContent::Thinking { text, .. } => Some(text.as_str()),
377 MessageContent::RedactedThinking(_) => None,
378 MessageContent::ToolResult(tool_result) => tool_result.content.to_str(),
379 MessageContent::ToolUse(_) | MessageContent::Image(_) => None,
380 }
381 }
382
383 pub fn is_empty(&self) -> bool {
384 match self {
385 MessageContent::Text(text) => text.chars().all(|c| c.is_whitespace()),
386 MessageContent::Thinking { text, .. } => text.chars().all(|c| c.is_whitespace()),
387 MessageContent::ToolResult(tool_result) => tool_result.content.is_empty(),
388 MessageContent::RedactedThinking(_)
389 | MessageContent::ToolUse(_)
390 | MessageContent::Image(_) => false,
391 }
392 }
393}
394
395impl From<String> for MessageContent {
396 fn from(value: String) -> Self {
397 MessageContent::Text(value)
398 }
399}
400
401impl From<&str> for MessageContent {
402 fn from(value: &str) -> Self {
403 MessageContent::Text(value.to_string())
404 }
405}
406
/// A single message within a `LanguageModelRequest`.
#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Hash)]
pub struct LanguageModelRequestMessage {
    pub role: Role,
    pub content: Vec<MessageContent>,
    // Presumably marks this message as a prompt-cache breakpoint for providers
    // that support caching — confirm against the providers that consume it.
    pub cache: bool,
    /// Provider-specific reasoning metadata; omitted from serialization when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub reasoning_details: Option<serde_json::Value>,
}
415
416impl LanguageModelRequestMessage {
417 pub fn string_contents(&self) -> String {
418 let mut buffer = String::new();
419 for string in self.content.iter().filter_map(|content| content.to_str()) {
420 buffer.push_str(string);
421 }
422
423 buffer
424 }
425
426 pub fn contents_empty(&self) -> bool {
427 self.content.iter().all(|content| content.is_empty())
428 }
429}
430
/// A tool the model is allowed to call, described by a JSON schema.
#[derive(Debug, PartialEq, Hash, Clone, Serialize, Deserialize)]
pub struct LanguageModelRequestTool {
    pub name: String,
    pub description: String,
    /// JSON schema describing the tool's input object.
    pub input_schema: serde_json::Value,
    // NOTE(review): inferred from the name — whether providers should stream
    // partial tool input as it is generated; confirm against provider impls.
    pub use_input_streaming: bool,
}
438
/// How the model may choose tools for a request (mirrors provider
/// `tool_choice` settings: automatic, forced-any, or forbidden).
#[derive(Debug, PartialEq, Hash, Clone, Serialize, Deserialize)]
pub enum LanguageModelToolChoice {
    Auto,
    Any,
    None,
}
445
/// A complete request to a language model: conversation, tools, and
/// sampling/behavior options.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct LanguageModelRequest {
    pub thread_id: Option<String>,
    pub prompt_id: Option<String>,
    /// Intent of this completion, as defined by `cloud_llm_client`.
    pub intent: Option<CompletionIntent>,
    pub messages: Vec<LanguageModelRequestMessage>,
    pub tools: Vec<LanguageModelRequestTool>,
    pub tool_choice: Option<LanguageModelToolChoice>,
    /// Stop sequences that end generation when emitted by the model.
    pub stop: Vec<String>,
    pub temperature: Option<f32>,
    /// Whether the model may emit thinking content for this request.
    pub thinking_allowed: bool,
    // Stringly-typed effort level; semantics depend on the provider consuming it.
    pub thinking_effort: Option<String>,
    /// Requested speed tier, when supported (see `Speed`).
    pub speed: Option<Speed>,
}
460
/// Requested processing speed tier; convertible to `anthropic::Speed` (below).
#[derive(Clone, Copy, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum Speed {
    #[default]
    Standard,
    Fast,
}
468
469impl Speed {
470 pub fn toggle(self) -> Self {
471 match self {
472 Speed::Standard => Speed::Fast,
473 Speed::Fast => Speed::Standard,
474 }
475 }
476}
477
478impl From<Speed> for anthropic::Speed {
479 fn from(speed: Speed) -> Self {
480 match speed {
481 Speed::Standard => anthropic::Speed::Standard,
482 Speed::Fast => anthropic::Speed::Fast,
483 }
484 }
485}
486
/// A message returned by a language model. Both fields are optional because
/// providers may omit either in a given response payload.
#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
pub struct LanguageModelResponseMessage {
    pub role: Option<Role>,
    pub content: Option<String>,
}
492
#[cfg(test)]
mod tests {
    use super::*;
    use base64::Engine as _;
    use gpui::TestAppContext;
    use image::ImageDecoder as _;

    /// Decode a bare base64 string (no `data:` prefix) into raw PNG bytes.
    fn base64_to_png_bytes(base64_png: &str) -> Vec<u8> {
        base64::engine::general_purpose::STANDARD
            .decode(base64_png.as_bytes())
            .expect("base64 should decode")
    }

    /// Read the pixel dimensions out of an encoded PNG.
    fn png_dimensions(png_bytes: &[u8]) -> (u32, u32) {
        let decoder =
            image::codecs::png::PngDecoder::new(Cursor::new(png_bytes)).expect("png should decode");
        decoder.dimensions()
    }

    /// Build a PNG of the given dimensions filled with deterministic noise,
    /// so the encoded file stays large (noise defeats PNG compression).
    fn make_noisy_png_bytes(width: u32, height: u32) -> Vec<u8> {
        // Create an RGBA image with per-pixel variance to avoid PNG compressing too well.
        let mut img = image::RgbaImage::new(width, height);
        for y in 0..height {
            for x in 0..width {
                // XOR/multiply mixing keeps neighboring pixels decorrelated.
                let r = ((x ^ y) & 0xFF) as u8;
                let g = ((x.wrapping_mul(31) ^ y.wrapping_mul(17)) & 0xFF) as u8;
                let b = ((x.wrapping_mul(131) ^ y.wrapping_mul(7)) & 0xFF) as u8;
                img.put_pixel(x, y, image::Rgba([r, g, b, 0xFF]));
            }
        }

        let mut out = Vec::new();
        image::DynamicImage::ImageRgba8(img)
            .write_with_encoder(PngEncoder::new(&mut out))
            .expect("png encoding should succeed");
        out
    }

    #[gpui::test]
    async fn test_from_image_downscales_to_default_5mb_limit(cx: &mut TestAppContext) {
        // Pick a size that reliably produces a PNG > 5MB when filled with noise.
        // If this fails (image is too small), bump dimensions.
        let original_png = make_noisy_png_bytes(4096, 4096);
        assert!(
            original_png.len() > DEFAULT_IMAGE_MAX_BYTES,
            "precondition failed: noisy PNG must exceed DEFAULT_IMAGE_MAX_BYTES"
        );

        let image = gpui::Image::from_bytes(ImageFormat::Png, original_png);
        let lm_image = cx
            .update(|cx| LanguageModelImage::from_image(Arc::new(image), cx))
            .await
            .expect("image conversion should succeed");

        // The payload we send upstream must respect the hard byte cap.
        let encoded_png = base64_to_png_bytes(lm_image.source.as_ref());
        assert!(
            encoded_png.len() <= DEFAULT_IMAGE_MAX_BYTES,
            "expected encoded PNG <= DEFAULT_IMAGE_MAX_BYTES, got {} bytes",
            encoded_png.len()
        );

        // Ensure we actually downscaled in pixels (not just re-encoded).
        let (w, h) = png_dimensions(&encoded_png);
        assert!(
            w < 4096 || h < 4096,
            "expected image to be downscaled in at least one dimension; got {w}x{h}"
        );
    }

    /// Exercises every accepted input shape of the hand-written
    /// `Deserialize` impl, plus the rejection cases.
    #[test]
    fn test_language_model_tool_result_content_deserialization() {
        let json = r#""This is plain text""#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("This is plain text".into())
        );

        let json = r#"{"type": "text", "text": "This is wrapped text"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("This is wrapped text".into())
        );

        let json = r#"{"Type": "TEXT", "TEXT": "Case insensitive"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Case insensitive".into())
        );

        let json = r#"{"Text": "Wrapped variant"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Wrapped variant".into())
        );

        let json = r#"{"text": "Lowercase wrapped"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Lowercase wrapped".into())
        );

        // Test image deserialization
        let json = r#"{
            "source": "base64encodedimagedata",
            "size": {
                "width": 100,
                "height": 200
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "base64encodedimagedata");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 100);
                assert_eq!(size.height.0, 200);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test wrapped Image variant
        let json = r#"{
            "Image": {
                "source": "wrappedimagedata",
                "size": {
                    "width": 50,
                    "height": 75
                }
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "wrappedimagedata");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 50);
                assert_eq!(size.height.0, 75);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test wrapped Image variant with case insensitive
        let json = r#"{
            "image": {
                "Source": "caseinsensitive",
                "SIZE": {
                    "width": 30,
                    "height": 40
                }
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "caseinsensitive");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 30);
                assert_eq!(size.height.0, 40);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test that wrapped text with wrong type fails
        let json = r#"{"type": "blahblah", "text": "This should fail"}"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        // Test that malformed JSON fails
        let json = r#"{"invalid": "structure"}"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        // Test edge cases
        let json = r#""""#; // Empty string
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(result, LanguageModelToolResultContent::Text("".into()));

        // Test with extra fields in wrapped text (should be ignored)
        let json = r#"{"type": "text", "text": "Hello", "extra": "field"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(result, LanguageModelToolResultContent::Text("Hello".into()));

        // Test direct image with case-insensitive fields
        let json = r#"{
            "SOURCE": "directimage",
            "Size": {
                "width": 200,
                "height": 300
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "directimage");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 200);
                assert_eq!(size.height.0, 300);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test that multiple fields prevent wrapped variant interpretation
        let json = r#"{"Text": "not wrapped", "extra": "field"}"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        // Test wrapped text with uppercase TEXT variant
        let json = r#"{"TEXT": "Uppercase variant"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Uppercase variant".into())
        );

        // Test that numbers and other JSON values fail gracefully
        let json = r#"123"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        let json = r#"null"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        let json = r#"[1, 2, 3]"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());
    }
}