1use std::io::{Cursor, Write};
2use std::sync::Arc;
3
4use anyhow::Result;
5use base64::write::EncoderWriter;
6use gpui::{
7 App, AppContext as _, DevicePixels, Image, ImageFormat, ObjectFit, SharedString, Size, Task,
8 point, px, size,
9};
10use image::GenericImageView as _;
11use image::codecs::png::PngEncoder;
12use serde::{Deserialize, Serialize};
13use util::ResultExt;
14
15use crate::role::Role;
16use crate::{LanguageModelToolUse, LanguageModelToolUseId};
17
/// An image attachment for a language-model request.
///
/// The payload is always PNG data, base64-encoded (see `LanguageModelImage::FORMAT`).
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
pub struct LanguageModelImage {
    /// A base64-encoded PNG image.
    pub source: SharedString,
    /// Pixel dimensions of the image, when known. Optional (and omitted from
    /// serialization when `None`) so older payloads without a size still parse.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub size: Option<Size<DevicePixels>>,
}
25
26impl LanguageModelImage {
27 pub fn len(&self) -> usize {
28 self.source.len()
29 }
30
31 pub fn is_empty(&self) -> bool {
32 self.source.is_empty()
33 }
34
35 // Parse Self from a JSON object with case-insensitive field names
36 pub fn from_json(obj: &serde_json::Map<String, serde_json::Value>) -> Option<Self> {
37 let mut source = None;
38 let mut size_obj = None;
39
40 // Find source and size fields (case-insensitive)
41 for (k, v) in obj.iter() {
42 match k.to_lowercase().as_str() {
43 "source" => source = v.as_str(),
44 "size" => size_obj = v.as_object(),
45 _ => {}
46 }
47 }
48
49 let source = source?;
50 let size_obj = size_obj?;
51
52 let mut width = None;
53 let mut height = None;
54
55 // Find width and height in size object (case-insensitive)
56 for (k, v) in size_obj.iter() {
57 match k.to_lowercase().as_str() {
58 "width" => width = v.as_i64().map(|w| w as i32),
59 "height" => height = v.as_i64().map(|h| h as i32),
60 _ => {}
61 }
62 }
63
64 Some(Self {
65 size: Some(size(DevicePixels(width?), DevicePixels(height?))),
66 source: SharedString::from(source.to_string()),
67 })
68 }
69}
70
71impl std::fmt::Debug for LanguageModelImage {
72 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
73 f.debug_struct("LanguageModelImage")
74 .field("source", &format!("<{} bytes>", self.source.len()))
75 .field("size", &self.size)
76 .finish()
77 }
78}
79
/// Anthropic wants uploaded images to be smaller than this in both dimensions.
/// (Per Anthropic's vision documentation — confirm against current API docs
/// when bumping.)
const ANTHROPIC_SIZE_LIMIT: f32 = 1568.;

/// Default per-image hard limit (in bytes) for the encoded image payload we send upstream.
///
/// NOTE: `LanguageModelImage.source` is base64-encoded PNG bytes (without the `data:` prefix).
/// This limit is enforced on the encoded PNG bytes *before* base64 encoding.
const DEFAULT_IMAGE_MAX_BYTES: usize = 5 * 1024 * 1024;

/// Conservative cap on how many times we'll attempt to shrink/re-encode an image to fit
/// `DEFAULT_IMAGE_MAX_BYTES`. Each pass shrinks dimensions by ~15%.
const MAX_IMAGE_DOWNSCALE_PASSES: usize = 8;
92
93impl LanguageModelImage {
94 // All language model images are encoded as PNGs.
95 pub const FORMAT: ImageFormat = ImageFormat::Png;
96
97 pub fn empty() -> Self {
98 Self {
99 source: "".into(),
100 size: None,
101 }
102 }
103
104 pub fn from_image(data: Arc<Image>, cx: &mut App) -> Task<Option<Self>> {
105 cx.background_spawn(async move {
106 let image_bytes = Cursor::new(data.bytes());
107 let dynamic_image = match data.format() {
108 ImageFormat::Png => image::codecs::png::PngDecoder::new(image_bytes)
109 .and_then(image::DynamicImage::from_decoder),
110 ImageFormat::Jpeg => image::codecs::jpeg::JpegDecoder::new(image_bytes)
111 .and_then(image::DynamicImage::from_decoder),
112 ImageFormat::Webp => image::codecs::webp::WebPDecoder::new(image_bytes)
113 .and_then(image::DynamicImage::from_decoder),
114 ImageFormat::Gif => image::codecs::gif::GifDecoder::new(image_bytes)
115 .and_then(image::DynamicImage::from_decoder),
116 ImageFormat::Bmp => image::codecs::bmp::BmpDecoder::new(image_bytes)
117 .and_then(image::DynamicImage::from_decoder),
118 ImageFormat::Tiff => image::codecs::tiff::TiffDecoder::new(image_bytes)
119 .and_then(image::DynamicImage::from_decoder),
120 _ => return None,
121 }
122 .log_err()?;
123
124 let width = dynamic_image.width();
125 let height = dynamic_image.height();
126 let image_size = size(DevicePixels(width as i32), DevicePixels(height as i32));
127
128 // First apply any provider-specific dimension constraints we know about (Anthropic).
129 let mut processed_image = if image_size.width.0 > ANTHROPIC_SIZE_LIMIT as i32
130 || image_size.height.0 > ANTHROPIC_SIZE_LIMIT as i32
131 {
132 let new_bounds = ObjectFit::ScaleDown.get_bounds(
133 gpui::Bounds {
134 origin: point(px(0.0), px(0.0)),
135 size: size(px(ANTHROPIC_SIZE_LIMIT), px(ANTHROPIC_SIZE_LIMIT)),
136 },
137 image_size,
138 );
139 dynamic_image.resize(
140 new_bounds.size.width.into(),
141 new_bounds.size.height.into(),
142 image::imageops::FilterType::Triangle,
143 )
144 } else {
145 dynamic_image
146 };
147
148 // Then enforce a default per-image size cap on the encoded PNG bytes.
149 //
150 // We always send PNG bytes (either original PNG bytes, or re-encoded PNG) base64'd.
151 // The upstream provider limit we want to respect is effectively on the binary image
152 // payload size, so we enforce against the encoded PNG bytes before base64 encoding.
153 let mut encoded_png = encode_png_bytes(&processed_image).log_err()?;
154 for _pass in 0..MAX_IMAGE_DOWNSCALE_PASSES {
155 if encoded_png.len() <= DEFAULT_IMAGE_MAX_BYTES {
156 break;
157 }
158
159 // Scale down geometrically to converge quickly. We don't know the final PNG size
160 // as a function of pixels, so we iteratively shrink.
161 let (w, h) = processed_image.dimensions();
162 if w <= 1 || h <= 1 {
163 break;
164 }
165
166 // Shrink by ~15% each pass (0.85). This is a compromise between speed and
167 // preserving image detail.
168 let new_w = ((w as f32) * 0.85).round().max(1.0) as u32;
169 let new_h = ((h as f32) * 0.85).round().max(1.0) as u32;
170
171 processed_image =
172 processed_image.resize(new_w, new_h, image::imageops::FilterType::Triangle);
173 encoded_png = encode_png_bytes(&processed_image).log_err()?;
174 }
175
176 if encoded_png.len() > DEFAULT_IMAGE_MAX_BYTES {
177 // Still too large after multiple passes; treat as non-convertible for now.
178 // (Provider-specific handling can be introduced later.)
179 return None;
180 }
181
182 // Now base64 encode the PNG bytes.
183 let base64_image = encode_bytes_as_base64(encoded_png.as_slice()).log_err()?;
184
185 // SAFETY: The base64 encoder should not produce non-UTF8.
186 let source = unsafe { String::from_utf8_unchecked(base64_image) };
187
188 Some(LanguageModelImage {
189 size: Some(image_size),
190 source: source.into(),
191 })
192 })
193 }
194
195 pub fn estimate_tokens(&self) -> usize {
196 let Some(size) = self.size.as_ref() else {
197 return 0;
198 };
199 let width = size.width.0.unsigned_abs() as usize;
200 let height = size.height.0.unsigned_abs() as usize;
201
202 // From: https://docs.anthropic.com/en/docs/build-with-claude/vision#calculate-image-costs
203 // Note that are a lot of conditions on Anthropic's API, and OpenAI doesn't use this,
204 // so this method is more of a rough guess.
205 (width * height) / 750
206 }
207
208 pub fn to_base64_url(&self) -> String {
209 format!("data:image/png;base64,{}", self.source)
210 }
211}
212
213fn encode_png_bytes(image: &image::DynamicImage) -> Result<Vec<u8>> {
214 let mut png = Vec::new();
215 image.write_with_encoder(PngEncoder::new(&mut png))?;
216 Ok(png)
217}
218
219fn encode_bytes_as_base64(bytes: &[u8]) -> Result<Vec<u8>> {
220 let mut base64_image = Vec::new();
221 {
222 let mut base64_encoder = EncoderWriter::new(
223 Cursor::new(&mut base64_image),
224 &base64::engine::general_purpose::STANDARD,
225 );
226 base64_encoder.write_all(bytes)?;
227 }
228 Ok(base64_image)
229}
230
/// The outcome of running a tool, sent back to the model.
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
pub struct LanguageModelToolResult {
    /// Identifier of the tool-use request this result answers.
    pub tool_use_id: LanguageModelToolUseId,
    /// Name of the tool that produced this result.
    pub tool_name: Arc<str>,
    /// Whether the tool run failed.
    pub is_error: bool,
    /// The tool output formatted for presenting to the model
    pub content: LanguageModelToolResultContent,
    /// The raw tool output, if available, often for debugging or extra state for replay
    pub output: Option<serde_json::Value>,
}
241
/// A tool result payload: either text or an image.
///
/// `Deserialize` is hand-written below to tolerate the several JSON shapes
/// models emit; only `Serialize` is derived.
#[derive(Debug, Clone, Serialize, Eq, PartialEq, Hash)]
pub enum LanguageModelToolResultContent {
    /// Plain text output.
    Text(Arc<str>),
    /// An image output (base64 PNG).
    Image(LanguageModelImage),
}
247
impl<'de> Deserialize<'de> for LanguageModelToolResultContent {
    /// Tolerant deserializer: models emit tool-result content in several JSON
    /// shapes, so each known shape is tried in priority order. The order and
    /// the exact accept/reject rules are pinned by tests — keep them stable.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::Error;

        let value = serde_json::Value::deserialize(deserializer)?;

        // Models can provide these responses in several styles. Try each in order.

        // 1. Try as plain string
        if let Ok(text) = serde_json::from_value::<String>(value.clone()) {
            return Ok(Self::Text(Arc::from(text)));
        }

        // 2. Try as object
        if let Some(obj) = value.as_object() {
            // get a JSON field case-insensitively
            fn get_field<'a>(
                obj: &'a serde_json::Map<String, serde_json::Value>,
                field: &str,
            ) -> Option<&'a serde_json::Value> {
                obj.iter()
                    .find(|(k, _)| k.to_lowercase() == field.to_lowercase())
                    .map(|(_, v)| v)
            }

            // Accept wrapped text format: { "type": "text", "text": "..." }
            // Extra fields are ignored; a non-"text" type falls through to the
            // remaining checks and ultimately errors.
            if let (Some(type_value), Some(text_value)) =
                (get_field(obj, "type"), get_field(obj, "text"))
                && let Some(type_str) = type_value.as_str()
                && type_str.to_lowercase() == "text"
                && let Some(text) = text_value.as_str()
            {
                return Ok(Self::Text(Arc::from(text)));
            }

            // Check for wrapped Text variant: { "text": "..." }
            // Requires "text" to be the ONLY key, so that e.g.
            // { "Text": "...", "extra": ... } is rejected rather than guessed at.
            if let Some((_key, value)) = obj.iter().find(|(k, _)| k.to_lowercase() == "text")
                && obj.len() == 1
            {
                // Only one field, and it's "text" (case-insensitive)
                if let Some(text) = value.as_str() {
                    return Ok(Self::Text(Arc::from(text)));
                }
            }

            // Check for wrapped Image variant: { "image": { "source": "...", "size": ... } }
            if let Some((_key, value)) = obj.iter().find(|(k, _)| k.to_lowercase() == "image")
                && obj.len() == 1
            {
                // Only one field, and it's "image" (case-insensitive)
                // Try to parse the nested image object
                if let Some(image_obj) = value.as_object()
                    && let Some(image) = LanguageModelImage::from_json(image_obj)
                {
                    return Ok(Self::Image(image));
                }
            }

            // Try as direct Image (object with "source" and "size" fields)
            if let Some(image) = LanguageModelImage::from_json(obj) {
                return Ok(Self::Image(image));
            }
        }

        // If none of the variants match, return an error with the problematic JSON
        Err(D::Error::custom(format!(
            "data did not match any variant of LanguageModelToolResultContent. Expected either a string, \
             an object with 'type': 'text', a wrapped variant like {{\"Text\": \"...\"}}, or an image object. Got: {}",
            serde_json::to_string_pretty(&value).unwrap_or_else(|_| value.to_string())
        )))
    }
}
323
324impl LanguageModelToolResultContent {
325 pub fn to_str(&self) -> Option<&str> {
326 match self {
327 Self::Text(text) => Some(text),
328 Self::Image(_) => None,
329 }
330 }
331
332 pub fn is_empty(&self) -> bool {
333 match self {
334 Self::Text(text) => text.chars().all(|c| c.is_whitespace()),
335 Self::Image(_) => false,
336 }
337 }
338}
339
340impl From<&str> for LanguageModelToolResultContent {
341 fn from(value: &str) -> Self {
342 Self::Text(Arc::from(value))
343 }
344}
345
346impl From<String> for LanguageModelToolResultContent {
347 fn from(value: String) -> Self {
348 Self::Text(Arc::from(value))
349 }
350}
351
352impl From<LanguageModelImage> for LanguageModelToolResultContent {
353 fn from(image: LanguageModelImage) -> Self {
354 Self::Image(image)
355 }
356}
357
/// One item in a request message's content list.
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
pub enum MessageContent {
    /// Plain message text.
    Text(String),
    /// Model "thinking" text, with an optional provider signature
    /// (presumably for replaying thinking blocks — confirm against providers).
    Thinking {
        text: String,
        signature: Option<String>,
    },
    /// Opaque, redacted thinking payload.
    RedactedThinking(String),
    /// An attached image (base64 PNG).
    Image(LanguageModelImage),
    /// A tool invocation requested by the model.
    ToolUse(LanguageModelToolUse),
    /// The result of a tool invocation.
    ToolResult(LanguageModelToolResult),
}
370
371impl MessageContent {
372 pub fn to_str(&self) -> Option<&str> {
373 match self {
374 MessageContent::Text(text) => Some(text.as_str()),
375 MessageContent::Thinking { text, .. } => Some(text.as_str()),
376 MessageContent::RedactedThinking(_) => None,
377 MessageContent::ToolResult(tool_result) => tool_result.content.to_str(),
378 MessageContent::ToolUse(_) | MessageContent::Image(_) => None,
379 }
380 }
381
382 pub fn is_empty(&self) -> bool {
383 match self {
384 MessageContent::Text(text) => text.chars().all(|c| c.is_whitespace()),
385 MessageContent::Thinking { text, .. } => text.chars().all(|c| c.is_whitespace()),
386 MessageContent::ToolResult(tool_result) => tool_result.content.is_empty(),
387 MessageContent::RedactedThinking(_)
388 | MessageContent::ToolUse(_)
389 | MessageContent::Image(_) => false,
390 }
391 }
392}
393
394impl From<String> for MessageContent {
395 fn from(value: String) -> Self {
396 MessageContent::Text(value)
397 }
398}
399
400impl From<&str> for MessageContent {
401 fn from(value: &str) -> Self {
402 MessageContent::Text(value.to_string())
403 }
404}
405
/// A single message within a [`LanguageModelRequest`].
#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Hash)]
pub struct LanguageModelRequestMessage {
    /// Who authored this message (user / assistant / system).
    pub role: Role,
    /// Ordered content items making up the message body.
    pub content: Vec<MessageContent>,
    /// Whether this message is a provider prompt-cache boundary
    /// (presumably Anthropic-style cache control — confirm at call sites).
    pub cache: bool,
    /// Provider-specific reasoning metadata, passed through opaquely.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub reasoning_details: Option<serde_json::Value>,
}
414
415impl LanguageModelRequestMessage {
416 pub fn string_contents(&self) -> String {
417 let mut buffer = String::new();
418 for string in self.content.iter().filter_map(|content| content.to_str()) {
419 buffer.push_str(string);
420 }
421
422 buffer
423 }
424
425 pub fn contents_empty(&self) -> bool {
426 self.content.iter().all(|content| content.is_empty())
427 }
428}
429
/// A tool definition offered to the model in a request.
#[derive(Debug, PartialEq, Hash, Clone, Serialize, Deserialize)]
pub struct LanguageModelRequestTool {
    /// Tool name the model uses to invoke it.
    pub name: String,
    /// Human/model-readable description of what the tool does.
    pub description: String,
    /// JSON schema describing the tool's input object.
    pub input_schema: serde_json::Value,
    /// Whether the tool's input should be streamed incrementally as the
    /// model produces it.
    pub use_input_streaming: bool,
}
437
/// Constrains how the model may use the provided tools.
#[derive(Debug, PartialEq, Hash, Clone, Serialize, Deserialize)]
pub enum LanguageModelToolChoice {
    /// The model decides whether to call a tool.
    Auto,
    /// The model must call some tool.
    Any,
    /// The model must not call any tool.
    None,
}
444
/// Why a completion is being requested; lets providers/telemetry distinguish
/// user-driven prompts from internal bookkeeping requests.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CompletionIntent {
    UserPrompt,
    Subagent,
    ToolResults,
    ThreadSummarization,
    ThreadContextSummarization,
    CreateFile,
    EditFile,
    InlineAssist,
    TerminalInlineAssist,
    GenerateGitCommitMessage,
}
459
/// A complete request to a language model: conversation, tools, and sampling
/// parameters.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct LanguageModelRequest {
    /// Identifier of the originating thread, if any.
    pub thread_id: Option<String>,
    /// Identifier of the originating prompt, if any.
    pub prompt_id: Option<String>,
    /// Why this completion is being requested.
    pub intent: Option<CompletionIntent>,
    /// Conversation history, oldest first.
    pub messages: Vec<LanguageModelRequestMessage>,
    /// Tools the model may call.
    pub tools: Vec<LanguageModelRequestTool>,
    /// Constraint on tool usage; `None` leaves it to the provider default.
    pub tool_choice: Option<LanguageModelToolChoice>,
    /// Stop sequences that end generation.
    pub stop: Vec<String>,
    /// Sampling temperature; `None` uses the provider default.
    pub temperature: Option<f32>,
    /// Whether the model is permitted to emit thinking content.
    pub thinking_allowed: bool,
    /// Provider-specific thinking-effort setting, passed through as a string.
    pub thinking_effort: Option<String>,
    /// Requested serving speed tier, if the provider supports one.
    pub speed: Option<Speed>,
}
474
/// Serving speed tier requested from the provider.
#[derive(Clone, Copy, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum Speed {
    #[default]
    Standard,
    Fast,
}
482
483impl Speed {
484 pub fn toggle(self) -> Self {
485 match self {
486 Speed::Standard => Speed::Fast,
487 Speed::Fast => Speed::Standard,
488 }
489 }
490}
491
492impl From<Speed> for anthropic::Speed {
493 fn from(speed: Speed) -> Self {
494 match speed {
495 Speed::Standard => anthropic::Speed::Standard,
496 Speed::Fast => anthropic::Speed::Fast,
497 }
498 }
499}
500
/// A message returned by a language model.
#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
pub struct LanguageModelResponseMessage {
    /// Role of the responder, when the provider reports one.
    pub role: Option<Role>,
    /// Text content of the response, when present.
    pub content: Option<String>,
}
506
#[cfg(test)]
mod tests {
    use super::*;
    use base64::Engine as _;
    use gpui::TestAppContext;
    use image::ImageDecoder as _;

    /// Decodes a base64 string (as stored in `LanguageModelImage.source`)
    /// back into raw PNG bytes.
    fn base64_to_png_bytes(base64_png: &str) -> Vec<u8> {
        base64::engine::general_purpose::STANDARD
            .decode(base64_png.as_bytes())
            .expect("base64 should decode")
    }

    /// Reads the pixel dimensions out of encoded PNG bytes.
    fn png_dimensions(png_bytes: &[u8]) -> (u32, u32) {
        let decoder =
            image::codecs::png::PngDecoder::new(Cursor::new(png_bytes)).expect("png should decode");
        decoder.dimensions()
    }

    /// Builds a PNG of the given dimensions whose pixels are pseudo-random,
    /// so the encoded file stays large (PNG compresses noise poorly).
    fn make_noisy_png_bytes(width: u32, height: u32) -> Vec<u8> {
        // Create an RGBA image with per-pixel variance to avoid PNG compressing too well.
        let mut img = image::RgbaImage::new(width, height);
        for y in 0..height {
            for x in 0..width {
                let r = ((x ^ y) & 0xFF) as u8;
                let g = ((x.wrapping_mul(31) ^ y.wrapping_mul(17)) & 0xFF) as u8;
                let b = ((x.wrapping_mul(131) ^ y.wrapping_mul(7)) & 0xFF) as u8;
                img.put_pixel(x, y, image::Rgba([r, g, b, 0xFF]));
            }
        }

        let mut out = Vec::new();
        image::DynamicImage::ImageRgba8(img)
            .write_with_encoder(PngEncoder::new(&mut out))
            .expect("png encoding should succeed");
        out
    }

    #[gpui::test]
    async fn test_from_image_downscales_to_default_5mb_limit(cx: &mut TestAppContext) {
        // Pick a size that reliably produces a PNG > 5MB when filled with noise.
        // If this fails (image is too small), bump dimensions.
        let original_png = make_noisy_png_bytes(4096, 4096);
        assert!(
            original_png.len() > DEFAULT_IMAGE_MAX_BYTES,
            "precondition failed: noisy PNG must exceed DEFAULT_IMAGE_MAX_BYTES"
        );

        let image = gpui::Image::from_bytes(ImageFormat::Png, original_png);
        let lm_image = cx
            .update(|cx| LanguageModelImage::from_image(Arc::new(image), cx))
            .await
            .expect("image conversion should succeed");

        let encoded_png = base64_to_png_bytes(lm_image.source.as_ref());
        assert!(
            encoded_png.len() <= DEFAULT_IMAGE_MAX_BYTES,
            "expected encoded PNG <= DEFAULT_IMAGE_MAX_BYTES, got {} bytes",
            encoded_png.len()
        );

        // Ensure we actually downscaled in pixels (not just re-encoded).
        let (w, h) = png_dimensions(&encoded_png);
        assert!(
            w < 4096 || h < 4096,
            "expected image to be downscaled in at least one dimension; got {w}x{h}"
        );
    }

    /// Exercises the tolerant `Deserialize` impl for
    /// `LanguageModelToolResultContent` across every accepted shape, plus
    /// shapes that must be rejected.
    #[test]
    fn test_language_model_tool_result_content_deserialization() {
        let json = r#""This is plain text""#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("This is plain text".into())
        );

        let json = r#"{"type": "text", "text": "This is wrapped text"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("This is wrapped text".into())
        );

        let json = r#"{"Type": "TEXT", "TEXT": "Case insensitive"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Case insensitive".into())
        );

        let json = r#"{"Text": "Wrapped variant"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Wrapped variant".into())
        );

        let json = r#"{"text": "Lowercase wrapped"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Lowercase wrapped".into())
        );

        // Test image deserialization
        let json = r#"{
            "source": "base64encodedimagedata",
            "size": {
                "width": 100,
                "height": 200
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "base64encodedimagedata");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 100);
                assert_eq!(size.height.0, 200);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test wrapped Image variant
        let json = r#"{
            "Image": {
                "source": "wrappedimagedata",
                "size": {
                    "width": 50,
                    "height": 75
                }
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "wrappedimagedata");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 50);
                assert_eq!(size.height.0, 75);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test wrapped Image variant with case insensitive
        let json = r#"{
            "image": {
                "Source": "caseinsensitive",
                "SIZE": {
                    "width": 30,
                    "height": 40
                }
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "caseinsensitive");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 30);
                assert_eq!(size.height.0, 40);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test that wrapped text with wrong type fails
        let json = r#"{"type": "blahblah", "text": "This should fail"}"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        // Test that malformed JSON fails
        let json = r#"{"invalid": "structure"}"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        // Test edge cases
        let json = r#""""#; // Empty string
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(result, LanguageModelToolResultContent::Text("".into()));

        // Test with extra fields in wrapped text (should be ignored)
        let json = r#"{"type": "text", "text": "Hello", "extra": "field"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(result, LanguageModelToolResultContent::Text("Hello".into()));

        // Test direct image with case-insensitive fields
        let json = r#"{
            "SOURCE": "directimage",
            "Size": {
                "width": 200,
                "height": 300
            }
        }"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        match result {
            LanguageModelToolResultContent::Image(image) => {
                assert_eq!(image.source.as_ref(), "directimage");
                let size = image.size.expect("size");
                assert_eq!(size.width.0, 200);
                assert_eq!(size.height.0, 300);
            }
            _ => panic!("Expected Image variant"),
        }

        // Test that multiple fields prevent wrapped variant interpretation
        let json = r#"{"Text": "not wrapped", "extra": "field"}"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        // Test wrapped text with uppercase TEXT variant
        let json = r#"{"TEXT": "Uppercase variant"}"#;
        let result: LanguageModelToolResultContent = serde_json::from_str(json).unwrap();
        assert_eq!(
            result,
            LanguageModelToolResultContent::Text("Uppercase variant".into())
        );

        // Test that numbers and other JSON values fail gracefully
        let json = r#"123"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        let json = r#"null"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());

        let json = r#"[1, 2, 3]"#;
        let result: Result<LanguageModelToolResultContent, _> = serde_json::from_str(json);
        assert!(result.is_err());
    }
}