request.rs

use std::io::{Cursor, Write};
use std::sync::Arc;

use crate::role::Role;
use crate::{LanguageModelToolUse, LanguageModelToolUseId};
use base64::write::EncoderWriter;
use gpui::{
    App, AppContext as _, DevicePixels, Image, ObjectFit, RenderImage, SharedString, Size, Task,
    point, px, size,
};
use image::{GenericImageView, codecs::png::PngEncoder, imageops::resize};
use serde::{Deserialize, Serialize};
use util::ResultExt;

#[derive(Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
pub struct LanguageModelImage {
    /// A base64-encoded PNG image.
    pub source: SharedString,
    size: Size<DevicePixels>,
}

impl std::fmt::Debug for LanguageModelImage {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("LanguageModelImage")
            .field("source", &format!("<{} bytes>", self.source.len()))
            .field("size", &self.size)
            .finish()
    }
}

/// Anthropic wants uploaded images to be no larger than this in either dimension.
const ANTHROPIC_SIZE_LIMIT: f32 = 1568.;

impl LanguageModelImage {
    pub fn empty() -> Self {
        Self {
            source: "".into(),
            size: size(DevicePixels(0), DevicePixels(0)),
        }
    }

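    /// Converts a gpui [`Image`] into a base64-encoded PNG on a background
    /// thread, downscaling it first if either dimension exceeds Anthropic's
    /// size limit. The returned task resolves to `None` for unsupported
    /// formats or decode failures.
    ///
    /// A minimal usage sketch (not a doctest: it assumes a live gpui `App`
    /// context `cx` and an `Arc<Image>` named `image`, which are hypothetical
    /// here):
    ///
    /// ```ignore
    /// let task = LanguageModelImage::from_image(image, cx);
    /// cx.spawn(async move |_cx| {
    ///     if let Some(encoded) = task.await {
    ///         println!("~{} tokens", encoded.estimate_tokens());
    ///     }
    /// })
    /// .detach();
    /// ```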
    pub fn from_image(data: Arc<Image>, cx: &mut App) -> Task<Option<Self>> {
        cx.background_spawn(async move {
            // Map the gpui format onto the corresponding `image` crate format
            // so every supported input is decoded with the right codec.
            let format = match data.format() {
                gpui::ImageFormat::Png => image::ImageFormat::Png,
                gpui::ImageFormat::Jpeg => image::ImageFormat::Jpeg,
                gpui::ImageFormat::Webp => image::ImageFormat::WebP,
                gpui::ImageFormat::Gif => image::ImageFormat::Gif,
                _ => return None,
            };

            let image = image::load(Cursor::new(data.bytes()), format).log_err()?;
            let (width, height) = image.dimensions();
            let image_size = size(DevicePixels(width as i32), DevicePixels(height as i32));

            let needs_resize = image_size.width.0 > ANTHROPIC_SIZE_LIMIT as i32
                || image_size.height.0 > ANTHROPIC_SIZE_LIMIT as i32;

            let mut base64_image = Vec::new();

            {
                let mut base64_encoder = EncoderWriter::new(
                    Cursor::new(&mut base64_image),
                    &base64::engine::general_purpose::STANDARD,
                );

                if !needs_resize && format == image::ImageFormat::Png {
                    // Already a PNG within the size limit: base64-encode the
                    // original bytes without re-encoding.
                    base64_encoder.write_all(data.bytes()).log_err()?;
                } else {
                    let image = if needs_resize {
                        // Scale the image down to fit within Anthropic's
                        // limit, preserving its aspect ratio.
                        let new_bounds = ObjectFit::ScaleDown.get_bounds(
                            gpui::Bounds {
                                origin: point(px(0.0), px(0.0)),
                                size: size(px(ANTHROPIC_SIZE_LIMIT), px(ANTHROPIC_SIZE_LIMIT)),
                            },
                            image_size,
                        );
                        image.resize(
                            new_bounds.size.width.0 as u32,
                            new_bounds.size.height.0 as u32,
                            image::imageops::FilterType::Triangle,
                        )
                    } else {
                        image
                    };

                    // Re-encode as PNG so `source` is always a PNG payload.
                    let mut png = Vec::new();
                    image
                        .write_with_encoder(PngEncoder::new(&mut png))
                        .log_err()?;

                    base64_encoder.write_all(png.as_slice()).log_err()?;
                }
            }

            // SAFETY: Base64 output is pure ASCII, which is always valid UTF-8.
            let source = unsafe { String::from_utf8_unchecked(base64_image) };

            Some(LanguageModelImage {
                size: image_size,
                source: source.into(),
            })
        })
    }

    /// Resolves a [`RenderImage`] into an LLM-ready format (a base64-encoded PNG).
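    ///
    /// A minimal usage sketch (not a doctest: it assumes you already hold a
    /// `RenderImage` named `render_image`, which is hypothetical here):
    ///
    /// ```ignore
    /// if let Some(encoded) = LanguageModelImage::from_render_image(&render_image) {
    ///     // `encoded.source` now holds the base64 PNG payload.
    /// }
    /// ```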
    pub fn from_render_image(data: &RenderImage) -> Option<Self> {
        // Use the first frame of the image.
        let image_size = data.size(0);

        let mut bytes = data.as_bytes(0).unwrap_or(&[]).to_vec();
        // Convert from BGRA to RGBA by swapping the blue and red channels.
        for pixel in bytes.chunks_exact_mut(4) {
            pixel.swap(2, 0);
        }
        // `from_vec` returns `None` if the buffer length doesn't match the
        // dimensions, so bail out instead of panicking on malformed input.
        let mut image = image::RgbaImage::from_vec(
            image_size.width.0 as u32,
            image_size.height.0 as u32,
            bytes,
        )?;

        // https://docs.anthropic.com/en/docs/build-with-claude/vision
        if image_size.width.0 > ANTHROPIC_SIZE_LIMIT as i32
            || image_size.height.0 > ANTHROPIC_SIZE_LIMIT as i32
        {
            // Scale the image down to fit within Anthropic's limit,
            // preserving its aspect ratio.
            let new_bounds = ObjectFit::ScaleDown.get_bounds(
                gpui::Bounds {
                    origin: point(px(0.0), px(0.0)),
                    size: size(px(ANTHROPIC_SIZE_LIMIT), px(ANTHROPIC_SIZE_LIMIT)),
                },
                image_size,
            );

            image = resize(
                &image,
                new_bounds.size.width.0 as u32,
                new_bounds.size.height.0 as u32,
                image::imageops::FilterType::Triangle,
            );
        }

        let mut png = Vec::new();

        image
            .write_with_encoder(PngEncoder::new(&mut png))
            .log_err()?;

        let mut base64_image = Vec::new();

        {
            let mut base64_encoder = EncoderWriter::new(
                Cursor::new(&mut base64_image),
                &base64::engine::general_purpose::STANDARD,
            );

            base64_encoder.write_all(png.as_slice()).log_err()?;
        }

        // SAFETY: Base64 output is pure ASCII, which is always valid UTF-8.
        let source = unsafe { String::from_utf8_unchecked(base64_image) };

        Some(LanguageModelImage {
            size: image_size,
            source: source.into(),
        })
    }

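    /// Estimates how many tokens this image will consume, using Anthropic's
    /// published heuristic of `(width * height) / 750`: for example, a
    /// 1000 x 1000 px image works out to roughly 1333 tokens.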
    pub fn estimate_tokens(&self) -> usize {
        let width = self.size.width.0.unsigned_abs() as usize;
        let height = self.size.height.0.unsigned_abs() as usize;

        // From: https://docs.anthropic.com/en/docs/build-with-claude/vision#calculate-image-costs
        // Note that there are a lot of conditions on Anthropic's API, and
        // OpenAI doesn't use this formula, so this is only a rough guess.
        (width * height) / 750
    }
}

#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
pub struct LanguageModelToolResult {
    pub tool_use_id: LanguageModelToolUseId,
    pub tool_name: Arc<str>,
    pub is_error: bool,
    pub content: Arc<str>,
}

#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
pub enum MessageContent {
    Text(String),
    Thinking {
        text: String,
        signature: Option<String>,
    },
    RedactedThinking(Vec<u8>),
    Image(LanguageModelImage),
    ToolUse(LanguageModelToolUse),
    ToolResult(LanguageModelToolResult),
}

impl From<String> for MessageContent {
    fn from(value: String) -> Self {
        MessageContent::Text(value)
    }
}

impl From<&str> for MessageContent {
    fn from(value: &str) -> Self {
        MessageContent::Text(value.to_string())
    }
}

#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Hash)]
pub struct LanguageModelRequestMessage {
    pub role: Role,
    pub content: Vec<MessageContent>,
    pub cache: bool,
}

impl LanguageModelRequestMessage {
    /// Concatenates all textual content (text, thinking, and tool results)
    /// into a single string, skipping images, tool uses, and redacted thinking.
    pub fn string_contents(&self) -> String {
        let mut buffer = String::new();
        for string in self.content.iter().filter_map(|content| match content {
            MessageContent::Text(text) => Some(text.as_str()),
            MessageContent::Thinking { text, .. } => Some(text.as_str()),
            MessageContent::RedactedThinking(_) => None,
            MessageContent::ToolResult(tool_result) => Some(tool_result.content.as_ref()),
            MessageContent::ToolUse(_) | MessageContent::Image(_) => None,
        }) {
            buffer.push_str(string);
        }

        buffer
    }

    /// Returns true if the message carries no meaningful content: all of its
    /// text is whitespace and it contains no images, tool uses, or redacted
    /// thinking.
    pub fn contents_empty(&self) -> bool {
        self.content.iter().all(|content| match content {
            MessageContent::Text(text) => text.chars().all(|c| c.is_whitespace()),
            MessageContent::Thinking { text, .. } => text.chars().all(|c| c.is_whitespace()),
            MessageContent::ToolResult(tool_result) => {
                tool_result.content.chars().all(|c| c.is_whitespace())
            }
            MessageContent::RedactedThinking(_)
            | MessageContent::ToolUse(_)
            | MessageContent::Image(_) => false,
        })
    }
}

#[derive(Debug, PartialEq, Hash, Clone, Serialize, Deserialize)]
pub struct LanguageModelRequestTool {
    pub name: String,
    pub description: String,
    pub input_schema: serde_json::Value,
}

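/// A request to a language model provider.
///
/// A minimal construction sketch (illustrative values only; relies on the
/// derived `Default` impl for the remaining fields):
///
/// ```ignore
/// let request = LanguageModelRequest {
///     messages: vec![LanguageModelRequestMessage {
///         role: Role::User,
///         content: vec!["Describe this image.".into()],
///         cache: false,
///     }],
///     temperature: Some(0.7),
///     ..Default::default()
/// };
/// ```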
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct LanguageModelRequest {
    pub thread_id: Option<String>,
    pub prompt_id: Option<String>,
    pub messages: Vec<LanguageModelRequestMessage>,
    pub tools: Vec<LanguageModelRequestTool>,
    pub stop: Vec<String>,
    pub temperature: Option<f32>,
}

#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
pub struct LanguageModelResponseMessage {
    pub role: Option<Role>,
    pub content: Option<String>,
}
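
#[cfg(test)]
mod tests {
    use super::*;

    // A small sketch of the message helpers above; the assertions only cover
    // behavior that is visible in this file.
    #[test]
    fn string_contents_skips_non_text_content() {
        let message = LanguageModelRequestMessage {
            role: Role::User,
            content: vec![
                "Hello, ".into(),
                MessageContent::Image(LanguageModelImage::empty()),
                "world!".into(),
            ],
            cache: false,
        };

        // Images contribute no text, and the text parts are concatenated
        // without separators.
        assert_eq!(message.string_contents(), "Hello, world!");
        // The image still counts as content, so the message is not empty.
        assert!(!message.contents_empty());
    }

    #[test]
    fn whitespace_only_text_counts_as_empty() {
        let message = LanguageModelRequestMessage {
            role: Role::User,
            content: vec!["   \n".into()],
            cache: false,
        };
        assert!(message.contents_empty());
    }
}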