request.rs

use std::io::{Cursor, Write};
use std::sync::Arc;

use crate::role::Role;
use crate::{LanguageModelToolUse, LanguageModelToolUseId};
use base64::write::EncoderWriter;
use gpui::{
    App, AppContext as _, DevicePixels, Image, ObjectFit, RenderImage, SharedString, Size, Task,
    point, px, size,
};
use image::{DynamicImage, GenericImageView, codecs::png::PngEncoder, imageops::resize};
use serde::{Deserialize, Serialize};
use util::ResultExt;
use zed_llm_client::CompletionMode;
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
pub struct LanguageModelImage {
    /// A base64-encoded PNG image.
    pub source: SharedString,
    size: Size<DevicePixels>,
}

impl std::fmt::Debug for LanguageModelImage {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("LanguageModelImage")
            .field("source", &format!("<{} bytes>", self.source.len()))
            .field("size", &self.size)
            .finish()
    }
}
/// Anthropic requires uploaded images to be at most this many pixels in each dimension.
const ANTHROPIC_SIZE_LIMIT: f32 = 1568.;

impl LanguageModelImage {
    pub fn empty() -> Self {
        Self {
            source: "".into(),
            size: size(DevicePixels(0), DevicePixels(0)),
        }
    }

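    /// Decodes the image on a background thread and converts it into a
    /// base64-encoded PNG, downscaling it first if either dimension exceeds
    /// Anthropic's documented limit. Returns `None` if the image format is
    /// unsupported or decoding fails.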
    pub fn from_image(data: Arc<Image>, cx: &mut App) -> Task<Option<Self>> {
        cx.background_spawn(async move {
            // Decode with the decoder matching the image's declared format;
            // any other format is unsupported.
            let image_bytes = Cursor::new(data.bytes());
            let image = match data.format() {
                gpui::ImageFormat::Png => image::codecs::png::PngDecoder::new(image_bytes)
                    .and_then(|decoder| DynamicImage::from_decoder(decoder)),
                gpui::ImageFormat::Jpeg => image::codecs::jpeg::JpegDecoder::new(image_bytes)
                    .and_then(|decoder| DynamicImage::from_decoder(decoder)),
                gpui::ImageFormat::Webp => image::codecs::webp::WebPDecoder::new(image_bytes)
                    .and_then(|decoder| DynamicImage::from_decoder(decoder)),
                gpui::ImageFormat::Gif => image::codecs::gif::GifDecoder::new(image_bytes)
                    .and_then(|decoder| DynamicImage::from_decoder(decoder)),
                _ => return None,
            }
            .log_err()?;

            let (width, height) = image.dimensions();
            let image_size = size(DevicePixels(width as i32), DevicePixels(height as i32));

            let mut base64_image = Vec::new();

            {
                let mut base64_encoder = EncoderWriter::new(
                    Cursor::new(&mut base64_image),
                    &base64::engine::general_purpose::STANDARD,
                );

                if image_size.width.0 > ANTHROPIC_SIZE_LIMIT as i32
                    || image_size.height.0 > ANTHROPIC_SIZE_LIMIT as i32
                {
                    let new_bounds = ObjectFit::ScaleDown.get_bounds(
                        gpui::Bounds {
                            origin: point(px(0.0), px(0.0)),
                            size: size(px(ANTHROPIC_SIZE_LIMIT), px(ANTHROPIC_SIZE_LIMIT)),
                        },
                        image_size,
                    );
                    let image = image.resize(
                        new_bounds.size.width.0 as u32,
                        new_bounds.size.height.0 as u32,
                        image::imageops::FilterType::Triangle,
                    );

                    let mut png = Vec::new();
                    image
                        .write_with_encoder(PngEncoder::new(&mut png))
                        .log_err()?;

                    base64_encoder.write_all(png.as_slice()).log_err()?;
                } else if matches!(data.format(), gpui::ImageFormat::Png) {
                    // Already a PNG within the size limit; pass the original
                    // bytes through untouched.
                    base64_encoder.write_all(data.bytes()).log_err()?;
                } else {
                    // Re-encode other formats so that `source` always holds
                    // PNG data, as the field's documentation promises.
                    let mut png = Vec::new();
                    image
                        .write_with_encoder(PngEncoder::new(&mut png))
                        .log_err()?;

                    base64_encoder.write_all(png.as_slice()).log_err()?;
                }
            }

            // SAFETY: base64 output is pure ASCII, which is always valid UTF-8.
            let source = unsafe { String::from_utf8_unchecked(base64_image) };

            Some(LanguageModelImage {
                size: image_size,
                source: source.into(),
            })
        })
    }

    /// Converts a rendered frame into an LLM-ready format (a base64-encoded
    /// PNG), downscaling it first if it exceeds Anthropic's size limit.
    pub fn from_render_image(data: &RenderImage) -> Option<Self> {
        let image_size = data.size(0);

        let mut bytes = data.as_bytes(0).unwrap_or(&[]).to_vec();
        // Convert from BGRA to RGBA.
        for pixel in bytes.chunks_exact_mut(4) {
            pixel.swap(2, 0);
        }
        let mut image = image::RgbaImage::from_vec(
            image_size.width.0 as u32,
            image_size.height.0 as u32,
            bytes,
        )
        .expect("RenderImage byte length matches its dimensions");

        // https://docs.anthropic.com/en/docs/build-with-claude/vision
        if image_size.width.0 > ANTHROPIC_SIZE_LIMIT as i32
            || image_size.height.0 > ANTHROPIC_SIZE_LIMIT as i32
        {
            let new_bounds = ObjectFit::ScaleDown.get_bounds(
                gpui::Bounds {
                    origin: point(px(0.0), px(0.0)),
                    size: size(px(ANTHROPIC_SIZE_LIMIT), px(ANTHROPIC_SIZE_LIMIT)),
                },
                image_size,
            );

            image = resize(
                &image,
                new_bounds.size.width.0 as u32,
                new_bounds.size.height.0 as u32,
                image::imageops::FilterType::Triangle,
            );
        }

        let mut png = Vec::new();

        image
            .write_with_encoder(PngEncoder::new(&mut png))
            .log_err()?;

        let mut base64_image = Vec::new();

        {
            let mut base64_encoder = EncoderWriter::new(
                Cursor::new(&mut base64_image),
                &base64::engine::general_purpose::STANDARD,
            );

            base64_encoder.write_all(png.as_slice()).log_err()?;
        }

        // SAFETY: base64 output is pure ASCII, which is always valid UTF-8.
        let source = unsafe { String::from_utf8_unchecked(base64_image) };

        Some(LanguageModelImage {
            size: image_size,
            source: source.into(),
        })
    }

    pub fn estimate_tokens(&self) -> usize {
        let width = self.size.width.0.unsigned_abs() as usize;
        let height = self.size.height.0.unsigned_abs() as usize;

        // From: https://docs.anthropic.com/en/docs/build-with-claude/vision#calculate-image-costs
        // Note that there are a lot of conditions on Anthropic's API, and
        // OpenAI doesn't use this calculation, so this method is only a rough guess.
        (width * height) / 750
    }
}

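/// The result of a tool call, reported back to the model.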
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
pub struct LanguageModelToolResult {
    pub tool_use_id: LanguageModelToolUseId,
    pub tool_name: Arc<str>,
    pub is_error: bool,
    pub content: Arc<str>,
}

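/// One piece of content within a request message.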
#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)]
pub enum MessageContent {
    Text(String),
    Thinking {
        text: String,
        signature: Option<String>,
    },
    RedactedThinking(Vec<u8>),
    Image(LanguageModelImage),
    ToolUse(LanguageModelToolUse),
    ToolResult(LanguageModelToolResult),
}

impl From<String> for MessageContent {
    fn from(value: String) -> Self {
        MessageContent::Text(value)
    }
}

impl From<&str> for MessageContent {
    fn from(value: &str) -> Self {
        MessageContent::Text(value.to_string())
    }
}

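/// A single message in a completion request.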
#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Hash)]
pub struct LanguageModelRequestMessage {
    pub role: Role,
    pub content: Vec<MessageContent>,
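    /// Whether this message should serve as a breakpoint for prompt caching,
    /// for providers that support it.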
    pub cache: bool,
}

impl LanguageModelRequestMessage {
    pub fn string_contents(&self) -> String {
        let mut buffer = String::new();
        for string in self.content.iter().filter_map(|content| match content {
            MessageContent::Text(text) => Some(text.as_str()),
            MessageContent::Thinking { text, .. } => Some(text.as_str()),
            MessageContent::RedactedThinking(_) => None,
            MessageContent::ToolResult(tool_result) => Some(tool_result.content.as_ref()),
            MessageContent::ToolUse(_) | MessageContent::Image(_) => None,
        }) {
            buffer.push_str(string);
        }

        buffer
    }

    pub fn contents_empty(&self) -> bool {
        self.content.iter().all(|content| match content {
            MessageContent::Text(text) => text.chars().all(|c| c.is_whitespace()),
            MessageContent::Thinking { text, .. } => text.chars().all(|c| c.is_whitespace()),
            MessageContent::ToolResult(tool_result) => {
                tool_result.content.chars().all(|c| c.is_whitespace())
            }
            MessageContent::RedactedThinking(_)
            | MessageContent::ToolUse(_)
            | MessageContent::Image(_) => false,
        })
    }
}

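/// A tool the model may invoke, described to it by a JSON schema.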
#[derive(Debug, PartialEq, Hash, Clone, Serialize, Deserialize)]
pub struct LanguageModelRequestTool {
    pub name: String,
    pub description: String,
    pub input_schema: serde_json::Value,
}

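/// A completion request sent to a language model provider.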
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct LanguageModelRequest {
    pub thread_id: Option<String>,
    pub prompt_id: Option<String>,
    pub mode: Option<CompletionMode>,
    pub messages: Vec<LanguageModelRequestMessage>,
    pub tools: Vec<LanguageModelRequestTool>,
    pub stop: Vec<String>,
    pub temperature: Option<f32>,
}

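/// A message received from a language model.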
#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
pub struct LanguageModelResponseMessage {
    pub role: Option<Role>,
    pub content: Option<String>,
}
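
#[cfg(test)]
mod tests {
    use super::*;

    // A minimal sketch of how these types fit together; the `Role::User`
    // variant is assumed from `crate::role`, so adjust if the enum differs.
    #[test]
    fn string_contents_skips_non_text_content() {
        let message = LanguageModelRequestMessage {
            role: Role::User,
            content: vec![
                MessageContent::from("Hello, "),
                MessageContent::Thinking {
                    text: "world".to_string(),
                    signature: None,
                },
                MessageContent::RedactedThinking(vec![0xde, 0xad]),
                MessageContent::Image(LanguageModelImage::empty()),
            ],
            cache: false,
        };

        // Text and thinking content are concatenated; redacted thinking and
        // images are skipped.
        assert_eq!(message.string_contents(), "Hello, world");
        assert!(!message.contents_empty());
    }

    #[test]
    fn estimate_tokens_uses_anthropic_formula() {
        let image = LanguageModelImage {
            source: "".into(),
            size: size(DevicePixels(1092), DevicePixels(1092)),
        };

        // (1092 * 1092) / 750 = 1589 with integer division, per Anthropic's
        // image-cost documentation.
        assert_eq!(image.estimate_tokens(), 1589);
    }
}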