From 4bb04cef9d26721c48f3154b2e5e5c0b31e9fcac Mon Sep 17 00:00:00 2001
From: Richard Feldman
Date: Tue, 20 May 2025 16:50:02 -0400
Subject: [PATCH] Accept wrapped text content from LLM providers (#31048)

Some providers sometimes send `{ "type": "text", "text": ... }` instead of
just the text as a string. Now we accept those instead of erroring.

Release Notes:

- N/A
---
 crates/agent/src/thread.rs                     | 15 +++++++++++----
 crates/eval/src/instance.rs                    | 10 +++++++---
 crates/language_model/src/request.rs           | 14 ++++++++++++--
 .../language_models/src/provider/anthropic.rs  | 17 +++++++++++------
 crates/language_models/src/provider/bedrock.rs | 11 ++++++++---
 .../src/provider/copilot_chat.rs               |  8 ++++++--
 crates/language_models/src/provider/google.rs  |  7 +++++--
 crates/language_models/src/provider/mistral.rs |  8 ++++++--
 crates/language_models/src/provider/open_ai.rs |  8 ++++++--
 9 files changed, 72 insertions(+), 26 deletions(-)

diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs
index 89765bd6c8dc62b0faf4d951aa2f742ea9b47ba4..f872085b8a953ed7ca98422d7936bfdbe656bb2d 100644
--- a/crates/agent/src/thread.rs
+++ b/crates/agent/src/thread.rs
@@ -24,7 +24,7 @@ use language_model::{
     LanguageModelRequestMessage, LanguageModelRequestTool, LanguageModelToolResult,
     LanguageModelToolResultContent, LanguageModelToolUseId, MessageContent,
     ModelRequestLimitReachedError, PaymentRequiredError, RequestUsage, Role, SelectedModel,
-    StopReason, TokenUsage,
+    StopReason, TokenUsage, WrappedTextContent,
 };
 use postage::stream::Stream as _;
 use project::Project;
@@ -881,7 +881,10 @@ impl Thread {
 
     pub fn output_for_tool(&self, id: &LanguageModelToolUseId) -> Option<&Arc<str>> {
         match &self.tool_use.tool_result(id)?.content {
-            LanguageModelToolResultContent::Text(str) => Some(str),
+            LanguageModelToolResultContent::Text(text)
+            | LanguageModelToolResultContent::WrappedText(WrappedTextContent { text, .. }) => {
+                Some(text)
+            }
             LanguageModelToolResultContent::Image(_) => {
                 // TODO: We should display image
                 None
@@ -2515,8 +2518,12 @@ impl Thread {
 
             writeln!(markdown, "**\n")?;
             match &tool_result.content {
-                LanguageModelToolResultContent::Text(str) => {
-                    writeln!(markdown, "{}", str)?;
+                LanguageModelToolResultContent::Text(text)
+                | LanguageModelToolResultContent::WrappedText(WrappedTextContent {
+                    text,
+                    ..
+                }) => {
+                    writeln!(markdown, "{text}")?;
                 }
                 LanguageModelToolResultContent::Image(image) => {
                     writeln!(markdown, "![Image](data:base64,{})", image.source)?;
diff --git a/crates/eval/src/instance.rs b/crates/eval/src/instance.rs
index 6baeda8fa7f6075ad6f41cb432e7ac04c8863453..35ebf17257e602b471171c944cf5dcc8bbaf37e2 100644
--- a/crates/eval/src/instance.rs
+++ b/crates/eval/src/instance.rs
@@ -9,7 +9,7 @@ use handlebars::Handlebars;
 use language::{Buffer, DiagnosticSeverity, OffsetRangeExt as _};
 use language_model::{
     LanguageModel, LanguageModelCompletionEvent, LanguageModelRequest, LanguageModelRequestMessage,
-    LanguageModelToolResultContent, MessageContent, Role, TokenUsage,
+    LanguageModelToolResultContent, MessageContent, Role, TokenUsage, WrappedTextContent,
 };
 use project::lsp_store::OpenLspBufferHandle;
 use project::{DiagnosticSummary, Project, ProjectPath};
@@ -973,8 +973,12 @@ impl RequestMarkdown {
             }
 
             match &tool_result.content {
-                LanguageModelToolResultContent::Text(str) => {
-                    writeln!(messages, "{}\n", str).ok();
+                LanguageModelToolResultContent::Text(text)
+                | LanguageModelToolResultContent::WrappedText(WrappedTextContent {
+                    text,
+                    ..
+                }) => {
+                    writeln!(messages, "{text}\n").ok();
                 }
                 LanguageModelToolResultContent::Image(image) => {
                     writeln!(messages, "![Image](data:base64,{})\n", image.source).ok();
diff --git a/crates/language_model/src/request.rs b/crates/language_model/src/request.rs
index a78c6b4ce2479d621028b9f7b0e807ca607174e9..1a6c695192cbc614e63c2ee5c354f01619c98a79 100644
--- a/crates/language_model/src/request.rs
+++ b/crates/language_model/src/request.rs
@@ -153,19 +153,29 @@ pub struct LanguageModelToolResult {
 pub enum LanguageModelToolResultContent {
     Text(Arc<str>),
     Image(LanguageModelImage),
+    WrappedText(WrappedTextContent),
+}
+
+#[derive(Debug, Clone, Deserialize, Serialize, Eq, PartialEq, Hash)]
+pub struct WrappedTextContent {
+    #[serde(rename = "type")]
+    pub content_type: String,
+    pub text: Arc<str>,
 }
 
 impl LanguageModelToolResultContent {
     pub fn to_str(&self) -> Option<&str> {
         match self {
-            Self::Text(text) => Some(&text),
+            Self::Text(text) | Self::WrappedText(WrappedTextContent { text, .. }) => Some(&text),
             Self::Image(_) => None,
         }
     }
 
     pub fn is_empty(&self) -> bool {
         match self {
-            Self::Text(text) => text.chars().all(|c| c.is_whitespace()),
+            Self::Text(text) | Self::WrappedText(WrappedTextContent { text, .. }) => {
+                text.chars().all(|c| c.is_whitespace())
+            }
             Self::Image(_) => false,
         }
     }
diff --git a/crates/language_models/src/provider/anthropic.rs b/crates/language_models/src/provider/anthropic.rs
index a87d730093a134e280c2ddd173fdfb1e6f25e763..298efe8805623622f1ef5cbb71446ae9c62b32d2 100644
--- a/crates/language_models/src/provider/anthropic.rs
+++ b/crates/language_models/src/provider/anthropic.rs
@@ -19,7 +19,7 @@ use language_model::{
     LanguageModelCompletionError, LanguageModelId, LanguageModelKnownError, LanguageModelName,
     LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName,
     LanguageModelProviderState, LanguageModelRequest, LanguageModelToolChoice,
-    LanguageModelToolResultContent, MessageContent, RateLimiter, Role,
+    LanguageModelToolResultContent, MessageContent, RateLimiter, Role, WrappedTextContent,
 };
 use language_model::{LanguageModelCompletionEvent, LanguageModelToolUse, StopReason};
 use schemars::JsonSchema;
@@ -350,8 +350,12 @@ pub fn count_anthropic_tokens(
                     // TODO: Estimate token usage from tool uses.
                 }
                 MessageContent::ToolResult(tool_result) => match &tool_result.content {
-                    LanguageModelToolResultContent::Text(txt) => {
-                        string_contents.push_str(txt);
+                    LanguageModelToolResultContent::Text(text)
+                    | LanguageModelToolResultContent::WrappedText(WrappedTextContent {
+                        text,
+                        ..
+                    }) => {
+                        string_contents.push_str(text);
                     }
                     LanguageModelToolResultContent::Image(image) => {
                         tokens_from_images += image.estimate_tokens();
@@ -588,9 +592,10 @@ pub fn into_anthropic(
                     tool_use_id: tool_result.tool_use_id.to_string(),
                     is_error: tool_result.is_error,
                     content: match tool_result.content {
-                        LanguageModelToolResultContent::Text(text) => {
-                            ToolResultContent::Plain(text.to_string())
-                        }
+                        LanguageModelToolResultContent::Text(text)
+                        | LanguageModelToolResultContent::WrappedText(
+                            WrappedTextContent { text, .. },
+                        ) => ToolResultContent::Plain(text.to_string()),
                         LanguageModelToolResultContent::Image(image) => {
                             ToolResultContent::Multipart(vec![ToolResultPart::Image {
                                 source: anthropic::ImageSource {
diff --git a/crates/language_models/src/provider/bedrock.rs b/crates/language_models/src/provider/bedrock.rs
index f4f8e2dce415956a3da792de5dd75e6f17bacb42..38d1f69a8f32171e21870043244b15e0c4f505c8 100644
--- a/crates/language_models/src/provider/bedrock.rs
+++ b/crates/language_models/src/provider/bedrock.rs
@@ -37,7 +37,7 @@ use language_model::{
     LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName,
     LanguageModelProviderState, LanguageModelRequest, LanguageModelToolChoice,
     LanguageModelToolResultContent, LanguageModelToolUse, MessageContent, RateLimiter, Role,
-    TokenUsage,
+    TokenUsage, WrappedTextContent,
 };
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
@@ -641,7 +641,8 @@ pub fn into_bedrock(
                     BedrockToolResultBlock::builder()
                         .tool_use_id(tool_result.tool_use_id.to_string())
                         .content(match tool_result.content {
-                            LanguageModelToolResultContent::Text(text) => {
+                            LanguageModelToolResultContent::Text(text)
+                            | LanguageModelToolResultContent::WrappedText(WrappedTextContent { text, .. }) => {
                                 BedrockToolResultContentBlock::Text(text.to_string())
                             }
                             LanguageModelToolResultContent::Image(_) => {
@@ -776,7 +777,11 @@ pub fn get_bedrock_tokens(
                     // TODO: Estimate token usage from tool uses.
                 }
                 MessageContent::ToolResult(tool_result) => match tool_result.content {
-                    LanguageModelToolResultContent::Text(text) => {
+                    LanguageModelToolResultContent::Text(text)
+                    | LanguageModelToolResultContent::WrappedText(WrappedTextContent {
+                        text,
+                        ..
+                    }) => {
                         string_contents.push_str(&text);
                     }
                     LanguageModelToolResultContent::Image(image) => {
diff --git a/crates/language_models/src/provider/copilot_chat.rs b/crates/language_models/src/provider/copilot_chat.rs
index 25f97ffd5986226e966e68f043767b31c6232ed3..78b23af805c956fb675d14239df97f49fc897a3d 100644
--- a/crates/language_models/src/provider/copilot_chat.rs
+++ b/crates/language_models/src/provider/copilot_chat.rs
@@ -23,7 +23,7 @@ use language_model::{
     LanguageModelProviderName, LanguageModelProviderState, LanguageModelRequest,
     LanguageModelRequestMessage, LanguageModelToolChoice, LanguageModelToolResultContent,
     LanguageModelToolSchemaFormat, LanguageModelToolUse, MessageContent, RateLimiter, Role,
-    StopReason,
+    StopReason, WrappedTextContent,
 };
 use settings::SettingsStore;
 use std::time::Duration;
@@ -455,7 +455,11 @@ fn into_copilot_chat(
         for content in &message.content {
             if let MessageContent::ToolResult(tool_result) = content {
                 let content = match &tool_result.content {
-                    LanguageModelToolResultContent::Text(text) => text.to_string().into(),
+                    LanguageModelToolResultContent::Text(text)
+                    | LanguageModelToolResultContent::WrappedText(WrappedTextContent {
+                        text,
+                        ..
+                    }) => text.to_string().into(),
                     LanguageModelToolResultContent::Image(image) => {
                         if model.supports_vision() {
                             ChatMessageContent::Multipart(vec![ChatMessagePart::Image {
diff --git a/crates/language_models/src/provider/google.rs b/crates/language_models/src/provider/google.rs
index 11517abc186bf7250a781486521e7debbd253a0c..eaa8e5d6cc80ea1d430151977776b8442d3aa2df 100644
--- a/crates/language_models/src/provider/google.rs
+++ b/crates/language_models/src/provider/google.rs
@@ -426,14 +426,17 @@ pub fn into_google(
             }
             language_model::MessageContent::ToolResult(tool_result) => {
                 match tool_result.content {
-                    language_model::LanguageModelToolResultContent::Text(txt) => {
+                    language_model::LanguageModelToolResultContent::Text(text)
+                    | language_model::LanguageModelToolResultContent::WrappedText(
+                        language_model::WrappedTextContent { text, .. },
+                    ) => {
                         vec![Part::FunctionResponsePart(
                             google_ai::FunctionResponsePart {
                                 function_response: google_ai::FunctionResponse {
                                     name: tool_result.tool_name.to_string(),
                                     // The API expects a valid JSON object
                                     response: serde_json::json!({
-                                        "output": txt
+                                        "output": text
                                     }),
                                 },
                             },
diff --git a/crates/language_models/src/provider/mistral.rs b/crates/language_models/src/provider/mistral.rs
index 93317d1a5132089dacae68def97aef013610b7e3..630fe90399ea2c77bee419a5ae5bbfa7f4a61a13 100644
--- a/crates/language_models/src/provider/mistral.rs
+++ b/crates/language_models/src/provider/mistral.rs
@@ -13,7 +13,7 @@ use language_model::{
     LanguageModelId, LanguageModelName, LanguageModelProvider, LanguageModelProviderId,
     LanguageModelProviderName, LanguageModelProviderState, LanguageModelRequest,
     LanguageModelToolChoice, LanguageModelToolResultContent, LanguageModelToolUse, MessageContent,
-    RateLimiter, Role, StopReason,
+    RateLimiter, Role, StopReason, WrappedTextContent,
 };
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
@@ -428,7 +428,11 @@ pub fn into_mistral(
                 }
                 MessageContent::ToolResult(tool_result) => {
                     let content = match &tool_result.content {
-                        LanguageModelToolResultContent::Text(text) => text.to_string(),
+                        LanguageModelToolResultContent::Text(text)
+                        | LanguageModelToolResultContent::WrappedText(WrappedTextContent {
+                            text,
+                            ..
+                        }) => text.to_string(),
                         LanguageModelToolResultContent::Image(_) => {
                             // TODO: Mistral image support
                             "[Tool responded with an image, but Zed doesn't support these in Mistral models yet]".to_string()
diff --git a/crates/language_models/src/provider/open_ai.rs b/crates/language_models/src/provider/open_ai.rs
index f9e749ee6e6725f922292aa104be8c57330f7595..9addfc89fa8392c281bae2c38680d3f088f551af 100644
--- a/crates/language_models/src/provider/open_ai.rs
+++ b/crates/language_models/src/provider/open_ai.rs
@@ -13,7 +13,7 @@ use language_model::{
     LanguageModelId, LanguageModelName, LanguageModelProvider, LanguageModelProviderId,
     LanguageModelProviderName, LanguageModelProviderState, LanguageModelRequest,
     LanguageModelToolChoice, LanguageModelToolResultContent, LanguageModelToolUse, MessageContent,
-    RateLimiter, Role, StopReason,
+    RateLimiter, Role, StopReason, WrappedTextContent,
 };
 use open_ai::{ImageUrl, Model, ResponseStreamEvent, stream_completion};
 use schemars::JsonSchema;
@@ -407,7 +407,11 @@ pub fn into_open_ai(
                 }
                 MessageContent::ToolResult(tool_result) => {
                     let content = match &tool_result.content {
-                        LanguageModelToolResultContent::Text(text) => {
+                        LanguageModelToolResultContent::Text(text)
+                        | LanguageModelToolResultContent::WrappedText(WrappedTextContent {
+                            text,
+                            ..
+                        }) => {
                             vec![open_ai::MessagePart::Text {
                                 text: text.to_string(),
                             }]
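
Illustrative appendix (not part of the patch itself): the change above lets tool-result text arrive either as a bare JSON string or wrapped as `{ "type": "text", "text": ... }`, with the new `WrappedText(WrappedTextContent)` variant matched everywhere plain `Text` was handled before. The sketch below is a minimal, self-contained illustration of accepting both shapes. It uses hypothetical stand-in types (`TextContent`, `WrappedText`) and assumes serde's untagged deserialization as one possible mechanism; it is not Zed's actual definition from `crates/language_model/src/request.rs`.

// Minimal sketch, assuming the serde (with "derive") and serde_json crates.
// The types here are stand-ins for illustration only.
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct WrappedText {
    // Providers send "type": "text" alongside the payload.
    #[serde(rename = "type")]
    #[allow(dead_code)]
    content_type: String,
    text: String,
}

// An untagged enum tries each variant in order, so a bare JSON string
// matches `Plain` and a `{ "type": ..., "text": ... }` object matches `Wrapped`.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum TextContent {
    Plain(String),
    Wrapped(WrappedText),
}

impl TextContent {
    // Either shape exposes the same logical text.
    fn text(&self) -> &str {
        match self {
            TextContent::Plain(text) => text,
            TextContent::Wrapped(WrappedText { text, .. }) => text,
        }
    }
}

fn main() -> serde_json::Result<()> {
    let plain: TextContent = serde_json::from_str(r#""hello""#)?;
    let wrapped: TextContent = serde_json::from_str(r#"{ "type": "text", "text": "hello" }"#)?;
    assert_eq!(plain.text(), wrapped.text());
    Ok(())
}

Callers that only care about the text can keep matching both variants in one arm, which is the same pattern the patch applies across the provider crates.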