assistant: Only push text content if not empty with image content (#16270)

Kyle Kelley created

If you submit an image with empty space above it and text below it, the
request fails with this error:


![image](https://github.com/user-attachments/assets/a4a2265e-815f-48b5-b09e-e178fce82ef7)

Now it instead fails with an error about needing a message.

<img width="640" alt="image"
src="https://github.com/user-attachments/assets/72b267eb-b288-40a5-a829-750121ff16cc">

It will, however, now work with text above the image and empty text below it.

Release Notes:

- Improved conformance with Anthropic Images in Chat Completions API

Change summary

crates/assistant/src/context.rs      | 43 ++++++++++++++++++-----------
crates/language_model/src/request.rs |  1 
2 files changed, 26 insertions(+), 18 deletions(-)

Detailed changes

crates/assistant/src/context.rs 🔗

@@ -22,7 +22,7 @@ use gpui::{
 use language::{AnchorRangeExt, Bias, Buffer, LanguageRegistry, OffsetRangeExt, Point, ToOffset};
 use language_model::{
     LanguageModel, LanguageModelCacheConfiguration, LanguageModelImage, LanguageModelRegistry,
-    LanguageModelRequest, LanguageModelRequestMessage, Role,
+    LanguageModelRequest, LanguageModelRequestMessage, MessageContent, Role,
 };
 use open_ai::Model as OpenAiModel;
 use paths::{context_images_dir, contexts_dir};
@@ -348,18 +348,15 @@ pub struct Message {
 }
 
 impl Message {
-    fn to_request_message(&self, buffer: &Buffer) -> LanguageModelRequestMessage {
+    fn to_request_message(&self, buffer: &Buffer) -> Option<LanguageModelRequestMessage> {
         let mut content = Vec::new();
 
         let mut range_start = self.offset_range.start;
         for (image_offset, message_image) in self.image_offsets.iter() {
             if *image_offset != range_start {
-                content.push(
-                    buffer
-                        .text_for_range(range_start..*image_offset)
-                        .collect::<String>()
-                        .into(),
-                )
+                if let Some(text) = Self::collect_text_content(buffer, range_start..*image_offset) {
+                    content.push(text);
+                }
             }
 
             if let Some(image) = message_image.image.clone().now_or_never().flatten() {
@@ -369,18 +366,30 @@ impl Message {
             range_start = *image_offset;
         }
         if range_start != self.offset_range.end {
-            content.push(
-                buffer
-                    .text_for_range(range_start..self.offset_range.end)
-                    .collect::<String>()
-                    .into(),
-            )
+            if let Some(text) =
+                Self::collect_text_content(buffer, range_start..self.offset_range.end)
+            {
+                content.push(text);
+            }
+        }
+
+        if content.is_empty() {
+            return None;
         }
 
-        LanguageModelRequestMessage {
+        Some(LanguageModelRequestMessage {
             role: self.role,
             content,
             cache: self.cache,
+        })
+    }
+
+    fn collect_text_content(buffer: &Buffer, range: Range<usize>) -> Option<MessageContent> {
+        let text: String = buffer.text_for_range(range.clone()).collect();
+        if text.trim().is_empty() {
+            None
+        } else {
+            Some(MessageContent::Text(text))
         }
     }
 }
@@ -1619,7 +1628,7 @@ impl Context {
         let request_messages = self
             .messages(cx)
             .filter(|message| message.status == MessageStatus::Done)
-            .map(|message| message.to_request_message(&buffer))
+            .filter_map(|message| message.to_request_message(&buffer))
             .collect();
 
         LanguageModelRequest {
@@ -1945,7 +1954,7 @@ impl Context {
 
             let messages = self
                 .messages(cx)
-                .map(|message| message.to_request_message(self.buffer.read(cx)))
+                .filter_map(|message| message.to_request_message(self.buffer.read(cx)))
                 .chain(Some(LanguageModelRequestMessage {
                     role: Role::User,
                     content: vec![

crates/language_model/src/request.rs 🔗

@@ -307,7 +307,6 @@ impl LanguageModelRequest {
                     let anthropic_message_content: Vec<anthropic::Content> = message
                         .content
                         .into_iter()
-                        // TODO: filter out the empty messages in the message construction step
                         .filter_map(|content| match content {
                             MessageContent::Text(t) if !t.is_empty() => {
                                 Some(anthropic::Content::Text {