@@ -96,6 +96,10 @@ struct ModelSupportedFeatures {
streaming: bool,
#[serde(default)]
tool_calls: bool,
+ #[serde(default)]
+ parallel_tool_calls: bool,
+ #[serde(default)]
+ vision: bool,
}
#[derive(Clone, Copy, Serialize, Deserialize, Debug, Eq, PartialEq)]
@@ -107,6 +111,20 @@ pub enum ModelVendor {
Anthropic,
}
+#[derive(Serialize, Deserialize, Debug, Eq, PartialEq, Clone)] // One part of a multi-part `ChatMessage::User` body: text or an image reference.
+#[serde(tag = "type")] // Internally tagged: the variant name is carried in a "type" field next to the payload.
+pub enum ChatMessageContent {
+ #[serde(rename = "text")] // tag value on the wire: "text"
+ Text { text: String },
+ #[serde(rename = "image_url")] // tag value on the wire: "image_url" (differs from the Rust variant name `Image`)
+ Image { image_url: ImageUrl },
+}
+
+#[derive(Serialize, Deserialize, Debug, Eq, PartialEq, Clone)] // Payload of `ChatMessageContent::Image`; serializes as an object with a single `url` field.
+pub struct ImageUrl {
+ pub url: String, // in this change the only producer passes `LanguageModelImage::to_base64_url()`, i.e. a `data:` URL
+}
+
impl Model {
pub fn uses_streaming(&self) -> bool {
self.capabilities.supports.streaming
@@ -131,6 +149,14 @@ impl Model {
pub fn vendor(&self) -> ModelVendor {
self.vendor
}
+
+ pub fn supports_vision(&self) -> bool { // Returns the model's `vision` capability flag; the request builder uses it to decide whether image parts are sent.
+ self.capabilities.supports.vision // plain field read; the field is `#[serde(default)]`, so it is false when the capability is absent
+ }
+
+ pub fn supports_parallel_tool_calls(&self) -> bool { // Returns the model's `parallel_tool_calls` capability flag.
+ self.capabilities.supports.parallel_tool_calls // plain field read; the field is `#[serde(default)]`, so it is false when the capability is absent
+ }
}
#[derive(Serialize, Deserialize)]
@@ -177,7 +203,7 @@ pub enum ChatMessage {
tool_calls: Vec<ToolCall>,
},
User {
- content: String,
+ content: Vec<ChatMessageContent>,
},
System {
content: String,
@@ -536,7 +562,8 @@ async fn stream_completion(
)
.header("Authorization", format!("Bearer {}", api_key))
.header("Content-Type", "application/json")
- .header("Copilot-Integration-Id", "vscode-chat");
+ .header("Copilot-Integration-Id", "vscode-chat")
+ .header("Copilot-Vision-Request", "true");
let is_streaming = request.stream;
@@ -104,6 +104,10 @@ impl LanguageModelImage {
// so this method is more of a rough guess.
(width * height) / 750
}
+
+ pub fn to_base64_url(&self) -> String { // Builds a `data:` URL string by prefixing `self.source` with a fixed media-type header.
+ format!("data:image/png;base64,{}", self.source) // NOTE(review): media type is hard-coded to image/png and `source` is assumed to already be base64 PNG data — confirm
+ }
}
fn encode_as_base64(data: Arc<Image>, image: image::DynamicImage) -> Result<Vec<u8>> {
@@ -5,7 +5,7 @@ use std::sync::Arc;
use anyhow::{Result, anyhow};
use collections::HashMap;
use copilot::copilot_chat::{
- ChatMessage, CopilotChat, Model as CopilotChatModel, ModelVendor,
+ ChatMessage, ChatMessageContent, CopilotChat, ImageUrl, Model as CopilotChatModel, ModelVendor,
Request as CopilotChatRequest, ResponseEvent, Tool, ToolCall,
};
use copilot::{Copilot, Status};
@@ -444,23 +444,6 @@ impl CopilotChatLanguageModel {
let mut tool_called = false;
let mut messages: Vec<ChatMessage> = Vec::new();
for message in request_messages {
- let text_content = {
- let mut buffer = String::new();
- for string in message.content.iter().filter_map(|content| match content {
- MessageContent::Text(text) | MessageContent::Thinking { text, .. } => {
- Some(text.as_str())
- }
- MessageContent::ToolUse(_)
- | MessageContent::RedactedThinking(_)
- | MessageContent::ToolResult(_)
- | MessageContent::Image(_) => None,
- }) {
- buffer.push_str(string);
- }
-
- buffer
- };
-
match message.role {
Role::User => {
for content in &message.content {
@@ -472,9 +455,36 @@ impl CopilotChatLanguageModel {
}
}
- if !text_content.is_empty() {
+ let mut content_parts = Vec::new();
+ for content in &message.content {
+ match content {
+ MessageContent::Text(text) | MessageContent::Thinking { text, .. }
+ if !text.is_empty() =>
+ {
+ if let Some(ChatMessageContent::Text { text: text_content }) =
+ content_parts.last_mut()
+ {
+ text_content.push_str(text);
+ } else {
+ content_parts.push(ChatMessageContent::Text {
+ text: text.to_string(),
+ });
+ }
+ }
+ MessageContent::Image(image) if self.model.supports_vision() => {
+ content_parts.push(ChatMessageContent::Image {
+ image_url: ImageUrl {
+ url: image.to_base64_url(),
+ },
+ });
+ }
+ _ => {}
+ }
+ }
+
+ if !content_parts.is_empty() {
messages.push(ChatMessage::User {
- content: text_content,
+ content: content_parts,
});
}
}
@@ -495,6 +505,23 @@ impl CopilotChatLanguageModel {
}
}
+ let text_content = {
+ let mut buffer = String::new();
+ for string in message.content.iter().filter_map(|content| match content {
+ MessageContent::Text(text) | MessageContent::Thinking { text, .. } => {
+ Some(text.as_str())
+ }
+ MessageContent::ToolUse(_)
+ | MessageContent::RedactedThinking(_)
+ | MessageContent::ToolResult(_)
+ | MessageContent::Image(_) => None,
+ }) {
+ buffer.push_str(string);
+ }
+
+ buffer
+ };
+
messages.push(ChatMessage::Assistant {
content: if text_content.is_empty() {
None