Detailed changes
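These hunks add an opt-in `interleaved_reasoning` capability for OpenAI-compatible models. When the flag is set, `into_open_ai` collects `MessageContent::Thinking` text into a buffer and emits it as a new optional `reasoning_content` field on assistant request messages, instead of folding it into the message text. The flag is threaded through `into_open_ai` as a new trailing parameter; every other OpenAI-flavored call site (OpenAI, OpenCode, Vercel, xAI, Cloud) passes `false`, and the batching client, repair flow, and `add_message_content_part` construct assistant messages with `reasoning_content: None`, so their payloads are unchanged. A regression test covers both modes, and the OpenAI-compatible docs gain an entry for the new capability.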
@@ -157,6 +157,7 @@ impl ModelInput {
parallel_tool_calls,
prompt_cache_key,
chat_completions,
+ ..
} = ModelCapabilities::default();
Self {
@@ -209,6 +210,7 @@ impl ModelInput {
parallel_tool_calls: self.capabilities.supports_parallel_tool_calls.selected(),
prompt_cache_key: self.capabilities.supports_prompt_cache_key.selected(),
chat_completions: self.capabilities.supports_chat_completions.selected(),
+ interleaved_reasoning: false,
},
})
}
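Both `ModelInput` hunks keep the settings-UI round trip compiling without surfacing the new flag: the destructuring ignores it via `..`, and writes from the UI hard-code `interleaved_reasoning: false`, which suggests the capability is currently only settable directly in settings.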
@@ -485,6 +485,7 @@ impl BatchingOpenAiClient {
"assistant" => RequestMessage::Assistant {
content: Some(MessageContent::Plain(msg.content)),
tool_calls: Vec::new(),
+ reasoning_content: None,
},
"system" => RequestMessage::System {
content: MessageContent::Plain(msg.content),
@@ -388,6 +388,7 @@ pub async fn run_repair(
open_ai::RequestMessage::Assistant {
content: Some(open_ai::MessageContent::Plain(teacher_response.clone())),
tool_calls: vec![],
+ reasoning_content: None,
},
// Turn 3: Repair critique and instructions
open_ai::RequestMessage::User {
@@ -401,6 +401,7 @@ impl LanguageModel for OpenAiLanguageModel {
self.model.supports_prompt_cache_key(),
self.max_output_tokens(),
self.model.reasoning_effort(),
+ false,
);
let completions = self.stream_completion(request, cx);
async move {
@@ -403,6 +403,7 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
self.model.capabilities.prompt_cache_key,
self.max_output_tokens(),
self.model.reasoning_effort,
+ self.model.capabilities.interleaved_reasoning,
);
let completions = self.stream_completion(request, cx);
async move {
@@ -490,6 +490,7 @@ impl LanguageModel for OpenCodeLanguageModel {
false,
self.model.max_output_tokens(),
None,
+ false,
);
let stream = self.stream_openai_chat(openai_request, cx);
async move {
@@ -324,6 +324,7 @@ impl LanguageModel for VercelLanguageModel {
self.model.supports_prompt_cache_key(),
self.max_output_tokens(),
None,
+ false,
);
let completions = self.stream_completion(request, cx);
async move {
@@ -461,6 +461,7 @@ impl LanguageModel for VercelAiGatewayLanguageModel {
self.model.capabilities.prompt_cache_key,
self.max_output_tokens(),
None,
+ false,
);
let completions = self.stream_open_ai(request, cx);
async move {
@@ -591,6 +592,7 @@ async fn list_models(
parallel_tool_calls,
prompt_cache_key,
chat_completions: true,
+ interleaved_reasoning: false,
},
});
}
@@ -347,6 +347,7 @@ impl LanguageModel for XAiLanguageModel {
self.model.supports_prompt_cache_key(),
self.max_output_tokens(),
None,
+ false,
);
let completions = self.stream_completion(request, cx);
async move {
@@ -600,6 +600,7 @@ impl<TP: CloudLlmTokenProvider + 'static> LanguageModel for CloudLanguageModel<T
false,
None,
None,
+ false,
);
let auth_context = token_provider.auth_context(cx);
let future = self.request_limiter.stream(async move {
@@ -29,13 +29,18 @@ pub fn into_open_ai(
supports_prompt_cache_key: bool,
max_output_tokens: Option<u64>,
reasoning_effort: Option<ReasoningEffort>,
+ interleaved_reasoning: bool,
) -> crate::Request {
let stream = !model_id.starts_with("o1-");
let mut messages = Vec::new();
+ let mut current_reasoning: Option<String> = None;
for message in request.messages {
for content in message.content {
match content {
+ MessageContent::Thinking { text, .. } if interleaved_reasoning => {
+ current_reasoning.get_or_insert_default().push_str(&text);
+ }
MessageContent::Text(text) | MessageContent::Thinking { text, .. } => {
let should_add = if message.role == Role::User {
// Including whitespace-only user messages can cause errors with OpenAI-compatible APIs
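Arm order matters here: the guarded `Thinking` arm must come before the combined `Text | Thinking` arm, so when `interleaved_reasoning` is `false` a thinking block falls through to the existing text path and is serialized inline, exactly as before.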
@@ -50,6 +55,15 @@ pub fn into_open_ai(
message.role,
&mut messages,
);
+ if let Some(reasoning) = current_reasoning.take() {
+ if let Some(crate::RequestMessage::Assistant {
+ reasoning_content,
+ ..
+ }) = messages.last_mut()
+ {
+ *reasoning_content = Some(reasoning);
+ }
+ }
}
}
MessageContent::RedactedThinking(_) => {}
@@ -85,6 +99,7 @@ pub fn into_open_ai(
messages.push(crate::RequestMessage::Assistant {
content: None,
tool_calls: vec![tool_call],
+ reasoning_content: current_reasoning.take(),
});
}
}
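Pending reasoning is flushed in two places: after `add_message_content_part` pushes a text part, it is attached to the last message if (and only if) that message is an assistant message, and an assistant message created for a bare tool call takes it directly via `current_reasoning.take()`. Note that the first path calls `take()` before the pattern match, so reasoning that does not land on an assistant message is silently dropped rather than carried forward.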
@@ -362,6 +377,7 @@ fn add_message_content_part(
Role::Assistant => crate::RequestMessage::Assistant {
content: Some(crate::MessageContent::from(vec![new_part])),
tool_calls: Vec::new(),
+ reasoning_content: None,
},
Role::System => crate::RequestMessage::System {
content: crate::MessageContent::from(vec![new_part]),
@@ -1690,4 +1706,97 @@ mod tests {
"OutputItemDone reasoning should not produce Thinking events"
);
}
+
+ #[test]
+ fn into_open_ai_interleaved_reasoning() {
+ let tool_use_id = LanguageModelToolUseId::from("call-1");
+ let tool_input = json!({"query": "foo"});
+ let tool_arguments = serde_json::to_string(&tool_input).unwrap();
+ let tool_use = LanguageModelToolUse {
+ id: tool_use_id.clone(),
+ name: Arc::from("search"),
+ raw_input: tool_arguments.clone(),
+ input: tool_input,
+ is_input_complete: true,
+ thought_signature: None,
+ };
+ let tool_result = LanguageModelToolResult {
+ tool_use_id,
+ tool_name: Arc::from("search"),
+ is_error: false,
+ content: LanguageModelToolResultContent::Text(Arc::from("result")),
+ output: None,
+ };
+ let request = LanguageModelRequest {
+ thread_id: None,
+ prompt_id: None,
+ intent: None,
+ messages: vec![
+ LanguageModelRequestMessage {
+ role: Role::User,
+ content: vec![MessageContent::Text("search for something".into())],
+ cache: false,
+ reasoning_details: None,
+ },
+ LanguageModelRequestMessage {
+ role: Role::Assistant,
+ content: vec![
+ MessageContent::Thinking {
+ text: "I should search".into(),
+ signature: None,
+ },
+ MessageContent::Text("Searching now.".into()),
+ MessageContent::ToolUse(tool_use),
+ ],
+ cache: false,
+ reasoning_details: None,
+ },
+ LanguageModelRequestMessage {
+ role: Role::Assistant,
+ content: vec![MessageContent::ToolResult(tool_result)],
+ cache: false,
+ reasoning_details: None,
+ },
+ ],
+ tools: vec![],
+ tool_choice: None,
+ stop: vec![],
+ temperature: None,
+ thinking_allowed: true,
+ thinking_effort: None,
+ speed: None,
+ };
+
+ let result = into_open_ai(request.clone(), "model", false, false, None, None, true);
+ assert_eq!(
+ serde_json::to_value(&result).unwrap()["messages"],
+ json!([
+ {"role": "user", "content": "search for something"},
+ {
+ "role": "assistant",
+ "content": "Searching now.",
+ "tool_calls": [{"id": "call-1", "type": "function", "function": {"name": "search", "arguments": tool_arguments}}],
+ "reasoning_content": "I should search"
+ },
+ {"role": "tool", "content": "result", "tool_call_id": "call-1"}
+ ])
+ );
+
+ let result = into_open_ai(request, "model", false, false, None, None, false);
+ assert_eq!(
+ serde_json::to_value(&result).unwrap()["messages"],
+ json!([
+ {"role": "user", "content": "search for something"},
+ {
+ "role": "assistant",
+ "content": [
+ {"type": "text", "text": "I should search"},
+ {"type": "text", "text": "Searching now."}
+ ],
+ "tool_calls": [{"id": "call-1", "type": "function", "function": {"name": "search", "arguments": tool_arguments}}]
+ },
+ {"role": "tool", "content": "result", "tool_call_id": "call-1"}
+ ])
+ );
+ }
}
@@ -366,6 +366,8 @@ pub enum RequestMessage {
content: Option<MessageContent>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
tool_calls: Vec<ToolCall>,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ reasoning_content: Option<String>,
},
User {
content: MessageContent,
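Because `reasoning_content` is skipped when `None`, providers that never set the capability see byte-identical payloads. A standalone sketch of that serde behavior, using a plain struct as a stand-in for the tagged `Assistant` variant (not the crate's actual type):

```rust
use serde::{Deserialize, Serialize};

// Stand-in for the Assistant variant's new field (sketch only).
#[derive(Serialize, Deserialize)]
struct Assistant {
    content: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    reasoning_content: Option<String>,
}

fn main() {
    // `None` is omitted entirely, so pre-existing payloads are unchanged.
    let plain = Assistant { content: Some("hi".into()), reasoning_content: None };
    assert_eq!(serde_json::to_string(&plain).unwrap(), r#"{"content":"hi"}"#);

    // `Some` adds the field alongside the existing ones.
    let with = Assistant {
        content: Some("hi".into()),
        reasoning_content: Some("I should search".into()),
    };
    assert_eq!(
        serde_json::to_string(&with).unwrap(),
        r#"{"content":"hi","reasoning_content":"I should search"}"#
    );

    // `#[serde(default)]` keeps older payloads without the field deserializable.
    let parsed: Assistant = serde_json::from_str(r#"{"content":"hi"}"#).unwrap();
    assert!(parsed.reasoning_content.is_none());
}
```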
@@ -289,6 +289,8 @@ pub struct OpenAiCompatibleModelCapabilities {
pub prompt_cache_key: bool,
#[serde(default = "default_true")]
pub chat_completions: bool,
+ #[serde(default)]
+ pub interleaved_reasoning: bool,
}
impl Default for OpenAiCompatibleModelCapabilities {
@@ -299,6 +301,7 @@ impl Default for OpenAiCompatibleModelCapabilities {
parallel_tool_calls: false,
prompt_cache_key: false,
chat_completions: default_true(),
+ interleaved_reasoning: false,
}
}
}
@@ -607,6 +607,7 @@ By default, OpenAI-compatible models inherit the following capabilities:
- `parallel_tool_calls`: false (does not support `parallel_tool_calls` parameter)
- `prompt_cache_key`: false (does not support `prompt_cache_key` parameter)
- `chat_completions`: true (calls the `/chat/completions` endpoint)
+- `interleaved_reasoning`: false (thinking tokens are sent inline in message text; set to true to send them as a dedicated `reasoning_content` field for models that expect it)
If a provider exposes models that only work with the Responses API, set `chat_completions` to `false` for those entries; Zed then uses the Responses endpoint for those models instead.
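For reference, a hypothetical settings entry that opts a model into the new capability could look like the sketch below. The provider name, URL, and model name are placeholders, and the overall shape is assumed from the OpenAI-compatible provider settings this page documents; unspecified capabilities keep the defaults listed above.

```json
{
  "language_models": {
    "openai_compatible": {
      "Example Provider": {
        "api_url": "https://api.example.com/v1",
        "available_models": [
          {
            "name": "example-reasoning-model",
            "max_tokens": 32768,
            "capabilities": {
              "tools": true,
              "interleaved_reasoning": true
            }
          }
        ]
      }
    }
  }
}
```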