Created by Matt Stallone and Bennet Bo Fenner.
Add support for OpenAI's `/responses` endpoint for models that don't
support the `/chat/completions` API. This enables newer model variants
(`gpt-5-codex`, `gpt-5-pro`, `o3-pro`, etc.) while maintaining
compatibility with existing configs.
Changes:
- Add a `supports_chat_completions` flag to model capabilities that
defaults to `true`, preserving existing behavior
- Implement a Responses API client with streaming support, as per the [OpenAI
documentation](https://app.stainless.com/api/spec/documented/openai/openapi.documented.yml).
- Add `OpenAiResponseEventMapper` to convert Responses API events into
completion events for simplicity
- Update the UI to allow toggling the `chat_completions` capability
- Add the `gpt-5-codex` model (example settings below)
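For instance, a custom model can be opted out of `/chat/completions` with a capability override in settings. A minimal sketch, mirroring the `gpt-5-codex` example added to the docs in this PR (assuming the existing `language_models.openai.available_models` settings shape):

```json
{
  "language_models": {
    "openai": {
      "available_models": [
        {
          "name": "gpt-5-codex",
          "display_name": "GPT-5 Codex",
          "max_tokens": 128000,
          "capabilities": { "chat_completions": false }
        }
      ]
    }
  }
}
```

With `chat_completions` set to `false`, requests for this model go through the new Responses client instead.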
Closes #38858
Release Notes:
- Added support for the `gpt-5-codex` model
---------
Co-authored-by: Bennet Bo Fenner <bennet@zed.dev>
Cargo.lock | 1
crates/agent_ui/src/agent_configuration/add_llm_provider_modal.rs | 27
crates/language_models/Cargo.toml | 1
crates/language_models/src/provider/open_ai.rs | 1061
crates/language_models/src/provider/open_ai_compatible.rs | 90
crates/open_ai/src/open_ai.rs | 384
crates/settings/src/settings_content/language_model.rs | 20
docs/src/ai/llm-providers.md | 16
8 files changed, 1,564 insertions(+), 36 deletions(-)
@@ -8983,6 +8983,7 @@ dependencies = [
"open_ai",
"open_router",
"partial-json-fixer",
+ "pretty_assertions",
"project",
"release_channel",
"schemars",
@@ -102,6 +102,7 @@ struct ModelCapabilityToggles {
pub supports_images: ToggleState,
pub supports_parallel_tool_calls: ToggleState,
pub supports_prompt_cache_key: ToggleState,
+ pub supports_chat_completions: ToggleState,
}
struct ModelInput {
@@ -154,6 +155,7 @@ impl ModelInput {
images,
parallel_tool_calls,
prompt_cache_key,
+ chat_completions,
} = ModelCapabilities::default();
Self {
@@ -166,6 +168,7 @@ impl ModelInput {
supports_images: images.into(),
supports_parallel_tool_calls: parallel_tool_calls.into(),
supports_prompt_cache_key: prompt_cache_key.into(),
+ supports_chat_completions: chat_completions.into(),
},
}
}
@@ -203,6 +206,7 @@ impl ModelInput {
images: self.capabilities.supports_images.selected(),
parallel_tool_calls: self.capabilities.supports_parallel_tool_calls.selected(),
prompt_cache_key: self.capabilities.supports_prompt_cache_key.selected(),
+ chat_completions: self.capabilities.supports_chat_completions.selected(),
},
})
}
@@ -426,6 +430,20 @@ impl AddLlmProviderModal {
cx.notify();
},
)),
+ )
+ .child(
+ Checkbox::new(
+ ("supports-chat-completions", ix),
+ model.capabilities.supports_chat_completions,
+ )
+ .label("Supports /chat/completions")
+ .on_click(cx.listener(
+ move |this, checked, _window, cx| {
+ this.input.models[ix].capabilities.supports_chat_completions =
+ *checked;
+ cx.notify();
+ },
+ )),
),
)
.when(has_more_than_one_model, |this| {
@@ -724,12 +742,17 @@ mod tests {
model_input.capabilities.supports_prompt_cache_key,
ToggleState::Unselected
);
+ assert_eq!(
+ model_input.capabilities.supports_chat_completions,
+ ToggleState::Selected
+ );
let parsed_model = model_input.parse(cx).unwrap();
assert!(parsed_model.capabilities.tools);
assert!(!parsed_model.capabilities.images);
assert!(!parsed_model.capabilities.parallel_tool_calls);
assert!(!parsed_model.capabilities.prompt_cache_key);
+ assert!(parsed_model.capabilities.chat_completions);
});
}
@@ -749,12 +772,14 @@ mod tests {
model_input.capabilities.supports_images = ToggleState::Unselected;
model_input.capabilities.supports_parallel_tool_calls = ToggleState::Unselected;
model_input.capabilities.supports_prompt_cache_key = ToggleState::Unselected;
+ model_input.capabilities.supports_chat_completions = ToggleState::Unselected;
let parsed_model = model_input.parse(cx).unwrap();
assert!(!parsed_model.capabilities.tools);
assert!(!parsed_model.capabilities.images);
assert!(!parsed_model.capabilities.parallel_tool_calls);
assert!(!parsed_model.capabilities.prompt_cache_key);
+ assert!(!parsed_model.capabilities.chat_completions);
});
}
@@ -774,6 +799,7 @@ mod tests {
model_input.capabilities.supports_images = ToggleState::Unselected;
model_input.capabilities.supports_parallel_tool_calls = ToggleState::Selected;
model_input.capabilities.supports_prompt_cache_key = ToggleState::Unselected;
+ model_input.capabilities.supports_chat_completions = ToggleState::Selected;
let parsed_model = model_input.parse(cx).unwrap();
assert_eq!(parsed_model.name, "somemodel");
@@ -781,6 +807,7 @@ mod tests {
assert!(!parsed_model.capabilities.images);
assert!(parsed_model.capabilities.parallel_tool_calls);
assert!(!parsed_model.capabilities.prompt_cache_key);
+ assert!(parsed_model.capabilities.chat_completions);
});
}
@@ -66,4 +66,5 @@ x_ai = { workspace = true, features = ["schemars"] }
[dev-dependencies]
editor = { workspace = true, features = ["test-support"] }
language_model = { workspace = true, features = ["test-support"] }
+pretty_assertions.workspace = true
project = { workspace = true, features = ["test-support"] }
@@ -6,15 +6,23 @@ use gpui::{AnyView, App, AsyncApp, Context, Entity, SharedString, Task, Window};
use http_client::HttpClient;
use language_model::{
ApiKeyState, AuthenticateError, EnvVar, IconOrSvg, LanguageModel, LanguageModelCompletionError,
- LanguageModelCompletionEvent, LanguageModelId, LanguageModelName, LanguageModelProvider,
- LanguageModelProviderId, LanguageModelProviderName, LanguageModelProviderState,
- LanguageModelRequest, LanguageModelToolChoice, LanguageModelToolResultContent,
- LanguageModelToolUse, MessageContent, RateLimiter, Role, StopReason, TokenUsage, env_var,
+ LanguageModelCompletionEvent, LanguageModelId, LanguageModelImage, LanguageModelName,
+ LanguageModelProvider, LanguageModelProviderId, LanguageModelProviderName,
+ LanguageModelProviderState, LanguageModelRequest, LanguageModelRequestMessage,
+ LanguageModelToolChoice, LanguageModelToolResult, LanguageModelToolResultContent,
+ LanguageModelToolUse, LanguageModelToolUseId, MessageContent, RateLimiter, Role, StopReason,
+ TokenUsage, env_var,
};
use menu;
use open_ai::{
- ImageUrl, Model, OPEN_AI_API_URL, ReasoningEffort, ResponseStreamEvent, stream_completion,
+ ImageUrl, Model, OPEN_AI_API_URL, ReasoningEffort, ResponseStreamEvent,
+ responses::{
+ Request as ResponseRequest, ResponseOutputItem, ResponseSummary as ResponsesSummary,
+ ResponseUsage as ResponsesUsage, StreamEvent as ResponsesStreamEvent, stream_response,
+ },
+ stream_completion,
};
+use serde_json::{Value, json};
use settings::{OpenAiAvailableModel as AvailableModel, Settings, SettingsStore};
use std::pin::Pin;
use std::str::FromStr as _;
@@ -155,6 +163,7 @@ impl LanguageModelProvider for OpenAiLanguageModelProvider {
max_output_tokens: model.max_output_tokens,
max_completion_tokens: model.max_completion_tokens,
reasoning_effort: model.reasoning_effort.clone(),
+ supports_chat_completions: model.capabilities.chat_completions,
},
);
}
@@ -231,6 +240,40 @@ impl OpenAiLanguageModel {
async move { Ok(future.await?.boxed()) }.boxed()
}
+
+ fn stream_response(
+ &self,
+ request: ResponseRequest,
+ cx: &AsyncApp,
+ ) -> BoxFuture<'static, Result<futures::stream::BoxStream<'static, Result<ResponsesStreamEvent>>>>
+ {
+ let http_client = self.http_client.clone();
+
+ let Ok((api_key, api_url)) = self.state.read_with(cx, |state, cx| {
+ let api_url = OpenAiLanguageModelProvider::api_url(cx);
+ (state.api_key_state.key(&api_url), api_url)
+ }) else {
+ return future::ready(Err(anyhow!("App state dropped"))).boxed();
+ };
+
+ let provider = PROVIDER_NAME;
+ let future = self.request_limiter.stream(async move {
+ let Some(api_key) = api_key else {
+ return Err(LanguageModelCompletionError::NoApiKey { provider });
+ };
+ let request = stream_response(
+ http_client.as_ref(),
+ provider.0.as_str(),
+ &api_url,
+ &api_key,
+ request,
+ );
+ let response = request.await?;
+ Ok(response)
+ });
+
+ async move { Ok(future.await?.boxed()) }.boxed()
+ }
}
impl LanguageModel for OpenAiLanguageModel {
@@ -263,6 +306,7 @@ impl LanguageModel for OpenAiLanguageModel {
| Model::FourPointOneMini
| Model::FourPointOneNano
| Model::Five
+ | Model::FiveCodex
| Model::FiveMini
| Model::FiveNano
| Model::FivePointOne
@@ -320,20 +364,37 @@ impl LanguageModel for OpenAiLanguageModel {
LanguageModelCompletionError,
>,
> {
- let request = into_open_ai(
- request,
- self.model.id(),
- self.model.supports_parallel_tool_calls(),
- self.model.supports_prompt_cache_key(),
- self.max_output_tokens(),
- self.model.reasoning_effort(),
- );
- let completions = self.stream_completion(request, cx);
- async move {
- let mapper = OpenAiEventMapper::new();
- Ok(mapper.map_stream(completions.await?).boxed())
+ if self.model.supports_chat_completions() {
+ let request = into_open_ai(
+ request,
+ self.model.id(),
+ self.model.supports_parallel_tool_calls(),
+ self.model.supports_prompt_cache_key(),
+ self.max_output_tokens(),
+ self.model.reasoning_effort(),
+ );
+ let completions = self.stream_completion(request, cx);
+ async move {
+ let mapper = OpenAiEventMapper::new();
+ Ok(mapper.map_stream(completions.await?).boxed())
+ }
+ .boxed()
+ } else {
+ let request = into_open_ai_response(
+ request,
+ self.model.id(),
+ self.model.supports_parallel_tool_calls(),
+ self.model.supports_prompt_cache_key(),
+ self.max_output_tokens(),
+ self.model.reasoning_effort(),
+ );
+ let completions = self.stream_response(request, cx);
+ async move {
+ let mapper = OpenAiResponseEventMapper::new();
+ Ok(mapper.map_stream(completions.await?).boxed())
+ }
+ .boxed()
}
- .boxed()
}
}
@@ -460,6 +521,195 @@ pub fn into_open_ai(
}
}
+pub fn into_open_ai_response(
+ request: LanguageModelRequest,
+ model_id: &str,
+ supports_parallel_tool_calls: bool,
+ supports_prompt_cache_key: bool,
+ max_output_tokens: Option<u64>,
+ reasoning_effort: Option<ReasoningEffort>,
+) -> ResponseRequest {
+ let stream = !model_id.starts_with("o1-");
+
+ let LanguageModelRequest {
+ thread_id,
+ prompt_id: _,
+ intent: _,
+ mode: _,
+ messages,
+ tools,
+ tool_choice,
+ stop: _,
+ temperature,
+ thinking_allowed: _,
+ } = request;
+
+ let mut input_items = Vec::new();
+ for (index, message) in messages.into_iter().enumerate() {
+ append_message_to_response_items(message, index, &mut input_items);
+ }
+
+ let tools: Vec<_> = tools
+ .into_iter()
+ .map(|tool| open_ai::responses::ToolDefinition::Function {
+ name: tool.name,
+ description: Some(tool.description),
+ parameters: Some(tool.input_schema),
+ strict: None,
+ })
+ .collect();
+
+ ResponseRequest {
+ model: model_id.into(),
+ input: input_items,
+ stream,
+ temperature,
+ top_p: None,
+ max_output_tokens,
+ parallel_tool_calls: if tools.is_empty() {
+ None
+ } else {
+ Some(supports_parallel_tool_calls)
+ },
+ tool_choice: tool_choice.map(|choice| match choice {
+ LanguageModelToolChoice::Auto => open_ai::ToolChoice::Auto,
+ LanguageModelToolChoice::Any => open_ai::ToolChoice::Required,
+ LanguageModelToolChoice::None => open_ai::ToolChoice::None,
+ }),
+ tools,
+ prompt_cache_key: if supports_prompt_cache_key {
+ thread_id
+ } else {
+ None
+ },
+ reasoning: reasoning_effort.map(|effort| open_ai::responses::ReasoningConfig { effort }),
+ }
+}
+
+fn append_message_to_response_items(
+ message: LanguageModelRequestMessage,
+ index: usize,
+ input_items: &mut Vec<Value>,
+) {
+ let mut content_parts: Vec<Value> = Vec::new();
+
+ for content in message.content {
+ match content {
+ MessageContent::Text(text) => {
+ push_response_text_part(&message.role, text, &mut content_parts);
+ }
+ MessageContent::Thinking { text, .. } => {
+ push_response_text_part(&message.role, text, &mut content_parts);
+ }
+ MessageContent::RedactedThinking(_) => {}
+ MessageContent::Image(image) => {
+ push_response_image_part(&message.role, image, &mut content_parts);
+ }
+ MessageContent::ToolUse(tool_use) => {
+ flush_response_parts(&message.role, index, &mut content_parts, input_items);
+ let call_id = tool_use.id.to_string();
+ input_items.push(json!({
+ "type": "function_call",
+ "call_id": call_id,
+ "name": tool_use.name,
+ "arguments": tool_use.raw_input,
+ }));
+ }
+ MessageContent::ToolResult(tool_result) => {
+ flush_response_parts(&message.role, index, &mut content_parts, input_items);
+ input_items.push(json!({
+ "type": "function_call_output",
+ "call_id": tool_result.tool_use_id.to_string(),
+ "output": tool_result_output(&tool_result),
+ }));
+ }
+ }
+ }
+
+ flush_response_parts(&message.role, index, &mut content_parts, input_items);
+}
+
+fn push_response_text_part(role: &Role, text: impl Into<String>, parts: &mut Vec<Value>) {
+ let text = text.into();
+ if text.trim().is_empty() {
+ return;
+ }
+
+ match role {
+ Role::Assistant => parts.push(json!({
+ "type": "output_text",
+ "text": text,
+ "annotations": [],
+ })),
+ _ => parts.push(json!({
+ "type": "input_text",
+ "text": text,
+ })),
+ }
+}
+
+fn push_response_image_part(role: &Role, image: LanguageModelImage, parts: &mut Vec<Value>) {
+ match role {
+ Role::Assistant => parts.push(json!({
+ "type": "output_text",
+ "text": "[image omitted]",
+ "annotations": [],
+ })),
+ _ => parts.push(json!({
+ "type": "input_image",
+ "image_url": image.to_base64_url(),
+ })),
+ }
+}
+
+fn flush_response_parts(
+ role: &Role,
+ _index: usize,
+ parts: &mut Vec<Value>,
+ input_items: &mut Vec<Value>,
+) {
+ if parts.is_empty() {
+ return;
+ }
+
+ let item = match role {
+ Role::Assistant => json!({
+ "type": "message",
+ "role": "assistant",
+ "status": "completed",
+ "content": parts.clone(),
+ }),
+ Role::User => json!({
+ "type": "message",
+ "role": "user",
+ "content": parts.clone(),
+ }),
+ Role::System => json!({
+ "type": "message",
+ "role": "system",
+ "content": parts.clone(),
+ }),
+ };
+
+ input_items.push(item);
+ parts.clear();
+}
+
+fn tool_result_output(result: &LanguageModelToolResult) -> String {
+ if let Some(output) = &result.output {
+ match output {
+ serde_json::Value::String(text) => text.clone(),
+ serde_json::Value::Null => String::new(),
+ _ => output.to_string(),
+ }
+ } else {
+ match &result.content {
+ LanguageModelToolResultContent::Text(text) => text.to_string(),
+ LanguageModelToolResultContent::Image(image) => image.to_base64_url(),
+ }
+ }
+}
+
fn add_message_content_part(
new_part: open_ai::MessagePart,
role: Role,
@@ -608,6 +858,262 @@ struct RawToolCall {
arguments: String,
}
+pub struct OpenAiResponseEventMapper {
+ function_calls_by_item: HashMap<String, PendingResponseFunctionCall>,
+ pending_stop_reason: Option<StopReason>,
+}
+
+#[derive(Default)]
+struct PendingResponseFunctionCall {
+ call_id: String,
+ name: Arc<str>,
+ arguments: String,
+}
+
+impl OpenAiResponseEventMapper {
+ pub fn new() -> Self {
+ Self {
+ function_calls_by_item: HashMap::default(),
+ pending_stop_reason: None,
+ }
+ }
+
+ pub fn map_stream(
+ mut self,
+ events: Pin<Box<dyn Send + Stream<Item = Result<ResponsesStreamEvent>>>>,
+ ) -> impl Stream<Item = Result<LanguageModelCompletionEvent, LanguageModelCompletionError>>
+ {
+ events.flat_map(move |event| {
+ futures::stream::iter(match event {
+ Ok(event) => self.map_event(event),
+ Err(error) => vec![Err(LanguageModelCompletionError::from(anyhow!(error)))],
+ })
+ })
+ }
+
+ fn map_event(
+ &mut self,
+ event: ResponsesStreamEvent,
+ ) -> Vec<Result<LanguageModelCompletionEvent, LanguageModelCompletionError>> {
+ match event {
+ ResponsesStreamEvent::OutputItemAdded { item, .. } => {
+ let mut events = Vec::new();
+
+ match &item {
+ ResponseOutputItem::Message(message) => {
+ if let Some(id) = &message.id {
+ events.push(Ok(LanguageModelCompletionEvent::StartMessage {
+ message_id: id.clone(),
+ }));
+ }
+ }
+ ResponseOutputItem::FunctionCall(function_call) => {
+ if let Some(item_id) = function_call.id.clone() {
+ let call_id = function_call
+ .call_id
+ .clone()
+ .or_else(|| function_call.id.clone())
+ .unwrap_or_else(|| item_id.clone());
+ let entry = PendingResponseFunctionCall {
+ call_id,
+ name: Arc::<str>::from(
+ function_call.name.clone().unwrap_or_default(),
+ ),
+ arguments: function_call.arguments.clone(),
+ };
+ self.function_calls_by_item.insert(item_id, entry);
+ }
+ }
+ ResponseOutputItem::Unknown => {}
+ }
+ events
+ }
+ ResponsesStreamEvent::OutputTextDelta { delta, .. } => {
+ if delta.is_empty() {
+ Vec::new()
+ } else {
+ vec![Ok(LanguageModelCompletionEvent::Text(delta))]
+ }
+ }
+ ResponsesStreamEvent::FunctionCallArgumentsDelta { item_id, delta, .. } => {
+ if let Some(entry) = self.function_calls_by_item.get_mut(&item_id) {
+ entry.arguments.push_str(&delta);
+ }
+ Vec::new()
+ }
+ ResponsesStreamEvent::FunctionCallArgumentsDone {
+ item_id, arguments, ..
+ } => {
+ if let Some(mut entry) = self.function_calls_by_item.remove(&item_id) {
+ if !arguments.is_empty() {
+ entry.arguments = arguments;
+ }
+ let raw_input = entry.arguments.clone();
+ self.pending_stop_reason = Some(StopReason::ToolUse);
+ match serde_json::from_str::<serde_json::Value>(&entry.arguments) {
+ Ok(input) => {
+ vec![Ok(LanguageModelCompletionEvent::ToolUse(
+ LanguageModelToolUse {
+ id: LanguageModelToolUseId::from(entry.call_id.clone()),
+ name: entry.name.clone(),
+ is_input_complete: true,
+ input,
+ raw_input,
+ thought_signature: None,
+ },
+ ))]
+ }
+ Err(error) => {
+ vec![Ok(LanguageModelCompletionEvent::ToolUseJsonParseError {
+ id: LanguageModelToolUseId::from(entry.call_id.clone()),
+ tool_name: entry.name.clone(),
+ raw_input: Arc::<str>::from(raw_input),
+ json_parse_error: error.to_string(),
+ })]
+ }
+ }
+ } else {
+ Vec::new()
+ }
+ }
+ ResponsesStreamEvent::Completed { response } => {
+ self.handle_completion(response, StopReason::EndTurn)
+ }
+ ResponsesStreamEvent::Incomplete { response } => {
+ let reason = response
+ .status_details
+ .as_ref()
+ .and_then(|details| details.reason.as_deref());
+ let stop_reason = match reason {
+ Some("max_output_tokens") => StopReason::MaxTokens,
+ Some("content_filter") => {
+ self.pending_stop_reason = Some(StopReason::Refusal);
+ StopReason::Refusal
+ }
+ _ => self
+ .pending_stop_reason
+ .take()
+ .unwrap_or(StopReason::EndTurn),
+ };
+
+ let mut events = Vec::new();
+ if self.pending_stop_reason.is_none() {
+ events.extend(self.emit_tool_calls_from_output(&response.output));
+ }
+ if let Some(usage) = response.usage.as_ref() {
+ events.push(Ok(LanguageModelCompletionEvent::UsageUpdate(
+ token_usage_from_response_usage(usage),
+ )));
+ }
+ events.push(Ok(LanguageModelCompletionEvent::Stop(stop_reason)));
+ events
+ }
+ ResponsesStreamEvent::Failed { response } => {
+ let message = response
+ .status_details
+ .and_then(|details| details.error)
+ .map(|error| error.to_string())
+ .unwrap_or_else(|| "response failed".to_string());
+ vec![Err(LanguageModelCompletionError::Other(anyhow!(message)))]
+ }
+ ResponsesStreamEvent::Error { error }
+ | ResponsesStreamEvent::GenericError { error } => {
+ vec![Err(LanguageModelCompletionError::Other(anyhow!(format!(
+ "{error:?}"
+ ))))]
+ }
+ ResponsesStreamEvent::OutputTextDone { .. } => Vec::new(),
+ ResponsesStreamEvent::OutputItemDone { .. }
+ | ResponsesStreamEvent::ContentPartAdded { .. }
+ | ResponsesStreamEvent::ContentPartDone { .. }
+ | ResponsesStreamEvent::Created { .. }
+ | ResponsesStreamEvent::InProgress { .. }
+ | ResponsesStreamEvent::Unknown => Vec::new(),
+ }
+ }
+
+ fn handle_completion(
+ &mut self,
+ response: ResponsesSummary,
+ default_reason: StopReason,
+ ) -> Vec<Result<LanguageModelCompletionEvent, LanguageModelCompletionError>> {
+ let mut events = Vec::new();
+
+ if self.pending_stop_reason.is_none() {
+ events.extend(self.emit_tool_calls_from_output(&response.output));
+ }
+
+ if let Some(usage) = response.usage.as_ref() {
+ events.push(Ok(LanguageModelCompletionEvent::UsageUpdate(
+ token_usage_from_response_usage(usage),
+ )));
+ }
+
+ let stop_reason = self.pending_stop_reason.take().unwrap_or(default_reason);
+ events.push(Ok(LanguageModelCompletionEvent::Stop(stop_reason)));
+ events
+ }
+
+ fn emit_tool_calls_from_output(
+ &mut self,
+ output: &[ResponseOutputItem],
+ ) -> Vec<Result<LanguageModelCompletionEvent, LanguageModelCompletionError>> {
+ let mut events = Vec::new();
+ for item in output {
+ if let ResponseOutputItem::FunctionCall(function_call) = item {
+ let Some(call_id) = function_call
+ .call_id
+ .clone()
+ .or_else(|| function_call.id.clone())
+ else {
+ log::error!(
+ "Function call item missing both call_id and id: {:?}",
+ function_call
+ );
+ continue;
+ };
+ let name: Arc<str> = Arc::from(function_call.name.clone().unwrap_or_default());
+ let arguments = &function_call.arguments;
+ if !arguments.is_empty() {
+ self.pending_stop_reason = Some(StopReason::ToolUse);
+ match serde_json::from_str::<serde_json::Value>(arguments) {
+ Ok(input) => {
+ events.push(Ok(LanguageModelCompletionEvent::ToolUse(
+ LanguageModelToolUse {
+ id: LanguageModelToolUseId::from(call_id.clone()),
+ name: name.clone(),
+ is_input_complete: true,
+ input,
+ raw_input: arguments.clone(),
+ thought_signature: None,
+ },
+ )));
+ }
+ Err(error) => {
+ events.push(Ok(LanguageModelCompletionEvent::ToolUseJsonParseError {
+ id: LanguageModelToolUseId::from(call_id.clone()),
+ tool_name: name.clone(),
+ raw_input: Arc::<str>::from(arguments.clone()),
+ json_parse_error: error.to_string(),
+ }));
+ }
+ }
+ }
+ }
+ }
+ events
+ }
+}
+
+fn token_usage_from_response_usage(usage: &ResponsesUsage) -> TokenUsage {
+ TokenUsage {
+ input_tokens: usage.input_tokens.unwrap_or_default(),
+ output_tokens: usage.output_tokens.unwrap_or_default(),
+ cache_creation_input_tokens: 0,
+ cache_read_input_tokens: 0,
+ }
+}
+
pub(crate) fn collect_tiktoken_messages(
request: LanguageModelRequest,
) -> Vec<tiktoken_rs::ChatCompletionRequestMessage> {
@@ -663,6 +1169,7 @@ pub fn count_open_ai_tokens(
| Model::O3Mini
| Model::O4Mini
| Model::Five
+ | Model::FiveCodex
| Model::FiveMini
| Model::FiveNano => tiktoken_rs::num_tokens_from_messages(model.id(), &messages),
// GPT-5.1 and 5.2 don't have dedicated tiktoken support; use gpt-5 tokenizer
@@ -858,10 +1365,46 @@ impl Render for ConfigurationView {
#[cfg(test)]
mod tests {
+ use super::*;
+ use futures::{StreamExt, executor::block_on};
use gpui::TestAppContext;
- use language_model::LanguageModelRequestMessage;
+ use language_model::{LanguageModelRequestMessage, LanguageModelRequestTool};
+ use open_ai::responses::{
+ ResponseFunctionToolCall, ResponseOutputItem, ResponseOutputMessage, ResponseStatusDetails,
+ ResponseSummary, ResponseUsage, StreamEvent as ResponsesStreamEvent,
+ };
+ use pretty_assertions::assert_eq;
+
+ fn map_response_events(events: Vec<ResponsesStreamEvent>) -> Vec<LanguageModelCompletionEvent> {
+ block_on(async {
+ OpenAiResponseEventMapper::new()
+ .map_stream(Box::pin(futures::stream::iter(events.into_iter().map(Ok))))
+ .collect::<Vec<_>>()
+ .await
+ .into_iter()
+ .map(Result::unwrap)
+ .collect()
+ })
+ }
- use super::*;
+ fn response_item_message(id: &str) -> ResponseOutputItem {
+ ResponseOutputItem::Message(ResponseOutputMessage {
+ id: Some(id.to_string()),
+ role: Some("assistant".to_string()),
+ status: Some("in_progress".to_string()),
+ content: vec![],
+ })
+ }
+
+ fn response_item_function_call(id: &str, args: Option<&str>) -> ResponseOutputItem {
+ ResponseOutputItem::FunctionCall(ResponseFunctionToolCall {
+ id: Some(id.to_string()),
+ status: Some("in_progress".to_string()),
+ name: Some("get_weather".to_string()),
+ call_id: Some("call_123".to_string()),
+ arguments: args.map(|s| s.to_string()).unwrap_or_default(),
+ })
+ }
#[gpui::test]
fn tiktoken_rs_support(cx: &TestAppContext) {
@@ -896,4 +1439,482 @@ mod tests {
assert!(count > 0);
}
}
+
+ #[test]
+ fn responses_stream_maps_text_and_usage() {
+ let events = vec![
+ ResponsesStreamEvent::OutputItemAdded {
+ output_index: 0,
+ sequence_number: None,
+ item: response_item_message("msg_123"),
+ },
+ ResponsesStreamEvent::OutputTextDelta {
+ item_id: "msg_123".into(),
+ output_index: 0,
+ content_index: Some(0),
+ delta: "Hello".into(),
+ },
+ ResponsesStreamEvent::Completed {
+ response: ResponseSummary {
+ usage: Some(ResponseUsage {
+ input_tokens: Some(5),
+ output_tokens: Some(3),
+ total_tokens: Some(8),
+ }),
+ ..Default::default()
+ },
+ },
+ ];
+
+ let mapped = map_response_events(events);
+ assert!(matches!(
+ mapped[0],
+ LanguageModelCompletionEvent::StartMessage { ref message_id } if message_id == "msg_123"
+ ));
+ assert!(matches!(
+ mapped[1],
+ LanguageModelCompletionEvent::Text(ref text) if text == "Hello"
+ ));
+ assert!(matches!(
+ mapped[2],
+ LanguageModelCompletionEvent::UsageUpdate(TokenUsage {
+ input_tokens: 5,
+ output_tokens: 3,
+ ..
+ })
+ ));
+ assert!(matches!(
+ mapped[3],
+ LanguageModelCompletionEvent::Stop(StopReason::EndTurn)
+ ));
+ }
+
+ #[test]
+ fn into_open_ai_response_builds_complete_payload() {
+ let tool_call_id = LanguageModelToolUseId::from("call-42");
+ let tool_input = json!({ "city": "Boston" });
+ let tool_arguments = serde_json::to_string(&tool_input).unwrap();
+ let tool_use = LanguageModelToolUse {
+ id: tool_call_id.clone(),
+ name: Arc::from("get_weather"),
+ raw_input: tool_arguments.clone(),
+ input: tool_input,
+ is_input_complete: true,
+ thought_signature: None,
+ };
+ let tool_result = LanguageModelToolResult {
+ tool_use_id: tool_call_id,
+ tool_name: Arc::from("get_weather"),
+ is_error: false,
+ content: LanguageModelToolResultContent::Text(Arc::from("Sunny")),
+ output: Some(json!({ "forecast": "Sunny" })),
+ };
+ let user_image = LanguageModelImage {
+ source: SharedString::from("aGVsbG8="),
+ size: None,
+ };
+ let expected_image_url = user_image.to_base64_url();
+
+ let request = LanguageModelRequest {
+ thread_id: Some("thread-123".into()),
+ prompt_id: None,
+ intent: None,
+ mode: None,
+ messages: vec![
+ LanguageModelRequestMessage {
+ role: Role::System,
+ content: vec![MessageContent::Text("System context".into())],
+ cache: false,
+ reasoning_details: None,
+ },
+ LanguageModelRequestMessage {
+ role: Role::User,
+ content: vec![
+ MessageContent::Text("Please check the weather.".into()),
+ MessageContent::Image(user_image),
+ ],
+ cache: false,
+ reasoning_details: None,
+ },
+ LanguageModelRequestMessage {
+ role: Role::Assistant,
+ content: vec![
+ MessageContent::Text("Looking that up.".into()),
+ MessageContent::ToolUse(tool_use),
+ ],
+ cache: false,
+ reasoning_details: None,
+ },
+ LanguageModelRequestMessage {
+ role: Role::Assistant,
+ content: vec![MessageContent::ToolResult(tool_result)],
+ cache: false,
+ reasoning_details: None,
+ },
+ ],
+ tools: vec![LanguageModelRequestTool {
+ name: "get_weather".into(),
+ description: "Fetches the weather".into(),
+ input_schema: json!({ "type": "object" }),
+ }],
+ tool_choice: Some(LanguageModelToolChoice::Any),
+ stop: vec!["<STOP>".into()],
+ temperature: None,
+ thinking_allowed: false,
+ };
+
+ let response = into_open_ai_response(
+ request,
+ "custom-model",
+ true,
+ true,
+ Some(2048),
+ Some(ReasoningEffort::Low),
+ );
+
+ let serialized = serde_json::to_value(&response).unwrap();
+ let expected = json!({
+ "model": "custom-model",
+ "input": [
+ {
+ "type": "message",
+ "role": "system",
+ "content": [
+ { "type": "input_text", "text": "System context" }
+ ]
+ },
+ {
+ "type": "message",
+ "role": "user",
+ "content": [
+ { "type": "input_text", "text": "Please check the weather." },
+ { "type": "input_image", "image_url": expected_image_url }
+ ]
+ },
+ {
+ "type": "message",
+ "role": "assistant",
+ "status": "completed",
+ "content": [
+ { "type": "output_text", "text": "Looking that up.", "annotations": [] }
+ ]
+ },
+ {
+ "type": "function_call",
+ "call_id": "call-42",
+ "name": "get_weather",
+ "arguments": tool_arguments
+ },
+ {
+ "type": "function_call_output",
+ "call_id": "call-42",
+ "output": "{\"forecast\":\"Sunny\"}"
+ }
+ ],
+ "stream": true,
+ "max_output_tokens": 2048,
+ "parallel_tool_calls": true,
+ "tool_choice": "required",
+ "tools": [
+ {
+ "type": "function",
+ "name": "get_weather",
+ "description": "Fetches the weather",
+ "parameters": { "type": "object" }
+ }
+ ],
+ "prompt_cache_key": "thread-123",
+ "reasoning": { "effort": "low" }
+ });
+
+ assert_eq!(serialized, expected);
+ }
+
+ #[test]
+ fn responses_stream_maps_tool_calls() {
+ let events = vec![
+ ResponsesStreamEvent::OutputItemAdded {
+ output_index: 0,
+ sequence_number: None,
+ item: response_item_function_call("item_fn", Some("{\"city\":\"Bos")),
+ },
+ ResponsesStreamEvent::FunctionCallArgumentsDelta {
+ item_id: "item_fn".into(),
+ output_index: 0,
+ delta: "ton\"}".into(),
+ sequence_number: None,
+ },
+ ResponsesStreamEvent::FunctionCallArgumentsDone {
+ item_id: "item_fn".into(),
+ output_index: 0,
+ arguments: "{\"city\":\"Boston\"}".into(),
+ sequence_number: None,
+ },
+ ResponsesStreamEvent::Completed {
+ response: ResponseSummary::default(),
+ },
+ ];
+
+ let mapped = map_response_events(events);
+ assert!(matches!(
+ mapped[0],
+ LanguageModelCompletionEvent::ToolUse(LanguageModelToolUse {
+ ref id,
+ ref name,
+ ref raw_input,
+ ..
+ }) if id.to_string() == "call_123"
+ && name.as_ref() == "get_weather"
+ && raw_input == "{\"city\":\"Boston\"}"
+ ));
+ assert!(matches!(
+ mapped[1],
+ LanguageModelCompletionEvent::Stop(StopReason::ToolUse)
+ ));
+ }
+
+ #[test]
+ fn responses_stream_uses_max_tokens_stop_reason() {
+ let events = vec![ResponsesStreamEvent::Incomplete {
+ response: ResponseSummary {
+ status_details: Some(ResponseStatusDetails {
+ reason: Some("max_output_tokens".into()),
+ r#type: Some("incomplete".into()),
+ error: None,
+ }),
+ usage: Some(ResponseUsage {
+ input_tokens: Some(10),
+ output_tokens: Some(20),
+ total_tokens: Some(30),
+ }),
+ ..Default::default()
+ },
+ }];
+
+ let mapped = map_response_events(events);
+ assert!(matches!(
+ mapped[0],
+ LanguageModelCompletionEvent::UsageUpdate(TokenUsage {
+ input_tokens: 10,
+ output_tokens: 20,
+ ..
+ })
+ ));
+ assert!(matches!(
+ mapped[1],
+ LanguageModelCompletionEvent::Stop(StopReason::MaxTokens)
+ ));
+ }
+
+ #[test]
+ fn responses_stream_handles_multiple_tool_calls() {
+ let events = vec![
+ ResponsesStreamEvent::OutputItemAdded {
+ output_index: 0,
+ sequence_number: None,
+ item: response_item_function_call("item_fn1", Some("{\"city\":\"NYC\"}")),
+ },
+ ResponsesStreamEvent::FunctionCallArgumentsDone {
+ item_id: "item_fn1".into(),
+ output_index: 0,
+ arguments: "{\"city\":\"NYC\"}".into(),
+ sequence_number: None,
+ },
+ ResponsesStreamEvent::OutputItemAdded {
+ output_index: 1,
+ sequence_number: None,
+ item: response_item_function_call("item_fn2", Some("{\"city\":\"LA\"}")),
+ },
+ ResponsesStreamEvent::FunctionCallArgumentsDone {
+ item_id: "item_fn2".into(),
+ output_index: 1,
+ arguments: "{\"city\":\"LA\"}".into(),
+ sequence_number: None,
+ },
+ ResponsesStreamEvent::Completed {
+ response: ResponseSummary::default(),
+ },
+ ];
+
+ let mapped = map_response_events(events);
+ assert_eq!(mapped.len(), 3);
+ assert!(matches!(
+ mapped[0],
+ LanguageModelCompletionEvent::ToolUse(LanguageModelToolUse { ref raw_input, .. })
+ if raw_input == "{\"city\":\"NYC\"}"
+ ));
+ assert!(matches!(
+ mapped[1],
+ LanguageModelCompletionEvent::ToolUse(LanguageModelToolUse { ref raw_input, .. })
+ if raw_input == "{\"city\":\"LA\"}"
+ ));
+ assert!(matches!(
+ mapped[2],
+ LanguageModelCompletionEvent::Stop(StopReason::ToolUse)
+ ));
+ }
+
+ #[test]
+ fn responses_stream_handles_mixed_text_and_tool_calls() {
+ let events = vec![
+ ResponsesStreamEvent::OutputItemAdded {
+ output_index: 0,
+ sequence_number: None,
+ item: response_item_message("msg_123"),
+ },
+ ResponsesStreamEvent::OutputTextDelta {
+ item_id: "msg_123".into(),
+ output_index: 0,
+ content_index: Some(0),
+ delta: "Let me check that".into(),
+ },
+ ResponsesStreamEvent::OutputItemAdded {
+ output_index: 1,
+ sequence_number: None,
+ item: response_item_function_call("item_fn", Some("{\"query\":\"test\"}")),
+ },
+ ResponsesStreamEvent::FunctionCallArgumentsDone {
+ item_id: "item_fn".into(),
+ output_index: 1,
+ arguments: "{\"query\":\"test\"}".into(),
+ sequence_number: None,
+ },
+ ResponsesStreamEvent::Completed {
+ response: ResponseSummary::default(),
+ },
+ ];
+
+ let mapped = map_response_events(events);
+ assert!(matches!(
+ mapped[0],
+ LanguageModelCompletionEvent::StartMessage { .. }
+ ));
+ assert!(matches!(
+ mapped[1],
+ LanguageModelCompletionEvent::Text(ref text) if text == "Let me check that"
+ ));
+ assert!(matches!(
+ mapped[2],
+ LanguageModelCompletionEvent::ToolUse(LanguageModelToolUse { ref raw_input, .. })
+ if raw_input == "{\"query\":\"test\"}"
+ ));
+ assert!(matches!(
+ mapped[3],
+ LanguageModelCompletionEvent::Stop(StopReason::ToolUse)
+ ));
+ }
+
+ #[test]
+ fn responses_stream_handles_json_parse_error() {
+ let events = vec![
+ ResponsesStreamEvent::OutputItemAdded {
+ output_index: 0,
+ sequence_number: None,
+ item: response_item_function_call("item_fn", Some("{invalid json")),
+ },
+ ResponsesStreamEvent::FunctionCallArgumentsDone {
+ item_id: "item_fn".into(),
+ output_index: 0,
+ arguments: "{invalid json".into(),
+ sequence_number: None,
+ },
+ ResponsesStreamEvent::Completed {
+ response: ResponseSummary::default(),
+ },
+ ];
+
+ let mapped = map_response_events(events);
+ assert!(matches!(
+ mapped[0],
+ LanguageModelCompletionEvent::ToolUseJsonParseError {
+ ref raw_input,
+ ..
+ } if raw_input.as_ref() == "{invalid json"
+ ));
+ }
+
+ #[test]
+ fn responses_stream_handles_incomplete_function_call() {
+ let events = vec![
+ ResponsesStreamEvent::OutputItemAdded {
+ output_index: 0,
+ sequence_number: None,
+ item: response_item_function_call("item_fn", Some("{\"city\":")),
+ },
+ ResponsesStreamEvent::FunctionCallArgumentsDelta {
+ item_id: "item_fn".into(),
+ output_index: 0,
+ delta: "\"Boston\"".into(),
+ sequence_number: None,
+ },
+ ResponsesStreamEvent::Incomplete {
+ response: ResponseSummary {
+ status_details: Some(ResponseStatusDetails {
+ reason: Some("max_output_tokens".into()),
+ r#type: Some("incomplete".into()),
+ error: None,
+ }),
+ output: vec![response_item_function_call(
+ "item_fn",
+ Some("{\"city\":\"Boston\"}"),
+ )],
+ ..Default::default()
+ },
+ },
+ ];
+
+ let mapped = map_response_events(events);
+ assert!(matches!(
+ mapped[0],
+ LanguageModelCompletionEvent::ToolUse(LanguageModelToolUse { ref raw_input, .. })
+ if raw_input == "{\"city\":\"Boston\"}"
+ ));
+ assert!(matches!(
+ mapped[1],
+ LanguageModelCompletionEvent::Stop(StopReason::MaxTokens)
+ ));
+ }
+
+ #[test]
+ fn responses_stream_incomplete_does_not_duplicate_tool_calls() {
+ let events = vec![
+ ResponsesStreamEvent::OutputItemAdded {
+ output_index: 0,
+ sequence_number: None,
+ item: response_item_function_call("item_fn", Some("{\"city\":\"Boston\"}")),
+ },
+ ResponsesStreamEvent::FunctionCallArgumentsDone {
+ item_id: "item_fn".into(),
+ output_index: 0,
+ arguments: "{\"city\":\"Boston\"}".into(),
+ sequence_number: None,
+ },
+ ResponsesStreamEvent::Incomplete {
+ response: ResponseSummary {
+ status_details: Some(ResponseStatusDetails {
+ reason: Some("max_output_tokens".into()),
+ r#type: Some("incomplete".into()),
+ error: None,
+ }),
+ output: vec![response_item_function_call(
+ "item_fn",
+ Some("{\"city\":\"Boston\"}"),
+ )],
+ ..Default::default()
+ },
+ },
+ ];
+
+ let mapped = map_response_events(events);
+ assert_eq!(mapped.len(), 2);
+ assert!(matches!(
+ mapped[0],
+ LanguageModelCompletionEvent::ToolUse(LanguageModelToolUse { ref raw_input, .. })
+ if raw_input == "{\"city\":\"Boston\"}"
+ ));
+ assert!(matches!(
+ mapped[1],
+ LanguageModelCompletionEvent::Stop(StopReason::MaxTokens)
+ ));
+ }
}
@@ -10,14 +10,20 @@ use language_model::{
LanguageModelRequest, LanguageModelToolChoice, LanguageModelToolSchemaFormat, RateLimiter,
};
use menu;
-use open_ai::{ResponseStreamEvent, stream_completion};
+use open_ai::{
+ ResponseStreamEvent,
+ responses::{Request as ResponseRequest, StreamEvent as ResponsesStreamEvent, stream_response},
+ stream_completion,
+};
use settings::{Settings, SettingsStore};
use std::sync::Arc;
use ui::{ElevationIndex, Tooltip, prelude::*};
use ui_input::InputField;
use util::ResultExt;
-use crate::provider::open_ai::{OpenAiEventMapper, into_open_ai};
+use crate::provider::open_ai::{
+ OpenAiEventMapper, OpenAiResponseEventMapper, into_open_ai, into_open_ai_response,
+};
pub use settings::OpenAiCompatibleAvailableModel as AvailableModel;
pub use settings::OpenAiCompatibleModelCapabilities as ModelCapabilities;
@@ -236,6 +242,43 @@ impl OpenAiCompatibleLanguageModel {
async move { Ok(future.await?.boxed()) }.boxed()
}
+
+ fn stream_response(
+ &self,
+ request: ResponseRequest,
+ cx: &AsyncApp,
+ ) -> BoxFuture<'static, Result<futures::stream::BoxStream<'static, Result<ResponsesStreamEvent>>>>
+ {
+ let http_client = self.http_client.clone();
+
+ let Ok((api_key, api_url)) = self.state.read_with(cx, |state, _cx| {
+ let api_url = &state.settings.api_url;
+ (
+ state.api_key_state.key(api_url),
+ state.settings.api_url.clone(),
+ )
+ }) else {
+ return future::ready(Err(anyhow!("App state dropped"))).boxed();
+ };
+
+ let provider = self.provider_name.clone();
+ let future = self.request_limiter.stream(async move {
+ let Some(api_key) = api_key else {
+ return Err(LanguageModelCompletionError::NoApiKey { provider });
+ };
+ let request = stream_response(
+ http_client.as_ref(),
+ provider.0.as_str(),
+ &api_url,
+ &api_key,
+ request,
+ );
+ let response = request.await?;
+ Ok(response)
+ });
+
+ async move { Ok(future.await?.boxed()) }.boxed()
+ }
}
impl LanguageModel for OpenAiCompatibleLanguageModel {
@@ -327,20 +370,37 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
LanguageModelCompletionError,
>,
> {
- let request = into_open_ai(
- request,
- &self.model.name,
- self.model.capabilities.parallel_tool_calls,
- self.model.capabilities.prompt_cache_key,
- self.max_output_tokens(),
- None,
- );
- let completions = self.stream_completion(request, cx);
- async move {
- let mapper = OpenAiEventMapper::new();
- Ok(mapper.map_stream(completions.await?).boxed())
+ if self.model.capabilities.chat_completions {
+ let request = into_open_ai(
+ request,
+ &self.model.name,
+ self.model.capabilities.parallel_tool_calls,
+ self.model.capabilities.prompt_cache_key,
+ self.max_output_tokens(),
+ None,
+ );
+ let completions = self.stream_completion(request, cx);
+ async move {
+ let mapper = OpenAiEventMapper::new();
+ Ok(mapper.map_stream(completions.await?).boxed())
+ }
+ .boxed()
+ } else {
+ let request = into_open_ai_response(
+ request,
+ &self.model.name,
+ self.model.capabilities.parallel_tool_calls,
+ self.model.capabilities.prompt_cache_key,
+ self.max_output_tokens(),
+ None,
+ );
+ let completions = self.stream_response(request, cx);
+ async move {
+ let mapper = OpenAiResponseEventMapper::new();
+ Ok(mapper.map_stream(completions.await?).boxed())
+ }
+ .boxed()
}
- .boxed()
}
}
@@ -81,6 +81,8 @@ pub enum Model {
O4Mini,
#[serde(rename = "gpt-5")]
Five,
+ #[serde(rename = "gpt-5-codex")]
+ FiveCodex,
#[serde(rename = "gpt-5-mini")]
FiveMini,
#[serde(rename = "gpt-5-nano")]
@@ -98,9 +100,15 @@ pub enum Model {
max_output_tokens: Option<u64>,
max_completion_tokens: Option<u64>,
reasoning_effort: Option<ReasoningEffort>,
+ #[serde(default = "default_supports_chat_completions")]
+ supports_chat_completions: bool,
},
}
+const fn default_supports_chat_completions() -> bool {
+ true
+}
+
impl Model {
pub fn default_fast() -> Self {
// TODO: Replace with FiveMini since all other models are deprecated
@@ -122,6 +130,7 @@ impl Model {
"o3" => Ok(Self::O3),
"o4-mini" => Ok(Self::O4Mini),
"gpt-5" => Ok(Self::Five),
+ "gpt-5-codex" => Ok(Self::FiveCodex),
"gpt-5-mini" => Ok(Self::FiveMini),
"gpt-5-nano" => Ok(Self::FiveNano),
"gpt-5.1" => Ok(Self::FivePointOne),
@@ -145,6 +154,7 @@ impl Model {
Self::O3 => "o3",
Self::O4Mini => "o4-mini",
Self::Five => "gpt-5",
+ Self::FiveCodex => "gpt-5-codex",
Self::FiveMini => "gpt-5-mini",
Self::FiveNano => "gpt-5-nano",
Self::FivePointOne => "gpt-5.1",
@@ -168,6 +178,7 @@ impl Model {
Self::O3 => "o3",
Self::O4Mini => "o4-mini",
Self::Five => "gpt-5",
+ Self::FiveCodex => "gpt-5-codex",
Self::FiveMini => "gpt-5-mini",
Self::FiveNano => "gpt-5-nano",
Self::FivePointOne => "gpt-5.1",
@@ -193,6 +204,7 @@ impl Model {
Self::O3 => 200_000,
Self::O4Mini => 200_000,
Self::Five => 272_000,
+ Self::FiveCodex => 272_000,
Self::FiveMini => 272_000,
Self::FiveNano => 272_000,
Self::FivePointOne => 400_000,
@@ -219,6 +231,7 @@ impl Model {
Self::O3 => Some(100_000),
Self::O4Mini => Some(100_000),
Self::Five => Some(128_000),
+ Self::FiveCodex => Some(128_000),
Self::FiveMini => Some(128_000),
Self::FiveNano => Some(128_000),
Self::FivePointOne => Some(128_000),
@@ -235,6 +248,17 @@ impl Model {
}
}
+ pub fn supports_chat_completions(&self) -> bool {
+ match self {
+ Self::Custom {
+ supports_chat_completions,
+ ..
+ } => *supports_chat_completions,
+ Self::FiveCodex => false,
+ _ => true,
+ }
+ }
+
/// Returns whether the given model supports the `parallel_tool_calls` parameter.
///
/// If the model does not support the parameter, do not pass it up, or the API will return an error.
@@ -249,6 +273,7 @@ impl Model {
| Self::FourPointOneMini
| Self::FourPointOneNano
| Self::Five
+ | Self::FiveCodex
| Self::FiveMini
| Self::FivePointOne
| Self::FivePointTwo
@@ -624,3 +649,362 @@ pub fn embed<'a>(
Ok(response)
}
}
+
+pub mod responses {
+ use anyhow::{Result, anyhow};
+ use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
+ use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest};
+ use serde::{Deserialize, Serialize};
+ use serde_json::Value;
+
+ use crate::RequestError;
+
+ #[derive(Serialize, Debug)]
+ pub struct Request {
+ pub model: String,
+ #[serde(skip_serializing_if = "Vec::is_empty")]
+ pub input: Vec<Value>,
+ #[serde(default)]
+ pub stream: bool,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub temperature: Option<f32>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub top_p: Option<f32>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub max_output_tokens: Option<u64>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub parallel_tool_calls: Option<bool>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub tool_choice: Option<super::ToolChoice>,
+ #[serde(skip_serializing_if = "Vec::is_empty")]
+ pub tools: Vec<ToolDefinition>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub prompt_cache_key: Option<String>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub reasoning: Option<ReasoningConfig>,
+ }
+
+ #[derive(Serialize, Debug)]
+ pub struct ReasoningConfig {
+ pub effort: super::ReasoningEffort,
+ }
+
+ #[derive(Serialize, Debug)]
+ #[serde(tag = "type", rename_all = "snake_case")]
+ pub enum ToolDefinition {
+ Function {
+ name: String,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ description: Option<String>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ parameters: Option<Value>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ strict: Option<bool>,
+ },
+ }
+
+ #[derive(Deserialize, Debug)]
+ pub struct Error {
+ pub message: String,
+ }
+
+ #[derive(Deserialize, Debug)]
+ #[serde(tag = "type")]
+ pub enum StreamEvent {
+ #[serde(rename = "response.created")]
+ Created { response: ResponseSummary },
+ #[serde(rename = "response.in_progress")]
+ InProgress { response: ResponseSummary },
+ #[serde(rename = "response.output_item.added")]
+ OutputItemAdded {
+ output_index: usize,
+ #[serde(default)]
+ sequence_number: Option<u64>,
+ item: ResponseOutputItem,
+ },
+ #[serde(rename = "response.output_item.done")]
+ OutputItemDone {
+ output_index: usize,
+ #[serde(default)]
+ sequence_number: Option<u64>,
+ item: ResponseOutputItem,
+ },
+ #[serde(rename = "response.content_part.added")]
+ ContentPartAdded {
+ item_id: String,
+ output_index: usize,
+ content_index: usize,
+ part: Value,
+ },
+ #[serde(rename = "response.content_part.done")]
+ ContentPartDone {
+ item_id: String,
+ output_index: usize,
+ content_index: usize,
+ part: Value,
+ },
+ #[serde(rename = "response.output_text.delta")]
+ OutputTextDelta {
+ item_id: String,
+ output_index: usize,
+ #[serde(default)]
+ content_index: Option<usize>,
+ delta: String,
+ },
+ #[serde(rename = "response.output_text.done")]
+ OutputTextDone {
+ item_id: String,
+ output_index: usize,
+ #[serde(default)]
+ content_index: Option<usize>,
+ text: String,
+ },
+ #[serde(rename = "response.function_call_arguments.delta")]
+ FunctionCallArgumentsDelta {
+ item_id: String,
+ output_index: usize,
+ delta: String,
+ #[serde(default)]
+ sequence_number: Option<u64>,
+ },
+ #[serde(rename = "response.function_call_arguments.done")]
+ FunctionCallArgumentsDone {
+ item_id: String,
+ output_index: usize,
+ arguments: String,
+ #[serde(default)]
+ sequence_number: Option<u64>,
+ },
+ #[serde(rename = "response.completed")]
+ Completed { response: ResponseSummary },
+ #[serde(rename = "response.incomplete")]
+ Incomplete { response: ResponseSummary },
+ #[serde(rename = "response.failed")]
+ Failed { response: ResponseSummary },
+ #[serde(rename = "response.error")]
+ Error { error: Error },
+ #[serde(rename = "error")]
+ GenericError { error: Error },
+ #[serde(other)]
+ Unknown,
+ }
+
+ #[derive(Deserialize, Debug, Default, Clone)]
+ pub struct ResponseSummary {
+ #[serde(default)]
+ pub id: Option<String>,
+ #[serde(default)]
+ pub status: Option<String>,
+ #[serde(default)]
+ pub status_details: Option<ResponseStatusDetails>,
+ #[serde(default)]
+ pub usage: Option<ResponseUsage>,
+ #[serde(default)]
+ pub output: Vec<ResponseOutputItem>,
+ }
+
+ #[derive(Deserialize, Debug, Default, Clone)]
+ pub struct ResponseStatusDetails {
+ #[serde(default)]
+ pub reason: Option<String>,
+ #[serde(default)]
+ pub r#type: Option<String>,
+ #[serde(default)]
+ pub error: Option<Value>,
+ }
+
+ #[derive(Deserialize, Debug, Default, Clone)]
+ pub struct ResponseUsage {
+ #[serde(default)]
+ pub input_tokens: Option<u64>,
+ #[serde(default)]
+ pub output_tokens: Option<u64>,
+ #[serde(default)]
+ pub total_tokens: Option<u64>,
+ }
+
+ #[derive(Deserialize, Debug, Clone)]
+ #[serde(tag = "type", rename_all = "snake_case")]
+ pub enum ResponseOutputItem {
+ Message(ResponseOutputMessage),
+ FunctionCall(ResponseFunctionToolCall),
+ #[serde(other)]
+ Unknown,
+ }
+
+ #[derive(Deserialize, Debug, Clone)]
+ pub struct ResponseOutputMessage {
+ #[serde(default)]
+ pub id: Option<String>,
+ #[serde(default)]
+ pub content: Vec<Value>,
+ #[serde(default)]
+ pub role: Option<String>,
+ #[serde(default)]
+ pub status: Option<String>,
+ }
+
+ #[derive(Deserialize, Debug, Clone)]
+ pub struct ResponseFunctionToolCall {
+ #[serde(default)]
+ pub id: Option<String>,
+ #[serde(default)]
+ pub arguments: String,
+ #[serde(default)]
+ pub call_id: Option<String>,
+ #[serde(default)]
+ pub name: Option<String>,
+ #[serde(default)]
+ pub status: Option<String>,
+ }
+
+ pub async fn stream_response(
+ client: &dyn HttpClient,
+ provider_name: &str,
+ api_url: &str,
+ api_key: &str,
+ request: Request,
+ ) -> Result<BoxStream<'static, Result<StreamEvent>>, RequestError> {
+ let uri = format!("{api_url}/responses");
+ let request_builder = HttpRequest::builder()
+ .method(Method::POST)
+ .uri(uri)
+ .header("Content-Type", "application/json")
+ .header("Authorization", format!("Bearer {}", api_key.trim()));
+
+ let is_streaming = request.stream;
+ let request = request_builder
+ .body(AsyncBody::from(
+ serde_json::to_string(&request).map_err(|e| RequestError::Other(e.into()))?,
+ ))
+ .map_err(|e| RequestError::Other(e.into()))?;
+
+ let mut response = client.send(request).await?;
+ if response.status().is_success() {
+ if is_streaming {
+ let reader = BufReader::new(response.into_body());
+ Ok(reader
+ .lines()
+ .filter_map(|line| async move {
+ match line {
+ Ok(line) => {
+ let line = line
+ .strip_prefix("data: ")
+ .or_else(|| line.strip_prefix("data:"))?;
+ if line == "[DONE]" || line.is_empty() {
+ None
+ } else {
+ match serde_json::from_str::<StreamEvent>(line) {
+ Ok(event) => Some(Ok(event)),
+ Err(error) => {
+ log::error!(
+ "Failed to parse OpenAI responses stream event: `{}`\nResponse: `{}`",
+ error,
+ line,
+ );
+ Some(Err(anyhow!(error)))
+ }
+ }
+ }
+ }
+ Err(error) => Some(Err(anyhow!(error))),
+ }
+ })
+ .boxed())
+ } else {
+ let mut body = String::new();
+ response
+ .body_mut()
+ .read_to_string(&mut body)
+ .await
+ .map_err(|e| RequestError::Other(e.into()))?;
+
+ match serde_json::from_str::<ResponseSummary>(&body) {
+ Ok(response_summary) => {
+ let events = vec![
+ StreamEvent::Created {
+ response: response_summary.clone(),
+ },
+ StreamEvent::InProgress {
+ response: response_summary.clone(),
+ },
+ ];
+
+ let mut all_events = events;
+ for (output_index, item) in response_summary.output.iter().enumerate() {
+ all_events.push(StreamEvent::OutputItemAdded {
+ output_index,
+ sequence_number: None,
+ item: item.clone(),
+ });
+
+ match item {
+ ResponseOutputItem::Message(message) => {
+ for content_item in &message.content {
+ if let Some(text) = content_item.get("text") {
+ if let Some(text_str) = text.as_str() {
+ if let Some(ref item_id) = message.id {
+ all_events.push(StreamEvent::OutputTextDelta {
+ item_id: item_id.clone(),
+ output_index,
+ content_index: None,
+ delta: text_str.to_string(),
+ });
+ }
+ }
+ }
+ }
+ }
+ ResponseOutputItem::FunctionCall(function_call) => {
+ if let Some(ref item_id) = function_call.id {
+ all_events.push(StreamEvent::FunctionCallArgumentsDone {
+ item_id: item_id.clone(),
+ output_index,
+ arguments: function_call.arguments.clone(),
+ sequence_number: None,
+ });
+ }
+ }
+ ResponseOutputItem::Unknown => {}
+ }
+
+ all_events.push(StreamEvent::OutputItemDone {
+ output_index,
+ sequence_number: None,
+ item: item.clone(),
+ });
+ }
+
+ all_events.push(StreamEvent::Completed {
+ response: response_summary,
+ });
+
+ Ok(futures::stream::iter(all_events.into_iter().map(Ok)).boxed())
+ }
+ Err(error) => {
+ log::error!(
+ "Failed to parse OpenAI non-streaming response: `{}`\nResponse: `{}`",
+ error,
+ body,
+ );
+ Err(RequestError::Other(anyhow!(error)))
+ }
+ }
+ }
+ } else {
+ let mut body = String::new();
+ response
+ .body_mut()
+ .read_to_string(&mut body)
+ .await
+ .map_err(|e| RequestError::Other(e.into()))?;
+
+ Err(RequestError::HttpResponseError {
+ provider: provider_name.to_owned(),
+ status_code: response.status(),
+ body,
+ headers: response.headers().clone(),
+ })
+ }
+ }
+}
@@ -208,6 +208,8 @@ pub struct OpenAiAvailableModel {
pub max_output_tokens: Option<u64>,
pub max_completion_tokens: Option<u64>,
pub reasoning_effort: Option<OpenAiReasoningEffort>,
+ #[serde(default)]
+ pub capabilities: OpenAiModelCapabilities,
}
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, JsonSchema, MergeFrom)]
@@ -226,6 +228,21 @@ pub struct OpenAiCompatibleSettingsContent {
pub available_models: Vec<OpenAiCompatibleAvailableModel>,
}
+#[with_fallible_options]
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema, MergeFrom)]
+pub struct OpenAiModelCapabilities {
+ #[serde(default = "default_true")]
+ pub chat_completions: bool,
+}
+
+impl Default for OpenAiModelCapabilities {
+ fn default() -> Self {
+ Self {
+ chat_completions: default_true(),
+ }
+ }
+}
+
#[with_fallible_options]
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema, MergeFrom)]
pub struct OpenAiCompatibleAvailableModel {
@@ -245,6 +262,8 @@ pub struct OpenAiCompatibleModelCapabilities {
pub images: bool,
pub parallel_tool_calls: bool,
pub prompt_cache_key: bool,
+ #[serde(default = "default_true")]
+ pub chat_completions: bool,
}
impl Default for OpenAiCompatibleModelCapabilities {
@@ -254,6 +273,7 @@ impl Default for OpenAiCompatibleModelCapabilities {
images: false,
parallel_tool_calls: false,
prompt_cache_key: false,
+ chat_completions: default_true(),
}
}
}
@@ -469,6 +469,14 @@ To use alternate models, perhaps a preview release, or if you wish to control th
"name": "gpt-4o-2024-08-06",
"display_name": "GPT 4o Summer 2024",
"max_tokens": 128000
+ },
+ {
+ "name": "gpt-5-codex",
+ "display_name": "GPT-5 Codex",
+ "max_tokens": 128000,
+ "capabilities": {
+ "chat_completions": false
+ }
}
]
}
@@ -478,7 +486,10 @@ To use alternate models, perhaps a preview release, or if you wish to control th
You must provide the model's context window in the `max_tokens` parameter; this can be found in the [OpenAI model documentation](https://platform.openai.com/docs/models).
-OpenAI `o1` models should set `max_completion_tokens` as well to avoid incurring high reasoning token costs.
+OpenAI `o1` and `o`-class models should set `max_completion_tokens` as well to avoid incurring high reasoning token costs.
+
+If a model does not support the `/chat/completions` endpoint (for example `gpt-5-codex`), set `capabilities.chat_completions` to `false` for that model. Zed will use the `/responses` endpoint instead.
+
Custom models will be listed in the model dropdown in the Agent Panel.
### OpenAI API Compatible {#openai-api-compatible}
@@ -525,6 +536,9 @@ By default, OpenAI-compatible models inherit the following capabilities:
- `images`: false (does not support image inputs)
- `parallel_tool_calls`: false (does not support `parallel_tool_calls` parameter)
- `prompt_cache_key`: false (does not support `prompt_cache_key` parameter)
+- `chat_completions`: true (calls the `/chat/completions` endpoint)
+
+If a provider exposes models that only work with the Responses API, set `chat_completions` to `false` for those entries. Zed uses the Responses endpoint for these models.
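As a hedged sketch of such an entry (the provider name, API URL, and model name are placeholders; the capability keys are the ones listed above, and the `available_models` fields follow the OpenAI example):

```json
{
  "language_models": {
    "openai_compatible": {
      "Example Provider": {
        "api_url": "https://api.example.com/v1",
        "available_models": [
          {
            "name": "responses-only-model",
            "display_name": "Responses-Only Model",
            "max_tokens": 128000,
            "capabilities": {
              "tools": true,
              "images": false,
              "parallel_tool_calls": false,
              "prompt_cache_key": false,
              "chat_completions": false
            }
          }
        ]
      }
    }
  }
}
```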
Note that LLM API keys aren't stored in your settings file.
So, ensure you have it set in your environment variables (`<PROVIDER_NAME>_API_KEY=<your api key>`) so your settings can pick it up. In the example above, it would be `TOGETHER_AI_API_KEY=<your api key>`.