From b57ef838927b0a341319041f899ad97801f5a743 Mon Sep 17 00:00:00 2001
From: Richard Feldman
Date: Mon, 6 Apr 2026 15:24:45 -0400
Subject: [PATCH] Delegate model capabilities instead of hardcoding them

The into_open_ai_response call was hardcoding
supports_parallel_tool_calls=true and supports_prompt_cache_key=false
instead of asking the model. This meant that reasoning models like the
GPT-5 Codex variants would incorrectly send parallel_tool_calls=true,
which could cause API errors.

Add the missing capability methods to ChatGptModel and delegate to
them, as the standard OpenAI provider does. While here, pass
self.max_output_tokens() through instead of forcing max_output_tokens
to None.
---
 .../src/provider/openai_subscribed.rs         | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/crates/language_models/src/provider/openai_subscribed.rs b/crates/language_models/src/provider/openai_subscribed.rs
index 134a3a84ee01dd420108e0b057b58ea5385a509e..ea503c8a3f1f6b4e0aa86ccb94756efc1ad5ad83 100644
--- a/crates/language_models/src/provider/openai_subscribed.rs
+++ b/crates/language_models/src/provider/openai_subscribed.rs
@@ -308,6 +308,17 @@ impl ChatGptModel {
             _ => Some(ReasoningEffort::Medium),
         }
     }
+
+    fn supports_parallel_tool_calls(&self) -> bool {
+        match self {
+            Self::Gpt54 | Self::Gpt54Mini => true,
+            _ => false,
+        }
+    }
+
+    fn supports_prompt_cache_key(&self) -> bool {
+        true
+    }
 }
 
 struct OpenAiSubscribedLanguageModel {
@@ -392,13 +403,12 @@ impl LanguageModel for OpenAiSubscribedLanguageModel {
         let mut responses_request = into_open_ai_response(
             request,
             self.model.id(),
-            true,  // supports_parallel_tool_calls
-            false, // supports_prompt_cache_key
-            None,  // max_output_tokens — not supported by Codex backend
+            self.model.supports_parallel_tool_calls(),
+            self.model.supports_prompt_cache_key(),
+            self.max_output_tokens(),
             self.model.reasoning_effort(),
         );
         responses_request.store = Some(false);
-        responses_request.max_output_tokens = None;
 
         // The Codex backend requires system messages to be in the top-level
         // `instructions` field rather than as input items.
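
For illustration, here is a minimal, self-contained sketch of the
capability-delegation pattern this patch adopts: the request builder asks
the model for its capabilities instead of the call site hardcoding them.
Only the two method bodies mirror the change above (the explicit match is
written with the equivalent matches! macro); the enum definition, the
Gpt5Codex variant standing in for the reasoning models, and the main
driver are illustrative assumptions, not code from the repository.

    // Standalone sketch; see the assumptions noted above.
    #[derive(Debug, Clone, Copy)]
    enum ChatGptModel {
        Gpt54,
        Gpt54Mini,
        Gpt5Codex, // hypothetical reasoning-model variant
    }

    impl ChatGptModel {
        fn supports_parallel_tool_calls(&self) -> bool {
            // Only the non-reasoning chat models opt in; reasoning/Codex
            // variants must not send `parallel_tool_calls`.
            matches!(self, Self::Gpt54 | Self::Gpt54Mini)
        }

        fn supports_prompt_cache_key(&self) -> bool {
            true
        }
    }

    fn main() {
        // Each model now reports its own capabilities, so no caller can
        // accidentally enable parallel tool calls for a reasoning model.
        for model in [ChatGptModel::Gpt54, ChatGptModel::Gpt5Codex] {
            println!(
                "{model:?}: parallel_tool_calls={}, prompt_cache_key={}",
                model.supports_parallel_tool_calls(),
                model.supports_prompt_cache_key(),
            );
        }
    }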