From b57ef838927b0a341319041f899ad97801f5a743 Mon Sep 17 00:00:00 2001
From: Richard Feldman
Date: Mon, 6 Apr 2026 15:24:45 -0400
Subject: [PATCH] Delegate model capabilities instead of hardcoding them

The into_open_ai_response call was hardcoding
supports_parallel_tool_calls=true and supports_prompt_cache_key=false
instead of asking the model. This meant that reasoning models like the
GPT-5 Codex variants would incorrectly send parallel_tool_calls=true,
which could cause API errors.

Add the missing capability methods to ChatGptModel and delegate to
them, as the standard OpenAI provider does. While here, pass
self.max_output_tokens() through instead of forcing max_output_tokens
to None.
---
 .../src/provider/openai_subscribed.rs         | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/crates/language_models/src/provider/openai_subscribed.rs b/crates/language_models/src/provider/openai_subscribed.rs
index 134a3a84ee01dd420108e0b057b58ea5385a509e..ea503c8a3f1f6b4e0aa86ccb94756efc1ad5ad83 100644
--- a/crates/language_models/src/provider/openai_subscribed.rs
+++ b/crates/language_models/src/provider/openai_subscribed.rs
@@ -308,6 +308,17 @@ impl ChatGptModel {
             _ => Some(ReasoningEffort::Medium),
         }
     }
+
+    fn supports_parallel_tool_calls(&self) -> bool {
+        match self {
+            Self::Gpt54 | Self::Gpt54Mini => true,
+            _ => false,
+        }
+    }
+
+    fn supports_prompt_cache_key(&self) -> bool {
+        true
+    }
 }
 
 struct OpenAiSubscribedLanguageModel {
@@ -392,13 +403,12 @@ impl LanguageModel for OpenAiSubscribedLanguageModel {
         let mut responses_request = into_open_ai_response(
             request,
             self.model.id(),
-            true,  // supports_parallel_tool_calls
-            false, // supports_prompt_cache_key
-            None,  // max_output_tokens — not supported by Codex backend
+            self.model.supports_parallel_tool_calls(),
+            self.model.supports_prompt_cache_key(),
+            self.max_output_tokens(),
             self.model.reasoning_effort(),
         );
         responses_request.store = Some(false);
-        responses_request.max_output_tokens = None;
 
         // The Codex backend requires system messages to be in the top-level
         // `instructions` field rather than as input items.
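
For illustration, here is a minimal, self-contained sketch of the
capability-delegation pattern this patch adopts: the request builder asks
the model for its capabilities instead of the call site hardcoding them.
Only the two method bodies mirror the change above (the explicit match is
written with the equivalent matches! macro); the enum definition, the
Gpt5Codex variant standing in for the reasoning models, and the main
driver are illustrative assumptions, not code from the repository.

    // Standalone sketch; see the assumptions noted above.
    #[derive(Debug, Clone, Copy)]
    enum ChatGptModel {
        Gpt54,
        Gpt54Mini,
        Gpt5Codex, // hypothetical reasoning-model variant
    }

    impl ChatGptModel {
        fn supports_parallel_tool_calls(&self) -> bool {
            // Only the non-reasoning chat models opt in; reasoning/Codex
            // variants must not send `parallel_tool_calls`.
            matches!(self, Self::Gpt54 | Self::Gpt54Mini)
        }

        fn supports_prompt_cache_key(&self) -> bool {
            true
        }
    }

    fn main() {
        // Each model now reports its own capabilities, so no caller can
        // accidentally enable parallel tool calls for a reasoning model.
        for model in [ChatGptModel::Gpt54, ChatGptModel::Gpt5Codex] {
            println!(
                "{model:?}: parallel_tool_calls={}, prompt_cache_key={}",
                model.supports_parallel_tool_calls(),
                model.supports_prompt_cache_key(),
            );
        }
    }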