openai: Don't send prompt_cache_key for OpenAI-compatible models (#36231)

Oleksiy Syvokon created

Some OpenAI-compatible APIs reject requests that include this parameter.

Closes #36215
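
The gist of the fix: `prompt_cache_key` is only forwarded to models that are known to accept it, and is dropped entirely for OpenAI-compatible providers. Below is a minimal sketch of the resulting wire behavior, assuming `open_ai::Request` serializes the field as an `Option` that is skipped when `None` (struct and field names are trimmed for illustration):

```rust
use serde::Serialize;

// Trimmed-down stand-in for `open_ai::Request`; the real struct has many more
// fields. The relevant assumption is that `None` is skipped on serialization,
// so strict providers never see the parameter at all.
#[derive(Serialize)]
struct Request {
    model: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    prompt_cache_key: Option<String>,
}

fn build_request(model: &str, thread_id: Option<String>, supports_prompt_cache_key: bool) -> Request {
    Request {
        model: model.to_string(),
        // Only forward the thread id as a cache key when the model supports it.
        prompt_cache_key: if supports_prompt_cache_key { thread_id } else { None },
    }
}

fn main() {
    let openai = build_request("gpt-4o", Some("thread-123".into()), true);
    let compatible = build_request("my-local-model", Some("thread-123".into()), false);
    // {"model":"gpt-4o","prompt_cache_key":"thread-123"}
    println!("{}", serde_json::to_string(&openai).unwrap());
    // {"model":"my-local-model"} -- no prompt_cache_key, so strict APIs have nothing to reject
    println!("{}", serde_json::to_string(&compatible).unwrap());
}
```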

Release Notes:

- Fixed OpenAI-compatible providers that don't support prompt caching
and/or reasoning

Change summary

crates/language_models/src/provider/cloud.rs              | 1 +
crates/language_models/src/provider/open_ai.rs            | 8 +++++++-
crates/language_models/src/provider/open_ai_compatible.rs | 5 ++++-
crates/language_models/src/provider/vercel.rs             | 1 +
crates/language_models/src/provider/x_ai.rs               | 1 +
crates/open_ai/src/open_ai.rs                             | 7 +++++++
crates/vercel/src/vercel.rs                               | 4 ++++
crates/x_ai/src/x_ai.rs                                   | 4 ++++
8 files changed, 29 insertions(+), 2 deletions(-)

Detailed changes

crates/language_models/src/provider/open_ai.rs 🔗

@@ -370,6 +370,7 @@ impl LanguageModel for OpenAiLanguageModel {
             request,
             self.model.id(),
             self.model.supports_parallel_tool_calls(),
+            self.model.supports_prompt_cache_key(),
             self.max_output_tokens(),
             self.model.reasoning_effort(),
         );
@@ -386,6 +387,7 @@ pub fn into_open_ai(
     request: LanguageModelRequest,
     model_id: &str,
     supports_parallel_tool_calls: bool,
+    supports_prompt_cache_key: bool,
     max_output_tokens: Option<u64>,
     reasoning_effort: Option<ReasoningEffort>,
 ) -> open_ai::Request {
@@ -477,7 +479,11 @@ pub fn into_open_ai(
         } else {
             None
         },
-        prompt_cache_key: request.thread_id,
+        prompt_cache_key: if supports_prompt_cache_key {
+            request.thread_id
+        } else {
+            None
+        },
         tools: request
             .tools
             .into_iter()
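
Not part of this diff, but a test along these lines would pin down the new behavior (hypothetical sketch, assuming `LanguageModelRequest` derives `Default` and `thread_id` is an `Option<String>`):

```rust
#[test]
fn prompt_cache_key_is_dropped_when_unsupported() {
    let request = LanguageModelRequest {
        thread_id: Some("thread-123".into()),
        ..Default::default()
    };
    let open_ai_request = into_open_ai(
        request,
        "my-local-model",
        /* supports_parallel_tool_calls */ true,
        /* supports_prompt_cache_key */ false,
        /* max_output_tokens */ None,
        /* reasoning_effort */ None,
    );
    // The thread id must not leak into the request body for models that opt out.
    assert_eq!(open_ai_request.prompt_cache_key, None);
}
```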

crates/language_models/src/provider/open_ai_compatible.rs 🔗

@@ -355,10 +355,13 @@ impl LanguageModel for OpenAiCompatibleLanguageModel {
             LanguageModelCompletionError,
         >,
     > {
+        let supports_parallel_tool_call = true;
+        let supports_prompt_cache_key = false;
         let request = into_open_ai(
             request,
             &self.model.name,
-            true,
+            supports_parallel_tool_call,
+            supports_prompt_cache_key,
             self.max_output_tokens(),
             None,
         );

crates/language_models/src/provider/vercel.rs 🔗

@@ -355,6 +355,7 @@ impl LanguageModel for VercelLanguageModel {
             request,
             self.model.id(),
             self.model.supports_parallel_tool_calls(),
+            self.model.supports_prompt_cache_key(),
             self.max_output_tokens(),
             None,
         );

crates/language_models/src/provider/x_ai.rs 🔗

@@ -359,6 +359,7 @@ impl LanguageModel for XAiLanguageModel {
             request,
             self.model.id(),
             self.model.supports_parallel_tool_calls(),
+            self.model.supports_prompt_cache_key(),
             self.max_output_tokens(),
             None,
         );

crates/open_ai/src/open_ai.rs 🔗

@@ -236,6 +236,13 @@ impl Model {
             Self::O1 | Self::O3 | Self::O3Mini | Self::O4Mini | Model::Custom { .. } => false,
         }
     }
+
+    /// Returns whether this model supports the `prompt_cache_key` parameter.
+    ///
+    /// If the model does not support the parameter, it must not be sent in the request.
+    pub fn supports_prompt_cache_key(&self) -> bool {
+        true
+    }
 }
 
 #[derive(Debug, Serialize, Deserialize)]
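
The OpenAI models all return `true` here, while the OpenAI-compatible, Vercel, and xAI providers below return `false`. If individual OpenAI models ever need to opt out, this could become a per-model match; a hypothetical sketch, not part of this change:

```rust
pub fn supports_prompt_cache_key(&self) -> bool {
    match self {
        // Hypothetical: a custom deployment behind a strict proxy might reject the parameter.
        Model::Custom { .. } => false,
        _ => true,
    }
}
```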

crates/vercel/src/vercel.rs 🔗

@@ -71,4 +71,8 @@ impl Model {
             Model::Custom { .. } => false,
         }
     }
+
+    pub fn supports_prompt_cache_key(&self) -> bool {
+        false
+    }
 }

crates/x_ai/src/x_ai.rs 🔗

@@ -105,6 +105,10 @@ impl Model {
         }
     }
 
+    pub fn supports_prompt_cache_key(&self) -> bool {
+        false
+    }
+
     pub fn supports_tool(&self) -> bool {
         match self {
             Self::Grok2Vision