language_models: Enable setting reasoning effort for OpenAI models through Zed provider (#48605)

Created by Marshall Bowers and Tom Houlé

This PR adds support for setting the reasoning effort for OpenAI models
through the Zed provider.

This is gated behind the `cloud-thinking-effort` feature flag.

Release Notes:

- N/A

Co-authored-by: Tom Houlé <tom@tomhoule.com>

Change summary

crates/language_models/src/provider/cloud.rs  | 20 +++++++++++++++-----
crates/settings_content/src/language_model.rs |  5 ++++-
2 files changed, 19 insertions(+), 6 deletions(-)

Detailed changes

crates/language_models/src/provider/cloud.rs

@@ -747,13 +747,14 @@ impl LanguageModel for CloudLanguageModel {
         } else {
             thinking_allowed && self.model.id.0.ends_with("-thinking")
         };
-        let effort = request
-            .thinking_effort
-            .as_ref()
-            .and_then(|effort| anthropic::Effort::from_str(effort).ok());
         let provider_name = provider_name(&self.model.provider);
         match self.model.provider {
             cloud_llm_client::LanguageModelProvider::Anthropic => {
+                let effort = request
+                    .thinking_effort
+                    .as_ref()
+                    .and_then(|effort| anthropic::Effort::from_str(effort).ok());
+
                 let mut request = into_anthropic(
                     request,
                     self.model.id.to_string(),
@@ -811,8 +812,12 @@ impl LanguageModel for CloudLanguageModel {
             cloud_llm_client::LanguageModelProvider::OpenAi => {
                 let client = self.client.clone();
                 let llm_api_token = self.llm_api_token.clone();
+                let effort = request
+                    .thinking_effort
+                    .as_ref()
+                    .and_then(|effort| open_ai::ReasoningEffort::from_str(effort).ok());
 
-                let request = into_open_ai_response(
+                let mut request = into_open_ai_response(
                     request,
                     &self.model.id.0,
                     self.model.supports_parallel_tool_calls,
@@ -820,6 +825,11 @@ impl LanguageModel for CloudLanguageModel {
                     None,
                     None,
                 );
+
+                if enable_thinking && let Some(effort) = effort {
+                    request.reasoning = Some(open_ai::responses::ReasoningConfig { effort });
+                }
+
                 let future = self.request_limiter.stream(async move {
                     let PerformLlmCompletionResponse {
                         response,

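For illustration, the core of the OpenAI branch above is: parse the requested effort string into `open_ai::ReasoningEffort`, and, when thinking is enabled, attach it to the Responses API request as a `ReasoningConfig`. Below is a minimal, self-contained sketch of that pattern; `ReasoningEffort`, `ReasoningConfig`, and `ResponseRequest` here are simplified stand-ins, not the actual `open_ai` crate types.

```rust
use std::str::FromStr;

// Simplified stand-ins for the real `open_ai` types used in the diff above.
#[derive(Debug, Clone, Copy, PartialEq)]
enum ReasoningEffort {
    Minimal,
    Low,
    Medium,
    High,
}

impl FromStr for ReasoningEffort {
    type Err = ();

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "minimal" => Ok(Self::Minimal),
            "low" => Ok(Self::Low),
            "medium" => Ok(Self::Medium),
            "high" => Ok(Self::High),
            _ => Err(()),
        }
    }
}

#[derive(Debug)]
struct ReasoningConfig {
    effort: ReasoningEffort,
}

#[derive(Debug, Default)]
struct ResponseRequest {
    reasoning: Option<ReasoningConfig>,
}

fn apply_reasoning_effort(
    request: &mut ResponseRequest,
    enable_thinking: bool,
    thinking_effort: Option<&str>,
) {
    // Unknown effort strings are ignored rather than failing the request,
    // mirroring the `.ok()` in the diff.
    let effort = thinking_effort.and_then(|s| ReasoningEffort::from_str(s).ok());

    if enable_thinking {
        if let Some(effort) = effort {
            request.reasoning = Some(ReasoningConfig { effort });
        }
    }
}

fn main() {
    let mut request = ResponseRequest::default();
    apply_reasoning_effort(&mut request, true, Some("high"));
    assert!(request.reasoning.is_some());

    let mut without_thinking = ResponseRequest::default();
    apply_reasoning_effort(&mut without_thinking, false, Some("high"));
    assert!(without_thinking.reasoning.is_none());
}
```

The actual diff uses a let chain (`if enable_thinking && let Some(effort) = effort`), which is equivalent to the nested `if` above but requires a toolchain with let-chain support.
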
crates/settings_content/src/language_model.rs

@@ -2,6 +2,7 @@ use collections::HashMap;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use settings_macros::{MergeFrom, with_fallible_options};
+use strum::EnumString;
 
 use std::sync::Arc;
 
@@ -212,13 +213,15 @@ pub struct OpenAiAvailableModel {
     pub capabilities: OpenAiModelCapabilities,
 }
 
-#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, JsonSchema, MergeFrom)]
+#[derive(Debug, Serialize, Deserialize, PartialEq, Clone, EnumString, JsonSchema, MergeFrom)]
 #[serde(rename_all = "lowercase")]
+#[strum(serialize_all = "lowercase")]
 pub enum OpenAiReasoningEffort {
     Minimal,
     Low,
     Medium,
     High,
+    XHigh,
 }
 
 #[with_fallible_options]
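
The `strum::EnumString` derive added above gives `OpenAiReasoningEffort` a `FromStr` implementation, and `#[strum(serialize_all = "lowercase")]` makes each variant parse from its lowercased name (the new `XHigh` parses from `"xhigh"`). Here is a minimal sketch of the same derive pattern on a standalone `Effort` enum, assuming the `strum` crate with its `derive` feature:

```rust
use std::str::FromStr;

use strum::EnumString;

// Same derive pattern as `OpenAiReasoningEffort`: `serialize_all = "lowercase"`
// makes each variant parse from its lowercased name.
#[derive(Debug, PartialEq, EnumString)]
#[strum(serialize_all = "lowercase")]
enum Effort {
    Minimal,
    Low,
    Medium,
    High,
    XHigh,
}

fn main() {
    // `EnumString` implements `FromStr`, so both forms work:
    assert_eq!(Effort::from_str("medium"), Ok(Effort::Medium));
    assert_eq!("xhigh".parse::<Effort>(), Ok(Effort::XHigh));

    // Unrecognized values return a `strum::ParseError` instead of panicking.
    assert!(Effort::from_str("extreme").is_err());
}
```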