language_models: Remove `open-ai-responses-api` feature flag (#47317)

Marshall Bowers created

This PR removes the `open-ai-responses-api` feature flag so that all OpenAI
requests made through the Zed provider use the Responses API.
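For context, here is a minimal, self-contained sketch of the request-body shape
difference this change standardizes on. The field names follow OpenAI's public
Chat Completions vs. Responses API documentation; the model name and message
content are placeholders, and Zed's internal `into_open_ai` /
`into_open_ai_response` helpers are not reproduced here (see the diff below for
the real code path):

```rust
// Illustrative only; requires the `serde_json` crate.
use serde_json::json;

fn main() {
    // Chat Completions-style body: conversation goes under "messages".
    let chat_completions_body = json!({
        "model": "gpt-4o",
        "messages": [{ "role": "user", "content": "Hello" }]
    });

    // Responses API-style body: conversation goes under "input".
    let responses_body = json!({
        "model": "gpt-4o",
        "input": [{ "role": "user", "content": "Hello" }]
    });

    println!("{chat_completions_body}");
    println!("{responses_body}");
}
```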

We've been running this in Nightly/Preview for a week now without any
issues.

Closes CLO-104.

Release Notes:

- N/A

Change summary

Cargo.lock                                   |   1 -
crates/feature_flags/src/flags.rs            |  12 --
crates/language_models/Cargo.toml            |   1 -
crates/language_models/src/provider/cloud.rs | 122 +++++++--------------
4 files changed, 39 insertions(+), 97 deletions(-)

Detailed changes

Cargo.lock

@@ -9036,7 +9036,6 @@ dependencies = [
  "editor",
  "extension",
  "extension_host",
- "feature_flags",
  "fs",
  "futures 0.3.31",
  "google_ai",

crates/feature_flags/src/flags.rs

@@ -59,15 +59,3 @@ impl FeatureFlag for DiffReviewFeatureFlag {
         false
     }
 }
-
-/// Whether to use the OpenAI Responses API format when sending requests to Cloud.
-pub struct OpenAiResponsesApiFeatureFlag;
-
-impl FeatureFlag for OpenAiResponsesApiFeatureFlag {
-    const NAME: &'static str = "open-ai-responses-api";
-
-    fn enabled_for_staff() -> bool {
-        // Add yourself to the flag manually to test it out.
-        false
-    }
-}

crates/language_models/Cargo.toml

@@ -32,7 +32,6 @@ credentials_provider.workspace = true
 deepseek = { workspace = true, features = ["schemars"] }
 extension.workspace = true
 extension_host.workspace = true
-feature_flags.workspace = true
 fs.workspace = true
 futures.workspace = true
 google_ai = { workspace = true, features = ["schemars"] }

crates/language_models/src/provider/cloud.rs

@@ -9,7 +9,6 @@ use cloud_llm_client::{
     ListModelsResponse, Plan, PlanV2, SERVER_SUPPORTS_STATUS_MESSAGES_HEADER_NAME,
     ZED_VERSION_HEADER_NAME,
 };
-use feature_flags::{FeatureFlagAppExt as _, OpenAiResponsesApiFeatureFlag};
 use futures::{
     AsyncBufReadExt, FutureExt, Stream, StreamExt, future::BoxFuture, stream::BoxStream,
 };
@@ -721,7 +720,6 @@ impl LanguageModel for CloudLanguageModel {
         let intent = request.intent;
         let bypass_rate_limit = request.bypass_rate_limit;
         let app_version = Some(cx.update(|cx| AppVersion::global(cx)));
-        let use_responses_api = cx.update(|cx| cx.has_flag::<OpenAiResponsesApiFeatureFlag>());
         let thinking_allowed = request.thinking_allowed;
         let provider_name = provider_name(&self.model.provider);
         match self.model.provider {
@@ -783,87 +781,45 @@ impl LanguageModel for CloudLanguageModel {
                 let client = self.client.clone();
                 let llm_api_token = self.llm_api_token.clone();
 
-                if use_responses_api {
-                    let request = into_open_ai_response(
-                        request,
-                        &self.model.id.0,
-                        self.model.supports_parallel_tool_calls,
-                        true,
-                        None,
-                        None,
-                    );
-                    let future = self.request_limiter.stream_with_bypass(
-                        async move {
-                            let PerformLlmCompletionResponse {
-                                response,
-                                includes_status_messages,
-                            } = Self::perform_llm_completion(
-                                client.clone(),
-                                llm_api_token,
-                                app_version,
-                                CompletionBody {
-                                    thread_id,
-                                    prompt_id,
-                                    intent,
-                                    provider: cloud_llm_client::LanguageModelProvider::OpenAi,
-                                    model: request.model.clone(),
-                                    provider_request: serde_json::to_value(&request)
-                                        .map_err(|e| anyhow!(e))?,
-                                },
-                            )
-                            .await?;
-
-                            let mut mapper = OpenAiResponseEventMapper::new();
-                            Ok(map_cloud_completion_events(
-                                Box::pin(response_lines(response, includes_status_messages)),
-                                &provider_name,
-                                move |event| mapper.map_event(event),
-                            ))
-                        },
-                        bypass_rate_limit,
-                    );
-                    async move { Ok(future.await?.boxed()) }.boxed()
-                } else {
-                    let request = into_open_ai(
-                        request,
-                        &self.model.id.0,
-                        self.model.supports_parallel_tool_calls,
-                        true,
-                        None,
-                        None,
-                    );
-                    let future = self.request_limiter.stream_with_bypass(
-                        async move {
-                            let PerformLlmCompletionResponse {
-                                response,
-                                includes_status_messages,
-                            } = Self::perform_llm_completion(
-                                client.clone(),
-                                llm_api_token,
-                                app_version,
-                                CompletionBody {
-                                    thread_id,
-                                    prompt_id,
-                                    intent,
-                                    provider: cloud_llm_client::LanguageModelProvider::OpenAi,
-                                    model: request.model.clone(),
-                                    provider_request: serde_json::to_value(&request)
-                                        .map_err(|e| anyhow!(e))?,
-                                },
-                            )
-                            .await?;
-
-                            let mut mapper = OpenAiEventMapper::new();
-                            Ok(map_cloud_completion_events(
-                                Box::pin(response_lines(response, includes_status_messages)),
-                                &provider_name,
-                                move |event| mapper.map_event(event),
-                            ))
-                        },
-                        bypass_rate_limit,
-                    );
-                    async move { Ok(future.await?.boxed()) }.boxed()
-                }
+                let request = into_open_ai_response(
+                    request,
+                    &self.model.id.0,
+                    self.model.supports_parallel_tool_calls,
+                    true,
+                    None,
+                    None,
+                );
+                let future = self.request_limiter.stream_with_bypass(
+                    async move {
+                        let PerformLlmCompletionResponse {
+                            response,
+                            includes_status_messages,
+                        } = Self::perform_llm_completion(
+                            client.clone(),
+                            llm_api_token,
+                            app_version,
+                            CompletionBody {
+                                thread_id,
+                                prompt_id,
+                                intent,
+                                provider: cloud_llm_client::LanguageModelProvider::OpenAi,
+                                model: request.model.clone(),
+                                provider_request: serde_json::to_value(&request)
+                                    .map_err(|e| anyhow!(e))?,
+                            },
+                        )
+                        .await?;
+
+                        let mut mapper = OpenAiResponseEventMapper::new();
+                        Ok(map_cloud_completion_events(
+                            Box::pin(response_lines(response, includes_status_messages)),
+                            &provider_name,
+                            move |event| mapper.map_event(event),
+                        ))
+                    },
+                    bypass_rate_limit,
+                );
+                async move { Ok(future.await?.boxed()) }.boxed()
             }
             cloud_llm_client::LanguageModelProvider::XAi => {
                 let client = self.client.clone();