diff --git a/Cargo.lock b/Cargo.lock
index afb45197aff8cf2ae78ae3d8f90ffc656cf8f661..02a2efe1737bb13a44d0fa3261be79c51b04a508 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -9036,7 +9036,6 @@ dependencies = [
  "editor",
  "extension",
  "extension_host",
- "feature_flags",
  "fs",
  "futures 0.3.31",
  "google_ai",
diff --git a/crates/feature_flags/src/flags.rs b/crates/feature_flags/src/flags.rs
index 20a23df734e9df90a734da6826077a2b13cf997d..470bcfe910bfdc23a5cec22cf45a2b7a6cf46c51 100644
--- a/crates/feature_flags/src/flags.rs
+++ b/crates/feature_flags/src/flags.rs
@@ -59,15 +59,3 @@ impl FeatureFlag for DiffReviewFeatureFlag {
         false
     }
 }
-
-/// Whether to use the OpenAI Responses API format when sending requests to Cloud.
-pub struct OpenAiResponsesApiFeatureFlag;
-
-impl FeatureFlag for OpenAiResponsesApiFeatureFlag {
-    const NAME: &'static str = "open-ai-responses-api";
-
-    fn enabled_for_staff() -> bool {
-        // Add yourself to the flag manually to test it out.
-        false
-    }
-}
diff --git a/crates/language_models/Cargo.toml b/crates/language_models/Cargo.toml
index 3b0cf3a31d0718f89994c4cd3cb2bf82f4ec4408..1dd7162d3d93a68f981489cfb38edb71b2c3e504 100644
--- a/crates/language_models/Cargo.toml
+++ b/crates/language_models/Cargo.toml
@@ -32,7 +32,6 @@ credentials_provider.workspace = true
 deepseek = { workspace = true, features = ["schemars"] }
 extension.workspace = true
 extension_host.workspace = true
-feature_flags.workspace = true
 fs.workspace = true
 futures.workspace = true
 google_ai = { workspace = true, features = ["schemars"] }
diff --git a/crates/language_models/src/provider/cloud.rs b/crates/language_models/src/provider/cloud.rs
index a96a0618e15217c648836c89fad51725b16b2b43..03f7ba125f54ef0ae18c53d094fa4798c86b24aa 100644
--- a/crates/language_models/src/provider/cloud.rs
+++ b/crates/language_models/src/provider/cloud.rs
@@ -9,7 +9,6 @@ use cloud_llm_client::{
     ListModelsResponse, Plan, PlanV2, SERVER_SUPPORTS_STATUS_MESSAGES_HEADER_NAME,
     ZED_VERSION_HEADER_NAME,
 };
-use feature_flags::{FeatureFlagAppExt as _, OpenAiResponsesApiFeatureFlag};
 use futures::{
     AsyncBufReadExt, FutureExt, Stream, StreamExt, future::BoxFuture, stream::BoxStream,
 };
@@ -721,7 +720,6 @@ impl LanguageModel for CloudLanguageModel {
         let intent = request.intent;
         let bypass_rate_limit = request.bypass_rate_limit;
         let app_version = Some(cx.update(|cx| AppVersion::global(cx)));
-        let use_responses_api = cx.update(|cx| cx.has_flag::<OpenAiResponsesApiFeatureFlag>());
         let thinking_allowed = request.thinking_allowed;
         let provider_name = provider_name(&self.model.provider);
         match self.model.provider {
@@ -783,87 +781,45 @@ impl LanguageModel for CloudLanguageModel {
                 let client = self.client.clone();
                 let llm_api_token = self.llm_api_token.clone();
 
-                if use_responses_api {
-                    let request = into_open_ai_response(
-                        request,
-                        &self.model.id.0,
-                        self.model.supports_parallel_tool_calls,
-                        true,
-                        None,
-                        None,
-                    );
-                    let future = self.request_limiter.stream_with_bypass(
-                        async move {
-                            let PerformLlmCompletionResponse {
-                                response,
-                                includes_status_messages,
-                            } = Self::perform_llm_completion(
-                                client.clone(),
-                                llm_api_token,
-                                app_version,
-                                CompletionBody {
-                                    thread_id,
-                                    prompt_id,
-                                    intent,
-                                    provider: cloud_llm_client::LanguageModelProvider::OpenAi,
-                                    model: request.model.clone(),
-                                    provider_request: serde_json::to_value(&request)
-                                        .map_err(|e| anyhow!(e))?,
-                                },
-                            )
-                            .await?;
-
-                            let mut mapper = OpenAiResponseEventMapper::new();
-                            Ok(map_cloud_completion_events(
-                                Box::pin(response_lines(response, includes_status_messages)),
-                                &provider_name,
-                                move |event| mapper.map_event(event),
-                            ))
-                        },
-                        bypass_rate_limit,
-                    );
-                    async move { Ok(future.await?.boxed()) }.boxed()
-                } else {
-                    let request = into_open_ai(
-                        request,
-                        &self.model.id.0,
-                        self.model.supports_parallel_tool_calls,
-                        true,
-                        None,
-                        None,
-                    );
-                    let future = self.request_limiter.stream_with_bypass(
-                        async move {
-                            let PerformLlmCompletionResponse {
-                                response,
-                                includes_status_messages,
-                            } = Self::perform_llm_completion(
-                                client.clone(),
-                                llm_api_token,
-                                app_version,
-                                CompletionBody {
-                                    thread_id,
-                                    prompt_id,
-                                    intent,
-                                    provider: cloud_llm_client::LanguageModelProvider::OpenAi,
-                                    model: request.model.clone(),
-                                    provider_request: serde_json::to_value(&request)
-                                        .map_err(|e| anyhow!(e))?,
-                                },
-                            )
-                            .await?;
-
-                            let mut mapper = OpenAiEventMapper::new();
-                            Ok(map_cloud_completion_events(
-                                Box::pin(response_lines(response, includes_status_messages)),
-                                &provider_name,
-                                move |event| mapper.map_event(event),
-                            ))
-                        },
-                        bypass_rate_limit,
-                    );
-                    async move { Ok(future.await?.boxed()) }.boxed()
-                }
+                let request = into_open_ai_response(
+                    request,
+                    &self.model.id.0,
+                    self.model.supports_parallel_tool_calls,
+                    true,
+                    None,
+                    None,
+                );
+                let future = self.request_limiter.stream_with_bypass(
+                    async move {
+                        let PerformLlmCompletionResponse {
+                            response,
+                            includes_status_messages,
+                        } = Self::perform_llm_completion(
+                            client.clone(),
+                            llm_api_token,
+                            app_version,
+                            CompletionBody {
+                                thread_id,
+                                prompt_id,
+                                intent,
+                                provider: cloud_llm_client::LanguageModelProvider::OpenAi,
+                                model: request.model.clone(),
+                                provider_request: serde_json::to_value(&request)
+                                    .map_err(|e| anyhow!(e))?,
+                            },
+                        )
+                        .await?;
+
+                        let mut mapper = OpenAiResponseEventMapper::new();
+                        Ok(map_cloud_completion_events(
+                            Box::pin(response_lines(response, includes_status_messages)),
+                            &provider_name,
+                            move |event| mapper.map_event(event),
+                        ))
+                    },
+                    bypass_rate_limit,
+                );
+                async move { Ok(future.await?.boxed()) }.boxed()
             }
             cloud_llm_client::LanguageModelProvider::XAi => {
                 let client = self.client.clone();
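Reviewer note (not part of the diff): with the open-ai-responses-api flag removed, the Cloud OpenAi provider now unconditionally builds requests with into_open_ai_response and maps events with OpenAiResponseEventMapper; the Chat Completions path (into_open_ai plus OpenAiEventMapper) is no longer reachable from this call site. The sketch below condenses the deleted gating pattern purely for context, using only the feature_flags items visible in the deletions above; it is a sketch, not code added by this change.

```rust
use feature_flags::FeatureFlag;

// The flag type deleted from crates/feature_flags/src/flags.rs.
pub struct OpenAiResponsesApiFeatureFlag;

impl FeatureFlag for OpenAiResponsesApiFeatureFlag {
    const NAME: &'static str = "open-ai-responses-api";

    fn enabled_for_staff() -> bool {
        // Previously off by default; staff added themselves to the flag manually to test it.
        false
    }
}

// How cloud.rs gated the two request formats before this change (condensed):
//
//     let use_responses_api = cx.update(|cx| cx.has_flag::<OpenAiResponsesApiFeatureFlag>());
//     if use_responses_api {
//         // into_open_ai_response(..) + OpenAiResponseEventMapper  -- the path that remains
//     } else {
//         // into_open_ai(..) + OpenAiEventMapper                   -- the path removed here
//     }
```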