Marshall Bowers created this pull request:
This PR removes the `open-ai-responses-api` feature flag and makes it so
all OpenAI requests to the Zed provider use the Responses API.
We've been running this in Nightly/Preview for a week now without any
issues.
Closes CLO-104.
Release Notes:
- N/A
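
At the call site, the change amounts to dropping the flag-gated branch between the two request builders. Here is a minimal sketch of that shape, with stand-in types; the real `into_open_ai`/`into_open_ai_response` take the model id, parallel-tool-call setting, and other arguments shown in the diff below.

```rust
// Sketch only: stand-in types. The real builders take the model id, the
// parallel-tool-call setting, and other arguments shown in the diff below.
#[derive(Debug)]
enum OpenAiRequest {
    ChatCompletions { model: String },
    Responses { model: String },
}

// Stand-in for `into_open_ai` (Chat Completions wire format).
fn into_open_ai(model: &str) -> OpenAiRequest {
    OpenAiRequest::ChatCompletions { model: model.to_owned() }
}

// Stand-in for `into_open_ai_response` (Responses API wire format).
fn into_open_ai_response(model: &str) -> OpenAiRequest {
    OpenAiRequest::Responses { model: model.to_owned() }
}

// Before this PR: branch on the `open-ai-responses-api` feature flag.
fn build_request_before(model: &str, use_responses_api: bool) -> OpenAiRequest {
    if use_responses_api {
        into_open_ai_response(model)
    } else {
        into_open_ai(model)
    }
}

// After this PR: a single, unconditional Responses API path.
fn build_request_after(model: &str) -> OpenAiRequest {
    into_open_ai_response(model)
}

fn main() {
    println!("{:?}", build_request_before("example-model", false));
    println!("{:?}", build_request_after("example-model"));
}
```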
Cargo.lock | 1 -
crates/feature_flags/src/flags.rs | 12 --
crates/language_models/Cargo.toml | 1 -
crates/language_models/src/provider/cloud.rs | 122 +++++++--------------
4 files changed, 39 insertions(+), 97 deletions(-)
Cargo.lock
@@ -9036,7 +9036,6 @@ dependencies = [
"editor",
"extension",
"extension_host",
- "feature_flags",
"fs",
"futures 0.3.31",
"google_ai",
crates/feature_flags/src/flags.rs
@@ -59,15 +59,3 @@ impl FeatureFlag for DiffReviewFeatureFlag {
false
}
}
-
-/// Whether to use the OpenAI Responses API format when sending requests to Cloud.
-pub struct OpenAiResponsesApiFeatureFlag;
-
-impl FeatureFlag for OpenAiResponsesApiFeatureFlag {
- const NAME: &'static str = "open-ai-responses-api";
-
- fn enabled_for_staff() -> bool {
- // Add yourself to the flag manually to test it out.
- false
- }
-}
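
For reference, the deleted flag followed the crate's `FeatureFlag` trait pattern and was checked at the call site with `cx.has_flag::<OpenAiResponsesApiFeatureFlag>()`. A self-contained sketch of that pattern, with a stub standing in for the real GPUI app context:

```rust
// Sketch of the FeatureFlag pattern removed here; `AppContext` is a stub for
// the real GPUI context that backs `cx.has_flag::<T>()`.
use std::collections::HashSet;

trait FeatureFlag {
    const NAME: &'static str;

    /// Staff opt-in by default; the deleted flag returned `false`, so staff
    /// had to add themselves to the flag manually to test it.
    fn enabled_for_staff() -> bool {
        false
    }
}

struct OpenAiResponsesApiFeatureFlag;

impl FeatureFlag for OpenAiResponsesApiFeatureFlag {
    const NAME: &'static str = "open-ai-responses-api";
}

struct AppContext {
    enabled_flags: HashSet<&'static str>,
    is_staff: bool,
}

impl AppContext {
    fn has_flag<T: FeatureFlag>(&self) -> bool {
        (self.is_staff && T::enabled_for_staff()) || self.enabled_flags.contains(T::NAME)
    }
}

fn main() {
    let cx = AppContext {
        enabled_flags: HashSet::from(["open-ai-responses-api"]),
        is_staff: false,
    };
    // This is the check the provider no longer needs to make.
    assert!(cx.has_flag::<OpenAiResponsesApiFeatureFlag>());
}
```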
crates/language_models/Cargo.toml
@@ -32,7 +32,6 @@ credentials_provider.workspace = true
deepseek = { workspace = true, features = ["schemars"] }
extension.workspace = true
extension_host.workspace = true
-feature_flags.workspace = true
fs.workspace = true
futures.workspace = true
google_ai = { workspace = true, features = ["schemars"] }
crates/language_models/src/provider/cloud.rs
@@ -9,7 +9,6 @@ use cloud_llm_client::{
ListModelsResponse, Plan, PlanV2, SERVER_SUPPORTS_STATUS_MESSAGES_HEADER_NAME,
ZED_VERSION_HEADER_NAME,
};
-use feature_flags::{FeatureFlagAppExt as _, OpenAiResponsesApiFeatureFlag};
use futures::{
AsyncBufReadExt, FutureExt, Stream, StreamExt, future::BoxFuture, stream::BoxStream,
};
@@ -721,7 +720,6 @@ impl LanguageModel for CloudLanguageModel {
let intent = request.intent;
let bypass_rate_limit = request.bypass_rate_limit;
let app_version = Some(cx.update(|cx| AppVersion::global(cx)));
- let use_responses_api = cx.update(|cx| cx.has_flag::<OpenAiResponsesApiFeatureFlag>());
let thinking_allowed = request.thinking_allowed;
let provider_name = provider_name(&self.model.provider);
match self.model.provider {
@@ -783,87 +781,45 @@ impl LanguageModel for CloudLanguageModel {
let client = self.client.clone();
let llm_api_token = self.llm_api_token.clone();
- if use_responses_api {
- let request = into_open_ai_response(
- request,
- &self.model.id.0,
- self.model.supports_parallel_tool_calls,
- true,
- None,
- None,
- );
- let future = self.request_limiter.stream_with_bypass(
- async move {
- let PerformLlmCompletionResponse {
- response,
- includes_status_messages,
- } = Self::perform_llm_completion(
- client.clone(),
- llm_api_token,
- app_version,
- CompletionBody {
- thread_id,
- prompt_id,
- intent,
- provider: cloud_llm_client::LanguageModelProvider::OpenAi,
- model: request.model.clone(),
- provider_request: serde_json::to_value(&request)
- .map_err(|e| anyhow!(e))?,
- },
- )
- .await?;
-
- let mut mapper = OpenAiResponseEventMapper::new();
- Ok(map_cloud_completion_events(
- Box::pin(response_lines(response, includes_status_messages)),
- &provider_name,
- move |event| mapper.map_event(event),
- ))
- },
- bypass_rate_limit,
- );
- async move { Ok(future.await?.boxed()) }.boxed()
- } else {
- let request = into_open_ai(
- request,
- &self.model.id.0,
- self.model.supports_parallel_tool_calls,
- true,
- None,
- None,
- );
- let future = self.request_limiter.stream_with_bypass(
- async move {
- let PerformLlmCompletionResponse {
- response,
- includes_status_messages,
- } = Self::perform_llm_completion(
- client.clone(),
- llm_api_token,
- app_version,
- CompletionBody {
- thread_id,
- prompt_id,
- intent,
- provider: cloud_llm_client::LanguageModelProvider::OpenAi,
- model: request.model.clone(),
- provider_request: serde_json::to_value(&request)
- .map_err(|e| anyhow!(e))?,
- },
- )
- .await?;
-
- let mut mapper = OpenAiEventMapper::new();
- Ok(map_cloud_completion_events(
- Box::pin(response_lines(response, includes_status_messages)),
- &provider_name,
- move |event| mapper.map_event(event),
- ))
- },
- bypass_rate_limit,
- );
- async move { Ok(future.await?.boxed()) }.boxed()
- }
+ let request = into_open_ai_response(
+ request,
+ &self.model.id.0,
+ self.model.supports_parallel_tool_calls,
+ true,
+ None,
+ None,
+ );
+ let future = self.request_limiter.stream_with_bypass(
+ async move {
+ let PerformLlmCompletionResponse {
+ response,
+ includes_status_messages,
+ } = Self::perform_llm_completion(
+ client.clone(),
+ llm_api_token,
+ app_version,
+ CompletionBody {
+ thread_id,
+ prompt_id,
+ intent,
+ provider: cloud_llm_client::LanguageModelProvider::OpenAi,
+ model: request.model.clone(),
+ provider_request: serde_json::to_value(&request)
+ .map_err(|e| anyhow!(e))?,
+ },
+ )
+ .await?;
+
+ let mut mapper = OpenAiResponseEventMapper::new();
+ Ok(map_cloud_completion_events(
+ Box::pin(response_lines(response, includes_status_messages)),
+ &provider_name,
+ move |event| mapper.map_event(event),
+ ))
+ },
+ bypass_rate_limit,
+ );
+ async move { Ok(future.await?.boxed()) }.boxed()
}
cloud_llm_client::LanguageModelProvider::XAi => {
let client = self.client.clone();
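
The streaming plumbing keeps the same shape: response lines still flow through `map_cloud_completion_events` with a stateful mapper, it is just always `OpenAiResponseEventMapper` now. A standalone sketch of that line-to-events mapping, using stand-in event types and only the `futures` crate:

```rust
// Sketch of the line -> events mapping shape; all types are stand-ins for
// Zed's wire events, completion events, and OpenAiResponseEventMapper.
use futures::{executor::block_on, stream, StreamExt};

#[derive(Debug)]
struct WireEvent(String);

#[derive(Debug)]
struct CompletionEvent(String);

/// Stateful mapper: one wire event may yield zero or more completion events.
#[derive(Default)]
struct ResponseEventMapper {
    seen: usize,
}

impl ResponseEventMapper {
    fn map_event(&mut self, event: WireEvent) -> Vec<CompletionEvent> {
        self.seen += 1;
        vec![CompletionEvent(format!("event {}: {}", self.seen, event.0))]
    }
}

fn main() {
    block_on(async {
        // Stand-in for `response_lines(response, includes_status_messages)`.
        let lines = stream::iter(vec![
            WireEvent("output_text.delta: Hel".into()),
            WireEvent("output_text.delta: lo".into()),
        ]);

        let mut mapper = ResponseEventMapper::default();
        // Mirrors `map_cloud_completion_events(..., move |event| mapper.map_event(event))`.
        let mut completions =
            lines.flat_map(move |event| stream::iter(mapper.map_event(event)));

        while let Some(event) = completions.next().await {
            println!("{event:?}");
        }
    });
}
```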