agent: Add thinking toggle for Zed provider (#47407)

Created by Marshall Bowers and Neel

This PR adds a thinking toggle to the Zed provider for controlling whether a
model uses extended thinking:

<img width="645" height="142" alt="Screenshot 2026-01-22 at 12 34 01 PM"
src="https://github.com/user-attachments/assets/9aa543fe-e708-4840-8b38-1a6fbcb78388"
/>

Previously we represented models that support thinking as separate "Thinking"
variants in the model selector.

Currently, this only applies to Anthropic models in the Zed provider.

This is gated behind the `cloud-thinking-toggle` feature flag.
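
In rough terms, the gating works like this (a simplified sketch of the new logic in `crates/language_models/src/provider/cloud.rs`; the function and parameter names here are illustrative, not the actual code):

```rust
/// Sketch: whether the Zed provider should request extended thinking for a model.
fn should_enable_thinking(
    toggle_flag_enabled: bool,     // the `cloud-thinking-toggle` feature flag
    thinking_allowed: bool,        // driven by the per-thread toggle
    model_supports_thinking: bool, // the model's own capability
    model_id: &str,
) -> bool {
    if toggle_flag_enabled {
        // New behavior: honor the toggle for any model that supports thinking.
        thinking_allowed && model_supports_thinking
    } else {
        // Old behavior: rely on the separate "-thinking" model variants.
        thinking_allowed && model_id.ends_with("-thinking")
    }
}
```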

Release Notes:

- N/A

---------

Co-authored-by: Neel <neel@zed.dev>

Change summary

Cargo.lock                                   |  1 +
crates/agent/src/thread.rs                   | 16 ++++++++
crates/agent_ui/src/acp/thread_view.rs       | 41 ++++++++++++++++++++-
crates/feature_flags/src/flags.rs            | 11 +++++
crates/language_model/src/language_model.rs  |  5 ++
crates/language_models/Cargo.toml            |  1 +
crates/language_models/src/provider/cloud.rs | 37 +++++++++++++++----
7 files changed, 100 insertions(+), 12 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -9040,6 +9040,7 @@ dependencies = [
  "editor",
  "extension",
  "extension_host",
+ "feature_flags",
  "fs",
  "futures 0.3.31",
  "google_ai",

crates/agent/src/thread.rs 🔗

@@ -749,6 +749,7 @@ pub struct Thread {
     templates: Arc<Templates>,
     model: Option<Arc<dyn LanguageModel>>,
     summarization_model: Option<Arc<dyn LanguageModel>>,
+    thinking_enabled: bool,
     prompt_capabilities_tx: watch::Sender<acp::PromptCapabilities>,
     pub(crate) prompt_capabilities_rx: watch::Receiver<acp::PromptCapabilities>,
     pub(crate) project: Entity<Project>,
@@ -811,6 +812,7 @@ impl Thread {
             templates,
             model,
             summarization_model: None,
+            thinking_enabled: true,
             prompt_capabilities_tx,
             prompt_capabilities_rx,
             project,
@@ -872,6 +874,7 @@ impl Thread {
             templates,
             model: Some(model),
             summarization_model: None,
+            thinking_enabled: true,
             prompt_capabilities_tx,
             prompt_capabilities_rx,
             project,
@@ -1069,6 +1072,8 @@ impl Thread {
             templates,
             model,
             summarization_model: None,
+            // TODO: Persist this on the `DbThread`.
+            thinking_enabled: true,
             project,
             action_log,
             updated_at: db_thread.updated_at,
@@ -1167,6 +1172,15 @@ impl Thread {
         cx.notify()
     }
 
+    pub fn thinking_enabled(&self) -> bool {
+        self.thinking_enabled
+    }
+
+    pub fn set_thinking_enabled(&mut self, enabled: bool, cx: &mut Context<Self>) {
+        self.thinking_enabled = enabled;
+        cx.notify();
+    }
+
     pub fn last_message(&self) -> Option<Message> {
         if let Some(message) = self.pending_message.clone() {
             Some(Message::Agent(message))
@@ -2292,7 +2306,7 @@ impl Thread {
             tool_choice: None,
             stop: Vec::new(),
             temperature: AgentSettings::temperature_for_model(model, cx),
-            thinking_allowed: true,
+            thinking_allowed: self.thinking_enabled,
             bypass_rate_limit: false,
         };
 

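The UI drives this state through the two new accessors; a minimal usage sketch, assuming an `Entity<Thread>` named `thread` and a GPUI context `cx` in scope (this mirrors the click handler in `thread_view.rs` below):

```rust
// Flip the per-thread thinking state; `set_thinking_enabled` calls
// `cx.notify()` so the view re-renders with the new toggle state.
thread.update(cx, |thread, cx| {
    let enabled = thread.thinking_enabled();
    thread.set_thinking_enabled(!enabled, cx);
});
```
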
crates/agent_ui/src/acp/thread_view.rs 🔗

@@ -21,8 +21,8 @@ use editor::{
     Editor, EditorEvent, EditorMode, MultiBuffer, PathKey, SelectionEffects, SizingBehavior,
 };
 use feature_flags::{
-    AgentSharingFeatureFlag, AgentV2FeatureFlag, FeatureFlagAppExt as _,
-    UserSlashCommandsFeatureFlag,
+    AgentSharingFeatureFlag, AgentV2FeatureFlag, CloudThinkingToggleFeatureFlag,
+    FeatureFlagAppExt as _, UserSlashCommandsFeatureFlag,
 };
 use file_icons::FileIcons;
 use fs::Fs;
@@ -6216,6 +6216,7 @@ impl AcpThreadView {
                         h_flex()
                             .gap_1()
                             .children(self.render_token_usage(cx))
+                            .children(self.render_thinking_toggle(cx))
                             .children(self.profile_selector.clone())
                             // Either config_options_view OR (mode_selector + model_selector)
                             .children(self.config_options_view.clone())
@@ -6486,6 +6487,42 @@ impl AcpThreadView {
         }
     }
 
+    fn render_thinking_toggle(&self, cx: &mut Context<Self>) -> Option<IconButton> {
+        if !cx.has_flag::<CloudThinkingToggleFeatureFlag>() {
+            return None;
+        }
+
+        let thread = self.as_native_thread(cx)?.read(cx);
+
+        let supports_thinking = thread.model()?.supports_thinking();
+        if !supports_thinking {
+            return None;
+        }
+
+        let thinking = thread.thinking_enabled();
+
+        let tooltip_label = if thinking {
+            "Disable Thinking Mode".to_string()
+        } else {
+            "Enable Thinking Mode".to_string()
+        };
+
+        Some(
+            IconButton::new("thinking-mode", IconName::ToolThink)
+                .icon_size(IconSize::Small)
+                .icon_color(Color::Muted)
+                .toggle_state(thinking)
+                .tooltip(Tooltip::text(tooltip_label))
+                .on_click(cx.listener(move |this, _, _window, cx| {
+                    if let Some(thread) = this.as_native_thread(cx) {
+                        thread.update(cx, |thread, cx| {
+                            thread.set_thinking_enabled(!thread.thinking_enabled(), cx);
+                        });
+                    }
+                })),
+        )
+    }
+
     fn keep_all(&mut self, _: &KeepAll, _window: &mut Window, cx: &mut Context<Self>) {
         let Some(thread) = self.thread() else {
             return;

crates/feature_flags/src/flags.rs 🔗

@@ -65,3 +65,14 @@ impl FeatureFlag for DiffReviewFeatureFlag {
         false
     }
 }
+
+/// Controls whether we show the new thinking toggle in the Agent Panel when using models through the Zed provider (Cloud).
+pub struct CloudThinkingToggleFeatureFlag;
+
+impl FeatureFlag for CloudThinkingToggleFeatureFlag {
+    const NAME: &'static str = "cloud-thinking-toggle";
+
+    fn enabled_for_staff() -> bool {
+        false
+    }
+}
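
Like the existing flags in this file, it is checked client-side through `FeatureFlagAppExt`; a minimal sketch of the call-site pattern used elsewhere in this PR, assuming a GPUI app context `cx` in scope:

```rust
use feature_flags::{CloudThinkingToggleFeatureFlag, FeatureFlagAppExt as _};

// Both `thread_view.rs` and `cloud.rs` gate their behavior on the flag:
if cx.has_flag::<CloudThinkingToggleFeatureFlag>() {
    // Show the toggle and decide thinking from the model's capability.
} else {
    // Fall back to the separate "-thinking" model variants.
}
```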

crates/language_model/src/language_model.rs 🔗

@@ -591,6 +591,11 @@ pub trait LanguageModel: Send + Sync {
         None
     }
 
+    /// Whether this model supports extended thinking.
+    fn supports_thinking(&self) -> bool {
+        false
+    }
+
     /// Whether this model supports images
     fn supports_images(&self) -> bool;
 

crates/language_models/Cargo.toml 🔗

@@ -32,6 +32,7 @@ credentials_provider.workspace = true
 deepseek = { workspace = true, features = ["schemars"] }
 extension.workspace = true
 extension_host.workspace = true
+feature_flags.workspace = true
 fs.workspace = true
 futures.workspace = true
 google_ai = { workspace = true, features = ["schemars"] }

crates/language_models/src/provider/cloud.rs 🔗

@@ -9,6 +9,7 @@ use cloud_llm_client::{
     ListModelsResponse, Plan, PlanV2, SERVER_SUPPORTS_STATUS_MESSAGES_HEADER_NAME,
     ZED_VERSION_HEADER_NAME,
 };
+use feature_flags::{CloudThinkingToggleFeatureFlag, FeatureFlagAppExt as _};
 use futures::{
     AsyncBufReadExt, FutureExt, Stream, StreamExt, future::BoxFuture, stream::BoxStream,
 };
@@ -164,20 +165,27 @@ impl State {
             state.update(cx, |_, cx| cx.notify())
         })
     }
+
     fn update_models(&mut self, response: ListModelsResponse, cx: &mut Context<Self>) {
+        let is_thinking_toggle_enabled = cx.has_flag::<CloudThinkingToggleFeatureFlag>();
+
         let mut models = Vec::new();
 
         for model in response.models {
             models.push(Arc::new(model.clone()));
 
-            // Right now we represent thinking variants of models as separate models on the client,
-            // so we need to insert variants for any model that supports thinking.
-            if model.supports_thinking {
-                models.push(Arc::new(cloud_llm_client::LanguageModel {
-                    id: cloud_llm_client::LanguageModelId(format!("{}-thinking", model.id).into()),
-                    display_name: format!("{} Thinking", model.display_name),
-                    ..model
-                }));
+            if !is_thinking_toggle_enabled {
+                // Right now we represent thinking variants of models as separate models on the client,
+                // so we need to insert variants for any model that supports thinking.
+                if model.supports_thinking {
+                    models.push(Arc::new(cloud_llm_client::LanguageModel {
+                        id: cloud_llm_client::LanguageModelId(
+                            format!("{}-thinking", model.id).into(),
+                        ),
+                        display_name: format!("{} Thinking", model.display_name),
+                        ..model
+                    }));
+                }
             }
         }
 
@@ -570,6 +578,10 @@ impl LanguageModel for CloudLanguageModel {
         self.model.supports_images
     }
 
+    fn supports_thinking(&self) -> bool {
+        self.model.supports_thinking
+    }
+
     fn supports_streaming_tools(&self) -> bool {
         self.model.supports_streaming_tools
     }
@@ -721,6 +733,13 @@ impl LanguageModel for CloudLanguageModel {
         let bypass_rate_limit = request.bypass_rate_limit;
         let app_version = Some(cx.update(|cx| AppVersion::global(cx)));
         let thinking_allowed = request.thinking_allowed;
+        let is_thinking_toggle_enabled =
+            cx.update(|cx| cx.has_flag::<CloudThinkingToggleFeatureFlag>());
+        let enable_thinking = if is_thinking_toggle_enabled {
+            thinking_allowed && self.model.supports_thinking
+        } else {
+            thinking_allowed && self.model.id.0.ends_with("-thinking")
+        };
         let provider_name = provider_name(&self.model.provider);
         match self.model.provider {
             cloud_llm_client::LanguageModelProvider::Anthropic => {
@@ -729,7 +748,7 @@ impl LanguageModel for CloudLanguageModel {
                     self.model.id.to_string(),
                     1.0,
                     self.model.max_output_tokens as u64,
-                    if thinking_allowed && self.model.id.0.ends_with("-thinking") {
+                    if enable_thinking {
                         AnthropicModelMode::Thinking {
                             budget_tokens: Some(4_096),
                         }