agent: Fix slow file edits when using Opus 4.6 (#49904)

Bennet Bo Fenner and Zed Zippy created

Fixes a regression introduced in #48545 (reasoning effort selector). We
saw edit file tool calls taking a long time (loading animation was
displayed, no diff) when using Opus 4.6. This was caused by Opus 4.6
emitting thinking tokens even when the user explicitly disabled thinking
in the UI.
<img width="289" height="67" alt="image"
src="https://github.com/user-attachments/assets/090a99f8-9b07-4d25-9058-3706f9333396"
/>

In addition to the thinking tokens causing overhead, we were slowing
down file editing even more, because switching between thinking and
non-thinking modes from one request to the next causes the cache to be
invalidated
([docs](https://platform.claude.com/docs/en/build-with-claude/prompt-caching#what-invalidates-the-cache)).

This PR ensures that we inherit the setting for enabling or disabling
thinking from the thread from which the edit tool was called.

Before you mark this PR as ready for review, make sure that you have:
- [x] Added solid test coverage and/or screenshots from doing manual
testing
- [x] Done a self-review taking into account security and performance
aspects
- [x] Aligned any UI changes with the [UI
checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist)

Release Notes:

- Fixed an issue where editing files was taking a long time when using
Opus 4.6

---------

Co-authored-by: Zed Zippy <234243425+zed-zippy[bot]@users.noreply.github.com>

Change summary

crates/agent/src/edit_agent.rs           | 49 +++++++++++++++++++++++++
crates/agent/src/edit_agent/evals.rs     |  1 
crates/agent/src/tools/edit_file_tool.rs |  4 ++
3 files changed, 53 insertions(+), 1 deletion(-)

Detailed changes

crates/agent/src/edit_agent.rs 🔗

@@ -81,6 +81,7 @@ pub struct EditAgent {
     project: Entity<Project>,
     templates: Arc<Templates>,
     edit_format: EditFormat,
+    thinking_allowed: bool,
 }
 
 impl EditAgent {
@@ -90,6 +91,7 @@ impl EditAgent {
         action_log: Entity<ActionLog>,
         templates: Arc<Templates>,
         edit_format: EditFormat,
+        allow_thinking: bool,
     ) -> Self {
         EditAgent {
             model,
@@ -97,6 +99,7 @@ impl EditAgent {
             action_log,
             templates,
             edit_format,
+            thinking_allowed: allow_thinking,
         }
     }
 
@@ -731,7 +734,7 @@ impl EditAgent {
             tools,
             stop: Vec::new(),
             temperature: None,
-            thinking_allowed: true,
+            thinking_allowed: self.thinking_allowed,
             thinking_effort: None,
         };
 
@@ -1407,6 +1410,10 @@ mod tests {
     }
 
     async fn init_test(cx: &mut TestAppContext) -> EditAgent {
+        init_test_with_thinking(cx, true).await
+    }
+
+    async fn init_test_with_thinking(cx: &mut TestAppContext, thinking_allowed: bool) -> EditAgent {
         cx.update(settings::init);
 
         let project = Project::test(FakeFs::new(cx.executor()), [], cx).await;
@@ -1418,6 +1425,7 @@ mod tests {
             action_log,
             Templates::new(),
             EditFormat::XmlTags,
+            thinking_allowed,
         )
     }
 
@@ -1493,6 +1501,45 @@ mod tests {
         );
     }
 
+    #[gpui::test]
+    async fn test_thinking_allowed_forwarded_to_request(cx: &mut TestAppContext) {
+        let agent = init_test_with_thinking(cx, false).await;
+        let buffer = cx.new(|cx| Buffer::local("hello\n", cx));
+        let (_apply, _events) = agent.edit(
+            buffer.clone(),
+            String::new(),
+            &LanguageModelRequest::default(),
+            &mut cx.to_async(),
+        );
+        cx.run_until_parked();
+
+        let pending = agent.model.as_fake().pending_completions();
+        assert_eq!(pending.len(), 1);
+        assert!(
+            !pending[0].thinking_allowed,
+            "Expected thinking_allowed to be false when EditAgent is constructed with allow_thinking=false"
+        );
+        agent.model.as_fake().end_last_completion_stream();
+
+        let agent = init_test_with_thinking(cx, true).await;
+        let buffer = cx.new(|cx| Buffer::local("hello\n", cx));
+        let (_apply, _events) = agent.edit(
+            buffer,
+            String::new(),
+            &LanguageModelRequest::default(),
+            &mut cx.to_async(),
+        );
+        cx.run_until_parked();
+
+        let pending = agent.model.as_fake().pending_completions();
+        assert_eq!(pending.len(), 1);
+        assert!(
+            pending[0].thinking_allowed,
+            "Expected thinking_allowed to be true when EditAgent is constructed with allow_thinking=true"
+        );
+        agent.model.as_fake().end_last_completion_stream();
+    }
+
     fn drain_events(
         stream: &mut UnboundedReceiver<EditAgentOutputEvent>,
     ) -> Vec<EditAgentOutputEvent> {

crates/agent/src/tools/edit_file_tool.rs 🔗

@@ -239,6 +239,10 @@ impl AgentTool for EditFileTool {
                 ToolCallUpdateFields::new().locations(vec![acp::ToolCallLocation::new(abs_path)]),
             );
         }
+        let allow_thinking = self
+            .thread
+            .read_with(cx, |thread, _cx| thread.thinking_enabled())
+            .unwrap_or(true);
 
         let authorize = self.authorize(&input, &event_stream, cx);
         cx.spawn(async move |cx: &mut AsyncApp| {