From 4a2193591d5416d0572f223023c8014a462ded23 Mon Sep 17 00:00:00 2001 From: Bennet Bo Fenner Date: Mon, 23 Feb 2026 18:39:48 +0100 Subject: [PATCH] agent: Fix slow file edits when using Opus 4.6 (#49904) Fixes a regression introduced in #48545 (reasoning effort selector). We saw edit file tool calls taking a long time (loading animation was displayed, no diff) when using Opus 4.6. This was caused by Opus 4.6 emitting thinking tokens even when the user explicitly disabled thinking in the UI. image In addition to the thinking tokens causing overhead, we were slowing down file editing even more, because changing between thinking/non-thinking between requests causes the cache to be invalidated ([docs](https://platform.claude.com/docs/en/build-with-claude/prompt-caching#what-invalidates-the-cache)). This PR ensures that we inherit the setting for enabling or disabling thinking from the thread from which the edit tool was called. Before you mark this PR as ready for review, make sure that you have: - [x] Added solid test coverage and/or screenshots from doing manual testing - [x] Done a self-review taking into account security and performance aspects - [x] Aligned any UI changes with the [UI checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist) Release Notes: - Fixed an issue where editing files was taking a long time when using Opus 4.6 --------- Co-authored-by: Zed Zippy <234243425+zed-zippy[bot]@users.noreply.github.com> --- crates/agent/src/edit_agent.rs | 49 +++++++++++++++++++++++- crates/agent/src/edit_agent/evals.rs | 1 + crates/agent/src/tools/edit_file_tool.rs | 4 ++ 3 files changed, 53 insertions(+), 1 deletion(-) diff --git a/crates/agent/src/edit_agent.rs b/crates/agent/src/edit_agent.rs index cab1f8ca888f879d0e7c7ec9a6f034325abb5409..3e67cba1b63f4136a03b88c3007aee99489a6e80 100644 --- a/crates/agent/src/edit_agent.rs +++ b/crates/agent/src/edit_agent.rs @@ -81,6 +81,7 @@ pub struct EditAgent { project: Entity, 
templates: Arc, edit_format: EditFormat, + thinking_allowed: bool, } impl EditAgent { @@ -90,6 +91,7 @@ impl EditAgent { action_log: Entity, templates: Arc, edit_format: EditFormat, + allow_thinking: bool, ) -> Self { EditAgent { model, @@ -97,6 +99,7 @@ impl EditAgent { action_log, templates, edit_format, + thinking_allowed: allow_thinking, } } @@ -731,7 +734,7 @@ impl EditAgent { tools, stop: Vec::new(), temperature: None, - thinking_allowed: true, + thinking_allowed: self.thinking_allowed, thinking_effort: None, }; @@ -1407,6 +1410,10 @@ mod tests { } async fn init_test(cx: &mut TestAppContext) -> EditAgent { + init_test_with_thinking(cx, true).await + } + + async fn init_test_with_thinking(cx: &mut TestAppContext, thinking_allowed: bool) -> EditAgent { cx.update(settings::init); let project = Project::test(FakeFs::new(cx.executor()), [], cx).await; @@ -1418,6 +1425,7 @@ mod tests { action_log, Templates::new(), EditFormat::XmlTags, + thinking_allowed, ) } @@ -1493,6 +1501,45 @@ mod tests { ); } + #[gpui::test] + async fn test_thinking_allowed_forwarded_to_request(cx: &mut TestAppContext) { + let agent = init_test_with_thinking(cx, false).await; + let buffer = cx.new(|cx| Buffer::local("hello\n", cx)); + let (_apply, _events) = agent.edit( + buffer.clone(), + String::new(), + &LanguageModelRequest::default(), + &mut cx.to_async(), + ); + cx.run_until_parked(); + + let pending = agent.model.as_fake().pending_completions(); + assert_eq!(pending.len(), 1); + assert!( + !pending[0].thinking_allowed, + "Expected thinking_allowed to be false when EditAgent is constructed with allow_thinking=false" + ); + agent.model.as_fake().end_last_completion_stream(); + + let agent = init_test_with_thinking(cx, true).await; + let buffer = cx.new(|cx| Buffer::local("hello\n", cx)); + let (_apply, _events) = agent.edit( + buffer, + String::new(), + &LanguageModelRequest::default(), + &mut cx.to_async(), + ); + cx.run_until_parked(); + + let pending = 
agent.model.as_fake().pending_completions(); + assert_eq!(pending.len(), 1); + assert!( + pending[0].thinking_allowed, + "Expected thinking_allowed to be true when EditAgent is constructed with allow_thinking=true" + ); + agent.model.as_fake().end_last_completion_stream(); + } + fn drain_events( stream: &mut UnboundedReceiver, ) -> Vec { diff --git a/crates/agent/src/edit_agent/evals.rs b/crates/agent/src/edit_agent/evals.rs index 5c30aa46c2fc802edf8e7d6b050af8465adc226f..cdf6c1c0b3f6440e4827c8b74b47a32d997b092f 100644 --- a/crates/agent/src/edit_agent/evals.rs +++ b/crates/agent/src/edit_agent/evals.rs @@ -1468,6 +1468,7 @@ impl EditAgentTest { action_log, Templates::new(), edit_format, + true, ), project, judge_model, diff --git a/crates/agent/src/tools/edit_file_tool.rs b/crates/agent/src/tools/edit_file_tool.rs index 697ab3022312f10d53f46df9c874554d2d16aa5e..b271a19674f733b8879d331ef83379159fc581d5 100644 --- a/crates/agent/src/tools/edit_file_tool.rs +++ b/crates/agent/src/tools/edit_file_tool.rs @@ -239,6 +239,10 @@ impl AgentTool for EditFileTool { ToolCallUpdateFields::new().locations(vec![acp::ToolCallLocation::new(abs_path)]), ); } + let allow_thinking = self + .thread + .read_with(cx, |thread, _cx| thread.thinking_enabled()) + .unwrap_or(true); let authorize = self.authorize(&input, &event_stream, cx); cx.spawn(async move |cx: &mut AsyncApp| {