agent: Add update_plan tool (#52048)

Ben Brandt created 3 weeks ago

## Context

Adds an `update_plan` tool that lets the agent use the plan UI we already
expose to ACP agents. It is behind the `update-plan-tool` feature flag for now.

## How to Review

This is mostly a new tool hooked up to the plan plumbing we already have
in `acp_thread`.

## Self-Review Checklist

- [x] I've reviewed my own diff for quality, security, and reliability
- [x] Unsafe blocks (if any) have justifying comments
- [x] The content is consistent with the [UI/UX
checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist)
- [x] Tests cover the new/changed behavior
- [x] Performance impact has been considered and is acceptable

Release Notes:

- N/A

Change summary

assets/settings/default.json                           |   2 
crates/agent/src/agent.rs                              |   3 
crates/agent/src/templates.rs                          |   1 
crates/agent/src/templates/system_prompt.hbs           |  28 +
crates/agent/src/tests/mod.rs                          | 173 +++++-
crates/agent/src/thread.rs                             |  29 +
crates/agent/src/tools.rs                              |   3 
crates/agent/src/tools/update_plan_tool.rs             | 290 ++++++++++++
crates/eval/src/example.rs                             |   3 
crates/feature_flags/src/flags.rs                      |  10 
crates/settings_ui/src/pages/tool_permissions_setup.rs |   3 
11 files changed, 500 insertions(+), 45 deletions(-)

Detailed changes

assets/settings/default.json 🔗

@@ -1073,6 +1073,7 @@
           "spawn_agent": true,
           "terminal": true,
           "thinking": true,
+          "update_plan": true,
           "web_search": true,
         },
       },
@@ -1092,6 +1093,7 @@
           "grep": true,
           "spawn_agent": true,
           "thinking": true,
+          "update_plan": true,
           "web_search": true,
         },
       },

crates/agent/src/agent.rs 🔗

@@ -1219,6 +1219,9 @@ impl NativeAgentConnection {
                                     thread.update_tool_call(update, cx)
                                 })??;
                             }
+                            ThreadEvent::Plan(plan) => {
+                                acp_thread.update(cx, |thread, cx| thread.update_plan(plan, cx))?;
+                            }
                             ThreadEvent::SubagentSpawned(session_id) => {
                                 acp_thread.update(cx, |thread, cx| {
                                     thread.subagent_spawned(session_id, cx);

crates/agent/src/templates.rs 🔗

@@ -85,6 +85,7 @@ mod tests {
         let templates = Templates::new();
         let rendered = template.render(&templates).unwrap();
         assert!(rendered.contains("## Fixing Diagnostics"));
+        assert!(!rendered.contains("## Planning"));
         assert!(rendered.contains("test-model"));
     }
 }

crates/agent/src/templates/system_prompt.hbs 🔗

@@ -20,6 +20,34 @@ You are a highly skilled software engineer with extensive knowledge in many prog
 - When running commands that may run indefinitely or for a long time (such as build scripts, tests, servers, or file watchers), specify `timeout_ms` to bound runtime. If the command times out, the user can always ask you to run it again with a longer timeout or no timeout if they're willing to wait or cancel manually.
 - Avoid HTML entity escaping - use plain characters instead.
 
+{{#if (contains available_tools 'update_plan') }}
+## Planning
+
+- You have access to an `update_plan` tool which tracks steps and progress and renders them to the user.
+- Use it to show that you've understood the task and to make complex, ambiguous, or multi-phase work easier for the user to follow.
+- A good plan breaks the work into meaningful, logically ordered steps that are easy to verify as you go.
+- When writing a plan, prefer a short list of concise, concrete steps.
+- Keep each step focused on a real unit of work and use short 1-sentence descriptions.
+- Do not use plans for simple or single-step queries that you can just do or answer immediately.
+- Do not use plans to pad your response with filler steps or to state the obvious.
+- Do not include steps that you are not actually capable of doing.
+- After calling `update_plan`, do not repeat the full plan in your response. The UI already displays it. Instead, briefly summarize what changed and note any important context or next step.
+- Before moving on to a new phase of work, mark the previous step as completed when appropriate.
+- When work is in progress, prefer having exactly one step marked as `in_progress`.
+- You can mark multiple completed steps in a single `update_plan` call.
+- If the task changes midway through, update the plan so it reflects the new approach.
+
+Use a plan when:
+
+- The task is non-trivial and will require multiple actions over a longer horizon.
+- There are logical phases or dependencies where sequencing matters.
+- The work has ambiguity that benefits from outlining high-level goals.
+- You want intermediate checkpoints for feedback and validation.
+- The user asked you to do more than one thing in a single prompt.
+- The user asked you to use the plan tool or TODOs.
+- You discover additional steps while working and intend to complete them before yielding to the user.
+
+{{/if}}
 ## Searching and Reading
 
 If you are unsure how to fulfill the user's request, gather more information with tool calls and/or clarifying questions.

crates/agent/src/tests/mod.rs 🔗

@@ -999,6 +999,20 @@ async fn expect_tool_call_update_fields(
     }
 }
 
+async fn expect_plan(events: &mut UnboundedReceiver<Result<ThreadEvent>>) -> acp::Plan {
+    let event = events
+        .next()
+        .await
+        .expect("no plan event received")
+        .unwrap();
+    match event {
+        ThreadEvent::Plan(plan) => plan,
+        event => {
+            panic!("Unexpected event {event:?}");
+        }
+    }
+}
+
 async fn next_tool_call_authorization(
     events: &mut UnboundedReceiver<Result<ThreadEvent>>,
 ) -> ToolCallAuthorization {
@@ -3416,6 +3430,122 @@ async fn test_tool_updates_to_completion(cx: &mut TestAppContext) {
     );
 }
 
+#[gpui::test]
+async fn test_update_plan_tool_updates_thread_events(cx: &mut TestAppContext) {
+    let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
+    thread.update(cx, |thread, _cx| thread.add_tool(UpdatePlanTool));
+    let fake_model = model.as_fake();
+
+    let mut events = thread
+        .update(cx, |thread, cx| {
+            thread.send(UserMessageId::new(), ["Make a plan"], cx)
+        })
+        .unwrap();
+    cx.run_until_parked();
+
+    let input = json!({
+        "plan": [
+            {
+                "step": "Inspect the code",
+                "status": "completed",
+                "priority": "high"
+            },
+            {
+                "step": "Implement the tool",
+                "status": "in_progress"
+            },
+            {
+                "step": "Run tests",
+                "status": "pending",
+                "priority": "low"
+            }
+        ]
+    });
+    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
+        LanguageModelToolUse {
+            id: "plan_1".into(),
+            name: UpdatePlanTool::NAME.into(),
+            raw_input: input.to_string(),
+            input,
+            is_input_complete: true,
+            thought_signature: None,
+        },
+    ));
+    fake_model.end_last_completion_stream();
+    cx.run_until_parked();
+
+    let tool_call = expect_tool_call(&mut events).await;
+    assert_eq!(
+        tool_call,
+        acp::ToolCall::new("plan_1", "Update plan")
+            .kind(acp::ToolKind::Think)
+            .raw_input(json!({
+                "plan": [
+                    {
+                        "step": "Inspect the code",
+                        "status": "completed",
+                        "priority": "high"
+                    },
+                    {
+                        "step": "Implement the tool",
+                        "status": "in_progress"
+                    },
+                    {
+                        "step": "Run tests",
+                        "status": "pending",
+                        "priority": "low"
+                    }
+                ]
+            }))
+            .meta(acp::Meta::from_iter([(
+                "tool_name".into(),
+                "update_plan".into()
+            )]))
+    );
+
+    let update = expect_tool_call_update_fields(&mut events).await;
+    assert_eq!(
+        update,
+        acp::ToolCallUpdate::new(
+            "plan_1",
+            acp::ToolCallUpdateFields::new().status(acp::ToolCallStatus::InProgress)
+        )
+    );
+
+    let plan = expect_plan(&mut events).await;
+    assert_eq!(
+        plan,
+        acp::Plan::new(vec![
+            acp::PlanEntry::new(
+                "Inspect the code",
+                acp::PlanEntryPriority::High,
+                acp::PlanEntryStatus::Completed,
+            ),
+            acp::PlanEntry::new(
+                "Implement the tool",
+                acp::PlanEntryPriority::Medium,
+                acp::PlanEntryStatus::InProgress,
+            ),
+            acp::PlanEntry::new(
+                "Run tests",
+                acp::PlanEntryPriority::Low,
+                acp::PlanEntryStatus::Pending,
+            ),
+        ])
+    );
+
+    let update = expect_tool_call_update_fields(&mut events).await;
+    assert_eq!(
+        update,
+        acp::ToolCallUpdate::new(
+            "plan_1",
+            acp::ToolCallUpdateFields::new()
+                .status(acp::ToolCallStatus::Completed)
+                .raw_output("Plan updated")
+        )
+    );
+}
+
 #[gpui::test]
 async fn test_send_no_retry_on_success(cx: &mut TestAppContext) {
     let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
@@ -3822,6 +3952,7 @@ async fn setup(cx: &mut TestAppContext, model: TestModel) -> ThreadTest {
                             StreamingEchoTool::NAME: true,
                             StreamingFailingEchoTool::NAME: true,
                             TerminalTool::NAME: true,
+                            UpdatePlanTool::NAME: true,
                         }
                     }
                 }
@@ -5011,48 +5142,6 @@ async fn test_subagent_tool_resume_session(cx: &mut TestAppContext) {
     );
 }
 
-#[gpui::test]
-async fn test_subagent_tool_is_present_when_feature_flag_enabled(cx: &mut TestAppContext) {
-    init_test(cx);
-
-    cx.update(|cx| {
-        cx.update_flags(true, vec!["subagents".to_string()]);
-    });
-
-    let fs = FakeFs::new(cx.executor());
-    fs.insert_tree(path!("/test"), json!({})).await;
-    let project = Project::test(fs, [path!("/test").as_ref()], cx).await;
-    let project_context = cx.new(|_cx| ProjectContext::default());
-    let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
-    let context_server_registry =
-        cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
-    let model = Arc::new(FakeLanguageModel::default());
-
-    let environment = Rc::new(cx.update(|cx| {
-        FakeThreadEnvironment::default().with_terminal(FakeTerminalHandle::new_never_exits(cx))
-    }));
-
-    let thread = cx.new(|cx| {
-        let mut thread = Thread::new(
-            project.clone(),
-            project_context,
-            context_server_registry,
-            Templates::new(),
-            Some(model),
-            cx,
-        );
-        thread.add_default_tools(environment, cx);
-        thread
-    });
-
-    thread.read_with(cx, |thread, _| {
-        assert!(
-            thread.has_registered_tool(SpawnAgentTool::NAME),
-            "subagent tool should be present when feature flag is enabled"
-        );
-    });
-}
-
 #[gpui::test]
 async fn test_subagent_thread_inherits_parent_thread_properties(cx: &mut TestAppContext) {
     init_test(cx);

crates/agent/src/thread.rs 🔗

@@ -3,12 +3,14 @@ use crate::{
     DeletePathTool, DiagnosticsTool, EditFileTool, FetchTool, FindPathTool, GrepTool,
     ListDirectoryTool, MovePathTool, NowTool, OpenTool, ProjectSnapshot, ReadFileTool,
     RestoreFileFromDiskTool, SaveFileTool, SpawnAgentTool, StreamingEditFileTool,
-    SystemPromptTemplate, Template, Templates, TerminalTool, ToolPermissionDecision, WebSearchTool,
-    decide_permission_from_settings,
+    SystemPromptTemplate, Template, Templates, TerminalTool, ToolPermissionDecision,
+    UpdatePlanTool, WebSearchTool, decide_permission_from_settings,
 };
 use acp_thread::{MentionUri, UserMessageId};
 use action_log::ActionLog;
-use feature_flags::{FeatureFlagAppExt as _, StreamingEditFileToolFeatureFlag};
+use feature_flags::{
+    FeatureFlagAppExt as _, StreamingEditFileToolFeatureFlag, UpdatePlanToolFeatureFlag,
+};
 
 use agent_client_protocol as acp;
 use agent_settings::{
@@ -661,6 +663,7 @@ pub enum ThreadEvent {
     AgentThinking(String),
     ToolCall(acp::ToolCall),
     ToolCallUpdate(acp_thread::ToolCallUpdate),
+    Plan(acp::Plan),
     ToolCallAuthorization(ToolCallAuthorization),
     SubagentSpawned(acp::SessionId),
     Retry(acp_thread::RetryStatus),
@@ -1529,6 +1532,9 @@ impl Thread {
         self.add_tool(MovePathTool::new(self.project.clone()));
         self.add_tool(NowTool);
         self.add_tool(OpenTool::new(self.project.clone()));
+        if cx.has_flag::<UpdatePlanToolFeatureFlag>() {
+            self.add_tool(UpdatePlanTool);
+        }
         self.add_tool(ReadFileTool::new(
             self.project.clone(),
             self.action_log.clone(),
@@ -3476,6 +3482,10 @@ impl ThreadEventStream {
             .ok();
     }
 
+    fn send_plan(&self, plan: acp::Plan) {
+        self.0.unbounded_send(Ok(ThreadEvent::Plan(plan))).ok();
+    }
+
     fn send_retry(&self, status: acp_thread::RetryStatus) {
         self.0.unbounded_send(Ok(ThreadEvent::Retry(status))).ok();
     }
@@ -3611,6 +3621,10 @@ impl ToolCallEventStream {
             .ok();
     }
 
+    pub fn update_plan(&self, plan: acp::Plan) {
+        self.stream.send_plan(plan);
+    }
+
     /// Authorize a third-party tool (e.g., MCP tool from a context server).
     ///
     /// Unlike built-in tools, third-party tools don't support pattern-based permissions.
@@ -3899,6 +3913,15 @@ impl ToolCallEventStreamReceiver {
             panic!("Expected terminal but got: {:?}", event);
         }
     }
+
+    pub async fn expect_plan(&mut self) -> acp::Plan {
+        let event = self.0.next().await;
+        if let Some(Ok(ThreadEvent::Plan(plan))) = event {
+            plan
+        } else {
+            panic!("Expected plan but got: {:?}", event);
+        }
+    }
 }
 
 #[cfg(any(test, feature = "test-support"))]

crates/agent/src/tools.rs 🔗

@@ -19,6 +19,7 @@ mod streaming_edit_file_tool;
 mod terminal_tool;
 mod tool_edit_parser;
 mod tool_permissions;
+mod update_plan_tool;
 mod web_search_tool;
 
 use crate::AgentTool;
@@ -44,6 +45,7 @@ pub use spawn_agent_tool::*;
 pub use streaming_edit_file_tool::*;
 pub use terminal_tool::*;
 pub use tool_permissions::*;
+pub use update_plan_tool::*;
 pub use web_search_tool::*;
 
 macro_rules! tools {
@@ -132,5 +134,6 @@ tools! {
     SaveFileTool,
     SpawnAgentTool,
     TerminalTool,
+    UpdatePlanTool,
     WebSearchTool,
 }

crates/agent/src/tools/update_plan_tool.rs 🔗

@@ -0,0 +1,290 @@
+use crate::{AgentTool, ToolCallEventStream, ToolInput};
+use agent_client_protocol as acp;
+use gpui::{App, SharedString, Task};
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+use std::sync::Arc;
+
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+#[schemars(inline)]
+pub enum PlanEntryStatus {
+    /// The task has not started yet.
+    Pending,
+    /// The task is currently being worked on.
+    InProgress,
+    /// The task has been successfully completed.
+    Completed,
+}
+
+impl From<PlanEntryStatus> for acp::PlanEntryStatus {
+    fn from(value: PlanEntryStatus) -> Self {
+        match value {
+            PlanEntryStatus::Pending => acp::PlanEntryStatus::Pending,
+            PlanEntryStatus::InProgress => acp::PlanEntryStatus::InProgress,
+            PlanEntryStatus::Completed => acp::PlanEntryStatus::Completed,
+        }
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq, Default)]
+#[serde(rename_all = "snake_case")]
+#[schemars(inline)]
+pub enum PlanEntryPriority {
+    High,
+    #[default]
+    Medium,
+    Low,
+}
+
+impl From<PlanEntryPriority> for acp::PlanEntryPriority {
+    fn from(value: PlanEntryPriority) -> Self {
+        match value {
+            PlanEntryPriority::High => acp::PlanEntryPriority::High,
+            PlanEntryPriority::Medium => acp::PlanEntryPriority::Medium,
+            PlanEntryPriority::Low => acp::PlanEntryPriority::Low,
+        }
+    }
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq)]
+pub struct PlanItem {
+    /// Human-readable description of what this task aims to accomplish.
+    pub step: String,
+    /// The current status of this task.
+    pub status: PlanEntryStatus,
+    /// The relative importance of this task. Defaults to medium when omitted.
+    #[serde(default)]
+    pub priority: PlanEntryPriority,
+}
+
+impl From<PlanItem> for acp::PlanEntry {
+    fn from(value: PlanItem) -> Self {
+        acp::PlanEntry::new(value.step, value.priority.into(), value.status.into())
+    }
+}
+
+/// Updates the task plan.
+/// Provide a list of plan entries, each with step, status, and optional priority.
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq)]
+pub struct UpdatePlanToolInput {
+    /// The list of plan entries and their current statuses.
+    pub plan: Vec<PlanItem>,
+}
+
+pub struct UpdatePlanTool;
+
+impl UpdatePlanTool {
+    fn to_plan(input: UpdatePlanToolInput) -> acp::Plan {
+        acp::Plan::new(input.plan.into_iter().map(Into::into).collect())
+    }
+}
+
+impl AgentTool for UpdatePlanTool {
+    type Input = UpdatePlanToolInput;
+    type Output = String;
+
+    const NAME: &'static str = "update_plan";
+
+    fn kind() -> acp::ToolKind {
+        acp::ToolKind::Think
+    }
+
+    fn initial_title(
+        &self,
+        input: Result<Self::Input, serde_json::Value>,
+        _cx: &mut App,
+    ) -> SharedString {
+        match input {
+            Ok(input) if input.plan.is_empty() => "Clear plan".into(),
+            Ok(_) | Err(_) => "Update plan".into(),
+        }
+    }
+
+    fn run(
+        self: Arc<Self>,
+        input: ToolInput<Self::Input>,
+        event_stream: ToolCallEventStream,
+        cx: &mut App,
+    ) -> Task<Result<Self::Output, Self::Output>> {
+        cx.spawn(async move |_cx| {
+            let input = input
+                .recv()
+                .await
+                .map_err(|e| format!("Failed to receive tool input: {e}"))?;
+
+            event_stream.update_plan(Self::to_plan(input));
+
+            Ok("Plan updated".to_string())
+        })
+    }
+
+    fn replay(
+        &self,
+        input: Self::Input,
+        _output: Self::Output,
+        event_stream: ToolCallEventStream,
+        _cx: &mut App,
+    ) -> anyhow::Result<()> {
+        event_stream.update_plan(Self::to_plan(input));
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::ToolCallEventStream;
+    use gpui::TestAppContext;
+    use pretty_assertions::assert_eq;
+
+    fn sample_input() -> UpdatePlanToolInput {
+        UpdatePlanToolInput {
+            plan: vec![
+                PlanItem {
+                    step: "Inspect the existing tool wiring".to_string(),
+                    status: PlanEntryStatus::Completed,
+                    priority: PlanEntryPriority::High,
+                },
+                PlanItem {
+                    step: "Implement the update_plan tool".to_string(),
+                    status: PlanEntryStatus::InProgress,
+                    priority: PlanEntryPriority::Medium,
+                },
+                PlanItem {
+                    step: "Add tests".to_string(),
+                    status: PlanEntryStatus::Pending,
+                    priority: PlanEntryPriority::Low,
+                },
+            ],
+        }
+    }
+
+    #[gpui::test]
+    async fn test_run_emits_plan_event(cx: &mut TestAppContext) {
+        let tool = Arc::new(UpdatePlanTool);
+        let (event_stream, mut event_rx) = ToolCallEventStream::test();
+
+        let input = sample_input();
+        let result = cx
+            .update(|cx| tool.run(ToolInput::resolved(input.clone()), event_stream, cx))
+            .await
+            .expect("tool should succeed");
+
+        assert_eq!(result, "Plan updated".to_string());
+
+        let plan = event_rx.expect_plan().await;
+        assert_eq!(
+            plan,
+            acp::Plan::new(vec![
+                acp::PlanEntry::new(
+                    "Inspect the existing tool wiring",
+                    acp::PlanEntryPriority::High,
+                    acp::PlanEntryStatus::Completed,
+                ),
+                acp::PlanEntry::new(
+                    "Implement the update_plan tool",
+                    acp::PlanEntryPriority::Medium,
+                    acp::PlanEntryStatus::InProgress,
+                ),
+                acp::PlanEntry::new(
+                    "Add tests",
+                    acp::PlanEntryPriority::Low,
+                    acp::PlanEntryStatus::Pending,
+                ),
+            ])
+        );
+    }
+
+    #[gpui::test]
+    async fn test_replay_emits_plan_event(cx: &mut TestAppContext) {
+        let tool = UpdatePlanTool;
+        let (event_stream, mut event_rx) = ToolCallEventStream::test();
+
+        let input = sample_input();
+
+        cx.update(|cx| {
+            tool.replay(input.clone(), "Plan updated".to_string(), event_stream, cx)
+                .expect("replay should succeed");
+        });
+
+        let plan = event_rx.expect_plan().await;
+        assert_eq!(
+            plan,
+            acp::Plan::new(vec![
+                acp::PlanEntry::new(
+                    "Inspect the existing tool wiring",
+                    acp::PlanEntryPriority::High,
+                    acp::PlanEntryStatus::Completed,
+                ),
+                acp::PlanEntry::new(
+                    "Implement the update_plan tool",
+                    acp::PlanEntryPriority::Medium,
+                    acp::PlanEntryStatus::InProgress,
+                ),
+                acp::PlanEntry::new(
+                    "Add tests",
+                    acp::PlanEntryPriority::Low,
+                    acp::PlanEntryStatus::Pending,
+                ),
+            ])
+        );
+    }
+
+    #[gpui::test]
+    async fn test_run_defaults_priority_to_medium(cx: &mut TestAppContext) {
+        let tool = Arc::new(UpdatePlanTool);
+        let (event_stream, mut event_rx) = ToolCallEventStream::test();
+
+        let input = UpdatePlanToolInput {
+            plan: vec![
+                PlanItem {
+                    step: "First".to_string(),
+                    status: PlanEntryStatus::InProgress,
+                    priority: PlanEntryPriority::default(),
+                },
+                PlanItem {
+                    step: "Second".to_string(),
+                    status: PlanEntryStatus::InProgress,
+                    priority: PlanEntryPriority::default(),
+                },
+            ],
+        };
+
+        let result = cx
+            .update(|cx| tool.run(ToolInput::resolved(input), event_stream, cx))
+            .await
+            .expect("tool should succeed");
+
+        assert_eq!(result, "Plan updated".to_string());
+
+        let plan = event_rx.expect_plan().await;
+        assert_eq!(
+            plan,
+            acp::Plan::new(vec![
+                acp::PlanEntry::new(
+                    "First",
+                    acp::PlanEntryPriority::Medium,
+                    acp::PlanEntryStatus::InProgress,
+                ),
+                acp::PlanEntry::new(
+                    "Second",
+                    acp::PlanEntryPriority::Medium,
+                    acp::PlanEntryStatus::InProgress,
+                ),
+            ])
+        );
+    }
+
+    #[gpui::test]
+    async fn test_initial_title(cx: &mut TestAppContext) {
+        let tool = UpdatePlanTool;
+
+        let title = cx.update(|cx| tool.initial_title(Ok(sample_input()), cx));
+        assert_eq!(title, SharedString::from("Update plan"));
+
+        let title =
+            cx.update(|cx| tool.initial_title(Ok(UpdatePlanToolInput { plan: Vec::new() }), cx));
+        assert_eq!(title, SharedString::from("Clear plan"));
+    }
+}

crates/eval/src/example.rs 🔗

@@ -328,6 +328,9 @@ impl ExampleContext {
                         "{}Bug: Tool confirmation should not be required in eval",
                         log_prefix
                     ),
+                    ThreadEvent::Plan(plan) => {
+                        println!("{log_prefix} Got plan: {plan:?}");
+                    }
                     ThreadEvent::SubagentSpawned(session) => {
                         println!("{log_prefix} Got subagent spawn: {session:?}");
                     }

crates/feature_flags/src/flags.rs 🔗

@@ -63,6 +63,16 @@ impl FeatureFlag for StreamingEditFileToolFeatureFlag {
     }
 }
 
+pub struct UpdatePlanToolFeatureFlag;
+
+impl FeatureFlag for UpdatePlanToolFeatureFlag {
+    const NAME: &'static str = "update-plan-tool";
+
+    fn enabled_for_staff() -> bool {
+        true
+    }
+}
+
 pub struct ProjectPanelUndoRedoFeatureFlag;
 
 impl FeatureFlag for ProjectPanelUndoRedoFeatureFlag {

crates/settings_ui/src/pages/tool_permissions_setup.rs 🔗

@@ -1421,6 +1421,9 @@ mod tests {
             // Subagent permission checks happen at the level of individual
             // tool calls within the subagent, not at the spawning level.
             "spawn_agent",
+            // update_plan updates UI-visible planning state but does not use
+            // tool permission rules.
+            "update_plan",
         ];
 
         let tool_info_ids: Vec<&str> = TOOLS.iter().map(|t| t.id).collect();