Detailed changes
@@ -1073,6 +1073,7 @@
"spawn_agent": true,
"terminal": true,
"thinking": true,
+ "update_plan": true,
"web_search": true,
},
},
@@ -1092,6 +1093,7 @@
"grep": true,
"spawn_agent": true,
"thinking": true,
+ "update_plan": true,
"web_search": true,
},
},
@@ -1219,6 +1219,9 @@ impl NativeAgentConnection {
thread.update_tool_call(update, cx)
})??;
}
+ ThreadEvent::Plan(plan) => {
+ acp_thread.update(cx, |thread, cx| thread.update_plan(plan, cx))?;
+ }
ThreadEvent::SubagentSpawned(session_id) => {
acp_thread.update(cx, |thread, cx| {
thread.subagent_spawned(session_id, cx);
@@ -85,6 +85,7 @@ mod tests {
let templates = Templates::new();
let rendered = template.render(&templates).unwrap();
assert!(rendered.contains("## Fixing Diagnostics"));
+ assert!(!rendered.contains("## Planning"));
assert!(rendered.contains("test-model"));
}
}
@@ -20,6 +20,34 @@ You are a highly skilled software engineer with extensive knowledge in many prog
- When running commands that may run indefinitely or for a long time (such as build scripts, tests, servers, or file watchers), specify `timeout_ms` to bound runtime. If the command times out, the user can always ask you to run it again with a longer timeout or no timeout if they're willing to wait or cancel manually.
- Avoid HTML entity escaping - use plain characters instead.
+{{#if (contains available_tools 'update_plan') }}
+## Planning
+
+- You have access to an `update_plan` tool which tracks steps and progress and renders them to the user.
+- Use it to show that you've understood the task and to make complex, ambiguous, or multi-phase work easier for the user to follow.
+- A good plan breaks the work into meaningful, logically ordered steps that are easy to verify as you go.
+- When writing a plan, prefer a short list of concise, concrete steps.
+- Keep each step focused on a real unit of work and use short 1-sentence descriptions.
+- Do not use plans for simple or single-step queries that you can just do or answer immediately.
+- Do not use plans to pad your response with filler steps or to state the obvious.
+- Do not include steps that you are not actually capable of doing.
+- After calling `update_plan`, do not repeat the full plan in your response. The UI already displays it. Instead, briefly summarize what changed and note any important context or next step.
+- Before moving on to a new phase of work, mark the previous step as completed when appropriate.
+- When work is in progress, prefer having exactly one step marked as `in_progress`.
+- You can mark multiple completed steps in a single `update_plan` call.
+- If the task changes midway through, update the plan so it reflects the new approach.
+
+Use a plan when:
+
+- The task is non-trivial and will require multiple actions over a longer horizon.
+- There are logical phases or dependencies where sequencing matters.
+- The work has ambiguity that benefits from outlining high-level goals.
+- You want intermediate checkpoints for feedback and validation.
+- The user asked you to do more than one thing in a single prompt.
+- The user asked you to use the plan tool or TODOs.
+- You discover additional steps while working and intend to complete them before yielding to the user.
+
+{{/if}}
## Searching and Reading
If you are unsure how to fulfill the user's request, gather more information with tool calls and/or clarifying questions.
@@ -999,6 +999,20 @@ async fn expect_tool_call_update_fields(
}
}
+/// Awaits the next event on `events` and returns its plan payload.
+///
+/// Panics if the stream has ended, if the next item is an `Err`, or if the
+/// event is anything other than `ThreadEvent::Plan`.
+async fn expect_plan(events: &mut UnboundedReceiver<Result<ThreadEvent>>) -> acp::Plan {
+    let next = events.next().await.expect("no plan event received");
+    match next.unwrap() {
+        ThreadEvent::Plan(plan) => plan,
+        other => panic!("Unexpected event {other:?}"),
+    }
+}
+
async fn next_tool_call_authorization(
events: &mut UnboundedReceiver<Result<ThreadEvent>>,
) -> ToolCallAuthorization {
@@ -3416,6 +3430,122 @@ async fn test_tool_updates_to_completion(cx: &mut TestAppContext) {
);
}
+/// Drives a fake model through one `update_plan` tool call and checks the
+/// events the thread emits, in order: the tool call, an in-progress update,
+/// the plan itself, and the completed update.
+#[gpui::test]
+async fn test_update_plan_tool_updates_thread_events(cx: &mut TestAppContext) {
+    let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
+    thread.update(cx, |thread, _cx| thread.add_tool(UpdatePlanTool));
+    let fake_model = model.as_fake();
+
+    let mut events = thread
+        .update(cx, |thread, cx| {
+            thread.send(UserMessageId::new(), ["Make a plan"], cx)
+        })
+        .unwrap();
+    cx.run_until_parked();
+
+    // The second entry deliberately leaves out `priority`; the plan asserted
+    // further down expects it to come back as `Medium`.
+    let plan_input = json!({
+        "plan": [
+            {
+                "step": "Inspect the code",
+                "status": "completed",
+                "priority": "high"
+            },
+            {
+                "step": "Implement the tool",
+                "status": "in_progress"
+            },
+            {
+                "step": "Run tests",
+                "status": "pending",
+                "priority": "low"
+            }
+        ]
+    });
+    // Kept aside because `plan_input` is moved into the tool-use event below.
+    let expected_raw_input = plan_input.clone();
+    let tool_use = LanguageModelToolUse {
+        id: "plan_1".into(),
+        name: UpdatePlanTool::NAME.into(),
+        raw_input: plan_input.to_string(),
+        input: plan_input,
+        is_input_complete: true,
+        thought_signature: None,
+    };
+    fake_model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(tool_use));
+    fake_model.end_last_completion_stream();
+    cx.run_until_parked();
+
+    let started_call = expect_tool_call(&mut events).await;
+    let expected_call = acp::ToolCall::new("plan_1", "Update plan")
+        .kind(acp::ToolKind::Think)
+        .raw_input(expected_raw_input)
+        .meta(acp::Meta::from_iter([(
+            "tool_name".into(),
+            "update_plan".into(),
+        )]));
+    assert_eq!(started_call, expected_call);
+
+    let in_progress = expect_tool_call_update_fields(&mut events).await;
+    assert_eq!(
+        in_progress,
+        acp::ToolCallUpdate::new(
+            "plan_1",
+            acp::ToolCallUpdateFields::new().status(acp::ToolCallStatus::InProgress)
+        )
+    );
+
+    let emitted_plan = expect_plan(&mut events).await;
+    let expected_plan = acp::Plan::new(vec![
+        acp::PlanEntry::new(
+            "Inspect the code",
+            acp::PlanEntryPriority::High,
+            acp::PlanEntryStatus::Completed,
+        ),
+        acp::PlanEntry::new(
+            "Implement the tool",
+            acp::PlanEntryPriority::Medium,
+            acp::PlanEntryStatus::InProgress,
+        ),
+        acp::PlanEntry::new(
+            "Run tests",
+            acp::PlanEntryPriority::Low,
+            acp::PlanEntryStatus::Pending,
+        ),
+    ]);
+    assert_eq!(emitted_plan, expected_plan);
+
+    let completed = expect_tool_call_update_fields(&mut events).await;
+    assert_eq!(
+        completed,
+        acp::ToolCallUpdate::new(
+            "plan_1",
+            acp::ToolCallUpdateFields::new()
+                .status(acp::ToolCallStatus::Completed)
+                .raw_output("Plan updated")
+        )
+    );
+}
+
#[gpui::test]
async fn test_send_no_retry_on_success(cx: &mut TestAppContext) {
let ThreadTest { thread, model, .. } = setup(cx, TestModel::Fake).await;
@@ -3822,6 +3952,7 @@ async fn setup(cx: &mut TestAppContext, model: TestModel) -> ThreadTest {
StreamingEchoTool::NAME: true,
StreamingFailingEchoTool::NAME: true,
TerminalTool::NAME: true,
+ UpdatePlanTool::NAME: true,
}
}
}
@@ -5011,48 +5142,6 @@ async fn test_subagent_tool_resume_session(cx: &mut TestAppContext) {
);
}
-#[gpui::test]
-async fn test_subagent_tool_is_present_when_feature_flag_enabled(cx: &mut TestAppContext) {
- init_test(cx);
-
- cx.update(|cx| {
- cx.update_flags(true, vec!["subagents".to_string()]);
- });
-
- let fs = FakeFs::new(cx.executor());
- fs.insert_tree(path!("/test"), json!({})).await;
- let project = Project::test(fs, [path!("/test").as_ref()], cx).await;
- let project_context = cx.new(|_cx| ProjectContext::default());
- let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
- let context_server_registry =
- cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
- let model = Arc::new(FakeLanguageModel::default());
-
- let environment = Rc::new(cx.update(|cx| {
- FakeThreadEnvironment::default().with_terminal(FakeTerminalHandle::new_never_exits(cx))
- }));
-
- let thread = cx.new(|cx| {
- let mut thread = Thread::new(
- project.clone(),
- project_context,
- context_server_registry,
- Templates::new(),
- Some(model),
- cx,
- );
- thread.add_default_tools(environment, cx);
- thread
- });
-
- thread.read_with(cx, |thread, _| {
- assert!(
- thread.has_registered_tool(SpawnAgentTool::NAME),
- "subagent tool should be present when feature flag is enabled"
- );
- });
-}
-
#[gpui::test]
async fn test_subagent_thread_inherits_parent_thread_properties(cx: &mut TestAppContext) {
init_test(cx);
@@ -3,12 +3,14 @@ use crate::{
DeletePathTool, DiagnosticsTool, EditFileTool, FetchTool, FindPathTool, GrepTool,
ListDirectoryTool, MovePathTool, NowTool, OpenTool, ProjectSnapshot, ReadFileTool,
RestoreFileFromDiskTool, SaveFileTool, SpawnAgentTool, StreamingEditFileTool,
- SystemPromptTemplate, Template, Templates, TerminalTool, ToolPermissionDecision, WebSearchTool,
- decide_permission_from_settings,
+ SystemPromptTemplate, Template, Templates, TerminalTool, ToolPermissionDecision,
+ UpdatePlanTool, WebSearchTool, decide_permission_from_settings,
};
use acp_thread::{MentionUri, UserMessageId};
use action_log::ActionLog;
-use feature_flags::{FeatureFlagAppExt as _, StreamingEditFileToolFeatureFlag};
+use feature_flags::{
+ FeatureFlagAppExt as _, StreamingEditFileToolFeatureFlag, UpdatePlanToolFeatureFlag,
+};
use agent_client_protocol as acp;
use agent_settings::{
@@ -661,6 +663,7 @@ pub enum ThreadEvent {
AgentThinking(String),
ToolCall(acp::ToolCall),
ToolCallUpdate(acp_thread::ToolCallUpdate),
+ Plan(acp::Plan),
ToolCallAuthorization(ToolCallAuthorization),
SubagentSpawned(acp::SessionId),
Retry(acp_thread::RetryStatus),
@@ -1529,6 +1532,9 @@ impl Thread {
self.add_tool(MovePathTool::new(self.project.clone()));
self.add_tool(NowTool);
self.add_tool(OpenTool::new(self.project.clone()));
+ if cx.has_flag::<UpdatePlanToolFeatureFlag>() {
+ self.add_tool(UpdatePlanTool);
+ }
self.add_tool(ReadFileTool::new(
self.project.clone(),
self.action_log.clone(),
@@ -3476,6 +3482,10 @@ impl ThreadEventStream {
.ok();
}
+    /// Emits `plan` on this stream wrapped as a `ThreadEvent::Plan`.
+    ///
+    /// The result of the send is discarded with `.ok()`, so a failed send is
+    /// silently ignored.
+    fn send_plan(&self, plan: acp::Plan) {
+        let event = ThreadEvent::Plan(plan);
+        self.0.unbounded_send(Ok(event)).ok();
+    }
+
fn send_retry(&self, status: acp_thread::RetryStatus) {
self.0.unbounded_send(Ok(ThreadEvent::Retry(status))).ok();
}
@@ -3611,6 +3621,10 @@ impl ToolCallEventStream {
.ok();
}
+ /// Publishes `plan` by forwarding it to the underlying stream's `send_plan`.
+ pub fn update_plan(&self, plan: acp::Plan) {
+ self.stream.send_plan(plan);
+ }
+
/// Authorize a third-party tool (e.g., MCP tool from a context server).
///
/// Unlike built-in tools, third-party tools don't support pattern-based permissions.
@@ -3899,6 +3913,15 @@ impl ToolCallEventStreamReceiver {
panic!("Expected terminal but got: {:?}", event);
}
}
+
+    /// Awaits the next item from the receiver and returns its plan.
+    ///
+    /// Panics, printing whatever was received instead, unless that item is
+    /// `Some(Ok(ThreadEvent::Plan(_)))`.
+    pub async fn expect_plan(&mut self) -> acp::Plan {
+        match self.0.next().await {
+            Some(Ok(ThreadEvent::Plan(plan))) => plan,
+            event => panic!("Expected plan but got: {:?}", event),
+        }
+    }
}
#[cfg(any(test, feature = "test-support"))]
@@ -19,6 +19,7 @@ mod streaming_edit_file_tool;
mod terminal_tool;
mod tool_edit_parser;
mod tool_permissions;
+mod update_plan_tool;
mod web_search_tool;
use crate::AgentTool;
@@ -44,6 +45,7 @@ pub use spawn_agent_tool::*;
pub use streaming_edit_file_tool::*;
pub use terminal_tool::*;
pub use tool_permissions::*;
+pub use update_plan_tool::*;
pub use web_search_tool::*;
macro_rules! tools {
@@ -132,5 +134,6 @@ tools! {
SaveFileTool,
SpawnAgentTool,
TerminalTool,
+ UpdatePlanTool,
WebSearchTool,
}
@@ -0,0 +1,290 @@
+use crate::{AgentTool, ToolCallEventStream, ToolInput};
+use agent_client_protocol as acp;
+use gpui::{App, SharedString, Task};
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+use std::sync::Arc;
+
+// Status of a single plan entry as supplied in the `update_plan` tool input;
+// variant names are (de)serialized in snake_case (e.g. `in_progress`).
+// `Copy` is derived because every variant is a fieldless unit variant.
+// Plain `//` comments are used here so that the `///` text picked up by the
+// derives stays exactly as it was.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+#[schemars(inline)]
+pub enum PlanEntryStatus {
+    /// The task has not started yet.
+    Pending,
+    /// The task is currently being worked on.
+    InProgress,
+    /// The task has been successfully completed.
+    Completed,
+}
+
+impl From<PlanEntryStatus> for acp::PlanEntryStatus {
+    /// Maps each tool-input status onto the `acp` variant of the same name.
+    fn from(value: PlanEntryStatus) -> Self {
+        match value {
+            PlanEntryStatus::Pending => Self::Pending,
+            PlanEntryStatus::InProgress => Self::InProgress,
+            PlanEntryStatus::Completed => Self::Completed,
+        }
+    }
+}
+
+// Relative importance of a plan entry as supplied in the `update_plan` tool
+// input; (de)serialized in snake_case. `Medium` is the `Default`, which is what
+// an omitted `priority` field on `PlanItem` falls back to.
+// `Copy` is derived because every variant is a fieldless unit variant.
+// Plain `//` comments are used here so that no new `///` text is picked up by
+// the derives.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, PartialEq, Eq, Default)]
+#[serde(rename_all = "snake_case")]
+#[schemars(inline)]
+pub enum PlanEntryPriority {
+    High,
+    #[default]
+    Medium,
+    Low,
+}
+
+impl From<PlanEntryPriority> for acp::PlanEntryPriority {
+    /// Maps each tool-input priority onto the `acp` variant of the same name.
+    fn from(value: PlanEntryPriority) -> Self {
+        match value {
+            PlanEntryPriority::High => Self::High,
+            PlanEntryPriority::Medium => Self::Medium,
+            PlanEntryPriority::Low => Self::Low,
+        }
+    }
+}
+
+// One entry of the plan as it appears in the `update_plan` tool input.
+// `priority` may be left out when deserializing: `#[serde(default)]` then
+// falls back to `PlanEntryPriority::default()`, which is `Medium`.
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq)]
+pub struct PlanItem {
+ /// Human-readable description of what this task aims to accomplish.
+ pub step: String,
+ /// The current status of this task.
+ pub status: PlanEntryStatus,
+ /// The relative importance of this task. Defaults to medium when omitted.
+ #[serde(default)]
+ pub priority: PlanEntryPriority,
+}
+
+impl From<PlanItem> for acp::PlanEntry {
+    /// Builds an `acp::PlanEntry` from the item's step text, priority and
+    /// status (passed to `acp::PlanEntry::new` in that order).
+    fn from(value: PlanItem) -> Self {
+        // Destructure so that a newly added field cannot be silently dropped.
+        let PlanItem {
+            step,
+            status,
+            priority,
+        } = value;
+        acp::PlanEntry::new(step, priority.into(), status.into())
+    }
+}
+
+// Input payload of `UpdatePlanTool` (its `AgentTool::Input` type).
+// NOTE(review): the `///` text below is presumably surfaced through the derived
+// JSON schema / tool description — confirm before rewording it.
+/// Updates the task plan.
+/// Provide a list of plan entries, each with step, status, and optional priority.
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, PartialEq, Eq)]
+pub struct UpdatePlanToolInput {
+ /// The list of plan entries and their current statuses.
+ pub plan: Vec<PlanItem>,
+}
+
+/// Stateless tool that publishes a plan through the tool-call event stream.
+pub struct UpdatePlanTool;
+
+impl UpdatePlanTool {
+    /// Converts the tool input into an `acp::Plan`, keeping the entry order.
+    fn to_plan(input: UpdatePlanToolInput) -> acp::Plan {
+        let entries: Vec<acp::PlanEntry> = input.plan.into_iter().map(Into::into).collect();
+        acp::Plan::new(entries)
+    }
+}
+
+impl AgentTool for UpdatePlanTool {
+    type Input = UpdatePlanToolInput;
+    type Output = String;
+
+    const NAME: &'static str = "update_plan";
+
+    /// This tool is reported with the `Think` kind.
+    fn kind() -> acp::ToolKind {
+        acp::ToolKind::Think
+    }
+
+    /// Returns "Clear plan" when the input parsed and contains no entries;
+    /// otherwise (non-empty plan or unparsed input) returns "Update plan".
+    fn initial_title(
+        &self,
+        input: Result<Self::Input, serde_json::Value>,
+        _cx: &mut App,
+    ) -> SharedString {
+        let clears_plan = if let Ok(input) = &input {
+            input.plan.is_empty()
+        } else {
+            false
+        };
+        let title = if clears_plan {
+            "Clear plan"
+        } else {
+            "Update plan"
+        };
+        title.into()
+    }
+
+    /// Waits for the tool input, emits it as a plan on `event_stream`, and
+    /// resolves to "Plan updated". If the input cannot be received, resolves
+    /// to an error string describing the failure instead.
+    fn run(
+        self: Arc<Self>,
+        input: ToolInput<Self::Input>,
+        event_stream: ToolCallEventStream,
+        cx: &mut App,
+    ) -> Task<Result<Self::Output, Self::Output>> {
+        cx.spawn(async move |_cx| {
+            match input.recv().await {
+                Ok(input) => {
+                    event_stream.update_plan(Self::to_plan(input));
+                    Ok("Plan updated".to_string())
+                }
+                Err(e) => Err(format!("Failed to receive tool input: {e}")),
+            }
+        })
+    }
+
+    /// Re-emits the plan built from a recorded `input`; the recorded output is
+    /// not used.
+    fn replay(
+        &self,
+        input: Self::Input,
+        _output: Self::Output,
+        event_stream: ToolCallEventStream,
+        _cx: &mut App,
+    ) -> anyhow::Result<()> {
+        let plan = Self::to_plan(input);
+        event_stream.update_plan(plan);
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::ToolCallEventStream;
+    use gpui::TestAppContext;
+    use pretty_assertions::assert_eq;
+
+    /// Three-entry input that covers every status and every priority once.
+    fn sample_input() -> UpdatePlanToolInput {
+        UpdatePlanToolInput {
+            plan: vec![
+                PlanItem {
+                    step: "Inspect the existing tool wiring".to_string(),
+                    status: PlanEntryStatus::Completed,
+                    priority: PlanEntryPriority::High,
+                },
+                PlanItem {
+                    step: "Implement the update_plan tool".to_string(),
+                    status: PlanEntryStatus::InProgress,
+                    priority: PlanEntryPriority::Medium,
+                },
+                PlanItem {
+                    step: "Add tests".to_string(),
+                    status: PlanEntryStatus::Pending,
+                    priority: PlanEntryPriority::Low,
+                },
+            ],
+        }
+    }
+
+    /// The `acp::Plan` that `sample_input()` is expected to convert into.
+    fn expected_sample_plan() -> acp::Plan {
+        acp::Plan::new(vec![
+            acp::PlanEntry::new(
+                "Inspect the existing tool wiring",
+                acp::PlanEntryPriority::High,
+                acp::PlanEntryStatus::Completed,
+            ),
+            acp::PlanEntry::new(
+                "Implement the update_plan tool",
+                acp::PlanEntryPriority::Medium,
+                acp::PlanEntryStatus::InProgress,
+            ),
+            acp::PlanEntry::new(
+                "Add tests",
+                acp::PlanEntryPriority::Low,
+                acp::PlanEntryStatus::Pending,
+            ),
+        ])
+    }
+
+    #[gpui::test]
+    async fn test_run_emits_plan_event(cx: &mut TestAppContext) {
+        let tool = Arc::new(UpdatePlanTool);
+        let (event_stream, mut event_rx) = ToolCallEventStream::test();
+
+        let result = cx
+            .update(|cx| tool.run(ToolInput::resolved(sample_input()), event_stream, cx))
+            .await
+            .expect("tool should succeed");
+
+        assert_eq!(result, "Plan updated".to_string());
+        assert_eq!(event_rx.expect_plan().await, expected_sample_plan());
+    }
+
+    #[gpui::test]
+    async fn test_replay_emits_plan_event(cx: &mut TestAppContext) {
+        let tool = UpdatePlanTool;
+        let (event_stream, mut event_rx) = ToolCallEventStream::test();
+
+        cx.update(|cx| {
+            tool.replay(sample_input(), "Plan updated".to_string(), event_stream, cx)
+                .expect("replay should succeed");
+        });
+
+        assert_eq!(event_rx.expect_plan().await, expected_sample_plan());
+    }
+
+    #[gpui::test]
+    async fn test_run_defaults_priority_to_medium(cx: &mut TestAppContext) {
+        let tool = Arc::new(UpdatePlanTool);
+        let (event_stream, mut event_rx) = ToolCallEventStream::test();
+
+        // Deserialize from JSON with `priority` left out, so the test goes
+        // through `#[serde(default)]` rather than constructing the default
+        // value by hand.
+        let input: UpdatePlanToolInput = serde_json::from_value(serde_json::json!({
+            "plan": [
+                { "step": "First", "status": "in_progress" },
+                { "step": "Second", "status": "in_progress" }
+            ]
+        }))
+        .expect("input without priorities should deserialize");
+
+        let result = cx
+            .update(|cx| tool.run(ToolInput::resolved(input), event_stream, cx))
+            .await
+            .expect("tool should succeed");
+
+        assert_eq!(result, "Plan updated".to_string());
+
+        let plan = event_rx.expect_plan().await;
+        assert_eq!(
+            plan,
+            acp::Plan::new(vec![
+                acp::PlanEntry::new(
+                    "First",
+                    acp::PlanEntryPriority::Medium,
+                    acp::PlanEntryStatus::InProgress,
+                ),
+                acp::PlanEntry::new(
+                    "Second",
+                    acp::PlanEntryPriority::Medium,
+                    acp::PlanEntryStatus::InProgress,
+                ),
+            ])
+        );
+    }
+
+    #[gpui::test]
+    async fn test_initial_title(cx: &mut TestAppContext) {
+        let tool = UpdatePlanTool;
+
+        let title = cx.update(|cx| tool.initial_title(Ok(sample_input()), cx));
+        assert_eq!(title, SharedString::from("Update plan"));
+
+        let title =
+            cx.update(|cx| tool.initial_title(Ok(UpdatePlanToolInput { plan: Vec::new() }), cx));
+        assert_eq!(title, SharedString::from("Clear plan"));
+
+        // Input that failed to parse falls back to the generic title.
+        let title = cx.update(|cx| tool.initial_title(Err(serde_json::json!({})), cx));
+        assert_eq!(title, SharedString::from("Update plan"));
+    }
+}
@@ -328,6 +328,9 @@ impl ExampleContext {
"{}Bug: Tool confirmation should not be required in eval",
log_prefix
),
+ ThreadEvent::Plan(plan) => {
+ println!("{log_prefix} Got plan: {plan:?}");
+ }
ThreadEvent::SubagentSpawned(session) => {
println!("{log_prefix} Got subagent spawn: {session:?}");
}
@@ -63,6 +63,16 @@ impl FeatureFlag for StreamingEditFileToolFeatureFlag {
}
}
+/// Feature flag registered under the name `update-plan-tool`.
+pub struct UpdatePlanToolFeatureFlag;
+
+impl FeatureFlag for UpdatePlanToolFeatureFlag {
+ const NAME: &'static str = "update-plan-tool";
+
+ // Reports this flag as enabled for staff.
+ fn enabled_for_staff() -> bool {
+ true
+ }
+}
+
pub struct ProjectPanelUndoRedoFeatureFlag;
impl FeatureFlag for ProjectPanelUndoRedoFeatureFlag {
@@ -1421,6 +1421,9 @@ mod tests {
// Subagent permission checks happen at the level of individual
// tool calls within the subagent, not at the spawning level.
"spawn_agent",
+ // update_plan updates UI-visible planning state but does not use
+ // tool permission rules.
+ "update_plan",
];
let tool_info_ids: Vec<&str> = TOOLS.iter().map(|t| t.id).collect();