From 85c23d0d0bb53ecc02db01f71cb8887b4ff00bb0 Mon Sep 17 00:00:00 2001 From: Ben Brandt Date: Thu, 19 Feb 2026 18:58:59 +0100 Subject: [PATCH] agent: Improve the subagent task structure (#49629) Removes tool filtering since this was throwing off certain models, and also allows for more generic task prompts that don't always require summaries. Since the models usually provide a wrap-up message, we don't have to wait for another turn. This also sets us up to allow the agent to re-interact with an existing subagent thread. Release Notes: - N/A --------- Co-authored-by: Jakub Konka --- crates/agent/src/agent.rs | 72 +---- crates/agent/src/tests/mod.rs | 382 +----------------------- crates/agent/src/thread.rs | 15 +- crates/agent/src/tools/subagent_tool.rs | 191 ++---------- crates/eval/src/instance.rs | 1 - 5 files changed, 64 insertions(+), 597 deletions(-) diff --git a/crates/agent/src/agent.rs b/crates/agent/src/agent.rs index 39747f6ff93b7378bbff375bd001cf1ac1b20658..07f53f114cf38bd2f5a2b1db6140b71dc06d0680 100644 --- a/crates/agent/src/agent.rs +++ b/crates/agent/src/agent.rs @@ -342,7 +342,7 @@ impl NativeAgent { fn register_session( &mut self, thread_handle: Entity, - allowed_tool_names: Option>, + allowed_tool_names: Option>, cx: &mut Context, ) -> Entity { let connection = Rc::new(NativeAgentConnection(cx.entity())); @@ -1590,7 +1590,6 @@ impl NativeThreadEnvironment { label: String, initial_prompt: String, timeout: Option, - allowed_tools: Option>, cx: &mut App, ) -> Result> { let parent_thread = parent_thread_entity.read(cx); @@ -1602,20 +1601,7 @@ impl NativeThreadEnvironment { MAX_SUBAGENT_DEPTH )); } - - let allowed_tools = match allowed_tools { - Some(tools) => { - let parent_tool_names: std::collections::HashSet<&str> = - parent_thread.tools.keys().map(|s| s.as_str()).collect(); - Some( - tools - .into_iter() - .filter(|t| parent_tool_names.contains(t.as_str())) - .collect::>(), - ) - } - None => Some(parent_thread.tools.keys().map(|s| s.to_string()).collect()), - }; + let allowed_tool_names = Some(parent_thread.tools.keys().cloned().collect::>()); let subagent_thread: Entity = cx.new(|cx| { let mut thread = Thread::new_subagent(&parent_thread_entity, cx); @@ -1626,13 +1612,7 @@ impl NativeThreadEnvironment { let session_id = subagent_thread.read(cx).id().clone(); let acp_thread = agent.update(cx, |agent, cx| { - agent.register_session( - subagent_thread.clone(), - allowed_tools - .as_ref() - .map(|v| v.iter().map(|s| s.as_str()).collect()), - cx, - ) + agent.register_session(subagent_thread.clone(), allowed_tool_names, cx) })?; parent_thread_entity.update(cx, |parent_thread, _cx| { @@ -1676,7 +1656,6 @@ impl NativeThreadEnvironment { session_id, subagent_thread, parent_thread: parent_thread_entity.downgrade(), - acp_thread, wait_for_prompt_to_complete, }) as _) } @@ -1722,7 +1701,6 @@ impl ThreadEnvironment for NativeThreadEnvironment { label: String, initial_prompt: String, timeout: Option, - allowed_tools: Option>, cx: &mut App, ) -> Result> { Self::create_subagent_thread( @@ -1731,7 +1709,6 @@ impl ThreadEnvironment for NativeThreadEnvironment { label, initial_prompt, timeout, - allowed_tools, cx, ) } @@ -1748,7 +1725,6 @@ pub struct NativeSubagentHandle { session_id: acp::SessionId, parent_thread: WeakEntity, subagent_thread: Entity, - acp_thread: Entity, wait_for_prompt_to_complete: Shared>, } @@ -1757,51 +1733,35 @@ impl SubagentHandle for NativeSubagentHandle { self.session_id.clone() } - fn wait_for_summary(&self, summary_prompt: String, cx: &AsyncApp) -> Task> { + fn wait_for_output(&self, cx: &AsyncApp) -> Task> { let thread = self.subagent_thread.clone(); - let acp_thread = self.acp_thread.clone(); let wait_for_prompt = self.wait_for_prompt_to_complete.clone(); - let wait_for_summary_task = cx.spawn(async move |cx| { - let timed_out = match wait_for_prompt.await { - SubagentInitialPromptResult::Completed => false, - SubagentInitialPromptResult::Timeout => true, - SubagentInitialPromptResult::Cancelled => return Err(anyhow!("User cancelled")), - }; + let subagent_session_id = self.session_id.clone(); + let parent_thread = self.parent_thread.clone(); - let summary_prompt = if timed_out { - thread.update(cx, |thread, cx| thread.cancel(cx)).await; - format!("{}\n{}", "The time to complete the task was exceeded. Stop with the task and follow the directions below:", summary_prompt) - } else { - summary_prompt + cx.spawn(async move |cx| { + match wait_for_prompt.await { + SubagentInitialPromptResult::Completed => {} + SubagentInitialPromptResult::Timeout => { + return Err(anyhow!("The time to complete the task was exceeded.")); + } + SubagentInitialPromptResult::Cancelled => return Err(anyhow!("User cancelled")), }; - let response = acp_thread - .update(cx, |thread, cx| thread.send(vec![summary_prompt.into()], cx)) - .await?; - - let was_canceled = response.is_some_and(|r| r.stop_reason == acp::StopReason::Cancelled); - if was_canceled { - return Err(anyhow!("User cancelled")); - } - - thread.read_with(cx, |thread, _cx| { + let result = thread.read_with(cx, |thread, _cx| { thread .last_message() .map(|m| m.to_markdown()) .context("No response from subagent") - }) - }); + }); - let subagent_session_id = self.session_id.clone(); - let parent_thread = self.parent_thread.clone(); - cx.spawn(async move |cx| { - let result = wait_for_summary_task.await; parent_thread .update(cx, |parent_thread, cx| { parent_thread.unregister_running_subagent(&subagent_session_id, cx) }) .ok(); + result }) } diff --git a/crates/agent/src/tests/mod.rs b/crates/agent/src/tests/mod.rs index 8d04ab297a5559acaf7c6743a9b416d17bd395fb..ce6037406383147491f20df2288abb4b33a27224 100644 --- a/crates/agent/src/tests/mod.rs +++ b/crates/agent/src/tests/mod.rs @@ -166,7 +166,7 @@ impl SubagentHandle for FakeSubagentHandle { self.session_id.clone() } - fn wait_for_summary(&self, _summary_prompt: String, cx: &AsyncApp) -> Task> { + fn wait_for_output(&self, cx: &AsyncApp) -> Task> { let task = self.wait_for_summary_task.clone(); cx.background_spawn(async move { Ok(task.await) }) } @@ -208,7 +208,6 @@ impl crate::ThreadEnvironment for FakeThreadEnvironment { _label: String, _initial_prompt: String, _timeout_ms: Option, - _allowed_tools: Option>, _cx: &mut App, ) -> Result> { Ok(self @@ -255,7 +254,6 @@ impl crate::ThreadEnvironment for MultiTerminalEnvironment { _label: String, _initial_prompt: String, _timeout: Option, - _allowed_tools: Option>, _cx: &mut App, ) -> Result> { unimplemented!() @@ -4234,10 +4232,8 @@ async fn test_subagent_tool_call_end_to_end(cx: &mut TestAppContext) { model.send_last_completion_stream_text_chunk("spawning subagent"); let subagent_tool_input = SubagentToolInput { label: "label".to_string(), - task_prompt: "subagent task prompt".to_string(), - summary_prompt: "subagent summary prompt".to_string(), - timeout_ms: None, - allowed_tools: None, + prompt: "subagent task prompt".to_string(), + timeout: None, }; let subagent_tool_use = LanguageModelToolUse { id: "subagent_1".into(), @@ -4276,11 +4272,6 @@ async fn test_subagent_tool_call_end_to_end(cx: &mut TestAppContext) { cx.run_until_parked(); - model.send_last_completion_stream_text_chunk("subagent summary response"); - model.end_last_completion_stream(); - - cx.run_until_parked(); - assert_eq!( subagent_thread.read_with(cx, |thread, cx| thread.to_markdown(cx)), indoc! {" @@ -4292,14 +4283,6 @@ async fn test_subagent_tool_call_end_to_end(cx: &mut TestAppContext) { subagent task response - ## User - - subagent summary prompt - - ## Assistant - - subagent summary response - "} ); @@ -4325,8 +4308,8 @@ async fn test_subagent_tool_call_end_to_end(cx: &mut TestAppContext) { ```json {{ - "subagent_session_id": "{}", - "summary": "subagent summary response\n" + "session_id": "{}", + "output": "subagent task response\n" }} ``` @@ -4399,10 +4382,8 @@ async fn test_subagent_tool_call_cancellation_during_task_prompt(cx: &mut TestAp model.send_last_completion_stream_text_chunk("spawning subagent"); let subagent_tool_input = SubagentToolInput { label: "label".to_string(), - task_prompt: "subagent task prompt".to_string(), - summary_prompt: "subagent summary prompt".to_string(), - timeout_ms: None, - allowed_tools: None, + prompt: "subagent task prompt".to_string(), + timeout: None, }; let subagent_tool_use = LanguageModelToolUse { id: "subagent_1".into(), @@ -4479,153 +4460,6 @@ async fn test_subagent_tool_call_cancellation_during_task_prompt(cx: &mut TestAp }); } -#[gpui::test] -async fn test_subagent_tool_call_cancellation_during_summary_prompt(cx: &mut TestAppContext) { - init_test(cx); - cx.update(|cx| { - LanguageModelRegistry::test(cx); - }); - cx.update(|cx| { - cx.update_flags(true, vec!["subagents".to_string()]); - }); - - let fs = FakeFs::new(cx.executor()); - fs.insert_tree( - "/", - json!({ - "a": { - "b.md": "Lorem" - } - }), - ) - .await; - let project = Project::test(fs.clone(), [path!("/a").as_ref()], cx).await; - let thread_store = cx.new(|cx| ThreadStore::new(cx)); - let agent = NativeAgent::new( - project.clone(), - thread_store.clone(), - Templates::new(), - None, - fs.clone(), - &mut cx.to_async(), - ) - .await - .unwrap(); - let connection = Rc::new(NativeAgentConnection(agent.clone())); - - let acp_thread = cx - .update(|cx| { - connection - .clone() - .new_session(project.clone(), Path::new(""), cx) - }) - .await - .unwrap(); - let session_id = acp_thread.read_with(cx, |thread, _| thread.session_id().clone()); - let thread = agent.read_with(cx, |agent, _| { - agent.sessions.get(&session_id).unwrap().thread.clone() - }); - let model = Arc::new(FakeLanguageModel::default()); - - // Ensure empty threads are not saved, even if they get mutated. - thread.update(cx, |thread, cx| { - thread.set_model(model.clone(), cx); - }); - cx.run_until_parked(); - - let send = acp_thread.update(cx, |thread, cx| thread.send_raw("Prompt", cx)); - cx.run_until_parked(); - model.send_last_completion_stream_text_chunk("spawning subagent"); - let subagent_tool_input = SubagentToolInput { - label: "label".to_string(), - task_prompt: "subagent task prompt".to_string(), - summary_prompt: "subagent summary prompt".to_string(), - timeout_ms: None, - allowed_tools: None, - }; - let subagent_tool_use = LanguageModelToolUse { - id: "subagent_1".into(), - name: SubagentTool::NAME.into(), - raw_input: serde_json::to_string(&subagent_tool_input).unwrap(), - input: serde_json::to_value(&subagent_tool_input).unwrap(), - is_input_complete: true, - thought_signature: None, - }; - model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse( - subagent_tool_use, - )); - model.end_last_completion_stream(); - - cx.run_until_parked(); - - let subagent_session_id = thread.read_with(cx, |thread, cx| { - thread - .running_subagent_ids(cx) - .get(0) - .expect("subagent thread should be running") - .clone() - }); - let subagent_acp_thread = agent.read_with(cx, |agent, _cx| { - agent - .sessions - .get(&subagent_session_id) - .expect("subagent session should exist") - .acp_thread - .clone() - }); - - model.send_last_completion_stream_text_chunk("subagent task response"); - model.end_last_completion_stream(); - - cx.run_until_parked(); - - acp_thread.update(cx, |thread, cx| thread.cancel(cx)).await; - - cx.run_until_parked(); - - send.await.unwrap(); - - acp_thread.read_with(cx, |thread, cx| { - assert_eq!(thread.status(), ThreadStatus::Idle); - assert_eq!( - thread.to_markdown(cx), - indoc! {" - ## User - - Prompt - - ## Assistant - - spawning subagent - - **Tool Call: label** - Status: Canceled - - "} - ); - }); - subagent_acp_thread.read_with(cx, |thread, cx| { - assert_eq!(thread.status(), ThreadStatus::Idle); - assert_eq!( - thread.to_markdown(cx), - indoc! {" - ## User - - subagent task prompt - - ## Assistant - - subagent task response - - ## User - - subagent summary prompt - - "} - ); - }); -} - #[gpui::test] async fn test_subagent_tool_is_present_when_feature_flag_enabled(cx: &mut TestAppContext) { init_test(cx); @@ -4818,105 +4652,6 @@ async fn test_parent_cancel_stops_subagent(cx: &mut TestAppContext) { }); } -#[gpui::test] -async fn test_subagent_tool_returns_summary(cx: &mut TestAppContext) { - init_test(cx); - - always_allow_tools(cx); - - cx.update(|cx| { - cx.update_flags(true, vec!["subagents".to_string()]); - }); - - let fs = FakeFs::new(cx.executor()); - fs.insert_tree(path!("/test"), json!({})).await; - let project = Project::test(fs.clone(), [path!("/test").as_ref()], cx).await; - let project_context = cx.new(|_cx| ProjectContext::default()); - let context_server_store = project.read_with(cx, |project, _| project.context_server_store()); - let context_server_registry = - cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx)); - cx.update(LanguageModelRegistry::test); - let model = Arc::new(FakeLanguageModel::default()); - let thread_store = cx.new(|cx| ThreadStore::new(cx)); - let native_agent = NativeAgent::new( - project.clone(), - thread_store, - Templates::new(), - None, - fs, - &mut cx.to_async(), - ) - .await - .unwrap(); - let parent_thread = cx.new(|cx| { - Thread::new( - project.clone(), - project_context, - context_server_registry, - Templates::new(), - Some(model.clone()), - cx, - ) - }); - - let subagent_handle = cx - .update(|cx| { - NativeThreadEnvironment::create_subagent_thread( - native_agent.downgrade(), - parent_thread.clone(), - "some title".to_string(), - "task prompt".to_string(), - Some(Duration::from_millis(10)), - None, - cx, - ) - }) - .expect("Failed to create subagent"); - - let summary_task = - subagent_handle.wait_for_summary("summary prompt".to_string(), &cx.to_async()); - - cx.run_until_parked(); - - { - let messages = model.pending_completions().last().unwrap().messages.clone(); - // Ensure that model received a system prompt - assert_eq!(messages[0].role, Role::System); - // Ensure that model received a task prompt - assert_eq!(messages[1].role, Role::User); - assert_eq!( - messages[1].content, - vec![MessageContent::Text("task prompt".to_string())] - ); - } - - model.send_last_completion_stream_text_chunk("Some task response..."); - model.end_last_completion_stream(); - - cx.run_until_parked(); - - { - let messages = model.pending_completions().last().unwrap().messages.clone(); - assert_eq!(messages[2].role, Role::Assistant); - assert_eq!( - messages[2].content, - vec![MessageContent::Text("Some task response...".to_string())] - ); - // Ensure that model received a summary prompt - assert_eq!(messages[3].role, Role::User); - assert_eq!( - messages[3].content, - vec![MessageContent::Text("summary prompt".to_string())] - ); - } - - model.send_last_completion_stream_text_chunk("Some summary..."); - model.end_last_completion_stream(); - - let result = summary_task.await; - assert_eq!(result.unwrap(), "Some summary...\n"); -} - #[gpui::test] async fn test_subagent_tool_includes_cancellation_notice_when_timeout_is_exceeded( cx: &mut TestAppContext, @@ -4967,15 +4702,13 @@ async fn test_subagent_tool_includes_cancellation_notice_when_timeout_is_exceede parent_thread.clone(), "some title".to_string(), "task prompt".to_string(), - Some(Duration::from_millis(100)), - None, + Some(Duration::from_secs(1)), cx, ) }) .expect("Failed to create subagent"); - let summary_task = - subagent_handle.wait_for_summary("summary prompt".to_string(), &cx.to_async()); + let summary_task = subagent_handle.wait_for_output(&cx.to_async()); cx.run_until_parked(); @@ -4991,29 +4724,16 @@ async fn test_subagent_tool_includes_cancellation_notice_when_timeout_is_exceede } // Don't complete the initial model stream — let the timeout expire instead. - cx.executor().advance_clock(Duration::from_millis(200)); + cx.executor().advance_clock(Duration::from_secs(2)); cx.run_until_parked(); - // After the timeout fires, the thread is cancelled and context_low_prompt is sent - // instead of the summary_prompt. - { - let messages = model.pending_completions().last().unwrap().messages.clone(); - let last_user_message = messages - .iter() - .rev() - .find(|m| m.role == Role::User) - .unwrap(); - assert_eq!( - last_user_message.content, - vec![MessageContent::Text("The time to complete the task was exceeded. Stop with the task and follow the directions below:\nsummary prompt".to_string())] - ); - } - - model.send_last_completion_stream_text_chunk("Some context low response..."); model.end_last_completion_stream(); - let result = summary_task.await; - assert_eq!(result.unwrap(), "Some context low response...\n"); + let error = summary_task.await.unwrap_err(); + assert_eq!( + error.to_string(), + "The time to complete the task was exceeded." + ); } #[gpui::test] @@ -5068,7 +4788,6 @@ async fn test_subagent_inherits_parent_thread_tools(cx: &mut TestAppContext) { "some title".to_string(), "task prompt".to_string(), Some(Duration::from_millis(10)), - None, cx, ) }) @@ -5089,77 +4808,6 @@ async fn test_subagent_inherits_parent_thread_tools(cx: &mut TestAppContext) { assert!(tools.contains(&"list_directory".to_string())); } -#[gpui::test] -async fn test_subagent_tool_restricts_tool_access(cx: &mut TestAppContext) { - init_test(cx); - - always_allow_tools(cx); - - cx.update(|cx| { - cx.update_flags(true, vec!["subagents".to_string()]); - }); - - let fs = FakeFs::new(cx.executor()); - fs.insert_tree(path!("/test"), json!({})).await; - let project = Project::test(fs.clone(), [path!("/test").as_ref()], cx).await; - let project_context = cx.new(|_cx| ProjectContext::default()); - let context_server_store = project.read_with(cx, |project, _| project.context_server_store()); - let context_server_registry = - cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx)); - cx.update(LanguageModelRegistry::test); - let model = Arc::new(FakeLanguageModel::default()); - let thread_store = cx.new(|cx| ThreadStore::new(cx)); - let native_agent = NativeAgent::new( - project.clone(), - thread_store, - Templates::new(), - None, - fs, - &mut cx.to_async(), - ) - .await - .unwrap(); - let parent_thread = cx.new(|cx| { - let mut thread = Thread::new( - project.clone(), - project_context, - context_server_registry, - Templates::new(), - Some(model.clone()), - cx, - ); - thread.add_tool(ListDirectoryTool::new(project.clone()), None); - thread.add_tool(GrepTool::new(project.clone()), None); - thread - }); - - let _subagent_handle = cx - .update(|cx| { - NativeThreadEnvironment::create_subagent_thread( - native_agent.downgrade(), - parent_thread.clone(), - "some title".to_string(), - "task prompt".to_string(), - Some(Duration::from_millis(10)), - Some(vec!["grep".to_string()]), - cx, - ) - }) - .expect("Failed to create subagent"); - - cx.run_until_parked(); - - let tools = model - .pending_completions() - .last() - .unwrap() - .tools - .iter() - .map(|tool| tool.name.clone()) - .collect::>(); - assert_eq!(tools, vec!["grep"]); -} - #[gpui::test] async fn test_edit_file_tool_deny_rule_blocks_edit(cx: &mut TestAppContext) { init_test(cx); diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs index e41656469d8213f9a0a38a03aafc533d3eadaa1f..2700235be77e624ba2944215924da95c6bffd17e 100644 --- a/crates/agent/src/thread.rs +++ b/crates/agent/src/thread.rs @@ -601,7 +601,7 @@ pub trait TerminalHandle { pub trait SubagentHandle { fn id(&self) -> acp::SessionId; - fn wait_for_summary(&self, summary_prompt: String, cx: &AsyncApp) -> Task>; + fn wait_for_output(&self, cx: &AsyncApp) -> Task>; } pub trait ThreadEnvironment { @@ -619,7 +619,6 @@ pub trait ThreadEnvironment { label: String, initial_prompt: String, timeout: Option, - allowed_tools: Option>, cx: &mut App, ) -> Result>; } @@ -1327,7 +1326,7 @@ impl Thread { pub fn add_default_tools( &mut self, - allowed_tool_names: Option>, + allowed_tool_names: Option>, environment: Rc, cx: &mut Context, ) { @@ -1421,8 +1420,14 @@ impl Thread { } } - pub fn add_tool(&mut self, tool: T, allowed_tool_names: Option<&Vec<&str>>) { - if allowed_tool_names.is_some_and(|tool_names| !tool_names.contains(&T::NAME)) { + pub fn add_tool( + &mut self, + tool: T, + allowed_tool_names: Option<&Vec>, + ) { + if allowed_tool_names + .is_some_and(|tool_names| !tool_names.iter().any(|x| x.as_str() == T::NAME)) + { return; } diff --git a/crates/agent/src/tools/subagent_tool.rs b/crates/agent/src/tools/subagent_tool.rs index 8b38f5f655a37e18cf3114f71103c896ffad8d0b..29fb870d5db20ad6d75ade8120137ed3560533ee 100644 --- a/crates/agent/src/tools/subagent_tool.rs +++ b/crates/agent/src/tools/subagent_tool.rs @@ -1,7 +1,7 @@ use acp_thread::SUBAGENT_SESSION_ID_META_KEY; use agent_client_protocol as acp; use anyhow::{Result, anyhow}; -use gpui::{App, Entity, SharedString, Task, WeakEntity}; +use gpui::{App, SharedString, Task, WeakEntity}; use language_model::LanguageModelToolResultContent; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -10,64 +10,42 @@ use std::{rc::Rc, time::Duration}; use crate::{AgentTool, Thread, ThreadEnvironment, ToolCallEventStream}; -/// Spawns a subagent with its own context window to perform a delegated task. +/// Spawns an agent to perform a delegated task. /// /// Use this tool when you want to do any of the following: /// - Perform an investigation where all you need to know is the outcome, not the research that led to that outcome. /// - Complete a self-contained task where you need to know if it succeeded or failed (and how), but none of its intermediate output. /// - Run multiple tasks in parallel that would take significantly longer to run sequentially. /// -/// You control what the subagent does by providing: -/// 1. A task prompt describing what the subagent should do -/// 2. A summary prompt that tells the subagent how to summarize its work when done -/// 3. A "context running out" prompt for when the subagent is low on tokens +/// You control what the agent does by providing a prompt describing what the agent should do. The agent has access to the same tools you do. /// -/// Each subagent has access to the same tools you do. You can optionally restrict -/// which tools each subagent can use. +/// You will receive the agent's final message. /// /// Note: -/// - Maximum 8 subagents can run in parallel -/// - Subagents cannot use tools you don't have access to -/// - If spawning multiple subagents that might write to the filesystem, provide -/// guidance on how to avoid conflicts (e.g. assign each to different directories) -/// - Instruct subagents to be concise in their summaries to conserve your context +/// - Agents cannot use tools you don't have access to. +/// - If spawning multiple agents that might write to the filesystem, provide guidance on how to avoid conflicts (e.g. assign each to different directories) #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct SubagentToolInput { - /// Short label displayed in the UI while the subagent runs (e.g., "Researching alternatives") + /// Short label displayed in the UI while the agent runs (e.g., "Researching alternatives") pub label: String, - - /// The initial prompt that tells the subagent what task to perform. - /// Be specific about what you want the subagent to accomplish. - pub task_prompt: String, - - /// The prompt sent to the subagent when it completes its task, asking it - /// to summarize what it did and return results. This summary becomes the - /// tool result you receive. - /// - /// Example: "Summarize what you found, listing the top 3 alternatives with pros/cons." - pub summary_prompt: String, - - /// Optional: Maximum runtime in milliseconds. If exceeded, the subagent is - /// asked to summarize and return. No timeout by default. - #[serde(default)] - pub timeout_ms: Option, - - /// Optional: List of tool names the subagent is allowed to use. - /// If not provided, the subagent can use all tools available to the parent. - /// Tools listed here must be a subset of the parent's available tools. + /// The prompt that tells the agent what task to perform. Be specific about what you want the agent to accomplish. + pub prompt: String, + /// Optional: Maximum runtime in seconds. No timeout by default. #[serde(default)] - pub allowed_tools: Option>, + pub timeout: Option, } #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct SubagentToolOutput { - pub subagent_session_id: acp::SessionId, - pub summary: String, + pub session_id: acp::SessionId, + pub output: String, } impl From for LanguageModelToolResultContent { fn from(output: SubagentToolOutput) -> Self { - output.summary.into() + serde_json::to_string(&output) + .expect("Failed to serialize SubagentToolOutput") + .into() } } @@ -84,32 +62,6 @@ impl SubagentTool { environment, } } - - fn validate_allowed_tools( - allowed_tools: &Option>, - parent_thread: &Entity, - cx: &App, - ) -> Result<()> { - let Some(allowed_tools) = allowed_tools else { - return Ok(()); - }; - - let thread = parent_thread.read(cx); - let invalid_tools: Vec<_> = allowed_tools - .iter() - .filter(|tool| !thread.tools.contains_key(tool.as_str())) - .map(|s| format!("'{s}'")) - .collect::>(); - - if !invalid_tools.is_empty() { - return Err(anyhow!( - "The following tools do not exist: {}", - invalid_tools.join(", ") - )); - } - - Ok(()) - } } impl AgentTool for SubagentTool { @@ -142,18 +94,11 @@ impl AgentTool for SubagentTool { return Task::ready(Err(anyhow!("Parent thread no longer exists"))); }; - if let Err(e) = - Self::validate_allowed_tools(&input.allowed_tools, &parent_thread_entity, cx) - { - return Task::ready(Err(e)); - } - let subagent = match self.environment.create_subagent( parent_thread_entity, input.label, - input.task_prompt, - input.timeout_ms.map(|ms| Duration::from_millis(ms)), - input.allowed_tools, + input.prompt, + input.timeout.map(|secs| Duration::from_secs(secs)), cx, ) { Ok(subagent) => subagent, @@ -170,10 +115,10 @@ impl AgentTool for SubagentTool { event_stream.update_fields_with_meta(acp::ToolCallUpdateFields::new(), Some(meta)); cx.spawn(async move |cx| { - let summary = subagent.wait_for_summary(input.summary_prompt, cx).await?; + let output = subagent.wait_for_output(cx).await?; Ok(SubagentToolOutput { - subagent_session_id, - summary, + session_id: subagent_session_id, + output, }) }) } @@ -185,102 +130,12 @@ impl AgentTool for SubagentTool { event_stream: ToolCallEventStream, _cx: &mut App, ) -> Result<()> { - event_stream.subagent_spawned(output.subagent_session_id.clone()); + event_stream.subagent_spawned(output.session_id.clone()); let meta = acp::Meta::from_iter([( SUBAGENT_SESSION_ID_META_KEY.into(), - output.subagent_session_id.to_string().into(), + output.session_id.to_string().into(), )]); event_stream.update_fields_with_meta(acp::ToolCallUpdateFields::new(), Some(meta)); Ok(()) } } - -#[cfg(test)] -mod tests { - use super::*; - use crate::{ContextServerRegistry, Templates, Thread}; - use fs::FakeFs; - use gpui::{AppContext as _, TestAppContext}; - use project::Project; - use prompt_store::ProjectContext; - use serde_json::json; - use settings::SettingsStore; - use util::path; - - async fn create_thread_with_tools(cx: &mut TestAppContext) -> Entity { - cx.update(|cx| { - let settings_store = SettingsStore::test(cx); - cx.set_global(settings_store); - }); - let fs = FakeFs::new(cx.executor()); - fs.insert_tree(path!("/test"), json!({})).await; - let project = Project::test(fs, [path!("/test").as_ref()], cx).await; - let project_context = cx.new(|_cx| ProjectContext::default()); - let context_server_store = - project.read_with(cx, |project, _| project.context_server_store()); - let context_server_registry = - cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx)); - - cx.new(|cx| { - let mut thread = Thread::new( - project, - project_context, - context_server_registry, - Templates::new(), - None, - cx, - ); - thread.add_tool(crate::NowTool, None); - thread.add_tool(crate::WebSearchTool, None); - thread - }) - } - - #[gpui::test] - async fn test_validate_allowed_tools_succeeds_for_valid_tools(cx: &mut TestAppContext) { - let thread = create_thread_with_tools(cx).await; - - cx.update(|cx| { - assert!(SubagentTool::validate_allowed_tools(&None, &thread, cx).is_ok()); - - let valid_tools = Some(vec!["now".to_string()]); - assert!(SubagentTool::validate_allowed_tools(&valid_tools, &thread, cx).is_ok()); - - let both_tools = Some(vec!["now".to_string(), "web_search".to_string()]); - assert!(SubagentTool::validate_allowed_tools(&both_tools, &thread, cx).is_ok()); - }); - } - - #[gpui::test] - async fn test_validate_allowed_tools_fails_for_unknown_tools(cx: &mut TestAppContext) { - let thread = create_thread_with_tools(cx).await; - - cx.update(|cx| { - let unknown_tools = Some(vec!["nonexistent_tool".to_string()]); - let result = SubagentTool::validate_allowed_tools(&unknown_tools, &thread, cx); - assert!(result.is_err()); - let error_message = result.unwrap_err().to_string(); - assert!( - error_message.contains("'nonexistent_tool'"), - "Expected error to mention the invalid tool name, got: {error_message}" - ); - - let mixed_tools = Some(vec![ - "now".to_string(), - "fake_tool_a".to_string(), - "fake_tool_b".to_string(), - ]); - let result = SubagentTool::validate_allowed_tools(&mixed_tools, &thread, cx); - assert!(result.is_err()); - let error_message = result.unwrap_err().to_string(); - assert!( - error_message.contains("'fake_tool_a'") && error_message.contains("'fake_tool_b'"), - "Expected error to mention both invalid tool names, got: {error_message}" - ); - assert!( - !error_message.contains("'now'"), - "Expected error to not mention valid tool 'now', got: {error_message}" - ); - }); - } -} diff --git a/crates/eval/src/instance.rs b/crates/eval/src/instance.rs index 15694d6828fb780274a2a6fab736077eadc33e10..cda045cee6d0005e6cb5703ee82820d5aac38cbe 100644 --- a/crates/eval/src/instance.rs +++ b/crates/eval/src/instance.rs @@ -686,7 +686,6 @@ impl agent::ThreadEnvironment for EvalThreadEnvironment { _label: String, _initial_prompt: String, _timeout_ms: Option, - _allowed_tools: Option>, _cx: &mut App, ) -> Result> { unimplemented!()