agent: Improve the subagent task structure (#49629)

Ben Brandt and Jakub Konka created 1 month ago

Removes per-subagent tool filtering, since it was throwing off certain
models, and allows for more generic task prompts that don't always require
a summary. Since models usually end with a wrap-up message, the tool now
returns the subagent's final message instead of waiting for an extra summary turn.

This also sets us up to allow the agent to re-interact with an existing
subagent thread.

Release Notes:

- N/A

---------

Co-authored-by: Jakub Konka <kubkon@jakubkonka.com>

Change summary

crates/agent/src/agent.rs               |  72 +---
crates/agent/src/tests/mod.rs           | 382 +-------------------------
crates/agent/src/thread.rs              |  15 
crates/agent/src/tools/subagent_tool.rs | 191 +-----------
crates/eval/src/instance.rs             |   1 
5 files changed, 64 insertions(+), 597 deletions(-)

Detailed changes

crates/agent/src/agent.rs 🔗

@@ -342,7 +342,7 @@ impl NativeAgent {
     fn register_session(
         &mut self,
         thread_handle: Entity<Thread>,
-        allowed_tool_names: Option<Vec<&str>>,
+        allowed_tool_names: Option<Vec<SharedString>>,
         cx: &mut Context<Self>,
     ) -> Entity<AcpThread> {
         let connection = Rc::new(NativeAgentConnection(cx.entity()));
@@ -1590,7 +1590,6 @@ impl NativeThreadEnvironment {
         label: String,
         initial_prompt: String,
         timeout: Option<Duration>,
-        allowed_tools: Option<Vec<String>>,
         cx: &mut App,
     ) -> Result<Rc<dyn SubagentHandle>> {
         let parent_thread = parent_thread_entity.read(cx);
@@ -1602,20 +1601,7 @@ impl NativeThreadEnvironment {
                 MAX_SUBAGENT_DEPTH
             ));
         }
-
-        let allowed_tools = match allowed_tools {
-            Some(tools) => {
-                let parent_tool_names: std::collections::HashSet<&str> =
-                    parent_thread.tools.keys().map(|s| s.as_str()).collect();
-                Some(
-                    tools
-                        .into_iter()
-                        .filter(|t| parent_tool_names.contains(t.as_str()))
-                        .collect::<Vec<_>>(),
-                )
-            }
-            None => Some(parent_thread.tools.keys().map(|s| s.to_string()).collect()),
-        };
+        let allowed_tool_names = Some(parent_thread.tools.keys().cloned().collect::<Vec<_>>());
 
         let subagent_thread: Entity<Thread> = cx.new(|cx| {
             let mut thread = Thread::new_subagent(&parent_thread_entity, cx);
@@ -1626,13 +1612,7 @@ impl NativeThreadEnvironment {
         let session_id = subagent_thread.read(cx).id().clone();
 
         let acp_thread = agent.update(cx, |agent, cx| {
-            agent.register_session(
-                subagent_thread.clone(),
-                allowed_tools
-                    .as_ref()
-                    .map(|v| v.iter().map(|s| s.as_str()).collect()),
-                cx,
-            )
+            agent.register_session(subagent_thread.clone(), allowed_tool_names, cx)
         })?;
 
         parent_thread_entity.update(cx, |parent_thread, _cx| {
@@ -1676,7 +1656,6 @@ impl NativeThreadEnvironment {
             session_id,
             subagent_thread,
             parent_thread: parent_thread_entity.downgrade(),
-            acp_thread,
             wait_for_prompt_to_complete,
         }) as _)
     }
@@ -1722,7 +1701,6 @@ impl ThreadEnvironment for NativeThreadEnvironment {
         label: String,
         initial_prompt: String,
         timeout: Option<Duration>,
-        allowed_tools: Option<Vec<String>>,
         cx: &mut App,
     ) -> Result<Rc<dyn SubagentHandle>> {
         Self::create_subagent_thread(
@@ -1731,7 +1709,6 @@ impl ThreadEnvironment for NativeThreadEnvironment {
             label,
             initial_prompt,
             timeout,
-            allowed_tools,
             cx,
         )
     }
@@ -1748,7 +1725,6 @@ pub struct NativeSubagentHandle {
     session_id: acp::SessionId,
     parent_thread: WeakEntity<Thread>,
     subagent_thread: Entity<Thread>,
-    acp_thread: Entity<AcpThread>,
     wait_for_prompt_to_complete: Shared<Task<SubagentInitialPromptResult>>,
 }
 
@@ -1757,51 +1733,35 @@ impl SubagentHandle for NativeSubagentHandle {
         self.session_id.clone()
     }
 
-    fn wait_for_summary(&self, summary_prompt: String, cx: &AsyncApp) -> Task<Result<String>> {
+    fn wait_for_output(&self, cx: &AsyncApp) -> Task<Result<String>> {
         let thread = self.subagent_thread.clone();
-        let acp_thread = self.acp_thread.clone();
         let wait_for_prompt = self.wait_for_prompt_to_complete.clone();
 
-        let wait_for_summary_task = cx.spawn(async move |cx| {
-            let timed_out = match wait_for_prompt.await {
-                SubagentInitialPromptResult::Completed => false,
-                SubagentInitialPromptResult::Timeout => true,
-                SubagentInitialPromptResult::Cancelled => return Err(anyhow!("User cancelled")),
-            };
+        let subagent_session_id = self.session_id.clone();
+        let parent_thread = self.parent_thread.clone();
 
-            let summary_prompt = if timed_out {
-                thread.update(cx, |thread, cx| thread.cancel(cx)).await;
-                format!("{}\n{}", "The time to complete the task was exceeded. Stop with the task and follow the directions below:", summary_prompt)
-            } else {
-                summary_prompt
+        cx.spawn(async move |cx| {
+            match wait_for_prompt.await {
+                SubagentInitialPromptResult::Completed => {}
+                SubagentInitialPromptResult::Timeout => {
+                    return Err(anyhow!("The time to complete the task was exceeded."));
+                }
+                SubagentInitialPromptResult::Cancelled => return Err(anyhow!("User cancelled")),
             };
 
-            let response = acp_thread
-                .update(cx, |thread, cx| thread.send(vec![summary_prompt.into()], cx))
-                .await?;
-
-            let was_canceled = response.is_some_and(|r| r.stop_reason == acp::StopReason::Cancelled);
-            if was_canceled {
-                return Err(anyhow!("User cancelled"));
-            }
-
-            thread.read_with(cx, |thread, _cx| {
+            let result = thread.read_with(cx, |thread, _cx| {
                 thread
                     .last_message()
                     .map(|m| m.to_markdown())
                     .context("No response from subagent")
-            })
-        });
+            });
 
-        let subagent_session_id = self.session_id.clone();
-        let parent_thread = self.parent_thread.clone();
-        cx.spawn(async move |cx| {
-            let result = wait_for_summary_task.await;
             parent_thread
                 .update(cx, |parent_thread, cx| {
                     parent_thread.unregister_running_subagent(&subagent_session_id, cx)
                 })
                 .ok();
+
             result
         })
     }

crates/agent/src/tests/mod.rs 🔗

@@ -166,7 +166,7 @@ impl SubagentHandle for FakeSubagentHandle {
         self.session_id.clone()
     }
 
-    fn wait_for_summary(&self, _summary_prompt: String, cx: &AsyncApp) -> Task<Result<String>> {
+    fn wait_for_output(&self, cx: &AsyncApp) -> Task<Result<String>> {
         let task = self.wait_for_summary_task.clone();
         cx.background_spawn(async move { Ok(task.await) })
     }
@@ -208,7 +208,6 @@ impl crate::ThreadEnvironment for FakeThreadEnvironment {
         _label: String,
         _initial_prompt: String,
         _timeout_ms: Option<Duration>,
-        _allowed_tools: Option<Vec<String>>,
         _cx: &mut App,
     ) -> Result<Rc<dyn SubagentHandle>> {
         Ok(self
@@ -255,7 +254,6 @@ impl crate::ThreadEnvironment for MultiTerminalEnvironment {
         _label: String,
         _initial_prompt: String,
         _timeout: Option<Duration>,
-        _allowed_tools: Option<Vec<String>>,
         _cx: &mut App,
     ) -> Result<Rc<dyn SubagentHandle>> {
         unimplemented!()
@@ -4234,10 +4232,8 @@ async fn test_subagent_tool_call_end_to_end(cx: &mut TestAppContext) {
     model.send_last_completion_stream_text_chunk("spawning subagent");
     let subagent_tool_input = SubagentToolInput {
         label: "label".to_string(),
-        task_prompt: "subagent task prompt".to_string(),
-        summary_prompt: "subagent summary prompt".to_string(),
-        timeout_ms: None,
-        allowed_tools: None,
+        prompt: "subagent task prompt".to_string(),
+        timeout: None,
     };
     let subagent_tool_use = LanguageModelToolUse {
         id: "subagent_1".into(),
@@ -4276,11 +4272,6 @@ async fn test_subagent_tool_call_end_to_end(cx: &mut TestAppContext) {
 
     cx.run_until_parked();
 
-    model.send_last_completion_stream_text_chunk("subagent summary response");
-    model.end_last_completion_stream();
-
-    cx.run_until_parked();
-
     assert_eq!(
         subagent_thread.read_with(cx, |thread, cx| thread.to_markdown(cx)),
         indoc! {"
@@ -4292,14 +4283,6 @@ async fn test_subagent_tool_call_end_to_end(cx: &mut TestAppContext) {
 
             subagent task response
 
-            ## User
-
-            subagent summary prompt
-
-            ## Assistant
-
-            subagent summary response
-
         "}
     );
 
@@ -4325,8 +4308,8 @@ async fn test_subagent_tool_call_end_to_end(cx: &mut TestAppContext) {
 
                 ```json
                 {{
-                  "subagent_session_id": "{}",
-                  "summary": "subagent summary response\n"
+                  "session_id": "{}",
+                  "output": "subagent task response\n"
                 }}
                 ```
 
@@ -4399,10 +4382,8 @@ async fn test_subagent_tool_call_cancellation_during_task_prompt(cx: &mut TestAp
     model.send_last_completion_stream_text_chunk("spawning subagent");
     let subagent_tool_input = SubagentToolInput {
         label: "label".to_string(),
-        task_prompt: "subagent task prompt".to_string(),
-        summary_prompt: "subagent summary prompt".to_string(),
-        timeout_ms: None,
-        allowed_tools: None,
+        prompt: "subagent task prompt".to_string(),
+        timeout: None,
     };
     let subagent_tool_use = LanguageModelToolUse {
         id: "subagent_1".into(),
@@ -4479,153 +4460,6 @@ async fn test_subagent_tool_call_cancellation_during_task_prompt(cx: &mut TestAp
     });
 }
 
-#[gpui::test]
-async fn test_subagent_tool_call_cancellation_during_summary_prompt(cx: &mut TestAppContext) {
-    init_test(cx);
-    cx.update(|cx| {
-        LanguageModelRegistry::test(cx);
-    });
-    cx.update(|cx| {
-        cx.update_flags(true, vec!["subagents".to_string()]);
-    });
-
-    let fs = FakeFs::new(cx.executor());
-    fs.insert_tree(
-        "/",
-        json!({
-            "a": {
-                "b.md": "Lorem"
-            }
-        }),
-    )
-    .await;
-    let project = Project::test(fs.clone(), [path!("/a").as_ref()], cx).await;
-    let thread_store = cx.new(|cx| ThreadStore::new(cx));
-    let agent = NativeAgent::new(
-        project.clone(),
-        thread_store.clone(),
-        Templates::new(),
-        None,
-        fs.clone(),
-        &mut cx.to_async(),
-    )
-    .await
-    .unwrap();
-    let connection = Rc::new(NativeAgentConnection(agent.clone()));
-
-    let acp_thread = cx
-        .update(|cx| {
-            connection
-                .clone()
-                .new_session(project.clone(), Path::new(""), cx)
-        })
-        .await
-        .unwrap();
-    let session_id = acp_thread.read_with(cx, |thread, _| thread.session_id().clone());
-    let thread = agent.read_with(cx, |agent, _| {
-        agent.sessions.get(&session_id).unwrap().thread.clone()
-    });
-    let model = Arc::new(FakeLanguageModel::default());
-
-    // Ensure empty threads are not saved, even if they get mutated.
-    thread.update(cx, |thread, cx| {
-        thread.set_model(model.clone(), cx);
-    });
-    cx.run_until_parked();
-
-    let send = acp_thread.update(cx, |thread, cx| thread.send_raw("Prompt", cx));
-    cx.run_until_parked();
-    model.send_last_completion_stream_text_chunk("spawning subagent");
-    let subagent_tool_input = SubagentToolInput {
-        label: "label".to_string(),
-        task_prompt: "subagent task prompt".to_string(),
-        summary_prompt: "subagent summary prompt".to_string(),
-        timeout_ms: None,
-        allowed_tools: None,
-    };
-    let subagent_tool_use = LanguageModelToolUse {
-        id: "subagent_1".into(),
-        name: SubagentTool::NAME.into(),
-        raw_input: serde_json::to_string(&subagent_tool_input).unwrap(),
-        input: serde_json::to_value(&subagent_tool_input).unwrap(),
-        is_input_complete: true,
-        thought_signature: None,
-    };
-    model.send_last_completion_stream_event(LanguageModelCompletionEvent::ToolUse(
-        subagent_tool_use,
-    ));
-    model.end_last_completion_stream();
-
-    cx.run_until_parked();
-
-    let subagent_session_id = thread.read_with(cx, |thread, cx| {
-        thread
-            .running_subagent_ids(cx)
-            .get(0)
-            .expect("subagent thread should be running")
-            .clone()
-    });
-    let subagent_acp_thread = agent.read_with(cx, |agent, _cx| {
-        agent
-            .sessions
-            .get(&subagent_session_id)
-            .expect("subagent session should exist")
-            .acp_thread
-            .clone()
-    });
-
-    model.send_last_completion_stream_text_chunk("subagent task response");
-    model.end_last_completion_stream();
-
-    cx.run_until_parked();
-
-    acp_thread.update(cx, |thread, cx| thread.cancel(cx)).await;
-
-    cx.run_until_parked();
-
-    send.await.unwrap();
-
-    acp_thread.read_with(cx, |thread, cx| {
-        assert_eq!(thread.status(), ThreadStatus::Idle);
-        assert_eq!(
-            thread.to_markdown(cx),
-            indoc! {"
-                ## User
-
-                Prompt
-
-                ## Assistant
-
-                spawning subagent
-
-                **Tool Call: label**
-                Status: Canceled
-
-            "}
-        );
-    });
-    subagent_acp_thread.read_with(cx, |thread, cx| {
-        assert_eq!(thread.status(), ThreadStatus::Idle);
-        assert_eq!(
-            thread.to_markdown(cx),
-            indoc! {"
-                ## User
-
-                subagent task prompt
-
-                ## Assistant
-
-                subagent task response
-
-                ## User
-
-                subagent summary prompt
-
-            "}
-        );
-    });
-}
-
 #[gpui::test]
 async fn test_subagent_tool_is_present_when_feature_flag_enabled(cx: &mut TestAppContext) {
     init_test(cx);
@@ -4818,105 +4652,6 @@ async fn test_parent_cancel_stops_subagent(cx: &mut TestAppContext) {
     });
 }
 
-#[gpui::test]
-async fn test_subagent_tool_returns_summary(cx: &mut TestAppContext) {
-    init_test(cx);
-
-    always_allow_tools(cx);
-
-    cx.update(|cx| {
-        cx.update_flags(true, vec!["subagents".to_string()]);
-    });
-
-    let fs = FakeFs::new(cx.executor());
-    fs.insert_tree(path!("/test"), json!({})).await;
-    let project = Project::test(fs.clone(), [path!("/test").as_ref()], cx).await;
-    let project_context = cx.new(|_cx| ProjectContext::default());
-    let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
-    let context_server_registry =
-        cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
-    cx.update(LanguageModelRegistry::test);
-    let model = Arc::new(FakeLanguageModel::default());
-    let thread_store = cx.new(|cx| ThreadStore::new(cx));
-    let native_agent = NativeAgent::new(
-        project.clone(),
-        thread_store,
-        Templates::new(),
-        None,
-        fs,
-        &mut cx.to_async(),
-    )
-    .await
-    .unwrap();
-    let parent_thread = cx.new(|cx| {
-        Thread::new(
-            project.clone(),
-            project_context,
-            context_server_registry,
-            Templates::new(),
-            Some(model.clone()),
-            cx,
-        )
-    });
-
-    let subagent_handle = cx
-        .update(|cx| {
-            NativeThreadEnvironment::create_subagent_thread(
-                native_agent.downgrade(),
-                parent_thread.clone(),
-                "some title".to_string(),
-                "task prompt".to_string(),
-                Some(Duration::from_millis(10)),
-                None,
-                cx,
-            )
-        })
-        .expect("Failed to create subagent");
-
-    let summary_task =
-        subagent_handle.wait_for_summary("summary prompt".to_string(), &cx.to_async());
-
-    cx.run_until_parked();
-
-    {
-        let messages = model.pending_completions().last().unwrap().messages.clone();
-        // Ensure that model received a system prompt
-        assert_eq!(messages[0].role, Role::System);
-        // Ensure that model received a task prompt
-        assert_eq!(messages[1].role, Role::User);
-        assert_eq!(
-            messages[1].content,
-            vec![MessageContent::Text("task prompt".to_string())]
-        );
-    }
-
-    model.send_last_completion_stream_text_chunk("Some task response...");
-    model.end_last_completion_stream();
-
-    cx.run_until_parked();
-
-    {
-        let messages = model.pending_completions().last().unwrap().messages.clone();
-        assert_eq!(messages[2].role, Role::Assistant);
-        assert_eq!(
-            messages[2].content,
-            vec![MessageContent::Text("Some task response...".to_string())]
-        );
-        // Ensure that model received a summary prompt
-        assert_eq!(messages[3].role, Role::User);
-        assert_eq!(
-            messages[3].content,
-            vec![MessageContent::Text("summary prompt".to_string())]
-        );
-    }
-
-    model.send_last_completion_stream_text_chunk("Some summary...");
-    model.end_last_completion_stream();
-
-    let result = summary_task.await;
-    assert_eq!(result.unwrap(), "Some summary...\n");
-}
-
 #[gpui::test]
 async fn test_subagent_tool_includes_cancellation_notice_when_timeout_is_exceeded(
     cx: &mut TestAppContext,
@@ -4967,15 +4702,13 @@ async fn test_subagent_tool_includes_cancellation_notice_when_timeout_is_exceede
                 parent_thread.clone(),
                 "some title".to_string(),
                 "task prompt".to_string(),
-                Some(Duration::from_millis(100)),
-                None,
+                Some(Duration::from_secs(1)),
                 cx,
             )
         })
         .expect("Failed to create subagent");
 
-    let summary_task =
-        subagent_handle.wait_for_summary("summary prompt".to_string(), &cx.to_async());
+    let summary_task = subagent_handle.wait_for_output(&cx.to_async());
 
     cx.run_until_parked();
 
@@ -4991,29 +4724,16 @@ async fn test_subagent_tool_includes_cancellation_notice_when_timeout_is_exceede
     }
 
     // Don't complete the initial model stream — let the timeout expire instead.
-    cx.executor().advance_clock(Duration::from_millis(200));
+    cx.executor().advance_clock(Duration::from_secs(2));
     cx.run_until_parked();
 
-    // After the timeout fires, the thread is cancelled and context_low_prompt is sent
-    // instead of the summary_prompt.
-    {
-        let messages = model.pending_completions().last().unwrap().messages.clone();
-        let last_user_message = messages
-            .iter()
-            .rev()
-            .find(|m| m.role == Role::User)
-            .unwrap();
-        assert_eq!(
-            last_user_message.content,
-            vec![MessageContent::Text("The time to complete the task was exceeded. Stop with the task and follow the directions below:\nsummary prompt".to_string())]
-        );
-    }
-
-    model.send_last_completion_stream_text_chunk("Some context low response...");
     model.end_last_completion_stream();
 
-    let result = summary_task.await;
-    assert_eq!(result.unwrap(), "Some context low response...\n");
+    let error = summary_task.await.unwrap_err();
+    assert_eq!(
+        error.to_string(),
+        "The time to complete the task was exceeded."
+    );
 }
 
 #[gpui::test]
@@ -5068,7 +4788,6 @@ async fn test_subagent_inherits_parent_thread_tools(cx: &mut TestAppContext) {
                 "some title".to_string(),
                 "task prompt".to_string(),
                 Some(Duration::from_millis(10)),
-                None,
                 cx,
             )
         })
@@ -5089,77 +4808,6 @@ async fn test_subagent_inherits_parent_thread_tools(cx: &mut TestAppContext) {
     assert!(tools.contains(&"list_directory".to_string()));
 }
 
-#[gpui::test]
-async fn test_subagent_tool_restricts_tool_access(cx: &mut TestAppContext) {
-    init_test(cx);
-
-    always_allow_tools(cx);
-
-    cx.update(|cx| {
-        cx.update_flags(true, vec!["subagents".to_string()]);
-    });
-
-    let fs = FakeFs::new(cx.executor());
-    fs.insert_tree(path!("/test"), json!({})).await;
-    let project = Project::test(fs.clone(), [path!("/test").as_ref()], cx).await;
-    let project_context = cx.new(|_cx| ProjectContext::default());
-    let context_server_store = project.read_with(cx, |project, _| project.context_server_store());
-    let context_server_registry =
-        cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
-    cx.update(LanguageModelRegistry::test);
-    let model = Arc::new(FakeLanguageModel::default());
-    let thread_store = cx.new(|cx| ThreadStore::new(cx));
-    let native_agent = NativeAgent::new(
-        project.clone(),
-        thread_store,
-        Templates::new(),
-        None,
-        fs,
-        &mut cx.to_async(),
-    )
-    .await
-    .unwrap();
-    let parent_thread = cx.new(|cx| {
-        let mut thread = Thread::new(
-            project.clone(),
-            project_context,
-            context_server_registry,
-            Templates::new(),
-            Some(model.clone()),
-            cx,
-        );
-        thread.add_tool(ListDirectoryTool::new(project.clone()), None);
-        thread.add_tool(GrepTool::new(project.clone()), None);
-        thread
-    });
-
-    let _subagent_handle = cx
-        .update(|cx| {
-            NativeThreadEnvironment::create_subagent_thread(
-                native_agent.downgrade(),
-                parent_thread.clone(),
-                "some title".to_string(),
-                "task prompt".to_string(),
-                Some(Duration::from_millis(10)),
-                Some(vec!["grep".to_string()]),
-                cx,
-            )
-        })
-        .expect("Failed to create subagent");
-
-    cx.run_until_parked();
-
-    let tools = model
-        .pending_completions()
-        .last()
-        .unwrap()
-        .tools
-        .iter()
-        .map(|tool| tool.name.clone())
-        .collect::<Vec<_>>();
-    assert_eq!(tools, vec!["grep"]);
-}
-
 #[gpui::test]
 async fn test_edit_file_tool_deny_rule_blocks_edit(cx: &mut TestAppContext) {
     init_test(cx);

crates/agent/src/thread.rs 🔗

@@ -601,7 +601,7 @@ pub trait TerminalHandle {
 
 pub trait SubagentHandle {
     fn id(&self) -> acp::SessionId;
-    fn wait_for_summary(&self, summary_prompt: String, cx: &AsyncApp) -> Task<Result<String>>;
+    fn wait_for_output(&self, cx: &AsyncApp) -> Task<Result<String>>;
 }
 
 pub trait ThreadEnvironment {
@@ -619,7 +619,6 @@ pub trait ThreadEnvironment {
         label: String,
         initial_prompt: String,
         timeout: Option<Duration>,
-        allowed_tools: Option<Vec<String>>,
         cx: &mut App,
     ) -> Result<Rc<dyn SubagentHandle>>;
 }
@@ -1327,7 +1326,7 @@ impl Thread {
 
     pub fn add_default_tools(
         &mut self,
-        allowed_tool_names: Option<Vec<&str>>,
+        allowed_tool_names: Option<Vec<SharedString>>,
         environment: Rc<dyn ThreadEnvironment>,
         cx: &mut Context<Self>,
     ) {
@@ -1421,8 +1420,14 @@ impl Thread {
         }
     }
 
-    pub fn add_tool<T: AgentTool>(&mut self, tool: T, allowed_tool_names: Option<&Vec<&str>>) {
-        if allowed_tool_names.is_some_and(|tool_names| !tool_names.contains(&T::NAME)) {
+    pub fn add_tool<T: AgentTool>(
+        &mut self,
+        tool: T,
+        allowed_tool_names: Option<&Vec<SharedString>>,
+    ) {
+        if allowed_tool_names
+            .is_some_and(|tool_names| !tool_names.iter().any(|x| x.as_str() == T::NAME))
+        {
             return;
         }

crates/agent/src/tools/subagent_tool.rs 🔗

@@ -1,7 +1,7 @@
 use acp_thread::SUBAGENT_SESSION_ID_META_KEY;
 use agent_client_protocol as acp;
 use anyhow::{Result, anyhow};
-use gpui::{App, Entity, SharedString, Task, WeakEntity};
+use gpui::{App, SharedString, Task, WeakEntity};
 use language_model::LanguageModelToolResultContent;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
@@ -10,64 +10,42 @@ use std::{rc::Rc, time::Duration};
 
 use crate::{AgentTool, Thread, ThreadEnvironment, ToolCallEventStream};
 
-/// Spawns a subagent with its own context window to perform a delegated task.
+/// Spawns an agent to perform a delegated task.
 ///
 /// Use this tool when you want to do any of the following:
 /// - Perform an investigation where all you need to know is the outcome, not the research that led to that outcome.
 /// - Complete a self-contained task where you need to know if it succeeded or failed (and how), but none of its intermediate output.
 /// - Run multiple tasks in parallel that would take significantly longer to run sequentially.
 ///
-/// You control what the subagent does by providing:
-/// 1. A task prompt describing what the subagent should do
-/// 2. A summary prompt that tells the subagent how to summarize its work when done
-/// 3. A "context running out" prompt for when the subagent is low on tokens
+/// You control what the agent does by providing a prompt describing what the agent should do. The agent has access to the same tools you do.
 ///
-/// Each subagent has access to the same tools you do. You can optionally restrict
-/// which tools each subagent can use.
+/// You will receive the agent's final message.
 ///
 /// Note:
-/// - Maximum 8 subagents can run in parallel
-/// - Subagents cannot use tools you don't have access to
-/// - If spawning multiple subagents that might write to the filesystem, provide
-///   guidance on how to avoid conflicts (e.g. assign each to different directories)
-/// - Instruct subagents to be concise in their summaries to conserve your context
+/// - Agents cannot use tools you don't have access to.
+/// - If spawning multiple agents that might write to the filesystem, provide guidance on how to avoid conflicts (e.g. assign each to different directories)
 #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
 pub struct SubagentToolInput {
-    /// Short label displayed in the UI while the subagent runs (e.g., "Researching alternatives")
+    /// Short label displayed in the UI while the agent runs (e.g., "Researching alternatives")
     pub label: String,
-
-    /// The initial prompt that tells the subagent what task to perform.
-    /// Be specific about what you want the subagent to accomplish.
-    pub task_prompt: String,
-
-    /// The prompt sent to the subagent when it completes its task, asking it
-    /// to summarize what it did and return results. This summary becomes the
-    /// tool result you receive.
-    ///
-    /// Example: "Summarize what you found, listing the top 3 alternatives with pros/cons."
-    pub summary_prompt: String,
-
-    /// Optional: Maximum runtime in milliseconds. If exceeded, the subagent is
-    /// asked to summarize and return. No timeout by default.
-    #[serde(default)]
-    pub timeout_ms: Option<u64>,
-
-    /// Optional: List of tool names the subagent is allowed to use.
-    /// If not provided, the subagent can use all tools available to the parent.
-    /// Tools listed here must be a subset of the parent's available tools.
+    /// The prompt that tells the agent what task to perform. Be specific about what you want the agent to accomplish.
+    pub prompt: String,
+    /// Optional: Maximum runtime in seconds. No timeout by default.
     #[serde(default)]
-    pub allowed_tools: Option<Vec<String>>,
+    pub timeout: Option<u64>,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
 pub struct SubagentToolOutput {
-    pub subagent_session_id: acp::SessionId,
-    pub summary: String,
+    pub session_id: acp::SessionId,
+    pub output: String,
 }
 
 impl From<SubagentToolOutput> for LanguageModelToolResultContent {
     fn from(output: SubagentToolOutput) -> Self {
-        output.summary.into()
+        serde_json::to_string(&output)
+            .expect("Failed to serialize SubagentToolOutput")
+            .into()
     }
 }
 
@@ -84,32 +62,6 @@ impl SubagentTool {
             environment,
         }
     }
-
-    fn validate_allowed_tools(
-        allowed_tools: &Option<Vec<String>>,
-        parent_thread: &Entity<Thread>,
-        cx: &App,
-    ) -> Result<()> {
-        let Some(allowed_tools) = allowed_tools else {
-            return Ok(());
-        };
-
-        let thread = parent_thread.read(cx);
-        let invalid_tools: Vec<_> = allowed_tools
-            .iter()
-            .filter(|tool| !thread.tools.contains_key(tool.as_str()))
-            .map(|s| format!("'{s}'"))
-            .collect::<Vec<_>>();
-
-        if !invalid_tools.is_empty() {
-            return Err(anyhow!(
-                "The following tools do not exist: {}",
-                invalid_tools.join(", ")
-            ));
-        }
-
-        Ok(())
-    }
 }
 
 impl AgentTool for SubagentTool {
@@ -142,18 +94,11 @@ impl AgentTool for SubagentTool {
             return Task::ready(Err(anyhow!("Parent thread no longer exists")));
         };
 
-        if let Err(e) =
-            Self::validate_allowed_tools(&input.allowed_tools, &parent_thread_entity, cx)
-        {
-            return Task::ready(Err(e));
-        }
-
         let subagent = match self.environment.create_subagent(
             parent_thread_entity,
             input.label,
-            input.task_prompt,
-            input.timeout_ms.map(|ms| Duration::from_millis(ms)),
-            input.allowed_tools,
+            input.prompt,
+            input.timeout.map(|secs| Duration::from_secs(secs)),
             cx,
         ) {
             Ok(subagent) => subagent,
@@ -170,10 +115,10 @@ impl AgentTool for SubagentTool {
         event_stream.update_fields_with_meta(acp::ToolCallUpdateFields::new(), Some(meta));
 
         cx.spawn(async move |cx| {
-            let summary = subagent.wait_for_summary(input.summary_prompt, cx).await?;
+            let output = subagent.wait_for_output(cx).await?;
             Ok(SubagentToolOutput {
-                subagent_session_id,
-                summary,
+                session_id: subagent_session_id,
+                output,
             })
         })
     }
@@ -185,102 +130,12 @@ impl AgentTool for SubagentTool {
         event_stream: ToolCallEventStream,
         _cx: &mut App,
     ) -> Result<()> {
-        event_stream.subagent_spawned(output.subagent_session_id.clone());
+        event_stream.subagent_spawned(output.session_id.clone());
         let meta = acp::Meta::from_iter([(
             SUBAGENT_SESSION_ID_META_KEY.into(),
-            output.subagent_session_id.to_string().into(),
+            output.session_id.to_string().into(),
         )]);
         event_stream.update_fields_with_meta(acp::ToolCallUpdateFields::new(), Some(meta));
         Ok(())
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::{ContextServerRegistry, Templates, Thread};
-    use fs::FakeFs;
-    use gpui::{AppContext as _, TestAppContext};
-    use project::Project;
-    use prompt_store::ProjectContext;
-    use serde_json::json;
-    use settings::SettingsStore;
-    use util::path;
-
-    async fn create_thread_with_tools(cx: &mut TestAppContext) -> Entity<Thread> {
-        cx.update(|cx| {
-            let settings_store = SettingsStore::test(cx);
-            cx.set_global(settings_store);
-        });
-        let fs = FakeFs::new(cx.executor());
-        fs.insert_tree(path!("/test"), json!({})).await;
-        let project = Project::test(fs, [path!("/test").as_ref()], cx).await;
-        let project_context = cx.new(|_cx| ProjectContext::default());
-        let context_server_store =
-            project.read_with(cx, |project, _| project.context_server_store());
-        let context_server_registry =
-            cx.new(|cx| ContextServerRegistry::new(context_server_store.clone(), cx));
-
-        cx.new(|cx| {
-            let mut thread = Thread::new(
-                project,
-                project_context,
-                context_server_registry,
-                Templates::new(),
-                None,
-                cx,
-            );
-            thread.add_tool(crate::NowTool, None);
-            thread.add_tool(crate::WebSearchTool, None);
-            thread
-        })
-    }
-
-    #[gpui::test]
-    async fn test_validate_allowed_tools_succeeds_for_valid_tools(cx: &mut TestAppContext) {
-        let thread = create_thread_with_tools(cx).await;
-
-        cx.update(|cx| {
-            assert!(SubagentTool::validate_allowed_tools(&None, &thread, cx).is_ok());
-
-            let valid_tools = Some(vec!["now".to_string()]);
-            assert!(SubagentTool::validate_allowed_tools(&valid_tools, &thread, cx).is_ok());
-
-            let both_tools = Some(vec!["now".to_string(), "web_search".to_string()]);
-            assert!(SubagentTool::validate_allowed_tools(&both_tools, &thread, cx).is_ok());
-        });
-    }
-
-    #[gpui::test]
-    async fn test_validate_allowed_tools_fails_for_unknown_tools(cx: &mut TestAppContext) {
-        let thread = create_thread_with_tools(cx).await;
-
-        cx.update(|cx| {
-            let unknown_tools = Some(vec!["nonexistent_tool".to_string()]);
-            let result = SubagentTool::validate_allowed_tools(&unknown_tools, &thread, cx);
-            assert!(result.is_err());
-            let error_message = result.unwrap_err().to_string();
-            assert!(
-                error_message.contains("'nonexistent_tool'"),
-                "Expected error to mention the invalid tool name, got: {error_message}"
-            );
-
-            let mixed_tools = Some(vec![
-                "now".to_string(),
-                "fake_tool_a".to_string(),
-                "fake_tool_b".to_string(),
-            ]);
-            let result = SubagentTool::validate_allowed_tools(&mixed_tools, &thread, cx);
-            assert!(result.is_err());
-            let error_message = result.unwrap_err().to_string();
-            assert!(
-                error_message.contains("'fake_tool_a'") && error_message.contains("'fake_tool_b'"),
-                "Expected error to mention both invalid tool names, got: {error_message}"
-            );
-            assert!(
-                !error_message.contains("'now'"),
-                "Expected error to not mention valid tool 'now', got: {error_message}"
-            );
-        });
-    }
-}

crates/eval/src/instance.rs 🔗

@@ -686,7 +686,6 @@ impl agent::ThreadEnvironment for EvalThreadEnvironment {
         _label: String,
         _initial_prompt: String,
         _timeout_ms: Option<Duration>,
-        _allowed_tools: Option<Vec<String>>,
         _cx: &mut App,
     ) -> Result<Rc<dyn agent::SubagentHandle>> {
         unimplemented!()