diff --git a/.rules b/.rules index 82d15eb9e88299ee7c7fe6c717b2da2646e676a7..7c98c65d7e0eaf3ed0d57898dbd8acee28a220ae 100644 --- a/.rules +++ b/.rules @@ -26,6 +26,12 @@ }); ``` +# Timers in tests + +* In GPUI tests, prefer GPUI executor timers over `smol::Timer::after(...)` when you need timeouts, delays, or to drive `run_until_parked()`: + - Use `cx.background_executor().timer(duration).await` (or `cx.background_executor.timer(duration).await` in `TestAppContext`) so the work is scheduled on GPUI's dispatcher. + - Avoid `smol::Timer::after(...)` for test timeouts when you rely on `run_until_parked()`, because it may not be tracked by GPUI's scheduler and can lead to "nothing left to run" when pumping. + # GPUI GPUI is a UI framework which also provides primitives for state and concurrency management. diff --git a/crates/agent/src/agent.rs b/crates/agent/src/agent.rs index cf98a24ac52579fc65bdbcc3444615c89625812a..092f735bb7c3713e70ea137c2bab485315aa8849 100644 --- a/crates/agent/src/agent.rs +++ b/crates/agent/src/agent.rs @@ -1219,6 +1219,15 @@ impl TerminalHandle for AcpTerminalHandle { self.terminal .read_with(cx, |term, cx| term.current_output(cx)) } + + fn kill(&self, cx: &AsyncApp) -> Result<()> { + cx.update(|cx| { + self.terminal.update(cx, |terminal, cx| { + terminal.kill(cx); + }); + })?; + Ok(()) + } } #[cfg(test)] diff --git a/crates/agent/src/templates/system_prompt.hbs b/crates/agent/src/templates/system_prompt.hbs index 4620647135631fdb367b0dc2604e89770a938c07..2477e46a85183813f61bb60d7e3de7f119a4f00c 100644 --- a/crates/agent/src/templates/system_prompt.hbs +++ b/crates/agent/src/templates/system_prompt.hbs @@ -16,7 +16,7 @@ You are a highly skilled software engineer with extensive knowledge in many prog 3. DO NOT use tools to access items that are already available in the context section. 4. Use only the tools that are currently available. 5. DO NOT use a tool that is not available just because it appears in the conversation. This means the user turned it off. -6. NEVER run commands that don't terminate on their own such as web servers (like `npm run start`, `npm run dev`, `python -m http.server`, etc) or file watchers. +6. When running commands that may run indefinitely or for a long time (such as build scripts, tests, servers, or file watchers), specify `timeout_ms` to bound runtime. If the command times out, the user can always ask you to run it again with a longer timeout or no timeout if they're willing to wait or cancel manually. 7. Avoid HTML entity escaping - use plain characters instead. ## Searching and Reading diff --git a/crates/agent/src/tests/mod.rs b/crates/agent/src/tests/mod.rs index 9ff870353279635957cd2b84f418f881c3444aa2..5a581c5db80a4c4f527efc8b1711fbf16c8097f8 100644 --- a/crates/agent/src/tests/mod.rs +++ b/crates/agent/src/tests/mod.rs @@ -9,14 +9,16 @@ use collections::IndexMap; use context_server::{ContextServer, ContextServerCommand, ContextServerId}; use fs::{FakeFs, Fs}; use futures::{ - StreamExt, + FutureExt as _, StreamExt, channel::{ mpsc::{self, UnboundedReceiver}, oneshot, }, + future::{Fuse, Shared}, }; use gpui::{ - App, AppContext, Entity, Task, TestAppContext, UpdateGlobal, http_client::FakeHttpClient, + App, AppContext, AsyncApp, Entity, Task, TestAppContext, UpdateGlobal, + http_client::FakeHttpClient, }; use indoc::indoc; use language_model::{ @@ -35,12 +37,109 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use serde_json::json; use settings::{Settings, SettingsStore}; -use std::{path::Path, rc::Rc, sync::Arc, time::Duration}; +use std::{ + path::Path, + pin::Pin, + rc::Rc, + sync::{ + Arc, + atomic::{AtomicBool, Ordering}, + }, + time::Duration, +}; use util::path; mod test_tools; use test_tools::*; +fn init_test(cx: &mut TestAppContext) { + cx.update(|cx| { + let settings_store = SettingsStore::test(cx); + cx.set_global(settings_store); + }); +} + +struct FakeTerminalHandle { + killed: Arc, + wait_for_exit: Shared>, + output: acp::TerminalOutputResponse, + id: acp::TerminalId, +} + +impl FakeTerminalHandle { + fn new_never_exits(cx: &mut App) -> Self { + let killed = Arc::new(AtomicBool::new(false)); + + let killed_for_task = killed.clone(); + let wait_for_exit = cx + .spawn(async move |cx| { + loop { + if killed_for_task.load(Ordering::SeqCst) { + return acp::TerminalExitStatus::new(); + } + cx.background_executor() + .timer(Duration::from_millis(1)) + .await; + } + }) + .shared(); + + Self { + killed, + wait_for_exit, + output: acp::TerminalOutputResponse::new("partial output".to_string(), false), + id: acp::TerminalId::new("fake_terminal".to_string()), + } + } + + fn was_killed(&self) -> bool { + self.killed.load(Ordering::SeqCst) + } +} + +impl crate::TerminalHandle for FakeTerminalHandle { + fn id(&self, _cx: &AsyncApp) -> Result { + Ok(self.id.clone()) + } + + fn current_output(&self, _cx: &AsyncApp) -> Result { + Ok(self.output.clone()) + } + + fn wait_for_exit(&self, _cx: &AsyncApp) -> Result>> { + Ok(self.wait_for_exit.clone()) + } + + fn kill(&self, _cx: &AsyncApp) -> Result<()> { + self.killed.store(true, Ordering::SeqCst); + Ok(()) + } +} + +struct FakeThreadEnvironment { + handle: Rc, +} + +impl crate::ThreadEnvironment for FakeThreadEnvironment { + fn create_terminal( + &self, + _command: String, + _cwd: Option, + _output_byte_limit: Option, + _cx: &mut AsyncApp, + ) -> Task>> { + Task::ready(Ok(self.handle.clone() as Rc)) + } +} + +fn always_allow_tools(cx: &mut TestAppContext) { + cx.update(|cx| { + let mut settings = agent_settings::AgentSettings::get_global(cx).clone(); + settings.always_allow_tool_actions = true; + agent_settings::AgentSettings::override_global(settings, cx); + }); +} + #[gpui::test] async fn test_echo(cx: &mut TestAppContext) { let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await; @@ -71,6 +170,120 @@ async fn test_echo(cx: &mut TestAppContext) { assert_eq!(stop_events(events), vec![acp::StopReason::EndTurn]); } +#[gpui::test] +async fn test_terminal_tool_timeout_kills_handle(cx: &mut TestAppContext) { + init_test(cx); + always_allow_tools(cx); + + let fs = FakeFs::new(cx.executor()); + let project = Project::test(fs, [], cx).await; + + let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx))); + let environment = Rc::new(FakeThreadEnvironment { + handle: handle.clone(), + }); + + #[allow(clippy::arc_with_non_send_sync)] + let tool = Arc::new(crate::TerminalTool::new(project, environment)); + let (event_stream, mut rx) = crate::ToolCallEventStream::test(); + + let task = cx.update(|cx| { + tool.run( + crate::TerminalToolInput { + command: "sleep 1000".to_string(), + cd: ".".to_string(), + timeout_ms: Some(5), + }, + event_stream, + cx, + ) + }); + + let update = rx.expect_update_fields().await; + assert!( + update.content.iter().any(|blocks| { + blocks + .iter() + .any(|c| matches!(c, acp::ToolCallContent::Terminal(_))) + }), + "expected tool call update to include terminal content" + ); + + let mut task_future: Pin>>>> = Box::pin(task.fuse()); + + let deadline = std::time::Instant::now() + Duration::from_millis(500); + loop { + if let Some(result) = task_future.as_mut().now_or_never() { + let result = result.expect("terminal tool task should complete"); + + assert!( + handle.was_killed(), + "expected terminal handle to be killed on timeout" + ); + assert!( + result.contains("partial output"), + "expected result to include terminal output, got: {result}" + ); + return; + } + + if std::time::Instant::now() >= deadline { + panic!("timed out waiting for terminal tool task to complete"); + } + + cx.run_until_parked(); + cx.background_executor.timer(Duration::from_millis(1)).await; + } +} + +#[gpui::test] +#[ignore] +async fn test_terminal_tool_without_timeout_does_not_kill_handle(cx: &mut TestAppContext) { + init_test(cx); + always_allow_tools(cx); + + let fs = FakeFs::new(cx.executor()); + let project = Project::test(fs, [], cx).await; + + let handle = Rc::new(cx.update(|cx| FakeTerminalHandle::new_never_exits(cx))); + let environment = Rc::new(FakeThreadEnvironment { + handle: handle.clone(), + }); + + #[allow(clippy::arc_with_non_send_sync)] + let tool = Arc::new(crate::TerminalTool::new(project, environment)); + let (event_stream, mut rx) = crate::ToolCallEventStream::test(); + + let _task = cx.update(|cx| { + tool.run( + crate::TerminalToolInput { + command: "sleep 1000".to_string(), + cd: ".".to_string(), + timeout_ms: None, + }, + event_stream, + cx, + ) + }); + + let update = rx.expect_update_fields().await; + assert!( + update.content.iter().any(|blocks| { + blocks + .iter() + .any(|c| matches!(c, acp::ToolCallContent::Terminal(_))) + }), + "expected tool call update to include terminal content" + ); + + smol::Timer::after(Duration::from_millis(25)).await; + + assert!( + !handle.was_killed(), + "did not expect terminal handle to be killed without a timeout" + ); +} + #[gpui::test] async fn test_thinking(cx: &mut TestAppContext) { let ThreadTest { model, thread, .. } = setup(cx, TestModel::Fake).await; diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs index 4aabf8069bc3380b6908187b28517f99a9548f26..a51dd3bf9fd5213c88a0ab56ef9ec9b563a90756 100644 --- a/crates/agent/src/thread.rs +++ b/crates/agent/src/thread.rs @@ -530,6 +530,7 @@ pub trait TerminalHandle { fn id(&self, cx: &AsyncApp) -> Result; fn current_output(&self, cx: &AsyncApp) -> Result; fn wait_for_exit(&self, cx: &AsyncApp) -> Result>>; + fn kill(&self, cx: &AsyncApp) -> Result<()>; } pub trait ThreadEnvironment { diff --git a/crates/agent/src/tools/terminal_tool.rs b/crates/agent/src/tools/terminal_tool.rs index 2db4a2d86038579fca62224f3a7c567f93fc6922..f3302fb1894612287bf04acfbfa301188bf853fb 100644 --- a/crates/agent/src/tools/terminal_tool.rs +++ b/crates/agent/src/tools/terminal_tool.rs @@ -1,6 +1,7 @@ use agent_client_protocol as acp; use anyhow::Result; -use gpui::{App, Entity, SharedString, Task}; +use futures::FutureExt as _; +use gpui::{App, AppContext, Entity, SharedString, Task}; use project::Project; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -8,6 +9,7 @@ use std::{ path::{Path, PathBuf}, rc::Rc, sync::Arc, + time::Duration, }; use util::markdown::MarkdownInlineCode; @@ -25,13 +27,17 @@ const COMMAND_OUTPUT_LIMIT: u64 = 16 * 1024; /// /// Do not use this tool for commands that run indefinitely, such as servers (like `npm run start`, `npm run dev`, `python -m http.server`, etc) or file watchers that don't terminate on their own. /// +/// For potentially long-running commands, prefer specifying `timeout_ms` to bound runtime and prevent indefinite hangs. +/// /// Remember that each invocation of this tool will spawn a new shell process, so you can't rely on any state from previous invocations. #[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] pub struct TerminalToolInput { /// The one-liner command to execute. - command: String, + pub command: String, /// Working directory for the command. This must be one of the root directories of the project. - cd: String, + pub cd: String, + /// Optional maximum runtime (in milliseconds). If exceeded, the running terminal task is killed. + pub timeout_ms: Option, } pub struct TerminalTool { @@ -116,7 +122,26 @@ impl AgentTool for TerminalTool { acp::ToolCallContent::Terminal(acp::Terminal::new(terminal_id)), ])); - let exit_status = terminal.wait_for_exit(cx)?.await; + let timeout = input.timeout_ms.map(Duration::from_millis); + + let exit_status = match timeout { + Some(timeout) => { + let wait_for_exit = terminal.wait_for_exit(cx)?; + let timeout_task = cx.background_spawn(async move { + smol::Timer::after(timeout).await; + }); + + futures::select! { + status = wait_for_exit.clone().fuse() => status, + _ = timeout_task.fuse() => { + terminal.kill(cx)?; + wait_for_exit.await + } + } + } + None => terminal.wait_for_exit(cx)?.await, + }; + let output = terminal.current_output(cx)?; Ok(process_content(output, &input.command, exit_status)) diff --git a/crates/eval/src/instance.rs b/crates/eval/src/instance.rs index 1af705cd4bfdb4419c767feb41d1428181866c08..4c71a5a82b3946a9cc6e22ced378ebaabeec5256 100644 --- a/crates/eval/src/instance.rs +++ b/crates/eval/src/instance.rs @@ -625,6 +625,15 @@ impl agent::TerminalHandle for EvalTerminalHandle { self.terminal .read_with(cx, |term, cx| term.current_output(cx)) } + + fn kill(&self, cx: &AsyncApp) -> Result<()> { + cx.update(|cx| { + self.terminal.update(cx, |terminal, cx| { + terminal.kill(cx); + }); + })?; + Ok(()) + } } impl agent::ThreadEnvironment for EvalThreadEnvironment { diff --git a/crates/terminal/src/terminal.rs b/crates/terminal/src/terminal.rs index e6bb454fa296b65de60c25f326bba28f484450f0..601fa75044a648e7c40e84b32aabda8096856119 100644 --- a/crates/terminal/src/terminal.rs +++ b/crates/terminal/src/terminal.rs @@ -369,6 +369,7 @@ impl TerminalBuilder { last_content: Default::default(), last_mouse: None, matches: Vec::new(), + selection_head: None, breadcrumb_text: String::new(), scroll_px: px(0.), @@ -595,6 +596,7 @@ impl TerminalBuilder { last_content: Default::default(), last_mouse: None, matches: Vec::new(), + selection_head: None, breadcrumb_text: String::new(), scroll_px: px(0.), @@ -826,6 +828,7 @@ pub struct Terminal { pub matches: Vec>, pub last_content: TerminalContent, pub selection_head: Option, + pub breadcrumb_text: String, title_override: Option, scroll_px: Pixels, @@ -939,7 +942,7 @@ impl Terminal { AlacTermEvent::Bell => { cx.emit(Event::Bell); } - AlacTermEvent::Exit => self.register_task_finished(None, cx), + AlacTermEvent::Exit => self.register_task_finished(Some(9), cx), AlacTermEvent::MouseCursorDirty => { //NOOP, Handled in render }