Get terminal tool working in evals (#29831)

Cole Miller created

Bypass our terminal subsystem and just run a shell in a pty.

- [x] make sure we use the same working directory
- [x] strip control chars from the pty output (?)
- [x] tests

Release Notes:

- N/A

Change summary

Cargo.lock                                  |  74 ++++++
Cargo.toml                                  |   1 
crates/assistant_tools/Cargo.toml           |   4 
crates/assistant_tools/src/terminal_tool.rs | 233 ++++++++++++++++++++--
tooling/workspace-hack/Cargo.toml           |   4 
5 files changed, 287 insertions(+), 29 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -736,6 +736,8 @@ dependencies = [
  "language_models",
  "linkme",
  "open",
+ "paths",
+ "portable-pty",
  "pretty_assertions",
  "project",
  "rand 0.8.5",
@@ -762,6 +764,7 @@ dependencies = [
  "workspace",
  "workspace-hack",
  "zed_llm_client",
+ "zlog",
 ]
 
 [[package]]
@@ -3991,7 +3994,7 @@ version = "3.4.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "697b5419f348fd5ae2478e8018cb016c00a5881c7f46c717de98ffd135a5651c"
 dependencies = [
- "nix",
+ "nix 0.29.0",
  "windows-sys 0.59.0",
 ]
 
@@ -9076,6 +9079,18 @@ version = "1.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
 
+[[package]]
+name = "nix"
+version = "0.28.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4"
+dependencies = [
+ "bitflags 2.9.0",
+ "cfg-if",
+ "cfg_aliases 0.1.1",
+ "libc",
+]
+
 [[package]]
 name = "nix"
 version = "0.29.0"
@@ -10899,6 +10914,27 @@ dependencies = [
  "portable-atomic",
 ]
 
+[[package]]
+name = "portable-pty"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4a596a2b3d2752d94f51fac2d4a96737b8705dddd311a32b9af47211f08671e"
+dependencies = [
+ "anyhow",
+ "bitflags 1.3.2",
+ "downcast-rs",
+ "filedescriptor",
+ "lazy_static",
+ "libc",
+ "log",
+ "nix 0.28.0",
+ "serial2",
+ "shared_library",
+ "shell-words",
+ "winapi",
+ "winreg 0.10.1",
+]
+
 [[package]]
 name = "postage"
 version = "0.5.0"
@@ -13310,6 +13346,17 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "serial2"
+version = "0.2.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7d1d08630509d69f90eff4afcd02c3bd974d979225cbd815ff5942351b14375"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "winapi",
+]
+
 [[package]]
 name = "session"
 version = "0.1.0"
@@ -13410,6 +13457,16 @@ dependencies = [
  "lazy_static",
 ]
 
+[[package]]
+name = "shared_library"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a9e7e0f2bfae24d8a5b5a66c5b257a83c7412304311512a0c054cd5e619da11"
+dependencies = [
+ "lazy_static",
+ "libc",
+]
+
 [[package]]
 name = "shell-words"
 version = "1.1.0"
@@ -17764,6 +17821,15 @@ dependencies = [
  "memchr",
 ]
 
+[[package]]
+name = "winreg"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d"
+dependencies = [
+ "winapi",
+]
+
 [[package]]
 name = "winreg"
 version = "0.50.0"
@@ -18216,7 +18282,7 @@ dependencies = [
  "miniz_oxide",
  "mio",
  "naga",
- "nix",
+ "nix 0.29.0",
  "nom",
  "num-bigint",
  "num-bigint-dig",
@@ -18593,7 +18659,7 @@ dependencies = [
  "futures-core",
  "futures-lite 2.6.0",
  "hex",
- "nix",
+ "nix 0.29.0",
  "ordered-stream",
  "serde",
  "serde_repr",
@@ -18706,7 +18772,7 @@ dependencies = [
  "menu",
  "migrator",
  "mimalloc",
- "nix",
+ "nix 0.29.0",
  "node_runtime",
  "notifications",
  "outline",

Cargo.toml 🔗

@@ -494,6 +494,7 @@ pet-fs = { git = "https://github.com/microsoft/python-environment-tools.git", re
 pet-pixi = { git = "https://github.com/microsoft/python-environment-tools.git", rev = "845945b830297a50de0e24020b980a65e4820559" }
 pet-poetry = { git = "https://github.com/microsoft/python-environment-tools.git", rev = "845945b830297a50de0e24020b980a65e4820559" }
 pet-reporter = { git = "https://github.com/microsoft/python-environment-tools.git", rev = "845945b830297a50de0e24020b980a65e4820559" }
+portable-pty = "0.9.0"
 postage = { version = "0.5", features = ["futures-traits"] }
 pretty_assertions = { version = "1.3.0", features = ["unstable"] }
 proc-macro2 = "1.0.93"

crates/assistant_tools/Cargo.toml 🔗

@@ -37,6 +37,8 @@ language.workspace = true
 language_model.workspace = true
 linkme.workspace = true
 open.workspace = true
+paths.workspace = true
+portable-pty.workspace = true
 project.workspace = true
 regex.workspace = true
 rust-embed.workspace = true
@@ -75,6 +77,8 @@ reqwest_client.workspace = true
 settings = { workspace = true, features = ["test-support"] }
 task = { workspace = true, features = ["test-support"]}
 tempfile.workspace = true
+theme.workspace = true
 tree-sitter-rust.workspace = true
 workspace = { workspace = true, features = ["test-support"] }
 unindent.workspace = true
+zlog.workspace = true

crates/assistant_tools/src/terminal_tool.rs 🔗

@@ -1,11 +1,13 @@
 use crate::schema::json_schema_for;
-use anyhow::{Context as _, Result, anyhow};
+use anyhow::{Context as _, Result, anyhow, bail};
 use assistant_tool::{ActionLog, Tool, ToolCard, ToolResult, ToolUseStatus};
 use gpui::{
     Animation, AnimationExt, AnyWindowHandle, App, AppContext, Empty, Entity, EntityId, Task,
     Transformation, WeakEntity, Window, percentage,
 };
+use language::LineEnding;
 use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
+use portable_pty::{CommandBuilder, PtySize, native_pty_system};
 use project::{Project, terminals::TerminalKind};
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
@@ -87,26 +89,65 @@ impl Tool for TerminalTool {
         window: Option<AnyWindowHandle>,
         cx: &mut App,
     ) -> ToolResult {
-        let Some(window) = window else {
-            return Task::ready(Err(anyhow!("no window options"))).into();
-        };
-
         let input: TerminalToolInput = match serde_json::from_value(input) {
             Ok(input) => input,
             Err(err) => return Task::ready(Err(anyhow!(err))).into(),
         };
 
         let input_path = Path::new(&input.cd);
-        let working_dir = match working_dir(cx, &input, &project, input_path) {
+        let working_dir = match working_dir(&input, &project, input_path, cx) {
             Ok(dir) => dir,
-            Err(err) => return Task::ready(Err(anyhow!(err))).into(),
+            Err(err) => return Task::ready(Err(err)).into(),
         };
+        let command = get_system_shell();
+        let args = vec!["-c".into(), input.command.clone()];
+        let cwd = working_dir.clone();
+
+        let Some(window) = window else {
+            // Headless setup, a test or eval. Our terminal subsystem requires a workspace,
+            // so bypass it and provide a convincing imitation using a pty.
+            let task = cx.background_spawn(async move {
+                let pty_system = native_pty_system();
+                let mut cmd = CommandBuilder::new(command);
+                cmd.args(args);
+                if let Some(cwd) = cwd {
+                    cmd.cwd(cwd);
+                }
+                let pair = pty_system.openpty(PtySize {
+                    rows: 24,
+                    cols: 80,
+                    ..Default::default()
+                })?;
+                let mut child = pair.slave.spawn_command(cmd)?;
+                let mut reader = pair.master.try_clone_reader()?;
+                drop(pair);
+                let mut content = Vec::new();
+                reader.read_to_end(&mut content)?;
+                let mut content = String::from_utf8(content)?;
+                // Massage the pty output a bit to try to match what the terminal codepath gives us
+                LineEnding::normalize(&mut content);
+                content = content
+                    .chars()
+                    .filter(|c| c.is_ascii_whitespace() || !c.is_ascii_control())
+                    .collect();
+                let content = content.trim_start().trim_start_matches("^D");
+                let exit_status = child.wait()?;
+                let (processed_content, _) =
+                    process_content(content, &input.command, Some(exit_status));
+                Ok(processed_content)
+            });
+            return ToolResult {
+                output: task,
+                card: None,
+            };
+        };
+
         let terminal = project.update(cx, |project, cx| {
             project.create_terminal(
                 TerminalKind::Task(task::SpawnInTerminal {
-                    command: get_system_shell(),
-                    args: vec!["-c".into(), input.command.clone()],
-                    cwd: working_dir.clone(),
+                    command,
+                    args,
+                    cwd,
                     ..Default::default()
                 }),
                 window,
@@ -152,8 +193,11 @@ impl Tool for TerminalTool {
                 })?;
 
                 let previous_len = content.len();
-                let (processed_content, finished_with_empty_output) =
-                    process_content(content, &input.command, exit_status);
+                let (processed_content, finished_with_empty_output) = process_content(
+                    &content,
+                    &input.command,
+                    exit_status.map(portable_pty::ExitStatus::from),
+                );
 
                 let _ = card.update(cx, |card, _| {
                     card.command_finished = true;
@@ -177,9 +221,9 @@ impl Tool for TerminalTool {
 }
 
 fn process_content(
-    content: String,
+    content: &str,
     command: &str,
-    exit_status: Option<ExitStatus>,
+    exit_status: Option<portable_pty::ExitStatus>,
 ) -> (String, bool) {
     let should_truncate = content.len() > COMMAND_OUTPUT_LIMIT;
 
@@ -192,7 +236,7 @@ fn process_content(
         end_ix = content[..end_ix].rfind('\n').unwrap_or(end_ix);
         &content[..end_ix]
     } else {
-        content.as_str()
+        content
     };
     let is_empty = content.trim().is_empty();
 
@@ -221,11 +265,16 @@ fn process_content(
             }
         }
         Some(exit_status) => {
-            let code = exit_status.code().unwrap_or(-1);
             if is_empty {
-                format!("Command \"{command}\" failed with exit code {code}.")
+                format!(
+                    "Command \"{command}\" failed with exit code {}.",
+                    exit_status.exit_code()
+                )
             } else {
-                format!("Command \"{command}\" failed with exit code {code}.\n\n{content}")
+                format!(
+                    "Command \"{command}\" failed with exit code {}.\n\n{content}",
+                    exit_status.exit_code()
+                )
             }
         }
         None => {
@@ -239,11 +288,11 @@ fn process_content(
 }
 
 fn working_dir(
-    cx: &mut App,
     input: &TerminalToolInput,
     project: &Entity<Project>,
     input_path: &Path,
-) -> Result<Option<PathBuf>, &'static str> {
+    cx: &mut App,
+) -> Result<Option<PathBuf>> {
     let project = project.read(cx);
 
     if input.cd == "." {
@@ -253,7 +302,7 @@ fn working_dir(
         match worktrees.next() {
             Some(worktree) => {
                 if worktrees.next().is_some() {
-                    return Err(
+                    bail!(
                         "'.' is ambiguous in multi-root workspaces. Please specify a root directory explicitly.",
                     );
                 }
@@ -267,13 +316,13 @@ fn working_dir(
             .worktrees(cx)
             .any(|worktree| input_path.starts_with(&worktree.read(cx).abs_path()))
         {
-            return Err("The absolute path must be within one of the project's worktrees");
+            bail!("The absolute path must be within one of the project's worktrees");
         }
 
         Ok(Some(input_path.into()))
     } else {
         let Some(worktree) = project.worktree_for_root_name(&input.cd, cx) else {
-            return Err("`cd` directory {} not found in the project");
+            bail!("`cd` directory {:?} not found in the project", input.cd);
         };
 
         Ok(Some(worktree.read(cx).abs_path().to_path_buf()))
@@ -507,3 +556,141 @@ impl ToolCard for TerminalToolCard {
             .into_any()
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use editor::EditorSettings;
+    use fs::RealFs;
+    use gpui::{BackgroundExecutor, TestAppContext};
+    use pretty_assertions::assert_eq;
+    use serde_json::json;
+    use settings::{Settings, SettingsStore};
+    use terminal::terminal_settings::TerminalSettings;
+    use theme::ThemeSettings;
+    use util::{ResultExt as _, test::TempTree};
+
+    use super::*;
+
+    #[gpui::test]
+    async fn test_working_directory(executor: BackgroundExecutor, cx: &mut TestAppContext) {
+        if cfg!(windows) {
+            return;
+        }
+
+        zlog::init();
+        zlog::init_output_stdout();
+        executor.allow_parking();
+        cx.update(|cx| {
+            let settings_store = SettingsStore::test(cx);
+            cx.set_global(settings_store);
+            language::init(cx);
+            Project::init_settings(cx);
+            workspace::init_settings(cx);
+            ThemeSettings::register(cx);
+            TerminalSettings::register(cx);
+            EditorSettings::register(cx);
+        });
+
+        let fs = Arc::new(RealFs::new(None, executor));
+        let tree = TempTree::new(json!({
+            "project": {},
+            "other-project": {},
+        }));
+        let project: Entity<Project> =
+            Project::test(fs, [tree.path().join("project").as_path()], cx).await;
+        let action_log = cx.update(|cx| cx.new(|_| ActionLog::new(project.clone())));
+
+        let check = |input, expected, cx: &mut App| {
+            let headless_result = TerminalTool::run(
+                Arc::new(TerminalTool),
+                serde_json::to_value(input).unwrap(),
+                &[],
+                project.clone(),
+                action_log.clone(),
+                None,
+                cx,
+            );
+            cx.spawn(async move |_| {
+                let output = headless_result.output.await.log_err();
+                assert_eq!(output, expected);
+            })
+        };
+
+        cx.update(|cx| {
+            check(
+                TerminalToolInput {
+                    command: "pwd".into(),
+                    cd: "project".into(),
+                },
+                Some(format!(
+                    "```\n{}\n```",
+                    tree.path().join("project").display()
+                )),
+                cx,
+            )
+        })
+        .await;
+
+        cx.update(|cx| {
+            check(
+                TerminalToolInput {
+                    command: "pwd".into(),
+                    cd: ".".into(),
+                },
+                Some(format!(
+                    "```\n{}\n```",
+                    tree.path().join("project").display()
+                )),
+                cx,
+            )
+        })
+        .await;
+
+        // Absolute path above the worktree root
+        cx.update(|cx| {
+            check(
+                TerminalToolInput {
+                    command: "pwd".into(),
+                    cd: tree.path().to_string_lossy().into(),
+                },
+                None,
+                cx,
+            )
+        })
+        .await;
+
+        project
+            .update(cx, |project, cx| {
+                project.create_worktree(tree.path().join("other-project"), true, cx)
+            })
+            .await
+            .unwrap();
+
+        cx.update(|cx| {
+            check(
+                TerminalToolInput {
+                    command: "pwd".into(),
+                    cd: "other-project".into(),
+                },
+                Some(format!(
+                    "```\n{}\n```",
+                    tree.path().join("other-project").display()
+                )),
+                cx,
+            )
+        })
+        .await;
+
+        cx.update(|cx| {
+            check(
+                TerminalToolInput {
+                    command: "pwd".into(),
+                    cd: ".".into(),
+                },
+                None,
+                cx,
+            )
+        })
+        .await;
+    }
+}

tooling/workspace-hack/Cargo.toml 🔗

@@ -539,7 +539,7 @@ tokio-rustls = { version = "0.26", default-features = false, features = ["ring"]
 tokio-socks = { version = "0.5", features = ["futures-io"] }
 tokio-stream = { version = "0.1", features = ["fs"] }
 tower = { version = "0.5", default-features = false, features = ["timeout", "util"] }
-winapi = { version = "0.3", default-features = false, features = ["cfg", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "in6addr", "inaddr", "knownfolders", "minwinbase", "ntsecapi", "objbase", "processenv", "processthreadsapi", "shlobj", "std", "sysinfoapi", "winbase", "windef", "winerror", "winioctl"] }
+winapi = { version = "0.3", default-features = false, features = ["cfg", "commapi", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "impl-debug", "impl-default", "in6addr", "inaddr", "ioapiset", "knownfolders", "minwinbase", "minwindef", "namedpipeapi", "ntsecapi", "objbase", "processenv", "processthreadsapi", "shlobj", "std", "synchapi", "sysinfoapi", "timezoneapi", "winbase", "windef", "winerror", "winioctl", "winnt", "winreg", "winsock2", "winuser"] }
 windows-core = { version = "0.61" }
 windows-numerics = { version = "0.2" }
 windows-sys-73dcd821b1037cfd = { package = "windows-sys", version = "0.59", features = ["Wdk_Foundation", "Wdk_Storage_FileSystem", "Win32_Globalization", "Win32_NetworkManagement_IpHelper", "Win32_Networking_WinSock", "Win32_Security_Authentication_Identity", "Win32_Security_Credentials", "Win32_Security_Cryptography", "Win32_Storage_FileSystem", "Win32_System_Com", "Win32_System_Console", "Win32_System_Diagnostics_Debug", "Win32_System_IO", "Win32_System_Ioctl", "Win32_System_Kernel", "Win32_System_LibraryLoader", "Win32_System_Memory", "Win32_System_Performance", "Win32_System_Pipes", "Win32_System_Registry", "Win32_System_SystemInformation", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_Time", "Win32_System_WindowsProgramming", "Win32_UI_Input_KeyboardAndMouse", "Win32_UI_Shell", "Win32_UI_WindowsAndMessaging"] }
@@ -564,7 +564,7 @@ tokio-rustls = { version = "0.26", default-features = false, features = ["ring"]
 tokio-socks = { version = "0.5", features = ["futures-io"] }
 tokio-stream = { version = "0.1", features = ["fs"] }
 tower = { version = "0.5", default-features = false, features = ["timeout", "util"] }
-winapi = { version = "0.3", default-features = false, features = ["cfg", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "in6addr", "inaddr", "knownfolders", "minwinbase", "ntsecapi", "objbase", "processenv", "processthreadsapi", "shlobj", "std", "sysinfoapi", "winbase", "windef", "winerror", "winioctl"] }
+winapi = { version = "0.3", default-features = false, features = ["cfg", "commapi", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "impl-debug", "impl-default", "in6addr", "inaddr", "ioapiset", "knownfolders", "minwinbase", "minwindef", "namedpipeapi", "ntsecapi", "objbase", "processenv", "processthreadsapi", "shlobj", "std", "synchapi", "sysinfoapi", "timezoneapi", "winbase", "windef", "winerror", "winioctl", "winnt", "winreg", "winsock2", "winuser"] }
 windows-core = { version = "0.61" }
 windows-numerics = { version = "0.2" }
 windows-sys-73dcd821b1037cfd = { package = "windows-sys", version = "0.59", features = ["Wdk_Foundation", "Wdk_Storage_FileSystem", "Win32_Globalization", "Win32_NetworkManagement_IpHelper", "Win32_Networking_WinSock", "Win32_Security_Authentication_Identity", "Win32_Security_Credentials", "Win32_Security_Cryptography", "Win32_Storage_FileSystem", "Win32_System_Com", "Win32_System_Console", "Win32_System_Diagnostics_Debug", "Win32_System_IO", "Win32_System_Ioctl", "Win32_System_Kernel", "Win32_System_LibraryLoader", "Win32_System_Memory", "Win32_System_Performance", "Win32_System_Pipes", "Win32_System_Registry", "Win32_System_SystemInformation", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_Time", "Win32_System_WindowsProgramming", "Win32_UI_Input_KeyboardAndMouse", "Win32_UI_Shell", "Win32_UI_WindowsAndMessaging"] }