agent_servers: Fix process leaks after terminating ACP server (#45902)

Bennet Bo Fenner created

Closes #45211

This ensures that all sub-processes that were launched by the ACP server
are terminated. One scenario where this is easily reproducible:
- Start a new Claude Code ACP session
- Submit a prompt
- While Claude-code is still responding, start a new session
- The `claude-code` subprocess is leaked from the previous session (The
Claude-code SDK runs the Claude-code binary in a sub process)

This PR fixes this by using process groups on Unix. 
It does not fix the process leaks on Windows yet (will follow up with
another PR)

Release Notes:

- Fixed an issue where subprocesses of ACP servers could be leaked after
starting a new session

Change summary

crates/agent_servers/src/acp.rs                        | 17 -
crates/context_server/src/transport/stdio_transport.rs |  2 
crates/dap/src/transport.rs                            | 79 +----------
crates/project/src/debugger/locators/cargo.rs          |  2 
crates/util/src/process.rs                             | 82 ++++++++++++
crates/util/src/shell_builder.rs                       | 22 ++
crates/util/src/util.rs                                |  1 
7 files changed, 115 insertions(+), 90 deletions(-)

Detailed changes

crates/agent_servers/src/acp.rs 🔗

@@ -13,8 +13,10 @@ use serde::Deserialize;
 use settings::Settings as _;
 use task::ShellBuilder;
 use util::ResultExt as _;
+use util::process::Child;
 
 use std::path::PathBuf;
+use std::process::Stdio;
 use std::{any::Any, cell::RefCell};
 use std::{path::Path, rc::Rc};
 use thiserror::Error;
@@ -41,9 +43,7 @@ pub struct AcpConnection {
     default_model: Option<acp::ModelId>,
     default_config_options: HashMap<String, String>,
     root_dir: PathBuf,
-    // NB: Don't move this into the wait_task, since we need to ensure the process is
-    // killed on drop (setting kill_on_drop on the command seems to not always work).
-    child: smol::process::Child,
+    child: Child,
     _io_task: Task<Result<(), acp::Error>>,
     _wait_task: Task<Result<()>>,
     _stderr_task: Task<Result<()>>,
@@ -114,16 +114,12 @@ impl AcpConnection {
         let shell = cx.update(|cx| TerminalSettings::get(None, cx).shell.clone())?;
         let builder = ShellBuilder::new(&shell, cfg!(windows)).non_interactive();
         let mut child =
-            builder.build_command(Some(command.path.display().to_string()), &command.args);
-        child
-            .envs(command.env.iter().flatten())
-            .stdin(std::process::Stdio::piped())
-            .stdout(std::process::Stdio::piped())
-            .stderr(std::process::Stdio::piped());
+            builder.build_std_command(Some(command.path.display().to_string()), &command.args);
+        child.envs(command.env.iter().flatten());
         if !is_remote {
             child.current_dir(root_dir);
         }
-        let mut child = child.spawn()?;
+        let mut child = Child::spawn(child, Stdio::piped(), Stdio::piped(), Stdio::piped())?;
 
         let stdout = child.stdout.take().context("Failed to take stdout")?;
         let stdin = child.stdin.take().context("Failed to take stdin")?;
@@ -259,7 +255,6 @@ impl AcpConnection {
 
 impl Drop for AcpConnection {
     fn drop(&mut self) {
-        // See the comment on the child field.
         self.child.kill().log_err();
     }
 }

crates/context_server/src/transport/stdio_transport.rs 🔗

@@ -34,7 +34,7 @@ impl StdioTransport {
         let shell = cx.update(|cx| TerminalSettings::get(None, cx).shell.clone())?;
         let builder = ShellBuilder::new(&shell, cfg!(windows)).non_interactive();
         let mut command =
-            builder.build_command(Some(binary.executable.display().to_string()), &binary.args);
+            builder.build_smol_command(Some(binary.executable.display().to_string()), &binary.args);
 
         command
             .envs(binary.env.unwrap_or_default())

crates/dap/src/transport.rs 🔗

@@ -24,7 +24,7 @@ use std::{
     time::Duration,
 };
 use task::TcpArgumentsTemplate;
-use util::ConnectionResult;
+use util::{ConnectionResult, ResultExt, process::Child};
 
 use crate::{
     adapters::{DebugAdapterBinary, TcpArguments},
@@ -528,7 +528,7 @@ impl TcpTransport {
             command.args(&binary.arguments);
             command.envs(&binary.envs);
 
-            let mut p = Child::spawn(command, Stdio::null())
+            let mut p = Child::spawn(command, Stdio::null(), Stdio::piped(), Stdio::piped())
                 .with_context(|| "failed to start debug adapter.")?;
 
             stdout_task = p.stdout.take().map(|stdout| {
@@ -582,7 +582,7 @@ impl Transport for TcpTransport {
 
     fn kill(&mut self) {
         if let Some(process) = &mut *self.process.lock() {
-            process.kill();
+            process.kill().log_err();
         }
     }
 
@@ -647,7 +647,7 @@ impl Transport for TcpTransport {
 impl Drop for TcpTransport {
     fn drop(&mut self) {
         if let Some(mut p) = self.process.lock().take() {
-            p.kill()
+            p.kill().log_err();
         }
     }
 }
@@ -678,7 +678,7 @@ impl StdioTransport {
         command.args(&binary.arguments);
         command.envs(&binary.envs);
 
-        let mut process = Child::spawn(command, Stdio::piped())?;
+        let mut process = Child::spawn(command, Stdio::piped(), Stdio::piped(), Stdio::piped())?;
 
         let _stderr_task = process.stderr.take().map(|stderr| {
             cx.background_spawn(TransportDelegate::handle_adapter_log(
@@ -703,7 +703,7 @@ impl Transport for StdioTransport {
     }
 
     fn kill(&mut self) {
-        self.process.lock().kill();
+        self.process.lock().kill().log_err();
     }
 
     fn connect(
@@ -731,7 +731,7 @@ impl Transport for StdioTransport {
 
 impl Drop for StdioTransport {
     fn drop(&mut self) {
-        self.process.lock().kill();
+        self.process.lock().kill().log_err();
     }
 }
 
@@ -1024,68 +1024,3 @@ impl Transport for FakeTransport {
         self
     }
 }
-
-struct Child {
-    process: smol::process::Child,
-}
-
-impl std::ops::Deref for Child {
-    type Target = smol::process::Child;
-
-    fn deref(&self) -> &Self::Target {
-        &self.process
-    }
-}
-
-impl std::ops::DerefMut for Child {
-    fn deref_mut(&mut self) -> &mut Self::Target {
-        &mut self.process
-    }
-}
-
-impl Child {
-    fn into_inner(self) -> smol::process::Child {
-        self.process
-    }
-
-    #[cfg(not(windows))]
-    fn spawn(mut command: std::process::Command, stdin: Stdio) -> Result<Self> {
-        util::set_pre_exec_to_start_new_session(&mut command);
-        let mut command = smol::process::Command::from(command);
-        let process = command
-            .stdin(stdin)
-            .stdout(Stdio::piped())
-            .stderr(Stdio::piped())
-            .spawn()
-            .with_context(|| format!("failed to spawn command `{command:?}`",))?;
-        Ok(Self { process })
-    }
-
-    #[cfg(windows)]
-    fn spawn(command: std::process::Command, stdin: Stdio) -> Result<Self> {
-        // TODO(windows): create a job object and add the child process handle to it,
-        // see https://learn.microsoft.com/en-us/windows/win32/procthread/job-objects
-        let mut command = smol::process::Command::from(command);
-        let process = command
-            .stdin(stdin)
-            .stdout(Stdio::piped())
-            .stderr(Stdio::piped())
-            .spawn()
-            .with_context(|| format!("failed to spawn command `{command:?}`",))?;
-        Ok(Self { process })
-    }
-
-    #[cfg(not(windows))]
-    fn kill(&mut self) {
-        let pid = self.process.id();
-        unsafe {
-            libc::killpg(pid as i32, libc::SIGKILL);
-        }
-    }
-
-    #[cfg(windows)]
-    fn kill(&mut self) {
-        // TODO(windows): terminate the job object in kill
-        let _ = self.process.kill();
-    }
-}

crates/project/src/debugger/locators/cargo.rs 🔗

@@ -116,7 +116,7 @@ impl DapLocator for CargoLocator {
             .context("Couldn't get cwd from debug config which is needed for locators")?;
         let builder = ShellBuilder::new(&build_config.shell, cfg!(windows)).non_interactive();
         let mut child = builder
-            .build_command(
+            .build_smol_command(
                 Some("cargo".into()),
                 &build_config
                     .args

crates/util/src/process.rs 🔗

@@ -0,0 +1,82 @@
+use anyhow::{Context as _, Result};
+use std::process::Stdio;
+
+/// A wrapper around `smol::process::Child` that ensures all subprocesses
+/// are killed when the process is terminated by using process groups.
+pub struct Child {
+    process: smol::process::Child,
+}
+
+impl std::ops::Deref for Child {
+    type Target = smol::process::Child;
+
+    fn deref(&self) -> &Self::Target {
+        &self.process
+    }
+}
+
+impl std::ops::DerefMut for Child {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.process
+    }
+}
+
+impl Child {
+    #[cfg(not(windows))]
+    pub fn spawn(
+        mut command: std::process::Command,
+        stdin: Stdio,
+        stdout: Stdio,
+        stderr: Stdio,
+    ) -> Result<Self> {
+        crate::set_pre_exec_to_start_new_session(&mut command);
+        let mut command = smol::process::Command::from(command);
+        let process = command
+            .stdin(stdin)
+            .stdout(stdout)
+            .stderr(stderr)
+            .spawn()
+            .with_context(|| format!("failed to spawn command {command:?}"))?;
+        Ok(Self { process })
+    }
+
+    #[cfg(windows)]
+    pub fn spawn(
+        command: std::process::Command,
+        stdin: Stdio,
+        stdout: Stdio,
+        stderr: Stdio,
+    ) -> Result<Self> {
+        // TODO(windows): create a job object and add the child process handle to it,
+        // see https://learn.microsoft.com/en-us/windows/win32/procthread/job-objects
+        let mut command = smol::process::Command::from(command);
+        let process = command
+            .stdin(stdin)
+            .stdout(stdout)
+            .stderr(stderr)
+            .spawn()
+            .with_context(|| format!("failed to spawn command {command:?}"))?;
+
+        Ok(Self { process })
+    }
+
+    pub fn into_inner(self) -> smol::process::Child {
+        self.process
+    }
+
+    #[cfg(not(windows))]
+    pub fn kill(&mut self) -> Result<()> {
+        let pid = self.process.id();
+        unsafe {
+            libc::killpg(pid as i32, libc::SIGKILL);
+        }
+        Ok(())
+    }
+
+    #[cfg(windows)]
+    pub fn kill(&mut self) -> Result<()> {
+        // TODO(windows): terminate the job object in kill
+        self.process.kill()?;
+        Ok(())
+    }
+}

crates/util/src/shell_builder.rs 🔗

@@ -176,15 +176,27 @@ impl ShellBuilder {
         (self.program, self.args)
     }
 
-    /// Builds a command with the given task command and arguments.
+    /// Builds a `smol::process::Command` with the given task command and arguments.
     ///
     /// Prefer this over manually constructing a command with the output of `Self::build`,
     /// as this method handles `cmd` weirdness on windows correctly.
-    pub fn build_command(
+    pub fn build_smol_command(
         self,
-        mut task_command: Option<String>,
+        task_command: Option<String>,
         task_args: &[String],
     ) -> smol::process::Command {
+        smol::process::Command::from(self.build_std_command(task_command, task_args))
+    }
+
+    /// Builds a `std::process::Command` with the given task command and arguments.
+    ///
+    /// Prefer this over manually constructing a command with the output of `Self::build`,
+    /// as this method handles `cmd` weirdness on windows correctly.
+    pub fn build_std_command(
+        self,
+        mut task_command: Option<String>,
+        task_args: &[String],
+    ) -> std::process::Command {
         #[cfg(windows)]
         let kind = self.kind;
         if task_args.is_empty() {
@@ -195,11 +207,11 @@ impl ShellBuilder {
         }
         let (program, args) = self.build(task_command, task_args);
 
-        let mut child = crate::command::new_smol_command(program);
+        let mut child = crate::command::new_std_command(program);
 
         #[cfg(windows)]
         if kind == ShellKind::Cmd {
-            use smol::process::windows::CommandExt;
+            use std::os::windows::process::CommandExt;
 
             for arg in args {
                 child.raw_arg(arg);

crates/util/src/util.rs 🔗

@@ -4,6 +4,7 @@ pub mod command;
 pub mod fs;
 pub mod markdown;
 pub mod paths;
+pub mod process;
 pub mod redact;
 pub mod rel_path;
 pub mod schemars;