Fix MCP server processes leaking as zombies (#54793)

Richard Feldman created

Two bugs caused MCP server child processes (e.g. `npm`/`node` for
`mcp-remote`) to accumulate as zombie processes that were never cleaned
up:

**Bug 1: `stop_server()` only called `stop()` for `Running` servers**

If a server completed initialization but was still in `Starting` state
when `stop_server()` was called (a race between the init task completing
and `maintain_servers` restarting), the client/transport/process were
never released. The `Arc<ContextServer>` was moved into a `Stopped`
state with its inner client still holding the transport and child
process handle.

Fix: call `stop()` unconditionally in `stop_server()`. It is a safe
no-op when the client has not been initialized (`None`).

**Bug 2: `kill_on_drop` only killed the direct child, not the process
tree**

`StdioTransport` used a raw `smol::process::Child` with
`kill_on_drop(true)`, which sends SIGKILL only to the direct child
process (the shell/`npm` wrapper). The actual MCP server (e.g. `node
mcp-remote`) runs as a grandchild and survives the kill, getting
reparented to launchd.

Fix: use `util::process::Child`, which already exists in the codebase
for exactly this purpose. It calls `setsid()` via `pre_exec` to make the
child a process group leader, and uses `killpg()` to terminate the
entire process tree on kill. This requires passing a
`std::process::Command` (via `build_std_command`) instead of a
`smol::process::Command` (via `build_smol_command`), because that is
what `util::process::Child::spawn` accepts — it needs to call `pre_exec`
on the `std::process::Command` before internally converting it to
`smol::process::Command` for async I/O.

Release Notes:

- Fixed zombie MCP server processes accumulating over time

Change summary

Cargo.lock                                             |  1 
crates/context_server/Cargo.toml                       |  1 
crates/context_server/src/transport/stdio_transport.rs | 23 +++++------
crates/project/src/context_server_store.rs             |  5 --
4 files changed, 12 insertions(+), 18 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -3571,7 +3571,6 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "async-channel 2.5.0",
- "async-process",
  "async-trait",
  "base64 0.22.1",
  "collections",

crates/context_server/Cargo.toml 🔗

@@ -17,7 +17,6 @@ test-support = ["gpui/test-support"]
 [dependencies]
 anyhow.workspace = true
 async-channel.workspace = true
-async-process.workspace = true
 async-trait.workspace = true
 base64.workspace = true
 collections.workspace = true

crates/context_server/src/transport/stdio_transport.rs 🔗

@@ -1,15 +1,16 @@
 use std::path::PathBuf;
 use std::pin::Pin;
 
-use anyhow::{Context as _, Result};
-use async_process::Child;
+use anyhow::Result;
 use async_trait::async_trait;
 use futures::io::{BufReader, BufWriter};
 use futures::{
     AsyncBufReadExt as _, AsyncRead, AsyncWrite, AsyncWriteExt as _, Stream, StreamExt as _,
 };
 use gpui::AsyncApp;
+
 use util::TryFutureExt as _;
+use util::process::Child;
 use util::shell::Shell;
 use util::shell_builder::ShellBuilder;
 
@@ -31,22 +32,20 @@ impl StdioTransport {
     ) -> Result<Self> {
         let builder = ShellBuilder::new(&Shell::System, cfg!(windows)).non_interactive();
         let mut command =
-            builder.build_smol_command(Some(binary.executable.display().to_string()), &binary.args);
+            builder.build_std_command(Some(binary.executable.display().to_string()), &binary.args);
 
-        command
-            .envs(binary.env.unwrap_or_default())
-            .stdin(std::process::Stdio::piped())
-            .stdout(std::process::Stdio::piped())
-            .stderr(std::process::Stdio::piped())
-            .kill_on_drop(true);
+        command.envs(binary.env.unwrap_or_default());
 
         if let Some(working_directory) = working_directory {
             command.current_dir(working_directory);
         }
 
-        let mut server = command
-            .spawn()
-            .with_context(|| format!("failed to spawn command {command:?})",))?;
+        let mut server = Child::spawn(
+            command,
+            std::process::Stdio::piped(),
+            std::process::Stdio::piped(),
+            std::process::Stdio::piped(),
+        )?;
 
         let stdin = server.stdin.take().unwrap();
         let stdout = server.stdout.take().unwrap();

crates/project/src/context_server_store.rs 🔗

@@ -607,10 +607,7 @@ impl ContextServerStore {
 
         let server = state.server();
         let configuration = state.configuration();
-        let mut result = Ok(());
-        if let ContextServerState::Running { server, .. } = &state {
-            result = server.stop();
-        }
+        let result = server.stop();
         drop(state);
 
         self.update_server_state(