From 29aad02404ae1c0049a2d79b841f7e0a7fdb9048 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Wed, 6 May 2026 09:30:07 -0400 Subject: [PATCH] Fix MCP server processes leaking as zombies (#54793) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two bugs caused MCP server child processes (e.g. `npm`/`node` for `mcp-remote`) to accumulate as zombie processes that were never cleaned up: **Bug 1: `stop_server()` only called `stop()` for `Running` servers** If a server completed initialization but was still in `Starting` state when `stop_server()` was called (a race between the init task completing and `maintain_servers` restarting), the client/transport/process were never released. The `Arc` was moved into a `Stopped` state with its inner client still holding the transport and child process handle. Fix: call `stop()` unconditionally in `stop_server()`. It is a safe no-op when the client has not been initialized (`None`). **Bug 2: `kill_on_drop` only killed the direct child, not the process tree** `StdioTransport` used a raw `smol::process::Child` with `kill_on_drop(true)`, which sends SIGKILL only to the direct child process (the shell/`npm` wrapper). The actual MCP server (e.g. `node mcp-remote`) runs as a grandchild and survives the kill, getting reparented to launchd. Fix: use `util::process::Child`, which already exists in the codebase for exactly this purpose. It calls `setsid()` via `pre_exec` to make the child a process group leader, and uses `killpg()` to terminate the entire process tree on kill. This requires passing a `std::process::Command` (via `build_std_command`) instead of a `smol::process::Command` (via `build_smol_command`), because that is what `util::process::Child::spawn` accepts — it needs to call `pre_exec` on the `std::process::Command` before internally converting it to `smol::process::Command` for async I/O. Release Notes: - Fixed zombie MCP server processes accumulating over time --- Cargo.lock | 1 - crates/context_server/Cargo.toml | 1 - .../src/transport/stdio_transport.rs | 23 +++++++++---------- crates/project/src/context_server_store.rs | 5 +--- 4 files changed, 12 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 48e486af705b1bed2432e8b07759048af1f336c1..6133cdf67e8acbcde493a1ad390248e5ef206f75 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3571,7 +3571,6 @@ version = "0.1.0" dependencies = [ "anyhow", "async-channel 2.5.0", - "async-process", "async-trait", "base64 0.22.1", "collections", diff --git a/crates/context_server/Cargo.toml b/crates/context_server/Cargo.toml index 3a51accb7805c70d900214fa536120fcd34b77ea..39288c5a6d87beeb8a406a83416b2940a70efaf0 100644 --- a/crates/context_server/Cargo.toml +++ b/crates/context_server/Cargo.toml @@ -17,7 +17,6 @@ test-support = ["gpui/test-support"] [dependencies] anyhow.workspace = true async-channel.workspace = true -async-process.workspace = true async-trait.workspace = true base64.workspace = true collections.workspace = true diff --git a/crates/context_server/src/transport/stdio_transport.rs b/crates/context_server/src/transport/stdio_transport.rs index 4bf4b77cda7f3a682ef9b6bc52a6ab23ca67393a..5ee5fc30c22ed0d3dd85e7130ebf4bed9cee344d 100644 --- a/crates/context_server/src/transport/stdio_transport.rs +++ b/crates/context_server/src/transport/stdio_transport.rs @@ -1,15 +1,16 @@ use std::path::PathBuf; use std::pin::Pin; -use anyhow::{Context as _, Result}; -use async_process::Child; +use anyhow::Result; use async_trait::async_trait; use futures::io::{BufReader, BufWriter}; use futures::{ AsyncBufReadExt as _, AsyncRead, AsyncWrite, AsyncWriteExt as _, Stream, StreamExt as _, }; use gpui::AsyncApp; + use util::TryFutureExt as _; +use util::process::Child; use util::shell::Shell; use util::shell_builder::ShellBuilder; @@ -31,22 +32,20 @@ impl StdioTransport { ) -> Result { let builder = ShellBuilder::new(&Shell::System, cfg!(windows)).non_interactive(); let mut command = - builder.build_smol_command(Some(binary.executable.display().to_string()), &binary.args); + builder.build_std_command(Some(binary.executable.display().to_string()), &binary.args); - command - .envs(binary.env.unwrap_or_default()) - .stdin(std::process::Stdio::piped()) - .stdout(std::process::Stdio::piped()) - .stderr(std::process::Stdio::piped()) - .kill_on_drop(true); + command.envs(binary.env.unwrap_or_default()); if let Some(working_directory) = working_directory { command.current_dir(working_directory); } - let mut server = command - .spawn() - .with_context(|| format!("failed to spawn command {command:?})",))?; + let mut server = Child::spawn( + command, + std::process::Stdio::piped(), + std::process::Stdio::piped(), + std::process::Stdio::piped(), + )?; let stdin = server.stdin.take().unwrap(); let stdout = server.stdout.take().unwrap(); diff --git a/crates/project/src/context_server_store.rs b/crates/project/src/context_server_store.rs index 1ea6d2c41887d79ab8159e3f0fefc5ef58735fbe..9effe4a4638762656cc0c9713e2b8d72219aa97e 100644 --- a/crates/project/src/context_server_store.rs +++ b/crates/project/src/context_server_store.rs @@ -607,10 +607,7 @@ impl ContextServerStore { let server = state.server(); let configuration = state.configuration(); - let mut result = Ok(()); - if let ContextServerState::Running { server, .. } = &state { - result = server.stop(); - } + let result = server.stop(); drop(state); self.update_server_state(