Cargo.lock 🔗
@@ -3571,7 +3571,6 @@ version = "0.1.0"
dependencies = [
"anyhow",
"async-channel 2.5.0",
- "async-process",
"async-trait",
"base64 0.22.1",
"collections",
Richard Feldman created
Two bugs caused MCP server child processes (e.g. `npm`/`node` for
`mcp-remote`) to accumulate as zombie processes that were never cleaned
up:
**Bug 1: `stop_server()` only called `stop()` for `Running` servers**
If a server completed initialization but was still in `Starting` state
when `stop_server()` was called (a race between the init task completing
and `maintain_servers` restarting), the client/transport/process were
never released. The `Arc<ContextServer>` was moved into a `Stopped`
state with its inner client still holding the transport and child
process handle.
Fix: call `stop()` unconditionally in `stop_server()`. It is a safe
no-op when the client has not been initialized (`None`).
**Bug 2: `kill_on_drop` only killed the direct child, not the process
tree**
`StdioTransport` used a raw `smol::process::Child` with
`kill_on_drop(true)`, which sends SIGKILL only to the direct child
process (the shell/`npm` wrapper). The actual MCP server (e.g. `node
mcp-remote`) runs as a grandchild and survives the kill, getting
reparented to launchd.
Fix: use `util::process::Child`, which already exists in the codebase
for exactly this purpose. It calls `setsid()` via `pre_exec` to make the
child a process group leader, and uses `killpg()` to terminate the
entire process tree on kill. This requires passing a
`std::process::Command` (via `build_std_command`) instead of a
`smol::process::Command` (via `build_smol_command`), because that is
what `util::process::Child::spawn` accepts — it needs to call `pre_exec`
on the `std::process::Command` before internally converting it to
`smol::process::Command` for async I/O.
Release Notes:
- Fixed zombie MCP server processes accumulating over time
Cargo.lock | 1
crates/context_server/Cargo.toml | 1
crates/context_server/src/transport/stdio_transport.rs | 23 +++++------
crates/project/src/context_server_store.rs | 5 --
4 files changed, 12 insertions(+), 18 deletions(-)
@@ -3571,7 +3571,6 @@ version = "0.1.0"
dependencies = [
"anyhow",
"async-channel 2.5.0",
- "async-process",
"async-trait",
"base64 0.22.1",
"collections",
@@ -17,7 +17,6 @@ test-support = ["gpui/test-support"]
[dependencies]
anyhow.workspace = true
async-channel.workspace = true
-async-process.workspace = true
async-trait.workspace = true
base64.workspace = true
collections.workspace = true
@@ -1,15 +1,16 @@
use std::path::PathBuf;
use std::pin::Pin;
-use anyhow::{Context as _, Result};
-use async_process::Child;
+use anyhow::Result;
use async_trait::async_trait;
use futures::io::{BufReader, BufWriter};
use futures::{
AsyncBufReadExt as _, AsyncRead, AsyncWrite, AsyncWriteExt as _, Stream, StreamExt as _,
};
use gpui::AsyncApp;
+
use util::TryFutureExt as _;
+use util::process::Child;
use util::shell::Shell;
use util::shell_builder::ShellBuilder;
@@ -31,22 +32,20 @@ impl StdioTransport {
) -> Result<Self> {
let builder = ShellBuilder::new(&Shell::System, cfg!(windows)).non_interactive();
let mut command =
- builder.build_smol_command(Some(binary.executable.display().to_string()), &binary.args);
+ builder.build_std_command(Some(binary.executable.display().to_string()), &binary.args);
- command
- .envs(binary.env.unwrap_or_default())
- .stdin(std::process::Stdio::piped())
- .stdout(std::process::Stdio::piped())
- .stderr(std::process::Stdio::piped())
- .kill_on_drop(true);
+ command.envs(binary.env.unwrap_or_default());
if let Some(working_directory) = working_directory {
command.current_dir(working_directory);
}
- let mut server = command
- .spawn()
- .with_context(|| format!("failed to spawn command {command:?})",))?;
+ let mut server = Child::spawn(
+ command,
+ std::process::Stdio::piped(),
+ std::process::Stdio::piped(),
+ std::process::Stdio::piped(),
+ )?;
let stdin = server.stdin.take().unwrap();
let stdout = server.stdout.take().unwrap();
@@ -607,10 +607,7 @@ impl ContextServerStore {
let server = state.server();
let configuration = state.configuration();
- let mut result = Ok(());
- if let ContextServerState::Running { server, .. } = &state {
- result = server.stop();
- }
+ let result = server.stop();
drop(state);
self.update_server_state(