remote: Reuse existing SSH ControlMaster sessions (#51604)

Nico created

Closes #45271

Zed hardcodes `ControlMaster=yes` with a `ControlPath` in a random temp
directory, so it can never find a ControlMaster session the user already
has open. This means you get prompted for credentials again even if
you're already authenticated.

This patch checks for an existing master before spawning a new one. It
runs `ssh -G` to resolve the user's configured `controlpath` (with `%h`,
`%p`, etc. already expanded), then `ssh -O check` to verify it's alive.
If it is, Zed skips askpass and uses the existing socket. If not, the
current behavior is unchanged. Both commands are local and don't hit the
network.

Also adds a `killed: AtomicBool` to `SshRemoteConnection` because
`has_been_killed()` was using `master_process.is_none()` as a proxy for
"connection is dead." When reusing an external master, `master_process`
starts as `None`, so the connection pool would discard it immediately.

Tested against a university SSH setup with `ControlMaster auto`
configured. The reuse path connects without prompting, and the fallback
path works normally when no master exists. Windows is unaffected.

Release Notes:

- Zed now reuses existing SSH ControlMaster sessions instead of
prompting for credentials again (#45271).

Change summary

crates/remote/src/transport/ssh.rs | 280 ++++++++++++++++++++++++-------
1 file changed, 213 insertions(+), 67 deletions(-)

Detailed changes

crates/remote/src/transport/ssh.rs 🔗

@@ -22,7 +22,10 @@ use smol::fs;
 use std::{
     net::IpAddr,
     path::{Path, PathBuf},
-    sync::Arc,
+    sync::{
+        Arc,
+        atomic::{AtomicBool, Ordering},
+    },
     time::Instant,
 };
 use tempfile::TempDir;
@@ -36,6 +39,9 @@ use util::{
 pub(crate) struct SshRemoteConnection {
     socket: SshSocket,
     master_process: Mutex<Option<MasterProcess>>,
+    /// Whether `kill()` has been called. Separate from `master_process` because
+    /// reused ControlMaster sessions start with `master_process` as `None`.
+    killed: AtomicBool,
     remote_binary_path: Option<Arc<RelPath>>,
     ssh_platform: RemotePlatform,
     ssh_path_style: PathStyle,
@@ -268,7 +274,9 @@ impl AsMut<Child> for MasterProcess {
 #[async_trait(?Send)]
 impl RemoteConnection for SshRemoteConnection {
     async fn kill(&self) -> Result<()> {
+        self.killed.store(true, Ordering::Release);
         let Some(mut process) = self.master_process.lock().take() else {
+            log::debug!("no master process to kill (external ControlMaster session)");
             return Ok(());
         };
         process.as_mut().kill().ok();
@@ -277,7 +285,7 @@ impl RemoteConnection for SshRemoteConnection {
     }
 
     fn has_been_killed(&self) -> bool {
-        self.master_process.lock().is_none()
+        self.killed.load(Ordering::Acquire)
     }
 
     fn connection_options(&self) -> RemoteConnectionOptions {
@@ -507,6 +515,82 @@ impl RemoteConnection for SshRemoteConnection {
     }
 }
 
+/// Check if the user already has an active SSH ControlMaster session for the
+/// given destination. See: https://github.com/zed-industries/zed/issues/45271
+#[cfg(not(windows))]
+async fn find_existing_control_master(
+    destination: &str,
+    additional_args: &[String],
+) -> Option<PathBuf> {
+    // Use `ssh -G` to resolve the user's effective SSH config for this host.
+    // This expands ControlPath tokens (%h, %p, %r, %C, etc.) into actual paths.
+    let output = match util::command::new_command("ssh")
+        .args(additional_args)
+        .arg("-G")
+        .arg(destination)
+        .stdin(Stdio::null())
+        .stdout(Stdio::piped())
+        .stderr(Stdio::null())
+        .output()
+        .await
+    {
+        Ok(output) => output,
+        Err(e) => {
+            log::debug!("failed to run ssh -G: {e}");
+            return None;
+        }
+    };
+
+    if !output.status.success() {
+        log::debug!("ssh -G failed for {destination}, skipping ControlMaster reuse");
+        return None;
+    }
+
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let control_path = stdout.lines().find_map(|line| {
+        let path = line.strip_prefix("controlpath ")?.trim();
+        if path == "none" || path.is_empty() {
+            None
+        } else {
+            Some(PathBuf::from(path))
+        }
+    })?;
+
+    // Verify the master is actually alive by sending a control command.
+    let check = match util::command::new_command("ssh")
+        .args(additional_args)
+        .args(["-O", "check"])
+        .arg("-o")
+        .arg(format!("ControlPath={}", control_path.display()))
+        .arg(destination)
+        .stdin(Stdio::null())
+        .stdout(Stdio::null())
+        .stderr(Stdio::null())
+        .output()
+        .await
+    {
+        Ok(output) => output,
+        Err(e) => {
+            log::debug!("failed to run ssh -O check: {e}");
+            return None;
+        }
+    };
+
+    if check.status.success() {
+        log::info!(
+            "reusing existing SSH ControlMaster at {}",
+            control_path.display()
+        );
+        Some(control_path)
+    } else {
+        log::debug!(
+            "ControlMaster socket at {} is not alive, creating new connection",
+            control_path.display()
+        );
+        None
+    }
+}
+
 impl SshRemoteConnection {
     pub(crate) async fn new(
         connection_options: SshConnectionOptions,
@@ -520,84 +604,145 @@ impl SshRemoteConnection {
         let temp_dir = tempfile::Builder::new()
             .prefix("zed-ssh-session")
             .tempdir()?;
-        let askpass_delegate = askpass::AskPassDelegate::new(cx, {
-            let delegate = delegate.clone();
-            move |prompt, tx, cx| delegate.ask_password(prompt, tx, cx)
-        });
-
-        let mut askpass =
-            askpass::AskPassSession::new(cx.background_executor().clone(), askpass_delegate)
-                .await?;
 
-        delegate.set_status(Some("Connecting"), cx);
-
-        // Start the master SSH process, which does not do anything except for establish
-        // the connection and keep it open, allowing other ssh commands to reuse it
-        // via a control socket.
+        // On non-Windows, check if the user already has an active ControlMaster
+        // session for this host. If so, reuse it instead of prompting for auth.
         #[cfg(not(windows))]
-        let socket_path = temp_dir.path().join("ssh.sock");
+        let reused_socket =
+            find_existing_control_master(&destination, &connection_options.additional_args()).await;
 
-        #[cfg(windows)]
-        let mut master_process = MasterProcess::new(
-            askpass.script_path().as_ref(),
-            connection_options.additional_args(),
-            &destination,
-        )?;
         #[cfg(not(windows))]
-        let mut master_process = MasterProcess::new(
-            askpass.script_path().as_ref(),
-            connection_options.additional_args(),
-            &socket_path,
-            &destination,
-        )?;
+        let (socket, master_process_option) = if let Some(reused_path) = reused_socket {
+            delegate.set_status(Some("Connecting (reusing session)"), cx);
+            log::info!("reusing existing ControlMaster, skipping authentication");
+            let socket = SshSocket::new(connection_options, reused_path).await?;
+            (socket, None)
+        } else {
+            let askpass_delegate = askpass::AskPassDelegate::new(cx, {
+                let delegate = delegate.clone();
+                move |prompt, tx, cx| delegate.ask_password(prompt, tx, cx)
+            });
+
+            let mut askpass =
+                askpass::AskPassSession::new(cx.background_executor().clone(), askpass_delegate)
+                    .await?;
+
+            delegate.set_status(Some("Connecting"), cx);
+
+            // Start the master SSH process, which does not do anything except
+            // for establish the connection and keep it open, allowing other ssh
+            // commands to reuse it via a control socket.
+            let socket_path = temp_dir.path().join("ssh.sock");
+            let mut master_process = MasterProcess::new(
+                askpass.script_path().as_ref(),
+                connection_options.additional_args(),
+                &socket_path,
+                &destination,
+            )?;
 
-        let result = select_biased! {
-            result = askpass.run().fuse() => {
-                match result {
-                    AskPassResult::CancelledByUser => {
-                        master_process.as_mut().kill().ok();
-                        anyhow::bail!("SSH connection canceled")
-                    }
-                    AskPassResult::Timedout => {
-                        anyhow::bail!("connecting to host timed out")
+            let result = select_biased! {
+                result = askpass.run().fuse() => {
+                    match result {
+                        AskPassResult::CancelledByUser => {
+                            master_process.as_mut().kill().ok();
+                            anyhow::bail!("SSH connection canceled")
+                        }
+                        AskPassResult::Timedout => {
+                            anyhow::bail!("connecting to host timed out")
+                        }
                     }
                 }
+                _ = master_process.wait_connected().fuse() => {
+                    anyhow::Ok(())
+                }
+            };
+
+            if let Err(e) = result {
+                return Err(e.context("Failed to connect to host"));
             }
-            _ = master_process.wait_connected().fuse() => {
-                anyhow::Ok(())
+
+            if master_process.as_mut().try_status()?.is_some() {
+                let mut output = Vec::new();
+                let mut stderr = master_process.as_mut().stderr.take().unwrap();
+                stderr.read_to_end(&mut output).await?;
+
+                let error_message = format!(
+                    "failed to connect: {}",
+                    String::from_utf8_lossy(&output).trim()
+                );
+                anyhow::bail!(error_message);
             }
+
+            let socket = SshSocket::new(connection_options, socket_path).await?;
+            drop(askpass);
+            (socket, Some(master_process))
         };
 
-        if let Err(e) = result {
-            return Err(e.context("Failed to connect to host"));
-        }
+        #[cfg(windows)]
+        let (socket, master_process_option) = {
+            let askpass_delegate = askpass::AskPassDelegate::new(cx, {
+                let delegate = delegate.clone();
+                move |prompt, tx, cx| delegate.ask_password(prompt, tx, cx)
+            });
+
+            let mut askpass =
+                askpass::AskPassSession::new(cx.background_executor().clone(), askpass_delegate)
+                    .await?;
+
+            delegate.set_status(Some("Connecting"), cx);
+
+            let mut master_process = MasterProcess::new(
+                askpass.script_path().as_ref(),
+                connection_options.additional_args(),
+                &destination,
+            )?;
+
+            let result = select_biased! {
+                result = askpass.run().fuse() => {
+                    match result {
+                        AskPassResult::CancelledByUser => {
+                            master_process.as_mut().kill().ok();
+                            anyhow::bail!("SSH connection canceled")
+                        }
+                        AskPassResult::Timedout => {
+                            anyhow::bail!("connecting to host timed out")
+                        }
+                    }
+                }
+                _ = master_process.wait_connected().fuse() => {
+                    anyhow::Ok(())
+                }
+            };
 
-        if master_process.as_mut().try_status()?.is_some() {
-            let mut output = Vec::new();
-            output.clear();
-            let mut stderr = master_process.as_mut().stderr.take().unwrap();
-            stderr.read_to_end(&mut output).await?;
+            if let Err(e) = result {
+                return Err(e.context("Failed to connect to host"));
+            }
 
-            let error_message = format!(
-                "failed to connect: {}",
-                String::from_utf8_lossy(&output).trim()
-            );
-            anyhow::bail!(error_message);
-        }
+            if master_process.as_mut().try_status()?.is_some() {
+                let mut output = Vec::new();
+                let mut stderr = master_process.as_mut().stderr.take().unwrap();
+                stderr.read_to_end(&mut output).await?;
 
-        #[cfg(not(windows))]
-        let socket = SshSocket::new(connection_options, socket_path).await?;
-        #[cfg(windows)]
-        let socket = SshSocket::new(
-            connection_options,
-            askpass
-                .get_password()
-                .or_else(|| askpass::EncryptedPassword::try_from("").ok())
-                .context("Failed to fetch askpass password")?,
-            cx.background_executor().clone(),
-        )
-        .await?;
-        drop(askpass);
+                let error_message = format!(
+                    "failed to connect: {}",
+                    String::from_utf8_lossy(&output).trim()
+                );
+                anyhow::bail!(error_message);
+            }
+
+            let socket = SshSocket::new(
+                connection_options,
+                askpass
+                    .get_password()
+                    .or_else(|| askpass::EncryptedPassword::try_from("").ok())
+                    .context("Failed to fetch askpass password")?,
+                cx.background_executor().clone(),
+            )
+            .await?;
+            drop(askpass);
+
+            (socket, Some(master_process))
+        };
 
         let is_windows = socket.probe_is_windows().await;
         log::info!("Remote is windows: {}", is_windows);
@@ -616,7 +761,8 @@ impl SshRemoteConnection {
 
         let mut this = Self {
             socket,
-            master_process: Mutex::new(Some(master_process)),
+            master_process: Mutex::new(master_process_option),
+            killed: AtomicBool::new(false),
             _temp_dir: temp_dir,
             remote_binary_path: None,
             ssh_path_style,