remote: Add remote timeout debugging commands (#46695)

Lukas Wirth created

Release Notes:

- N/A (internal debugging commands, no user-facing behavior change)

Change summary

crates/project/src/trusted_worktrees.rs   |  5 +
crates/proto/proto/app.proto              |  2 
crates/remote/src/json_log.rs             | 18 +++--
crates/remote/src/remote_client.rs        | 76 ++++++++++++++++++++++--
crates/remote/src/transport.rs            |  2 
crates/remote_server/src/remote_server.rs | 10 --
crates/remote_server/src/unix.rs          | 21 ++++-
crates/zed/src/main.rs                    |  1 
crates/zed/src/zed.rs                     |  2 
crates/zed/src/zed/remote_debug.rs        | 52 +++++++++++++++++
crates/zed_actions/src/lib.rs             | 19 ++++++
11 files changed, 176 insertions(+), 32 deletions(-)

Detailed changes

crates/project/src/trusted_worktrees.rs 🔗

@@ -324,7 +324,10 @@ impl TrustedWorktreesStore {
                 }
                 PathTrust::AbsPath(abs_path) => {
                     debug_assert!(
-                        abs_path.is_absolute(),
+                        util::paths::is_absolute(
+                            &abs_path.to_string_lossy(),
+                            worktree_store.read(cx).path_style()
+                        ),
                         "Cannot trust non-absolute path {abs_path:?}"
                     );
                     if let Some((worktree_id, is_file)) =

crates/remote/src/json_log.rs 🔗

@@ -1,23 +1,25 @@
+use std::borrow::Cow;
+
 use log::{Level, Log, Record};
 use serde::{Deserialize, Serialize};
 
 #[derive(Deserialize, Debug, Serialize)]
 pub struct LogRecord<'a> {
     pub level: usize,
-    pub module_path: Option<&'a str>,
-    pub file: Option<&'a str>,
+    pub module_path: Option<Cow<'a, str>>,
+    pub file: Option<Cow<'a, str>>,
     pub line: Option<u32>,
-    pub message: String,
+    pub message: Cow<'a, str>,
 }
 
 impl<'a> LogRecord<'a> {
     pub fn new(record: &'a Record<'a>) -> Self {
         Self {
             level: serialize_level(record.level()),
-            module_path: record.module_path(),
-            file: record.file(),
+            module_path: record.module_path().map(Cow::Borrowed),
+            file: record.file().map(Cow::Borrowed),
             line: record.line(),
-            message: record.args().to_string(),
+            message: Cow::Owned(record.args().to_string()),
         }
     }
 
@@ -25,10 +27,10 @@ impl<'a> LogRecord<'a> {
         if let Some(level) = deserialize_level(self.level) {
             logger.log(
                 &log::Record::builder()
-                    .module_path(self.module_path)
+                    .module_path(self.module_path.as_deref())
                     .target("remote_server")
                     .args(format_args!("{}", self.message))
-                    .file(self.file)
+                    .file(self.file.as_deref())
                     .line(self.line)
                     .level(level)
                     .build(),

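Switching these fields to `Cow<'a, str>` lets the proxy and server rewrite `message` while still allowing serde to borrow from the input buffer where possible. A minimal standalone sketch of that round trip (simplified struct, not the crate's actual `LogRecord`):

```rust
use std::borrow::Cow;

use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Debug)]
struct Record<'a> {
    // `#[serde(borrow)]` lets serde_json borrow the string from the input
    // when no unescaping is needed, instead of always allocating.
    #[serde(borrow)]
    message: Cow<'a, str>,
}

fn main() -> serde_json::Result<()> {
    let json = r#"{"message":"starting up"}"#;
    let mut record: Record = serde_json::from_str(json)?;
    assert!(matches!(record.message, Cow::Borrowed(_)));

    // Rewriting the message, as init_logging_proxy does, swaps in an owned
    // value without changing the field's type.
    record.message = Cow::Owned(format!("(remote proxy) {}", record.message));
    println!("{}", serde_json::to_string(&record)?);
    Ok(())
}
```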
crates/remote/src/remote_client.rs 🔗

@@ -142,7 +142,7 @@ const HEARTBEAT_TIMEOUT: Duration = Duration::from_secs(5);
 const INITIAL_CONNECTION_TIMEOUT: Duration =
     Duration::from_secs(if cfg!(debug_assertions) { 5 } else { 60 });
 
-const MAX_RECONNECT_ATTEMPTS: usize = 3;
+pub const MAX_RECONNECT_ATTEMPTS: usize = 3;
 
 enum State {
     Connecting,
@@ -241,7 +241,7 @@ impl State {
                 heartbeat_task,
                 ..
             } => Self::Connected {
-                remote_connection: remote_connection,
+                remote_connection,
                 delegate,
                 multiplex_task,
                 heartbeat_task,
@@ -533,13 +533,17 @@ impl RemoteClient {
             .map(|state| state.can_reconnect())
             .unwrap_or(false);
         if !can_reconnect {
-            log::info!("aborting reconnect, because not in state that allows reconnecting");
-            let error = if let Some(state) = self.state.as_ref() {
-                format!("invalid state, cannot reconnect while in state {state}")
+            let state = if let Some(state) = self.state.as_ref() {
+                state.to_string()
             } else {
                 "no state set".to_string()
             };
-            anyhow::bail!(error);
+            log::info!(
+                "aborting reconnect, because not in state that allows reconnecting: {state}"
+            );
+            anyhow::bail!(
+                "aborting reconnect, because not in state that allows reconnecting: {state}"
+            );
         }
 
         let state = self.state.take().unwrap();
@@ -654,7 +658,7 @@ impl RemoteClient {
             };
 
             State::Connected {
-                remote_connection: remote_connection,
+                remote_connection,
                 delegate,
                 multiplex_task,
                 heartbeat_task: Self::heartbeat(this.clone(), connection_activity_rx, cx),
@@ -956,6 +960,64 @@ impl RemoteClient {
         self.path_style
     }
 
+    /// Forcibly disconnects from the remote server by killing the underlying connection.
+    /// This will trigger the reconnection logic if reconnection attempts remain.
+    /// Useful for testing reconnection behavior in real environments.
+    pub fn force_disconnect(&mut self, cx: &mut Context<Self>) -> Task<Result<()>> {
+        let Some(connection) = self.remote_connection() else {
+            return Task::ready(Err(anyhow!("no active remote connection to disconnect")));
+        };
+
+        log::info!("force_disconnect: killing remote connection");
+
+        cx.spawn(async move |_, _| {
+            connection.kill().await?;
+            Ok(())
+        })
+    }
+
+    /// Simulates a timeout by pausing heartbeat responses.
+    /// This will cause heartbeat failures and eventually trigger reconnection
+    /// after MAX_MISSED_HEARTBEATS are missed.
+    /// Useful for testing timeout behavior in real environments.
+    pub fn force_heartbeat_timeout(&mut self, attempts: usize, cx: &mut Context<Self>) {
+        log::info!("force_heartbeat_timeout: triggering heartbeat failure state");
+
+        if let Some(State::Connected {
+            remote_connection,
+            delegate,
+            multiplex_task,
+            heartbeat_task,
+        }) = self.state.take()
+        {
+            self.set_state(
+                if attempts == 0 {
+                    State::HeartbeatMissed {
+                        missed_heartbeats: MAX_MISSED_HEARTBEATS,
+                        remote_connection,
+                        delegate,
+                        multiplex_task,
+                        heartbeat_task,
+                    }
+                } else {
+                    State::ReconnectFailed {
+                        remote_connection,
+                        delegate,
+                        error: anyhow!("forced heartbeat timeout"),
+                        attempts,
+                    }
+                },
+                cx,
+            );
+
+            self.reconnect(cx)
+                .context("failed to start reconnect after forced timeout")
+                .log_err();
+        } else {
+            log::warn!("force_heartbeat_timeout: not in Connected state, ignoring");
+        }
+    }
+
     #[cfg(any(test, feature = "test-support"))]
     pub fn simulate_disconnect(&self, client_cx: &mut App) -> Task<()> {
         let opts = self.connection_options();

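For reference, a hedged sketch of driving the two new debug hooks, assuming `RemoteClient` and the now-public `MAX_RECONNECT_ATTEMPTS` are both reachable via `remote::remote_client` and that you hold a gpui `Entity` handle (mirroring how the `remote_debug` actions below use them):

```rust
use gpui::{App, Entity};
use remote::remote_client::{MAX_RECONNECT_ATTEMPTS, RemoteClient};

// Kill the live connection outright; the reconnection logic kicks in as
// long as reconnect attempts remain.
fn debug_disconnect(client: Entity<RemoteClient>, cx: &mut App) {
    client.update(cx, |client, cx| {
        client.force_disconnect(cx).detach_and_log_err(cx);
    });
}

// Replay a heartbeat timeout. Passing 0 enters HeartbeatMissed with the
// maximum missed count; passing MAX_RECONNECT_ATTEMPTS exercises the
// "reconnect attempts exhausted" path instead.
fn debug_timeout(client: Entity<RemoteClient>, exhausted: bool, cx: &mut App) {
    let attempts = if exhausted { MAX_RECONNECT_ATTEMPTS } else { 0 };
    client.update(cx, |client, cx| {
        client.force_heartbeat_timeout(attempts, cx);
    });
}
```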
crates/remote/src/transport.rs 🔗

@@ -193,7 +193,7 @@ async fn build_remote_server_from_source(
     async fn run_cmd(command: &mut Command) -> Result<()> {
         let output = command
             .kill_on_drop(true)
-            .stderr(Stdio::inherit())
+            .stdout(Stdio::inherit())
             .output()
             .await?;
         anyhow::ensure!(

crates/remote_server/src/remote_server.rs 🔗

@@ -38,7 +38,7 @@ pub enum Commands {
 pub fn run(command: Commands) -> anyhow::Result<()> {
     use anyhow::Context;
     use release_channel::{RELEASE_CHANNEL, ReleaseChannel};
-    use unix::{ExecuteProxyError, execute_proxy, execute_run};
+    use unix::{execute_proxy, execute_run};
 
     match command {
         Commands::Run {
@@ -57,13 +57,7 @@ pub fn run(command: Commands) -> anyhow::Result<()> {
         Commands::Proxy {
             identifier,
             reconnect,
-        } => execute_proxy(identifier, reconnect)
-            .inspect_err(|err| {
-                if let ExecuteProxyError::ServerNotRunning(err) = err {
-                    std::process::exit(err.to_exit_code());
-                }
-            })
-            .context("running proxy on the remote server"),
+        } => execute_proxy(identifier, reconnect).context("running proxy on the remote server"),
         Commands::Version => {
             let release_channel = *RELEASE_CHANNEL;
             match release_channel {

crates/remote_server/src/unix.rs 🔗

@@ -67,7 +67,8 @@ fn init_logging_proxy() {
     env_logger::builder()
         .format(|buf, record| {
             let mut log_record = LogRecord::new(record);
-            log_record.message = format!("(remote proxy) {}", log_record.message);
+            log_record.message =
+                std::borrow::Cow::Owned(format!("(remote proxy) {}", log_record.message));
             serde_json::to_writer(&mut *buf, &log_record)?;
             buf.write_all(b"\n")?;
             Ok(())
@@ -75,7 +76,7 @@ fn init_logging_proxy() {
         .init();
 }
 
-fn init_logging_server(log_file_path: PathBuf) -> Result<Receiver<Vec<u8>>> {
+fn init_logging_server(log_file_path: &Path) -> Result<Receiver<Vec<u8>>> {
     struct MultiWrite {
         file: File,
         channel: Sender<Vec<u8>>,
@@ -101,7 +102,7 @@ fn init_logging_server(log_file_path: PathBuf) -> Result<Receiver<Vec<u8>>> {
     let log_file = std::fs::OpenOptions::new()
         .create(true)
         .append(true)
-        .open(&log_file_path)
+        .open(log_file_path)
         .context("Failed to open log file in append mode")?;
 
     let (tx, rx) = smol::channel::unbounded();
@@ -112,13 +113,19 @@ fn init_logging_server(log_file_path: PathBuf) -> Result<Receiver<Vec<u8>>> {
         buffer: Vec::new(),
     });
 
+    let old_hook = std::panic::take_hook();
+    std::panic::set_hook(Box::new(move |info| {
+        log::error!("Panic occurred: {:?}", info);
+        old_hook(info);
+    }));
     env_logger::Builder::new()
         .filter_level(log::LevelFilter::Info)
         .parse_default_env()
         .target(env_logger::Target::Pipe(target))
         .format(|buf, record| {
             let mut log_record = LogRecord::new(record);
-            log_record.message = format!("(remote server) {}", log_record.message);
+            log_record.message =
+                std::borrow::Cow::Owned(format!("(remote server) {}", log_record.message));
             serde_json::to_writer(&mut *buf, &log_record)?;
             buf.write_all(b"\n")?;
             Ok(())
@@ -367,10 +374,11 @@ pub fn execute_run(
             commit_sha: option_env!("ZED_COMMIT_SHA").unwrap_or("no_sha").to_owned(),
         }))
         .detach();
-    let log_rx = init_logging_server(log_file)?;
+    let log_rx = init_logging_server(&log_file)?;
     log::info!(
-        "starting up. pid_file: {:?}, stdin_socket: {:?}, stdout_socket: {:?}, stderr_socket: {:?}",
+        "starting up. pid_file: {:?}, log_file: {:?}, stdin_socket: {:?}, stdout_socket: {:?}, stderr_socket: {:?}",
         pid_file,
+        log_file,
         stdin_socket,
         stdout_socket,
         stderr_socket
@@ -723,6 +731,7 @@ pub(crate) enum SpawnServerError {
 }
 
 async fn spawn_server(paths: &ServerPaths) -> Result<(), SpawnServerError> {
+    log::info!("spawning server process",);
     if paths.stdin_socket.exists() {
         std::fs::remove_file(&paths.stdin_socket).map_err(SpawnServerError::RemoveStdinSocket)?;
     }

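The panic hook added to `init_logging_server` chains onto the previously installed hook, so panics reach the JSON log pipeline without losing the default handling. A standalone sketch of the pattern, using a plain `env_logger` setup rather than the server's pipe target:

```rust
fn main() {
    env_logger::init();

    // Chain onto the existing hook: log the panic first, then delegate so
    // the default behavior (backtrace printing, etc.) is preserved.
    let old_hook = std::panic::take_hook();
    std::panic::set_hook(Box::new(move |info| {
        log::error!("Panic occurred: {:?}", info);
        old_hook(info);
    }));

    panic!("demo panic, now visible in the log stream too");
}
```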
crates/zed/src/main.rs 🔗

@@ -603,6 +603,7 @@ fn main() {
         language_models::init(app_state.user_store.clone(), app_state.client.clone(), cx);
         acp_tools::init(cx);
         zed::telemetry_log::init(cx);
+        zed::remote_debug::init(cx);
         edit_prediction_ui::init(cx);
         web_search::init(cx);
         web_search_providers::init(app_state.client.clone(), cx);

crates/zed/src/zed.rs 🔗

@@ -6,6 +6,7 @@ mod migrate;
 mod open_listener;
 mod open_url_modal;
 mod quick_action_bar;
+pub mod remote_debug;
 pub mod telemetry_log;
 #[cfg(all(target_os = "macos", any(test, feature = "test-support")))]
 pub mod visual_tests;
@@ -4761,6 +4762,7 @@ mod tests {
                 "project_search",
                 "project_symbols",
                 "projects",
+                "remote_debug",
                 "repl",
                 "rules_library",
                 "search",

crates/zed/src/zed/remote_debug.rs 🔗

@@ -0,0 +1,52 @@
+use workspace::Workspace;
+use zed_actions::remote_debug::{SimulateDisconnect, SimulateTimeout, SimulateTimeoutExhausted};
+
+pub fn init(cx: &mut gpui::App) {
+    cx.observe_new(|workspace: &mut Workspace, _, cx| {
+        let project = workspace.project().read(cx);
+        let Some(remote_client) = project.remote_client() else {
+            return;
+        };
+
+        workspace.register_action({
+            let remote_client = remote_client.downgrade();
+            move |_, _: &SimulateDisconnect, _window, cx| {
+                let Some(remote_client) = remote_client.upgrade() else {
+                    return;
+                };
+
+                log::info!("SimulateDisconnect: forcing disconnect from remote server");
+                remote_client.update(cx, |client, cx| {
+                    client.force_disconnect(cx).detach_and_log_err(cx);
+                });
+            }
+        });
+
+        workspace.register_action({
+            let remote_client = remote_client.downgrade();
+            move |_, _: &SimulateTimeout, _window, cx| {
+                let Some(remote_client) = remote_client.upgrade() else {
+                    return;
+                };
+
+                log::info!("SimulateTimeout: forcing heartbeat timeout on remote connection");
+                remote_client.update(cx, |client, cx| {
+                    client.force_heartbeat_timeout(0, cx);
+                });
+            }
+        });
+
+        let remote_client = remote_client.downgrade();
+        workspace.register_action(move |_, _: &SimulateTimeoutExhausted, _window, cx| {
+            let Some(remote_client) = remote_client.upgrade() else {
+                return;
+            };
+
+            log::info!("SimulateTimeoutExhausted: forcing heartbeat timeout on remote connection");
+            remote_client.update(cx, |client, cx| {
+                client.force_heartbeat_timeout(remote::remote_client::MAX_RECONNECT_ATTEMPTS, cx);
+            });
+        });
+    })
+    .detach();
+}

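Note that `remote_debug::init` registers these actions only for workspaces whose project has a remote client (see the early return above), so they are never bound in purely local workspaces.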
crates/zed_actions/src/lib.rs 🔗

@@ -188,6 +188,25 @@ pub mod dev {
     );
 }
 
+pub mod remote_debug {
+    use gpui::actions;
+
+    actions!(
+        remote_debug,
+        [
+            /// Simulates a disconnection from the remote server for testing purposes.
+            /// This will trigger the reconnection logic.
+            SimulateDisconnect,
+            /// Simulates a timeout/slow connection to the remote server for testing purposes.
+            /// This will cause heartbeat failures and trigger reconnection.
+            SimulateTimeout,
+            /// Simulates a timeout/slow connection to the remote server for testing purposes.
+            /// This will cause heartbeat failures and attempt a reconnection with all reconnect attempts already exhausted.
+            SimulateTimeoutExhausted,
+        ]
+    );
+}
+
 pub mod workspace {
     use gpui::actions;
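
With these declarations in place, the three commands should surface like any other action in the `remote_debug` namespace, available from the command palette and bindable in the keymap.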