Implement panic reporting saving and uploads (#19932)

Mikayla Maki and Trace created

TODO: 
- [x] check that the app version is well formatted for zed.dev

Release Notes:

- N/A

---------

Co-authored-by: Trace <violet.white.batt@gmail.com>

Change summary

Cargo.lock                                      |   5 
crates/gpui/src/app.rs                          |  44 +++++
crates/proto/proto/zed.proto                    |  11 +
crates/proto/src/proto.rs                       |   9 
crates/remote_server/Cargo.toml                 |   8 
crates/remote_server/src/unix.rs                |  98 ++++++++++++
crates/telemetry_events/src/telemetry_events.rs |   4 
crates/zed/Cargo.toml                           |   1 
crates/zed/src/main.rs                          |   4 
crates/zed/src/reliability.rs                   | 137 ++++++++++++++----
10 files changed, 277 insertions(+), 44 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -9522,6 +9522,7 @@ dependencies = [
  "async-watch",
  "backtrace",
  "cargo_toml",
+ "chrono",
  "clap",
  "client",
  "clock",
@@ -9541,6 +9542,8 @@ dependencies = [
  "node_runtime",
  "paths",
  "project",
+ "proto",
+ "release_channel",
  "remote",
  "reqwest_client",
  "rpc",
@@ -9550,6 +9553,7 @@ dependencies = [
  "settings",
  "shellexpand 2.1.2",
  "smol",
+ "telemetry_events",
  "toml 0.8.19",
  "util",
  "worktree",
@@ -15056,6 +15060,7 @@ dependencies = [
  "project",
  "project_panel",
  "project_symbols",
+ "proto",
  "quick_action_bar",
  "recent_projects",
  "release_channel",

crates/gpui/src/app.rs 🔗

@@ -217,6 +217,7 @@ pub(crate) type KeystrokeObserver =
 type QuitHandler = Box<dyn FnOnce(&mut AppContext) -> LocalBoxFuture<'static, ()> + 'static>;
 type ReleaseListener = Box<dyn FnOnce(&mut dyn Any, &mut AppContext) + 'static>;
 type NewViewListener = Box<dyn FnMut(AnyView, &mut WindowContext) + 'static>;
+type NewModelListener = Box<dyn FnMut(AnyModel, &mut AppContext) + 'static>;
 
 /// Contains the state of the full application, and passed as a reference to a variety of callbacks.
 /// Other contexts such as [ModelContext], [WindowContext], and [ViewContext] deref to this type, making it the most general context type.
@@ -237,6 +238,7 @@ pub struct AppContext {
     http_client: Arc<dyn HttpClient>,
     pub(crate) globals_by_type: FxHashMap<TypeId, Box<dyn Any>>,
     pub(crate) entities: EntityMap,
+    pub(crate) new_model_observers: SubscriberSet<TypeId, NewModelListener>,
     pub(crate) new_view_observers: SubscriberSet<TypeId, NewViewListener>,
     pub(crate) windows: SlotMap<WindowId, Option<Window>>,
     pub(crate) window_handles: FxHashMap<WindowId, AnyWindowHandle>,
@@ -296,6 +298,7 @@ impl AppContext {
                 globals_by_type: FxHashMap::default(),
                 entities,
                 new_view_observers: SubscriberSet::new(),
+                new_model_observers: SubscriberSet::new(),
                 window_handles: FxHashMap::default(),
                 windows: SlotMap::with_key(),
                 keymap: Rc::new(RefCell::new(Keymap::default())),
@@ -1016,6 +1019,7 @@ impl AppContext {
         activate();
         subscription
     }
+
     /// Arrange for the given function to be invoked whenever a view of the specified type is created.
     /// The function will be passed a mutable reference to the view along with an appropriate context.
     pub fn observe_new_views<V: 'static>(
@@ -1035,6 +1039,31 @@ impl AppContext {
         )
     }
 
+    pub(crate) fn new_model_observer(&self, key: TypeId, value: NewModelListener) -> Subscription {
+        let (subscription, activate) = self.new_model_observers.insert(key, value);
+        activate();
+        subscription
+    }
+
+    /// Arrange for the given function to be invoked whenever a view of the specified type is created.
+    /// The function will be passed a mutable reference to the view along with an appropriate context.
+    pub fn observe_new_models<T: 'static>(
+        &self,
+        on_new: impl 'static + Fn(&mut T, &mut ModelContext<T>),
+    ) -> Subscription {
+        self.new_model_observer(
+            TypeId::of::<T>(),
+            Box::new(move |any_model: AnyModel, cx: &mut AppContext| {
+                any_model
+                    .downcast::<T>()
+                    .unwrap()
+                    .update(cx, |model_state, cx| {
+                        on_new(model_state, cx);
+                    })
+            }),
+        )
+    }
+
     /// Observe the release of a model or view. The callback is invoked after the model or view
     /// has no more strong references but before it has been dropped.
     pub fn observe_release<E, T>(
@@ -1346,8 +1375,21 @@ impl Context for AppContext {
     ) -> Model<T> {
         self.update(|cx| {
             let slot = cx.entities.reserve();
+            let model = slot.clone();
             let entity = build_model(&mut ModelContext::new(cx, slot.downgrade()));
-            cx.entities.insert(slot, entity)
+            cx.entities.insert(slot, entity);
+
+            // Non-generic part to avoid leaking SubscriberSet to invokers of `new_view`.
+            fn notify_observers(cx: &mut AppContext, tid: TypeId, model: AnyModel) {
+                cx.new_model_observers.clone().retain(&tid, |observer| {
+                    let any_model = model.clone();
+                    (observer)(any_model, cx);
+                    true
+                });
+            }
+            notify_observers(cx, TypeId::of::<T>(), AnyModel::from(model.clone()));
+
+            model
         })
     }
 

crates/proto/proto/zed.proto 🔗

@@ -289,7 +289,10 @@ message Envelope {
         ActiveToolchainResponse active_toolchain_response = 277;
 
         GetPathMetadata get_path_metadata = 278;
-        GetPathMetadataResponse get_path_metadata_response = 279; // current max
+        GetPathMetadataResponse get_path_metadata_response = 279;
+
+        GetPanicFiles get_panic_files = 280;
+        GetPanicFilesResponse get_panic_files_response = 281; // current max
     }
 
     reserved 87 to 88;
@@ -2489,5 +2492,11 @@ message UpdateGitBranch {
     uint64 project_id = 1;
     string branch_name = 2;
     ProjectPath repository = 3;
+}
+
+message GetPanicFiles {
+}
 
+message GetPanicFilesResponse {
+    repeated string file_contents = 2;
 }

crates/proto/src/proto.rs 🔗

@@ -363,7 +363,9 @@ messages!(
     (ActiveToolchain, Foreground),
     (ActiveToolchainResponse, Foreground),
     (GetPathMetadata, Background),
-    (GetPathMetadataResponse, Background)
+    (GetPathMetadataResponse, Background),
+    (GetPanicFiles, Background),
+    (GetPanicFilesResponse, Background),
 );
 
 request_messages!(
@@ -483,7 +485,8 @@ request_messages!(
     (ListToolchains, ListToolchainsResponse),
     (ActivateToolchain, Ack),
     (ActiveToolchain, ActiveToolchainResponse),
-    (GetPathMetadata, GetPathMetadataResponse)
+    (GetPathMetadata, GetPathMetadataResponse),
+    (GetPanicFiles, GetPanicFilesResponse)
 );
 
 entity_messages!(
@@ -566,7 +569,7 @@ entity_messages!(
     ListToolchains,
     ActivateToolchain,
     ActiveToolchain,
-    GetPathMetadata
+    GetPathMetadata,
 );
 
 entity_messages!(

crates/remote_server/Cargo.toml 🔗

@@ -22,9 +22,10 @@ debug-embed = ["dep:rust-embed"]
 test-support = ["fs/test-support"]
 
 [dependencies]
-async-watch.workspace = true
 anyhow.workspace = true
+async-watch.workspace = true
 backtrace = "0.3"
+chrono.workspace = true
 clap.workspace = true
 client.workspace = true
 env_logger.workspace = true
@@ -39,8 +40,10 @@ languages.workspace = true
 log.workspace = true
 lsp.workspace = true
 node_runtime.workspace = true
-project.workspace = true
 paths = { workspace = true }
+project.workspace = true
+proto.workspace = true
+release_channel.workspace = true
 remote.workspace = true
 reqwest_client.workspace = true
 rpc.workspace = true
@@ -50,6 +53,7 @@ serde_json.workspace = true
 settings.workspace = true
 shellexpand.workspace = true
 smol.workspace = true
+telemetry_events.workspace = true
 util.workspace = true
 worktree.workspace = true
 

crates/remote_server/src/unix.rs 🔗

@@ -1,12 +1,13 @@
 use crate::headless_project::HeadlessAppState;
 use crate::HeadlessProject;
 use anyhow::{anyhow, Context, Result};
-use client::ProxySettings;
+use chrono::Utc;
+use client::{telemetry, ProxySettings};
 use fs::{Fs, RealFs};
 use futures::channel::mpsc;
 use futures::{select, select_biased, AsyncRead, AsyncWrite, AsyncWriteExt, FutureExt, SinkExt};
 use git::GitHostingProviderRegistry;
-use gpui::{AppContext, Context as _, ModelContext, UpdateGlobal as _};
+use gpui::{AppContext, Context as _, Model, ModelContext, UpdateGlobal as _};
 use http_client::{read_proxy_from_env, Uri};
 use language::LanguageRegistry;
 use node_runtime::{NodeBinaryOptions, NodeRuntime};
@@ -21,19 +22,23 @@ use remote::{
 };
 use reqwest_client::ReqwestClient;
 use rpc::proto::{self, Envelope, SSH_PROJECT_ID};
+use rpc::{AnyProtoClient, TypedEnvelope};
 use settings::{watch_config_file, Settings, SettingsStore};
 use smol::channel::{Receiver, Sender};
 use smol::io::AsyncReadExt;
 
 use smol::Async;
 use smol::{net::unix::UnixListener, stream::StreamExt as _};
+use std::ffi::OsStr;
 use std::ops::ControlFlow;
+use std::{env, thread};
 use std::{
     io::Write,
     mem,
     path::{Path, PathBuf},
     sync::Arc,
 };
+use telemetry_events::LocationData;
 use util::ResultExt;
 
 fn init_logging_proxy() {
@@ -131,16 +136,97 @@ fn init_panic_hook() {
             backtrace.drain(0..=ix);
         }
 
+        let thread = thread::current();
+        let thread_name = thread.name().unwrap_or("<unnamed>");
+
         log::error!(
             "panic occurred: {}\nBacktrace:\n{}",
-            payload,
-            backtrace.join("\n")
+            &payload,
+            (&backtrace).join("\n")
         );
 
+        let panic_data = telemetry_events::Panic {
+            thread: thread_name.into(),
+            payload: payload.clone(),
+            location_data: info.location().map(|location| LocationData {
+                file: location.file().into(),
+                line: location.line(),
+            }),
+            app_version: format!(
+                "remote-server-{}",
+                option_env!("ZED_COMMIT_SHA").unwrap_or(&env!("ZED_PKG_VERSION"))
+            ),
+            release_channel: release_channel::RELEASE_CHANNEL.display_name().into(),
+            os_name: telemetry::os_name(),
+            os_version: Some(telemetry::os_version()),
+            architecture: env::consts::ARCH.into(),
+            panicked_on: Utc::now().timestamp_millis(),
+            backtrace,
+            system_id: None,            // Set on SSH client
+            installation_id: None,      // Set on SSH client
+            session_id: "".to_string(), // Set on SSH client
+        };
+
+        if let Some(panic_data_json) = serde_json::to_string(&panic_data).log_err() {
+            let timestamp = chrono::Utc::now().format("%Y_%m_%d %H_%M_%S").to_string();
+            let panic_file_path = paths::logs_dir().join(format!("zed-{timestamp}.panic"));
+            let panic_file = std::fs::OpenOptions::new()
+                .append(true)
+                .create(true)
+                .open(&panic_file_path)
+                .log_err();
+            if let Some(mut panic_file) = panic_file {
+                writeln!(&mut panic_file, "{panic_data_json}").log_err();
+                panic_file.flush().log_err();
+            }
+        }
+
         std::process::abort();
     }));
 }
 
+fn handle_panic_requests(project: &Model<HeadlessProject>, client: &Arc<ChannelClient>) {
+    let client: AnyProtoClient = client.clone().into();
+    client.add_request_handler(
+        project.downgrade(),
+        |_, _: TypedEnvelope<proto::GetPanicFiles>, _cx| async move {
+            let mut children = smol::fs::read_dir(paths::logs_dir()).await?;
+            let mut panic_files = Vec::new();
+            while let Some(child) = children.next().await {
+                let child = child?;
+                let child_path = child.path();
+
+                if child_path.extension() != Some(OsStr::new("panic")) {
+                    continue;
+                }
+                let filename = if let Some(filename) = child_path.file_name() {
+                    filename.to_string_lossy()
+                } else {
+                    continue;
+                };
+
+                if !filename.starts_with("zed") {
+                    continue;
+                }
+
+                let file_contents = smol::fs::read_to_string(&child_path)
+                    .await
+                    .context("error reading panic file")?;
+
+                panic_files.push(file_contents);
+
+                // We've done what we can, delete the file
+                std::fs::remove_file(child_path)
+                    .context("error removing panic")
+                    .log_err();
+            }
+            anyhow::Ok(proto::GetPanicFilesResponse {
+                file_contents: panic_files,
+            })
+        },
+    );
+}
+
 struct ServerListeners {
     stdin: UnixListener,
     stdout: UnixListener,
@@ -368,7 +454,7 @@ pub fn execute_run(
 
             HeadlessProject::new(
                 HeadlessAppState {
-                    session,
+                    session: session.clone(),
                     fs,
                     http_client,
                     node_runtime,
@@ -378,6 +464,8 @@ pub fn execute_run(
             )
         });
 
+        handle_panic_requests(&project, &session);
+
         mem::forget(project);
     });
     log::info!("gpui app is shut down. quitting.");

crates/telemetry_events/src/telemetry_events.rs 🔗

@@ -222,13 +222,13 @@ pub struct HangReport {
     pub installation_id: Option<String>,
 }
 
-#[derive(Serialize, Deserialize)]
+#[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct LocationData {
     pub file: String,
     pub line: u32,
 }
 
-#[derive(Serialize, Deserialize)]
+#[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct Panic {
     /// The name of the thread that panicked
     pub thread: String,

crates/zed/Cargo.toml 🔗

@@ -77,6 +77,7 @@ profiling.workspace = true
 project.workspace = true
 project_panel.workspace = true
 project_symbols.workspace = true
+proto.workspace = true
 quick_action_bar.workspace = true
 recent_projects.workspace = true
 release_channel.workspace = true

crates/zed/src/main.rs 🔗

@@ -332,7 +332,7 @@ fn main() {
         telemetry.start(
             system_id.as_ref().map(|id| id.to_string()),
             installation_id.as_ref().map(|id| id.to_string()),
-            session_id,
+            session_id.clone(),
             cx,
         );
 
@@ -368,7 +368,9 @@ fn main() {
         auto_update::init(client.http_client(), cx);
         reliability::init(
             client.http_client(),
+            system_id.as_ref().map(|id| id.to_string()),
             installation_id.clone().map(|id| id.to_string()),
+            session_id.clone(),
             cx,
         );
 

crates/zed/src/reliability.rs 🔗

@@ -1,13 +1,14 @@
 use anyhow::{Context, Result};
 use backtrace::{self, Backtrace};
 use chrono::Utc;
-use client::telemetry;
+use client::{telemetry, TelemetrySettings};
 use db::kvp::KEY_VALUE_STORE;
 use gpui::{AppContext, SemanticVersion};
 use http_client::{HttpRequestExt, Method};
 
 use http_client::{self, HttpClient, HttpClientWithUrl};
 use paths::{crashes_dir, crashes_retired_dir};
+use project::Project;
 use release_channel::ReleaseChannel;
 use release_channel::RELEASE_CHANNEL;
 use settings::Settings;
@@ -21,6 +22,7 @@ use std::{io::Write, panic, sync::atomic::AtomicU32, thread};
 use telemetry_events::LocationData;
 use telemetry_events::Panic;
 use telemetry_events::PanicRequest;
+use url::Url;
 use util::ResultExt;
 
 use crate::stdout_is_a_pty;
@@ -133,13 +135,73 @@ pub fn init_panic_hook(
 
 pub fn init(
     http_client: Arc<HttpClientWithUrl>,
+    system_id: Option<String>,
     installation_id: Option<String>,
+    session_id: String,
     cx: &mut AppContext,
 ) {
     #[cfg(target_os = "macos")]
     monitor_main_thread_hangs(http_client.clone(), installation_id.clone(), cx);
 
-    upload_panics_and_crashes(http_client, installation_id, cx)
+    let Some(panic_report_url) = http_client
+        .build_zed_api_url("/telemetry/panics", &[])
+        .log_err()
+    else {
+        return;
+    };
+
+    upload_panics_and_crashes(
+        http_client.clone(),
+        panic_report_url.clone(),
+        installation_id.clone(),
+        cx,
+    );
+
+    cx.observe_new_models(move |project: &mut Project, cx| {
+        let http_client = http_client.clone();
+        let panic_report_url = panic_report_url.clone();
+        let session_id = session_id.clone();
+        let installation_id = installation_id.clone();
+        let system_id = system_id.clone();
+
+        if let Some(ssh_client) = project.ssh_client() {
+            ssh_client.update(cx, |client, cx| {
+                if TelemetrySettings::get_global(cx).diagnostics {
+                    let request = client.proto_client().request(proto::GetPanicFiles {});
+                    cx.background_executor()
+                        .spawn(async move {
+                            let panic_files = request.await?;
+                            for file in panic_files.file_contents {
+                                let panic: Option<Panic> = serde_json::from_str(&file)
+                                    .log_err()
+                                    .or_else(|| {
+                                        file.lines()
+                                            .next()
+                                            .and_then(|line| serde_json::from_str(line).ok())
+                                    })
+                                    .unwrap_or_else(|| {
+                                        log::error!("failed to deserialize panic file {:?}", file);
+                                        None
+                                    });
+
+                                if let Some(mut panic) = panic {
+                                    panic.session_id = session_id.clone();
+                                    panic.system_id = system_id.clone();
+                                    panic.installation_id = installation_id.clone();
+
+                                    upload_panic(&http_client, &panic_report_url, panic, &mut None)
+                                        .await?;
+                                }
+                            }
+
+                            anyhow::Ok(())
+                        })
+                        .detach_and_log_err(cx);
+                }
+            })
+        }
+    })
+    .detach();
 }
 
 #[cfg(target_os = "macos")]
@@ -346,16 +408,18 @@ pub fn monitor_main_thread_hangs(
 
 fn upload_panics_and_crashes(
     http: Arc<HttpClientWithUrl>,
+    panic_report_url: Url,
     installation_id: Option<String>,
     cx: &AppContext,
 ) {
     let telemetry_settings = *client::TelemetrySettings::get_global(cx);
     cx.background_executor()
         .spawn(async move {
-            let most_recent_panic = upload_previous_panics(http.clone(), telemetry_settings)
-                .await
-                .log_err()
-                .flatten();
+            let most_recent_panic =
+                upload_previous_panics(http.clone(), &panic_report_url, telemetry_settings)
+                    .await
+                    .log_err()
+                    .flatten();
             upload_previous_crashes(http, most_recent_panic, installation_id, telemetry_settings)
                 .await
                 .log_err()
@@ -366,9 +430,9 @@ fn upload_panics_and_crashes(
 /// Uploads panics via `zed.dev`.
 async fn upload_previous_panics(
     http: Arc<HttpClientWithUrl>,
+    panic_report_url: &Url,
     telemetry_settings: client::TelemetrySettings,
-) -> Result<Option<(i64, String)>> {
-    let panic_report_url = http.build_zed_api_url("/telemetry/panics", &[])?;
+) -> anyhow::Result<Option<(i64, String)>> {
     let mut children = smol::fs::read_dir(paths::logs_dir()).await?;
 
     let mut most_recent_panic = None;
@@ -396,7 +460,7 @@ async fn upload_previous_panics(
                 .context("error reading panic file")?;
 
             let panic: Option<Panic> = serde_json::from_str(&panic_file_content)
-                .ok()
+                .log_err()
                 .or_else(|| {
                     panic_file_content
                         .lines()
@@ -409,26 +473,8 @@ async fn upload_previous_panics(
                 });
 
             if let Some(panic) = panic {
-                most_recent_panic = Some((panic.panicked_on, panic.payload.clone()));
-
-                let json_bytes = serde_json::to_vec(&PanicRequest { panic }).unwrap();
-
-                let Some(checksum) = client::telemetry::calculate_json_checksum(&json_bytes) else {
+                if !upload_panic(&http, &panic_report_url, panic, &mut most_recent_panic).await? {
                     continue;
-                };
-
-                let Ok(request) = http_client::Request::builder()
-                    .method(Method::POST)
-                    .uri(panic_report_url.as_ref())
-                    .header("x-zed-checksum", checksum)
-                    .body(json_bytes.into())
-                else {
-                    continue;
-                };
-
-                let response = http.send(request).await.context("error sending panic")?;
-                if !response.status().is_success() {
-                    log::error!("Error uploading panic to server: {}", response.status());
                 }
             }
         }
@@ -438,9 +484,42 @@ async fn upload_previous_panics(
             .context("error removing panic")
             .log_err();
     }
-    Ok::<_, anyhow::Error>(most_recent_panic)
+    Ok(most_recent_panic)
 }
 
+async fn upload_panic(
+    http: &Arc<HttpClientWithUrl>,
+    panic_report_url: &Url,
+    panic: telemetry_events::Panic,
+    most_recent_panic: &mut Option<(i64, String)>,
+) -> Result<bool> {
+    *most_recent_panic = Some((panic.panicked_on, panic.payload.clone()));
+
+    let json_bytes = serde_json::to_vec(&PanicRequest {
+        panic: panic.clone(),
+    })
+    .unwrap();
+
+    let Some(checksum) = client::telemetry::calculate_json_checksum(&json_bytes) else {
+        return Ok(false);
+    };
+
+    let Ok(request) = http_client::Request::builder()
+        .method(Method::POST)
+        .uri(panic_report_url.as_ref())
+        .header("x-zed-checksum", checksum)
+        .body(json_bytes.into())
+    else {
+        return Ok(false);
+    };
+
+    let response = http.send(request).await.context("error sending panic")?;
+    if !response.status().is_success() {
+        log::error!("Error uploading panic to server: {}", response.status());
+    }
+
+    Ok(true)
+}
 const LAST_CRASH_UPLOADED: &str = "LAST_CRASH_UPLOADED";
 
 /// upload crashes from apple's diagnostic reports to our server.