Fix crash metrics ID (#50728)

Conrad Irwin and Miguel Raz Guzmรกn Macedo created

Before this change the crash handler uploaded crashes before sign-in had
happened.

Now we get the metrics_id correctly. This allows for us to tie crashes
reported
on Github to users who have opted into telemetry (users who have opted
into
crash reporting but not telemetry will not have a metrics_id).

Release Notes:

- Fixed crash reporter metadata collection

---------

Co-authored-by: Miguel Raz Guzmรกn Macedo <miguel@zed.dev>

Change summary

Cargo.lock                    |   3 
crates/crashes/Cargo.toml     |   3 
crates/crashes/src/crashes.rs | 172 ++++++++++++++++++++++--------------
crates/zed/Cargo.toml         |   5 
crates/zed/src/main.rs        |  21 ++++
crates/zed/src/reliability.rs |  21 +++-
crates/zed/src/zed.rs         |  11 --
7 files changed, 142 insertions(+), 94 deletions(-)

Detailed changes

Cargo.lock ๐Ÿ”—

@@ -4126,13 +4126,13 @@ dependencies = [
 name = "crashes"
 version = "0.1.0"
 dependencies = [
- "bincode",
  "cfg-if",
  "crash-handler",
  "futures 0.3.31",
  "log",
  "mach2 0.5.0",
  "minidumper",
+ "parking_lot",
  "paths",
  "release_channel",
  "serde",
@@ -21735,7 +21735,6 @@ dependencies = [
  "audio",
  "auto_update",
  "auto_update_ui",
- "bincode",
  "breadcrumbs",
  "call",
  "channel",

crates/crashes/Cargo.toml ๐Ÿ”—

@@ -6,13 +6,12 @@ edition.workspace = true
 license = "GPL-3.0-or-later"
 
 [dependencies]
-bincode.workspace = true
 cfg-if.workspace = true
 crash-handler.workspace = true
 futures.workspace = true
 log.workspace = true
 minidumper.workspace = true
-
+parking_lot.workspace = true
 paths.workspace = true
 release_channel.workspace = true
 smol.workspace = true

crates/crashes/src/crashes.rs ๐Ÿ”—

@@ -2,12 +2,14 @@ use crash_handler::{CrashEventResult, CrashHandler};
 use futures::future::BoxFuture;
 use log::info;
 use minidumper::{Client, LoopAction, MinidumpBinary};
+use parking_lot::Mutex;
 use release_channel::{RELEASE_CHANNEL, ReleaseChannel};
 use serde::{Deserialize, Serialize};
 use std::mem;
 
 #[cfg(not(target_os = "windows"))]
 use smol::process::Command;
+use system_specs::GpuSpecs;
 
 #[cfg(target_os = "macos")]
 use std::sync::atomic::AtomicU32;
@@ -27,12 +29,14 @@ use std::{
 };
 
 // set once the crash handler has initialized and the client has connected to it
-pub static CRASH_HANDLER: OnceLock<Arc<Client>> = OnceLock::new();
+static CRASH_HANDLER: OnceLock<Arc<Client>> = OnceLock::new();
 // set when the first minidump request is made to avoid generating duplicate crash reports
 pub static REQUESTED_MINIDUMP: AtomicBool = AtomicBool::new(false);
 const CRASH_HANDLER_PING_TIMEOUT: Duration = Duration::from_secs(60);
 const CRASH_HANDLER_CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
 
+static PENDING_CRASH_SERVER_MESSAGES: Mutex<Vec<CrashServerMessage>> = Mutex::new(Vec::new());
+
 #[cfg(target_os = "macos")]
 static PANIC_THREAD_ID: AtomicU32 = AtomicU32::new(0);
 
@@ -118,6 +122,7 @@ async fn connect_and_keepalive(crash_init: InitCrashHandler, handler: CrashHandl
     spawn_crash_handler_windows(&exe, &socket_name);
 
     info!("spawning crash handler process");
+    send_crash_server_message(CrashServerMessage::Init(crash_init));
 
     let mut elapsed = Duration::ZERO;
     let retry_frequency = Duration::from_millis(100);
@@ -134,10 +139,6 @@ async fn connect_and_keepalive(crash_init: InitCrashHandler, handler: CrashHandl
         smol::Timer::after(retry_frequency).await;
     }
     let client = maybe_client.unwrap();
-    client
-        .send_message(1, serde_json::to_vec(&crash_init).unwrap())
-        .unwrap();
-
     let client = Arc::new(client);
 
     #[cfg(target_os = "linux")]
@@ -146,6 +147,10 @@ async fn connect_and_keepalive(crash_init: InitCrashHandler, handler: CrashHandl
     // Publishing the client to the OnceLock makes it visible to the signal
     // handler callback installed earlier.
     CRASH_HANDLER.set(client.clone()).ok();
+    let messages: Vec<_> = mem::take(PENDING_CRASH_SERVER_MESSAGES.lock().as_mut());
+    for message in messages.into_iter() {
+        send_crash_server_message(message);
+    }
     // mem::forget so that the drop is not called
     mem::forget(handler);
     info!("crash handler registered");
@@ -177,9 +182,10 @@ unsafe fn suspend_all_other_threads() {
 }
 
 pub struct CrashServer {
-    initialization_params: OnceLock<InitCrashHandler>,
-    panic_info: OnceLock<CrashPanic>,
-    active_gpu: OnceLock<system_specs::GpuSpecs>,
+    initialization_params: Mutex<Option<InitCrashHandler>>,
+    panic_info: Mutex<Option<CrashPanic>>,
+    active_gpu: Mutex<Option<system_specs::GpuSpecs>>,
+    user_info: Mutex<Option<UserInfo>>,
     has_connection: Arc<AtomicBool>,
 }
 
@@ -190,6 +196,7 @@ pub struct CrashInfo {
     pub minidump_error: Option<String>,
     pub gpus: Vec<system_specs::GpuInfo>,
     pub active_gpu: Option<system_specs::GpuSpecs>,
+    pub user_info: Option<UserInfo>,
 }
 
 #[derive(Debug, Deserialize, Serialize, Clone)]
@@ -207,15 +214,55 @@ pub struct CrashPanic {
     pub span: String,
 }
 
+#[derive(Deserialize, Serialize, Debug, Clone)]
+pub struct UserInfo {
+    pub metrics_id: Option<String>,
+    pub is_staff: Option<bool>,
+}
+
+fn send_crash_server_message(message: CrashServerMessage) {
+    let Some(crash_server) = CRASH_HANDLER.get() else {
+        PENDING_CRASH_SERVER_MESSAGES.lock().push(message);
+        return;
+    };
+    let data = match serde_json::to_vec(&message) {
+        Ok(data) => data,
+        Err(err) => {
+            log::warn!("Failed to serialize crash server message: {:?}", err);
+            return;
+        }
+    };
+
+    if let Err(err) = crash_server.send_message(0, data) {
+        log::warn!("Failed to send data to crash server {:?}", err);
+    }
+}
+
+pub fn set_gpu_info(specs: GpuSpecs) {
+    send_crash_server_message(CrashServerMessage::GPUInfo(specs));
+}
+
+pub fn set_user_info(info: UserInfo) {
+    send_crash_server_message(CrashServerMessage::UserInfo(info));
+}
+
+#[derive(Serialize, Deserialize, Debug)]
+enum CrashServerMessage {
+    Init(InitCrashHandler),
+    Panic(CrashPanic),
+    GPUInfo(GpuSpecs),
+    UserInfo(UserInfo),
+}
+
 impl minidumper::ServerHandler for CrashServer {
     fn create_minidump_file(&self) -> Result<(File, PathBuf), io::Error> {
-        let err_message = "Missing initialization data";
         let dump_path = paths::logs_dir()
             .join(
                 &self
                     .initialization_params
-                    .get()
-                    .expect(err_message)
+                    .lock()
+                    .as_ref()
+                    .expect("Missing initialization data")
                     .session_id,
             )
             .with_extension("dmp");
@@ -255,13 +302,14 @@ impl minidumper::ServerHandler for CrashServer {
         let crash_info = CrashInfo {
             init: self
                 .initialization_params
-                .get()
-                .expect("not initialized")
-                .clone(),
-            panic: self.panic_info.get().cloned(),
+                .lock()
+                .clone()
+                .expect("not initialized"),
+            panic: self.panic_info.lock().clone(),
             minidump_error,
-            active_gpu: self.active_gpu.get().cloned(),
+            active_gpu: self.active_gpu.lock().clone(),
             gpus,
+            user_info: self.user_info.lock().clone(),
         };
 
         let crash_data_path = paths::logs_dir()
@@ -273,30 +321,21 @@ impl minidumper::ServerHandler for CrashServer {
         LoopAction::Exit
     }
 
-    fn on_message(&self, kind: u32, buffer: Vec<u8>) {
-        match kind {
-            1 => {
-                let init_data =
-                    serde_json::from_slice::<InitCrashHandler>(&buffer).expect("invalid init data");
-                self.initialization_params
-                    .set(init_data)
-                    .expect("already initialized");
+    fn on_message(&self, _: u32, buffer: Vec<u8>) {
+        let message: CrashServerMessage =
+            serde_json::from_slice(&buffer).expect("invalid init data");
+        match message {
+            CrashServerMessage::Init(init_data) => {
+                self.initialization_params.lock().replace(init_data);
             }
-            2 => {
-                let panic_data =
-                    serde_json::from_slice::<CrashPanic>(&buffer).expect("invalid panic data");
-                self.panic_info.set(panic_data).expect("already panicked");
+            CrashServerMessage::Panic(crash_panic) => {
+                self.panic_info.lock().replace(crash_panic);
             }
-            3 => {
-                let gpu_specs: system_specs::GpuSpecs =
-                    bincode::deserialize(&buffer).expect("gpu specs");
-                // we ignore the case where it was already set because this message is sent
-                // on each new window. in theory all zed windows should be using the same
-                // GPU so this is fine.
-                self.active_gpu.set(gpu_specs).ok();
+            CrashServerMessage::GPUInfo(gpu_specs) => {
+                self.active_gpu.lock().replace(gpu_specs);
             }
-            _ => {
-                panic!("invalid message kind");
+            CrashServerMessage::UserInfo(user_info) => {
+                self.user_info.lock().replace(user_info);
             }
         }
     }
@@ -326,37 +365,33 @@ pub fn panic_hook(info: &PanicHookInfo) {
     // if it's still not there just write panic info and no minidump
     let retry_frequency = Duration::from_millis(100);
     for _ in 0..5 {
-        if let Some(client) = CRASH_HANDLER.get() {
-            let location = info
-                .location()
-                .map_or_else(|| "<unknown>".to_owned(), |location| location.to_string());
-            log::error!("thread '{thread_name}' panicked at {location}:\n{message}...");
-            client
-                .send_message(
-                    2,
-                    serde_json::to_vec(&CrashPanic { message, span }).unwrap(),
-                )
-                .ok();
-            log::error!("triggering a crash to generate a minidump...");
-
-            #[cfg(target_os = "macos")]
-            PANIC_THREAD_ID.store(
-                unsafe { mach2::mach_init::mach_thread_self() },
-                Ordering::SeqCst,
-            );
-
-            cfg_if::cfg_if! {
-                if #[cfg(target_os = "windows")] {
-                    // https://learn.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
-                    CrashHandler.simulate_exception(Some(234)); // (MORE_DATA_AVAILABLE)
-                    break;
-                } else {
-                    std::process::abort();
-                }
-            }
+        if CRASH_HANDLER.get().is_some() {
+            break;
         }
         thread::sleep(retry_frequency);
     }
+    let location = info
+        .location()
+        .map_or_else(|| "<unknown>".to_owned(), |location| location.to_string());
+    log::error!("thread '{thread_name}' panicked at {location}:\n{message}...");
+
+    send_crash_server_message(CrashServerMessage::Panic(CrashPanic { message, span }));
+    log::error!("triggering a crash to generate a minidump...");
+
+    #[cfg(target_os = "macos")]
+    PANIC_THREAD_ID.store(
+        unsafe { mach2::mach_init::mach_thread_self() },
+        Ordering::SeqCst,
+    );
+
+    cfg_if::cfg_if! {
+        if #[cfg(target_os = "windows")] {
+            // https://learn.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
+            CrashHandler.simulate_exception(Some(234)); // (MORE_DATA_AVAILABLE)
+        } else {
+            std::process::abort();
+        }
+    }
 }
 
 #[cfg(target_os = "windows")]
@@ -436,10 +471,11 @@ pub fn crash_server(socket: &Path) {
     server
         .run(
             Box::new(CrashServer {
-                initialization_params: OnceLock::new(),
-                panic_info: OnceLock::new(),
+                initialization_params: Mutex::default(),
+                panic_info: Mutex::default(),
+                user_info: Mutex::default(),
                 has_connection,
-                active_gpu: OnceLock::new(),
+                active_gpu: Mutex::default(),
             }),
             &shutdown,
             Some(CRASH_HANDLER_PING_TIMEOUT),

crates/zed/Cargo.toml ๐Ÿ”—

@@ -17,7 +17,6 @@ test-support = [
     "gpui/test-support",
     "gpui_platform/screen-capture",
     "dep:image",
-    "dep:semver",
     "workspace/test-support",
     "project/test-support",
     "editor/test-support",
@@ -32,7 +31,6 @@ visual-tests = [
     "gpui_platform/screen-capture",
     "gpui_platform/test-support",
     "dep:image",
-    "dep:semver",
     "dep:tempfile",
     "dep:action_log",
     "dep:agent_servers",
@@ -76,7 +74,6 @@ assets.workspace = true
 audio.workspace = true
 auto_update.workspace = true
 auto_update_ui.workspace = true
-bincode.workspace = true
 breadcrumbs.workspace = true
 call.workspace = true
 chrono.workspace = true
@@ -122,7 +119,7 @@ system_specs.workspace = true
 gpui.workspace = true
 gpui_platform = {workspace = true, features=["screen-capture", "font-kit", "wayland", "x11"]}
 image = { workspace = true, optional = true }
-semver = { workspace = true, optional = true }
+semver.workspace = true
 tempfile = { workspace = true, optional = true }
 clock = { workspace = true, optional = true }
 acp_thread.workspace = true

crates/zed/src/main.rs ๐Ÿ”—

@@ -335,7 +335,13 @@ fn main() {
     crashes::init(
         InitCrashHandler {
             session_id,
-            zed_version: app_version.to_string(),
+            // strip the build and channel information from the version string, we send them separately
+            zed_version: semver::Version::new(
+                app_version.major,
+                app_version.minor,
+                app_version.patch,
+            )
+            .to_string(),
             binary: "zed".to_string(),
             release_channel: release_channel::RELEASE_CHANNEL_NAME.clone(),
             commit_sha: app_commit_sha
@@ -573,6 +579,19 @@ fn main() {
             session.id().to_owned(),
             cx,
         );
+        cx.subscribe(&user_store, {
+            let telemetry = telemetry.clone();
+            move |_, evt: &client::user::Event, _| match evt {
+                client::user::Event::PrivateUserInfoUpdated => {
+                    crashes::set_user_info(crashes::UserInfo {
+                        metrics_id: telemetry.metrics_id().map(|s| s.to_string()),
+                        is_staff: telemetry.is_staff(),
+                    });
+                }
+                _ => {}
+            }
+        })
+        .detach();
 
         // We should rename these in the future to `first app open`, `first app open for release channel`, and `app open`
         if let (Some(system_id), Some(installation_id)) = (&system_id, &installation_id) {

crates/zed/src/reliability.rs ๐Ÿ”—

@@ -288,16 +288,23 @@ async fn upload_minidump(
         form = form.text("minidump_error", minidump_error);
     }
 
-    if let Some(id) = client.telemetry().metrics_id() {
-        form = form.text("sentry[user][id]", id.to_string());
+    if let Some(is_staff) = &metadata
+        .user_info
+        .as_ref()
+        .and_then(|user_info| user_info.is_staff)
+    {
         form = form.text(
             "sentry[user][is_staff]",
-            if client.telemetry().is_staff().unwrap_or_default() {
-                "true"
-            } else {
-                "false"
-            },
+            if *is_staff { "true" } else { "false" },
         );
+    }
+
+    if let Some(metrics_id) = metadata
+        .user_info
+        .as_ref()
+        .and_then(|user_info| user_info.metrics_id.as_ref())
+    {
+        form = form.text("sentry[user][id]", metrics_id.clone());
     } else if let Some(id) = client.telemetry().installation_id() {
         form = form.text("sentry[user][id]", format!("installation-{}", id))
     }

crates/zed/src/zed.rs ๐Ÿ”—

@@ -422,16 +422,7 @@ pub fn initialize_workspace(
         if let Some(specs) = window.gpu_specs() {
             log::info!("Using GPU: {:?}", specs);
             show_software_emulation_warning_if_needed(specs.clone(), window, cx);
-            if let Some((crash_server, message)) = crashes::CRASH_HANDLER
-                .get()
-                .zip(bincode::serialize(&specs).ok())
-                && let Err(err) = crash_server.send_message(3, message)
-            {
-                log::warn!(
-                    "Failed to store active gpu info for crash reporting: {}",
-                    err
-                );
-            }
+            crashes::set_gpu_info(specs);
         }
 
         let edit_prediction_menu_handle = PopoverMenuHandle::default();