Use metrics-id for sentry user id when we have it (#42931)

Conrad Irwin created

This should make it easier to correlate Sentry reports with user reports
and
github issues (for users who have diagnostics enabled)

Release Notes:

- N/A

Change summary

Cargo.lock                                      |   3 
crates/client/src/telemetry.rs                  |   9 
crates/telemetry_events/src/telemetry_events.rs |  63 ----
crates/zed/Cargo.toml                           |   3 
crates/zed/src/main.rs                          |   6 
crates/zed/src/reliability.rs                   | 275 ++----------------
6 files changed, 41 insertions(+), 318 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -21217,7 +21217,6 @@ dependencies = [
  "audio",
  "auto_update",
  "auto_update_ui",
- "backtrace",
  "bincode 1.3.3",
  "breadcrumbs",
  "call",
@@ -21282,7 +21281,6 @@ dependencies = [
  "mimalloc",
  "miniprofiler_ui",
  "nc",
- "nix 0.29.0",
  "node_runtime",
  "notifications",
  "onboarding",
@@ -21324,7 +21322,6 @@ dependencies = [
  "task",
  "tasks_ui",
  "telemetry",
- "telemetry_events",
  "terminal_view",
  "theme",
  "theme_extension",

crates/client/src/telemetry.rs 🔗

@@ -293,10 +293,11 @@ impl Telemetry {
     }
 
     pub fn metrics_enabled(self: &Arc<Self>) -> bool {
-        let state = self.state.lock();
-        let enabled = state.settings.metrics;
-        drop(state);
-        enabled
+        self.state.lock().settings.metrics
+    }
+
+    pub fn diagnostics_enabled(self: &Arc<Self>) -> bool {
+        self.state.lock().settings.diagnostics
     }
 
     pub fn set_authenticated_user_info(

crates/telemetry_events/src/telemetry_events.rs 🔗

@@ -124,66 +124,3 @@ pub struct AssistantEventData {
     pub error_message: Option<String>,
     pub language_name: Option<String>,
 }
-
-#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
-pub struct BacktraceFrame {
-    pub ip: usize,
-    pub symbol_addr: usize,
-    pub base: Option<usize>,
-    pub symbols: Vec<String>,
-}
-
-#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
-pub struct HangReport {
-    pub backtrace: Vec<BacktraceFrame>,
-    pub app_version: Option<SemanticVersion>,
-    pub os_name: String,
-    pub os_version: Option<String>,
-    pub architecture: String,
-    /// Identifier unique to each Zed installation (differs for stable, preview, dev)
-    pub installation_id: Option<String>,
-}
-
-#[derive(Serialize, Deserialize, Clone, Debug)]
-pub struct LocationData {
-    pub file: String,
-    pub line: u32,
-}
-
-#[derive(Serialize, Deserialize, Clone, Debug)]
-pub struct Panic {
-    /// The name of the thread that panicked
-    pub thread: String,
-    /// The panic message
-    pub payload: String,
-    /// The location of the panic (file, line number)
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub location_data: Option<LocationData>,
-    pub backtrace: Vec<String>,
-    /// Zed version number
-    pub app_version: String,
-    /// The Git commit SHA that Zed was built at.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub app_commit_sha: Option<String>,
-    /// Zed release channel (stable, preview, dev)
-    pub release_channel: String,
-    pub target: Option<String>,
-    pub os_name: String,
-    pub os_version: Option<String>,
-    pub architecture: String,
-    /// The time the panic occurred (UNIX millisecond timestamp)
-    pub panicked_on: i64,
-    /// Identifier unique to each system Zed is installed on
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub system_id: Option<String>,
-    /// Identifier unique to each Zed installation (differs for stable, preview, dev)
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub installation_id: Option<String>,
-    /// Identifier unique to each Zed session (differs for each time you open Zed)
-    pub session_id: String,
-}
-
-#[derive(Serialize, Deserialize)]
-pub struct PanicRequest {
-    pub panic: Panic,
-}

crates/zed/Cargo.toml 🔗

@@ -29,7 +29,6 @@ assets.workspace = true
 audio.workspace = true
 auto_update.workspace = true
 auto_update_ui.workspace = true
-backtrace = "0.3"
 bincode.workspace = true
 breadcrumbs.workspace = true
 call.workspace = true
@@ -100,7 +99,6 @@ migrator.workspace = true
 miniprofiler_ui.workspace = true
 mimalloc = { version = "0.1", optional = true }
 nc.workspace = true
-nix = { workspace = true, features = ["pthread", "signal"] }
 node_runtime.workspace = true
 notifications.workspace = true
 onboarding.workspace = true
@@ -140,7 +138,6 @@ tab_switcher.workspace = true
 task.workspace = true
 tasks_ui.workspace = true
 telemetry.workspace = true
-telemetry_events.workspace = true
 terminal_view.workspace = true
 theme.workspace = true
 theme_extension.workspace = true

crates/zed/src/main.rs 🔗

@@ -549,11 +549,7 @@ pub fn main() {
         auto_update::init(client.clone(), cx);
         dap_adapters::init(cx);
         auto_update_ui::init(cx);
-        reliability::init(
-            client.http_client(),
-            system_id.as_ref().map(|id| id.to_string()),
-            cx,
-        );
+        reliability::init(client.clone(), cx);
         extension_host::init(
             extension_host_proxy.clone(),
             app_state.fs.clone(),

crates/zed/src/reliability.rs 🔗

@@ -1,46 +1,42 @@
 use anyhow::{Context as _, Result};
-use client::{TelemetrySettings, telemetry::MINIDUMP_ENDPOINT};
+use client::{Client, telemetry::MINIDUMP_ENDPOINT};
 use futures::AsyncReadExt;
 use gpui::{App, AppContext as _, SerializedThreadTaskTimings};
-use http_client::{self, HttpClient, HttpClientWithUrl};
+use http_client::{self, HttpClient};
 use log::info;
 use project::Project;
 use proto::{CrashReport, GetCrashFilesResponse};
 use reqwest::multipart::{Form, Part};
-use settings::Settings;
 use smol::stream::StreamExt;
 use std::{ffi::OsStr, fs, sync::Arc, thread::ThreadId, time::Duration};
 use util::ResultExt;
 
 use crate::STARTUP_TIME;
 
-pub fn init(http_client: Arc<HttpClientWithUrl>, installation_id: Option<String>, cx: &mut App) {
+pub fn init(client: Arc<Client>, cx: &mut App) {
     monitor_hangs(cx);
 
-    #[cfg(target_os = "macos")]
-    monitor_main_thread_hangs(http_client.clone(), installation_id.clone(), cx);
-
-    if client::TelemetrySettings::get_global(cx).diagnostics {
-        let client = http_client.clone();
-        let id = installation_id.clone();
+    if client.telemetry().diagnostics_enabled() {
+        let client = client.clone();
         cx.background_spawn(async move {
-            upload_previous_minidumps(client, id).await.warn_on_err();
+            upload_previous_minidumps(client).await.warn_on_err();
         })
         .detach()
     }
 
     cx.observe_new(move |project: &mut Project, _, cx| {
-        let http_client = http_client.clone();
-        let installation_id = installation_id.clone();
+        let client = client.clone();
 
         let Some(remote_client) = project.remote_client() else {
             return;
         };
-        remote_client.update(cx, |client, cx| {
-            if !TelemetrySettings::get_global(cx).diagnostics {
+        remote_client.update(cx, |remote_client, cx| {
+            if !client.telemetry().diagnostics_enabled() {
                 return;
             }
-            let request = client.proto_client().request(proto::GetCrashFiles {});
+            let request = remote_client
+                .proto_client()
+                .request(proto::GetCrashFiles {});
             cx.background_spawn(async move {
                 let GetCrashFilesResponse { crashes } = request.await?;
 
@@ -53,15 +49,9 @@ pub fn init(http_client: Arc<HttpClientWithUrl>, installation_id: Option<String>
                 } in crashes
                 {
                     if let Some(metadata) = serde_json::from_str(&metadata).log_err() {
-                        upload_minidump(
-                            http_client.clone(),
-                            endpoint,
-                            minidump_contents,
-                            &metadata,
-                            installation_id.clone(),
-                        )
-                        .await
-                        .log_err();
+                        upload_minidump(client.clone(), endpoint, minidump_contents, &metadata)
+                            .await
+                            .log_err();
                     }
                 }
 
@@ -73,210 +63,6 @@ pub fn init(http_client: Arc<HttpClientWithUrl>, installation_id: Option<String>
     .detach();
 }
 
-#[cfg(target_os = "macos")]
-pub fn monitor_main_thread_hangs(
-    http_client: Arc<HttpClientWithUrl>,
-    installation_id: Option<String>,
-    cx: &App,
-) {
-    // This is too noisy to ship to stable for now.
-    if !matches!(
-        ReleaseChannel::global(cx),
-        ReleaseChannel::Dev | ReleaseChannel::Nightly | ReleaseChannel::Preview
-    ) {
-        return;
-    }
-
-    use nix::sys::signal::{
-        SaFlags, SigAction, SigHandler, SigSet,
-        Signal::{self, SIGUSR2},
-        sigaction,
-    };
-
-    use parking_lot::Mutex;
-
-    use http_client::Method;
-    use release_channel::ReleaseChannel;
-    use std::{
-        ffi::c_int,
-        sync::{OnceLock, mpsc},
-        time::Duration,
-    };
-    use telemetry_events::{BacktraceFrame, HangReport};
-
-    use nix::sys::pthread;
-
-    let foreground_executor = cx.foreground_executor();
-    let background_executor = cx.background_executor();
-    let telemetry_settings = *client::TelemetrySettings::get_global(cx);
-
-    // Initialize SIGUSR2 handler to send a backtrace to a channel.
-    let (backtrace_tx, backtrace_rx) = mpsc::channel();
-    static BACKTRACE: Mutex<Vec<backtrace::Frame>> = Mutex::new(Vec::new());
-    static BACKTRACE_SENDER: OnceLock<mpsc::Sender<()>> = OnceLock::new();
-    BACKTRACE_SENDER.get_or_init(|| backtrace_tx);
-    BACKTRACE.lock().reserve(100);
-
-    fn handle_backtrace_signal() {
-        unsafe {
-            extern "C" fn handle_sigusr2(_i: c_int) {
-                unsafe {
-                    // ASYNC SIGNAL SAFETY: This lock is only accessed one other time,
-                    // which can only be triggered by This signal handler. In addition,
-                    // this signal handler is immediately removed by SA_RESETHAND, and this
-                    // signal handler cannot be re-entrant due to the SIGUSR2 mask defined
-                    // below
-                    let mut bt = BACKTRACE.lock();
-                    bt.clear();
-                    backtrace::trace_unsynchronized(|frame| {
-                        if bt.len() < bt.capacity() {
-                            bt.push(frame.clone());
-                            true
-                        } else {
-                            false
-                        }
-                    });
-                }
-
-                BACKTRACE_SENDER.get().unwrap().send(()).ok();
-            }
-
-            let mut mask = SigSet::empty();
-            mask.add(SIGUSR2);
-            sigaction(
-                Signal::SIGUSR2,
-                &SigAction::new(
-                    SigHandler::Handler(handle_sigusr2),
-                    SaFlags::SA_RESTART | SaFlags::SA_RESETHAND,
-                    mask,
-                ),
-            )
-            .log_err();
-        }
-    }
-
-    handle_backtrace_signal();
-    let main_thread = pthread::pthread_self();
-
-    let (mut tx, mut rx) = futures::channel::mpsc::channel(3);
-    foreground_executor
-        .spawn(async move { while (rx.next().await).is_some() {} })
-        .detach();
-
-    background_executor
-        .spawn({
-            let background_executor = background_executor.clone();
-            async move {
-                loop {
-                    background_executor.timer(Duration::from_secs(1)).await;
-                    match tx.try_send(()) {
-                        Ok(_) => continue,
-                        Err(e) => {
-                            if e.into_send_error().is_full() {
-                                pthread::pthread_kill(main_thread, SIGUSR2).log_err();
-                            }
-                            // Only detect the first hang
-                            break;
-                        }
-                    }
-                }
-            }
-        })
-        .detach();
-
-    let app_version = release_channel::AppVersion::global(cx);
-    let os_name = client::telemetry::os_name();
-
-    background_executor
-        .clone()
-        .spawn(async move {
-            let os_version = client::telemetry::os_version();
-
-            loop {
-                while backtrace_rx.recv().is_ok() {
-                    if !telemetry_settings.diagnostics {
-                        return;
-                    }
-
-                    // ASYNC SIGNAL SAFETY: This lock is only accessed _after_
-                    // the backtrace transmitter has fired, which itself is only done
-                    // by the signal handler. And due to SA_RESETHAND  the signal handler
-                    // will not run again until `handle_backtrace_signal` is called.
-                    let raw_backtrace = BACKTRACE.lock().drain(..).collect::<Vec<_>>();
-                    let backtrace: Vec<_> = raw_backtrace
-                        .into_iter()
-                        .map(|frame| {
-                            let mut btf = BacktraceFrame {
-                                ip: frame.ip() as usize,
-                                symbol_addr: frame.symbol_address() as usize,
-                                base: frame.module_base_address().map(|addr| addr as usize),
-                                symbols: vec![],
-                            };
-
-                            backtrace::resolve_frame(&frame, |symbol| {
-                                if let Some(name) = symbol.name() {
-                                    btf.symbols.push(name.to_string());
-                                }
-                            });
-
-                            btf
-                        })
-                        .collect();
-
-                    // IMPORTANT: Don't move this to before `BACKTRACE.lock()`
-                    handle_backtrace_signal();
-
-                    log::error!(
-                        "Suspected hang on main thread:\n{}",
-                        backtrace
-                            .iter()
-                            .flat_map(|bt| bt.symbols.first().as_ref().map(|s| s.as_str()))
-                            .collect::<Vec<_>>()
-                            .join("\n")
-                    );
-
-                    let report = HangReport {
-                        backtrace,
-                        app_version: Some(app_version),
-                        os_name: os_name.clone(),
-                        os_version: Some(os_version.clone()),
-                        architecture: std::env::consts::ARCH.into(),
-                        installation_id: installation_id.clone(),
-                    };
-
-                    let Some(json_bytes) = serde_json::to_vec(&report).log_err() else {
-                        continue;
-                    };
-
-                    let Some(checksum) = client::telemetry::calculate_json_checksum(&json_bytes)
-                    else {
-                        continue;
-                    };
-
-                    let Ok(url) = http_client.build_zed_api_url("/telemetry/hangs", &[]) else {
-                        continue;
-                    };
-
-                    let Ok(request) = http_client::Request::builder()
-                        .method(Method::POST)
-                        .uri(url.as_ref())
-                        .header("x-zed-checksum", checksum)
-                        .body(json_bytes.into())
-                    else {
-                        continue;
-                    };
-
-                    if let Some(response) = http_client.send(request).await.log_err()
-                        && response.status() != 200
-                    {
-                        log::error!("Failed to send hang report: HTTP {:?}", response.status());
-                    }
-                }
-            }
-        })
-        .detach()
-}
-
 fn monitor_hangs(cx: &App) {
     let main_thread_id = std::thread::current().id();
 
@@ -365,10 +151,7 @@ fn save_hang_trace(
     );
 }
 
-pub async fn upload_previous_minidumps(
-    http: Arc<HttpClientWithUrl>,
-    installation_id: Option<String>,
-) -> anyhow::Result<()> {
+pub async fn upload_previous_minidumps(client: Arc<Client>) -> anyhow::Result<()> {
     let Some(minidump_endpoint) = MINIDUMP_ENDPOINT.as_ref() else {
         log::warn!("Minidump endpoint not set");
         return Ok(());
@@ -385,13 +168,12 @@ pub async fn upload_previous_minidumps(
         json_path.set_extension("json");
         if let Ok(metadata) = serde_json::from_slice(&smol::fs::read(&json_path).await?)
             && upload_minidump(
-                http.clone(),
+                client.clone(),
                 minidump_endpoint,
                 smol::fs::read(&child_path)
                     .await
                     .context("Failed to read minidump")?,
                 &metadata,
-                installation_id.clone(),
             )
             .await
             .log_err()
@@ -405,11 +187,10 @@ pub async fn upload_previous_minidumps(
 }
 
 async fn upload_minidump(
-    http: Arc<HttpClientWithUrl>,
+    client: Arc<Client>,
     endpoint: &str,
     minidump: Vec<u8>,
     metadata: &crashes::CrashInfo,
-    installation_id: Option<String>,
 ) -> Result<()> {
     let mut form = Form::new()
         .part(
@@ -436,8 +217,19 @@ async fn upload_minidump(
     if let Some(minidump_error) = metadata.minidump_error.clone() {
         form = form.text("minidump_error", minidump_error);
     }
-    if let Some(id) = installation_id.clone() {
-        form = form.text("sentry[user][id]", id)
+
+    if let Some(id) = client.telemetry().metrics_id() {
+        form = form.text("sentry[user][id]", id.to_string());
+        form = form.text(
+            "sentry[user][is_staff]",
+            if client.telemetry().is_staff().unwrap_or_default() {
+                "true"
+            } else {
+                "false"
+            },
+        );
+    } else if let Some(id) = client.telemetry().installation_id() {
+        form = form.text("sentry[user][id]", format!("installation-{}", id))
     }
 
     ::telemetry::event!(
@@ -505,7 +297,10 @@ async fn upload_minidump(
     // TODO: feature-flag-context, and more of device-context like screen resolution, available ram, device model, etc
 
     let mut response_text = String::new();
-    let mut response = http.send_multipart_form(endpoint, form).await?;
+    let mut response = client
+        .http_client()
+        .send_multipart_form(endpoint, form)
+        .await?;
     response
         .body_mut()
         .read_to_string(&mut response_text)