Upload panics via collab instead of zed.dev (#11932)

Conrad Irwin created

Release Notes:

- N/A

Change summary

crates/collab/src/api/events.rs                 | 90 +++++++++++++++++++
crates/telemetry_events/src/telemetry_events.rs | 29 ++++++
crates/zed/src/reliability.rs                   | 58 ++++--------
3 files changed, 139 insertions(+), 38 deletions(-)

Detailed changes

crates/collab/src/api/events.rs 🔗

@@ -26,6 +26,7 @@ pub fn router() -> Router {
     Router::new()
         .route("/telemetry/events", post(post_events))
         .route("/telemetry/crashes", post(post_crash))
+        .route("/telemetry/panics", post(post_panic))
         .route("/telemetry/hangs", post(post_hang))
 }
 
@@ -325,6 +326,95 @@ pub async fn post_hang(
     Ok(())
 }
 
+pub async fn post_panic( // Handler for POST /telemetry/panics: checks the checksum header, logs the panic, and forwards a summary to Slack when a webhook is configured.
+    Extension(app): Extension<Arc<AppState>>,
+    TypedHeader(ZedChecksumHeader(checksum)): TypedHeader<ZedChecksumHeader>,
+    body: Bytes,
+) -> Result<()> {
+    let Some(expected) = calculate_json_checksum(app.clone(), &body) else { // None means no checksum could be computed server-side, so the body cannot be verified.
+        return Err(Error::Http(
+            StatusCode::INTERNAL_SERVER_ERROR,
+            "events not enabled".into(),
+        ))?;
+    };
+
+    if checksum != expected { // Reject requests whose checksum header differs from the one computed over the raw body.
+        return Err(Error::Http(
+            StatusCode::BAD_REQUEST,
+            "invalid checksum".into(),
+        ))?;
+    }
+
+    let report: telemetry_events::PanicRequest = serde_json::from_slice(&body) // Body must deserialize as a PanicRequest; any parse failure becomes a 400 below.
+        .map_err(|_| Error::Http(StatusCode::BAD_REQUEST, "invalid json".into()))?;
+    let panic = report.panic;
+
+    tracing::error!( // The log entry always carries the complete backtrace; only the Slack message is shortened.
+        service = "client",
+        version = %panic.app_version,
+        os_name = %panic.os_name,
+        os_version = %panic.os_version.clone().unwrap_or_default(), // Cloned because os_version is consumed later when building the Slack message.
+        installation_id = %panic.installation_id.unwrap_or_default(), // Moves installation_id out of `panic`; it is not read again in this function.
+        description = %panic.payload,
+        backtrace = %panic.backtrace.join("\n"),
+        "panic report");
+
+    let backtrace = if panic.backtrace.len() > 25 { // Over 25 entries: keep the first 20 and append a count of the rest, so a cut always hides at least 6 entries.
+        let total = panic.backtrace.len();
+        format!(
+            "{}\n   and {} more",
+            panic
+                .backtrace
+                .iter()
+                .take(20)
+                .cloned()
+                .collect::<Vec<_>>()
+                .join("\n"),
+            total - 20
+        )
+    } else {
+        panic.backtrace.join("\n")
+    };
+    let backtrace_with_summary = panic.payload + "\n" + &backtrace; // Consumes panic.payload: payload text, a newline, then the (possibly shortened) backtrace.
+
+    if let Some(slack_panics_webhook) = app.config.slack_panics_webhook.clone() { // With no webhook configured, the Slack step is skipped and the handler returns Ok.
+        let payload = slack::WebhookBody::new(|w| {
+            w.add_section(|s| s.text(slack::Text::markdown("Panic request".to_string())))
+                .add_section(|s| {
+                    s.add_field(slack::Text::markdown(format!(
+                        "*Version:*\n {} ",
+                        panic.app_version
+                    )))
+                    .add_field({
+                        slack::Text::markdown(format!(
+                            "*OS:*\n{} {}",
+                            panic.os_name,
+                            panic.os_version.unwrap_or_default()
+                        ))
+                    })
+                })
+                .add_rich_text(|r| r.add_preformatted(|p| p.add_text(backtrace_with_summary)))
+        });
+        let payload_json = serde_json::to_string(&payload).map_err(|err| { // Serialization failure is logged and returned as an internal error.
+            log::error!("Failed to serialize payload to JSON: {err}");
+            Error::Internal(anyhow!(err))
+        })?;
+
+        reqwest::Client::new() // A fresh client is constructed for each report.
+            .post(slack_panics_webhook)
+            .header("Content-Type", "application/json")
+            .body(payload_json)
+            .send()
+            .await
+            .map_err(|err| { // A failed send is logged and fails the whole request. NOTE(review): the response status is never inspected, so a non-success reply from Slack presumably goes unnoticed — confirm.
+                log::error!("Failed to send payload to Slack: {err}");
+                Error::Internal(anyhow!(err))
+            })?;
+    }
+
+    Ok(())
+}
+
 pub async fn post_events(
     Extension(app): Extension<Arc<AppState>>,
     TypedHeader(ZedChecksumHeader(checksum)): TypedHeader<ZedChecksumHeader>,

crates/telemetry_events/src/telemetry_events.rs 🔗

@@ -155,3 +155,32 @@ pub struct HangReport {
     pub architecture: String,
     pub installation_id: Option<String>,
 }
+
+#[derive(Serialize, Deserialize)]
+pub struct LocationData { // Location attached to a panic report via Panic::location_data.
+    pub file: String, // File of the panic location (presumably a source path — confirm against the panic hook).
+    pub line: u32, // Line number within `file`.
+}
+
+#[derive(Serialize, Deserialize)]
+pub struct Panic { // One panic report, shared by the client that records it and the collab endpoint that receives it.
+    pub thread: String, // Name of the thread that panicked.
+    pub payload: String, // Panic message; logged by the server as `description`.
+    #[serde(skip_serializing_if = "Option::is_none")] // Field is left out of the JSON when there is no location.
+    pub location_data: Option<LocationData>,
+    pub backtrace: Vec<String>, // Backtrace entries; the server joins them with newlines.
+    pub app_version: String,
+    pub release_channel: String,
+    pub os_name: String,
+    pub os_version: Option<String>, // The server substitutes an empty string when this is None.
+    pub architecture: String,
+    pub panicked_on: i64, // Time of the panic; units are not visible here — TODO confirm against the panic hook.
+    #[serde(skip_serializing_if = "Option::is_none")] // Field is left out of the JSON when no installation id is known.
+    pub installation_id: Option<String>,
+    pub session_id: String,
+}
+
+#[derive(Serialize, Deserialize)]
+pub struct PanicRequest { // JSON body of a panic upload: a single `panic` key wrapping the report.
+    pub panic: Panic, // The report itself.
+}

crates/zed/src/reliability.rs 🔗

@@ -3,13 +3,13 @@ use backtrace::{self, Backtrace};
 use chrono::Utc;
 use db::kvp::KEY_VALUE_STORE;
 use gpui::{App, AppContext, SemanticVersion};
+use http::Method;
 use isahc::config::Configurable;
 
 use http::{self, HttpClient, HttpClientWithUrl};
 use paths::{CRASHES_DIR, CRASHES_RETIRED_DIR};
 use release_channel::ReleaseChannel;
 use release_channel::RELEASE_CHANNEL;
-use serde::{Deserialize, Serialize};
 use settings::Settings;
 use smol::stream::StreamExt;
 use std::{
@@ -18,39 +18,12 @@ use std::{
     sync::{atomic::Ordering, Arc},
 };
 use std::{io::Write, panic, sync::atomic::AtomicU32, thread};
+use telemetry_events::LocationData;
+use telemetry_events::Panic;
+use telemetry_events::PanicRequest;
 use util::{paths, ResultExt};
 
 use crate::stdout_is_a_pty;
-
-#[derive(Serialize, Deserialize)]
-struct LocationData {
-    file: String,
-    line: u32,
-}
-
-#[derive(Serialize, Deserialize)]
-struct Panic {
-    thread: String,
-    payload: String,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    location_data: Option<LocationData>,
-    backtrace: Vec<String>,
-    app_version: String,
-    release_channel: String,
-    os_name: String,
-    os_version: Option<String>,
-    architecture: String,
-    panicked_on: i64,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    installation_id: Option<String>,
-    session_id: String,
-}
-
-#[derive(Serialize)]
-struct PanicRequest {
-    panic: Panic,
-}
-
 static PANIC_COUNT: AtomicU32 = AtomicU32::new(0);
 
 pub fn init_panic_hook(app: &App, installation_id: Option<String>, session_id: String) {
@@ -119,7 +92,7 @@ pub fn init_panic_hook(app: &App, installation_id: Option<String>, session_id: S
             backtrace.drain(0..=ix);
         }
 
-        let panic_data = Panic {
+        let panic_data = telemetry_events::Panic {
             thread: thread_name.into(),
             payload,
             location_data: info.location().map(|location| LocationData {
@@ -397,7 +370,7 @@ async fn upload_previous_panics(
     http: Arc<HttpClientWithUrl>,
     telemetry_settings: client::TelemetrySettings,
 ) -> Result<Option<(i64, String)>> {
-    let panic_report_url = http.build_url("/api/panic");
+    let panic_report_url = http.build_zed_api_url("/telemetry/panics", &[])?;
     let mut children = smol::fs::read_dir(&*paths::LOGS_DIR).await?;
 
     let mut most_recent_panic = None;
@@ -440,12 +413,21 @@ async fn upload_previous_panics(
             if let Some(panic) = panic {
                 most_recent_panic = Some((panic.panicked_on, panic.payload.clone()));
 
-                let body = serde_json::to_string(&PanicRequest { panic }).unwrap();
+                let json_bytes = serde_json::to_vec(&PanicRequest { panic }).unwrap();
+
+                let Some(checksum) = client::telemetry::calculate_json_checksum(&json_bytes) else {
+                    continue;
+                };
+
+                let Ok(request) = http::Request::builder()
+                    .method(Method::POST)
+                    .uri(panic_report_url.as_ref())
+                    .header("x-zed-checksum", checksum)
+                    .body(json_bytes.into())
+                else {
+                    continue;
+                };
 
-                let request = http::Request::post(&panic_report_url)
-                    .redirect_policy(isahc::config::RedirectPolicy::Follow)
-                    .header("Content-Type", "application/json")
-                    .body(body.into())?;
                 let response = http.send(request).await.context("error sending panic")?;
                 if !response.status().is_success() {
                     log::error!("Error uploading panic to server: {}", response.status());