Write panics and crashes to snowflake (#28284)

Max Brunsfeld created

This will let us create a better crashes dashboard, using Hex.

Release Notes:

- N/A

Change summary

crates/collab/src/api/billing.rs |  2 
crates/collab/src/api/events.rs  | 57 ++++++++++++++++++++++++++++++++-
crates/collab/src/llm.rs         |  4 +-
3 files changed, 57 insertions(+), 6 deletions(-)

Detailed changes

crates/collab/src/api/billing.rs 🔗

@@ -127,7 +127,7 @@ async fn update_billing_preferences(
 
     SnowflakeRow::new(
         "Spend Limit Updated",
-        user.metrics_id,
+        Some(user.metrics_id),
         user.admin,
         None,
         json!({

crates/collab/src/api/events.rs 🔗

@@ -149,6 +149,31 @@ pub async fn post_crash(
         "crash report"
     );
 
+    if let Some(kinesis_client) = app.kinesis_client.clone() {
+        if let Some(stream) = app.config.kinesis_stream.clone() {
+            let properties = json!({
+                "app_version": report.header.app_version,
+                "os_version": report.header.os_version,
+                "os_name": "macOS",
+                "bundle_id": report.header.bundle_id,
+                "incident_id": report.header.incident_id,
+                "installation_id": installation_id,
+                "description": description,
+                "backtrace": summary,
+            });
+            SnowflakeRow::new(
+                "Crash Report",
+                None,
+                false,
+                Some(installation_id),
+                properties,
+            )
+            .write(&Some(kinesis_client), &Some(stream))
+            .await
+            .log_err();
+        }
+    }
+
     if let Some(slack_panics_webhook) = app.config.slack_panics_webhook.clone() {
         let payload = slack::WebhookBody::new(|w| {
             w.add_section(|s| s.text(slack::Text::markdown(description)))
@@ -314,6 +339,8 @@ pub async fn post_panic(
             .ok();
     }
 
+    let backtrace = panic.backtrace.join("\n");
+
     tracing::error!(
         service = "client",
         version = %panic.app_version,
@@ -322,10 +349,34 @@ pub async fn post_panic(
         incident_id = %incident_id,
         installation_id = %panic.installation_id.clone().unwrap_or_default(),
         description = %panic.payload,
-        backtrace = %panic.backtrace.join("\n"),
+        backtrace = %backtrace,
         "panic report"
     );
 
+    if let Some(kinesis_client) = app.kinesis_client.clone() {
+        if let Some(stream) = app.config.kinesis_stream.clone() {
+            let properties = json!({
+                "app_version": panic.app_version,
+                "os_name": panic.os_name,
+                "os_version": panic.os_version,
+                "incident_id": incident_id,
+                "installation_id": panic.installation_id,
+                "description": panic.payload,
+                "backtrace": backtrace,
+            });
+            SnowflakeRow::new(
+                "Panic Report",
+                None,
+                false,
+                panic.installation_id.clone(),
+                properties,
+            )
+            .write(&Some(kinesis_client), &Some(stream))
+            .await
+            .log_err();
+        }
+    }
+
     let backtrace = if panic.backtrace.len() > 25 {
         let total = panic.backtrace.len();
         format!(
@@ -711,7 +762,7 @@ pub struct SnowflakeRow {
 impl SnowflakeRow {
     pub fn new(
         event_type: impl Into<String>,
-        metrics_id: Uuid,
+        metrics_id: Option<Uuid>,
         is_staff: bool,
         system_id: Option<String>,
         event_properties: serde_json::Value,
@@ -720,7 +771,7 @@ impl SnowflakeRow {
             time: chrono::Utc::now(),
             event_type: event_type.into(),
             device_id: system_id,
-            user_id: Some(metrics_id.to_string()),
+            user_id: metrics_id.map(|id| id.to_string()),
             insert_id: Some(uuid::Uuid::new_v4().to_string()),
             event_properties,
             user_properties: Some(json!({"is_staff": is_staff})),

crates/collab/src/llm.rs 🔗

@@ -591,7 +591,7 @@ async fn check_usage_limit(
 
             SnowflakeRow::new(
                 "Language Model Rate Limited",
-                claims.metrics_id,
+                Some(claims.metrics_id),
                 claims.is_staff,
                 claims.system_id.clone(),
                 json!({
@@ -719,7 +719,7 @@ impl<S> Drop for TokenCountingStream<S> {
                 });
                 SnowflakeRow::new(
                     "Language Model Used",
-                    claims.metrics_id,
+                    Some(claims.metrics_id),
                     claims.is_staff,
                     claims.system_id.clone(),
                     properties,