Send crash reports to Slack automatically

Conrad Irwin created

Change summary

Cargo.lock                             |  11 +
Procfile                               |   1 
crates/client/src/client.rs            |  10 +
crates/collab/.env.toml                |   3 
crates/collab/Cargo.toml               |   1 
crates/collab/k8s/collab.template.yml  |  15 +
crates/collab/src/api.rs               | 127 ++++++++++++++++
crates/collab/src/lib.rs               |   3 
crates/collab/src/tests/test_server.rs |   3 
crates/util/src/paths.rs               |   2 
crates/zed/src/main.rs                 | 212 +++++++++++++++++++--------
11 files changed, 319 insertions(+), 69 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -1474,6 +1474,7 @@ dependencies = [
  "env_logger",
  "envy",
  "file_finder",
+ "form-data-builder",
  "fs",
  "futures 0.3.28",
  "git",
@@ -2690,6 +2691,16 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
 
+[[package]]
+name = "form-data-builder"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "30ff8fb4527b05539a9f573ba2831a1127038a7b45eea385a338a63dc5ab6829"
+dependencies = [
+ "base64 0.13.1",
+ "rand 0.8.5",
+]
+
 [[package]]
 name = "form_urlencoded"
 version = "1.2.0"

Procfile 🔗

@@ -1,4 +1,3 @@
-web: cd ../zed.dev && PORT=3000 npm run dev
 collab: cd crates/collab && RUST_LOG=${RUST_LOG:-warn,collab=info} cargo run serve
 livekit: livekit-server --dev
 postgrest: postgrest crates/collab/admin_api.conf

crates/client/src/client.rs 🔗

@@ -969,6 +969,16 @@ impl Client {
         Url::parse(&collab_url).context("invalid rpc url")
     }
 
+    /// Returns the URL produced by `get_rpc_url` with its path cleared.
+    // todo: this should probably be cached (and/or done better)
+    pub async fn get_collab_server_url(
+        http: Arc<dyn HttpClient>,
+        release_channel: Option<ReleaseChannel>,
+    ) -> Result<Url> {
+        let mut url = Self::get_rpc_url(http, release_channel).await?;
+        url.set_path("");
+        Ok(url)
+    }
+
     fn establish_websocket_connection(
         self: &Arc<Self>,
         credentials: &Credentials,

crates/collab/.env.toml 🔗

@@ -2,11 +2,14 @@ DATABASE_URL = "postgres://postgres@localhost/zed"
 DATABASE_MAX_CONNECTIONS = 5
 HTTP_PORT = 8080
 API_TOKEN = "secret"
+CLIENT_TOKEN = "618033988749894"
 INVITE_LINK_PREFIX = "http://localhost:3000/invites/"
 ZED_ENVIRONMENT = "development"
 LIVE_KIT_SERVER = "http://localhost:7880"
 LIVE_KIT_KEY = "devkey"
 LIVE_KIT_SECRET = "secret"
+# SLACK_PANIC_CHANNEL =
+# SLACK_API_KEY =
 
 # RUST_LOG=info
 # LOG_JSON=true

crates/collab/Cargo.toml 🔗

@@ -58,6 +58,7 @@ tracing = "0.1.34"
 tracing-log = "0.1.3"
 tracing-subscriber = { version = "0.3.11", features = ["env-filter", "json"] }
 uuid.workspace = true
+form-data-builder = "1.0.1"
 
 [dev-dependencies]
 audio = { path = "../audio" }

crates/collab/k8s/collab.template.yml 🔗

@@ -90,6 +90,11 @@ spec:
                 secretKeyRef:
                   name: api
                   key: token
+            - name: CLIENT_TOKEN
+              valueFrom:
+                secretKeyRef:
+                  name: api
+                  key: client_token
             - name: LIVE_KIT_SERVER
               valueFrom:
                 secretKeyRef:
@@ -105,6 +110,16 @@ spec:
                 secretKeyRef:
                   name: livekit
                   key: secret
+            - name: SLACK_PANIC_CHANNEL
+              valueFrom:
+                secretKeyRef:
+                  name: slack
+                  key: panic_channel
+            - name: SLACK_API_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: slack
+                  key: api_key
             - name: INVITE_LINK_PREFIX
               value: ${INVITE_LINK_PREFIX}
             - name: RUST_BACKTRACE

crates/collab/src/api.rs 🔗

@@ -18,19 +18,28 @@ use serde::{Deserialize, Serialize};
 use std::sync::Arc;
 use tower::ServiceBuilder;
 use tracing::instrument;
+use util::{async_maybe, http::AsyncBody, ResultExt};
 
+/// Builds the API router by merging two route groups, each wrapped in its own
+/// authorization middleware.
 pub fn routes(rpc_server: Arc<rpc::Server>, state: Arc<AppState>) -> Router<Body> {
-    Router::new()
+    // These routes are layered with `validate_api_token`.
+    let called_from_website = Router::new()
         .route("/user", get(get_authenticated_user))
         .route("/users/:id/access_tokens", post(create_access_token))
         .route("/panic", post(trace_panic))
         .route("/rpc_server_snapshot", get(get_rpc_server_snapshot))
         .layer(
             ServiceBuilder::new()
-                .layer(Extension(state))
+                .layer(Extension(state.clone()))
                 .layer(Extension(rpc_server))
                 .layer(middleware::from_fn(validate_api_token)),
-        )
+        );
+
+    // The `/crash` route is layered with `validate_client_secret` instead.
+    let called_from_client = Router::new().route("/crash", post(trace_crash)).layer(
+        ServiceBuilder::new()
+            .layer(Extension(state))
+            .layer(middleware::from_fn(validate_client_secret)),
+    );
+
+    called_from_website.merge(called_from_client)
 }
 
 pub async fn validate_api_token<B>(req: Request<B>, next: Next<B>) -> impl IntoResponse {
@@ -64,6 +73,37 @@ pub async fn validate_api_token<B>(req: Request<B>, next: Next<B>) -> impl IntoR
     Ok::<_, Error>(next.run(req).await)
 }
 
+/// Middleware that only lets a request through when its `Authorization` header
+/// has the form `token <secret>` and `<secret>` equals `config.client_token`.
+///
+/// Responds with 400 when the header is missing, cannot be read as a string, or
+/// lacks the `token ` prefix, and with 401 when the secret does not match.
+///
+/// # Panics
+///
+/// Panics if the request carries no `Arc<AppState>` extension.
+pub async fn validate_client_secret<B>(req: Request<B>, next: Next<B>) -> impl IntoResponse {
+    let token = req
+        .headers()
+        .get(http::header::AUTHORIZATION)
+        .and_then(|header| header.to_str().ok())
+        .ok_or_else(|| {
+            Error::Http(
+                StatusCode::BAD_REQUEST,
+                "missing authorization header".to_string(),
+            )
+        })?
+        .strip_prefix("token ")
+        .ok_or_else(|| {
+            Error::Http(
+                StatusCode::BAD_REQUEST,
+                "invalid authorization header".to_string(),
+            )
+        })?;
+
+    let state = req.extensions().get::<Arc<AppState>>().unwrap();
+
+    if token != state.config.client_token {
+        Err(Error::Http(
+            StatusCode::UNAUTHORIZED,
+            "invalid client secret".to_string(),
+        ))?
+    }
+
+    Ok::<_, Error>(next.run(req).await)
+}
+
 #[derive(Debug, Deserialize)]
 struct AuthenticatedUserParams {
     github_user_id: Option<i32>,
@@ -127,6 +167,87 @@ async fn trace_panic(panic: Json<Panic>) -> Result<()> {
     Ok(())
 }
 
+/// IPSHeader is the first line of an .ips file (in JSON format).
+/// <https://developer.apple.com/documentation/xcode/interpreting-the-json-format-of-a-crash-report>
+///
+/// Every field is optional, so a header that omits any of them still deserializes.
+#[derive(Debug, Serialize, Deserialize)]
+struct IPSHeader {
+    timestamp: Option<String>,
+    name: Option<String>,
+    app_name: Option<String>,
+    app_version: Option<String>,
+    slice_uuid: Option<String>,
+    build_version: Option<String>,
+    platform: Option<i32>,
+    // Read from / written to the JSON key `bundleID`.
+    #[serde(rename = "bundleID")]
+    bundle_id: Option<String>,
+    share_with_app_devs: Option<i32>,
+    is_first_party: Option<i32>,
+    bug_type: Option<String>,
+    os_version: Option<String>,
+    roots_installed: Option<i32>,
+    incident_id: Option<String>,
+}
+
+/// Logs an uploaded `.ips` crash report and, when Slack is configured,
+/// forwards it to Slack as a file upload.
+///
+/// The first line of `content` is parsed as the JSON [`IPSHeader`]; an empty
+/// body is rejected with 400. Forwarding to Slack is best-effort: it is skipped
+/// when `slack_api_key` or `slack_panic_channel` is unset, and any failure is
+/// logged without failing the request.
+#[instrument(skip(content, app))]
+async fn trace_crash(content: String, Extension(app): Extension<Arc<AppState>>) -> Result<()> {
+    // `lines()` yields nothing for an empty body (unlike `split("\n")`, which
+    // always yields at least one item), so this guard can actually fire.
+    let Some(header) = content.lines().next() else {
+        return Err(Error::Http(
+            StatusCode::BAD_REQUEST,
+            "invalid .ips file".to_string(),
+        ));
+    };
+    let header: IPSHeader = serde_json::from_str(header)?;
+    let text = content.as_str();
+
+    // Borrow the header fields for logging; they are consumed further down.
+    tracing::error!(app_version = %header.app_version.as_deref().unwrap_or_default(),
+        build_version = %header.build_version.as_deref().unwrap_or_default(),
+        os_version = %header.os_version.as_deref().unwrap_or_default(),
+        bundle_id = %header.bundle_id.as_deref().unwrap_or_default(),
+        text = %text,
+    "crash report");
+
+    async_maybe!({
+        // Either value missing means Slack is not configured: skip silently.
+        let api_key = app.config.slack_api_key.clone()?;
+        let channel = app.config.slack_panic_channel.clone()?;
+
+        let mut body = form_data_builder::FormData::new(Vec::new());
+        body.write_field("content", text).log_err()?;
+        body.write_field("channels", channel.as_str()).log_err()?;
+        body.write_field(
+            "filename",
+            format!("zed-crash-{}.ips", header.incident_id.unwrap_or_default()).as_str(),
+        )
+        .log_err()?;
+        body.write_field(
+            "initial_comment",
+            format!(
+                "New crash in {} ({})",
+                header.bundle_id.unwrap_or_default(),
+                header.app_version.unwrap_or_default()
+            )
+            .as_str(),
+        )
+        .log_err()?;
+        let content_type = body.content_type_header();
+        let body = AsyncBody::from(body.finish().log_err()?);
+
+        let request = Request::post("https://slack.com/api/files.upload")
+            .header("Content-Type", content_type)
+            .header("Authorization", format!("Bearer {}", api_key))
+            .body(body)
+            .log_err()?;
+
+        let response = util::http::client().send(request).await.log_err()?;
+        // NOTE(review): only the HTTP status is checked here; confirm whether
+        // Slack reports API-level failures in the response body instead.
+        if !response.status().is_success() {
+            tracing::error!(response = ?response, "failed to send crash report to slack");
+        }
+
+        Some(())
+    })
+    .await;
+    Ok(())
+}
+
 async fn get_rpc_server_snapshot(
     Extension(rpc_server): Extension<Arc<rpc::Server>>,
 ) -> Result<ErasedJson> {

crates/collab/src/lib.rs 🔗

@@ -101,6 +101,9 @@ pub struct Config {
     pub rust_log: Option<String>,
     pub log_json: Option<bool>,
     pub zed_environment: Arc<str>,
+    pub slack_api_key: Option<String>,
+    pub slack_panic_channel: Option<String>,
+    pub client_token: String,
 }
 
 impl Config {

crates/collab/src/tests/test_server.rs 🔗

@@ -465,6 +465,7 @@ impl TestServer {
                 database_url: "".into(),
                 database_max_connections: 0,
                 api_token: "".into(),
+                client_token: "".into(),
                 invite_link_prefix: "".into(),
                 live_kit_server: None,
                 live_kit_key: None,
@@ -472,6 +473,8 @@ impl TestServer {
                 rust_log: None,
                 log_json: None,
                 zed_environment: "test".into(),
+                slack_api_key: None,
+                slack_panic_channel: None,
             },
         })
     }

crates/util/src/paths.rs 🔗

@@ -15,6 +15,8 @@ lazy_static::lazy_static! {
     pub static ref COPILOT_DIR: PathBuf = HOME.join("Library/Application Support/Zed/copilot");
     pub static ref DEFAULT_PRETTIER_DIR: PathBuf = HOME.join("Library/Application Support/Zed/prettier");
     pub static ref DB_DIR: PathBuf = HOME.join("Library/Application Support/Zed/db");
+    pub static ref CRASHES_DIR: PathBuf = HOME.join("Library/Logs/DiagnosticReports");
+    pub static ref CRASHES_RETIRED_DIR: PathBuf = HOME.join("Library/Logs/DiagnosticReports/Retired");
     pub static ref SETTINGS: PathBuf = CONFIG_DIR.join("settings.json");
     pub static ref KEYMAP: PathBuf = CONFIG_DIR.join("keymap.json");
     pub static ref LAST_USERNAME: PathBuf = CONFIG_DIR.join("last-username.txt");

crates/zed/src/main.rs 🔗

@@ -43,7 +43,8 @@ use util::{
     async_maybe,
     channel::{parse_zed_link, AppCommitSha, ReleaseChannel, RELEASE_CHANNEL},
     http::{self, HttpClient},
-    paths, ResultExt,
+    paths::{self, CRASHES_DIR, CRASHES_RETIRED_DIR},
+    ResultExt,
 };
 use uuid::Uuid;
 use welcome::{show_welcome_view, BaseKeymap, FIRST_OPEN};
@@ -227,14 +228,14 @@ fn main() {
         initialize_workspace(app_state.clone(), cx);
 
         if stdout_is_a_pty() {
+            upload_panics_and_crashes(http.clone(), cx);
             cx.activate(true);
             let urls = collect_url_args();
             if !urls.is_empty() {
                 listener.open_urls(&urls)
             }
         } else {
-            upload_previous_panics(http.clone(), cx);
-
+            upload_panics_and_crashes(http.clone(), cx);
             // TODO Development mode that forces the CLI mode usually runs Zed binary as is instead
             // of an *app, hence gets no specific callbacks run. Emulate them here, if needed.
             if std::env::var(FORCE_CLI_MODE_ENV_VAR_NAME).ok().is_some()
@@ -597,77 +598,158 @@ fn init_panic_hook(app: &App, installation_id: Option<String>, session_id: Strin
     }));
 }
 
-fn upload_previous_panics(http: Arc<dyn HttpClient>, cx: &mut AppContext) {
+/// Spawns a detached background task that runs `upload_previous_panics` and then
+/// `upload_previous_crashes`, logging (rather than propagating) an error from either.
+fn upload_panics_and_crashes(http: Arc<dyn HttpClient>, cx: &mut AppContext) {
     let telemetry_settings = *client::TelemetrySettings::get_global(cx);
-
+    let release_channel = cx.global::<ReleaseChannel>().clone();
     cx.background_executor()
         .spawn(async move {
-            let panic_report_url = format!("{}/api/panic", &*client::ZED_SERVER_URL);
-            let mut children = smol::fs::read_dir(&*paths::LOGS_DIR).await?;
-            while let Some(child) = children.next().await {
-                let child = child?;
-                let child_path = child.path();
-
-                if child_path.extension() != Some(OsStr::new("panic")) {
-                    continue;
-                }
-                let filename = if let Some(filename) = child_path.file_name() {
-                    filename.to_string_lossy()
-                } else {
-                    continue;
-                };
+            upload_previous_panics(http.clone(), telemetry_settings)
+                .await
+                .log_err();
+            upload_previous_crashes(http, telemetry_settings, release_channel)
+                .await
+                .log_err()
+        })
+        .detach()
+}
 
-                if !filename.starts_with("zed") {
-                    continue;
-                }
+/// Uploads previously recorded panic files to us (via zed.dev).
+///
+/// Scans `paths::LOGS_DIR` for files with a `panic` extension whose name starts
+/// with "zed". When `telemetry_settings.diagnostics` is enabled, each file is
+/// parsed (as a whole, falling back to its first line) and POSTed to
+/// `{ZED_SERVER_URL}/api/panic`. Unless an error returns early, the file is then
+/// deleted, even when diagnostics are disabled or the server responds with a
+/// non-success status.
+async fn upload_previous_panics(
+    http: Arc<dyn HttpClient>,
+    telemetry_settings: client::TelemetrySettings,
+) -> Result<()> {
+    let panic_report_url = format!("{}/api/panic", &*client::ZED_SERVER_URL);
+    let mut children = smol::fs::read_dir(&*paths::LOGS_DIR).await?;
+    while let Some(child) = children.next().await {
+        let child = child?;
+        let child_path = child.path();
+
+        if child_path.extension() != Some(OsStr::new("panic")) {
+            continue;
+        }
+        let filename = if let Some(filename) = child_path.file_name() {
+            filename.to_string_lossy()
+        } else {
+            continue;
+        };
 
-                if telemetry_settings.diagnostics {
-                    let panic_file_content = smol::fs::read_to_string(&child_path)
-                        .await
-                        .context("error reading panic file")?;
-
-                    let panic = serde_json::from_str(&panic_file_content)
-                        .ok()
-                        .or_else(|| {
-                            panic_file_content
-                                .lines()
-                                .next()
-                                .and_then(|line| serde_json::from_str(line).ok())
-                        })
-                        .unwrap_or_else(|| {
-                            log::error!(
-                                "failed to deserialize panic file {:?}",
-                                panic_file_content
-                            );
-                            None
-                        });
+        if !filename.starts_with("zed") {
+            continue;
+        }
 
-                    if let Some(panic) = panic {
-                        let body = serde_json::to_string(&PanicRequest {
-                            panic,
-                            token: client::ZED_SECRET_CLIENT_TOKEN.into(),
-                        })
-                        .unwrap();
-
-                        let request = Request::post(&panic_report_url)
-                            .redirect_policy(isahc::config::RedirectPolicy::Follow)
-                            .header("Content-Type", "application/json")
-                            .body(body.into())?;
-                        let response = http.send(request).await.context("error sending panic")?;
-                        if !response.status().is_success() {
-                            log::error!("Error uploading panic to server: {}", response.status());
-                        }
-                    }
+        if telemetry_settings.diagnostics {
+            let panic_file_content = smol::fs::read_to_string(&child_path)
+                .await
+                .context("error reading panic file")?;
+
+            let panic = serde_json::from_str(&panic_file_content)
+                .ok()
+                .or_else(|| {
+                    panic_file_content
+                        .lines()
+                        .next()
+                        .and_then(|line| serde_json::from_str(line).ok())
+                })
+                .unwrap_or_else(|| {
+                    log::error!("failed to deserialize panic file {:?}", panic_file_content);
+                    None
+                });
+
+            if let Some(panic) = panic {
+                let body = serde_json::to_string(&PanicRequest {
+                    panic,
+                    token: client::ZED_SECRET_CLIENT_TOKEN.into(),
+                })
+                .unwrap();
+
+                let request = Request::post(&panic_report_url)
+                    .redirect_policy(isahc::config::RedirectPolicy::Follow)
+                    .header("Content-Type", "application/json")
+                    .body(body.into())?;
+                let response = http.send(request).await.context("error sending panic")?;
+                if !response.status().is_success() {
+                    log::error!("Error uploading panic to server: {}", response.status());
                 }
+            }
+        }
 
-                // We've done what we can, delete the file
-                std::fs::remove_file(child_path)
-                    .context("error removing panic")
-                    .log_err();
+        // We've done what we can, delete the file
+        std::fs::remove_file(child_path)
+            .context("error removing panic")
+            .log_err();
+    }
+    Ok::<_, anyhow::Error>(())
+}
+
+static LAST_CRASH_UPLOADED: &str = "LAST_CRASH_UPLOADED";
+
+/// Uploads crash reports from Apple's diagnostic report directories to our server
+/// (only if telemetry diagnostics are enabled).
+///
+/// Scans `CRASHES_DIR` and `CRASHES_RETIRED_DIR` for `zed-*.ips` files (names are
+/// compared lowercased) that sort after the name stored under
+/// `LAST_CRASH_UPLOADED`, POSTs each one to the collab server's `/crash`
+/// endpoint, and advances the stored name as it goes.
+///
+/// # Errors
+///
+/// Returns an error if the key-value store, the collab URL lookup, reading a
+/// directory entry or crash file, or sending a request fails. A directory that
+/// cannot be opened is logged and skipped instead.
+async fn upload_previous_crashes(
+    http: Arc<dyn HttpClient>,
+    telemetry_settings: client::TelemetrySettings,
+    release_channel: ReleaseChannel,
+) -> Result<()> {
+    if !telemetry_settings.diagnostics {
+        return Ok(());
+    }
+    // Don't upload old crash reports from before we had this.
+    let last_uploaded = KEY_VALUE_STORE
+        .read_kvp(LAST_CRASH_UPLOADED)?
+        .unwrap_or_else(|| "zed-2024-01-17-000000.ips".to_string());
+    let mut uploaded = last_uploaded.clone();
+
+    let mut crash_report_url =
+        client::Client::get_collab_server_url(http.clone(), Some(release_channel)).await?;
+    crash_report_url.set_path("/crash");
+
+    for dir in [&*CRASHES_DIR, &*CRASHES_RETIRED_DIR] {
+        // A directory that is missing or unreadable is logged and skipped so
+        // that it cannot prevent the other directory from being scanned.
+        let Some(mut children) = smol::fs::read_dir(dir).await.log_err() else {
+            continue;
+        };
+        while let Some(child) = children.next().await {
+            let child = child?;
+            let Some(filename) = child
+                .path()
+                .file_name()
+                .map(|f| f.to_string_lossy().to_lowercase())
+            else {
+                continue;
+            };
+
+            if !filename.starts_with("zed-") || !filename.ends_with(".ips") {
+                continue;
             }
-            Ok::<_, anyhow::Error>(())
-        })
-        .detach_and_log_err(cx);
+
+            // Compare against the value read at startup, so every newer file
+            // found in this run is considered regardless of directory order.
+            if filename <= last_uploaded {
+                continue;
+            }
+
+            let body = smol::fs::read_to_string(child.path())
+                .await
+                .context("error reading crash file")?;
+
+            let request = Request::post(crash_report_url.as_str())
+                .redirect_policy(isahc::config::RedirectPolicy::Follow)
+                .header("Content-Type", "text/plain")
+                .header(
+                    "Authorization",
+                    format!("token {}", client::ZED_SECRET_CLIENT_TOKEN),
+                )
+                .body(body.into())?;
+
+            let response = http.send(request).await.context("error sending crash")?;
+            // NOTE(review): a non-success response is only logged; the file is
+            // still recorded as uploaded below and will not be retried.
+            if !response.status().is_success() {
+                log::error!("Error uploading crash to server: {}", response.status());
+            }
+
+            if uploaded < filename {
+                uploaded = filename.clone();
+                KEY_VALUE_STORE
+                    .write_kvp(LAST_CRASH_UPLOADED.to_string(), filename)
+                    .await?;
+            }
+        }
+    }
+
+    Ok(())
 }
 
 async fn load_login_shell_environment() -> Result<()> {