reliability.rs

  1use anyhow::{Context as _, Result};
  2use client::{Client, telemetry::MINIDUMP_ENDPOINT};
  3use futures::{AsyncReadExt, TryStreamExt};
  4use gpui::{App, AppContext as _, SerializedThreadTaskTimings};
  5use http_client::{self, AsyncBody, HttpClient, Request};
  6use log::info;
  7use project::Project;
  8use proto::{CrashReport, GetCrashFilesResponse};
  9use reqwest::{
 10    Method,
 11    multipart::{Form, Part},
 12};
 13use smol::stream::StreamExt;
 14use std::{ffi::OsStr, fs, sync::Arc, thread::ThreadId, time::Duration};
 15use util::ResultExt;
 16
 17use crate::STARTUP_TIME;
 18
 19pub fn init(client: Arc<Client>, cx: &mut App) {
 20    monitor_hangs(cx);
 21
 22    if client.telemetry().diagnostics_enabled() {
 23        let client = client.clone();
 24        cx.background_spawn(async move {
 25            upload_previous_minidumps(client).await.warn_on_err();
 26        })
 27        .detach()
 28    }
 29
 30    cx.observe_new(move |project: &mut Project, _, cx| {
 31        let client = client.clone();
 32
 33        let Some(remote_client) = project.remote_client() else {
 34            return;
 35        };
 36        remote_client.update(cx, |remote_client, cx| {
 37            if !client.telemetry().diagnostics_enabled() {
 38                return;
 39            }
 40            let request = remote_client
 41                .proto_client()
 42                .request(proto::GetCrashFiles {});
 43            cx.background_spawn(async move {
 44                let GetCrashFilesResponse { crashes } = request.await?;
 45
 46                let Some(endpoint) = MINIDUMP_ENDPOINT.as_ref() else {
 47                    return Ok(());
 48                };
 49                for CrashReport {
 50                    metadata,
 51                    minidump_contents,
 52                } in crashes
 53                {
 54                    if let Some(metadata) = serde_json::from_str(&metadata).log_err() {
 55                        upload_minidump(client.clone(), endpoint, minidump_contents, &metadata)
 56                            .await
 57                            .log_err();
 58                    }
 59                }
 60
 61                anyhow::Ok(())
 62            })
 63            .detach_and_log_err(cx);
 64        })
 65    })
 66    .detach();
 67}
 68
 69fn monitor_hangs(cx: &App) {
 70    let main_thread_id = std::thread::current().id();
 71
 72    let foreground_executor = cx.foreground_executor();
 73    let background_executor = cx.background_executor();
 74
 75    // 3 seconds hang
 76    let (mut tx, mut rx) = futures::channel::mpsc::channel(3);
 77    foreground_executor
 78        .spawn(async move { while (rx.next().await).is_some() {} })
 79        .detach();
 80
 81    background_executor
 82        .spawn({
 83            let background_executor = background_executor.clone();
 84            async move {
 85                let mut hang_time = None;
 86
 87                let mut hanging = false;
 88                loop {
 89                    background_executor.timer(Duration::from_secs(1)).await;
 90                    match tx.try_send(()) {
 91                        Ok(_) => {
 92                            hang_time = None;
 93                            hanging = false;
 94                            continue;
 95                        }
 96                        Err(e) => {
 97                            let is_full = e.into_send_error().is_full();
 98                            if is_full && !hanging {
 99                                hanging = true;
100                                hang_time = Some(chrono::Local::now());
101                            }
102
103                            if is_full {
104                                save_hang_trace(
105                                    main_thread_id,
106                                    &background_executor,
107                                    hang_time.unwrap(),
108                                );
109                            }
110                        }
111                    }
112                }
113            }
114        })
115        .detach();
116}
117
118fn save_hang_trace(
119    main_thread_id: ThreadId,
120    background_executor: &gpui::BackgroundExecutor,
121    hang_time: chrono::DateTime<chrono::Local>,
122) {
123    let thread_timings = background_executor.dispatcher().get_all_timings();
124    let thread_timings = thread_timings
125        .into_iter()
126        .map(|mut timings| {
127            if timings.thread_id == main_thread_id {
128                timings.thread_name = Some("main".to_string());
129            }
130
131            SerializedThreadTaskTimings::convert(*STARTUP_TIME.get().unwrap(), timings)
132        })
133        .collect::<Vec<_>>();
134
135    let trace_path = paths::hang_traces_dir().join(&format!(
136        "hang-{}.miniprof",
137        hang_time.format("%Y-%m-%d_%H-%M-%S")
138    ));
139
140    let Some(timings) = serde_json::to_string(&thread_timings)
141        .context("hang timings serialization")
142        .log_err()
143    else {
144        return;
145    };
146
147    std::fs::write(&trace_path, timings)
148        .context("hang trace file writing")
149        .log_err();
150
151    info!(
152        "hang detected, trace file saved at: {}",
153        trace_path.display()
154    );
155}
156
157pub async fn upload_previous_minidumps(client: Arc<Client>) -> anyhow::Result<()> {
158    let Some(minidump_endpoint) = MINIDUMP_ENDPOINT.as_ref() else {
159        log::warn!("Minidump endpoint not set");
160        return Ok(());
161    };
162
163    let mut children = smol::fs::read_dir(paths::logs_dir()).await?;
164    while let Some(child) = children.next().await {
165        let child = child?;
166        let child_path = child.path();
167        if child_path.extension() != Some(OsStr::new("dmp")) {
168            continue;
169        }
170        let mut json_path = child_path.clone();
171        json_path.set_extension("json");
172        let Ok(metadata) = smol::fs::read(&json_path)
173            .await
174            .map_err(|e| anyhow::anyhow!(e))
175            .and_then(|data| serde_json::from_slice(&data).map_err(|e| anyhow::anyhow!(e)))
176        else {
177            continue;
178        };
179        if upload_minidump(
180            client.clone(),
181            minidump_endpoint,
182            smol::fs::read(&child_path)
183                .await
184                .context("Failed to read minidump")?,
185            &metadata,
186        )
187        .await
188        .log_err()
189        .is_some()
190        {
191            fs::remove_file(child_path).ok();
192            fs::remove_file(json_path).ok();
193        }
194    }
195    Ok(())
196}
197
198async fn upload_minidump(
199    client: Arc<Client>,
200    endpoint: &str,
201    minidump: Vec<u8>,
202    metadata: &crashes::CrashInfo,
203) -> Result<()> {
204    let mut form = Form::new()
205        .part(
206            "upload_file_minidump",
207            Part::bytes(minidump)
208                .file_name("minidump.dmp")
209                .mime_str("application/octet-stream")?,
210        )
211        .text(
212            "sentry[tags][channel]",
213            metadata.init.release_channel.clone(),
214        )
215        .text("sentry[tags][version]", metadata.init.zed_version.clone())
216        .text("sentry[tags][binary]", metadata.init.binary.clone())
217        .text("sentry[release]", metadata.init.commit_sha.clone())
218        .text("platform", "rust");
219    let mut panic_message = "".to_owned();
220    if let Some(panic_info) = metadata.panic.as_ref() {
221        panic_message = panic_info.message.clone();
222        form = form
223            .text("sentry[logentry][formatted]", panic_info.message.clone())
224            .text("span", panic_info.span.clone());
225    }
226    if let Some(minidump_error) = metadata.minidump_error.clone() {
227        form = form.text("minidump_error", minidump_error);
228    }
229
230    if let Some(id) = client.telemetry().metrics_id() {
231        form = form.text("sentry[user][id]", id.to_string());
232        form = form.text(
233            "sentry[user][is_staff]",
234            if client.telemetry().is_staff().unwrap_or_default() {
235                "true"
236            } else {
237                "false"
238            },
239        );
240    } else if let Some(id) = client.telemetry().installation_id() {
241        form = form.text("sentry[user][id]", format!("installation-{}", id))
242    }
243
244    ::telemetry::event!(
245        "Minidump Uploaded",
246        panic_message = panic_message,
247        crashed_version = metadata.init.zed_version.clone(),
248        commit_sha = metadata.init.commit_sha.clone(),
249    );
250
251    let gpu_count = metadata.gpus.len();
252    for (index, gpu) in metadata.gpus.iter().cloned().enumerate() {
253        let system_specs::GpuInfo {
254            device_name,
255            device_pci_id,
256            vendor_name,
257            vendor_pci_id,
258            driver_version,
259            driver_name,
260        } = gpu;
261        let num = if gpu_count == 1 && metadata.active_gpu.is_none() {
262            String::new()
263        } else {
264            index.to_string()
265        };
266        let name = format!("gpu{num}");
267        let root = format!("sentry[contexts][{name}]");
268        form = form
269            .text(
270                format!("{root}[Description]"),
271                "A GPU found on the users system. May or may not be the GPU Zed is running on",
272            )
273            .text(format!("{root}[type]"), "gpu")
274            .text(format!("{root}[name]"), device_name.unwrap_or(name))
275            .text(format!("{root}[id]"), format!("{:#06x}", device_pci_id))
276            .text(
277                format!("{root}[vendor_id]"),
278                format!("{:#06x}", vendor_pci_id),
279            )
280            .text_if_some(format!("{root}[vendor_name]"), vendor_name)
281            .text_if_some(format!("{root}[driver_version]"), driver_version)
282            .text_if_some(format!("{root}[driver_name]"), driver_name);
283    }
284    if let Some(active_gpu) = metadata.active_gpu.clone() {
285        form = form
286            .text(
287                "sentry[contexts][Active_GPU][Description]",
288                "The GPU Zed is running on",
289            )
290            .text("sentry[contexts][Active_GPU][type]", "gpu")
291            .text("sentry[contexts][Active_GPU][name]", active_gpu.device_name)
292            .text(
293                "sentry[contexts][Active_GPU][driver_version]",
294                active_gpu.driver_info,
295            )
296            .text(
297                "sentry[contexts][Active_GPU][driver_name]",
298                active_gpu.driver_name,
299            )
300            .text(
301                "sentry[contexts][Active_GPU][is_software_emulated]",
302                active_gpu.is_software_emulated.to_string(),
303            );
304    }
305
306    // TODO: feature-flag-context, and more of device-context like screen resolution, available ram, device model, etc
307
308    let content_type = format!("multipart/form-data; boundary={}", form.boundary());
309    let mut body_bytes = Vec::new();
310    let mut stream = form
311        .into_stream()
312        .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))
313        .into_async_read();
314    stream.read_to_end(&mut body_bytes).await?;
315    let req = Request::builder()
316        .method(Method::POST)
317        .uri(endpoint)
318        .header("Content-Type", content_type)
319        .body(AsyncBody::from(body_bytes))?;
320    let mut response_text = String::new();
321    let mut response = client.http_client().send(req).await?;
322    response
323        .body_mut()
324        .read_to_string(&mut response_text)
325        .await?;
326    if !response.status().is_success() {
327        anyhow::bail!("failed to upload minidump: {response_text}");
328    }
329    log::info!("Uploaded minidump. event id: {response_text}");
330    Ok(())
331}
332
333trait FormExt {
334    fn text_if_some(
335        self,
336        label: impl Into<std::borrow::Cow<'static, str>>,
337        value: Option<impl Into<std::borrow::Cow<'static, str>>>,
338    ) -> Self;
339}
340
341impl FormExt for Form {
342    fn text_if_some(
343        self,
344        label: impl Into<std::borrow::Cow<'static, str>>,
345        value: Option<impl Into<std::borrow::Cow<'static, str>>>,
346    ) -> Self {
347        match value {
348            Some(value) => self.text(label.into(), value.into()),
349            None => self,
350        }
351    }
352}