1use crate::stdout_is_a_pty;
2use anyhow::{Context as _, Result};
3use backtrace::{self, Backtrace};
4use chrono::Utc;
5use client::{
6 TelemetrySettings,
7 telemetry::{self, MINIDUMP_ENDPOINT},
8};
9use db::kvp::KEY_VALUE_STORE;
10use futures::AsyncReadExt;
11use gpui::{App, AppContext as _, SemanticVersion};
12use http_client::{self, HttpClient, HttpClientWithUrl, HttpRequestExt, Method};
13use paths::{crashes_dir, crashes_retired_dir};
14use project::Project;
15use proto::{CrashReport, GetCrashFilesResponse};
16use release_channel::{AppCommitSha, RELEASE_CHANNEL, ReleaseChannel};
17use reqwest::multipart::{Form, Part};
18use settings::Settings;
19use smol::stream::StreamExt;
20use std::{
21 env,
22 ffi::{OsStr, c_void},
23 fs,
24 io::Write,
25 panic,
26 sync::{
27 Arc,
28 atomic::{AtomicU32, Ordering},
29 },
30 thread,
31};
32use telemetry_events::{LocationData, Panic, PanicRequest};
33use url::Url;
34use util::ResultExt;
35
36static PANIC_COUNT: AtomicU32 = AtomicU32::new(0);
37
38pub fn init_panic_hook(
39 app_version: SemanticVersion,
40 app_commit_sha: Option<AppCommitSha>,
41 system_id: Option<String>,
42 installation_id: Option<String>,
43 session_id: String,
44) {
45 let is_pty = stdout_is_a_pty();
46
47 panic::set_hook(Box::new(move |info| {
48 let prior_panic_count = PANIC_COUNT.fetch_add(1, Ordering::SeqCst);
49 if prior_panic_count > 0 {
50 // Give the panic-ing thread time to write the panic file
51 loop {
52 thread::yield_now();
53 }
54 }
55
56 let payload = info
57 .payload()
58 .downcast_ref::<&str>()
59 .map(|s| s.to_string())
60 .or_else(|| info.payload().downcast_ref::<String>().cloned())
61 .unwrap_or_else(|| "Box<Any>".to_string());
62
63 crashes::handle_panic(payload.clone(), info.location());
64
65 let thread = thread::current();
66 let thread_name = thread.name().unwrap_or("<unnamed>");
67
68 if *release_channel::RELEASE_CHANNEL == ReleaseChannel::Dev {
69 let location = info.location().unwrap();
70 let backtrace = Backtrace::new();
71 eprintln!(
72 "Thread {:?} panicked with {:?} at {}:{}:{}\n{}{:?}",
73 thread_name,
74 payload,
75 location.file(),
76 location.line(),
77 location.column(),
78 match app_commit_sha.as_ref() {
79 Some(commit_sha) => format!(
80 "https://github.com/zed-industries/zed/blob/{}/{}#L{} \
81 (may not be uploaded, line may be incorrect if files modified)\n",
82 commit_sha.full(),
83 location.file(),
84 location.line()
85 ),
86 None => "".to_string(),
87 },
88 backtrace,
89 );
90 std::process::exit(-1);
91 }
92 let main_module_base_address = get_main_module_base_address();
93
94 let backtrace = Backtrace::new();
95 let mut symbols = backtrace
96 .frames()
97 .iter()
98 .flat_map(|frame| {
99 let base = frame
100 .module_base_address()
101 .unwrap_or(main_module_base_address);
102 frame.symbols().iter().map(move |symbol| {
103 format!(
104 "{}+{}",
105 symbol
106 .name()
107 .as_ref()
108 .map_or("<unknown>".to_owned(), <_>::to_string),
109 (frame.ip() as isize).saturating_sub(base as isize)
110 )
111 })
112 })
113 .collect::<Vec<_>>();
114
115 // Strip out leading stack frames for rust panic-handling.
116 if let Some(ix) = symbols
117 .iter()
118 .position(|name| name == "rust_begin_unwind" || name == "_rust_begin_unwind")
119 {
120 symbols.drain(0..=ix);
121 }
122
123 let panic_data = telemetry_events::Panic {
124 thread: thread_name.into(),
125 payload,
126 location_data: info.location().map(|location| LocationData {
127 file: location.file().into(),
128 line: location.line(),
129 }),
130 app_version: app_version.to_string(),
131 app_commit_sha: app_commit_sha.as_ref().map(|sha| sha.full()),
132 release_channel: RELEASE_CHANNEL.dev_name().into(),
133 target: env!("TARGET").to_owned().into(),
134 os_name: telemetry::os_name(),
135 os_version: Some(telemetry::os_version()),
136 architecture: env::consts::ARCH.into(),
137 panicked_on: Utc::now().timestamp_millis(),
138 backtrace: symbols,
139 system_id: system_id.clone(),
140 installation_id: installation_id.clone(),
141 session_id: session_id.clone(),
142 };
143
144 if let Some(panic_data_json) = serde_json::to_string_pretty(&panic_data).log_err() {
145 log::error!("{}", panic_data_json);
146 }
147 zlog::flush();
148
149 if !is_pty {
150 if let Some(panic_data_json) = serde_json::to_string(&panic_data).log_err() {
151 let timestamp = chrono::Utc::now().format("%Y_%m_%d %H_%M_%S").to_string();
152 let panic_file_path = paths::logs_dir().join(format!("zed-{timestamp}.panic"));
153 let panic_file = fs::OpenOptions::new()
154 .write(true)
155 .create_new(true)
156 .open(&panic_file_path)
157 .log_err();
158 if let Some(mut panic_file) = panic_file {
159 writeln!(&mut panic_file, "{panic_data_json}").log_err();
160 panic_file.flush().log_err();
161 }
162 }
163 }
164
165 std::process::abort();
166 }));
167}
168
169#[cfg(not(target_os = "windows"))]
170fn get_main_module_base_address() -> *mut c_void {
171 let mut dl_info = libc::Dl_info {
172 dli_fname: std::ptr::null(),
173 dli_fbase: std::ptr::null_mut(),
174 dli_sname: std::ptr::null(),
175 dli_saddr: std::ptr::null_mut(),
176 };
177 unsafe {
178 libc::dladdr(get_main_module_base_address as _, &mut dl_info);
179 }
180 dl_info.dli_fbase
181}
182
/// Windows variant: no base address is looked up, so this always returns a null pointer.
#[cfg(target_os = "windows")]
fn get_main_module_base_address() -> *mut c_void {
    std::ptr::null_mut::<c_void>()
}
187
188pub fn init(
189 http_client: Arc<HttpClientWithUrl>,
190 system_id: Option<String>,
191 installation_id: Option<String>,
192 session_id: String,
193 cx: &mut App,
194) {
195 #[cfg(target_os = "macos")]
196 monitor_main_thread_hangs(http_client.clone(), installation_id.clone(), cx);
197
198 let Some(panic_report_url) = http_client
199 .build_zed_api_url("/telemetry/panics", &[])
200 .log_err()
201 else {
202 return;
203 };
204
205 upload_panics_and_crashes(
206 http_client.clone(),
207 panic_report_url.clone(),
208 installation_id.clone(),
209 cx,
210 );
211
212 cx.observe_new(move |project: &mut Project, _, cx| {
213 let http_client = http_client.clone();
214 let panic_report_url = panic_report_url.clone();
215 let session_id = session_id.clone();
216 let installation_id = installation_id.clone();
217 let system_id = system_id.clone();
218
219 let Some(ssh_client) = project.ssh_client() else {
220 return;
221 };
222 ssh_client.update(cx, |client, cx| {
223 if !TelemetrySettings::get_global(cx).diagnostics {
224 return;
225 }
226 let request = client.proto_client().request(proto::GetCrashFiles {});
227 cx.background_spawn(async move {
228 let GetCrashFilesResponse {
229 legacy_panics,
230 crashes,
231 } = request.await?;
232
233 for panic in legacy_panics {
234 if let Some(mut panic) = serde_json::from_str::<Panic>(&panic).log_err() {
235 panic.session_id = session_id.clone();
236 panic.system_id = system_id.clone();
237 panic.installation_id = installation_id.clone();
238 upload_panic(&http_client, &panic_report_url, panic, &mut None).await?;
239 }
240 }
241
242 let Some(endpoint) = MINIDUMP_ENDPOINT.as_ref() else {
243 return Ok(());
244 };
245 for CrashReport {
246 metadata,
247 minidump_contents,
248 } in crashes
249 {
250 if let Some(metadata) = serde_json::from_str(&metadata).log_err() {
251 upload_minidump(
252 http_client.clone(),
253 endpoint,
254 minidump_contents,
255 &metadata,
256 )
257 .await
258 .log_err();
259 }
260 }
261
262 anyhow::Ok(())
263 })
264 .detach_and_log_err(cx);
265 })
266 })
267 .detach();
268}
269
/// Watches the main thread for hangs and reports a backtrace of it when one is suspected.
///
/// Only active on the `Dev`, `Nightly` and `Preview` release channels. The mechanism has three
/// cooperating parts:
///
/// * a foreground task that does nothing but drain a small bounded channel;
/// * a background "watchdog" task that pushes into that channel once per second and, when the
///   channel is full (the foreground task has not been draining it), sends `SIGUSR2` to the
///   main thread. The watchdog stops after the first failed send;
/// * a `SIGUSR2` handler that captures the main thread's stack into the static `BACKTRACE`
///   buffer and notifies a background "reporter" task, which symbolizes the frames, logs them
///   and POSTs a `HangReport` to `/telemetry/hangs`.
///
/// The reporter exits without uploading if the `diagnostics` telemetry setting (sampled when
/// this function is called) is disabled, and also exits once the notification channel is
/// disconnected.
#[cfg(target_os = "macos")]
pub fn monitor_main_thread_hangs(
    http_client: Arc<HttpClientWithUrl>,
    installation_id: Option<String>,
    cx: &App,
) {
    // This is too noisy to ship to stable for now.
    if !matches!(
        ReleaseChannel::global(cx),
        ReleaseChannel::Dev | ReleaseChannel::Nightly | ReleaseChannel::Preview
    ) {
        return;
    }

    use nix::sys::signal::{
        SaFlags, SigAction, SigHandler, SigSet,
        Signal::{self, SIGUSR2},
        sigaction,
    };

    use parking_lot::Mutex;

    use http_client::Method;
    use std::{
        ffi::c_int,
        sync::{OnceLock, mpsc},
        time::Duration,
    };
    use telemetry_events::{BacktraceFrame, HangReport};

    use nix::sys::pthread;

    let foreground_executor = cx.foreground_executor();
    let background_executor = cx.background_executor();
    let telemetry_settings = *client::TelemetrySettings::get_global(cx);

    // Initialize SIGUSR2 handler to send a backtrace to a channel.
    let (backtrace_tx, backtrace_rx) = mpsc::channel();
    static BACKTRACE: Mutex<Vec<backtrace::Frame>> = Mutex::new(Vec::new());
    static BACKTRACE_SENDER: OnceLock<mpsc::Sender<()>> = OnceLock::new();
    // Only the first call installs its sender; on any later call `backtrace_tx` is dropped
    // here, which disconnects that call's `backtrace_rx`.
    BACKTRACE_SENDER.get_or_init(|| backtrace_tx);
    // The handler never grows the buffer: it stops tracing once the capacity is reached.
    BACKTRACE.lock().reserve(100);

    // Installs (or re-installs) the one-shot SIGUSR2 handler.
    fn handle_backtrace_signal() {
        unsafe {
            extern "C" fn handle_sigusr2(_i: c_int) {
                unsafe {
                    // ASYNC SIGNAL SAFETY: This lock is only accessed one other time,
                    // which can only be triggered by This signal handler. In addition,
                    // this signal handler is immediately removed by SA_RESETHAND, and this
                    // signal handler cannot be re-entrant due to the SIGUSR2 mask defined
                    // below
                    let mut bt = BACKTRACE.lock();
                    bt.clear();
                    backtrace::trace_unsynchronized(|frame| {
                        if bt.len() < bt.capacity() {
                            bt.push(frame.clone());
                            true
                        } else {
                            false
                        }
                    });
                }

                BACKTRACE_SENDER.get().unwrap().send(()).ok();
            }

            let mut mask = SigSet::empty();
            mask.add(SIGUSR2);
            sigaction(
                Signal::SIGUSR2,
                &SigAction::new(
                    SigHandler::Handler(handle_sigusr2),
                    SaFlags::SA_RESTART | SaFlags::SA_RESETHAND,
                    mask,
                ),
            )
            .log_err();
        }
    }

    handle_backtrace_signal();
    let main_thread = pthread::pthread_self();

    // Heartbeat channel: the foreground task drains it, the watchdog below fills it.
    let (mut tx, mut rx) = futures::channel::mpsc::channel(3);
    foreground_executor
        .spawn(async move { while (rx.next().await).is_some() {} })
        .detach();

    background_executor
        .spawn({
            let background_executor = background_executor.clone();
            async move {
                loop {
                    background_executor.timer(Duration::from_secs(1)).await;
                    match tx.try_send(()) {
                        Ok(_) => continue,
                        Err(e) => {
                            // A full channel means the foreground task has stopped draining.
                            if e.into_send_error().is_full() {
                                pthread::pthread_kill(main_thread, SIGUSR2).log_err();
                            }
                            // Only detect the first hang
                            break;
                        }
                    }
                }
            }
        })
        .detach();

    let app_version = release_channel::AppVersion::global(cx);
    let os_name = client::telemetry::os_name();

    background_executor
        .clone()
        .spawn(async move {
            let os_version = client::telemetry::os_version();

            // `recv` fails only once every sender is gone. At that point no further
            // notification can arrive, so the task ends instead of retrying in a busy loop.
            while backtrace_rx.recv().is_ok() {
                if !telemetry_settings.diagnostics {
                    return;
                }

                // ASYNC SIGNAL SAFETY: This lock is only accessed _after_
                // the backtrace transmitter has fired, which itself is only done
                // by the signal handler. And due to SA_RESETHAND the signal handler
                // will not run again until `handle_backtrace_signal` is called.
                let raw_backtrace = BACKTRACE.lock().drain(..).collect::<Vec<_>>();
                let backtrace: Vec<_> = raw_backtrace
                    .into_iter()
                    .map(|frame| {
                        let mut btf = BacktraceFrame {
                            ip: frame.ip() as usize,
                            symbol_addr: frame.symbol_address() as usize,
                            base: frame.module_base_address().map(|addr| addr as usize),
                            symbols: vec![],
                        };

                        backtrace::resolve_frame(&frame, |symbol| {
                            if let Some(name) = symbol.name() {
                                btf.symbols.push(name.to_string());
                            }
                        });

                        btf
                    })
                    .collect();

                // IMPORTANT: Don't move this to before `BACKTRACE.lock()`
                handle_backtrace_signal();

                log::error!(
                    "Suspected hang on main thread:\n{}",
                    backtrace
                        .iter()
                        .flat_map(|bt| bt.symbols.first().as_ref().map(|s| s.as_str()))
                        .collect::<Vec<_>>()
                        .join("\n")
                );

                let report = HangReport {
                    backtrace,
                    app_version: Some(app_version),
                    os_name: os_name.clone(),
                    os_version: Some(os_version.clone()),
                    architecture: env::consts::ARCH.into(),
                    installation_id: installation_id.clone(),
                };

                let Some(json_bytes) = serde_json::to_vec(&report).log_err() else {
                    continue;
                };

                let Some(checksum) = client::telemetry::calculate_json_checksum(&json_bytes)
                else {
                    continue;
                };

                let Ok(url) = http_client.build_zed_api_url("/telemetry/hangs", &[]) else {
                    continue;
                };

                let Ok(request) = http_client::Request::builder()
                    .method(Method::POST)
                    .uri(url.as_ref())
                    .header("x-zed-checksum", checksum)
                    .body(json_bytes.into())
                else {
                    continue;
                };

                if let Some(response) = http_client.send(request).await.log_err() {
                    if response.status() != 200 {
                        log::error!("Failed to send hang report: HTTP {:?}", response.status());
                    }
                }
            }
        })
        .detach()
}
472
473fn upload_panics_and_crashes(
474 http: Arc<HttpClientWithUrl>,
475 panic_report_url: Url,
476 installation_id: Option<String>,
477 cx: &App,
478) {
479 if !client::TelemetrySettings::get_global(cx).diagnostics {
480 return;
481 }
482 cx.background_spawn(async move {
483 upload_previous_minidumps(http.clone()).await.warn_on_err();
484 let most_recent_panic = upload_previous_panics(http.clone(), &panic_report_url)
485 .await
486 .log_err()
487 .flatten();
488 upload_previous_crashes(http, most_recent_panic, installation_id)
489 .await
490 .log_err();
491 })
492 .detach()
493}
494
495/// Uploads panics via `zed.dev`.
496async fn upload_previous_panics(
497 http: Arc<HttpClientWithUrl>,
498 panic_report_url: &Url,
499) -> anyhow::Result<Option<(i64, String)>> {
500 let mut children = smol::fs::read_dir(paths::logs_dir()).await?;
501
502 let mut most_recent_panic = None;
503
504 while let Some(child) = children.next().await {
505 let child = child?;
506 let child_path = child.path();
507
508 if child_path.extension() != Some(OsStr::new("panic")) {
509 continue;
510 }
511 let filename = if let Some(filename) = child_path.file_name() {
512 filename.to_string_lossy()
513 } else {
514 continue;
515 };
516
517 if !filename.starts_with("zed") {
518 continue;
519 }
520
521 let panic_file_content = smol::fs::read_to_string(&child_path)
522 .await
523 .context("error reading panic file")?;
524
525 let panic: Option<Panic> = serde_json::from_str(&panic_file_content)
526 .log_err()
527 .or_else(|| {
528 panic_file_content
529 .lines()
530 .next()
531 .and_then(|line| serde_json::from_str(line).ok())
532 })
533 .unwrap_or_else(|| {
534 log::error!("failed to deserialize panic file {:?}", panic_file_content);
535 None
536 });
537
538 if let Some(panic) = panic
539 && upload_panic(&http, &panic_report_url, panic, &mut most_recent_panic).await?
540 {
541 // We've done what we can, delete the file
542 fs::remove_file(child_path)
543 .context("error removing panic")
544 .log_err();
545 }
546 }
547
548 Ok(most_recent_panic)
549}
550
551pub async fn upload_previous_minidumps(http: Arc<HttpClientWithUrl>) -> anyhow::Result<()> {
552 let Some(minidump_endpoint) = MINIDUMP_ENDPOINT.as_ref() else {
553 log::warn!("Minidump endpoint not set");
554 return Ok(());
555 };
556
557 let mut children = smol::fs::read_dir(paths::logs_dir()).await?;
558 while let Some(child) = children.next().await {
559 let child = child?;
560 let child_path = child.path();
561 if child_path.extension() != Some(OsStr::new("dmp")) {
562 continue;
563 }
564 let mut json_path = child_path.clone();
565 json_path.set_extension("json");
566 if let Ok(metadata) = serde_json::from_slice(&smol::fs::read(&json_path).await?) {
567 if upload_minidump(
568 http.clone(),
569 &minidump_endpoint,
570 smol::fs::read(&child_path)
571 .await
572 .context("Failed to read minidump")?,
573 &metadata,
574 )
575 .await
576 .log_err()
577 .is_some()
578 {
579 fs::remove_file(child_path).ok();
580 fs::remove_file(json_path).ok();
581 }
582 }
583 }
584 Ok(())
585}
586
587async fn upload_minidump(
588 http: Arc<HttpClientWithUrl>,
589 endpoint: &str,
590 minidump: Vec<u8>,
591 metadata: &crashes::CrashInfo,
592) -> Result<()> {
593 let mut form = Form::new()
594 .part(
595 "upload_file_minidump",
596 Part::bytes(minidump)
597 .file_name("minidump.dmp")
598 .mime_str("application/octet-stream")?,
599 )
600 .text(
601 "sentry[tags][channel]",
602 metadata.init.release_channel.clone(),
603 )
604 .text("sentry[tags][version]", metadata.init.zed_version.clone())
605 .text("sentry[release]", metadata.init.commit_sha.clone())
606 .text("platform", "rust");
607 if let Some(panic_info) = metadata.panic.as_ref() {
608 form = form.text("sentry[logentry][formatted]", panic_info.message.clone());
609 form = form.text("span", panic_info.span.clone());
610 // TODO: add gpu-context, feature-flag-context, and more of device-context like gpu
611 // name, screen resolution, available ram, device model, etc
612 }
613
614 let mut response_text = String::new();
615 let mut response = http.send_multipart_form(endpoint, form).await?;
616 response
617 .body_mut()
618 .read_to_string(&mut response_text)
619 .await?;
620 if !response.status().is_success() {
621 anyhow::bail!("failed to upload minidump: {response_text}");
622 }
623 log::info!("Uploaded minidump. event id: {response_text}");
624 Ok(())
625}
626
627async fn upload_panic(
628 http: &Arc<HttpClientWithUrl>,
629 panic_report_url: &Url,
630 panic: telemetry_events::Panic,
631 most_recent_panic: &mut Option<(i64, String)>,
632) -> Result<bool> {
633 *most_recent_panic = Some((panic.panicked_on, panic.payload.clone()));
634
635 let json_bytes = serde_json::to_vec(&PanicRequest { panic }).unwrap();
636
637 let Some(checksum) = client::telemetry::calculate_json_checksum(&json_bytes) else {
638 return Ok(false);
639 };
640
641 let Ok(request) = http_client::Request::builder()
642 .method(Method::POST)
643 .uri(panic_report_url.as_ref())
644 .header("x-zed-checksum", checksum)
645 .body(json_bytes.into())
646 else {
647 return Ok(false);
648 };
649
650 let response = http.send(request).await.context("error sending panic")?;
651 if !response.status().is_success() {
652 log::error!("Error uploading panic to server: {}", response.status());
653 }
654
655 Ok(true)
656}
657const LAST_CRASH_UPLOADED: &str = "LAST_CRASH_UPLOADED";
658
659/// upload crashes from apple's diagnostic reports to our server.
660/// (only if telemetry is enabled)
661async fn upload_previous_crashes(
662 http: Arc<HttpClientWithUrl>,
663 most_recent_panic: Option<(i64, String)>,
664 installation_id: Option<String>,
665) -> Result<()> {
666 let last_uploaded = KEY_VALUE_STORE
667 .read_kvp(LAST_CRASH_UPLOADED)?
668 .unwrap_or("zed-2024-01-17-221900.ips".to_string()); // don't upload old crash reports from before we had this.
669 let mut uploaded = last_uploaded.clone();
670
671 let crash_report_url = http.build_zed_api_url("/telemetry/crashes", &[])?;
672
673 // Crash directories are only set on macOS.
674 for dir in [crashes_dir(), crashes_retired_dir()]
675 .iter()
676 .filter_map(|d| d.as_deref())
677 {
678 let mut children = smol::fs::read_dir(&dir).await?;
679 while let Some(child) = children.next().await {
680 let child = child?;
681 let Some(filename) = child
682 .path()
683 .file_name()
684 .map(|f| f.to_string_lossy().to_lowercase())
685 else {
686 continue;
687 };
688
689 if !filename.starts_with("zed-") || !filename.ends_with(".ips") {
690 continue;
691 }
692
693 if filename <= last_uploaded {
694 continue;
695 }
696
697 let body = smol::fs::read_to_string(&child.path())
698 .await
699 .context("error reading crash file")?;
700
701 let mut request = http_client::Request::post(&crash_report_url.to_string())
702 .follow_redirects(http_client::RedirectPolicy::FollowAll)
703 .header("Content-Type", "text/plain");
704
705 if let Some((panicked_on, payload)) = most_recent_panic.as_ref() {
706 request = request
707 .header("x-zed-panicked-on", format!("{panicked_on}"))
708 .header("x-zed-panic", payload)
709 }
710 if let Some(installation_id) = installation_id.as_ref() {
711 request = request.header("x-zed-installation-id", installation_id);
712 }
713
714 let request = request.body(body.into())?;
715
716 let response = http.send(request).await.context("error sending crash")?;
717 if !response.status().is_success() {
718 log::error!("Error uploading crash to server: {}", response.status());
719 }
720
721 if uploaded < filename {
722 uploaded.clone_from(&filename);
723 KEY_VALUE_STORE
724 .write_kvp(LAST_CRASH_UPLOADED.to_string(), filename)
725 .await?;
726 }
727 }
728 }
729
730 Ok(())
731}