1use crate::stdout_is_a_pty;
2use anyhow::{Context as _, Result};
3use backtrace::{self, Backtrace};
4use chrono::Utc;
5use client::{telemetry, TelemetrySettings};
6use db::kvp::KEY_VALUE_STORE;
7use gpui::{App, SemanticVersion};
8use http_client::{self, HttpClient, HttpClientWithUrl, HttpRequestExt, Method};
9use paths::{crashes_dir, crashes_retired_dir};
10use project::Project;
11use release_channel::{AppCommitSha, ReleaseChannel, RELEASE_CHANNEL};
12use settings::Settings;
13use smol::stream::StreamExt;
14use std::{
15 env,
16 ffi::{c_void, OsStr},
17 sync::{atomic::Ordering, Arc},
18};
19use std::{io::Write, panic, sync::atomic::AtomicU32, thread};
20use telemetry_events::{LocationData, Panic, PanicRequest};
21use url::Url;
22use util::ResultExt;
23
24static PANIC_COUNT: AtomicU32 = AtomicU32::new(0);
25
26pub fn init_panic_hook(
27 app_version: SemanticVersion,
28 app_commit_sha: Option<AppCommitSha>,
29 system_id: Option<String>,
30 installation_id: Option<String>,
31 session_id: String,
32) {
33 let is_pty = stdout_is_a_pty();
34
35 panic::set_hook(Box::new(move |info| {
36 let prior_panic_count = PANIC_COUNT.fetch_add(1, Ordering::SeqCst);
37 if prior_panic_count > 0 {
38 // Give the panic-ing thread time to write the panic file
39 loop {
40 std::thread::yield_now();
41 }
42 }
43
44 let thread = thread::current();
45 let thread_name = thread.name().unwrap_or("<unnamed>");
46
47 let payload = info
48 .payload()
49 .downcast_ref::<&str>()
50 .map(|s| s.to_string())
51 .or_else(|| info.payload().downcast_ref::<String>().cloned())
52 .unwrap_or_else(|| "Box<Any>".to_string());
53
54 if *release_channel::RELEASE_CHANNEL == ReleaseChannel::Dev {
55 let location = info.location().unwrap();
56 let backtrace = Backtrace::new();
57 eprintln!(
58 "Thread {:?} panicked with {:?} at {}:{}:{}\n{}{:?}",
59 thread_name,
60 payload,
61 location.file(),
62 location.line(),
63 location.column(),
64 match app_commit_sha.as_ref() {
65 Some(commit_sha) => format!(
66 "https://github.com/zed-industries/zed/blob/{}/src/{}#L{} \
67 (may not be uploaded, line may be incorrect if files modified)\n",
68 commit_sha.0,
69 location.file(),
70 location.line()
71 ),
72 None => "".to_string(),
73 },
74 backtrace,
75 );
76 std::process::exit(-1);
77 }
78 let main_module_base_address = get_main_module_base_address();
79
80 let backtrace = Backtrace::new();
81 let mut symbols = backtrace
82 .frames()
83 .iter()
84 .flat_map(|frame| {
85 let base = frame
86 .module_base_address()
87 .unwrap_or(main_module_base_address);
88 frame.symbols().iter().map(move |symbol| {
89 format!(
90 "{}+{}",
91 symbol
92 .name()
93 .as_ref()
94 .map_or("<unknown>".to_owned(), <_>::to_string),
95 (frame.ip() as isize).saturating_sub(base as isize)
96 )
97 })
98 })
99 .collect::<Vec<_>>();
100
101 // Strip out leading stack frames for rust panic-handling.
102 if let Some(ix) = symbols
103 .iter()
104 .position(|name| name == "rust_begin_unwind" || name == "_rust_begin_unwind")
105 {
106 symbols.drain(0..=ix);
107 }
108
109 let panic_data = telemetry_events::Panic {
110 thread: thread_name.into(),
111 payload,
112 location_data: info.location().map(|location| LocationData {
113 file: location.file().into(),
114 line: location.line(),
115 }),
116 app_version: app_version.to_string(),
117 app_commit_sha: app_commit_sha.as_ref().map(|sha| sha.0.clone()),
118 release_channel: RELEASE_CHANNEL.dev_name().into(),
119 target: env!("TARGET").to_owned().into(),
120 os_name: telemetry::os_name(),
121 os_version: Some(telemetry::os_version()),
122 architecture: env::consts::ARCH.into(),
123 panicked_on: Utc::now().timestamp_millis(),
124 backtrace: symbols,
125 system_id: system_id.clone(),
126 installation_id: installation_id.clone(),
127 session_id: session_id.clone(),
128 };
129
130 if let Some(panic_data_json) = serde_json::to_string_pretty(&panic_data).log_err() {
131 log::error!("{}", panic_data_json);
132 }
133
134 if !is_pty {
135 if let Some(panic_data_json) = serde_json::to_string(&panic_data).log_err() {
136 let timestamp = chrono::Utc::now().format("%Y_%m_%d %H_%M_%S").to_string();
137 let panic_file_path = paths::logs_dir().join(format!("zed-{timestamp}.panic"));
138 let panic_file = std::fs::OpenOptions::new()
139 .append(true)
140 .create(true)
141 .open(&panic_file_path)
142 .log_err();
143 if let Some(mut panic_file) = panic_file {
144 writeln!(&mut panic_file, "{panic_data_json}").log_err();
145 panic_file.flush().log_err();
146 }
147 }
148 }
149
150 std::process::abort();
151 }));
152}
153
154#[cfg(not(target_os = "windows"))]
155fn get_main_module_base_address() -> *mut c_void {
156 let mut dl_info = libc::Dl_info {
157 dli_fname: std::ptr::null(),
158 dli_fbase: std::ptr::null_mut(),
159 dli_sname: std::ptr::null(),
160 dli_saddr: std::ptr::null_mut(),
161 };
162 unsafe {
163 libc::dladdr(get_main_module_base_address as _, &mut dl_info);
164 }
165 dl_info.dli_fbase
166}
167
/// Windows variant: performs no lookup and always reports a null base address.
#[cfg(target_os = "windows")]
fn get_main_module_base_address() -> *mut c_void {
    std::ptr::null_mut::<c_void>()
}
172
173pub fn init(
174 http_client: Arc<HttpClientWithUrl>,
175 system_id: Option<String>,
176 installation_id: Option<String>,
177 session_id: String,
178 cx: &mut App,
179) {
180 #[cfg(target_os = "macos")]
181 monitor_main_thread_hangs(http_client.clone(), installation_id.clone(), cx);
182
183 let Some(panic_report_url) = http_client
184 .build_zed_api_url("/telemetry/panics", &[])
185 .log_err()
186 else {
187 return;
188 };
189
190 upload_panics_and_crashes(
191 http_client.clone(),
192 panic_report_url.clone(),
193 installation_id.clone(),
194 cx,
195 );
196
197 cx.observe_new(move |project: &mut Project, _, cx| {
198 let http_client = http_client.clone();
199 let panic_report_url = panic_report_url.clone();
200 let session_id = session_id.clone();
201 let installation_id = installation_id.clone();
202 let system_id = system_id.clone();
203
204 if let Some(ssh_client) = project.ssh_client() {
205 ssh_client.update(cx, |client, cx| {
206 if TelemetrySettings::get_global(cx).diagnostics {
207 let request = client.proto_client().request(proto::GetPanicFiles {});
208 cx.background_executor()
209 .spawn(async move {
210 let panic_files = request.await?;
211 for file in panic_files.file_contents {
212 let panic: Option<Panic> = serde_json::from_str(&file)
213 .log_err()
214 .or_else(|| {
215 file.lines()
216 .next()
217 .and_then(|line| serde_json::from_str(line).ok())
218 })
219 .unwrap_or_else(|| {
220 log::error!("failed to deserialize panic file {:?}", file);
221 None
222 });
223
224 if let Some(mut panic) = panic {
225 panic.session_id = session_id.clone();
226 panic.system_id = system_id.clone();
227 panic.installation_id = installation_id.clone();
228
229 upload_panic(&http_client, &panic_report_url, panic, &mut None)
230 .await?;
231 }
232 }
233
234 anyhow::Ok(())
235 })
236 .detach_and_log_err(cx);
237 }
238 })
239 }
240 })
241 .detach();
242}
243
/// Watches the calling thread (expected to be the main thread) for hangs and reports them to
/// `/telemetry/hangs`.
///
/// Returns immediately unless the release channel is `Dev`, `Nightly` or `Preview`. Otherwise:
///
/// * a foreground task continuously drains a small bounded channel;
/// * a background task pushes into that channel once per second. The first time a push fails
///   it stops watching, and if the failure was "channel full" (the foreground task is not
///   draining) it first sends `SIGUSR2` to the thread that called this function;
/// * the `SIGUSR2` handler records stack frames into a pre-reserved static buffer and notifies
///   a second background task;
/// * that task exits if diagnostics telemetry is disabled; otherwise it resolves the frames,
///   re-arms the signal handler, logs the backtrace and POSTs a `HangReport`.
#[cfg(target_os = "macos")]
pub fn monitor_main_thread_hangs(
    http_client: Arc<HttpClientWithUrl>,
    installation_id: Option<String>,
    cx: &App,
) {
    // This is too noisy to ship to stable for now.
    if !matches!(
        ReleaseChannel::global(cx),
        ReleaseChannel::Dev | ReleaseChannel::Nightly | ReleaseChannel::Preview
    ) {
        return;
    }

    use nix::sys::signal::{
        sigaction, SaFlags, SigAction, SigHandler, SigSet,
        Signal::{self, SIGUSR2},
    };

    use parking_lot::Mutex;

    use http_client::Method;
    use std::{
        ffi::c_int,
        sync::{mpsc, OnceLock},
        time::Duration,
    };
    use telemetry_events::{BacktraceFrame, HangReport};

    use nix::sys::pthread;

    let foreground_executor = cx.foreground_executor();
    let background_executor = cx.background_executor();
    let telemetry_settings = *client::TelemetrySettings::get_global(cx);

    // Initialize SIGUSR2 handler to send a backtrace to a channel.
    let (backtrace_tx, backtrace_rx) = mpsc::channel();
    static BACKTRACE: Mutex<Vec<backtrace::Frame>> = Mutex::new(Vec::new());
    static BACKTRACE_SENDER: OnceLock<mpsc::Sender<()>> = OnceLock::new();
    BACKTRACE_SENDER.get_or_init(|| backtrace_tx);
    // Reserve up front so the signal handler can push frames without growing the buffer.
    BACKTRACE.lock().reserve(100);

    /// Installs the one-shot `SIGUSR2` handler that captures a backtrace.
    fn handle_backtrace_signal() {
        unsafe {
            extern "C" fn handle_sigusr2(_i: c_int) {
                unsafe {
                    // ASYNC SIGNAL SAFETY: This lock is only accessed one other time,
                    // which can only be triggered by this signal handler. In addition,
                    // this signal handler is immediately removed by SA_RESETHAND, and this
                    // signal handler cannot be re-entrant due to the SIGUSR2 mask defined
                    // below
                    let mut bt = BACKTRACE.lock();
                    bt.clear();
                    backtrace::trace_unsynchronized(|frame| {
                        // Stop walking once the reserved capacity is used up.
                        if bt.len() < bt.capacity() {
                            bt.push(frame.clone());
                            true
                        } else {
                            false
                        }
                    });
                }

                BACKTRACE_SENDER.get().unwrap().send(()).ok();
            }

            let mut mask = SigSet::empty();
            mask.add(SIGUSR2);
            sigaction(
                Signal::SIGUSR2,
                &SigAction::new(
                    SigHandler::Handler(handle_sigusr2),
                    SaFlags::SA_RESTART | SaFlags::SA_RESETHAND,
                    mask,
                ),
            )
            .log_err();
        }
    }

    handle_backtrace_signal();
    let main_thread = pthread::pthread_self();

    // Heartbeat: the foreground task drains what the background watchdog sends.
    let (mut tx, mut rx) = futures::channel::mpsc::channel(3);
    foreground_executor
        .spawn(async move { while (rx.next().await).is_some() {} })
        .detach();

    background_executor
        .spawn({
            let background_executor = background_executor.clone();
            async move {
                loop {
                    background_executor.timer(Duration::from_secs(1)).await;
                    match tx.try_send(()) {
                        Ok(_) => continue,
                        Err(e) => {
                            // A full channel means the foreground task has stopped draining.
                            if e.into_send_error().is_full() {
                                pthread::pthread_kill(main_thread, SIGUSR2).log_err();
                            }
                            // Only detect the first hang
                            break;
                        }
                    }
                }
            }
        })
        .detach();

    let app_version = release_channel::AppVersion::global(cx);
    let os_name = client::telemetry::os_name();

    background_executor
        .clone()
        .spawn(async move {
            let os_version = client::telemetry::os_version();

            // `recv` only fails once the channel is permanently disconnected, so the task ends
            // at that point instead of retrying in a busy loop.
            while backtrace_rx.recv().is_ok() {
                if !telemetry_settings.diagnostics {
                    return;
                }

                // ASYNC SIGNAL SAFETY: This lock is only accessed _after_
                // the backtrace transmitter has fired, which itself is only done
                // by the signal handler. And due to SA_RESETHAND the signal handler
                // will not run again until `handle_backtrace_signal` is called.
                let raw_backtrace = BACKTRACE.lock().drain(..).collect::<Vec<_>>();
                let backtrace: Vec<_> = raw_backtrace
                    .into_iter()
                    .map(|frame| {
                        let mut btf = BacktraceFrame {
                            ip: frame.ip() as usize,
                            symbol_addr: frame.symbol_address() as usize,
                            base: frame.module_base_address().map(|addr| addr as usize),
                            symbols: vec![],
                        };

                        backtrace::resolve_frame(&frame, |symbol| {
                            if let Some(name) = symbol.name() {
                                btf.symbols.push(name.to_string());
                            }
                        });

                        btf
                    })
                    .collect();

                // IMPORTANT: Don't move this to before `BACKTRACE.lock()`
                handle_backtrace_signal();

                log::error!(
                    "Suspected hang on main thread:\n{}",
                    backtrace
                        .iter()
                        .flat_map(|bt| bt.symbols.first().as_ref().map(|s| s.as_str()))
                        .collect::<Vec<_>>()
                        .join("\n")
                );

                let report = HangReport {
                    backtrace,
                    app_version: Some(app_version),
                    os_name: os_name.clone(),
                    os_version: Some(os_version.clone()),
                    architecture: env::consts::ARCH.into(),
                    installation_id: installation_id.clone(),
                };

                let Some(json_bytes) = serde_json::to_vec(&report).log_err() else {
                    continue;
                };

                let Some(checksum) = client::telemetry::calculate_json_checksum(&json_bytes)
                else {
                    continue;
                };

                let Ok(url) = http_client.build_zed_api_url("/telemetry/hangs", &[]) else {
                    continue;
                };

                let Ok(request) = http_client::Request::builder()
                    .method(Method::POST)
                    .uri(url.as_ref())
                    .header("x-zed-checksum", checksum)
                    .body(json_bytes.into())
                else {
                    continue;
                };

                if let Some(response) = http_client.send(request).await.log_err() {
                    if response.status() != 200 {
                        log::error!("Failed to send hang report: HTTP {:?}", response.status());
                    }
                }
            }
        })
        .detach()
}
445
446fn upload_panics_and_crashes(
447 http: Arc<HttpClientWithUrl>,
448 panic_report_url: Url,
449 installation_id: Option<String>,
450 cx: &App,
451) {
452 let telemetry_settings = *client::TelemetrySettings::get_global(cx);
453 cx.background_executor()
454 .spawn(async move {
455 let most_recent_panic =
456 upload_previous_panics(http.clone(), &panic_report_url, telemetry_settings)
457 .await
458 .log_err()
459 .flatten();
460 upload_previous_crashes(http, most_recent_panic, installation_id, telemetry_settings)
461 .await
462 .log_err()
463 })
464 .detach()
465}
466
467/// Uploads panics via `zed.dev`.
468async fn upload_previous_panics(
469 http: Arc<HttpClientWithUrl>,
470 panic_report_url: &Url,
471 telemetry_settings: client::TelemetrySettings,
472) -> anyhow::Result<Option<(i64, String)>> {
473 let mut children = smol::fs::read_dir(paths::logs_dir()).await?;
474
475 let mut most_recent_panic = None;
476
477 while let Some(child) = children.next().await {
478 let child = child?;
479 let child_path = child.path();
480
481 if child_path.extension() != Some(OsStr::new("panic")) {
482 continue;
483 }
484 let filename = if let Some(filename) = child_path.file_name() {
485 filename.to_string_lossy()
486 } else {
487 continue;
488 };
489
490 if !filename.starts_with("zed") {
491 continue;
492 }
493
494 if telemetry_settings.diagnostics {
495 let panic_file_content = smol::fs::read_to_string(&child_path)
496 .await
497 .context("error reading panic file")?;
498
499 let panic: Option<Panic> = serde_json::from_str(&panic_file_content)
500 .log_err()
501 .or_else(|| {
502 panic_file_content
503 .lines()
504 .next()
505 .and_then(|line| serde_json::from_str(line).ok())
506 })
507 .unwrap_or_else(|| {
508 log::error!("failed to deserialize panic file {:?}", panic_file_content);
509 None
510 });
511
512 if let Some(panic) = panic {
513 if !upload_panic(&http, &panic_report_url, panic, &mut most_recent_panic).await? {
514 continue;
515 }
516 }
517 }
518
519 // We've done what we can, delete the file
520 std::fs::remove_file(child_path)
521 .context("error removing panic")
522 .log_err();
523 }
524 Ok(most_recent_panic)
525}
526
527async fn upload_panic(
528 http: &Arc<HttpClientWithUrl>,
529 panic_report_url: &Url,
530 panic: telemetry_events::Panic,
531 most_recent_panic: &mut Option<(i64, String)>,
532) -> Result<bool> {
533 *most_recent_panic = Some((panic.panicked_on, panic.payload.clone()));
534
535 let json_bytes = serde_json::to_vec(&PanicRequest { panic }).unwrap();
536
537 let Some(checksum) = client::telemetry::calculate_json_checksum(&json_bytes) else {
538 return Ok(false);
539 };
540
541 let Ok(request) = http_client::Request::builder()
542 .method(Method::POST)
543 .uri(panic_report_url.as_ref())
544 .header("x-zed-checksum", checksum)
545 .body(json_bytes.into())
546 else {
547 return Ok(false);
548 };
549
550 let response = http.send(request).await.context("error sending panic")?;
551 if !response.status().is_success() {
552 log::error!("Error uploading panic to server: {}", response.status());
553 }
554
555 Ok(true)
556}
557const LAST_CRASH_UPLOADED: &str = "LAST_CRASH_UPLOADED";
558
559/// upload crashes from apple's diagnostic reports to our server.
560/// (only if telemetry is enabled)
561async fn upload_previous_crashes(
562 http: Arc<HttpClientWithUrl>,
563 most_recent_panic: Option<(i64, String)>,
564 installation_id: Option<String>,
565 telemetry_settings: client::TelemetrySettings,
566) -> Result<()> {
567 if !telemetry_settings.diagnostics {
568 return Ok(());
569 }
570 let last_uploaded = KEY_VALUE_STORE
571 .read_kvp(LAST_CRASH_UPLOADED)?
572 .unwrap_or("zed-2024-01-17-221900.ips".to_string()); // don't upload old crash reports from before we had this.
573 let mut uploaded = last_uploaded.clone();
574
575 let crash_report_url = http.build_zed_api_url("/telemetry/crashes", &[])?;
576
577 // Crash directories are only set on macOS.
578 for dir in [crashes_dir(), crashes_retired_dir()]
579 .iter()
580 .filter_map(|d| d.as_deref())
581 {
582 let mut children = smol::fs::read_dir(&dir).await?;
583 while let Some(child) = children.next().await {
584 let child = child?;
585 let Some(filename) = child
586 .path()
587 .file_name()
588 .map(|f| f.to_string_lossy().to_lowercase())
589 else {
590 continue;
591 };
592
593 if !filename.starts_with("zed-") || !filename.ends_with(".ips") {
594 continue;
595 }
596
597 if filename <= last_uploaded {
598 continue;
599 }
600
601 let body = smol::fs::read_to_string(&child.path())
602 .await
603 .context("error reading crash file")?;
604
605 let mut request = http_client::Request::post(&crash_report_url.to_string())
606 .follow_redirects(http_client::RedirectPolicy::FollowAll)
607 .header("Content-Type", "text/plain");
608
609 if let Some((panicked_on, payload)) = most_recent_panic.as_ref() {
610 request = request
611 .header("x-zed-panicked-on", format!("{panicked_on}"))
612 .header("x-zed-panic", payload)
613 }
614 if let Some(installation_id) = installation_id.as_ref() {
615 request = request.header("x-zed-installation-id", installation_id);
616 }
617
618 let request = request.body(body.into())?;
619
620 let response = http.send(request).await.context("error sending crash")?;
621 if !response.status().is_success() {
622 log::error!("Error uploading crash to server: {}", response.status());
623 }
624
625 if uploaded < filename {
626 uploaded.clone_from(&filename);
627 KEY_VALUE_STORE
628 .write_kvp(LAST_CRASH_UPLOADED.to_string(), filename)
629 .await?;
630 }
631 }
632 }
633
634 Ok(())
635}