1use crate::stdout_is_a_pty;
2use anyhow::{Context as _, Result};
3use backtrace::{self, Backtrace};
4use chrono::Utc;
5use client::{TelemetrySettings, telemetry};
6use db::kvp::KEY_VALUE_STORE;
7use gpui::{App, AppContext as _, SemanticVersion};
8use http_client::{self, HttpClient, HttpClientWithUrl, HttpRequestExt, Method};
9use paths::{crashes_dir, crashes_retired_dir};
10use project::Project;
11use release_channel::{AppCommitSha, RELEASE_CHANNEL, ReleaseChannel};
12use settings::Settings;
13use smol::stream::StreamExt;
14use std::{
15 env,
16 ffi::{OsStr, c_void},
17 sync::{Arc, atomic::Ordering},
18};
19use std::{io::Write, panic, sync::atomic::AtomicU32, thread};
20use telemetry_events::{LocationData, Panic, PanicRequest};
21use url::Url;
22use util::ResultExt;
23
24static PANIC_COUNT: AtomicU32 = AtomicU32::new(0);
25
26pub fn init_panic_hook(
27 app_version: SemanticVersion,
28 app_commit_sha: Option<AppCommitSha>,
29 system_id: Option<String>,
30 installation_id: Option<String>,
31 session_id: String,
32) {
33 let is_pty = stdout_is_a_pty();
34
35 panic::set_hook(Box::new(move |info| {
36 let prior_panic_count = PANIC_COUNT.fetch_add(1, Ordering::SeqCst);
37 if prior_panic_count > 0 {
38 // Give the panic-ing thread time to write the panic file
39 loop {
40 std::thread::yield_now();
41 }
42 }
43
44 let thread = thread::current();
45 let thread_name = thread.name().unwrap_or("<unnamed>");
46
47 let payload = info
48 .payload()
49 .downcast_ref::<&str>()
50 .map(|s| s.to_string())
51 .or_else(|| info.payload().downcast_ref::<String>().cloned())
52 .unwrap_or_else(|| "Box<Any>".to_string());
53
54 if *release_channel::RELEASE_CHANNEL == ReleaseChannel::Dev {
55 let location = info.location().unwrap();
56 let backtrace = Backtrace::new();
57 eprintln!(
58 "Thread {:?} panicked with {:?} at {}:{}:{}\n{}{:?}",
59 thread_name,
60 payload,
61 location.file(),
62 location.line(),
63 location.column(),
64 match app_commit_sha.as_ref() {
65 Some(commit_sha) => format!(
66 "https://github.com/zed-industries/zed/blob/{}/src/{}#L{} \
67 (may not be uploaded, line may be incorrect if files modified)\n",
68 commit_sha.full(),
69 location.file(),
70 location.line()
71 ),
72 None => "".to_string(),
73 },
74 backtrace,
75 );
76 std::process::exit(-1);
77 }
78 let main_module_base_address = get_main_module_base_address();
79
80 let backtrace = Backtrace::new();
81 let mut symbols = backtrace
82 .frames()
83 .iter()
84 .flat_map(|frame| {
85 let base = frame
86 .module_base_address()
87 .unwrap_or(main_module_base_address);
88 frame.symbols().iter().map(move |symbol| {
89 format!(
90 "{}+{}",
91 symbol
92 .name()
93 .as_ref()
94 .map_or("<unknown>".to_owned(), <_>::to_string),
95 (frame.ip() as isize).saturating_sub(base as isize)
96 )
97 })
98 })
99 .collect::<Vec<_>>();
100
101 // Strip out leading stack frames for rust panic-handling.
102 if let Some(ix) = symbols
103 .iter()
104 .position(|name| name == "rust_begin_unwind" || name == "_rust_begin_unwind")
105 {
106 symbols.drain(0..=ix);
107 }
108
109 let panic_data = telemetry_events::Panic {
110 thread: thread_name.into(),
111 payload,
112 location_data: info.location().map(|location| LocationData {
113 file: location.file().into(),
114 line: location.line(),
115 }),
116 app_version: app_version.to_string(),
117 app_commit_sha: app_commit_sha.as_ref().map(|sha| sha.full()),
118 release_channel: RELEASE_CHANNEL.dev_name().into(),
119 target: env!("TARGET").to_owned().into(),
120 os_name: telemetry::os_name(),
121 os_version: Some(telemetry::os_version()),
122 architecture: env::consts::ARCH.into(),
123 panicked_on: Utc::now().timestamp_millis(),
124 backtrace: symbols,
125 system_id: system_id.clone(),
126 installation_id: installation_id.clone(),
127 session_id: session_id.clone(),
128 };
129
130 if let Some(panic_data_json) = serde_json::to_string_pretty(&panic_data).log_err() {
131 log::error!("{}", panic_data_json);
132 }
133 zlog::flush();
134
135 if !is_pty {
136 if let Some(panic_data_json) = serde_json::to_string(&panic_data).log_err() {
137 let timestamp = chrono::Utc::now().format("%Y_%m_%d %H_%M_%S").to_string();
138 let panic_file_path = paths::logs_dir().join(format!("zed-{timestamp}.panic"));
139 let panic_file = std::fs::OpenOptions::new()
140 .append(true)
141 .create(true)
142 .open(&panic_file_path)
143 .log_err();
144 if let Some(mut panic_file) = panic_file {
145 writeln!(&mut panic_file, "{panic_data_json}").log_err();
146 panic_file.flush().log_err();
147 }
148 }
149 }
150
151 std::process::abort();
152 }));
153}
154
155#[cfg(not(target_os = "windows"))]
156fn get_main_module_base_address() -> *mut c_void {
157 let mut dl_info = libc::Dl_info {
158 dli_fname: std::ptr::null(),
159 dli_fbase: std::ptr::null_mut(),
160 dli_sname: std::ptr::null(),
161 dli_saddr: std::ptr::null_mut(),
162 };
163 unsafe {
164 libc::dladdr(get_main_module_base_address as _, &mut dl_info);
165 }
166 dl_info.dli_fbase
167}
168
/// Windows variant: no module lookup is performed, so the base address is always null.
#[cfg(target_os = "windows")]
fn get_main_module_base_address() -> *mut c_void {
    std::ptr::null_mut::<c_void>()
}
173
174pub fn init(
175 http_client: Arc<HttpClientWithUrl>,
176 system_id: Option<String>,
177 installation_id: Option<String>,
178 session_id: String,
179 cx: &mut App,
180) {
181 #[cfg(target_os = "macos")]
182 monitor_main_thread_hangs(http_client.clone(), installation_id.clone(), cx);
183
184 let Some(panic_report_url) = http_client
185 .build_zed_api_url("/telemetry/panics", &[])
186 .log_err()
187 else {
188 return;
189 };
190
191 upload_panics_and_crashes(
192 http_client.clone(),
193 panic_report_url.clone(),
194 installation_id.clone(),
195 cx,
196 );
197
198 cx.observe_new(move |project: &mut Project, _, cx| {
199 let http_client = http_client.clone();
200 let panic_report_url = panic_report_url.clone();
201 let session_id = session_id.clone();
202 let installation_id = installation_id.clone();
203 let system_id = system_id.clone();
204
205 if let Some(ssh_client) = project.ssh_client() {
206 ssh_client.update(cx, |client, cx| {
207 if TelemetrySettings::get_global(cx).diagnostics {
208 let request = client.proto_client().request(proto::GetPanicFiles {});
209 cx.background_spawn(async move {
210 let panic_files = request.await?;
211 for file in panic_files.file_contents {
212 let panic: Option<Panic> = serde_json::from_str(&file)
213 .log_err()
214 .or_else(|| {
215 file.lines()
216 .next()
217 .and_then(|line| serde_json::from_str(line).ok())
218 })
219 .unwrap_or_else(|| {
220 log::error!("failed to deserialize panic file {:?}", file);
221 None
222 });
223
224 if let Some(mut panic) = panic {
225 panic.session_id = session_id.clone();
226 panic.system_id = system_id.clone();
227 panic.installation_id = installation_id.clone();
228
229 upload_panic(&http_client, &panic_report_url, panic, &mut None)
230 .await?;
231 }
232 }
233
234 anyhow::Ok(())
235 })
236 .detach_and_log_err(cx);
237 }
238 })
239 }
240 })
241 .detach();
242}
243
/// Watches for a hang of the thread this function is called on and reports the first one.
///
/// Does nothing unless the release channel is `Dev`, `Nightly` or `Preview`.
///
/// How it works:
/// - A foreground task drains a bounded channel.
/// - A background task pushes a value into that channel once per second. When the
///   channel is full (the foreground task has not been draining it), it sends `SIGUSR2`
///   to the thread captured with `pthread_self()` at call time and then stops.
/// - The `SIGUSR2` handler records that thread's stack frames into a static buffer and
///   notifies a second background task.
/// - The second background task resolves the frames, re-installs the signal handler,
///   logs the backtrace and POSTs a `HangReport` to `/telemetry/hangs`. It exits without
///   reporting if `diagnostics` was disabled in the telemetry settings read at call time.
#[cfg(target_os = "macos")]
pub fn monitor_main_thread_hangs(
    http_client: Arc<HttpClientWithUrl>,
    installation_id: Option<String>,
    cx: &App,
) {
    // This is too noisy to ship to stable for now.
    if !matches!(
        ReleaseChannel::global(cx),
        ReleaseChannel::Dev | ReleaseChannel::Nightly | ReleaseChannel::Preview
    ) {
        return;
    }

    use nix::sys::signal::{
        SaFlags, SigAction, SigHandler, SigSet,
        Signal::{self, SIGUSR2},
        sigaction,
    };

    use parking_lot::Mutex;

    use http_client::Method;
    use std::{
        ffi::c_int,
        sync::{OnceLock, mpsc},
        time::Duration,
    };
    use telemetry_events::{BacktraceFrame, HangReport};

    use nix::sys::pthread;

    let foreground_executor = cx.foreground_executor();
    let background_executor = cx.background_executor();
    let telemetry_settings = *client::TelemetrySettings::get_global(cx);

    // Initialize SIGUSR2 handler to send a backtrace to a channel.
    let (backtrace_tx, backtrace_rx) = mpsc::channel();
    static BACKTRACE: Mutex<Vec<backtrace::Frame>> = Mutex::new(Vec::new());
    static BACKTRACE_SENDER: OnceLock<mpsc::Sender<()>> = OnceLock::new();
    BACKTRACE_SENDER.get_or_init(|| backtrace_tx);
    // Reserve space up front; the handler only pushes while below this capacity.
    BACKTRACE.lock().reserve(100);

    /// Installs the one-shot `SIGUSR2` handler that captures the current stack.
    fn handle_backtrace_signal() {
        unsafe {
            extern "C" fn handle_sigusr2(_i: c_int) {
                unsafe {
                    // ASYNC SIGNAL SAFETY: This lock is only accessed one other time,
                    // which can only be triggered by this signal handler. In addition,
                    // this signal handler is immediately removed by SA_RESETHAND, and this
                    // signal handler cannot be re-entrant due to the SIGUSR2 mask defined
                    // below
                    let mut bt = BACKTRACE.lock();
                    bt.clear();
                    backtrace::trace_unsynchronized(|frame| {
                        if bt.len() < bt.capacity() {
                            bt.push(frame.clone());
                            true
                        } else {
                            false
                        }
                    });
                }

                BACKTRACE_SENDER.get().unwrap().send(()).ok();
            }

            let mut mask = SigSet::empty();
            mask.add(SIGUSR2);
            sigaction(
                Signal::SIGUSR2,
                &SigAction::new(
                    SigHandler::Handler(handle_sigusr2),
                    SaFlags::SA_RESTART | SaFlags::SA_RESETHAND,
                    mask,
                ),
            )
            .log_err();
        }
    }

    handle_backtrace_signal();
    // The thread that will be signalled when a hang is suspected.
    let main_thread = pthread::pthread_self();

    // Heartbeat channel: filled from the background, drained on the foreground.
    let (mut tx, mut rx) = futures::channel::mpsc::channel(3);
    foreground_executor
        .spawn(async move { while (rx.next().await).is_some() {} })
        .detach();

    background_executor
        .spawn({
            let background_executor = background_executor.clone();
            async move {
                loop {
                    background_executor.timer(Duration::from_secs(1)).await;
                    match tx.try_send(()) {
                        Ok(_) => continue,
                        Err(e) => {
                            // A full channel means the foreground task has fallen behind.
                            if e.into_send_error().is_full() {
                                pthread::pthread_kill(main_thread, SIGUSR2).log_err();
                            }
                            // Only detect the first hang
                            break;
                        }
                    }
                }
            }
        })
        .detach();

    let app_version = release_channel::AppVersion::global(cx);
    let os_name = client::telemetry::os_name();

    background_executor
        .clone()
        .spawn(async move {
            let os_version = client::telemetry::os_version();

            // Runs until the sender is gone. Once `recv` fails it will keep failing, so
            // looping again would only busy-spin this thread.
            while backtrace_rx.recv().is_ok() {
                if !telemetry_settings.diagnostics {
                    return;
                }

                // ASYNC SIGNAL SAFETY: This lock is only accessed _after_
                // the backtrace transmitter has fired, which itself is only done
                // by the signal handler. And due to SA_RESETHAND the signal handler
                // will not run again until `handle_backtrace_signal` is called.
                let raw_backtrace = BACKTRACE.lock().drain(..).collect::<Vec<_>>();
                let backtrace: Vec<_> = raw_backtrace
                    .into_iter()
                    .map(|frame| {
                        let mut btf = BacktraceFrame {
                            ip: frame.ip() as usize,
                            symbol_addr: frame.symbol_address() as usize,
                            base: frame.module_base_address().map(|addr| addr as usize),
                            symbols: vec![],
                        };

                        backtrace::resolve_frame(&frame, |symbol| {
                            if let Some(name) = symbol.name() {
                                btf.symbols.push(name.to_string());
                            }
                        });

                        btf
                    })
                    .collect();

                // IMPORTANT: Don't move this to before `BACKTRACE.lock()`
                handle_backtrace_signal();

                // Log the first resolved symbol of every frame that has one.
                log::error!(
                    "Suspected hang on main thread:\n{}",
                    backtrace
                        .iter()
                        .flat_map(|bt| bt.symbols.first().as_ref().map(|s| s.as_str()))
                        .collect::<Vec<_>>()
                        .join("\n")
                );

                let report = HangReport {
                    backtrace,
                    app_version: Some(app_version),
                    os_name: os_name.clone(),
                    os_version: Some(os_version.clone()),
                    architecture: env::consts::ARCH.into(),
                    installation_id: installation_id.clone(),
                };

                let Some(json_bytes) = serde_json::to_vec(&report).log_err() else {
                    continue;
                };

                let Some(checksum) = client::telemetry::calculate_json_checksum(&json_bytes)
                else {
                    continue;
                };

                let Ok(url) = http_client.build_zed_api_url("/telemetry/hangs", &[]) else {
                    continue;
                };

                let Ok(request) = http_client::Request::builder()
                    .method(Method::POST)
                    .uri(url.as_ref())
                    .header("x-zed-checksum", checksum)
                    .body(json_bytes.into())
                else {
                    continue;
                };

                if let Some(response) = http_client.send(request).await.log_err() {
                    if response.status() != 200 {
                        log::error!("Failed to send hang report: HTTP {:?}", response.status());
                    }
                }
            }
        })
        .detach()
}
446
447fn upload_panics_and_crashes(
448 http: Arc<HttpClientWithUrl>,
449 panic_report_url: Url,
450 installation_id: Option<String>,
451 cx: &App,
452) {
453 let telemetry_settings = *client::TelemetrySettings::get_global(cx);
454 cx.background_spawn(async move {
455 let most_recent_panic =
456 upload_previous_panics(http.clone(), &panic_report_url, telemetry_settings)
457 .await
458 .log_err()
459 .flatten();
460 upload_previous_crashes(http, most_recent_panic, installation_id, telemetry_settings)
461 .await
462 .log_err()
463 })
464 .detach()
465}
466
467/// Uploads panics via `zed.dev`.
468async fn upload_previous_panics(
469 http: Arc<HttpClientWithUrl>,
470 panic_report_url: &Url,
471 telemetry_settings: client::TelemetrySettings,
472) -> anyhow::Result<Option<(i64, String)>> {
473 let mut children = smol::fs::read_dir(paths::logs_dir()).await?;
474
475 let mut most_recent_panic = None;
476
477 while let Some(child) = children.next().await {
478 let child = child?;
479 let child_path = child.path();
480
481 if child_path.extension() != Some(OsStr::new("panic")) {
482 continue;
483 }
484 let filename = if let Some(filename) = child_path.file_name() {
485 filename.to_string_lossy()
486 } else {
487 continue;
488 };
489
490 if !filename.starts_with("zed") {
491 continue;
492 }
493
494 if telemetry_settings.diagnostics {
495 let panic_file_content = smol::fs::read_to_string(&child_path)
496 .await
497 .context("error reading panic file")?;
498
499 let panic: Option<Panic> = serde_json::from_str(&panic_file_content)
500 .log_err()
501 .or_else(|| {
502 panic_file_content
503 .lines()
504 .next()
505 .and_then(|line| serde_json::from_str(line).ok())
506 })
507 .unwrap_or_else(|| {
508 log::error!("failed to deserialize panic file {:?}", panic_file_content);
509 None
510 });
511
512 if let Some(panic) = panic {
513 if !upload_panic(&http, &panic_report_url, panic, &mut most_recent_panic).await? {
514 continue;
515 }
516 }
517 }
518
519 // We've done what we can, delete the file
520 std::fs::remove_file(child_path)
521 .context("error removing panic")
522 .log_err();
523 }
524 Ok(most_recent_panic)
525}
526
527async fn upload_panic(
528 http: &Arc<HttpClientWithUrl>,
529 panic_report_url: &Url,
530 panic: telemetry_events::Panic,
531 most_recent_panic: &mut Option<(i64, String)>,
532) -> Result<bool> {
533 *most_recent_panic = Some((panic.panicked_on, panic.payload.clone()));
534
535 let json_bytes = serde_json::to_vec(&PanicRequest { panic }).unwrap();
536
537 let Some(checksum) = client::telemetry::calculate_json_checksum(&json_bytes) else {
538 return Ok(false);
539 };
540
541 let Ok(request) = http_client::Request::builder()
542 .method(Method::POST)
543 .uri(panic_report_url.as_ref())
544 .header("x-zed-checksum", checksum)
545 .body(json_bytes.into())
546 else {
547 return Ok(false);
548 };
549
550 let response = http.send(request).await.context("error sending panic")?;
551 if !response.status().is_success() {
552 log::error!("Error uploading panic to server: {}", response.status());
553 }
554
555 Ok(true)
556}
557const LAST_CRASH_UPLOADED: &str = "LAST_CRASH_UPLOADED";
558
559/// upload crashes from apple's diagnostic reports to our server.
560/// (only if telemetry is enabled)
561async fn upload_previous_crashes(
562 http: Arc<HttpClientWithUrl>,
563 most_recent_panic: Option<(i64, String)>,
564 installation_id: Option<String>,
565 telemetry_settings: client::TelemetrySettings,
566) -> Result<()> {
567 if !telemetry_settings.diagnostics {
568 return Ok(());
569 }
570 let last_uploaded = KEY_VALUE_STORE
571 .read_kvp(LAST_CRASH_UPLOADED)?
572 .unwrap_or("zed-2024-01-17-221900.ips".to_string()); // don't upload old crash reports from before we had this.
573 let mut uploaded = last_uploaded.clone();
574
575 let crash_report_url = http.build_zed_api_url("/telemetry/crashes", &[])?;
576
577 // Crash directories are only set on macOS.
578 for dir in [crashes_dir(), crashes_retired_dir()]
579 .iter()
580 .filter_map(|d| d.as_deref())
581 {
582 let mut children = smol::fs::read_dir(&dir).await?;
583 while let Some(child) = children.next().await {
584 let child = child?;
585 let Some(filename) = child
586 .path()
587 .file_name()
588 .map(|f| f.to_string_lossy().to_lowercase())
589 else {
590 continue;
591 };
592
593 if !filename.starts_with("zed-") || !filename.ends_with(".ips") {
594 continue;
595 }
596
597 if filename <= last_uploaded {
598 continue;
599 }
600
601 let body = smol::fs::read_to_string(&child.path())
602 .await
603 .context("error reading crash file")?;
604
605 let mut request = http_client::Request::post(&crash_report_url.to_string())
606 .follow_redirects(http_client::RedirectPolicy::FollowAll)
607 .header("Content-Type", "text/plain");
608
609 if let Some((panicked_on, payload)) = most_recent_panic.as_ref() {
610 request = request
611 .header("x-zed-panicked-on", format!("{panicked_on}"))
612 .header("x-zed-panic", payload)
613 }
614 if let Some(installation_id) = installation_id.as_ref() {
615 request = request.header("x-zed-installation-id", installation_id);
616 }
617
618 let request = request.body(body.into())?;
619
620 let response = http.send(request).await.context("error sending crash")?;
621 if !response.status().is_success() {
622 log::error!("Error uploading crash to server: {}", response.status());
623 }
624
625 if uploaded < filename {
626 uploaded.clone_from(&filename);
627 KEY_VALUE_STORE
628 .write_kvp(LAST_CRASH_UPLOADED.to_string(), filename)
629 .await?;
630 }
631 }
632 }
633
634 Ok(())
635}