1use anyhow::{Context, Result};
2use backtrace::{self, Backtrace};
3use chrono::Utc;
4use client::{telemetry, TelemetrySettings};
5use db::kvp::KEY_VALUE_STORE;
6use gpui::{AppContext, SemanticVersion};
7use http_client::{HttpRequestExt, Method};
8
9use http_client::{self, HttpClient, HttpClientWithUrl};
10use paths::{crashes_dir, crashes_retired_dir};
11use project::Project;
12use release_channel::ReleaseChannel;
13use release_channel::RELEASE_CHANNEL;
14use settings::Settings;
15use smol::stream::StreamExt;
16use std::{
17 env,
18 ffi::OsStr,
19 sync::{atomic::Ordering, Arc},
20};
21use std::{io::Write, panic, sync::atomic::AtomicU32, thread};
22use telemetry_events::LocationData;
23use telemetry_events::Panic;
24use telemetry_events::PanicRequest;
25use url::Url;
26use util::ResultExt;
27
28use crate::stdout_is_a_pty;
/// Number of times the hook installed by `init_panic_hook` has been entered.
/// The hook increments it on entry; any invocation that observes a non-zero
/// prior value spins instead of reporting, so only the first panic is recorded.
static PANIC_COUNT: AtomicU32 = AtomicU32::new(0);
30
31pub fn init_panic_hook(
32 app_version: SemanticVersion,
33 system_id: Option<String>,
34 installation_id: Option<String>,
35 session_id: String,
36) {
37 let is_pty = stdout_is_a_pty();
38
39 panic::set_hook(Box::new(move |info| {
40 let prior_panic_count = PANIC_COUNT.fetch_add(1, Ordering::SeqCst);
41 if prior_panic_count > 0 {
42 // Give the panic-ing thread time to write the panic file
43 loop {
44 std::thread::yield_now();
45 }
46 }
47
48 let thread = thread::current();
49 let thread_name = thread.name().unwrap_or("<unnamed>");
50
51 let payload = info
52 .payload()
53 .downcast_ref::<&str>()
54 .map(|s| s.to_string())
55 .or_else(|| info.payload().downcast_ref::<String>().cloned())
56 .unwrap_or_else(|| "Box<Any>".to_string());
57
58 if *release_channel::RELEASE_CHANNEL == ReleaseChannel::Dev {
59 let location = info.location().unwrap();
60 let backtrace = Backtrace::new();
61 eprintln!(
62 "Thread {:?} panicked with {:?} at {}:{}:{}\n{:?}",
63 thread_name,
64 payload,
65 location.file(),
66 location.line(),
67 location.column(),
68 backtrace,
69 );
70 std::process::exit(-1);
71 }
72
73 let backtrace = Backtrace::new();
74 let mut backtrace = backtrace
75 .frames()
76 .iter()
77 .flat_map(|frame| {
78 frame
79 .symbols()
80 .iter()
81 .filter_map(|frame| Some(format!("{:#}", frame.name()?)))
82 })
83 .collect::<Vec<_>>();
84
85 // Strip out leading stack frames for rust panic-handling.
86 if let Some(ix) = backtrace
87 .iter()
88 .position(|name| name == "rust_begin_unwind")
89 {
90 backtrace.drain(0..=ix);
91 }
92
93 let panic_data = telemetry_events::Panic {
94 thread: thread_name.into(),
95 payload,
96 location_data: info.location().map(|location| LocationData {
97 file: location.file().into(),
98 line: location.line(),
99 }),
100 app_version: app_version.to_string(),
101 release_channel: RELEASE_CHANNEL.display_name().into(),
102 os_name: telemetry::os_name(),
103 os_version: Some(telemetry::os_version()),
104 architecture: env::consts::ARCH.into(),
105 panicked_on: Utc::now().timestamp_millis(),
106 backtrace,
107 system_id: system_id.clone(),
108 installation_id: installation_id.clone(),
109 session_id: session_id.clone(),
110 };
111
112 if let Some(panic_data_json) = serde_json::to_string_pretty(&panic_data).log_err() {
113 log::error!("{}", panic_data_json);
114 }
115
116 if !is_pty {
117 if let Some(panic_data_json) = serde_json::to_string(&panic_data).log_err() {
118 let timestamp = chrono::Utc::now().format("%Y_%m_%d %H_%M_%S").to_string();
119 let panic_file_path = paths::logs_dir().join(format!("zed-{timestamp}.panic"));
120 let panic_file = std::fs::OpenOptions::new()
121 .append(true)
122 .create(true)
123 .open(&panic_file_path)
124 .log_err();
125 if let Some(mut panic_file) = panic_file {
126 writeln!(&mut panic_file, "{panic_data_json}").log_err();
127 panic_file.flush().log_err();
128 }
129 }
130 }
131
132 std::process::abort();
133 }));
134}
135
136pub fn init(
137 http_client: Arc<HttpClientWithUrl>,
138 system_id: Option<String>,
139 installation_id: Option<String>,
140 session_id: String,
141 cx: &mut AppContext,
142) {
143 #[cfg(target_os = "macos")]
144 monitor_main_thread_hangs(http_client.clone(), installation_id.clone(), cx);
145
146 let Some(panic_report_url) = http_client
147 .build_zed_api_url("/telemetry/panics", &[])
148 .log_err()
149 else {
150 return;
151 };
152
153 upload_panics_and_crashes(
154 http_client.clone(),
155 panic_report_url.clone(),
156 installation_id.clone(),
157 cx,
158 );
159
160 cx.observe_new_models(move |project: &mut Project, cx| {
161 let http_client = http_client.clone();
162 let panic_report_url = panic_report_url.clone();
163 let session_id = session_id.clone();
164 let installation_id = installation_id.clone();
165 let system_id = system_id.clone();
166
167 if let Some(ssh_client) = project.ssh_client() {
168 ssh_client.update(cx, |client, cx| {
169 if TelemetrySettings::get_global(cx).diagnostics {
170 let request = client.proto_client().request(proto::GetPanicFiles {});
171 cx.background_executor()
172 .spawn(async move {
173 let panic_files = request.await?;
174 for file in panic_files.file_contents {
175 let panic: Option<Panic> = serde_json::from_str(&file)
176 .log_err()
177 .or_else(|| {
178 file.lines()
179 .next()
180 .and_then(|line| serde_json::from_str(line).ok())
181 })
182 .unwrap_or_else(|| {
183 log::error!("failed to deserialize panic file {:?}", file);
184 None
185 });
186
187 if let Some(mut panic) = panic {
188 panic.session_id = session_id.clone();
189 panic.system_id = system_id.clone();
190 panic.installation_id = installation_id.clone();
191
192 upload_panic(&http_client, &panic_report_url, panic, &mut None)
193 .await?;
194 }
195 }
196
197 anyhow::Ok(())
198 })
199 .detach_and_log_err(cx);
200 }
201 })
202 }
203 })
204 .detach();
205}
206
/// Watches for hangs of the calling thread and reports them to `/telemetry/hangs`.
///
/// Does nothing unless the release channel is `Dev`, `Nightly` or `Preview`.
///
/// Three cooperating pieces are set up:
///
/// 1. A `SIGUSR2` handler that records the interrupted thread's stack frames
///    into the `BACKTRACE` static (bounded by the capacity reserved up front)
///    and then notifies `backtrace_rx`.
/// 2. A heartbeat: a foreground task drains a bounded channel while a background
///    task pushes one message per second into it. When the push fails because
///    the channel is full, the background task sends `SIGUSR2` to the thread
///    that called this function and stops (only the first hang is detected).
/// 3. A background task that waits for the handler's notification, resolves the
///    recorded frames to symbol names, re-installs the handler, logs the hang
///    and POSTs a `HangReport` (with an `x-zed-checksum` header). It exits as
///    soon as it observes that diagnostics telemetry is disabled.
///
/// `http_client` is used to build the URL and send the report;
/// `installation_id` is copied into every report.
#[cfg(target_os = "macos")]
pub fn monitor_main_thread_hangs(
    http_client: Arc<HttpClientWithUrl>,
    installation_id: Option<String>,
    cx: &AppContext,
) {
    // This is too noisy to ship to stable for now.
    if !matches!(
        ReleaseChannel::global(cx),
        ReleaseChannel::Dev | ReleaseChannel::Nightly | ReleaseChannel::Preview
    ) {
        return;
    }

    use nix::sys::signal::{
        sigaction, SaFlags, SigAction, SigHandler, SigSet,
        Signal::{self, SIGUSR2},
    };

    use parking_lot::Mutex;

    use http_client::Method;
    use std::{
        ffi::c_int,
        sync::{mpsc, OnceLock},
        time::Duration,
    };
    use telemetry_events::{BacktraceFrame, HangReport};

    use nix::sys::pthread;

    let foreground_executor = cx.foreground_executor();
    let background_executor = cx.background_executor();
    // Snapshot of the settings at call time; later changes are not observed here.
    let telemetry_settings = *client::TelemetrySettings::get_global(cx);

    // Initialize SIGUSR2 handler to send a backtrace to a channel.
    let (backtrace_tx, backtrace_rx) = mpsc::channel();
    static BACKTRACE: Mutex<Vec<backtrace::Frame>> = Mutex::new(Vec::new());
    static BACKTRACE_SENDER: OnceLock<mpsc::Sender<()>> = OnceLock::new();
    // NOTE(review): `get_or_init` keeps the sender from the first call. If this
    // function were ever called twice, the new `backtrace_tx` would presumably be
    // dropped, `backtrace_rx.recv()` below would fail immediately and the outer
    // `loop` would spin — confirm this is only called once per process.
    BACKTRACE_SENDER.get_or_init(|| backtrace_tx);
    // Reserve space up front so the signal handler can push frames without
    // growing the vector (it stops once `len` reaches `capacity`).
    BACKTRACE.lock().reserve(100);

    // Installs (or re-installs) the SIGUSR2 handler that captures a backtrace.
    fn handle_backtrace_signal() {
        unsafe {
            extern "C" fn handle_sigusr2(_i: c_int) {
                unsafe {
                    // ASYNC SIGNAL SAFETY: This lock is only accessed one other time,
                    // which can only be triggered by this signal handler. In addition,
                    // this signal handler is immediately removed by SA_RESETHAND, and this
                    // signal handler cannot be re-entrant due to the SIGUSR2 mask defined
                    // below
                    let mut bt = BACKTRACE.lock();
                    bt.clear();
                    backtrace::trace_unsynchronized(|frame| {
                        // Returning `false` stops the walk once the buffer is full.
                        if bt.len() < bt.capacity() {
                            bt.push(frame.clone());
                            true
                        } else {
                            false
                        }
                    });
                }

                // Wake the reporting task; a send failure is ignored.
                BACKTRACE_SENDER.get().unwrap().send(()).ok();
            }

            let mut mask = SigSet::empty();
            mask.add(SIGUSR2);
            sigaction(
                Signal::SIGUSR2,
                &SigAction::new(
                    SigHandler::Handler(handle_sigusr2),
                    SaFlags::SA_RESTART | SaFlags::SA_RESETHAND,
                    mask,
                ),
            )
            .log_err();
        }
    }

    handle_backtrace_signal();
    // The thread that will be signalled when a hang is suspected.
    // NOTE(review): assumes this function is called on the main thread — confirm.
    let main_thread = pthread::pthread_self();

    // Heartbeat channel: the foreground task consumes, the background task produces.
    let (mut tx, mut rx) = futures::channel::mpsc::channel(3);
    foreground_executor
        .spawn(async move { while (rx.next().await).is_some() {} })
        .detach();

    background_executor
        .spawn({
            let background_executor = background_executor.clone();
            async move {
                loop {
                    background_executor.timer(Duration::from_secs(1)).await;
                    match tx.try_send(()) {
                        Ok(_) => continue,
                        Err(e) => {
                            // A full channel means the foreground task has not
                            // consumed the previous heartbeats.
                            if e.into_send_error().is_full() {
                                pthread::pthread_kill(main_thread, SIGUSR2).log_err();
                            }
                            // Only detect the first hang
                            break;
                        }
                    }
                }
            }
        })
        .detach();

    let app_version = release_channel::AppVersion::global(cx);
    let os_name = client::telemetry::os_name();

    background_executor
        .clone()
        .spawn(async move {
            let os_version = client::telemetry::os_version();

            loop {
                // Each successful `recv` corresponds to one run of the signal handler.
                while backtrace_rx.recv().is_ok() {
                    if !telemetry_settings.diagnostics {
                        return;
                    }

                    // ASYNC SIGNAL SAFETY: This lock is only accessed _after_
                    // the backtrace transmitter has fired, which itself is only done
                    // by the signal handler. And due to SA_RESETHAND the signal handler
                    // will not run again until `handle_backtrace_signal` is called.
                    let raw_backtrace = BACKTRACE.lock().drain(..).collect::<Vec<_>>();
                    // Resolve each raw frame into addresses plus symbol names.
                    let backtrace: Vec<_> = raw_backtrace
                        .into_iter()
                        .map(|frame| {
                            let mut btf = BacktraceFrame {
                                ip: frame.ip() as usize,
                                symbol_addr: frame.symbol_address() as usize,
                                base: frame.module_base_address().map(|addr| addr as usize),
                                symbols: vec![],
                            };

                            backtrace::resolve_frame(&frame, |symbol| {
                                if let Some(name) = symbol.name() {
                                    btf.symbols.push(name.to_string());
                                }
                            });

                            btf
                        })
                        .collect();

                    // IMPORTANT: Don't move this to before `BACKTRACE.lock()`
                    handle_backtrace_signal();

                    // Log only the first symbol of every frame that has one.
                    log::error!(
                        "Suspected hang on main thread:\n{}",
                        backtrace
                            .iter()
                            .flat_map(|bt| bt.symbols.first().as_ref().map(|s| s.as_str()))
                            .collect::<Vec<_>>()
                            .join("\n")
                    );

                    let report = HangReport {
                        backtrace,
                        app_version: Some(app_version),
                        os_name: os_name.clone(),
                        os_version: Some(os_version.clone()),
                        architecture: env::consts::ARCH.into(),
                        installation_id: installation_id.clone(),
                    };

                    // Any failure while preparing the request skips this report.
                    let Some(json_bytes) = serde_json::to_vec(&report).log_err() else {
                        continue;
                    };

                    let Some(checksum) = client::telemetry::calculate_json_checksum(&json_bytes)
                    else {
                        continue;
                    };

                    let Ok(url) = http_client.build_zed_api_url("/telemetry/hangs", &[]) else {
                        continue;
                    };

                    let Ok(request) = http_client::Request::builder()
                        .method(Method::POST)
                        .uri(url.as_ref())
                        .header("x-zed-checksum", checksum)
                        .body(json_bytes.into())
                    else {
                        continue;
                    };

                    if let Some(response) = http_client.send(request).await.log_err() {
                        if response.status() != 200 {
                            log::error!("Failed to send hang report: HTTP {:?}", response.status());
                        }
                    }
                }
            }
        })
        .detach()
}
408
409fn upload_panics_and_crashes(
410 http: Arc<HttpClientWithUrl>,
411 panic_report_url: Url,
412 installation_id: Option<String>,
413 cx: &AppContext,
414) {
415 let telemetry_settings = *client::TelemetrySettings::get_global(cx);
416 cx.background_executor()
417 .spawn(async move {
418 let most_recent_panic =
419 upload_previous_panics(http.clone(), &panic_report_url, telemetry_settings)
420 .await
421 .log_err()
422 .flatten();
423 upload_previous_crashes(http, most_recent_panic, installation_id, telemetry_settings)
424 .await
425 .log_err()
426 })
427 .detach()
428}
429
430/// Uploads panics via `zed.dev`.
431async fn upload_previous_panics(
432 http: Arc<HttpClientWithUrl>,
433 panic_report_url: &Url,
434 telemetry_settings: client::TelemetrySettings,
435) -> anyhow::Result<Option<(i64, String)>> {
436 let mut children = smol::fs::read_dir(paths::logs_dir()).await?;
437
438 let mut most_recent_panic = None;
439
440 while let Some(child) = children.next().await {
441 let child = child?;
442 let child_path = child.path();
443
444 if child_path.extension() != Some(OsStr::new("panic")) {
445 continue;
446 }
447 let filename = if let Some(filename) = child_path.file_name() {
448 filename.to_string_lossy()
449 } else {
450 continue;
451 };
452
453 if !filename.starts_with("zed") {
454 continue;
455 }
456
457 if telemetry_settings.diagnostics {
458 let panic_file_content = smol::fs::read_to_string(&child_path)
459 .await
460 .context("error reading panic file")?;
461
462 let panic: Option<Panic> = serde_json::from_str(&panic_file_content)
463 .log_err()
464 .or_else(|| {
465 panic_file_content
466 .lines()
467 .next()
468 .and_then(|line| serde_json::from_str(line).ok())
469 })
470 .unwrap_or_else(|| {
471 log::error!("failed to deserialize panic file {:?}", panic_file_content);
472 None
473 });
474
475 if let Some(panic) = panic {
476 if !upload_panic(&http, &panic_report_url, panic, &mut most_recent_panic).await? {
477 continue;
478 }
479 }
480 }
481
482 // We've done what we can, delete the file
483 std::fs::remove_file(child_path)
484 .context("error removing panic")
485 .log_err();
486 }
487 Ok(most_recent_panic)
488}
489
490async fn upload_panic(
491 http: &Arc<HttpClientWithUrl>,
492 panic_report_url: &Url,
493 panic: telemetry_events::Panic,
494 most_recent_panic: &mut Option<(i64, String)>,
495) -> Result<bool> {
496 *most_recent_panic = Some((panic.panicked_on, panic.payload.clone()));
497
498 let json_bytes = serde_json::to_vec(&PanicRequest { panic }).unwrap();
499
500 let Some(checksum) = client::telemetry::calculate_json_checksum(&json_bytes) else {
501 return Ok(false);
502 };
503
504 let Ok(request) = http_client::Request::builder()
505 .method(Method::POST)
506 .uri(panic_report_url.as_ref())
507 .header("x-zed-checksum", checksum)
508 .body(json_bytes.into())
509 else {
510 return Ok(false);
511 };
512
513 let response = http.send(request).await.context("error sending panic")?;
514 if !response.status().is_success() {
515 log::error!("Error uploading panic to server: {}", response.status());
516 }
517
518 Ok(true)
519}
/// Key in `KEY_VALUE_STORE` under which `upload_previous_crashes` stores the
/// (lowercased) file name of the lexicographically greatest crash report it
/// has uploaded so far.
const LAST_CRASH_UPLOADED: &str = "LAST_CRASH_UPLOADED";
521
522/// upload crashes from apple's diagnostic reports to our server.
523/// (only if telemetry is enabled)
524async fn upload_previous_crashes(
525 http: Arc<HttpClientWithUrl>,
526 most_recent_panic: Option<(i64, String)>,
527 installation_id: Option<String>,
528 telemetry_settings: client::TelemetrySettings,
529) -> Result<()> {
530 if !telemetry_settings.diagnostics {
531 return Ok(());
532 }
533 let last_uploaded = KEY_VALUE_STORE
534 .read_kvp(LAST_CRASH_UPLOADED)?
535 .unwrap_or("zed-2024-01-17-221900.ips".to_string()); // don't upload old crash reports from before we had this.
536 let mut uploaded = last_uploaded.clone();
537
538 let crash_report_url = http.build_zed_api_url("/telemetry/crashes", &[])?;
539
540 // Crash directories are only set on macOS.
541 for dir in [crashes_dir(), crashes_retired_dir()]
542 .iter()
543 .filter_map(|d| d.as_deref())
544 {
545 let mut children = smol::fs::read_dir(&dir).await?;
546 while let Some(child) = children.next().await {
547 let child = child?;
548 let Some(filename) = child
549 .path()
550 .file_name()
551 .map(|f| f.to_string_lossy().to_lowercase())
552 else {
553 continue;
554 };
555
556 if !filename.starts_with("zed-") || !filename.ends_with(".ips") {
557 continue;
558 }
559
560 if filename <= last_uploaded {
561 continue;
562 }
563
564 let body = smol::fs::read_to_string(&child.path())
565 .await
566 .context("error reading crash file")?;
567
568 let mut request = http_client::Request::post(&crash_report_url.to_string())
569 .follow_redirects(http_client::RedirectPolicy::FollowAll)
570 .header("Content-Type", "text/plain");
571
572 if let Some((panicked_on, payload)) = most_recent_panic.as_ref() {
573 request = request
574 .header("x-zed-panicked-on", format!("{panicked_on}"))
575 .header("x-zed-panic", payload)
576 }
577 if let Some(installation_id) = installation_id.as_ref() {
578 request = request.header("x-zed-installation-id", installation_id);
579 }
580
581 let request = request.body(body.into())?;
582
583 let response = http.send(request).await.context("error sending crash")?;
584 if !response.status().is_success() {
585 log::error!("Error uploading crash to server: {}", response.status());
586 }
587
588 if uploaded < filename {
589 uploaded.clone_from(&filename);
590 KEY_VALUE_STORE
591 .write_kvp(LAST_CRASH_UPLOADED.to_string(), filename)
592 .await?;
593 }
594 }
595 }
596
597 Ok(())
598}