1use anyhow::{Context, Result};
2use backtrace::{self, Backtrace};
3use chrono::Utc;
4use client::telemetry;
5use db::kvp::KEY_VALUE_STORE;
6use gpui::{AppContext, SemanticVersion};
7use http_client::{HttpRequestExt, Method};
8
9use http_client::{self, HttpClient, HttpClientWithUrl};
10use paths::{crashes_dir, crashes_retired_dir};
11use release_channel::ReleaseChannel;
12use release_channel::RELEASE_CHANNEL;
13use settings::Settings;
14use smol::stream::StreamExt;
15use std::{
16 env,
17 ffi::OsStr,
18 sync::{atomic::Ordering, Arc},
19};
20use std::{io::Write, panic, sync::atomic::AtomicU32, thread};
21use telemetry_events::LocationData;
22use telemetry_events::Panic;
23use telemetry_events::PanicRequest;
24use util::ResultExt;
25
26use crate::stdout_is_a_pty;
27static PANIC_COUNT: AtomicU32 = AtomicU32::new(0);
28
29pub fn init_panic_hook(
30 app_version: SemanticVersion,
31 system_id: Option<String>,
32 installation_id: Option<String>,
33 session_id: String,
34) {
35 let is_pty = stdout_is_a_pty();
36
37 panic::set_hook(Box::new(move |info| {
38 let prior_panic_count = PANIC_COUNT.fetch_add(1, Ordering::SeqCst);
39 if prior_panic_count > 0 {
40 // Give the panic-ing thread time to write the panic file
41 loop {
42 std::thread::yield_now();
43 }
44 }
45
46 let thread = thread::current();
47 let thread_name = thread.name().unwrap_or("<unnamed>");
48
49 let payload = info
50 .payload()
51 .downcast_ref::<&str>()
52 .map(|s| s.to_string())
53 .or_else(|| info.payload().downcast_ref::<String>().cloned())
54 .unwrap_or_else(|| "Box<Any>".to_string());
55
56 if *release_channel::RELEASE_CHANNEL == ReleaseChannel::Dev {
57 let location = info.location().unwrap();
58 let backtrace = Backtrace::new();
59 eprintln!(
60 "Thread {:?} panicked with {:?} at {}:{}:{}\n{:?}",
61 thread_name,
62 payload,
63 location.file(),
64 location.line(),
65 location.column(),
66 backtrace,
67 );
68 std::process::exit(-1);
69 }
70
71 let backtrace = Backtrace::new();
72 let mut backtrace = backtrace
73 .frames()
74 .iter()
75 .flat_map(|frame| {
76 frame
77 .symbols()
78 .iter()
79 .filter_map(|frame| Some(format!("{:#}", frame.name()?)))
80 })
81 .collect::<Vec<_>>();
82
83 // Strip out leading stack frames for rust panic-handling.
84 if let Some(ix) = backtrace
85 .iter()
86 .position(|name| name == "rust_begin_unwind")
87 {
88 backtrace.drain(0..=ix);
89 }
90
91 let panic_data = telemetry_events::Panic {
92 thread: thread_name.into(),
93 payload,
94 location_data: info.location().map(|location| LocationData {
95 file: location.file().into(),
96 line: location.line(),
97 }),
98 app_version: app_version.to_string(),
99 release_channel: RELEASE_CHANNEL.display_name().into(),
100 os_name: telemetry::os_name(),
101 os_version: Some(telemetry::os_version()),
102 architecture: env::consts::ARCH.into(),
103 panicked_on: Utc::now().timestamp_millis(),
104 backtrace,
105 system_id: system_id.clone(),
106 installation_id: installation_id.clone(),
107 session_id: session_id.clone(),
108 };
109
110 if let Some(panic_data_json) = serde_json::to_string_pretty(&panic_data).log_err() {
111 log::error!("{}", panic_data_json);
112 }
113
114 if !is_pty {
115 if let Some(panic_data_json) = serde_json::to_string(&panic_data).log_err() {
116 let timestamp = chrono::Utc::now().format("%Y_%m_%d %H_%M_%S").to_string();
117 let panic_file_path = paths::logs_dir().join(format!("zed-{timestamp}.panic"));
118 let panic_file = std::fs::OpenOptions::new()
119 .append(true)
120 .create(true)
121 .open(&panic_file_path)
122 .log_err();
123 if let Some(mut panic_file) = panic_file {
124 writeln!(&mut panic_file, "{panic_data_json}").log_err();
125 panic_file.flush().log_err();
126 }
127 }
128 }
129
130 std::process::abort();
131 }));
132}
133
134pub fn init(
135 http_client: Arc<HttpClientWithUrl>,
136 installation_id: Option<String>,
137 cx: &mut AppContext,
138) {
139 #[cfg(target_os = "macos")]
140 monitor_main_thread_hangs(http_client.clone(), installation_id.clone(), cx);
141
142 upload_panics_and_crashes(http_client, installation_id, cx)
143}
144
/// Watches the main thread for hangs and reports the first one that is detected.
///
/// Does nothing unless the release channel is Dev, Nightly or Preview.
///
/// How it works:
/// * A foreground task drains a small bounded channel.
/// * A background task pushes a unit into that channel once per second. If the
///   channel is full, the foreground task is not draining it, so the main
///   thread is sent `SIGUSR2`. That task then stops, so only the first hang is
///   detected.
/// * The `SIGUSR2` handler records raw stack frames into a static buffer and
///   notifies a second background task through a channel.
/// * That task resolves the frames to symbols, re-installs the signal handler,
///   logs the backtrace and POSTs a `HangReport` to `/telemetry/hangs`. It
///   exits on the first notification if telemetry diagnostics are disabled.
#[cfg(target_os = "macos")]
pub fn monitor_main_thread_hangs(
    http_client: Arc<HttpClientWithUrl>,
    installation_id: Option<String>,
    cx: &AppContext,
) {
    // This is too noisy to ship to stable for now.
    if !matches!(
        ReleaseChannel::global(cx),
        ReleaseChannel::Dev | ReleaseChannel::Nightly | ReleaseChannel::Preview
    ) {
        return;
    }

    use nix::sys::signal::{
        sigaction, SaFlags, SigAction, SigHandler, SigSet,
        Signal::{self, SIGUSR2},
    };

    use parking_lot::Mutex;

    use http_client::Method;
    use std::{
        ffi::c_int,
        sync::{mpsc, OnceLock},
        time::Duration,
    };
    use telemetry_events::{BacktraceFrame, HangReport};

    use nix::sys::pthread;

    let foreground_executor = cx.foreground_executor();
    let background_executor = cx.background_executor();
    let telemetry_settings = *client::TelemetrySettings::get_global(cx);

    // Initialize SIGUSR2 handler to send a backtrace to a channel.
    let (backtrace_tx, backtrace_rx) = mpsc::channel();
    static BACKTRACE: Mutex<Vec<backtrace::Frame>> = Mutex::new(Vec::new());
    static BACKTRACE_SENDER: OnceLock<mpsc::Sender<()>> = OnceLock::new();
    BACKTRACE_SENDER.get_or_init(|| backtrace_tx);
    // Reserve up front: the signal handler only pushes while spare capacity remains.
    BACKTRACE.lock().reserve(100);

    // Installs (or re-installs) the one-shot SIGUSR2 handler.
    fn handle_backtrace_signal() {
        unsafe {
            extern "C" fn handle_sigusr2(_i: c_int) {
                unsafe {
                    // ASYNC SIGNAL SAFETY: This lock is only accessed one other time,
                    // which can only be triggered by this signal handler. In addition,
                    // this signal handler is immediately removed by SA_RESETHAND, and this
                    // signal handler cannot be re-entrant due to the SIGUSR2 mask defined
                    // below
                    let mut bt = BACKTRACE.lock();
                    bt.clear();
                    backtrace::trace_unsynchronized(|frame| {
                        if bt.len() < bt.capacity() {
                            bt.push(frame.clone());
                            true
                        } else {
                            false
                        }
                    });
                }

                BACKTRACE_SENDER.get().unwrap().send(()).ok();
            }

            let mut mask = SigSet::empty();
            mask.add(SIGUSR2);
            sigaction(
                Signal::SIGUSR2,
                &SigAction::new(
                    SigHandler::Handler(handle_sigusr2),
                    SaFlags::SA_RESTART | SaFlags::SA_RESETHAND,
                    mask,
                ),
            )
            .log_err();
        }
    }

    handle_backtrace_signal();
    let main_thread = pthread::pthread_self();

    // Heartbeat channel: the foreground task drains it, the background task fills it.
    let (mut tx, mut rx) = futures::channel::mpsc::channel(3);
    foreground_executor
        .spawn(async move { while (rx.next().await).is_some() {} })
        .detach();

    background_executor
        .spawn({
            let background_executor = background_executor.clone();
            async move {
                loop {
                    background_executor.timer(Duration::from_secs(1)).await;
                    match tx.try_send(()) {
                        Ok(_) => continue,
                        Err(e) => {
                            // A full channel means the foreground task has stopped
                            // draining it, so ask the main thread for a backtrace.
                            if e.into_send_error().is_full() {
                                pthread::pthread_kill(main_thread, SIGUSR2).log_err();
                            }
                            // Only detect the first hang
                            break;
                        }
                    }
                }
            }
        })
        .detach();

    let app_version = release_channel::AppVersion::global(cx);
    let os_name = client::telemetry::os_name();

    background_executor
        .clone()
        .spawn(async move {
            let os_version = client::telemetry::os_version();

            // `recv` only fails once the channel is disconnected, which is
            // permanent, so the task simply ends then instead of retrying.
            while backtrace_rx.recv().is_ok() {
                if !telemetry_settings.diagnostics {
                    return;
                }

                // ASYNC SIGNAL SAFETY: This lock is only accessed _after_
                // the backtrace transmitter has fired, which itself is only done
                // by the signal handler. And due to SA_RESETHAND the signal handler
                // will not run again until `handle_backtrace_signal` is called.
                let raw_backtrace = BACKTRACE.lock().drain(..).collect::<Vec<_>>();
                let backtrace: Vec<_> = raw_backtrace
                    .into_iter()
                    .map(|frame| {
                        let mut btf = BacktraceFrame {
                            ip: frame.ip() as usize,
                            symbol_addr: frame.symbol_address() as usize,
                            base: frame.module_base_address().map(|addr| addr as usize),
                            symbols: vec![],
                        };

                        backtrace::resolve_frame(&frame, |symbol| {
                            if let Some(name) = symbol.name() {
                                btf.symbols.push(name.to_string());
                            }
                        });

                        btf
                    })
                    .collect();

                // IMPORTANT: Don't move this to before `BACKTRACE.lock()`
                handle_backtrace_signal();

                // Log the first resolved symbol of every frame that has one.
                log::error!(
                    "Suspected hang on main thread:\n{}",
                    backtrace
                        .iter()
                        .filter_map(|frame| frame.symbols.first().map(String::as_str))
                        .collect::<Vec<_>>()
                        .join("\n")
                );

                let report = HangReport {
                    backtrace,
                    app_version: Some(app_version),
                    os_name: os_name.clone(),
                    os_version: Some(os_version.clone()),
                    architecture: env::consts::ARCH.into(),
                    installation_id: installation_id.clone(),
                };

                let Some(json_bytes) = serde_json::to_vec(&report).log_err() else {
                    continue;
                };

                let Some(checksum) = client::telemetry::calculate_json_checksum(&json_bytes)
                else {
                    continue;
                };

                let Ok(url) = http_client.build_zed_api_url("/telemetry/hangs", &[]) else {
                    continue;
                };

                let Ok(request) = http_client::Request::builder()
                    .method(Method::POST)
                    .uri(url.as_ref())
                    .header("x-zed-checksum", checksum)
                    .body(json_bytes.into())
                else {
                    continue;
                };

                if let Some(response) = http_client.send(request).await.log_err() {
                    if !response.status().is_success() {
                        log::error!("Failed to send hang report: HTTP {:?}", response.status());
                    }
                }
            }
        })
        .detach()
}
346
347fn upload_panics_and_crashes(
348 http: Arc<HttpClientWithUrl>,
349 installation_id: Option<String>,
350 cx: &AppContext,
351) {
352 let telemetry_settings = *client::TelemetrySettings::get_global(cx);
353 cx.background_executor()
354 .spawn(async move {
355 let most_recent_panic = upload_previous_panics(http.clone(), telemetry_settings)
356 .await
357 .log_err()
358 .flatten();
359 upload_previous_crashes(http, most_recent_panic, installation_id, telemetry_settings)
360 .await
361 .log_err()
362 })
363 .detach()
364}
365
366/// Uploads panics via `zed.dev`.
367async fn upload_previous_panics(
368 http: Arc<HttpClientWithUrl>,
369 telemetry_settings: client::TelemetrySettings,
370) -> Result<Option<(i64, String)>> {
371 let panic_report_url = http.build_zed_api_url("/telemetry/panics", &[])?;
372 let mut children = smol::fs::read_dir(paths::logs_dir()).await?;
373
374 let mut most_recent_panic = None;
375
376 while let Some(child) = children.next().await {
377 let child = child?;
378 let child_path = child.path();
379
380 if child_path.extension() != Some(OsStr::new("panic")) {
381 continue;
382 }
383 let filename = if let Some(filename) = child_path.file_name() {
384 filename.to_string_lossy()
385 } else {
386 continue;
387 };
388
389 if !filename.starts_with("zed") {
390 continue;
391 }
392
393 if telemetry_settings.diagnostics {
394 let panic_file_content = smol::fs::read_to_string(&child_path)
395 .await
396 .context("error reading panic file")?;
397
398 let panic: Option<Panic> = serde_json::from_str(&panic_file_content)
399 .ok()
400 .or_else(|| {
401 panic_file_content
402 .lines()
403 .next()
404 .and_then(|line| serde_json::from_str(line).ok())
405 })
406 .unwrap_or_else(|| {
407 log::error!("failed to deserialize panic file {:?}", panic_file_content);
408 None
409 });
410
411 if let Some(panic) = panic {
412 most_recent_panic = Some((panic.panicked_on, panic.payload.clone()));
413
414 let json_bytes = serde_json::to_vec(&PanicRequest { panic }).unwrap();
415
416 let Some(checksum) = client::telemetry::calculate_json_checksum(&json_bytes) else {
417 continue;
418 };
419
420 let Ok(request) = http_client::Request::builder()
421 .method(Method::POST)
422 .uri(panic_report_url.as_ref())
423 .header("x-zed-checksum", checksum)
424 .body(json_bytes.into())
425 else {
426 continue;
427 };
428
429 let response = http.send(request).await.context("error sending panic")?;
430 if !response.status().is_success() {
431 log::error!("Error uploading panic to server: {}", response.status());
432 }
433 }
434 }
435
436 // We've done what we can, delete the file
437 std::fs::remove_file(child_path)
438 .context("error removing panic")
439 .log_err();
440 }
441 Ok::<_, anyhow::Error>(most_recent_panic)
442}
443
444const LAST_CRASH_UPLOADED: &str = "LAST_CRASH_UPLOADED";
445
446/// upload crashes from apple's diagnostic reports to our server.
447/// (only if telemetry is enabled)
448async fn upload_previous_crashes(
449 http: Arc<HttpClientWithUrl>,
450 most_recent_panic: Option<(i64, String)>,
451 installation_id: Option<String>,
452 telemetry_settings: client::TelemetrySettings,
453) -> Result<()> {
454 if !telemetry_settings.diagnostics {
455 return Ok(());
456 }
457 let last_uploaded = KEY_VALUE_STORE
458 .read_kvp(LAST_CRASH_UPLOADED)?
459 .unwrap_or("zed-2024-01-17-221900.ips".to_string()); // don't upload old crash reports from before we had this.
460 let mut uploaded = last_uploaded.clone();
461
462 let crash_report_url = http.build_zed_api_url("/telemetry/crashes", &[])?;
463
464 // Crash directories are only set on macOS.
465 for dir in [crashes_dir(), crashes_retired_dir()]
466 .iter()
467 .filter_map(|d| d.as_deref())
468 {
469 let mut children = smol::fs::read_dir(&dir).await?;
470 while let Some(child) = children.next().await {
471 let child = child?;
472 let Some(filename) = child
473 .path()
474 .file_name()
475 .map(|f| f.to_string_lossy().to_lowercase())
476 else {
477 continue;
478 };
479
480 if !filename.starts_with("zed-") || !filename.ends_with(".ips") {
481 continue;
482 }
483
484 if filename <= last_uploaded {
485 continue;
486 }
487
488 let body = smol::fs::read_to_string(&child.path())
489 .await
490 .context("error reading crash file")?;
491
492 let mut request = http_client::Request::post(&crash_report_url.to_string())
493 .follow_redirects(http_client::RedirectPolicy::FollowAll)
494 .header("Content-Type", "text/plain");
495
496 if let Some((panicked_on, payload)) = most_recent_panic.as_ref() {
497 request = request
498 .header("x-zed-panicked-on", format!("{panicked_on}"))
499 .header("x-zed-panic", payload)
500 }
501 if let Some(installation_id) = installation_id.as_ref() {
502 request = request.header("x-zed-installation-id", installation_id);
503 }
504
505 let request = request.body(body.into())?;
506
507 let response = http.send(request).await.context("error sending crash")?;
508 if !response.status().is_success() {
509 log::error!("Error uploading crash to server: {}", response.status());
510 }
511
512 if uploaded < filename {
513 uploaded.clone_from(&filename);
514 KEY_VALUE_STORE
515 .write_kvp(LAST_CRASH_UPLOADED.to_string(), filename)
516 .await?;
517 }
518 }
519 }
520
521 Ok(())
522}