1use anyhow::{Context, Result};
2use backtrace::{self, Backtrace};
3use chrono::Utc;
4use db::kvp::KEY_VALUE_STORE;
5use gpui::{App, AppContext, SemanticVersion};
6use isahc::config::Configurable;
7
8use paths::{CRASHES_DIR, CRASHES_RETIRED_DIR};
9use release_channel::ReleaseChannel;
10use release_channel::RELEASE_CHANNEL;
11use serde::{Deserialize, Serialize};
12use settings::Settings;
13use smol::stream::StreamExt;
14use std::{
15 env,
16 ffi::OsStr,
17 sync::{atomic::Ordering, Arc},
18};
19use std::{io::Write, panic, sync::atomic::AtomicU32, thread};
20use util::{
21 http::{self, HttpClient, HttpClientWithUrl},
22 paths, ResultExt,
23};
24
25use crate::stdout_is_a_pty;
26
27#[derive(Serialize, Deserialize)]
28struct LocationData {
29 file: String,
30 line: u32,
31}
32
33#[derive(Serialize, Deserialize)]
34struct Panic {
35 thread: String,
36 payload: String,
37 #[serde(skip_serializing_if = "Option::is_none")]
38 location_data: Option<LocationData>,
39 backtrace: Vec<String>,
40 app_version: String,
41 release_channel: String,
42 os_name: String,
43 os_version: Option<String>,
44 architecture: String,
45 panicked_on: i64,
46 #[serde(skip_serializing_if = "Option::is_none")]
47 installation_id: Option<String>,
48 session_id: String,
49}
50
51#[derive(Serialize)]
52struct PanicRequest {
53 panic: Panic,
54}
55
/// Number of times the panic hook has been entered. Only the first entrant
/// produces a report; later entrants park themselves until the process exits.
static PANIC_COUNT: AtomicU32 = AtomicU32::new(0);
57
58pub fn init_panic_hook(app: &App, installation_id: Option<String>, session_id: String) {
59 let is_pty = stdout_is_a_pty();
60 let app_metadata = app.metadata();
61
62 panic::set_hook(Box::new(move |info| {
63 let prior_panic_count = PANIC_COUNT.fetch_add(1, Ordering::SeqCst);
64 if prior_panic_count > 0 {
65 // Give the panic-ing thread time to write the panic file
66 loop {
67 std::thread::yield_now();
68 }
69 }
70
71 let thread = thread::current();
72 let thread_name = thread.name().unwrap_or("<unnamed>");
73
74 let payload = info
75 .payload()
76 .downcast_ref::<&str>()
77 .map(|s| s.to_string())
78 .or_else(|| info.payload().downcast_ref::<String>().map(|s| s.clone()))
79 .unwrap_or_else(|| "Box<Any>".to_string());
80
81 if *release_channel::RELEASE_CHANNEL == ReleaseChannel::Dev {
82 let location = info.location().unwrap();
83 let backtrace = Backtrace::new();
84 eprintln!(
85 "Thread {:?} panicked with {:?} at {}:{}:{}\n{:?}",
86 thread_name,
87 payload,
88 location.file(),
89 location.line(),
90 location.column(),
91 backtrace,
92 );
93 std::process::exit(-1);
94 }
95
96 let app_version = if let Some(version) = app_metadata.app_version {
97 version.to_string()
98 } else {
99 option_env!("CARGO_PKG_VERSION")
100 .unwrap_or("dev")
101 .to_string()
102 };
103
104 let backtrace = Backtrace::new();
105 let mut backtrace = backtrace
106 .frames()
107 .iter()
108 .flat_map(|frame| {
109 frame
110 .symbols()
111 .iter()
112 .filter_map(|frame| Some(format!("{:#}", frame.name()?)))
113 })
114 .collect::<Vec<_>>();
115
116 // Strip out leading stack frames for rust panic-handling.
117 if let Some(ix) = backtrace
118 .iter()
119 .position(|name| name == "rust_begin_unwind")
120 {
121 backtrace.drain(0..=ix);
122 }
123
124 let panic_data = Panic {
125 thread: thread_name.into(),
126 payload,
127 location_data: info.location().map(|location| LocationData {
128 file: location.file().into(),
129 line: location.line(),
130 }),
131 app_version: app_version.to_string(),
132 release_channel: RELEASE_CHANNEL.display_name().into(),
133 os_name: app_metadata.os_name.into(),
134 os_version: app_metadata
135 .os_version
136 .as_ref()
137 .map(SemanticVersion::to_string),
138 architecture: env::consts::ARCH.into(),
139 panicked_on: Utc::now().timestamp_millis(),
140 backtrace,
141 installation_id: installation_id.clone(),
142 session_id: session_id.clone(),
143 };
144
145 if let Some(panic_data_json) = serde_json::to_string_pretty(&panic_data).log_err() {
146 log::error!("{}", panic_data_json);
147 }
148
149 if !is_pty {
150 if let Some(panic_data_json) = serde_json::to_string(&panic_data).log_err() {
151 let timestamp = chrono::Utc::now().format("%Y_%m_%d %H_%M_%S").to_string();
152 let panic_file_path = paths::LOGS_DIR.join(format!("zed-{}.panic", timestamp));
153 let panic_file = std::fs::OpenOptions::new()
154 .append(true)
155 .create(true)
156 .open(&panic_file_path)
157 .log_err();
158 if let Some(mut panic_file) = panic_file {
159 writeln!(&mut panic_file, "{}", panic_data_json).log_err();
160 panic_file.flush().log_err();
161 }
162 }
163 }
164
165 std::process::abort();
166 }));
167}
168
169pub fn init(
170 http_client: Arc<HttpClientWithUrl>,
171 installation_id: Option<String>,
172 cx: &mut AppContext,
173) {
174 #[cfg(target_os = "macos")]
175 monitor_main_thread_hangs(http_client.clone(), installation_id.clone(), cx);
176
177 upload_panics_and_crashes(http_client, installation_id, cx)
178}
179
/// Detects a hang of the main thread and reports its backtrace.
///
/// Three cooperating pieces are set up:
///
/// 1. A foreground task drains a channel created with `channel(3)`.
/// 2. A background task pushes a tick into that channel once per second. If
///    the channel is full, the foreground task has stopped draining it, so
///    `SIGUSR2` is sent to the main thread. Only the first hang is detected.
/// 3. The `SIGUSR2` handler captures the main thread's stack into `BACKTRACE`
///    and notifies a second background task, which symbolicates the frames,
///    logs them and posts a `HangReport` to `/telemetry/hangs`.
///
/// The reporting task stops as soon as it sees that diagnostics telemetry is
/// disabled.
#[cfg(target_os = "macos")]
pub fn monitor_main_thread_hangs(
    http_client: Arc<HttpClientWithUrl>,
    installation_id: Option<String>,
    cx: &AppContext,
) {
    use nix::sys::signal::{
        sigaction, SaFlags, SigAction, SigHandler, SigSet,
        Signal::{self, SIGUSR2},
    };

    use parking_lot::Mutex;

    use std::{
        ffi::c_int,
        sync::{mpsc, OnceLock},
        time::Duration,
    };
    use telemetry_events::{BacktraceFrame, HangReport};
    use util::http::Method;

    use nix::sys::pthread;

    let foreground_executor = cx.foreground_executor();
    let background_executor = cx.background_executor();
    let telemetry_settings = *client::TelemetrySettings::get_global(cx);
    let metadata = cx.app_metadata();

    // Initialize SIGUSR2 handler to send a backtrace to a channel.
    let (backtrace_tx, backtrace_rx) = mpsc::channel();
    static BACKTRACE: Mutex<Vec<backtrace::Frame>> = Mutex::new(Vec::new());
    static BACKTRACE_SENDER: OnceLock<mpsc::Sender<()>> = OnceLock::new();
    BACKTRACE_SENDER.get_or_init(|| backtrace_tx);
    // Reserve up front so the signal handler can push frames without growing
    // the vector; the handler stops once the capacity is used up.
    BACKTRACE.lock().reserve(100);

    /// (Re-)installs the one-shot SIGUSR2 handler that records a backtrace.
    fn handle_backtrace_signal() {
        unsafe {
            extern "C" fn handle_sigusr2(_i: c_int) {
                unsafe {
                    // ASYNC SIGNAL SAFETY: This lock is only accessed one other time,
                    // which can only be triggered by this signal handler. In addition,
                    // this signal handler is immediately removed by SA_RESETHAND, and this
                    // signal handler cannot be re-entrant due to the SIGUSR2 mask defined
                    // below
                    let mut bt = BACKTRACE.lock();
                    bt.clear();
                    backtrace::trace_unsynchronized(|frame| {
                        if bt.len() < bt.capacity() {
                            bt.push(frame.clone());
                            true
                        } else {
                            false
                        }
                    });
                }

                BACKTRACE_SENDER.get().unwrap().send(()).ok();
            }

            let mut mask = SigSet::empty();
            mask.add(SIGUSR2);
            sigaction(
                Signal::SIGUSR2,
                &SigAction::new(
                    SigHandler::Handler(handle_sigusr2),
                    SaFlags::SA_RESTART | SaFlags::SA_RESETHAND,
                    mask,
                ),
            )
            .log_err();
        }
    }

    handle_backtrace_signal();
    let main_thread = pthread::pthread_self();

    // Heartbeat channel: the foreground task drains it, the background task
    // fills it once per second.
    let (mut tx, mut rx) = futures::channel::mpsc::channel(3);
    foreground_executor
        .spawn(async move { while rx.next().await.is_some() {} })
        .detach();

    background_executor
        .spawn({
            let background_executor = background_executor.clone();
            async move {
                loop {
                    background_executor.timer(Duration::from_secs(1)).await;
                    match tx.try_send(()) {
                        Ok(_) => continue,
                        Err(e) => {
                            if e.into_send_error().is_full() {
                                pthread::pthread_kill(main_thread, SIGUSR2).log_err();
                            }
                            // Only detect the first hang
                            break;
                        }
                    }
                }
            }
        })
        .detach();

    background_executor
        .clone()
        .spawn(async move {
            // `recv` fails only when the sender is gone, which happens when an
            // earlier call already installed `BACKTRACE_SENDER` and this
            // call's `backtrace_tx` was dropped. End the task in that case
            // instead of spinning.
            while backtrace_rx.recv().is_ok() {
                if !telemetry_settings.diagnostics {
                    return;
                }

                // ASYNC SIGNAL SAFETY: This lock is only accessed _after_
                // the backtrace transmitter has fired, which itself is only done
                // by the signal handler. And due to SA_RESETHAND the signal handler
                // will not run again until `handle_backtrace_signal` is called.
                let raw_backtrace = BACKTRACE.lock().drain(..).collect::<Vec<_>>();
                let backtrace: Vec<_> = raw_backtrace
                    .into_iter()
                    .map(|frame| {
                        let mut btf = BacktraceFrame {
                            ip: frame.ip() as usize,
                            symbol_addr: frame.symbol_address() as usize,
                            base: frame.module_base_address().map(|addr| addr as usize),
                            symbols: vec![],
                        };

                        backtrace::resolve_frame(&frame, |symbol| {
                            if let Some(name) = symbol.name() {
                                btf.symbols.push(name.to_string());
                            }
                        });

                        btf
                    })
                    .collect();

                // IMPORTANT: Don't move this to before `BACKTRACE.lock()`
                handle_backtrace_signal();

                log::error!(
                    "Suspected hang on main thread:\n{}",
                    backtrace
                        .iter()
                        .filter_map(|bt| bt.symbols.first().map(String::as_str))
                        .collect::<Vec<_>>()
                        .join("\n")
                );

                let report = HangReport {
                    backtrace,
                    app_version: metadata.app_version,
                    os_name: metadata.os_name.to_owned(),
                    os_version: metadata.os_version,
                    architecture: env::consts::ARCH.into(),
                    installation_id: installation_id.clone(),
                };

                let Some(json_bytes) = serde_json::to_vec(&report).log_err() else {
                    continue;
                };

                let Some(checksum) = client::telemetry::calculate_json_checksum(&json_bytes)
                else {
                    continue;
                };

                let Ok(url) = http_client.build_zed_api_url("/telemetry/hangs", &[]) else {
                    continue;
                };

                let Ok(request) = http::Request::builder()
                    .method(Method::POST)
                    .uri(url.as_ref())
                    .header("x-zed-checksum", checksum)
                    .body(json_bytes.into())
                else {
                    continue;
                };

                if let Some(response) = http_client.send(request).await.log_err() {
                    if response.status() != 200 {
                        log::error!("Failed to send hang report: HTTP {:?}", response.status());
                    }
                }
            }
        })
        .detach()
}
369
370fn upload_panics_and_crashes(
371 http: Arc<HttpClientWithUrl>,
372 installation_id: Option<String>,
373 cx: &mut AppContext,
374) {
375 let telemetry_settings = *client::TelemetrySettings::get_global(cx);
376 cx.background_executor()
377 .spawn(async move {
378 let most_recent_panic = upload_previous_panics(http.clone(), telemetry_settings)
379 .await
380 .log_err()
381 .flatten();
382 upload_previous_crashes(http, most_recent_panic, installation_id, telemetry_settings)
383 .await
384 .log_err()
385 })
386 .detach()
387}
388
389/// Uploads panics via `zed.dev`.
390async fn upload_previous_panics(
391 http: Arc<HttpClientWithUrl>,
392 telemetry_settings: client::TelemetrySettings,
393) -> Result<Option<(i64, String)>> {
394 let panic_report_url = http.build_url("/api/panic");
395 let mut children = smol::fs::read_dir(&*paths::LOGS_DIR).await?;
396
397 let mut most_recent_panic = None;
398
399 while let Some(child) = children.next().await {
400 let child = child?;
401 let child_path = child.path();
402
403 if child_path.extension() != Some(OsStr::new("panic")) {
404 continue;
405 }
406 let filename = if let Some(filename) = child_path.file_name() {
407 filename.to_string_lossy()
408 } else {
409 continue;
410 };
411
412 if !filename.starts_with("zed") {
413 continue;
414 }
415
416 if telemetry_settings.diagnostics {
417 let panic_file_content = smol::fs::read_to_string(&child_path)
418 .await
419 .context("error reading panic file")?;
420
421 let panic: Option<Panic> = serde_json::from_str(&panic_file_content)
422 .ok()
423 .or_else(|| {
424 panic_file_content
425 .lines()
426 .next()
427 .and_then(|line| serde_json::from_str(line).ok())
428 })
429 .unwrap_or_else(|| {
430 log::error!("failed to deserialize panic file {:?}", panic_file_content);
431 None
432 });
433
434 if let Some(panic) = panic {
435 most_recent_panic = Some((panic.panicked_on, panic.payload.clone()));
436
437 let body = serde_json::to_string(&PanicRequest { panic }).unwrap();
438
439 let request = http::Request::post(&panic_report_url)
440 .redirect_policy(isahc::config::RedirectPolicy::Follow)
441 .header("Content-Type", "application/json")
442 .body(body.into())?;
443 let response = http.send(request).await.context("error sending panic")?;
444 if !response.status().is_success() {
445 log::error!("Error uploading panic to server: {}", response.status());
446 }
447 }
448 }
449
450 // We've done what we can, delete the file
451 std::fs::remove_file(child_path)
452 .context("error removing panic")
453 .log_err();
454 }
455 Ok::<_, anyhow::Error>(most_recent_panic)
456}
457
/// Key under which the file name of the newest uploaded crash report is
/// stored in the key-value store.
static LAST_CRASH_UPLOADED: &str = "LAST_CRASH_UPLOADED";
459
460/// upload crashes from apple's diagnostic reports to our server.
461/// (only if telemetry is enabled)
462async fn upload_previous_crashes(
463 http: Arc<HttpClientWithUrl>,
464 most_recent_panic: Option<(i64, String)>,
465 installation_id: Option<String>,
466 telemetry_settings: client::TelemetrySettings,
467) -> Result<()> {
468 if !telemetry_settings.diagnostics {
469 return Ok(());
470 }
471 let last_uploaded = KEY_VALUE_STORE
472 .read_kvp(LAST_CRASH_UPLOADED)?
473 .unwrap_or("zed-2024-01-17-221900.ips".to_string()); // don't upload old crash reports from before we had this.
474 let mut uploaded = last_uploaded.clone();
475
476 let crash_report_url = http.build_zed_api_url("/telemetry/crashes", &[])?;
477
478 // crash directories are only set on MacOS
479 for dir in [&*CRASHES_DIR, &*CRASHES_RETIRED_DIR]
480 .iter()
481 .filter_map(|d| d.as_deref())
482 {
483 let mut children = smol::fs::read_dir(&dir).await?;
484 while let Some(child) = children.next().await {
485 let child = child?;
486 let Some(filename) = child
487 .path()
488 .file_name()
489 .map(|f| f.to_string_lossy().to_lowercase())
490 else {
491 continue;
492 };
493
494 if !filename.starts_with("zed-") || !filename.ends_with(".ips") {
495 continue;
496 }
497
498 if filename <= last_uploaded {
499 continue;
500 }
501
502 let body = smol::fs::read_to_string(&child.path())
503 .await
504 .context("error reading crash file")?;
505
506 let mut request = http::Request::post(&crash_report_url.to_string())
507 .redirect_policy(isahc::config::RedirectPolicy::Follow)
508 .header("Content-Type", "text/plain");
509
510 if let Some((panicked_on, payload)) = most_recent_panic.as_ref() {
511 request = request
512 .header("x-zed-panicked-on", format!("{}", panicked_on))
513 .header("x-zed-panic", payload)
514 }
515 if let Some(installation_id) = installation_id.as_ref() {
516 request = request.header("x-zed-installation-id", installation_id);
517 }
518
519 let request = request.body(body.into())?;
520
521 let response = http.send(request).await.context("error sending crash")?;
522 if !response.status().is_success() {
523 log::error!("Error uploading crash to server: {}", response.status());
524 }
525
526 if uploaded < filename {
527 uploaded = filename.clone();
528 KEY_VALUE_STORE
529 .write_kvp(LAST_CRASH_UPLOADED.to_string(), filename)
530 .await?;
531 }
532 }
533 }
534
535 Ok(())
536}