1use crash_handler::{CrashEventResult, CrashHandler};
2use futures::future::BoxFuture;
3use log::info;
4use minidumper::{Client, LoopAction, MinidumpBinary, Server, SocketName};
5use parking_lot::Mutex;
6use release_channel::{RELEASE_CHANNEL, ReleaseChannel};
7use serde::{Deserialize, Serialize};
8use std::mem;
9
10#[cfg(not(target_os = "windows"))]
11use smol::process::Command;
12use system_specs::GpuSpecs;
13
14#[cfg(target_os = "macos")]
15use std::sync::atomic::AtomicU32;
16use std::{
17 env,
18 fs::{self, File},
19 io,
20 panic::{self, PanicHookInfo},
21 path::{Path, PathBuf},
22 process::{self},
23 sync::{
24 Arc, OnceLock,
25 atomic::{AtomicBool, Ordering},
26 },
27 thread,
28 time::Duration,
29};
30
/// IPC client connected to the crash-handler subprocess.
///
/// Set once the crash handler has initialized and the client has connected to it.
/// Until then, outgoing messages are queued in `PENDING_CRASH_SERVER_MESSAGES`.
static CRASH_HANDLER: OnceLock<Arc<Client>> = OnceLock::new();
/// Set when the first minidump request is made to avoid generating duplicate crash reports.
pub static REQUESTED_MINIDUMP: AtomicBool = AtomicBool::new(false);
/// Timeout handed to `Server::run` by `crash_server`.
// NOTE(review): presumably the interval after which a client that has not pinged is
// considered gone — confirm against the minidumper documentation.
const CRASH_HANDLER_PING_TIMEOUT: Duration = Duration::from_secs(60);
/// How long the crash server waits for a first client connection before it
/// raises its shutdown flag.
const CRASH_HANDLER_CONNECT_TIMEOUT: Duration = Duration::from_secs(10);

/// Messages produced before `CRASH_HANDLER` was set; they are re-sent once the
/// client has connected.
static PENDING_CRASH_SERVER_MESSAGES: Mutex<Vec<CrashServerMessage>> = Mutex::new(Vec::new());

/// Mach thread port of the thread that ran `panic_hook` (0 until a panic occurs).
/// `suspend_all_other_threads` leaves this thread running.
#[cfg(target_os = "macos")]
static PANIC_THREAD_ID: AtomicU32 = AtomicU32::new(0);
42
43fn should_install_crash_handler() -> bool {
44 if let Ok(value) = env::var("ZED_GENERATE_MINIDUMPS") {
45 return value == "true" || value == "1";
46 }
47
48 if *RELEASE_CHANNEL == ReleaseChannel::Dev {
49 return false;
50 }
51
52 true
53}
54
55/// Install crash signal handlers and spawn the crash-handler subprocess.
56///
57/// The synchronous portion (signal handlers, panic hook) runs inline.
58/// The async keepalive task is passed to `spawn` so the caller decides
59/// which executor to schedule it on.
60pub fn init(crash_init: InitCrashHandler, spawn: impl FnOnce(BoxFuture<'static, ()>)) {
61 if !should_install_crash_handler() {
62 let old_hook = panic::take_hook();
63 panic::set_hook(Box::new(move |info| {
64 unsafe { env::set_var("RUST_BACKTRACE", "1") };
65 old_hook(info);
66 // prevent the macOS crash dialog from popping up
67 if cfg!(target_os = "macos") {
68 std::process::exit(1);
69 }
70 }));
71 return;
72 }
73
74 panic::set_hook(Box::new(panic_hook));
75
76 let handler = CrashHandler::attach(unsafe {
77 crash_handler::make_crash_event(move |crash_context: &crash_handler::CrashContext| {
78 let Some(client) = CRASH_HANDLER.get() else {
79 return CrashEventResult::Handled(false);
80 };
81
82 // only request a minidump once
83 let res = if REQUESTED_MINIDUMP
84 .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
85 .is_ok()
86 {
87 #[cfg(target_os = "macos")]
88 suspend_all_other_threads();
89
90 // on macos this "ping" is needed to ensure that all our
91 // `client.send_message` calls have been processed before we trigger the
92 // minidump request.
93 client.ping().ok();
94 client.request_dump(crash_context).is_ok()
95 } else {
96 true
97 };
98 CrashEventResult::Handled(res)
99 })
100 })
101 .expect("failed to attach signal handler");
102
103 info!("crash signal handlers installed");
104
105 spawn(Box::pin(connect_and_keepalive(crash_init, handler)));
106}
107
108/// Spawn the crash-handler subprocess, connect the IPC client, and run the
109/// keepalive ping loop. Called on a background executor by [`init`].
110async fn connect_and_keepalive(crash_init: InitCrashHandler, handler: CrashHandler) {
111 let exe = env::current_exe().expect("unable to find ourselves");
112 let zed_pid = process::id();
113 let socket_name = paths::temp_dir().join(format!("zed-crash-handler-{zed_pid}"));
114 #[cfg(not(target_os = "windows"))]
115 let _crash_handler = Command::new(exe)
116 .arg("--crash-handler")
117 .arg(&socket_name)
118 .spawn()
119 .expect("unable to spawn server process");
120
121 #[cfg(target_os = "windows")]
122 spawn_crash_handler_windows(&exe, &socket_name);
123
124 info!("spawning crash handler process");
125 send_crash_server_message(CrashServerMessage::Init(crash_init));
126
127 let mut elapsed = Duration::ZERO;
128 let retry_frequency = Duration::from_millis(100);
129 let mut maybe_client = None;
130 while maybe_client.is_none() {
131 if let Ok(client) = Client::with_name(SocketName::Path(&socket_name)) {
132 maybe_client = Some(client);
133 info!("connected to crash handler process after {elapsed:?}");
134 break;
135 }
136 elapsed += retry_frequency;
137 // Crash reporting is called outside of gpui in the remote server right now
138 #[allow(clippy::disallowed_methods)]
139 smol::Timer::after(retry_frequency).await;
140 }
141 let client = maybe_client.unwrap();
142 let client = Arc::new(client);
143
144 #[cfg(target_os = "linux")]
145 handler.set_ptracer(Some(_crash_handler.id()));
146
147 // Publishing the client to the OnceLock makes it visible to the signal
148 // handler callback installed earlier.
149 CRASH_HANDLER.set(client.clone()).ok();
150 let messages: Vec<_> = mem::take(PENDING_CRASH_SERVER_MESSAGES.lock().as_mut());
151 for message in messages.into_iter() {
152 send_crash_server_message(message);
153 }
154 // mem::forget so that the drop is not called
155 mem::forget(handler);
156 info!("crash handler registered");
157
158 loop {
159 client.ping().ok();
160 // Crash reporting is called outside of gpui in the remote server right now
161 #[allow(clippy::disallowed_methods)]
162 smol::Timer::after(Duration::from_secs(10)).await;
163 }
164}
165
/// Suspends every thread of the current task except the calling thread and the
/// thread recorded in `PANIC_THREAD_ID`.
///
/// Return codes of the Mach calls are ignored; if `task_threads` leaves the
/// count at zero, nothing is suspended.
///
/// # Safety
///
/// Calls raw Mach APIs and reads the thread array they return. The suspended
/// threads are never resumed by this function.
// NOTE(review): presumably only meant to be called from the crash callback, when the
// process is about to be dumped and terminated — confirm.
#[cfg(target_os = "macos")]
unsafe fn suspend_all_other_threads() {
    let mut thread_list: mach2::mach_types::thread_act_array_t = std::ptr::null_mut();
    let mut thread_count = 0;
    unsafe {
        let task = mach2::traps::current_task();
        mach2::task::task_threads(task, &raw mut thread_list, &raw mut thread_count);
    }

    let this_thread = unsafe { mach2::mach_init::mach_thread_self() };
    let panic_thread = PANIC_THREAD_ID.load(Ordering::SeqCst);

    (0..thread_count)
        .map(|index| unsafe { *thread_list.add(index as usize) })
        .filter(|&thread| thread != this_thread && thread != panic_thread)
        .for_each(|thread| {
            unsafe { mach2::thread_act::thread_suspend(thread) };
        });
}
183
/// State held by the crash-handler process; implements `minidumper::ServerHandler`.
///
/// Each field is filled in from the corresponding `CrashServerMessage` sent by
/// the client and is read back when a minidump has been created.
pub struct CrashServer {
    /// Data from the `Init` message; supplies the session id used to name output files.
    initialization_params: Mutex<Option<InitCrashHandler>>,
    /// Data from the most recent `Panic` message, if any.
    panic_info: Mutex<Option<CrashPanic>>,
    /// Data from the most recent `GPUInfo` message, if any.
    active_gpu: Mutex<Option<system_specs::GpuSpecs>>,
    /// Data from the most recent `UserInfo` message, if any.
    user_info: Mutex<Option<UserInfo>>,
    /// Set to `true` when a client connects; shared with the connect-timeout
    /// thread started by `crash_server`.
    has_connection: Arc<AtomicBool>,
}
191
/// Crash metadata written as JSON next to the minidump
/// (`<session_id>.json` in the logs directory).
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct CrashInfo {
    /// Initialization data received from the crashing process.
    pub init: InitCrashHandler,
    /// Panic details, when the crash was triggered by a Rust panic that was reported.
    pub panic: Option<CrashPanic>,
    /// Error encountered while producing the minidump, if any.
    pub minidump_error: Option<String>,
    /// GPUs read from `/sys/class/drm` on Linux and FreeBSD; empty on other platforms
    /// or when that lookup fails.
    pub gpus: Vec<system_specs::GpuInfo>,
    /// GPU reported by the crashing process through `set_gpu_info`, if any.
    pub active_gpu: Option<system_specs::GpuSpecs>,
    /// User details reported by the crashing process through `set_user_info`, if any.
    pub user_info: Option<UserInfo>,
}
201
/// Identifying information sent to the crash handler when it is initialized.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct InitCrashHandler {
    /// Session identifier; used as the file stem of the `.dmp` and `.json` files.
    pub session_id: String,
    /// Version string of the application.
    pub zed_version: String,
    // NOTE(review): presumably the name of the binary that crashed — confirm with callers.
    pub binary: String,
    /// Release channel name.
    pub release_channel: String,
    /// Commit SHA of the build.
    pub commit_sha: String,
}
210
/// Details of a Rust panic, captured by `panic_hook`.
#[derive(Deserialize, Serialize, Debug, Clone)]
pub struct CrashPanic {
    /// Panic payload as a string, or `"Box<Any>"` when the payload is not a string.
    pub message: String,
    /// Source location formatted as `file:line`; empty when the location is unknown.
    pub span: String,
}
216
/// User details attached to a crash report via `set_user_info`.
#[derive(Deserialize, Serialize, Debug, Clone)]
pub struct UserInfo {
    // NOTE(review): presumably the user's telemetry/metrics identifier — confirm with callers.
    pub metrics_id: Option<String>,
    // NOTE(review): presumably whether the user is a staff member — confirm with callers.
    pub is_staff: Option<bool>,
}
222
223fn send_crash_server_message(message: CrashServerMessage) {
224 let Some(crash_server) = CRASH_HANDLER.get() else {
225 PENDING_CRASH_SERVER_MESSAGES.lock().push(message);
226 return;
227 };
228 let data = match serde_json::to_vec(&message) {
229 Ok(data) => data,
230 Err(err) => {
231 log::warn!("Failed to serialize crash server message: {:?}", err);
232 return;
233 }
234 };
235
236 if let Err(err) = crash_server.send_message(0, data) {
237 log::warn!("Failed to send data to crash server {:?}", err);
238 }
239}
240
241pub fn set_gpu_info(specs: GpuSpecs) {
242 send_crash_server_message(CrashServerMessage::GPUInfo(specs));
243}
244
245pub fn set_user_info(info: UserInfo) {
246 send_crash_server_message(CrashServerMessage::UserInfo(info));
247}
248
/// Messages sent, JSON-encoded, from the application to the crash-handler process.
/// Each variant replaces the matching piece of state held by `CrashServer`.
#[derive(Serialize, Deserialize, Debug)]
enum CrashServerMessage {
    /// Session and build information.
    Init(InitCrashHandler),
    /// Details of a panic, sent from `panic_hook`.
    Panic(CrashPanic),
    /// The GPU in use, sent from `set_gpu_info`.
    GPUInfo(GpuSpecs),
    /// Details about the user, sent from `set_user_info`.
    UserInfo(UserInfo),
}
256
257impl minidumper::ServerHandler for CrashServer {
258 fn create_minidump_file(&self) -> Result<(File, PathBuf), io::Error> {
259 let dump_path = paths::logs_dir()
260 .join(
261 &self
262 .initialization_params
263 .lock()
264 .as_ref()
265 .expect("Missing initialization data")
266 .session_id,
267 )
268 .with_extension("dmp");
269 let file = File::create(&dump_path)?;
270 Ok((file, dump_path))
271 }
272
273 fn on_minidump_created(&self, result: Result<MinidumpBinary, minidumper::Error>) -> LoopAction {
274 let minidump_error = match result {
275 Ok(MinidumpBinary { mut file, path, .. }) => {
276 use io::Write;
277 file.flush().ok();
278 // TODO: clean this up once https://github.com/EmbarkStudios/crash-handling/issues/101 is addressed
279 drop(file);
280 let original_file = File::open(&path).unwrap();
281 let compressed_path = path.with_extension("zstd");
282 let compressed_file = File::create(&compressed_path).unwrap();
283 zstd::stream::copy_encode(original_file, compressed_file, 0).ok();
284 fs::rename(&compressed_path, path).unwrap();
285 None
286 }
287 Err(e) => Some(format!("{e:?}")),
288 };
289
290 #[cfg(not(any(target_os = "linux", target_os = "freebsd")))]
291 let gpus = vec![];
292
293 #[cfg(any(target_os = "linux", target_os = "freebsd"))]
294 let gpus = match system_specs::read_gpu_info_from_sys_class_drm() {
295 Ok(gpus) => gpus,
296 Err(err) => {
297 log::warn!("Failed to collect GPU information for crash report: {err}");
298 vec![]
299 }
300 };
301
302 let crash_info = CrashInfo {
303 init: self
304 .initialization_params
305 .lock()
306 .clone()
307 .expect("not initialized"),
308 panic: self.panic_info.lock().clone(),
309 minidump_error,
310 active_gpu: self.active_gpu.lock().clone(),
311 gpus,
312 user_info: self.user_info.lock().clone(),
313 };
314
315 let crash_data_path = paths::logs_dir()
316 .join(&crash_info.init.session_id)
317 .with_extension("json");
318
319 fs::write(crash_data_path, serde_json::to_vec(&crash_info).unwrap()).ok();
320
321 LoopAction::Exit
322 }
323
324 fn on_message(&self, _: u32, buffer: Vec<u8>) {
325 let message: CrashServerMessage =
326 serde_json::from_slice(&buffer).expect("invalid init data");
327 match message {
328 CrashServerMessage::Init(init_data) => {
329 self.initialization_params.lock().replace(init_data);
330 }
331 CrashServerMessage::Panic(crash_panic) => {
332 self.panic_info.lock().replace(crash_panic);
333 }
334 CrashServerMessage::GPUInfo(gpu_specs) => {
335 self.active_gpu.lock().replace(gpu_specs);
336 }
337 CrashServerMessage::UserInfo(user_info) => {
338 self.user_info.lock().replace(user_info);
339 }
340 }
341 }
342
343 fn on_client_disconnected(&self, _clients: usize) -> LoopAction {
344 LoopAction::Exit
345 }
346
347 fn on_client_connected(&self, _clients: usize) -> LoopAction {
348 self.has_connection.store(true, Ordering::SeqCst);
349 LoopAction::Continue
350 }
351}
352
353pub fn panic_hook(info: &PanicHookInfo) {
354 let message = info.payload_as_str().unwrap_or("Box<Any>").to_owned();
355
356 let span = info
357 .location()
358 .map(|loc| format!("{}:{}", loc.file(), loc.line()))
359 .unwrap_or_default();
360
361 let current_thread = std::thread::current();
362 let thread_name = current_thread.name().unwrap_or("<unnamed>");
363
364 // wait 500ms for the crash handler process to start up
365 // if it's still not there just write panic info and no minidump
366 let retry_frequency = Duration::from_millis(100);
367 for _ in 0..5 {
368 if CRASH_HANDLER.get().is_some() {
369 break;
370 }
371 thread::sleep(retry_frequency);
372 }
373 let location = info
374 .location()
375 .map_or_else(|| "<unknown>".to_owned(), |location| location.to_string());
376 log::error!("thread '{thread_name}' panicked at {location}:\n{message}...");
377
378 send_crash_server_message(CrashServerMessage::Panic(CrashPanic { message, span }));
379 log::error!("triggering a crash to generate a minidump...");
380
381 #[cfg(target_os = "macos")]
382 PANIC_THREAD_ID.store(
383 unsafe { mach2::mach_init::mach_thread_self() },
384 Ordering::SeqCst,
385 );
386
387 cfg_if::cfg_if! {
388 if #[cfg(target_os = "windows")] {
389 // https://learn.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
390 CrashHandler.simulate_exception(Some(234)); // (MORE_DATA_AVAILABLE)
391 } else {
392 std::process::abort();
393 }
394 }
395}
396
/// Launches `exe --crash-handler <socket_name>` as a separate process on Windows.
///
/// `CreateProcessW` is called directly so the startup flags can be controlled.
/// The returned process and thread handles are closed immediately.
///
/// # Panics
///
/// Panics if the process cannot be created.
#[cfg(target_os = "windows")]
fn spawn_crash_handler_windows(exe: &Path, socket_name: &Path) {
    use std::ffi::OsStr;
    use std::iter::once;
    use std::os::windows::ffi::OsStrExt;
    use windows::Win32::Foundation::CloseHandle;
    use windows::Win32::System::Threading::{
        CreateProcessW, PROCESS_CREATION_FLAGS, PROCESS_INFORMATION, STARTF_FORCEOFFFEEDBACK,
        STARTUPINFOW,
    };
    use windows::core::PWSTR;

    // NUL-terminated UTF-16 command line, as passed to CreateProcessW below.
    let command_line = format!(
        "\"{}\" --crash-handler \"{}\"",
        exe.display(),
        socket_name.display()
    );
    let mut wide_command_line: Vec<u16> = OsStr::new(&command_line)
        .encode_wide()
        .chain(once(0))
        .collect();

    let startup_info = STARTUPINFOW {
        cb: std::mem::size_of::<STARTUPINFOW>() as u32,
        // By default, Windows enables a "busy" cursor when a GUI application is launched.
        // This cursor is disabled once the application starts processing window messages.
        // Since the crash handler process doesn't process messages, this "busy" cursor stays enabled for a long time.
        // Disable the cursor feedback to prevent this from happening.
        dwFlags: STARTF_FORCEOFFFEEDBACK,
        ..Default::default()
    };

    let mut process_info = PROCESS_INFORMATION::default();

    unsafe {
        CreateProcessW(
            None,
            Some(PWSTR(wide_command_line.as_mut_ptr())),
            None,
            None,
            false,
            PROCESS_CREATION_FLAGS(0),
            None,
            None,
            &startup_info,
            &mut process_info,
        )
        .expect("unable to spawn server process");

        CloseHandle(process_info.hProcess).ok();
        CloseHandle(process_info.hThread).ok();
    }
}
447
448pub fn crash_server(socket: &Path) {
449 let Ok(mut server) = Server::with_name(SocketName::Path(socket)) else {
450 log::info!("Couldn't create socket, there may already be a running crash server");
451 return;
452 };
453
454 let shutdown = Arc::new(AtomicBool::new(false));
455 let has_connection = Arc::new(AtomicBool::new(false));
456
457 thread::Builder::new()
458 .name("CrashServerTimeout".to_owned())
459 .spawn({
460 let shutdown = shutdown.clone();
461 let has_connection = has_connection.clone();
462 move || {
463 std::thread::sleep(CRASH_HANDLER_CONNECT_TIMEOUT);
464 if !has_connection.load(Ordering::SeqCst) {
465 shutdown.store(true, Ordering::SeqCst);
466 }
467 }
468 })
469 .unwrap();
470
471 server
472 .run(
473 Box::new(CrashServer {
474 initialization_params: Mutex::default(),
475 panic_info: Mutex::default(),
476 user_info: Mutex::default(),
477 has_connection,
478 active_gpu: Mutex::default(),
479 }),
480 &shutdown,
481 Some(CRASH_HANDLER_PING_TIMEOUT),
482 )
483 .expect("failed to run server");
484}