1use crash_handler::{CrashEventResult, CrashHandler};
2use futures::future::BoxFuture;
3use log::info;
4use minidumper::{Client, LoopAction, MinidumpBinary};
5use release_channel::{RELEASE_CHANNEL, ReleaseChannel};
6use serde::{Deserialize, Serialize};
7use std::mem;
8
9#[cfg(not(target_os = "windows"))]
10use smol::process::Command;
11
12#[cfg(target_os = "macos")]
13use std::sync::atomic::AtomicU32;
14use std::{
15 env,
16 fs::{self, File},
17 io,
18 panic::{self, PanicHookInfo},
19 path::{Path, PathBuf},
20 process::{self},
21 sync::{
22 Arc, OnceLock,
23 atomic::{AtomicBool, Ordering},
24 },
25 thread,
26 time::Duration,
27};
28
/// IPC client for the crash-handler subprocess.
///
/// Set once the crash handler has initialized and the client has connected to it
/// (see `connect_and_keepalive`). Until then both the crash callback installed by
/// `init` and `panic_hook` see `None`.
pub static CRASH_HANDLER: OnceLock<Arc<Client>> = OnceLock::new();
/// Set when the first minidump request is made, to avoid generating duplicate
/// crash reports when the crash callback fires more than once.
pub static REQUESTED_MINIDUMP: AtomicBool = AtomicBool::new(false);
/// Timeout handed to the crash server's run loop in `crash_server`.
// NOTE(review): presumably how long the server tolerates a client that stopped
// pinging before treating it as stale — confirm against the minidumper docs.
const CRASH_HANDLER_PING_TIMEOUT: Duration = Duration::from_secs(60);
/// How long the crash server waits for a first client connection before its
/// timeout thread raises the shutdown flag.
const CRASH_HANDLER_CONNECT_TIMEOUT: Duration = Duration::from_secs(10);

/// Mach thread port of the thread that ran `panic_hook`; `0` until a panic is
/// reported. `suspend_all_other_threads` leaves this thread running.
#[cfg(target_os = "macos")]
static PANIC_THREAD_ID: AtomicU32 = AtomicU32::new(0);
38
39fn should_install_crash_handler() -> bool {
40 if let Ok(value) = env::var("ZED_GENERATE_MINIDUMPS") {
41 return value == "true" || value == "1";
42 }
43
44 if *RELEASE_CHANNEL == ReleaseChannel::Dev {
45 return false;
46 }
47
48 true
49}
50
51/// Install crash signal handlers and spawn the crash-handler subprocess.
52///
53/// The synchronous portion (signal handlers, panic hook) runs inline.
54/// The async keepalive task is passed to `spawn` so the caller decides
55/// which executor to schedule it on.
56pub fn init(crash_init: InitCrashHandler, spawn: impl FnOnce(BoxFuture<'static, ()>)) {
57 if !should_install_crash_handler() {
58 let old_hook = panic::take_hook();
59 panic::set_hook(Box::new(move |info| {
60 unsafe { env::set_var("RUST_BACKTRACE", "1") };
61 old_hook(info);
62 // prevent the macOS crash dialog from popping up
63 if cfg!(target_os = "macos") {
64 std::process::exit(1);
65 }
66 }));
67 return;
68 }
69
70 panic::set_hook(Box::new(panic_hook));
71
72 let handler = CrashHandler::attach(unsafe {
73 crash_handler::make_crash_event(move |crash_context: &crash_handler::CrashContext| {
74 let Some(client) = CRASH_HANDLER.get() else {
75 return CrashEventResult::Handled(false);
76 };
77
78 // only request a minidump once
79 let res = if REQUESTED_MINIDUMP
80 .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
81 .is_ok()
82 {
83 #[cfg(target_os = "macos")]
84 suspend_all_other_threads();
85
86 // on macos this "ping" is needed to ensure that all our
87 // `client.send_message` calls have been processed before we trigger the
88 // minidump request.
89 client.ping().ok();
90 client.request_dump(crash_context).is_ok()
91 } else {
92 true
93 };
94 CrashEventResult::Handled(res)
95 })
96 })
97 .expect("failed to attach signal handler");
98
99 info!("crash signal handlers installed");
100
101 spawn(Box::pin(connect_and_keepalive(crash_init, handler)));
102}
103
104/// Spawn the crash-handler subprocess, connect the IPC client, and run the
105/// keepalive ping loop. Called on a background executor by [`init`].
106async fn connect_and_keepalive(crash_init: InitCrashHandler, handler: CrashHandler) {
107 let exe = env::current_exe().expect("unable to find ourselves");
108 let zed_pid = process::id();
109 let socket_name = paths::temp_dir().join(format!("zed-crash-handler-{zed_pid}"));
110 #[cfg(not(target_os = "windows"))]
111 let _crash_handler = Command::new(exe)
112 .arg("--crash-handler")
113 .arg(&socket_name)
114 .spawn()
115 .expect("unable to spawn server process");
116
117 #[cfg(target_os = "windows")]
118 spawn_crash_handler_windows(&exe, &socket_name);
119
120 info!("spawning crash handler process");
121
122 let mut elapsed = Duration::ZERO;
123 let retry_frequency = Duration::from_millis(100);
124 let mut maybe_client = None;
125 while maybe_client.is_none() {
126 if let Ok(client) = Client::with_name(socket_name.as_path()) {
127 maybe_client = Some(client);
128 info!("connected to crash handler process after {elapsed:?}");
129 break;
130 }
131 elapsed += retry_frequency;
132 // Crash reporting is called outside of gpui in the remote server right now
133 #[allow(clippy::disallowed_methods)]
134 smol::Timer::after(retry_frequency).await;
135 }
136 let client = maybe_client.unwrap();
137 client
138 .send_message(1, serde_json::to_vec(&crash_init).unwrap())
139 .unwrap();
140
141 let client = Arc::new(client);
142
143 #[cfg(target_os = "linux")]
144 handler.set_ptracer(Some(_crash_handler.id()));
145
146 // Publishing the client to the OnceLock makes it visible to the signal
147 // handler callback installed earlier.
148 CRASH_HANDLER.set(client.clone()).ok();
149 // mem::forget so that the drop is not called
150 mem::forget(handler);
151 info!("crash handler registered");
152
153 loop {
154 client.ping().ok();
155 // Crash reporting is called outside of gpui in the remote server right now
156 #[allow(clippy::disallowed_methods)]
157 smol::Timer::after(Duration::from_secs(10)).await;
158 }
159}
160
/// Suspends every thread of the current task except the calling thread and the
/// thread recorded in `PANIC_THREAD_ID`.
///
/// The result of `task_threads` is not checked; the thread count starts at
/// zero, so nothing is suspended unless the call fills it in.
///
/// # Safety
///
/// Calls raw Mach APIs and reads through the thread array they return.
// NOTE(review): suspended threads are never resumed here — this appears to be
// intended only for the crash path, right before a minidump is requested.
#[cfg(target_os = "macos")]
unsafe fn suspend_all_other_threads() {
    let mut thread_list: mach2::mach_types::thread_act_array_t = std::ptr::null_mut();
    let mut thread_count = 0;
    unsafe {
        let this_task = mach2::traps::current_task();
        mach2::task::task_threads(this_task, &raw mut thread_list, &raw mut thread_count);
    }

    let this_thread = unsafe { mach2::mach_init::mach_thread_self() };
    let panicking_thread = PANIC_THREAD_ID.load(Ordering::SeqCst);

    for index in 0..thread_count {
        let port = unsafe { *thread_list.add(index as usize) };
        if port == this_thread || port == panicking_thread {
            continue;
        }
        unsafe { mach2::thread_act::thread_suspend(port) };
    }
}
178
/// State held by the crash-handler subprocess while it serves one client.
///
/// The `OnceLock` fields are filled in by messages received in `on_message`.
pub struct CrashServer {
    /// Session metadata, set by message kind 1.
    initialization_params: OnceLock<InitCrashHandler>,
    /// Panic details, set by message kind 2.
    panic_info: OnceLock<CrashPanic>,
    /// GPU specs, set by the first message of kind 3.
    active_gpu: OnceLock<system_specs::GpuSpecs>,
    /// Set to `true` in `on_client_connected`; the timeout thread started by
    /// `crash_server` reads it to decide whether to shut the server down.
    has_connection: Arc<AtomicBool>,
}
185
/// Crash report metadata, serialized as JSON to `<session_id>.json` in the logs
/// directory by `on_minidump_created`.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct CrashInfo {
    /// Session metadata received from the client at startup.
    pub init: InitCrashHandler,
    /// Panic details, if the client reported a panic before the dump.
    pub panic: Option<CrashPanic>,
    /// Debug-formatted error when the minidump could not be produced.
    pub minidump_error: Option<String>,
    /// GPU list read from the system on Linux and FreeBSD; empty elsewhere.
    pub gpus: Vec<system_specs::GpuInfo>,
    /// GPU specs reported by the client, if any were received.
    pub active_gpu: Option<system_specs::GpuSpecs>,
}
194
/// Session metadata the client sends to the crash server as message kind 1,
/// JSON-encoded, right after connecting.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct InitCrashHandler {
    /// Also used as the file stem of the `.dmp` and `.json` files written to
    /// the logs directory.
    pub session_id: String,
    pub zed_version: String,
    pub binary: String,
    pub release_channel: String,
    pub commit_sha: String,
}
203
/// Panic details the client sends to the crash server as message kind 2,
/// JSON-encoded, from `panic_hook`.
#[derive(Deserialize, Serialize, Debug, Clone)]
pub struct CrashPanic {
    /// Panic payload as a string, or `"Box<Any>"` when the payload is not a string.
    pub message: String,
    /// Panic location formatted as `file:line`; empty when the location is unknown.
    pub span: String,
}
209
210impl minidumper::ServerHandler for CrashServer {
211 fn create_minidump_file(&self) -> Result<(File, PathBuf), io::Error> {
212 let err_message = "Missing initialization data";
213 let dump_path = paths::logs_dir()
214 .join(
215 &self
216 .initialization_params
217 .get()
218 .expect(err_message)
219 .session_id,
220 )
221 .with_extension("dmp");
222 let file = File::create(&dump_path)?;
223 Ok((file, dump_path))
224 }
225
226 fn on_minidump_created(&self, result: Result<MinidumpBinary, minidumper::Error>) -> LoopAction {
227 let minidump_error = match result {
228 Ok(MinidumpBinary { mut file, path, .. }) => {
229 use io::Write;
230 file.flush().ok();
231 // TODO: clean this up once https://github.com/EmbarkStudios/crash-handling/issues/101 is addressed
232 drop(file);
233 let original_file = File::open(&path).unwrap();
234 let compressed_path = path.with_extension("zstd");
235 let compressed_file = File::create(&compressed_path).unwrap();
236 zstd::stream::copy_encode(original_file, compressed_file, 0).ok();
237 fs::rename(&compressed_path, path).unwrap();
238 None
239 }
240 Err(e) => Some(format!("{e:?}")),
241 };
242
243 #[cfg(not(any(target_os = "linux", target_os = "freebsd")))]
244 let gpus = vec![];
245
246 #[cfg(any(target_os = "linux", target_os = "freebsd"))]
247 let gpus = match system_specs::read_gpu_info_from_sys_class_drm() {
248 Ok(gpus) => gpus,
249 Err(err) => {
250 log::warn!("Failed to collect GPU information for crash report: {err}");
251 vec![]
252 }
253 };
254
255 let crash_info = CrashInfo {
256 init: self
257 .initialization_params
258 .get()
259 .expect("not initialized")
260 .clone(),
261 panic: self.panic_info.get().cloned(),
262 minidump_error,
263 active_gpu: self.active_gpu.get().cloned(),
264 gpus,
265 };
266
267 let crash_data_path = paths::logs_dir()
268 .join(&crash_info.init.session_id)
269 .with_extension("json");
270
271 fs::write(crash_data_path, serde_json::to_vec(&crash_info).unwrap()).ok();
272
273 LoopAction::Exit
274 }
275
276 fn on_message(&self, kind: u32, buffer: Vec<u8>) {
277 match kind {
278 1 => {
279 let init_data =
280 serde_json::from_slice::<InitCrashHandler>(&buffer).expect("invalid init data");
281 self.initialization_params
282 .set(init_data)
283 .expect("already initialized");
284 }
285 2 => {
286 let panic_data =
287 serde_json::from_slice::<CrashPanic>(&buffer).expect("invalid panic data");
288 self.panic_info.set(panic_data).expect("already panicked");
289 }
290 3 => {
291 let gpu_specs: system_specs::GpuSpecs =
292 bincode::deserialize(&buffer).expect("gpu specs");
293 // we ignore the case where it was already set because this message is sent
294 // on each new window. in theory all zed windows should be using the same
295 // GPU so this is fine.
296 self.active_gpu.set(gpu_specs).ok();
297 }
298 _ => {
299 panic!("invalid message kind");
300 }
301 }
302 }
303
304 fn on_client_disconnected(&self, _clients: usize) -> LoopAction {
305 LoopAction::Exit
306 }
307
308 fn on_client_connected(&self, _clients: usize) -> LoopAction {
309 self.has_connection.store(true, Ordering::SeqCst);
310 LoopAction::Continue
311 }
312}
313
314pub fn panic_hook(info: &PanicHookInfo) {
315 let message = info.payload_as_str().unwrap_or("Box<Any>").to_owned();
316
317 let span = info
318 .location()
319 .map(|loc| format!("{}:{}", loc.file(), loc.line()))
320 .unwrap_or_default();
321
322 let current_thread = std::thread::current();
323 let thread_name = current_thread.name().unwrap_or("<unnamed>");
324
325 // wait 500ms for the crash handler process to start up
326 // if it's still not there just write panic info and no minidump
327 let retry_frequency = Duration::from_millis(100);
328 for _ in 0..5 {
329 if let Some(client) = CRASH_HANDLER.get() {
330 let location = info
331 .location()
332 .map_or_else(|| "<unknown>".to_owned(), |location| location.to_string());
333 log::error!("thread '{thread_name}' panicked at {location}:\n{message}...");
334 client
335 .send_message(
336 2,
337 serde_json::to_vec(&CrashPanic { message, span }).unwrap(),
338 )
339 .ok();
340 log::error!("triggering a crash to generate a minidump...");
341
342 #[cfg(target_os = "macos")]
343 PANIC_THREAD_ID.store(
344 unsafe { mach2::mach_init::mach_thread_self() },
345 Ordering::SeqCst,
346 );
347
348 cfg_if::cfg_if! {
349 if #[cfg(target_os = "windows")] {
350 // https://learn.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
351 CrashHandler.simulate_exception(Some(234)); // (MORE_DATA_AVAILABLE)
352 break;
353 } else {
354 std::process::abort();
355 }
356 }
357 }
358 thread::sleep(retry_frequency);
359 }
360}
361
/// Launches `exe --crash-handler <socket_name>` as a separate process on
/// Windows, with the "busy" cursor feedback disabled.
///
/// The process and thread handles returned by `CreateProcessW` are closed
/// immediately; the child keeps running on its own.
///
/// # Panics
///
/// Panics if the process cannot be created.
#[cfg(target_os = "windows")]
fn spawn_crash_handler_windows(exe: &Path, socket_name: &Path) {
    use std::ffi::OsString;
    use std::iter::once;
    use std::os::windows::ffi::OsStrExt;
    use windows::Win32::System::Threading::{
        CreateProcessW, PROCESS_CREATION_FLAGS, PROCESS_INFORMATION, STARTF_FORCEOFFFEEDBACK,
        STARTUPINFOW,
    };
    use windows::core::PWSTR;

    // Assemble the command line from the paths' native encoding.
    // `Path::display` is lossy for paths that are not valid Unicode, which
    // would hand the child a different path than the one we were given.
    let mut command_line = OsString::from("\"");
    command_line.push(exe);
    command_line.push("\" --crash-handler \"");
    command_line.push(socket_name);
    command_line.push("\"");
    // CreateProcessW wants a mutable, NUL-terminated UTF-16 buffer.
    let mut command_line: Vec<u16> = command_line.encode_wide().chain(once(0)).collect();

    let startup_info = STARTUPINFOW {
        cb: std::mem::size_of::<STARTUPINFOW>() as u32,
        // By default, Windows enables a "busy" cursor when a GUI application is launched.
        // This cursor is disabled once the application starts processing window messages.
        // Since the crash handler process doesn't process messages, this "busy" cursor stays
        // enabled for a long time. Disable the cursor feedback to prevent this from happening.
        dwFlags: STARTF_FORCEOFFFEEDBACK,
        ..Default::default()
    };

    let mut process_info = PROCESS_INFORMATION::default();

    unsafe {
        CreateProcessW(
            None,
            Some(PWSTR(command_line.as_mut_ptr())),
            None,
            None,
            false,
            PROCESS_CREATION_FLAGS(0),
            None,
            None,
            &startup_info,
            &mut process_info,
        )
        .expect("unable to spawn server process");

        windows::Win32::Foundation::CloseHandle(process_info.hProcess).ok();
        windows::Win32::Foundation::CloseHandle(process_info.hThread).ok();
    }
}
412
413pub fn crash_server(socket: &Path) {
414 let Ok(mut server) = minidumper::Server::with_name(socket) else {
415 log::info!("Couldn't create socket, there may already be a running crash server");
416 return;
417 };
418
419 let shutdown = Arc::new(AtomicBool::new(false));
420 let has_connection = Arc::new(AtomicBool::new(false));
421
422 thread::Builder::new()
423 .name("CrashServerTimeout".to_owned())
424 .spawn({
425 let shutdown = shutdown.clone();
426 let has_connection = has_connection.clone();
427 move || {
428 std::thread::sleep(CRASH_HANDLER_CONNECT_TIMEOUT);
429 if !has_connection.load(Ordering::SeqCst) {
430 shutdown.store(true, Ordering::SeqCst);
431 }
432 }
433 })
434 .unwrap();
435
436 server
437 .run(
438 Box::new(CrashServer {
439 initialization_params: OnceLock::new(),
440 panic_info: OnceLock::new(),
441 has_connection,
442 active_gpu: OnceLock::new(),
443 }),
444 &shutdown,
445 Some(CRASH_HANDLER_PING_TIMEOUT),
446 )
447 .expect("failed to run server");
448}