1use crash_handler::{CrashEventResult, CrashHandler};
2use log::info;
3use minidumper::{Client, LoopAction, MinidumpBinary};
4use release_channel::{RELEASE_CHANNEL, ReleaseChannel};
5use serde::{Deserialize, Serialize};
6
7#[cfg(not(target_os = "windows"))]
8use smol::process::Command;
9
10#[cfg(target_os = "macos")]
11use std::sync::atomic::AtomicU32;
12use std::{
13 env,
14 fs::{self, File},
15 io,
16 panic::{self, PanicHookInfo},
17 path::{Path, PathBuf},
18 process::{self},
19 sync::{
20 Arc, OnceLock,
21 atomic::{AtomicBool, Ordering},
22 },
23 thread,
24 time::Duration,
25};
26
/// Client connection to the crash-handler process.
///
/// Set once the crash handler has initialized and the client has connected to it.
pub static CRASH_HANDLER: OnceLock<Arc<Client>> = OnceLock::new();
/// Set when the first minidump request is made to avoid generating duplicate crash reports.
pub static REQUESTED_MINIDUMP: AtomicBool = AtomicBool::new(false);
/// Passed to `minidumper::Server::run` in `crash_server` as its optional timeout argument.
// NOTE(review): presumably the window after which a silent client is considered gone —
// confirm against the minidumper documentation.
const CRASH_HANDLER_PING_TIMEOUT: Duration = Duration::from_secs(60);
/// How long `crash_server` waits for a first client connection before raising its
/// shutdown flag.
const CRASH_HANDLER_CONNECT_TIMEOUT: Duration = Duration::from_secs(10);

/// Mach thread port of the panicking thread: stored by `panic_hook` and skipped by
/// `suspend_all_other_threads` so that thread keeps running.
#[cfg(target_os = "macos")]
static PANIC_THREAD_ID: AtomicU32 = AtomicU32::new(0);
36
37pub async fn init(crash_init: InitCrashHandler) {
38 let gen_var = match env::var("ZED_GENERATE_MINIDUMPS") {
39 Ok(v) => {
40 if v == "false" || v == "0" {
41 Some(false)
42 } else {
43 Some(true)
44 }
45 }
46 Err(_) => None,
47 };
48
49 match (gen_var, *RELEASE_CHANNEL) {
50 (Some(false), _) | (None, ReleaseChannel::Dev) => {
51 let old_hook = panic::take_hook();
52 panic::set_hook(Box::new(move |info| {
53 unsafe { env::set_var("RUST_BACKTRACE", "1") };
54 old_hook(info);
55 // prevent the macOS crash dialog from popping up
56 if cfg!(target_os = "macos") {
57 std::process::exit(1);
58 }
59 }));
60 return;
61 }
62 _ => {
63 panic::set_hook(Box::new(panic_hook));
64 }
65 }
66
67 let exe = env::current_exe().expect("unable to find ourselves");
68 let zed_pid = process::id();
69 // TODO: we should be able to get away with using 1 crash-handler process per machine,
70 // but for now we append the PID of the current process which makes it unique per remote
71 // server or interactive zed instance. This solves an issue where occasionally the socket
72 // used by the crash handler isn't destroyed correctly which causes it to stay on the file
73 // system and block further attempts to initialize crash handlers with that socket path.
74 let socket_name = paths::temp_dir().join(format!("zed-crash-handler-{zed_pid}"));
75 #[cfg(not(target_os = "windows"))]
76 let _crash_handler = Command::new(exe)
77 .arg("--crash-handler")
78 .arg(&socket_name)
79 .spawn()
80 .expect("unable to spawn server process");
81
82 #[cfg(target_os = "windows")]
83 spawn_crash_handler_windows(&exe, &socket_name);
84
85 #[cfg(target_os = "linux")]
86 let server_pid = _crash_handler.id();
87 info!("spawning crash handler process");
88
89 let mut elapsed = Duration::ZERO;
90 let retry_frequency = Duration::from_millis(100);
91 let mut maybe_client = None;
92 while maybe_client.is_none() {
93 if let Ok(client) = Client::with_name(socket_name.as_path()) {
94 maybe_client = Some(client);
95 info!("connected to crash handler process after {elapsed:?}");
96 break;
97 }
98 elapsed += retry_frequency;
99 // Crash reporting is called outside of gpui in the remote server right now
100 #[allow(clippy::disallowed_methods)]
101 smol::Timer::after(retry_frequency).await;
102 }
103 let client = maybe_client.unwrap();
104 client
105 .send_message(1, serde_json::to_vec(&crash_init).unwrap())
106 .unwrap();
107
108 let client = Arc::new(client);
109 let handler = CrashHandler::attach(unsafe {
110 let client = client.clone();
111 crash_handler::make_crash_event(move |crash_context: &crash_handler::CrashContext| {
112 // only request a minidump once
113 let res = if REQUESTED_MINIDUMP
114 .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
115 .is_ok()
116 {
117 #[cfg(target_os = "macos")]
118 suspend_all_other_threads();
119
120 // on macos this "ping" is needed to ensure that all our
121 // `client.send_message` calls have been processed before we trigger the
122 // minidump request.
123 client.ping().ok();
124 client.request_dump(crash_context).is_ok()
125 } else {
126 true
127 };
128 CrashEventResult::Handled(res)
129 })
130 })
131 .expect("failed to attach signal handler");
132
133 #[cfg(target_os = "linux")]
134 {
135 handler.set_ptracer(Some(server_pid));
136 }
137 CRASH_HANDLER.set(client.clone()).ok();
138 std::mem::forget(handler);
139 info!("crash handler registered");
140
141 loop {
142 client.ping().ok();
143 // Crash reporting is called outside of gpui in the remote server right now
144 #[allow(clippy::disallowed_methods)]
145 smol::Timer::after(Duration::from_secs(10)).await;
146 }
147}
148
/// Suspends every thread of the current task except the calling thread and the thread
/// recorded in `PANIC_THREAD_ID`.
///
/// The status returned by `task_threads` is not inspected; if the call leaves the count
/// at zero the loop simply does nothing.
///
/// # Safety
///
/// Calls raw Mach APIs and dereferences the thread array they fill in.
/// NOTE(review): suspended threads are never resumed here, so this is presumably only
/// meant to be called while the process is crashing — confirm with the caller.
#[cfg(target_os = "macos")]
unsafe fn suspend_all_other_threads() {
    let this_task = unsafe { mach2::traps::current_task() };
    let mut thread_list: mach2::mach_types::thread_act_array_t = std::ptr::null_mut();
    let mut thread_count = 0;
    unsafe {
        mach2::task::task_threads(this_task, &raw mut thread_list, &raw mut thread_count);
    }

    let this_thread = unsafe { mach2::mach_init::mach_thread_self() };
    let panicking_thread = PANIC_THREAD_ID.load(Ordering::SeqCst);
    for offset in 0..thread_count {
        let thread = unsafe { *thread_list.add(offset as usize) };
        // Leave the crashing thread and the panicking thread running.
        if thread == this_thread || thread == panicking_thread {
            continue;
        }
        unsafe { mach2::thread_act::thread_suspend(thread) };
    }
}
166
/// State kept by the crash-handler server process; it implements
/// `minidumper::ServerHandler` and is filled in from the messages the client sends.
pub struct CrashServer {
    /// Set from message kind `1`; required before a minidump file can be created.
    initialization_params: OnceLock<InitCrashHandler>,
    /// Set from message kind `2` when the client reports a panic.
    panic_info: OnceLock<CrashPanic>,
    /// Set from the first message of kind `3`; later ones are ignored.
    active_gpu: OnceLock<system_specs::GpuSpecs>,
    /// Flipped to `true` when a client connects; read by the timeout thread started in
    /// `crash_server`.
    has_connection: Arc<AtomicBool>,
}
173
/// Crash metadata that the crash server serializes as JSON into
/// `<logs_dir>/<session_id>.json` after a minidump attempt.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct CrashInfo {
    /// Initialization data received from the crashing process.
    pub init: InitCrashHandler,
    /// Panic details, if the crashing process reported a panic before the dump.
    pub panic: Option<CrashPanic>,
    /// Debug-formatted error when minidump creation failed, `None` otherwise.
    pub minidump_error: Option<String>,
    /// GPU list read from `/sys/class/drm` helpers on Linux/FreeBSD; empty elsewhere or
    /// when that read fails.
    pub gpus: Vec<system_specs::GpuInfo>,
    /// GPU specs reported by the crashing process, if any were sent.
    pub active_gpu: Option<system_specs::GpuSpecs>,
}
182
/// Initialization data sent by `init` to the crash server as JSON (message kind `1`).
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct InitCrashHandler {
    /// Used as the file stem of the `.dmp` and `.json` files written to the logs directory.
    pub session_id: String,
    // NOTE(review): the remaining fields are only carried through into `CrashInfo` here;
    // their exact format is defined by the caller of `init`.
    pub zed_version: String,
    pub binary: String,
    pub release_channel: String,
    pub commit_sha: String,
}
191
/// Panic details sent by `panic_hook` to the crash server as JSON (message kind `2`).
#[derive(Deserialize, Serialize, Debug, Clone)]
pub struct CrashPanic {
    /// Panic payload as a string, or `"Box<Any>"` when the payload is not a string.
    pub message: String,
    /// Panic location formatted as `file:line`; empty when the location is unknown.
    pub span: String,
}
197
198impl minidumper::ServerHandler for CrashServer {
199 fn create_minidump_file(&self) -> Result<(File, PathBuf), io::Error> {
200 let err_message = "Missing initialization data";
201 let dump_path = paths::logs_dir()
202 .join(
203 &self
204 .initialization_params
205 .get()
206 .expect(err_message)
207 .session_id,
208 )
209 .with_extension("dmp");
210 let file = File::create(&dump_path)?;
211 Ok((file, dump_path))
212 }
213
214 fn on_minidump_created(&self, result: Result<MinidumpBinary, minidumper::Error>) -> LoopAction {
215 let minidump_error = match result {
216 Ok(MinidumpBinary { mut file, path, .. }) => {
217 use io::Write;
218 file.flush().ok();
219 // TODO: clean this up once https://github.com/EmbarkStudios/crash-handling/issues/101 is addressed
220 drop(file);
221 let original_file = File::open(&path).unwrap();
222 let compressed_path = path.with_extension("zstd");
223 let compressed_file = File::create(&compressed_path).unwrap();
224 zstd::stream::copy_encode(original_file, compressed_file, 0).ok();
225 fs::rename(&compressed_path, path).unwrap();
226 None
227 }
228 Err(e) => Some(format!("{e:?}")),
229 };
230
231 #[cfg(not(any(target_os = "linux", target_os = "freebsd")))]
232 let gpus = vec![];
233
234 #[cfg(any(target_os = "linux", target_os = "freebsd"))]
235 let gpus = match system_specs::read_gpu_info_from_sys_class_drm() {
236 Ok(gpus) => gpus,
237 Err(err) => {
238 log::warn!("Failed to collect GPU information for crash report: {err}");
239 vec![]
240 }
241 };
242
243 let crash_info = CrashInfo {
244 init: self
245 .initialization_params
246 .get()
247 .expect("not initialized")
248 .clone(),
249 panic: self.panic_info.get().cloned(),
250 minidump_error,
251 active_gpu: self.active_gpu.get().cloned(),
252 gpus,
253 };
254
255 let crash_data_path = paths::logs_dir()
256 .join(&crash_info.init.session_id)
257 .with_extension("json");
258
259 fs::write(crash_data_path, serde_json::to_vec(&crash_info).unwrap()).ok();
260
261 LoopAction::Exit
262 }
263
264 fn on_message(&self, kind: u32, buffer: Vec<u8>) {
265 match kind {
266 1 => {
267 let init_data =
268 serde_json::from_slice::<InitCrashHandler>(&buffer).expect("invalid init data");
269 self.initialization_params
270 .set(init_data)
271 .expect("already initialized");
272 }
273 2 => {
274 let panic_data =
275 serde_json::from_slice::<CrashPanic>(&buffer).expect("invalid panic data");
276 self.panic_info.set(panic_data).expect("already panicked");
277 }
278 3 => {
279 let gpu_specs: system_specs::GpuSpecs =
280 bincode::deserialize(&buffer).expect("gpu specs");
281 // we ignore the case where it was already set because this message is sent
282 // on each new window. in theory all zed windows should be using the same
283 // GPU so this is fine.
284 self.active_gpu.set(gpu_specs).ok();
285 }
286 _ => {
287 panic!("invalid message kind");
288 }
289 }
290 }
291
292 fn on_client_disconnected(&self, _clients: usize) -> LoopAction {
293 LoopAction::Exit
294 }
295
296 fn on_client_connected(&self, _clients: usize) -> LoopAction {
297 self.has_connection.store(true, Ordering::SeqCst);
298 LoopAction::Continue
299 }
300}
301
302pub fn panic_hook(info: &PanicHookInfo) {
303 // Don't handle a panic on threads that are not relevant to the main execution.
304 if extension_host::wasm_host::IS_WASM_THREAD.with(|v| v.load(Ordering::Acquire)) {
305 log::error!("wasm thread panicked!");
306 return;
307 }
308
309 let message = info.payload_as_str().unwrap_or("Box<Any>").to_owned();
310
311 let span = info
312 .location()
313 .map(|loc| format!("{}:{}", loc.file(), loc.line()))
314 .unwrap_or_default();
315
316 let current_thread = std::thread::current();
317 let thread_name = current_thread.name().unwrap_or("<unnamed>");
318
319 // wait 500ms for the crash handler process to start up
320 // if it's still not there just write panic info and no minidump
321 let retry_frequency = Duration::from_millis(100);
322 for _ in 0..5 {
323 if let Some(client) = CRASH_HANDLER.get() {
324 let location = info
325 .location()
326 .map_or_else(|| "<unknown>".to_owned(), |location| location.to_string());
327 log::error!("thread '{thread_name}' panicked at {location}:\n{message}...");
328 client
329 .send_message(
330 2,
331 serde_json::to_vec(&CrashPanic { message, span }).unwrap(),
332 )
333 .ok();
334 log::error!("triggering a crash to generate a minidump...");
335
336 #[cfg(target_os = "macos")]
337 PANIC_THREAD_ID.store(
338 unsafe { mach2::mach_init::mach_thread_self() },
339 Ordering::SeqCst,
340 );
341
342 cfg_if::cfg_if! {
343 if #[cfg(target_os = "windows")] {
344 // https://learn.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
345 CrashHandler.simulate_exception(Some(234)); // (MORE_DATA_AVAILABLE)
346 break;
347 } else {
348 std::process::abort();
349 }
350 }
351 }
352 thread::sleep(retry_frequency);
353 }
354}
355
/// Starts the crash-handler server on Windows by launching `exe` with
/// `--crash-handler <socket_name>` through `CreateProcessW`.
///
/// The process and thread handles returned by the call are closed immediately; the
/// child keeps running on its own.
///
/// # Panics
///
/// Panics if the process cannot be created.
#[cfg(target_os = "windows")]
fn spawn_crash_handler_windows(exe: &Path, socket_name: &Path) {
    use std::ffi::OsStr;
    use std::iter::once;
    use std::os::windows::ffi::OsStrExt;
    use windows::Win32::System::Threading::{
        CreateProcessW, PROCESS_CREATION_FLAGS, PROCESS_INFORMATION, STARTF_FORCEOFFFEEDBACK,
        STARTUPINFOW,
    };
    use windows::core::PWSTR;

    // `CreateProcessW` wants a mutable, NUL-terminated UTF-16 command line.
    let command = format!(
        "\"{}\" --crash-handler \"{}\"",
        exe.display(),
        socket_name.display()
    );
    let mut wide_command: Vec<u16> = OsStr::new(&command)
        .encode_wide()
        .chain(once(0))
        .collect();

    let startup_info = STARTUPINFOW {
        cb: std::mem::size_of::<STARTUPINFOW>() as u32,
        // By default, Windows enables a "busy" cursor when a GUI application is launched.
        // This cursor is disabled once the application starts processing window messages.
        // Since the crash handler process doesn't process messages, this "busy" cursor
        // stays enabled for a long time.
        // Disable the cursor feedback to prevent this from happening.
        dwFlags: STARTF_FORCEOFFFEEDBACK,
        ..Default::default()
    };
    let mut child = PROCESS_INFORMATION::default();

    unsafe {
        CreateProcessW(
            None,
            Some(PWSTR(wide_command.as_mut_ptr())),
            None,
            None,
            false,
            PROCESS_CREATION_FLAGS(0),
            None,
            None,
            &startup_info,
            &mut child,
        )
        .expect("unable to spawn server process");

        // We never wait on the child, so release both handles right away.
        windows::Win32::Foundation::CloseHandle(child.hProcess).ok();
        windows::Win32::Foundation::CloseHandle(child.hThread).ok();
    }
}
406
407pub fn crash_server(socket: &Path) {
408 let Ok(mut server) = minidumper::Server::with_name(socket) else {
409 log::info!("Couldn't create socket, there may already be a running crash server");
410 return;
411 };
412
413 let shutdown = Arc::new(AtomicBool::new(false));
414 let has_connection = Arc::new(AtomicBool::new(false));
415
416 thread::Builder::new()
417 .name("CrashServerTimeout".to_owned())
418 .spawn({
419 let shutdown = shutdown.clone();
420 let has_connection = has_connection.clone();
421 move || {
422 std::thread::sleep(CRASH_HANDLER_CONNECT_TIMEOUT);
423 if !has_connection.load(Ordering::SeqCst) {
424 shutdown.store(true, Ordering::SeqCst);
425 }
426 }
427 })
428 .unwrap();
429
430 server
431 .run(
432 Box::new(CrashServer {
433 initialization_params: OnceLock::new(),
434 panic_info: OnceLock::new(),
435 has_connection,
436 active_gpu: OnceLock::new(),
437 }),
438 &shutdown,
439 Some(CRASH_HANDLER_PING_TIMEOUT),
440 )
441 .expect("failed to run server");
442}