crashes.rs

  1use crash_handler::{CrashEventResult, CrashHandler};
  2use futures::future::BoxFuture;
  3use log::info;
  4use minidumper::{Client, LoopAction, MinidumpBinary, Server, SocketName};
  5use parking_lot::Mutex;
  6use release_channel::{RELEASE_CHANNEL, ReleaseChannel};
  7use serde::{Deserialize, Serialize};
  8use std::mem;
  9
 10#[cfg(not(target_os = "windows"))]
 11use smol::process::Command;
 12use system_specs::GpuSpecs;
 13
 14#[cfg(target_os = "macos")]
 15use std::sync::atomic::AtomicU32;
 16use std::{
 17    env,
 18    fs::{self, File},
 19    io,
 20    panic::{self, PanicHookInfo},
 21    path::{Path, PathBuf},
 22    process::{self},
 23    sync::{
 24        Arc, OnceLock,
 25        atomic::{AtomicBool, Ordering},
 26    },
 27    thread,
 28    time::Duration,
 29};
 30
// Set once the crash handler has initialized and the client has connected to it.
// Read by the crash-event callback installed in `init` and by
// `send_crash_server_message`.
static CRASH_HANDLER: OnceLock<Arc<Client>> = OnceLock::new();
// Set when the first minidump request is made to avoid generating duplicate crash reports.
pub static REQUESTED_MINIDUMP: AtomicBool = AtomicBool::new(false);
// Passed as the last argument of `Server::run` in `crash_server`.
// NOTE(review): presumably how long the server tolerates a silent client
// before giving up — confirm against the minidumper documentation.
const CRASH_HANDLER_PING_TIMEOUT: Duration = Duration::from_secs(60);
// How long `crash_server` waits for a first client connection before it
// raises the shutdown flag.
const CRASH_HANDLER_CONNECT_TIMEOUT: Duration = Duration::from_secs(10);

// Messages queued by `send_crash_server_message` while `CRASH_HANDLER` is
// still unset; drained and sent once the client has connected.
static PENDING_CRASH_SERVER_MESSAGES: Mutex<Vec<CrashServerMessage>> = Mutex::new(Vec::new());

// Thread port of the panicking thread, stored by `panic_hook` so that
// `suspend_all_other_threads` leaves that thread running. Zero until a panic
// has been recorded.
#[cfg(target_os = "macos")]
static PANIC_THREAD_ID: AtomicU32 = AtomicU32::new(0);
 42
 43fn should_install_crash_handler() -> bool {
 44    if let Ok(value) = env::var("ZED_GENERATE_MINIDUMPS") {
 45        return value == "true" || value == "1";
 46    }
 47
 48    if *RELEASE_CHANNEL == ReleaseChannel::Dev {
 49        return false;
 50    }
 51
 52    true
 53}
 54
 55/// Install crash signal handlers and spawn the crash-handler subprocess.
 56///
 57/// The synchronous portion (signal handlers, panic hook) runs inline.
 58/// The async keepalive task is passed to `spawn` so the caller decides
 59/// which executor to schedule it on.
 60pub fn init(crash_init: InitCrashHandler, spawn: impl FnOnce(BoxFuture<'static, ()>)) {
 61    if !should_install_crash_handler() {
 62        let old_hook = panic::take_hook();
 63        panic::set_hook(Box::new(move |info| {
 64            unsafe { env::set_var("RUST_BACKTRACE", "1") };
 65            old_hook(info);
 66            // prevent the macOS crash dialog from popping up
 67            if cfg!(target_os = "macos") {
 68                std::process::exit(1);
 69            }
 70        }));
 71        return;
 72    }
 73
 74    panic::set_hook(Box::new(panic_hook));
 75
 76    let handler = CrashHandler::attach(unsafe {
 77        crash_handler::make_crash_event(move |crash_context: &crash_handler::CrashContext| {
 78            let Some(client) = CRASH_HANDLER.get() else {
 79                return CrashEventResult::Handled(false);
 80            };
 81
 82            // only request a minidump once
 83            let res = if REQUESTED_MINIDUMP
 84                .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
 85                .is_ok()
 86            {
 87                #[cfg(target_os = "macos")]
 88                suspend_all_other_threads();
 89
 90                // on macos this "ping" is needed to ensure that all our
 91                // `client.send_message` calls have been processed before we trigger the
 92                // minidump request.
 93                client.ping().ok();
 94                client.request_dump(crash_context).is_ok()
 95            } else {
 96                true
 97            };
 98            CrashEventResult::Handled(res)
 99        })
100    })
101    .expect("failed to attach signal handler");
102
103    info!("crash signal handlers installed");
104
105    spawn(Box::pin(connect_and_keepalive(crash_init, handler)));
106}
107
108/// Spawn the crash-handler subprocess, connect the IPC client, and run the
109/// keepalive ping loop. Called on a background executor by [`init`].
110async fn connect_and_keepalive(crash_init: InitCrashHandler, handler: CrashHandler) {
111    let exe = env::current_exe().expect("unable to find ourselves");
112    let zed_pid = process::id();
113    let socket_name = paths::temp_dir().join(format!("zed-crash-handler-{zed_pid}"));
114    #[cfg(not(target_os = "windows"))]
115    let _crash_handler = Command::new(exe)
116        .arg("--crash-handler")
117        .arg(&socket_name)
118        .spawn()
119        .expect("unable to spawn server process");
120
121    #[cfg(target_os = "windows")]
122    spawn_crash_handler_windows(&exe, &socket_name);
123
124    info!("spawning crash handler process");
125    send_crash_server_message(CrashServerMessage::Init(crash_init));
126
127    let mut elapsed = Duration::ZERO;
128    let retry_frequency = Duration::from_millis(100);
129    let mut maybe_client = None;
130    while maybe_client.is_none() {
131        if let Ok(client) = Client::with_name(SocketName::Path(&socket_name)) {
132            maybe_client = Some(client);
133            info!("connected to crash handler process after {elapsed:?}");
134            break;
135        }
136        elapsed += retry_frequency;
137        // Crash reporting is called outside of gpui in the remote server right now
138        #[allow(clippy::disallowed_methods)]
139        smol::Timer::after(retry_frequency).await;
140    }
141    let client = maybe_client.unwrap();
142    let client = Arc::new(client);
143
144    #[cfg(target_os = "linux")]
145    handler.set_ptracer(Some(_crash_handler.id()));
146
147    // Publishing the client to the OnceLock makes it visible to the signal
148    // handler callback installed earlier.
149    CRASH_HANDLER.set(client.clone()).ok();
150    let messages: Vec<_> = mem::take(PENDING_CRASH_SERVER_MESSAGES.lock().as_mut());
151    for message in messages.into_iter() {
152        send_crash_server_message(message);
153    }
154    // mem::forget so that the drop is not called
155    mem::forget(handler);
156    info!("crash handler registered");
157
158    loop {
159        client.ping().ok();
160        // Crash reporting is called outside of gpui in the remote server right now
161        #[allow(clippy::disallowed_methods)]
162        smol::Timer::after(Duration::from_secs(10)).await;
163    }
164}
165
/// Suspends every thread of the current task except the calling thread and
/// the thread recorded in `PANIC_THREAD_ID`.
///
/// Called from the crash-event callback right before the minidump is
/// requested. The return value of `task_threads` is not checked; `count`
/// starts at zero, so the loop does nothing unless the call filled it in.
///
/// # Safety
///
/// NOTE(review): the caller's obligations are not spelled out in this file.
/// The function dereferences the thread array returned by `task_threads` and
/// never resumes the threads it suspends, so it presumably must only be used
/// when the process is about to terminate — confirm.
#[cfg(target_os = "macos")]
unsafe fn suspend_all_other_threads() {
    let task = unsafe { mach2::traps::current_task() };
    let mut threads: mach2::mach_types::thread_act_array_t = std::ptr::null_mut();
    let mut count = 0;
    unsafe {
        mach2::task::task_threads(task, &raw mut threads, &raw mut count);
    }
    let current = unsafe { mach2::mach_init::mach_thread_self() };
    // Thread that ran `panic_hook`, if any; it stays runnable alongside us.
    let panic_thread = PANIC_THREAD_ID.load(Ordering::SeqCst);
    for i in 0..count {
        let t = unsafe { *threads.add(i as usize) };
        if t != current && t != panic_thread {
            unsafe { mach2::thread_act::thread_suspend(t) };
        }
    }
}
183
/// State held by the crash-handler server process.
///
/// The optional fields are filled in from client messages by the
/// `minidumper::ServerHandler` implementation and are written out alongside
/// the minidump once one has been created.
pub struct CrashServer {
    // Payload of the last `Init` message; must be present before a minidump
    // file can be created.
    initialization_params: Mutex<Option<InitCrashHandler>>,
    // Payload of the last `Panic` message, if any.
    panic_info: Mutex<Option<CrashPanic>>,
    // Payload of the last `GPUInfo` message, if any.
    active_gpu: Mutex<Option<system_specs::GpuSpecs>>,
    // Payload of the last `UserInfo` message, if any.
    user_info: Mutex<Option<UserInfo>>,
    // Set to `true` when a client connects; shared with the timeout thread
    // started in `crash_server`.
    has_connection: Arc<AtomicBool>,
}
191
/// Crash metadata that the crash server serializes as JSON to
/// `<logs_dir>/<session_id>.json` after a minidump request has been processed.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct CrashInfo {
    /// Initialization data received from the client.
    pub init: InitCrashHandler,
    /// Panic details, if a panic was reported before the crash.
    pub panic: Option<CrashPanic>,
    /// Description of the error if producing the minidump failed, otherwise `None`.
    pub minidump_error: Option<String>,
    /// GPUs read from the system on Linux and FreeBSD; empty on other platforms.
    pub gpus: Vec<system_specs::GpuInfo>,
    /// GPU reported by the client through `set_gpu_info`, if any.
    pub active_gpu: Option<system_specs::GpuSpecs>,
    /// User details reported by the client through `set_user_info`, if any.
    pub user_info: Option<UserInfo>,
}
201
/// Identification data handed to [`init`] and forwarded to the crash server
/// as its `Init` message.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct InitCrashHandler {
    /// Used as the file stem of the `.dmp` and `.json` files written to the
    /// logs directory.
    pub session_id: String,
    // NOTE(review): the remaining fields are only stored and serialized in
    // this file; their exact format is defined by the caller — confirm there.
    pub zed_version: String,
    pub binary: String,
    pub release_channel: String,
    pub commit_sha: String,
}
210
/// Panic details captured by `panic_hook` and sent to the crash server.
#[derive(Deserialize, Serialize, Debug, Clone)]
pub struct CrashPanic {
    /// The panic payload as a string, or `"Box<Any>"` when the payload is not
    /// a string.
    pub message: String,
    /// Panic location formatted as `file:line`; empty when the location is
    /// unavailable.
    pub span: String,
}
216
/// User details forwarded to the crash server through `set_user_info` and
/// included in the crash metadata.
// NOTE(review): the meaning of the individual fields is defined by the
// caller; this file only stores and serializes them.
#[derive(Deserialize, Serialize, Debug, Clone)]
pub struct UserInfo {
    pub metrics_id: Option<String>,
    pub is_staff: Option<bool>,
}
222
223fn send_crash_server_message(message: CrashServerMessage) {
224    let Some(crash_server) = CRASH_HANDLER.get() else {
225        PENDING_CRASH_SERVER_MESSAGES.lock().push(message);
226        return;
227    };
228    let data = match serde_json::to_vec(&message) {
229        Ok(data) => data,
230        Err(err) => {
231            log::warn!("Failed to serialize crash server message: {:?}", err);
232            return;
233        }
234    };
235
236    if let Err(err) = crash_server.send_message(0, data) {
237        log::warn!("Failed to send data to crash server {:?}", err);
238    }
239}
240
241pub fn set_gpu_info(specs: GpuSpecs) {
242    send_crash_server_message(CrashServerMessage::GPUInfo(specs));
243}
244
245pub fn set_user_info(info: UserInfo) {
246    send_crash_server_message(CrashServerMessage::UserInfo(info));
247}
248
/// Messages sent from the client to the crash server, JSON-encoded.
///
/// Each variant replaces the corresponding piece of state held by
/// `CrashServer`.
#[derive(Serialize, Deserialize, Debug)]
enum CrashServerMessage {
    // Initialization data; queued by `connect_and_keepalive` before connecting.
    Init(InitCrashHandler),
    // Panic details sent by `panic_hook`.
    Panic(CrashPanic),
    // GPU specs sent by `set_gpu_info`.
    GPUInfo(GpuSpecs),
    // User details sent by `set_user_info`.
    UserInfo(UserInfo),
}
256
257impl minidumper::ServerHandler for CrashServer {
258    fn create_minidump_file(&self) -> Result<(File, PathBuf), io::Error> {
259        let dump_path = paths::logs_dir()
260            .join(
261                &self
262                    .initialization_params
263                    .lock()
264                    .as_ref()
265                    .expect("Missing initialization data")
266                    .session_id,
267            )
268            .with_extension("dmp");
269        let file = File::create(&dump_path)?;
270        Ok((file, dump_path))
271    }
272
273    fn on_minidump_created(&self, result: Result<MinidumpBinary, minidumper::Error>) -> LoopAction {
274        let minidump_error = match result {
275            Ok(MinidumpBinary { mut file, path, .. }) => {
276                use io::Write;
277                file.flush().ok();
278                // TODO: clean this up once https://github.com/EmbarkStudios/crash-handling/issues/101 is addressed
279                drop(file);
280                let original_file = File::open(&path).unwrap();
281                let compressed_path = path.with_extension("zstd");
282                let compressed_file = File::create(&compressed_path).unwrap();
283                zstd::stream::copy_encode(original_file, compressed_file, 0).ok();
284                fs::rename(&compressed_path, path).unwrap();
285                None
286            }
287            Err(e) => Some(format!("{e:?}")),
288        };
289
290        #[cfg(not(any(target_os = "linux", target_os = "freebsd")))]
291        let gpus = vec![];
292
293        #[cfg(any(target_os = "linux", target_os = "freebsd"))]
294        let gpus = match system_specs::read_gpu_info_from_sys_class_drm() {
295            Ok(gpus) => gpus,
296            Err(err) => {
297                log::warn!("Failed to collect GPU information for crash report: {err}");
298                vec![]
299            }
300        };
301
302        let crash_info = CrashInfo {
303            init: self
304                .initialization_params
305                .lock()
306                .clone()
307                .expect("not initialized"),
308            panic: self.panic_info.lock().clone(),
309            minidump_error,
310            active_gpu: self.active_gpu.lock().clone(),
311            gpus,
312            user_info: self.user_info.lock().clone(),
313        };
314
315        let crash_data_path = paths::logs_dir()
316            .join(&crash_info.init.session_id)
317            .with_extension("json");
318
319        fs::write(crash_data_path, serde_json::to_vec(&crash_info).unwrap()).ok();
320
321        LoopAction::Exit
322    }
323
324    fn on_message(&self, _: u32, buffer: Vec<u8>) {
325        let message: CrashServerMessage =
326            serde_json::from_slice(&buffer).expect("invalid init data");
327        match message {
328            CrashServerMessage::Init(init_data) => {
329                self.initialization_params.lock().replace(init_data);
330            }
331            CrashServerMessage::Panic(crash_panic) => {
332                self.panic_info.lock().replace(crash_panic);
333            }
334            CrashServerMessage::GPUInfo(gpu_specs) => {
335                self.active_gpu.lock().replace(gpu_specs);
336            }
337            CrashServerMessage::UserInfo(user_info) => {
338                self.user_info.lock().replace(user_info);
339            }
340        }
341    }
342
343    fn on_client_disconnected(&self, _clients: usize) -> LoopAction {
344        LoopAction::Exit
345    }
346
347    fn on_client_connected(&self, _clients: usize) -> LoopAction {
348        self.has_connection.store(true, Ordering::SeqCst);
349        LoopAction::Continue
350    }
351}
352
353pub fn panic_hook(info: &PanicHookInfo) {
354    let message = info.payload_as_str().unwrap_or("Box<Any>").to_owned();
355
356    let span = info
357        .location()
358        .map(|loc| format!("{}:{}", loc.file(), loc.line()))
359        .unwrap_or_default();
360
361    let current_thread = std::thread::current();
362    let thread_name = current_thread.name().unwrap_or("<unnamed>");
363
364    // wait 500ms for the crash handler process to start up
365    // if it's still not there just write panic info and no minidump
366    let retry_frequency = Duration::from_millis(100);
367    for _ in 0..5 {
368        if CRASH_HANDLER.get().is_some() {
369            break;
370        }
371        thread::sleep(retry_frequency);
372    }
373    let location = info
374        .location()
375        .map_or_else(|| "<unknown>".to_owned(), |location| location.to_string());
376    log::error!("thread '{thread_name}' panicked at {location}:\n{message}...");
377
378    send_crash_server_message(CrashServerMessage::Panic(CrashPanic { message, span }));
379    log::error!("triggering a crash to generate a minidump...");
380
381    #[cfg(target_os = "macos")]
382    PANIC_THREAD_ID.store(
383        unsafe { mach2::mach_init::mach_thread_self() },
384        Ordering::SeqCst,
385    );
386
387    cfg_if::cfg_if! {
388        if #[cfg(target_os = "windows")] {
389            // https://learn.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
390            CrashHandler.simulate_exception(Some(234)); // (MORE_DATA_AVAILABLE)
391        } else {
392            std::process::abort();
393        }
394    }
395}
396
/// Launches `exe --crash-handler <socket_name>` as a separate process through
/// `CreateProcessW` and closes the returned process and thread handles
/// immediately.
///
/// # Panics
///
/// Panics if the process cannot be created.
#[cfg(target_os = "windows")]
fn spawn_crash_handler_windows(exe: &Path, socket_name: &Path) {
    use std::ffi::OsStr;
    use std::iter::once;
    use std::os::windows::ffi::OsStrExt;
    use windows::Win32::Foundation::CloseHandle;
    use windows::Win32::System::Threading::{
        CreateProcessW, PROCESS_CREATION_FLAGS, PROCESS_INFORMATION, STARTF_FORCEOFFFEEDBACK,
        STARTUPINFOW,
    };
    use windows::core::PWSTR;

    // NUL-terminated UTF-16 command line, with both paths quoted.
    let command_line_text = format!(
        "\"{}\" --crash-handler \"{}\"",
        exe.display(),
        socket_name.display()
    );
    let mut command_line_wide: Vec<u16> = OsStr::new(&command_line_text)
        .encode_wide()
        .chain(once(0))
        .collect();

    // By default, Windows enables a "busy" cursor when a GUI application is launched.
    // This cursor is disabled once the application starts processing window messages.
    // Since the crash handler process doesn't process messages, this "busy" cursor stays enabled for a long time.
    // Disable the cursor feedback to prevent this from happening.
    let startup_info = STARTUPINFOW {
        cb: std::mem::size_of::<STARTUPINFOW>() as u32,
        dwFlags: STARTF_FORCEOFFFEEDBACK,
        ..Default::default()
    };

    let mut process_info = PROCESS_INFORMATION::default();

    unsafe {
        CreateProcessW(
            None,
            Some(PWSTR(command_line_wide.as_mut_ptr())),
            None,
            None,
            false,
            PROCESS_CREATION_FLAGS(0),
            None,
            None,
            &startup_info,
            &mut process_info,
        )
        .expect("unable to spawn server process");
    }

    // The handles are not used afterwards, so release them right away.
    unsafe {
        CloseHandle(process_info.hProcess).ok();
        CloseHandle(process_info.hThread).ok();
    }
}
447
448pub fn crash_server(socket: &Path) {
449    let Ok(mut server) = Server::with_name(SocketName::Path(socket)) else {
450        log::info!("Couldn't create socket, there may already be a running crash server");
451        return;
452    };
453
454    let shutdown = Arc::new(AtomicBool::new(false));
455    let has_connection = Arc::new(AtomicBool::new(false));
456
457    thread::Builder::new()
458        .name("CrashServerTimeout".to_owned())
459        .spawn({
460            let shutdown = shutdown.clone();
461            let has_connection = has_connection.clone();
462            move || {
463                std::thread::sleep(CRASH_HANDLER_CONNECT_TIMEOUT);
464                if !has_connection.load(Ordering::SeqCst) {
465                    shutdown.store(true, Ordering::SeqCst);
466                }
467            }
468        })
469        .unwrap();
470
471    server
472        .run(
473            Box::new(CrashServer {
474                initialization_params: Mutex::default(),
475                panic_info: Mutex::default(),
476                user_info: Mutex::default(),
477                has_connection,
478                active_gpu: Mutex::default(),
479            }),
480            &shutdown,
481            Some(CRASH_HANDLER_PING_TIMEOUT),
482        )
483        .expect("failed to run server");
484}