1use crash_handler::{CrashEventResult, CrashHandler};
  2use log::info;
  3use minidumper::{Client, LoopAction, MinidumpBinary};
  4use release_channel::{RELEASE_CHANNEL, ReleaseChannel};
  5use serde::{Deserialize, Serialize};
  6use smol::process::Command;
  7
  8#[cfg(target_os = "macos")]
  9use std::sync::atomic::AtomicU32;
 10use std::{
 11    env,
 12    fs::{self, File},
 13    io,
 14    panic::{self, PanicHookInfo},
 15    path::{Path, PathBuf},
 16    process::{self},
 17    sync::{
 18        Arc, OnceLock,
 19        atomic::{AtomicBool, Ordering},
 20    },
 21    thread,
 22    time::Duration,
 23};
 24
 25// set once the crash handler has initialized and the client has connected to it
 26pub static CRASH_HANDLER: OnceLock<Arc<Client>> = OnceLock::new();
 27// set when the first minidump request is made to avoid generating duplicate crash reports
 28pub static REQUESTED_MINIDUMP: AtomicBool = AtomicBool::new(false);
 29const CRASH_HANDLER_PING_TIMEOUT: Duration = Duration::from_secs(60);
 30const CRASH_HANDLER_CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
 31
 32#[cfg(target_os = "macos")]
 33static PANIC_THREAD_ID: AtomicU32 = AtomicU32::new(0);
 34
 35pub async fn init(crash_init: InitCrashHandler) {
 36    if *RELEASE_CHANNEL == ReleaseChannel::Dev && env::var("ZED_GENERATE_MINIDUMPS").is_err() {
 37        let old_hook = panic::take_hook();
 38        panic::set_hook(Box::new(move |info| {
 39            unsafe { env::set_var("RUST_BACKTRACE", "1") };
 40            old_hook(info);
 41            // prevent the macOS crash dialog from popping up
 42            std::process::exit(1);
 43        }));
 44        return;
 45    } else {
 46        panic::set_hook(Box::new(panic_hook));
 47    }
 48
 49    let exe = env::current_exe().expect("unable to find ourselves");
 50    let zed_pid = process::id();
 51    // TODO: we should be able to get away with using 1 crash-handler process per machine,
 52    // but for now we append the PID of the current process which makes it unique per remote
 53    // server or interactive zed instance. This solves an issue where occasionally the socket
 54    // used by the crash handler isn't destroyed correctly which causes it to stay on the file
 55    // system and block further attempts to initialize crash handlers with that socket path.
 56    let socket_name = paths::temp_dir().join(format!("zed-crash-handler-{zed_pid}"));
 57    let _crash_handler = Command::new(exe)
 58        .arg("--crash-handler")
 59        .arg(&socket_name)
 60        .spawn()
 61        .expect("unable to spawn server process");
 62    #[cfg(target_os = "linux")]
 63    let server_pid = _crash_handler.id();
 64    info!("spawning crash handler process");
 65
 66    let mut elapsed = Duration::ZERO;
 67    let retry_frequency = Duration::from_millis(100);
 68    let mut maybe_client = None;
 69    while maybe_client.is_none() {
 70        if let Ok(client) = Client::with_name(socket_name.as_path()) {
 71            maybe_client = Some(client);
 72            info!("connected to crash handler process after {elapsed:?}");
 73            break;
 74        }
 75        elapsed += retry_frequency;
 76        smol::Timer::after(retry_frequency).await;
 77    }
 78    let client = maybe_client.unwrap();
 79    client
 80        .send_message(1, serde_json::to_vec(&crash_init).unwrap())
 81        .unwrap();
 82
 83    let client = Arc::new(client);
 84    let handler = CrashHandler::attach(unsafe {
 85        let client = client.clone();
 86        crash_handler::make_crash_event(move |crash_context: &crash_handler::CrashContext| {
 87            // only request a minidump once
 88            let res = if REQUESTED_MINIDUMP
 89                .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
 90                .is_ok()
 91            {
 92                #[cfg(target_os = "macos")]
 93                suspend_all_other_threads();
 94
 95                // on macos this "ping" is needed to ensure that all our
 96                // `client.send_message` calls have been processed before we trigger the
 97                // minidump request.
 98                client.ping().ok();
 99                client.request_dump(crash_context).is_ok()
100            } else {
101                true
102            };
103            CrashEventResult::Handled(res)
104        })
105    })
106    .expect("failed to attach signal handler");
107
108    #[cfg(target_os = "linux")]
109    {
110        handler.set_ptracer(Some(server_pid));
111    }
112    CRASH_HANDLER.set(client.clone()).ok();
113    std::mem::forget(handler);
114    info!("crash handler registered");
115
116    loop {
117        client.ping().ok();
118        smol::Timer::after(Duration::from_secs(10)).await;
119    }
120}
121
/// Suspends every thread of the current task except the calling thread and the thread
/// recorded in `PANIC_THREAD_ID`.
///
/// Return codes of the Mach calls are ignored; if `task_threads` does not report any
/// threads the loop simply does nothing.
///
/// # Safety
///
/// Calls raw Mach APIs and dereferences the thread array they return. It is called from
/// the crash event callback installed in `init`; suspended threads are never resumed here.
#[cfg(target_os = "macos")]
unsafe fn suspend_all_other_threads() {
    let mut thread_list: mach2::mach_types::thread_act_array_t = std::ptr::null_mut();
    let mut thread_count = 0;
    unsafe {
        let own_task = mach2::traps::current_task();
        mach2::task::task_threads(own_task, &raw mut thread_list, &raw mut thread_count);
    }

    let this_thread = unsafe { mach2::mach_init::mach_thread_self() };
    let panicking_thread = PANIC_THREAD_ID.load(Ordering::SeqCst);

    (0..thread_count)
        // Only indices below the count reported by `task_threads` are read.
        .map(|index| unsafe { *thread_list.add(index as usize) })
        .filter(|&thread| thread != this_thread && thread != panicking_thread)
        .for_each(|thread| {
            unsafe { mach2::thread_act::thread_suspend(thread) };
        });
}
139
140pub struct CrashServer {
141    initialization_params: OnceLock<InitCrashHandler>,
142    panic_info: OnceLock<CrashPanic>,
143    active_gpu: OnceLock<system_specs::GpuSpecs>,
144    has_connection: Arc<AtomicBool>,
145}
146
147#[derive(Debug, Deserialize, Serialize, Clone)]
148pub struct CrashInfo {
149    pub init: InitCrashHandler,
150    pub panic: Option<CrashPanic>,
151    pub minidump_error: Option<String>,
152    pub gpus: Vec<system_specs::GpuInfo>,
153    pub active_gpu: Option<system_specs::GpuSpecs>,
154}
155
156#[derive(Debug, Deserialize, Serialize, Clone)]
157pub struct InitCrashHandler {
158    pub session_id: String,
159    pub zed_version: String,
160    pub binary: String,
161    pub release_channel: String,
162    pub commit_sha: String,
163}
164
165#[derive(Deserialize, Serialize, Debug, Clone)]
166pub struct CrashPanic {
167    pub message: String,
168    pub span: String,
169}
170
171impl minidumper::ServerHandler for CrashServer {
172    fn create_minidump_file(&self) -> Result<(File, PathBuf), io::Error> {
173        let err_message = "Missing initialization data";
174        let dump_path = paths::logs_dir()
175            .join(
176                &self
177                    .initialization_params
178                    .get()
179                    .expect(err_message)
180                    .session_id,
181            )
182            .with_extension("dmp");
183        let file = File::create(&dump_path)?;
184        Ok((file, dump_path))
185    }
186
187    fn on_minidump_created(&self, result: Result<MinidumpBinary, minidumper::Error>) -> LoopAction {
188        let minidump_error = match result {
189            Ok(MinidumpBinary { mut file, path, .. }) => {
190                use io::Write;
191                file.flush().ok();
192                // TODO: clean this up once https://github.com/EmbarkStudios/crash-handling/issues/101 is addressed
193                drop(file);
194                let original_file = File::open(&path).unwrap();
195                let compressed_path = path.with_extension("zstd");
196                let compressed_file = File::create(&compressed_path).unwrap();
197                zstd::stream::copy_encode(original_file, compressed_file, 0).ok();
198                fs::rename(&compressed_path, path).unwrap();
199                None
200            }
201            Err(e) => Some(format!("{e:?}")),
202        };
203
204        #[cfg(not(any(target_os = "linux", target_os = "freebsd")))]
205        let gpus = vec![];
206
207        #[cfg(any(target_os = "linux", target_os = "freebsd"))]
208        let gpus = match system_specs::read_gpu_info_from_sys_class_drm() {
209            Ok(gpus) => gpus,
210            Err(err) => {
211                log::warn!("Failed to collect GPU information for crash report: {err}");
212                vec![]
213            }
214        };
215
216        let crash_info = CrashInfo {
217            init: self
218                .initialization_params
219                .get()
220                .expect("not initialized")
221                .clone(),
222            panic: self.panic_info.get().cloned(),
223            minidump_error,
224            active_gpu: self.active_gpu.get().cloned(),
225            gpus,
226        };
227
228        let crash_data_path = paths::logs_dir()
229            .join(&crash_info.init.session_id)
230            .with_extension("json");
231
232        fs::write(crash_data_path, serde_json::to_vec(&crash_info).unwrap()).ok();
233
234        LoopAction::Exit
235    }
236
237    fn on_message(&self, kind: u32, buffer: Vec<u8>) {
238        match kind {
239            1 => {
240                let init_data =
241                    serde_json::from_slice::<InitCrashHandler>(&buffer).expect("invalid init data");
242                self.initialization_params
243                    .set(init_data)
244                    .expect("already initialized");
245            }
246            2 => {
247                let panic_data =
248                    serde_json::from_slice::<CrashPanic>(&buffer).expect("invalid panic data");
249                self.panic_info.set(panic_data).expect("already panicked");
250            }
251            3 => {
252                let gpu_specs: system_specs::GpuSpecs =
253                    bincode::deserialize(&buffer).expect("gpu specs");
254                self.active_gpu
255                    .set(gpu_specs)
256                    .expect("already set active gpu");
257            }
258            _ => {
259                panic!("invalid message kind");
260            }
261        }
262    }
263
264    fn on_client_disconnected(&self, _clients: usize) -> LoopAction {
265        LoopAction::Exit
266    }
267
268    fn on_client_connected(&self, _clients: usize) -> LoopAction {
269        self.has_connection.store(true, Ordering::SeqCst);
270        LoopAction::Continue
271    }
272}
273
274pub fn panic_hook(info: &PanicHookInfo) {
275    let message = info
276        .payload()
277        .downcast_ref::<&str>()
278        .map(|s| s.to_string())
279        .or_else(|| info.payload().downcast_ref::<String>().cloned())
280        .unwrap_or_else(|| "Box<Any>".to_string());
281
282    let span = info
283        .location()
284        .map(|loc| format!("{}:{}", loc.file(), loc.line()))
285        .unwrap_or_default();
286
287    // wait 500ms for the crash handler process to start up
288    // if it's still not there just write panic info and no minidump
289    let retry_frequency = Duration::from_millis(100);
290    for _ in 0..5 {
291        if let Some(client) = CRASH_HANDLER.get() {
292            client
293                .send_message(
294                    2,
295                    serde_json::to_vec(&CrashPanic { message, span }).unwrap(),
296                )
297                .ok();
298            log::error!("triggering a crash to generate a minidump...");
299
300            #[cfg(target_os = "macos")]
301            PANIC_THREAD_ID.store(
302                unsafe { mach2::mach_init::mach_thread_self() },
303                Ordering::SeqCst,
304            );
305
306            cfg_if::cfg_if! {
307                if #[cfg(target_os = "windows")] {
308                    // https://learn.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
309                    CrashHandler.simulate_exception(Some(234)); // (MORE_DATA_AVAILABLE)
310                    break;
311                } else {
312                    std::process::abort();
313                }
314            }
315        }
316        thread::sleep(retry_frequency);
317    }
318}
319
320pub fn crash_server(socket: &Path) {
321    let Ok(mut server) = minidumper::Server::with_name(socket) else {
322        log::info!("Couldn't create socket, there may already be a running crash server");
323        return;
324    };
325
326    let shutdown = Arc::new(AtomicBool::new(false));
327    let has_connection = Arc::new(AtomicBool::new(false));
328
329    thread::Builder::new()
330        .name("CrashServerTimeout".to_owned())
331        .spawn({
332            let shutdown = shutdown.clone();
333            let has_connection = has_connection.clone();
334            move || {
335                std::thread::sleep(CRASH_HANDLER_CONNECT_TIMEOUT);
336                if !has_connection.load(Ordering::SeqCst) {
337                    shutdown.store(true, Ordering::SeqCst);
338                }
339            }
340        })
341        .unwrap();
342
343    server
344        .run(
345            Box::new(CrashServer {
346                initialization_params: OnceLock::new(),
347                panic_info: OnceLock::new(),
348                has_connection,
349                active_gpu: OnceLock::new(),
350            }),
351            &shutdown,
352            Some(CRASH_HANDLER_PING_TIMEOUT),
353        )
354        .expect("failed to run server");
355}