1use crash_handler::{CrashEventResult, CrashHandler};
2use log::info;
3use minidumper::{Client, LoopAction, MinidumpBinary};
4use release_channel::{RELEASE_CHANNEL, ReleaseChannel};
5use serde::{Deserialize, Serialize};
6use smol::process::Command;
7
8#[cfg(target_os = "macos")]
9use std::sync::atomic::AtomicU32;
10use std::{
11 env,
12 fs::{self, File},
13 io,
14 panic::{self, PanicHookInfo},
15 path::{Path, PathBuf},
16 process::{self},
17 sync::{
18 Arc, OnceLock,
19 atomic::{AtomicBool, Ordering},
20 },
21 thread,
22 time::Duration,
23};
24
25// set once the crash handler has initialized and the client has connected to it
26pub static CRASH_HANDLER: OnceLock<Arc<Client>> = OnceLock::new();
27// set when the first minidump request is made to avoid generating duplicate crash reports
28pub static REQUESTED_MINIDUMP: AtomicBool = AtomicBool::new(false);
29const CRASH_HANDLER_PING_TIMEOUT: Duration = Duration::from_secs(60);
30const CRASH_HANDLER_CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
31
32#[cfg(target_os = "macos")]
33static PANIC_THREAD_ID: AtomicU32 = AtomicU32::new(0);
34
35pub async fn init(crash_init: InitCrashHandler) {
36 let gen_var = match env::var("ZED_GENERATE_MINIDUMPS") {
37 Ok(v) => {
38 if v == "false" || v == "0" {
39 Some(false)
40 } else {
41 Some(true)
42 }
43 }
44 Err(_) => None,
45 };
46
47 match (gen_var, *RELEASE_CHANNEL) {
48 (Some(false), _) | (None, ReleaseChannel::Dev) => {
49 let old_hook = panic::take_hook();
50 panic::set_hook(Box::new(move |info| {
51 unsafe { env::set_var("RUST_BACKTRACE", "1") };
52 old_hook(info);
53 // prevent the macOS crash dialog from popping up
54 if cfg!(target_os = "macos") {
55 std::process::exit(1);
56 }
57 }));
58 return;
59 }
60 _ => {
61 panic::set_hook(Box::new(panic_hook));
62 }
63 }
64
65 let exe = env::current_exe().expect("unable to find ourselves");
66 let zed_pid = process::id();
67 // TODO: we should be able to get away with using 1 crash-handler process per machine,
68 // but for now we append the PID of the current process which makes it unique per remote
69 // server or interactive zed instance. This solves an issue where occasionally the socket
70 // used by the crash handler isn't destroyed correctly which causes it to stay on the file
71 // system and block further attempts to initialize crash handlers with that socket path.
72 let socket_name = paths::temp_dir().join(format!("zed-crash-handler-{zed_pid}"));
73 let _crash_handler = Command::new(exe)
74 .arg("--crash-handler")
75 .arg(&socket_name)
76 .spawn()
77 .expect("unable to spawn server process");
78 #[cfg(target_os = "linux")]
79 let server_pid = _crash_handler.id();
80 info!("spawning crash handler process");
81
82 let mut elapsed = Duration::ZERO;
83 let retry_frequency = Duration::from_millis(100);
84 let mut maybe_client = None;
85 while maybe_client.is_none() {
86 if let Ok(client) = Client::with_name(socket_name.as_path()) {
87 maybe_client = Some(client);
88 info!("connected to crash handler process after {elapsed:?}");
89 break;
90 }
91 elapsed += retry_frequency;
92 smol::Timer::after(retry_frequency).await;
93 }
94 let client = maybe_client.unwrap();
95 client
96 .send_message(1, serde_json::to_vec(&crash_init).unwrap())
97 .unwrap();
98
99 let client = Arc::new(client);
100 let handler = CrashHandler::attach(unsafe {
101 let client = client.clone();
102 crash_handler::make_crash_event(move |crash_context: &crash_handler::CrashContext| {
103 // only request a minidump once
104 let res = if REQUESTED_MINIDUMP
105 .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
106 .is_ok()
107 {
108 #[cfg(target_os = "macos")]
109 suspend_all_other_threads();
110
111 // on macos this "ping" is needed to ensure that all our
112 // `client.send_message` calls have been processed before we trigger the
113 // minidump request.
114 client.ping().ok();
115 client.request_dump(crash_context).is_ok()
116 } else {
117 true
118 };
119 CrashEventResult::Handled(res)
120 })
121 })
122 .expect("failed to attach signal handler");
123
124 #[cfg(target_os = "linux")]
125 {
126 handler.set_ptracer(Some(server_pid));
127 }
128 CRASH_HANDLER.set(client.clone()).ok();
129 std::mem::forget(handler);
130 info!("crash handler registered");
131
132 loop {
133 client.ping().ok();
134 smol::Timer::after(Duration::from_secs(10)).await;
135 }
136}
137
/// Suspends every thread of the current task except the calling thread and the thread
/// whose port is stored in `PANIC_THREAD_ID`.
///
/// The result of `task_threads` is not checked; if it leaves the count at zero, no thread
/// is suspended.
///
/// # Safety
///
/// Calls raw Mach APIs and reads `count` entries from the array they return.
/// NOTE(review): intended to be called only from the crash event callback while the
/// process is going down; suspended threads are never resumed here.
#[cfg(target_os = "macos")]
unsafe fn suspend_all_other_threads() {
    let mut thread_list: mach2::mach_types::thread_act_array_t = std::ptr::null_mut();
    let mut thread_count = 0;
    unsafe {
        let task = mach2::traps::current_task();
        mach2::task::task_threads(task, &raw mut thread_list, &raw mut thread_count);
    }
    let this_thread = unsafe { mach2::mach_init::mach_thread_self() };
    let panicking_thread = PANIC_THREAD_ID.load(Ordering::SeqCst);

    (0..thread_count)
        .map(|index| unsafe { *thread_list.add(index as usize) })
        .filter(|&thread| thread != this_thread && thread != panicking_thread)
        .for_each(|thread| {
            unsafe { mach2::thread_act::thread_suspend(thread) };
        });
}
155
156pub struct CrashServer {
157 initialization_params: OnceLock<InitCrashHandler>,
158 panic_info: OnceLock<CrashPanic>,
159 active_gpu: OnceLock<system_specs::GpuSpecs>,
160 has_connection: Arc<AtomicBool>,
161}
162
163#[derive(Debug, Deserialize, Serialize, Clone)]
164pub struct CrashInfo {
165 pub init: InitCrashHandler,
166 pub panic: Option<CrashPanic>,
167 pub minidump_error: Option<String>,
168 pub gpus: Vec<system_specs::GpuInfo>,
169 pub active_gpu: Option<system_specs::GpuSpecs>,
170}
171
172#[derive(Debug, Deserialize, Serialize, Clone)]
173pub struct InitCrashHandler {
174 pub session_id: String,
175 pub zed_version: String,
176 pub binary: String,
177 pub release_channel: String,
178 pub commit_sha: String,
179}
180
181#[derive(Deserialize, Serialize, Debug, Clone)]
182pub struct CrashPanic {
183 pub message: String,
184 pub span: String,
185}
186
187impl minidumper::ServerHandler for CrashServer {
188 fn create_minidump_file(&self) -> Result<(File, PathBuf), io::Error> {
189 let err_message = "Missing initialization data";
190 let dump_path = paths::logs_dir()
191 .join(
192 &self
193 .initialization_params
194 .get()
195 .expect(err_message)
196 .session_id,
197 )
198 .with_extension("dmp");
199 let file = File::create(&dump_path)?;
200 Ok((file, dump_path))
201 }
202
203 fn on_minidump_created(&self, result: Result<MinidumpBinary, minidumper::Error>) -> LoopAction {
204 let minidump_error = match result {
205 Ok(MinidumpBinary { mut file, path, .. }) => {
206 use io::Write;
207 file.flush().ok();
208 // TODO: clean this up once https://github.com/EmbarkStudios/crash-handling/issues/101 is addressed
209 drop(file);
210 let original_file = File::open(&path).unwrap();
211 let compressed_path = path.with_extension("zstd");
212 let compressed_file = File::create(&compressed_path).unwrap();
213 zstd::stream::copy_encode(original_file, compressed_file, 0).ok();
214 fs::rename(&compressed_path, path).unwrap();
215 None
216 }
217 Err(e) => Some(format!("{e:?}")),
218 };
219
220 #[cfg(not(any(target_os = "linux", target_os = "freebsd")))]
221 let gpus = vec![];
222
223 #[cfg(any(target_os = "linux", target_os = "freebsd"))]
224 let gpus = match system_specs::read_gpu_info_from_sys_class_drm() {
225 Ok(gpus) => gpus,
226 Err(err) => {
227 log::warn!("Failed to collect GPU information for crash report: {err}");
228 vec![]
229 }
230 };
231
232 let crash_info = CrashInfo {
233 init: self
234 .initialization_params
235 .get()
236 .expect("not initialized")
237 .clone(),
238 panic: self.panic_info.get().cloned(),
239 minidump_error,
240 active_gpu: self.active_gpu.get().cloned(),
241 gpus,
242 };
243
244 let crash_data_path = paths::logs_dir()
245 .join(&crash_info.init.session_id)
246 .with_extension("json");
247
248 fs::write(crash_data_path, serde_json::to_vec(&crash_info).unwrap()).ok();
249
250 LoopAction::Exit
251 }
252
253 fn on_message(&self, kind: u32, buffer: Vec<u8>) {
254 match kind {
255 1 => {
256 let init_data =
257 serde_json::from_slice::<InitCrashHandler>(&buffer).expect("invalid init data");
258 self.initialization_params
259 .set(init_data)
260 .expect("already initialized");
261 }
262 2 => {
263 let panic_data =
264 serde_json::from_slice::<CrashPanic>(&buffer).expect("invalid panic data");
265 self.panic_info.set(panic_data).expect("already panicked");
266 }
267 3 => {
268 let gpu_specs: system_specs::GpuSpecs =
269 bincode::deserialize(&buffer).expect("gpu specs");
270 // we ignore the case where it was already set because this message is sent
271 // on each new window. in theory all zed windows should be using the same
272 // GPU so this is fine.
273 self.active_gpu.set(gpu_specs).ok();
274 }
275 _ => {
276 panic!("invalid message kind");
277 }
278 }
279 }
280
281 fn on_client_disconnected(&self, _clients: usize) -> LoopAction {
282 LoopAction::Exit
283 }
284
285 fn on_client_connected(&self, _clients: usize) -> LoopAction {
286 self.has_connection.store(true, Ordering::SeqCst);
287 LoopAction::Continue
288 }
289}
290
291pub fn panic_hook(info: &PanicHookInfo) {
292 // Don't handle a panic on threads that are not relevant to the main execution.
293 if extension_host::wasm_host::IS_WASM_THREAD.with(|v| v.load(Ordering::Acquire)) {
294 log::error!("wasm thread panicked!");
295 return;
296 }
297
298 let message = info.payload_as_str().unwrap_or("Box<Any>").to_owned();
299
300 let span = info
301 .location()
302 .map(|loc| format!("{}:{}", loc.file(), loc.line()))
303 .unwrap_or_default();
304
305 let current_thread = std::thread::current();
306 let thread_name = current_thread.name().unwrap_or("<unnamed>");
307
308 // wait 500ms for the crash handler process to start up
309 // if it's still not there just write panic info and no minidump
310 let retry_frequency = Duration::from_millis(100);
311 for _ in 0..5 {
312 if let Some(client) = CRASH_HANDLER.get() {
313 let location = info
314 .location()
315 .map_or_else(|| "<unknown>".to_owned(), |location| location.to_string());
316 log::error!("thread '{thread_name}' panicked at {location}:\n{message}...");
317 client
318 .send_message(
319 2,
320 serde_json::to_vec(&CrashPanic { message, span }).unwrap(),
321 )
322 .ok();
323 log::error!("triggering a crash to generate a minidump...");
324
325 #[cfg(target_os = "macos")]
326 PANIC_THREAD_ID.store(
327 unsafe { mach2::mach_init::mach_thread_self() },
328 Ordering::SeqCst,
329 );
330
331 cfg_if::cfg_if! {
332 if #[cfg(target_os = "windows")] {
333 // https://learn.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
334 CrashHandler.simulate_exception(Some(234)); // (MORE_DATA_AVAILABLE)
335 break;
336 } else {
337 std::process::abort();
338 }
339 }
340 }
341 thread::sleep(retry_frequency);
342 }
343}
344
345pub fn crash_server(socket: &Path) {
346 let Ok(mut server) = minidumper::Server::with_name(socket) else {
347 log::info!("Couldn't create socket, there may already be a running crash server");
348 return;
349 };
350
351 let shutdown = Arc::new(AtomicBool::new(false));
352 let has_connection = Arc::new(AtomicBool::new(false));
353
354 thread::Builder::new()
355 .name("CrashServerTimeout".to_owned())
356 .spawn({
357 let shutdown = shutdown.clone();
358 let has_connection = has_connection.clone();
359 move || {
360 std::thread::sleep(CRASH_HANDLER_CONNECT_TIMEOUT);
361 if !has_connection.load(Ordering::SeqCst) {
362 shutdown.store(true, Ordering::SeqCst);
363 }
364 }
365 })
366 .unwrap();
367
368 server
369 .run(
370 Box::new(CrashServer {
371 initialization_params: OnceLock::new(),
372 panic_info: OnceLock::new(),
373 has_connection,
374 active_gpu: OnceLock::new(),
375 }),
376 &shutdown,
377 Some(CRASH_HANDLER_PING_TIMEOUT),
378 )
379 .expect("failed to run server");
380}