1use crash_handler::{CrashEventResult, CrashHandler};
2use log::info;
3use minidumper::{Client, LoopAction, MinidumpBinary};
4use release_channel::{RELEASE_CHANNEL, ReleaseChannel};
5use serde::{Deserialize, Serialize};
6use smol::process::Command;
7
8#[cfg(target_os = "macos")]
9use std::sync::atomic::AtomicU32;
10use std::{
11 env,
12 fs::{self, File},
13 io,
14 panic::{self, PanicHookInfo},
15 path::{Path, PathBuf},
16 process::{self},
17 sync::{
18 Arc, OnceLock,
19 atomic::{AtomicBool, Ordering},
20 },
21 thread,
22 time::Duration,
23};
24
25// set once the crash handler has initialized and the client has connected to it
26pub static CRASH_HANDLER: OnceLock<Arc<Client>> = OnceLock::new();
27// set when the first minidump request is made to avoid generating duplicate crash reports
28pub static REQUESTED_MINIDUMP: AtomicBool = AtomicBool::new(false);
29const CRASH_HANDLER_PING_TIMEOUT: Duration = Duration::from_secs(60);
30const CRASH_HANDLER_CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
31
32#[cfg(target_os = "macos")]
33static PANIC_THREAD_ID: AtomicU32 = AtomicU32::new(0);
34
35pub async fn init(crash_init: InitCrashHandler) {
36 let gen_var = match env::var("ZED_GENERATE_MINIDUMPS") {
37 Ok(v) => {
38 if v == "false" || v == "0" {
39 Some(false)
40 } else {
41 Some(true)
42 }
43 }
44 Err(_) => None,
45 };
46
47 match (gen_var, *RELEASE_CHANNEL) {
48 (Some(false), _) | (None, ReleaseChannel::Dev) => {
49 let old_hook = panic::take_hook();
50 panic::set_hook(Box::new(move |info| {
51 unsafe { env::set_var("RUST_BACKTRACE", "1") };
52 old_hook(info);
53 // prevent the macOS crash dialog from popping up
54 std::process::exit(1);
55 }));
56 return;
57 }
58 (Some(true), _) | (None, _) => {
59 panic::set_hook(Box::new(panic_hook));
60 }
61 }
62
63 let exe = env::current_exe().expect("unable to find ourselves");
64 let zed_pid = process::id();
65 // TODO: we should be able to get away with using 1 crash-handler process per machine,
66 // but for now we append the PID of the current process which makes it unique per remote
67 // server or interactive zed instance. This solves an issue where occasionally the socket
68 // used by the crash handler isn't destroyed correctly which causes it to stay on the file
69 // system and block further attempts to initialize crash handlers with that socket path.
70 let socket_name = paths::temp_dir().join(format!("zed-crash-handler-{zed_pid}"));
71 let _crash_handler = Command::new(exe)
72 .arg("--crash-handler")
73 .arg(&socket_name)
74 .spawn()
75 .expect("unable to spawn server process");
76 #[cfg(target_os = "linux")]
77 let server_pid = _crash_handler.id();
78 info!("spawning crash handler process");
79
80 let mut elapsed = Duration::ZERO;
81 let retry_frequency = Duration::from_millis(100);
82 let mut maybe_client = None;
83 while maybe_client.is_none() {
84 if let Ok(client) = Client::with_name(socket_name.as_path()) {
85 maybe_client = Some(client);
86 info!("connected to crash handler process after {elapsed:?}");
87 break;
88 }
89 elapsed += retry_frequency;
90 smol::Timer::after(retry_frequency).await;
91 }
92 let client = maybe_client.unwrap();
93 client
94 .send_message(1, serde_json::to_vec(&crash_init).unwrap())
95 .unwrap();
96
97 let client = Arc::new(client);
98 let handler = CrashHandler::attach(unsafe {
99 let client = client.clone();
100 crash_handler::make_crash_event(move |crash_context: &crash_handler::CrashContext| {
101 // only request a minidump once
102 let res = if REQUESTED_MINIDUMP
103 .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
104 .is_ok()
105 {
106 #[cfg(target_os = "macos")]
107 suspend_all_other_threads();
108
109 // on macos this "ping" is needed to ensure that all our
110 // `client.send_message` calls have been processed before we trigger the
111 // minidump request.
112 client.ping().ok();
113 client.request_dump(crash_context).is_ok()
114 } else {
115 true
116 };
117 CrashEventResult::Handled(res)
118 })
119 })
120 .expect("failed to attach signal handler");
121
122 #[cfg(target_os = "linux")]
123 {
124 handler.set_ptracer(Some(server_pid));
125 }
126 CRASH_HANDLER.set(client.clone()).ok();
127 std::mem::forget(handler);
128 info!("crash handler registered");
129
130 loop {
131 client.ping().ok();
132 smol::Timer::after(Duration::from_secs(10)).await;
133 }
134}
135
/// Suspends every thread of the current task except the calling thread and the thread
/// recorded in `PANIC_THREAD_ID`.
///
/// The results of `task_threads` and `thread_suspend` are not checked; if the thread list
/// cannot be fetched the count stays `0` and nothing is suspended.
///
/// # Safety
/// NOTE(review): the original states no contract. Suspended threads are never resumed
/// here, so this is only meant for the crash path where the process is about to die.
#[cfg(target_os = "macos")]
unsafe fn suspend_all_other_threads() {
    let this_task = unsafe { mach2::traps::current_task() };
    let mut thread_list: mach2::mach_types::thread_act_array_t = std::ptr::null_mut();
    let mut thread_count = 0;
    unsafe {
        mach2::task::task_threads(this_task, &raw mut thread_list, &raw mut thread_count);
    }

    let this_thread = unsafe { mach2::mach_init::mach_thread_self() };
    let panicking_thread = PANIC_THREAD_ID.load(Ordering::SeqCst);

    // Walk the raw array by index; the pointer is only dereferenced for `index < thread_count`.
    let mut index = 0;
    while index < thread_count {
        let candidate = unsafe { *thread_list.add(index as usize) };
        index += 1;
        if candidate == this_thread || candidate == panicking_thread {
            continue;
        }
        unsafe { mach2::thread_act::thread_suspend(candidate) };
    }
}
153
154pub struct CrashServer {
155 initialization_params: OnceLock<InitCrashHandler>,
156 panic_info: OnceLock<CrashPanic>,
157 active_gpu: OnceLock<system_specs::GpuSpecs>,
158 has_connection: Arc<AtomicBool>,
159}
160
161#[derive(Debug, Deserialize, Serialize, Clone)]
162pub struct CrashInfo {
163 pub init: InitCrashHandler,
164 pub panic: Option<CrashPanic>,
165 pub minidump_error: Option<String>,
166 pub gpus: Vec<system_specs::GpuInfo>,
167 pub active_gpu: Option<system_specs::GpuSpecs>,
168}
169
170#[derive(Debug, Deserialize, Serialize, Clone)]
171pub struct InitCrashHandler {
172 pub session_id: String,
173 pub zed_version: String,
174 pub binary: String,
175 pub release_channel: String,
176 pub commit_sha: String,
177}
178
179#[derive(Deserialize, Serialize, Debug, Clone)]
180pub struct CrashPanic {
181 pub message: String,
182 pub span: String,
183}
184
185impl minidumper::ServerHandler for CrashServer {
186 fn create_minidump_file(&self) -> Result<(File, PathBuf), io::Error> {
187 let err_message = "Missing initialization data";
188 let dump_path = paths::logs_dir()
189 .join(
190 &self
191 .initialization_params
192 .get()
193 .expect(err_message)
194 .session_id,
195 )
196 .with_extension("dmp");
197 let file = File::create(&dump_path)?;
198 Ok((file, dump_path))
199 }
200
201 fn on_minidump_created(&self, result: Result<MinidumpBinary, minidumper::Error>) -> LoopAction {
202 let minidump_error = match result {
203 Ok(MinidumpBinary { mut file, path, .. }) => {
204 use io::Write;
205 file.flush().ok();
206 // TODO: clean this up once https://github.com/EmbarkStudios/crash-handling/issues/101 is addressed
207 drop(file);
208 let original_file = File::open(&path).unwrap();
209 let compressed_path = path.with_extension("zstd");
210 let compressed_file = File::create(&compressed_path).unwrap();
211 zstd::stream::copy_encode(original_file, compressed_file, 0).ok();
212 fs::rename(&compressed_path, path).unwrap();
213 None
214 }
215 Err(e) => Some(format!("{e:?}")),
216 };
217
218 #[cfg(not(any(target_os = "linux", target_os = "freebsd")))]
219 let gpus = vec![];
220
221 #[cfg(any(target_os = "linux", target_os = "freebsd"))]
222 let gpus = match system_specs::read_gpu_info_from_sys_class_drm() {
223 Ok(gpus) => gpus,
224 Err(err) => {
225 log::warn!("Failed to collect GPU information for crash report: {err}");
226 vec![]
227 }
228 };
229
230 let crash_info = CrashInfo {
231 init: self
232 .initialization_params
233 .get()
234 .expect("not initialized")
235 .clone(),
236 panic: self.panic_info.get().cloned(),
237 minidump_error,
238 active_gpu: self.active_gpu.get().cloned(),
239 gpus,
240 };
241
242 let crash_data_path = paths::logs_dir()
243 .join(&crash_info.init.session_id)
244 .with_extension("json");
245
246 fs::write(crash_data_path, serde_json::to_vec(&crash_info).unwrap()).ok();
247
248 LoopAction::Exit
249 }
250
251 fn on_message(&self, kind: u32, buffer: Vec<u8>) {
252 match kind {
253 1 => {
254 let init_data =
255 serde_json::from_slice::<InitCrashHandler>(&buffer).expect("invalid init data");
256 self.initialization_params
257 .set(init_data)
258 .expect("already initialized");
259 }
260 2 => {
261 let panic_data =
262 serde_json::from_slice::<CrashPanic>(&buffer).expect("invalid panic data");
263 self.panic_info.set(panic_data).expect("already panicked");
264 }
265 3 => {
266 let gpu_specs: system_specs::GpuSpecs =
267 bincode::deserialize(&buffer).expect("gpu specs");
268 self.active_gpu
269 .set(gpu_specs)
270 .expect("already set active gpu");
271 }
272 _ => {
273 panic!("invalid message kind");
274 }
275 }
276 }
277
278 fn on_client_disconnected(&self, _clients: usize) -> LoopAction {
279 LoopAction::Exit
280 }
281
282 fn on_client_connected(&self, _clients: usize) -> LoopAction {
283 self.has_connection.store(true, Ordering::SeqCst);
284 LoopAction::Continue
285 }
286}
287
288pub fn panic_hook(info: &PanicHookInfo) {
289 let message = info
290 .payload()
291 .downcast_ref::<&str>()
292 .map(|s| s.to_string())
293 .or_else(|| info.payload().downcast_ref::<String>().cloned())
294 .unwrap_or_else(|| "Box<Any>".to_string());
295
296 let span = info
297 .location()
298 .map(|loc| format!("{}:{}", loc.file(), loc.line()))
299 .unwrap_or_default();
300
301 // wait 500ms for the crash handler process to start up
302 // if it's still not there just write panic info and no minidump
303 let retry_frequency = Duration::from_millis(100);
304 for _ in 0..5 {
305 if let Some(client) = CRASH_HANDLER.get() {
306 client
307 .send_message(
308 2,
309 serde_json::to_vec(&CrashPanic { message, span }).unwrap(),
310 )
311 .ok();
312 log::error!("triggering a crash to generate a minidump...");
313
314 #[cfg(target_os = "macos")]
315 PANIC_THREAD_ID.store(
316 unsafe { mach2::mach_init::mach_thread_self() },
317 Ordering::SeqCst,
318 );
319
320 cfg_if::cfg_if! {
321 if #[cfg(target_os = "windows")] {
322 // https://learn.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
323 CrashHandler.simulate_exception(Some(234)); // (MORE_DATA_AVAILABLE)
324 break;
325 } else {
326 std::process::abort();
327 }
328 }
329 }
330 thread::sleep(retry_frequency);
331 }
332}
333
334pub fn crash_server(socket: &Path) {
335 let Ok(mut server) = minidumper::Server::with_name(socket) else {
336 log::info!("Couldn't create socket, there may already be a running crash server");
337 return;
338 };
339
340 let shutdown = Arc::new(AtomicBool::new(false));
341 let has_connection = Arc::new(AtomicBool::new(false));
342
343 thread::Builder::new()
344 .name("CrashServerTimeout".to_owned())
345 .spawn({
346 let shutdown = shutdown.clone();
347 let has_connection = has_connection.clone();
348 move || {
349 std::thread::sleep(CRASH_HANDLER_CONNECT_TIMEOUT);
350 if !has_connection.load(Ordering::SeqCst) {
351 shutdown.store(true, Ordering::SeqCst);
352 }
353 }
354 })
355 .unwrap();
356
357 server
358 .run(
359 Box::new(CrashServer {
360 initialization_params: OnceLock::new(),
361 panic_info: OnceLock::new(),
362 has_connection,
363 active_gpu: OnceLock::new(),
364 }),
365 &shutdown,
366 Some(CRASH_HANDLER_PING_TIMEOUT),
367 )
368 .expect("failed to run server");
369}