1use crash_handler::{CrashEventResult, CrashHandler};
2use log::info;
3use minidumper::{Client, LoopAction, MinidumpBinary};
4use release_channel::{RELEASE_CHANNEL, ReleaseChannel};
5use serde::{Deserialize, Serialize};
6use smol::process::Command;
7
8#[cfg(target_os = "macos")]
9use std::sync::atomic::AtomicU32;
10use std::{
11 env,
12 fs::{self, File},
13 io,
14 panic::{self, PanicHookInfo},
15 path::{Path, PathBuf},
16 process::{self},
17 sync::{
18 Arc, OnceLock,
19 atomic::{AtomicBool, Ordering},
20 },
21 thread,
22 time::Duration,
23};
24
25// set once the crash handler has initialized and the client has connected to it
26pub static CRASH_HANDLER: OnceLock<Arc<Client>> = OnceLock::new();
27// set when the first minidump request is made to avoid generating duplicate crash reports
28pub static REQUESTED_MINIDUMP: AtomicBool = AtomicBool::new(false);
29const CRASH_HANDLER_PING_TIMEOUT: Duration = Duration::from_secs(60);
30const CRASH_HANDLER_CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
31
32#[cfg(target_os = "macos")]
33static PANIC_THREAD_ID: AtomicU32 = AtomicU32::new(0);
34
35pub async fn init(crash_init: InitCrashHandler) {
36 if *RELEASE_CHANNEL == ReleaseChannel::Dev && env::var("ZED_GENERATE_MINIDUMPS").is_err() {
37 let old_hook = panic::take_hook();
38 panic::set_hook(Box::new(move |info| {
39 unsafe { env::set_var("RUST_BACKTRACE", "1") };
40 old_hook(info);
41 // prevent the macOS crash dialog from popping up
42 std::process::exit(1);
43 }));
44 return;
45 } else {
46 panic::set_hook(Box::new(panic_hook));
47 }
48
49 let exe = env::current_exe().expect("unable to find ourselves");
50 let zed_pid = process::id();
51 // TODO: we should be able to get away with using 1 crash-handler process per machine,
52 // but for now we append the PID of the current process which makes it unique per remote
53 // server or interactive zed instance. This solves an issue where occasionally the socket
54 // used by the crash handler isn't destroyed correctly which causes it to stay on the file
55 // system and block further attempts to initialize crash handlers with that socket path.
56 let socket_name = paths::temp_dir().join(format!("zed-crash-handler-{zed_pid}"));
57 let _crash_handler = Command::new(exe)
58 .arg("--crash-handler")
59 .arg(&socket_name)
60 .spawn()
61 .expect("unable to spawn server process");
62 #[cfg(target_os = "linux")]
63 let server_pid = _crash_handler.id();
64 info!("spawning crash handler process");
65
66 let mut elapsed = Duration::ZERO;
67 let retry_frequency = Duration::from_millis(100);
68 let mut maybe_client = None;
69 while maybe_client.is_none() {
70 if let Ok(client) = Client::with_name(socket_name.as_path()) {
71 maybe_client = Some(client);
72 info!("connected to crash handler process after {elapsed:?}");
73 break;
74 }
75 elapsed += retry_frequency;
76 smol::Timer::after(retry_frequency).await;
77 }
78 let client = maybe_client.unwrap();
79 client
80 .send_message(1, serde_json::to_vec(&crash_init).unwrap())
81 .unwrap();
82
83 let client = Arc::new(client);
84 let handler = CrashHandler::attach(unsafe {
85 let client = client.clone();
86 crash_handler::make_crash_event(move |crash_context: &crash_handler::CrashContext| {
87 // only request a minidump once
88 let res = if REQUESTED_MINIDUMP
89 .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
90 .is_ok()
91 {
92 #[cfg(target_os = "macos")]
93 suspend_all_other_threads();
94
95 // on macos this "ping" is needed to ensure that all our
96 // `client.send_message` calls have been processed before we trigger the
97 // minidump request.
98 client.ping().ok();
99 client.request_dump(crash_context).is_ok()
100 } else {
101 true
102 };
103 CrashEventResult::Handled(res)
104 })
105 })
106 .expect("failed to attach signal handler");
107
108 #[cfg(target_os = "linux")]
109 {
110 handler.set_ptracer(Some(server_pid));
111 }
112 CRASH_HANDLER.set(client.clone()).ok();
113 std::mem::forget(handler);
114 info!("crash handler registered");
115
116 loop {
117 client.ping().ok();
118 smol::Timer::after(Duration::from_secs(10)).await;
119 }
120}
121
/// Suspends every thread of the current task except the calling thread and the thread
/// recorded in `PANIC_THREAD_ID`.
///
/// The result codes of `task_threads` and `thread_suspend` are ignored; if `task_threads`
/// leaves the count at its initial 0, no thread is touched.
///
/// # Safety
///
/// NOTE(review): the precise preconditions are not spelled out in this file. The only
/// visible caller is the crash-event callback in `init` — confirm before calling elsewhere.
#[cfg(target_os = "macos")]
unsafe fn suspend_all_other_threads() {
    let mut thread_list: mach2::mach_types::thread_act_array_t = std::ptr::null_mut();
    let mut thread_count = 0;
    unsafe {
        let task = mach2::traps::current_task();
        mach2::task::task_threads(task, &raw mut thread_list, &raw mut thread_count);
    }

    let this_thread = unsafe { mach2::mach_init::mach_thread_self() };
    let panicking_thread = PANIC_THREAD_ID.load(Ordering::SeqCst);

    for index in 0..thread_count {
        let thread = unsafe { *thread_list.add(index as usize) };
        if thread == this_thread || thread == panicking_thread {
            continue;
        }
        unsafe { mach2::thread_act::thread_suspend(thread) };
    }
}
139
140pub struct CrashServer {
141 initialization_params: OnceLock<InitCrashHandler>,
142 panic_info: OnceLock<CrashPanic>,
143 active_gpu: OnceLock<system_specs::GpuSpecs>,
144 has_connection: Arc<AtomicBool>,
145}
146
147#[derive(Debug, Deserialize, Serialize, Clone)]
148pub struct CrashInfo {
149 pub init: InitCrashHandler,
150 pub panic: Option<CrashPanic>,
151 pub minidump_error: Option<String>,
152 pub gpus: Vec<system_specs::GpuInfo>,
153 pub active_gpu: Option<system_specs::GpuSpecs>,
154}
155
156#[derive(Debug, Deserialize, Serialize, Clone)]
157pub struct InitCrashHandler {
158 pub session_id: String,
159 pub zed_version: String,
160 pub binary: String,
161 pub release_channel: String,
162 pub commit_sha: String,
163}
164
165#[derive(Deserialize, Serialize, Debug, Clone)]
166pub struct CrashPanic {
167 pub message: String,
168 pub span: String,
169}
170
171impl minidumper::ServerHandler for CrashServer {
172 fn create_minidump_file(&self) -> Result<(File, PathBuf), io::Error> {
173 let err_message = "Missing initialization data";
174 let dump_path = paths::logs_dir()
175 .join(
176 &self
177 .initialization_params
178 .get()
179 .expect(err_message)
180 .session_id,
181 )
182 .with_extension("dmp");
183 let file = File::create(&dump_path)?;
184 Ok((file, dump_path))
185 }
186
187 fn on_minidump_created(&self, result: Result<MinidumpBinary, minidumper::Error>) -> LoopAction {
188 let minidump_error = match result {
189 Ok(MinidumpBinary { mut file, path, .. }) => {
190 use io::Write;
191 file.flush().ok();
192 // TODO: clean this up once https://github.com/EmbarkStudios/crash-handling/issues/101 is addressed
193 drop(file);
194 let original_file = File::open(&path).unwrap();
195 let compressed_path = path.with_extension("zstd");
196 let compressed_file = File::create(&compressed_path).unwrap();
197 zstd::stream::copy_encode(original_file, compressed_file, 0).ok();
198 fs::rename(&compressed_path, path).unwrap();
199 None
200 }
201 Err(e) => Some(format!("{e:?}")),
202 };
203
204 #[cfg(not(any(target_os = "linux", target_os = "freebsd")))]
205 let gpus = vec![];
206
207 #[cfg(any(target_os = "linux", target_os = "freebsd"))]
208 let gpus = match system_specs::read_gpu_info_from_sys_class_drm() {
209 Ok(gpus) => gpus,
210 Err(err) => {
211 log::warn!("Failed to collect GPU information for crash report: {err}");
212 vec![]
213 }
214 };
215
216 let crash_info = CrashInfo {
217 init: self
218 .initialization_params
219 .get()
220 .expect("not initialized")
221 .clone(),
222 panic: self.panic_info.get().cloned(),
223 minidump_error,
224 active_gpu: self.active_gpu.get().cloned(),
225 gpus,
226 };
227
228 let crash_data_path = paths::logs_dir()
229 .join(&crash_info.init.session_id)
230 .with_extension("json");
231
232 fs::write(crash_data_path, serde_json::to_vec(&crash_info).unwrap()).ok();
233
234 LoopAction::Exit
235 }
236
237 fn on_message(&self, kind: u32, buffer: Vec<u8>) {
238 match kind {
239 1 => {
240 let init_data =
241 serde_json::from_slice::<InitCrashHandler>(&buffer).expect("invalid init data");
242 self.initialization_params
243 .set(init_data)
244 .expect("already initialized");
245 }
246 2 => {
247 let panic_data =
248 serde_json::from_slice::<CrashPanic>(&buffer).expect("invalid panic data");
249 self.panic_info.set(panic_data).expect("already panicked");
250 }
251 3 => {
252 let gpu_specs: system_specs::GpuSpecs =
253 bincode::deserialize(&buffer).expect("gpu specs");
254 self.active_gpu
255 .set(gpu_specs)
256 .expect("already set active gpu");
257 }
258 _ => {
259 panic!("invalid message kind");
260 }
261 }
262 }
263
264 fn on_client_disconnected(&self, _clients: usize) -> LoopAction {
265 LoopAction::Exit
266 }
267
268 fn on_client_connected(&self, _clients: usize) -> LoopAction {
269 self.has_connection.store(true, Ordering::SeqCst);
270 LoopAction::Continue
271 }
272}
273
274pub fn panic_hook(info: &PanicHookInfo) {
275 let message = info
276 .payload()
277 .downcast_ref::<&str>()
278 .map(|s| s.to_string())
279 .or_else(|| info.payload().downcast_ref::<String>().cloned())
280 .unwrap_or_else(|| "Box<Any>".to_string());
281
282 let span = info
283 .location()
284 .map(|loc| format!("{}:{}", loc.file(), loc.line()))
285 .unwrap_or_default();
286
287 // wait 500ms for the crash handler process to start up
288 // if it's still not there just write panic info and no minidump
289 let retry_frequency = Duration::from_millis(100);
290 for _ in 0..5 {
291 if let Some(client) = CRASH_HANDLER.get() {
292 client
293 .send_message(
294 2,
295 serde_json::to_vec(&CrashPanic { message, span }).unwrap(),
296 )
297 .ok();
298 log::error!("triggering a crash to generate a minidump...");
299
300 #[cfg(target_os = "macos")]
301 PANIC_THREAD_ID.store(
302 unsafe { mach2::mach_init::mach_thread_self() },
303 Ordering::SeqCst,
304 );
305
306 cfg_if::cfg_if! {
307 if #[cfg(target_os = "windows")] {
308 // https://learn.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
309 CrashHandler.simulate_exception(Some(234)); // (MORE_DATA_AVAILABLE)
310 break;
311 } else {
312 std::process::abort();
313 }
314 }
315 }
316 thread::sleep(retry_frequency);
317 }
318}
319
320pub fn crash_server(socket: &Path) {
321 let Ok(mut server) = minidumper::Server::with_name(socket) else {
322 log::info!("Couldn't create socket, there may already be a running crash server");
323 return;
324 };
325
326 let shutdown = Arc::new(AtomicBool::new(false));
327 let has_connection = Arc::new(AtomicBool::new(false));
328
329 thread::Builder::new()
330 .name("CrashServerTimeout".to_owned())
331 .spawn({
332 let shutdown = shutdown.clone();
333 let has_connection = has_connection.clone();
334 move || {
335 std::thread::sleep(CRASH_HANDLER_CONNECT_TIMEOUT);
336 if !has_connection.load(Ordering::SeqCst) {
337 shutdown.store(true, Ordering::SeqCst);
338 }
339 }
340 })
341 .unwrap();
342
343 server
344 .run(
345 Box::new(CrashServer {
346 initialization_params: OnceLock::new(),
347 panic_info: OnceLock::new(),
348 has_connection,
349 active_gpu: OnceLock::new(),
350 }),
351 &shutdown,
352 Some(CRASH_HANDLER_PING_TIMEOUT),
353 )
354 .expect("failed to run server");
355}