1use crash_handler::{CrashEventResult, CrashHandler};
2use log::info;
3use minidumper::{Client, LoopAction, MinidumpBinary};
4use release_channel::{RELEASE_CHANNEL, ReleaseChannel};
5use serde::{Deserialize, Serialize};
6
7#[cfg(target_os = "macos")]
8use std::sync::atomic::AtomicU32;
9use std::{
10 env,
11 fs::{self, File},
12 io,
13 panic::{self, PanicHookInfo},
14 path::{Path, PathBuf},
15 process::{self, Command},
16 sync::{
17 Arc, OnceLock,
18 atomic::{AtomicBool, Ordering},
19 },
20 thread,
21 time::Duration,
22};
23
/// Connection to the out-of-process crash handler.
///
/// Set once the crash handler has initialized and the client has connected to it
/// (see `init`); `panic_hook` polls this to find out whether a minidump can be requested.
pub static CRASH_HANDLER: OnceLock<Arc<Client>> = OnceLock::new();
/// Set when the first minidump request is made to avoid generating duplicate crash reports.
pub static REQUESTED_MINIDUMP: AtomicBool = AtomicBool::new(false);
// Passed to `Server::run` in `crash_server` as its timeout argument.
// NOTE(review): presumably the period after which a silent client is considered stale;
// `init` pings every 10 seconds — confirm against the minidumper documentation.
const CRASH_HANDLER_PING_TIMEOUT: Duration = Duration::from_secs(60);
// How long the crash server waits for its first client connection before the
// "CrashServerTimeout" thread raises the shutdown flag.
const CRASH_HANDLER_CONNECT_TIMEOUT: Duration = Duration::from_secs(10);

// Mach thread port of the thread that ran `panic_hook`; stays 0 until a panic has been
// recorded. `suspend_all_other_threads` reads it so that thread is not suspended.
#[cfg(target_os = "macos")]
static PANIC_THREAD_ID: AtomicU32 = AtomicU32::new(0);
33
34pub async fn init(crash_init: InitCrashHandler) {
35 if *RELEASE_CHANNEL == ReleaseChannel::Dev && env::var("ZED_GENERATE_MINIDUMPS").is_err() {
36 let old_hook = panic::take_hook();
37 panic::set_hook(Box::new(move |info| {
38 unsafe { env::set_var("RUST_BACKTRACE", "1") };
39 old_hook(info);
40 // prevent the macOS crash dialog from popping up
41 std::process::exit(1);
42 }));
43 return;
44 } else {
45 panic::set_hook(Box::new(panic_hook));
46 }
47
48 let exe = env::current_exe().expect("unable to find ourselves");
49 let zed_pid = process::id();
50 // TODO: we should be able to get away with using 1 crash-handler process per machine,
51 // but for now we append the PID of the current process which makes it unique per remote
52 // server or interactive zed instance. This solves an issue where occasionally the socket
53 // used by the crash handler isn't destroyed correctly which causes it to stay on the file
54 // system and block further attempts to initialize crash handlers with that socket path.
55 let socket_name = paths::temp_dir().join(format!("zed-crash-handler-{zed_pid}"));
56 #[allow(unused)]
57 let server_pid = Command::new(exe)
58 .arg("--crash-handler")
59 .arg(&socket_name)
60 .spawn()
61 .expect("unable to spawn server process")
62 .id();
63 info!("spawning crash handler process");
64
65 let mut elapsed = Duration::ZERO;
66 let retry_frequency = Duration::from_millis(100);
67 let mut maybe_client = None;
68 while maybe_client.is_none() {
69 if let Ok(client) = Client::with_name(socket_name.as_path()) {
70 maybe_client = Some(client);
71 info!("connected to crash handler process after {elapsed:?}");
72 break;
73 }
74 elapsed += retry_frequency;
75 smol::Timer::after(retry_frequency).await;
76 }
77 let client = maybe_client.unwrap();
78 client
79 .send_message(1, serde_json::to_vec(&crash_init).unwrap())
80 .unwrap();
81
82 let client = Arc::new(client);
83 let handler = CrashHandler::attach(unsafe {
84 let client = client.clone();
85 crash_handler::make_crash_event(move |crash_context: &crash_handler::CrashContext| {
86 // only request a minidump once
87 let res = if REQUESTED_MINIDUMP
88 .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
89 .is_ok()
90 {
91 #[cfg(target_os = "macos")]
92 suspend_all_other_threads();
93
94 client.ping().unwrap();
95 client.request_dump(crash_context).is_ok()
96 } else {
97 true
98 };
99 CrashEventResult::Handled(res)
100 })
101 })
102 .expect("failed to attach signal handler");
103
104 #[cfg(target_os = "linux")]
105 {
106 handler.set_ptracer(Some(server_pid));
107 }
108 CRASH_HANDLER.set(client.clone()).ok();
109 std::mem::forget(handler);
110 info!("crash handler registered");
111
112 loop {
113 client.ping().ok();
114 smol::Timer::after(Duration::from_secs(10)).await;
115 }
116}
117
/// Suspends every thread of the current Mach task except the calling thread and the
/// thread recorded in `PANIC_THREAD_ID`.
///
/// The return values of `task_threads` and `thread_suspend` are not inspected, and the
/// thread list obtained from `task_threads` is not released.
///
/// # Safety
///
/// NOTE(review): the only caller in this file is the crash-event callback in `init`;
/// presumably this must only run when the process is about to terminate — confirm.
#[cfg(target_os = "macos")]
unsafe fn suspend_all_other_threads() {
    let mut thread_list: mach2::mach_types::thread_act_array_t = std::ptr::null_mut();
    let mut thread_count = 0;
    unsafe {
        mach2::task::task_threads(
            mach2::traps::current_task(),
            &raw mut thread_list,
            &raw mut thread_count,
        );
    }

    let this_thread = unsafe { mach2::mach_init::mach_thread_self() };
    let panicking_thread = PANIC_THREAD_ID.load(Ordering::SeqCst);

    for index in 0..thread_count {
        let candidate = unsafe { *thread_list.add(index as usize) };
        // Leave the crashing thread and the thread that recorded the panic running.
        if candidate == this_thread || candidate == panicking_thread {
            continue;
        }
        unsafe { mach2::thread_act::thread_suspend(candidate) };
    }
}
135
/// State of the crash-handler server process; implements `minidumper::ServerHandler`.
///
/// The `OnceLock` fields are filled in by messages received from the client
/// (see `on_message`) and read back when a minidump has been produced.
pub struct CrashServer {
    /// Set by message kind `1`; required before a minidump file can be created.
    initialization_params: OnceLock<InitCrashHandler>,
    /// Set by message kind `2`; absent when the crash was not preceded by a panic message.
    panic_info: OnceLock<CrashPanic>,
    /// Set by message kind `3`.
    active_gpu: OnceLock<system_specs::GpuSpecs>,
    /// Set to `true` in `on_client_connected`; the timeout thread started by
    /// `crash_server` reads it to decide whether to shut the server down.
    has_connection: Arc<AtomicBool>,
}
142
/// Crash metadata serialized as JSON to `<logs_dir>/<session_id>.json` by
/// `CrashServer::on_minidump_created`.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct CrashInfo {
    /// Initialization data received from the crashing process.
    pub init: InitCrashHandler,
    /// Panic details, if a panic message was received before the minidump was created.
    pub panic: Option<CrashPanic>,
    /// Description of the error when the minidump could not be produced; `None` otherwise.
    pub minidump_error: Option<String>,
    /// GPU information read from the system on Linux and FreeBSD; empty on other platforms.
    pub gpus: Vec<system_specs::GpuInfo>,
    /// GPU specs received from the client, if any.
    pub active_gpu: Option<system_specs::GpuSpecs>,
}
151
/// Initialization payload that `init` sends to the crash server as message kind `1`
/// (JSON-encoded).
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct InitCrashHandler {
    /// Used as the file stem of the `.dmp` and `.json` files written to the logs directory.
    pub session_id: String,
    pub zed_version: String,
    pub release_channel: String,
    pub commit_sha: String,
}
159
/// Panic details that `panic_hook` sends to the crash server as message kind `2`
/// (JSON-encoded).
#[derive(Deserialize, Serialize, Debug, Clone)]
pub struct CrashPanic {
    /// The panic payload when it is a `&str` or `String`, otherwise `"Box<Any>"`.
    pub message: String,
    /// Panic location formatted as `file:line`, or empty when no location is available.
    pub span: String,
}
165
166impl minidumper::ServerHandler for CrashServer {
167 fn create_minidump_file(&self) -> Result<(File, PathBuf), io::Error> {
168 let err_message = "Missing initialization data";
169 let dump_path = paths::logs_dir()
170 .join(
171 &self
172 .initialization_params
173 .get()
174 .expect(err_message)
175 .session_id,
176 )
177 .with_extension("dmp");
178 let file = File::create(&dump_path)?;
179 Ok((file, dump_path))
180 }
181
182 fn on_minidump_created(&self, result: Result<MinidumpBinary, minidumper::Error>) -> LoopAction {
183 let minidump_error = match result {
184 Ok(MinidumpBinary { mut file, path, .. }) => {
185 use io::Write;
186 file.flush().ok();
187 // TODO: clean this up once https://github.com/EmbarkStudios/crash-handling/issues/101 is addressed
188 drop(file);
189 let original_file = File::open(&path).unwrap();
190 let compressed_path = path.with_extension("zstd");
191 let compressed_file = File::create(&compressed_path).unwrap();
192 zstd::stream::copy_encode(original_file, compressed_file, 0).ok();
193 fs::rename(&compressed_path, path).unwrap();
194 None
195 }
196 Err(e) => Some(format!("{e:?}")),
197 };
198
199 #[cfg(not(any(target_os = "linux", target_os = "freebsd")))]
200 let gpus = vec![];
201
202 #[cfg(any(target_os = "linux", target_os = "freebsd"))]
203 let gpus = match system_specs::read_gpu_info_from_sys_class_drm() {
204 Ok(gpus) => gpus,
205 Err(err) => {
206 log::warn!("Failed to collect GPU information for crash report: {err}");
207 vec![]
208 }
209 };
210
211 let crash_info = CrashInfo {
212 init: self
213 .initialization_params
214 .get()
215 .expect("not initialized")
216 .clone(),
217 panic: self.panic_info.get().cloned(),
218 minidump_error,
219 active_gpu: self.active_gpu.get().cloned(),
220 gpus,
221 };
222
223 let crash_data_path = paths::logs_dir()
224 .join(&crash_info.init.session_id)
225 .with_extension("json");
226
227 fs::write(crash_data_path, serde_json::to_vec(&crash_info).unwrap()).ok();
228
229 LoopAction::Exit
230 }
231
232 fn on_message(&self, kind: u32, buffer: Vec<u8>) {
233 match kind {
234 1 => {
235 let init_data =
236 serde_json::from_slice::<InitCrashHandler>(&buffer).expect("invalid init data");
237 self.initialization_params
238 .set(init_data)
239 .expect("already initialized");
240 }
241 2 => {
242 let panic_data =
243 serde_json::from_slice::<CrashPanic>(&buffer).expect("invalid panic data");
244 self.panic_info.set(panic_data).expect("already panicked");
245 }
246 3 => {
247 let gpu_specs: system_specs::GpuSpecs =
248 bincode::deserialize(&buffer).expect("gpu specs");
249 self.active_gpu
250 .set(gpu_specs)
251 .expect("already set active gpu");
252 }
253 _ => {
254 panic!("invalid message kind");
255 }
256 }
257 }
258
259 fn on_client_disconnected(&self, _clients: usize) -> LoopAction {
260 LoopAction::Exit
261 }
262
263 fn on_client_connected(&self, _clients: usize) -> LoopAction {
264 self.has_connection.store(true, Ordering::SeqCst);
265 LoopAction::Continue
266 }
267}
268
269pub fn panic_hook(info: &PanicHookInfo) {
270 let message = info
271 .payload()
272 .downcast_ref::<&str>()
273 .map(|s| s.to_string())
274 .or_else(|| info.payload().downcast_ref::<String>().cloned())
275 .unwrap_or_else(|| "Box<Any>".to_string());
276
277 let span = info
278 .location()
279 .map(|loc| format!("{}:{}", loc.file(), loc.line()))
280 .unwrap_or_default();
281
282 // wait 500ms for the crash handler process to start up
283 // if it's still not there just write panic info and no minidump
284 let retry_frequency = Duration::from_millis(100);
285 for _ in 0..5 {
286 if let Some(client) = CRASH_HANDLER.get() {
287 client
288 .send_message(
289 2,
290 serde_json::to_vec(&CrashPanic { message, span }).unwrap(),
291 )
292 .ok();
293 log::error!("triggering a crash to generate a minidump...");
294
295 #[cfg(target_os = "macos")]
296 PANIC_THREAD_ID.store(
297 unsafe { mach2::mach_init::mach_thread_self() },
298 Ordering::SeqCst,
299 );
300
301 cfg_if::cfg_if! {
302 if #[cfg(target_os = "windows")] {
303 // https://learn.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
304 CrashHandler.simulate_exception(Some(234)); // (MORE_DATA_AVAILABLE)
305 break;
306 } else {
307 std::process::abort();
308 }
309 }
310 }
311 thread::sleep(retry_frequency);
312 }
313}
314
315pub fn crash_server(socket: &Path) {
316 let Ok(mut server) = minidumper::Server::with_name(socket) else {
317 log::info!("Couldn't create socket, there may already be a running crash server");
318 return;
319 };
320
321 let shutdown = Arc::new(AtomicBool::new(false));
322 let has_connection = Arc::new(AtomicBool::new(false));
323
324 thread::Builder::new()
325 .name("CrashServerTimeout".to_owned())
326 .spawn({
327 let shutdown = shutdown.clone();
328 let has_connection = has_connection.clone();
329 move || {
330 std::thread::sleep(CRASH_HANDLER_CONNECT_TIMEOUT);
331 if !has_connection.load(Ordering::SeqCst) {
332 shutdown.store(true, Ordering::SeqCst);
333 }
334 }
335 })
336 .unwrap();
337
338 server
339 .run(
340 Box::new(CrashServer {
341 initialization_params: OnceLock::new(),
342 panic_info: OnceLock::new(),
343 has_connection,
344 active_gpu: OnceLock::new(),
345 }),
346 &shutdown,
347 Some(CRASH_HANDLER_PING_TIMEOUT),
348 )
349 .expect("failed to run server");
350}