1use crash_handler::CrashHandler;
2use log::info;
3use minidumper::{Client, LoopAction, MinidumpBinary};
4use release_channel::{RELEASE_CHANNEL, ReleaseChannel};
5use serde::{Deserialize, Serialize};
6
7#[cfg(target_os = "macos")]
8use std::sync::atomic::AtomicU32;
9use std::{
10 env,
11 fs::{self, File},
12 io,
13 panic::Location,
14 path::{Path, PathBuf},
15 process::{self, Command},
16 sync::{
17 Arc, OnceLock,
18 atomic::{AtomicBool, Ordering},
19 },
20 thread,
21 time::Duration,
22};
23
24// set once the crash handler has initialized and the client has connected to it
25pub static CRASH_HANDLER: OnceLock<Arc<Client>> = OnceLock::new();
26// set when the first minidump request is made to avoid generating duplicate crash reports
27pub static REQUESTED_MINIDUMP: AtomicBool = AtomicBool::new(false);
28const CRASH_HANDLER_PING_TIMEOUT: Duration = Duration::from_secs(60);
29const CRASH_HANDLER_CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
30
31#[cfg(target_os = "macos")]
32static PANIC_THREAD_ID: AtomicU32 = AtomicU32::new(0);
33
34pub async fn init(crash_init: InitCrashHandler) {
35 if *RELEASE_CHANNEL == ReleaseChannel::Dev && env::var("ZED_GENERATE_MINIDUMPS").is_err() {
36 return;
37 }
38
39 let exe = env::current_exe().expect("unable to find ourselves");
40 let zed_pid = process::id();
41 // TODO: we should be able to get away with using 1 crash-handler process per machine,
42 // but for now we append the PID of the current process which makes it unique per remote
43 // server or interactive zed instance. This solves an issue where occasionally the socket
44 // used by the crash handler isn't destroyed correctly which causes it to stay on the file
45 // system and block further attempts to initialize crash handlers with that socket path.
46 let socket_name = paths::temp_dir().join(format!("zed-crash-handler-{zed_pid}"));
47 #[allow(unused)]
48 let server_pid = Command::new(exe)
49 .arg("--crash-handler")
50 .arg(&socket_name)
51 .spawn()
52 .expect("unable to spawn server process")
53 .id();
54 info!("spawning crash handler process");
55
56 let mut elapsed = Duration::ZERO;
57 let retry_frequency = Duration::from_millis(100);
58 let mut maybe_client = None;
59 while maybe_client.is_none() {
60 if let Ok(client) = Client::with_name(socket_name.as_path()) {
61 maybe_client = Some(client);
62 info!("connected to crash handler process after {elapsed:?}");
63 break;
64 }
65 elapsed += retry_frequency;
66 smol::Timer::after(retry_frequency).await;
67 }
68 let client = maybe_client.unwrap();
69 client
70 .send_message(1, serde_json::to_vec(&crash_init).unwrap())
71 .unwrap();
72
73 let client = Arc::new(client);
74 let handler = crash_handler::CrashHandler::attach(unsafe {
75 let client = client.clone();
76 crash_handler::make_crash_event(move |crash_context: &crash_handler::CrashContext| {
77 // only request a minidump once
78 let res = if REQUESTED_MINIDUMP
79 .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
80 .is_ok()
81 {
82 #[cfg(target_os = "macos")]
83 suspend_all_other_threads();
84
85 client.ping().unwrap();
86 client.request_dump(crash_context).is_ok()
87 } else {
88 true
89 };
90 crash_handler::CrashEventResult::Handled(res)
91 })
92 })
93 .expect("failed to attach signal handler");
94
95 #[cfg(target_os = "linux")]
96 {
97 handler.set_ptracer(Some(server_pid));
98 }
99 CRASH_HANDLER.set(client.clone()).ok();
100 std::mem::forget(handler);
101 info!("crash handler registered");
102
103 loop {
104 client.ping().ok();
105 smol::Timer::after(Duration::from_secs(10)).await;
106 }
107}
108
/// Suspends every thread of the current Mach task except the calling thread and
/// the thread whose port is stored in `PANIC_THREAD_ID`.
///
/// The result codes of `task_threads` and `thread_suspend` are ignored; if
/// `task_threads` leaves the count at zero, nothing is suspended.
///
/// # Safety
///
/// NOTE(review): the thread list returned by `task_threads` is dereferenced
/// without checking the call's return code, and suspended threads are never
/// resumed here — presumably only sound when called from the crash path, right
/// before the process dies. Confirm with callers.
#[cfg(target_os = "macos")]
unsafe fn suspend_all_other_threads() {
    let mut thread_list: mach2::mach_types::thread_act_array_t = std::ptr::null_mut();
    let mut thread_count = 0;
    unsafe {
        let this_task = mach2::traps::current_task();
        mach2::task::task_threads(this_task, &raw mut thread_list, &raw mut thread_count);
    }
    let this_thread = unsafe { mach2::mach_init::mach_thread_self() };
    let panicking_thread = PANIC_THREAD_ID.load(Ordering::SeqCst);

    (0..thread_count)
        .map(|index| unsafe { *thread_list.add(index as usize) })
        .filter(|&thread| thread != this_thread && thread != panicking_thread)
        .for_each(|thread| unsafe {
            mach2::thread_act::thread_suspend(thread);
        });
}
126
127pub struct CrashServer {
128 initialization_params: OnceLock<InitCrashHandler>,
129 panic_info: OnceLock<CrashPanic>,
130 active_gpu: OnceLock<system_specs::GpuSpecs>,
131 has_connection: Arc<AtomicBool>,
132}
133
134#[derive(Debug, Deserialize, Serialize, Clone)]
135pub struct CrashInfo {
136 pub init: InitCrashHandler,
137 pub panic: Option<CrashPanic>,
138 pub minidump_error: Option<String>,
139 pub gpus: Vec<system_specs::GpuInfo>,
140 pub active_gpu: Option<system_specs::GpuSpecs>,
141}
142
143#[derive(Debug, Deserialize, Serialize, Clone)]
144pub struct InitCrashHandler {
145 pub session_id: String,
146 pub zed_version: String,
147 pub release_channel: String,
148 pub commit_sha: String,
149}
150
151#[derive(Deserialize, Serialize, Debug, Clone)]
152pub struct CrashPanic {
153 pub message: String,
154 pub span: String,
155}
156
157impl minidumper::ServerHandler for CrashServer {
158 fn create_minidump_file(&self) -> Result<(File, PathBuf), io::Error> {
159 let err_message = "Missing initialization data";
160 let dump_path = paths::logs_dir()
161 .join(
162 &self
163 .initialization_params
164 .get()
165 .expect(err_message)
166 .session_id,
167 )
168 .with_extension("dmp");
169 let file = File::create(&dump_path)?;
170 Ok((file, dump_path))
171 }
172
173 fn on_minidump_created(&self, result: Result<MinidumpBinary, minidumper::Error>) -> LoopAction {
174 let minidump_error = match result {
175 Ok(MinidumpBinary { mut file, path, .. }) => {
176 use io::Write;
177 file.flush().ok();
178 // TODO: clean this up once https://github.com/EmbarkStudios/crash-handling/issues/101 is addressed
179 drop(file);
180 let original_file = File::open(&path).unwrap();
181 let compressed_path = path.with_extension("zstd");
182 let compressed_file = File::create(&compressed_path).unwrap();
183 zstd::stream::copy_encode(original_file, compressed_file, 0).ok();
184 fs::rename(&compressed_path, path).unwrap();
185 None
186 }
187 Err(e) => Some(format!("{e:?}")),
188 };
189
190 #[cfg(not(any(target_os = "linux", target_os = "freebsd")))]
191 let gpus = vec![];
192
193 #[cfg(any(target_os = "linux", target_os = "freebsd"))]
194 let gpus = match system_specs::read_gpu_info_from_sys_class_drm() {
195 Ok(gpus) => gpus,
196 Err(err) => {
197 log::warn!("Failed to collect GPU information for crash report: {err}");
198 vec![]
199 }
200 };
201
202 let crash_info = CrashInfo {
203 init: self
204 .initialization_params
205 .get()
206 .expect("not initialized")
207 .clone(),
208 panic: self.panic_info.get().cloned(),
209 minidump_error,
210 active_gpu: self.active_gpu.get().cloned(),
211 gpus,
212 };
213
214 let crash_data_path = paths::logs_dir()
215 .join(&crash_info.init.session_id)
216 .with_extension("json");
217
218 fs::write(crash_data_path, serde_json::to_vec(&crash_info).unwrap()).ok();
219
220 LoopAction::Exit
221 }
222
223 fn on_message(&self, kind: u32, buffer: Vec<u8>) {
224 match kind {
225 1 => {
226 let init_data =
227 serde_json::from_slice::<InitCrashHandler>(&buffer).expect("invalid init data");
228 self.initialization_params
229 .set(init_data)
230 .expect("already initialized");
231 }
232 2 => {
233 let panic_data =
234 serde_json::from_slice::<CrashPanic>(&buffer).expect("invalid panic data");
235 self.panic_info.set(panic_data).expect("already panicked");
236 }
237 3 => {
238 let gpu_specs: system_specs::GpuSpecs =
239 bincode::deserialize(&buffer).expect("gpu specs");
240 self.active_gpu
241 .set(gpu_specs)
242 .expect("already set active gpu");
243 }
244 _ => {
245 panic!("invalid message kind");
246 }
247 }
248 }
249
250 fn on_client_disconnected(&self, _clients: usize) -> LoopAction {
251 LoopAction::Exit
252 }
253
254 fn on_client_connected(&self, _clients: usize) -> LoopAction {
255 self.has_connection.store(true, Ordering::SeqCst);
256 LoopAction::Continue
257 }
258}
259
260pub fn handle_panic(message: String, span: Option<&Location>) {
261 let span = span
262 .map(|loc| format!("{}:{}", loc.file(), loc.line()))
263 .unwrap_or_default();
264
265 // wait 500ms for the crash handler process to start up
266 // if it's still not there just write panic info and no minidump
267 let retry_frequency = Duration::from_millis(100);
268 for _ in 0..5 {
269 if let Some(client) = CRASH_HANDLER.get() {
270 client
271 .send_message(
272 2,
273 serde_json::to_vec(&CrashPanic { message, span }).unwrap(),
274 )
275 .ok();
276 log::error!("triggering a crash to generate a minidump...");
277
278 #[cfg(target_os = "macos")]
279 PANIC_THREAD_ID.store(
280 unsafe { mach2::mach_init::mach_thread_self() },
281 Ordering::SeqCst,
282 );
283
284 #[cfg(target_os = "linux")]
285 CrashHandler.simulate_signal(crash_handler::Signal::Trap as u32);
286 #[cfg(not(target_os = "linux"))]
287 CrashHandler.simulate_exception(None);
288 break;
289 }
290 thread::sleep(retry_frequency);
291 }
292}
293
294pub fn crash_server(socket: &Path) {
295 let Ok(mut server) = minidumper::Server::with_name(socket) else {
296 log::info!("Couldn't create socket, there may already be a running crash server");
297 return;
298 };
299
300 let shutdown = Arc::new(AtomicBool::new(false));
301 let has_connection = Arc::new(AtomicBool::new(false));
302
303 std::thread::spawn({
304 let shutdown = shutdown.clone();
305 let has_connection = has_connection.clone();
306 move || {
307 std::thread::sleep(CRASH_HANDLER_CONNECT_TIMEOUT);
308 if !has_connection.load(Ordering::SeqCst) {
309 shutdown.store(true, Ordering::SeqCst);
310 }
311 }
312 });
313
314 server
315 .run(
316 Box::new(CrashServer {
317 initialization_params: OnceLock::new(),
318 panic_info: OnceLock::new(),
319 has_connection,
320 active_gpu: OnceLock::new(),
321 }),
322 &shutdown,
323 Some(CRASH_HANDLER_PING_TIMEOUT),
324 )
325 .expect("failed to run server");
326}