1use crash_handler::CrashHandler;
2use log::info;
3use minidumper::{Client, LoopAction, MinidumpBinary};
4use release_channel::{RELEASE_CHANNEL, ReleaseChannel};
5use serde::{Deserialize, Serialize};
6
7#[cfg(target_os = "macos")]
8use std::sync::atomic::AtomicU32;
9use std::{
10 env,
11 fs::{self, File},
12 io,
13 panic::Location,
14 path::{Path, PathBuf},
15 process::{self, Command},
16 sync::{
17 Arc, OnceLock,
18 atomic::{AtomicBool, Ordering},
19 },
20 thread,
21 time::Duration,
22};
23
/// Client connection to the crash handler process.
///
/// Set once by `init` after the crash handler has initialized and the client has connected
/// to it; read by `handle_panic` to forward panic details.
pub static CRASH_HANDLER: OnceLock<Arc<Client>> = OnceLock::new();
/// Set when the first minidump request is made to avoid generating duplicate crash reports.
///
/// Flipped with a compare-exchange inside the crash event callback installed by `init`.
pub static REQUESTED_MINIDUMP: AtomicBool = AtomicBool::new(false);
/// Timeout handed to `minidumper::Server::run` in `crash_server`.
///
/// `init` pings the server every 10 seconds. Presumably the server treats a client that has
/// been silent for longer than this as gone — confirm against the minidumper documentation.
const CRASH_HANDLER_PING_TIMEOUT: Duration = Duration::from_secs(60);
/// How long `crash_server` waits for a first client connection before raising its shutdown
/// flag.
const CRASH_HANDLER_CONNECT_TIMEOUT: Duration = Duration::from_secs(10);

/// Mach thread port of the thread that called `handle_panic` (0 until a panic is reported).
///
/// `suspend_all_other_threads` leaves this thread running in addition to the calling thread.
#[cfg(target_os = "macos")]
static PANIC_THREAD_ID: AtomicU32 = AtomicU32::new(0);
33
34pub async fn init(crash_init: InitCrashHandler) {
35 if *RELEASE_CHANNEL == ReleaseChannel::Dev && env::var("ZED_GENERATE_MINIDUMPS").is_err() {
36 return;
37 }
38
39 let exe = env::current_exe().expect("unable to find ourselves");
40 let zed_pid = process::id();
41 // TODO: we should be able to get away with using 1 crash-handler process per machine,
42 // but for now we append the PID of the current process which makes it unique per remote
43 // server or interactive zed instance. This solves an issue where occasionally the socket
44 // used by the crash handler isn't destroyed correctly which causes it to stay on the file
45 // system and block further attempts to initialize crash handlers with that socket path.
46 let socket_name = paths::temp_dir().join(format!("zed-crash-handler-{zed_pid}"));
47 #[allow(unused)]
48 let server_pid = Command::new(exe)
49 .arg("--crash-handler")
50 .arg(&socket_name)
51 .spawn()
52 .expect("unable to spawn server process")
53 .id();
54 info!("spawning crash handler process");
55
56 let mut elapsed = Duration::ZERO;
57 let retry_frequency = Duration::from_millis(100);
58 let mut maybe_client = None;
59 while maybe_client.is_none() {
60 if let Ok(client) = Client::with_name(socket_name.as_path()) {
61 maybe_client = Some(client);
62 info!("connected to crash handler process after {elapsed:?}");
63 break;
64 }
65 elapsed += retry_frequency;
66 smol::Timer::after(retry_frequency).await;
67 }
68 let client = maybe_client.unwrap();
69 client
70 .send_message(1, serde_json::to_vec(&crash_init).unwrap())
71 .unwrap();
72
73 let client = Arc::new(client);
74 let handler = crash_handler::CrashHandler::attach(unsafe {
75 let client = client.clone();
76 crash_handler::make_crash_event(move |crash_context: &crash_handler::CrashContext| {
77 // only request a minidump once
78 let res = if REQUESTED_MINIDUMP
79 .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
80 .is_ok()
81 {
82 #[cfg(target_os = "macos")]
83 suspend_all_other_threads();
84
85 client.ping().unwrap();
86 client.request_dump(crash_context).is_ok()
87 } else {
88 true
89 };
90 crash_handler::CrashEventResult::Handled(res)
91 })
92 })
93 .expect("failed to attach signal handler");
94
95 #[cfg(target_os = "linux")]
96 {
97 handler.set_ptracer(Some(server_pid));
98 }
99 CRASH_HANDLER.set(client.clone()).ok();
100 std::mem::forget(handler);
101 info!("crash handler registered");
102
103 loop {
104 client.ping().ok();
105 smol::Timer::after(Duration::from_secs(10)).await;
106 }
107}
108
/// Suspends every thread of the current task except the calling thread and the thread
/// recorded in [`PANIC_THREAD_ID`].
///
/// Return codes of the Mach calls are ignored; if `task_threads` does not fill in the list,
/// the count stays at its initial `0` and nothing is suspended.
///
/// # Safety
///
/// Calls raw Mach APIs and reads the thread array returned by `task_threads` through a raw
/// pointer. Suspended threads are never resumed here, so this is presumably only appropriate
/// when the process is about to terminate (it is called from the crash callback).
#[cfg(target_os = "macos")]
unsafe fn suspend_all_other_threads() {
    let this_task = unsafe { mach2::traps::current_task() };
    let mut thread_list: mach2::mach_types::thread_act_array_t = std::ptr::null_mut();
    let mut thread_count = 0;
    unsafe {
        mach2::task::task_threads(this_task, &raw mut thread_list, &raw mut thread_count);
    }

    let this_thread = unsafe { mach2::mach_init::mach_thread_self() };
    let panicking_thread = PANIC_THREAD_ID.load(Ordering::SeqCst);

    (0..thread_count)
        .map(|index| unsafe { *thread_list.add(index as usize) })
        .filter(|&port| port != this_thread && port != panicking_thread)
        .for_each(|port| {
            unsafe { mach2::thread_act::thread_suspend(port) };
        });
}
126
/// State of the crash handler server process; implements `minidumper::ServerHandler`.
///
/// Each `OnceLock` field is filled in at most once by a message received from the client.
pub struct CrashServer {
    // Set by message kind `1`; provides the session id used to name the output files.
    initialization_params: OnceLock<InitCrashHandler>,
    // Set by message kind `2` when the client reports a panic.
    panic_info: OnceLock<CrashPanic>,
    // Set by message kind `3`.
    active_gpu: OnceLock<system_specs::GpuSpecs>,
    // Becomes `true` when a client connects; shared with the watchdog thread started in
    // `crash_server`.
    has_connection: Arc<AtomicBool>,
}
133
/// Crash metadata that the server writes as JSON next to the minidump
/// (`<logs dir>/<session_id>.json`).
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct CrashInfo {
    /// Initialization data received from the client.
    pub init: InitCrashHandler,
    /// Panic details, if the client reported a panic before the dump was requested.
    pub panic: Option<CrashPanic>,
    /// Debug-formatted error if creating the minidump failed; `None` on success.
    pub minidump_error: Option<String>,
    /// GPU information read on Linux/FreeBSD; always empty on other platforms.
    pub gpus: Vec<system_specs::GpuInfo>,
    /// GPU specs reported by the client, if any were received.
    pub active_gpu: Option<system_specs::GpuSpecs>,
}
142
/// Initialization payload sent from the client to the crash handler server as JSON
/// (message kind `1`).
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct InitCrashHandler {
    /// Used as the file stem of the `.dmp` and `.json` files written to the logs directory.
    pub session_id: String,
    pub zed_version: String,
    pub release_channel: String,
    pub commit_sha: String,
}
150
/// Panic details sent from the client to the crash handler server as JSON (message kind `2`).
#[derive(Deserialize, Serialize, Debug, Clone)]
pub struct CrashPanic {
    /// The panic message.
    pub message: String,
    /// Source location formatted as `file:line`; empty when no location was available.
    pub span: String,
}
156
157impl minidumper::ServerHandler for CrashServer {
158 fn create_minidump_file(&self) -> Result<(File, PathBuf), io::Error> {
159 let err_message = "Missing initialization data";
160 let dump_path = paths::logs_dir()
161 .join(
162 &self
163 .initialization_params
164 .get()
165 .expect(err_message)
166 .session_id,
167 )
168 .with_extension("dmp");
169 let file = File::create(&dump_path)?;
170 Ok((file, dump_path))
171 }
172
173 fn on_minidump_created(&self, result: Result<MinidumpBinary, minidumper::Error>) -> LoopAction {
174 let minidump_error = match result {
175 Ok(mut md_bin) => {
176 use io::Write;
177 let _ = md_bin.file.flush();
178 None
179 }
180 Err(e) => Some(format!("{e:?}")),
181 };
182
183 #[cfg(not(any(target_os = "linux", target_os = "freebsd")))]
184 let gpus = vec![];
185
186 #[cfg(any(target_os = "linux", target_os = "freebsd"))]
187 let gpus = match system_specs::read_gpu_info_from_sys_class_drm() {
188 Ok(gpus) => gpus,
189 Err(err) => {
190 log::warn!("Failed to collect GPU information for crash report: {err}");
191 vec![]
192 }
193 };
194
195 let crash_info = CrashInfo {
196 init: self
197 .initialization_params
198 .get()
199 .expect("not initialized")
200 .clone(),
201 panic: self.panic_info.get().cloned(),
202 minidump_error,
203 active_gpu: self.active_gpu.get().cloned(),
204 gpus,
205 };
206
207 let crash_data_path = paths::logs_dir()
208 .join(&crash_info.init.session_id)
209 .with_extension("json");
210
211 fs::write(crash_data_path, serde_json::to_vec(&crash_info).unwrap()).ok();
212
213 LoopAction::Exit
214 }
215
216 fn on_message(&self, kind: u32, buffer: Vec<u8>) {
217 match kind {
218 1 => {
219 let init_data =
220 serde_json::from_slice::<InitCrashHandler>(&buffer).expect("invalid init data");
221 self.initialization_params
222 .set(init_data)
223 .expect("already initialized");
224 }
225 2 => {
226 let panic_data =
227 serde_json::from_slice::<CrashPanic>(&buffer).expect("invalid panic data");
228 self.panic_info.set(panic_data).expect("already panicked");
229 }
230 3 => {
231 let gpu_specs: system_specs::GpuSpecs =
232 bincode::deserialize(&buffer).expect("gpu specs");
233 self.active_gpu
234 .set(gpu_specs)
235 .expect("already set active gpu");
236 }
237 _ => {
238 panic!("invalid message kind");
239 }
240 }
241 }
242
243 fn on_client_disconnected(&self, _clients: usize) -> LoopAction {
244 LoopAction::Exit
245 }
246
247 fn on_client_connected(&self, _clients: usize) -> LoopAction {
248 self.has_connection.store(true, Ordering::SeqCst);
249 LoopAction::Continue
250 }
251}
252
253pub fn handle_panic(message: String, span: Option<&Location>) {
254 let span = span
255 .map(|loc| format!("{}:{}", loc.file(), loc.line()))
256 .unwrap_or_default();
257
258 // wait 500ms for the crash handler process to start up
259 // if it's still not there just write panic info and no minidump
260 let retry_frequency = Duration::from_millis(100);
261 for _ in 0..5 {
262 if let Some(client) = CRASH_HANDLER.get() {
263 client
264 .send_message(
265 2,
266 serde_json::to_vec(&CrashPanic { message, span }).unwrap(),
267 )
268 .ok();
269 log::error!("triggering a crash to generate a minidump...");
270
271 #[cfg(target_os = "macos")]
272 PANIC_THREAD_ID.store(
273 unsafe { mach2::mach_init::mach_thread_self() },
274 Ordering::SeqCst,
275 );
276
277 #[cfg(target_os = "linux")]
278 CrashHandler.simulate_signal(crash_handler::Signal::Trap as u32);
279 #[cfg(not(target_os = "linux"))]
280 CrashHandler.simulate_exception(None);
281 break;
282 }
283 thread::sleep(retry_frequency);
284 }
285}
286
287pub fn crash_server(socket: &Path) {
288 let Ok(mut server) = minidumper::Server::with_name(socket) else {
289 log::info!("Couldn't create socket, there may already be a running crash server");
290 return;
291 };
292
293 let shutdown = Arc::new(AtomicBool::new(false));
294 let has_connection = Arc::new(AtomicBool::new(false));
295
296 std::thread::spawn({
297 let shutdown = shutdown.clone();
298 let has_connection = has_connection.clone();
299 move || {
300 std::thread::sleep(CRASH_HANDLER_CONNECT_TIMEOUT);
301 if !has_connection.load(Ordering::SeqCst) {
302 shutdown.store(true, Ordering::SeqCst);
303 }
304 }
305 });
306
307 server
308 .run(
309 Box::new(CrashServer {
310 initialization_params: OnceLock::new(),
311 panic_info: OnceLock::new(),
312 has_connection,
313 active_gpu: OnceLock::new(),
314 }),
315 &shutdown,
316 Some(CRASH_HANDLER_PING_TIMEOUT),
317 )
318 .expect("failed to run server");
319}