1use crash_handler::CrashHandler;
2use log::info;
3use minidumper::{Client, LoopAction, MinidumpBinary};
4use release_channel::{RELEASE_CHANNEL, ReleaseChannel};
5use serde::{Deserialize, Serialize};
6
7#[cfg(target_os = "macos")]
8use std::sync::atomic::AtomicU32;
9use std::{
10 env,
11 fs::{self, File},
12 io,
13 panic::Location,
14 path::{Path, PathBuf},
15 process::{self, Command},
16 sync::{
17 Arc, OnceLock,
18 atomic::{AtomicBool, Ordering},
19 },
20 thread,
21 time::Duration,
22};
23
/// Client connection to the out-of-process crash handler.
///
/// Set once the crash handler has initialized and the client has connected to it
/// (see `init`); `handle_panic` polls this to find out whether a handler is available.
pub static CRASH_HANDLER: OnceLock<Arc<Client>> = OnceLock::new();
/// Set when the first minidump request is made to avoid generating duplicate crash reports.
pub static REQUESTED_MINIDUMP: AtomicBool = AtomicBool::new(false);
/// Timeout handed to `minidumper::Server::run` in `crash_server`.
// NOTE(review): presumably the interval after which the server treats a client that has
// stopped pinging as stale — confirm against the minidumper documentation.
const CRASH_HANDLER_PING_TIMEOUT: Duration = Duration::from_secs(60);
/// How long `crash_server` waits for a first client connection before it raises the
/// shutdown flag passed to the server loop.
const CRASH_HANDLER_CONNECT_TIMEOUT: Duration = Duration::from_secs(10);

/// Mach thread port of the thread that called `handle_panic` (0 until a panic is recorded).
///
/// Read by `suspend_all_other_threads` so that the panicking thread is not suspended.
#[cfg(target_os = "macos")]
static PANIC_THREAD_ID: AtomicU32 = AtomicU32::new(0);
33
34pub async fn init(crash_init: InitCrashHandler) {
35 if *RELEASE_CHANNEL == ReleaseChannel::Dev && env::var("ZED_GENERATE_MINIDUMPS").is_err() {
36 return;
37 }
38
39 let exe = env::current_exe().expect("unable to find ourselves");
40 let zed_pid = process::id();
41 // TODO: we should be able to get away with using 1 crash-handler process per machine,
42 // but for now we append the PID of the current process which makes it unique per remote
43 // server or interactive zed instance. This solves an issue where occasionally the socket
44 // used by the crash handler isn't destroyed correctly which causes it to stay on the file
45 // system and block further attempts to initialize crash handlers with that socket path.
46 let socket_name = paths::temp_dir().join(format!("zed-crash-handler-{zed_pid}"));
47 #[allow(unused)]
48 let server_pid = Command::new(exe)
49 .arg("--crash-handler")
50 .arg(&socket_name)
51 .spawn()
52 .expect("unable to spawn server process")
53 .id();
54 info!("spawning crash handler process");
55
56 let mut elapsed = Duration::ZERO;
57 let retry_frequency = Duration::from_millis(100);
58 let mut maybe_client = None;
59 while maybe_client.is_none() {
60 if let Ok(client) = Client::with_name(socket_name.as_path()) {
61 maybe_client = Some(client);
62 info!("connected to crash handler process after {elapsed:?}");
63 break;
64 }
65 elapsed += retry_frequency;
66 smol::Timer::after(retry_frequency).await;
67 }
68 let client = maybe_client.unwrap();
69 client
70 .send_message(1, serde_json::to_vec(&crash_init).unwrap())
71 .unwrap();
72
73 let client = Arc::new(client);
74 let handler = crash_handler::CrashHandler::attach(unsafe {
75 let client = client.clone();
76 crash_handler::make_crash_event(move |crash_context: &crash_handler::CrashContext| {
77 // only request a minidump once
78 let res = if REQUESTED_MINIDUMP
79 .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
80 .is_ok()
81 {
82 #[cfg(target_os = "macos")]
83 suspend_all_other_threads();
84
85 client.ping().unwrap();
86 client.request_dump(crash_context).is_ok()
87 } else {
88 true
89 };
90 crash_handler::CrashEventResult::Handled(res)
91 })
92 })
93 .expect("failed to attach signal handler");
94
95 #[cfg(target_os = "linux")]
96 {
97 handler.set_ptracer(Some(server_pid));
98 }
99 CRASH_HANDLER.set(client.clone()).ok();
100 std::mem::forget(handler);
101 info!("crash handler registered");
102
103 loop {
104 client.ping().ok();
105 smol::Timer::after(Duration::from_secs(10)).await;
106 }
107}
108
/// Suspends every thread of the current task except the calling thread and the thread
/// recorded in `PANIC_THREAD_ID`.
///
/// The return codes of the Mach calls are ignored, the threads are not resumed here, and
/// the thread list returned by `task_threads` is not released here.
///
/// # Safety
///
/// Calls raw Mach APIs and dereferences the thread array they return.
// NOTE(review): presumably only meant to be called from the crash-event callback while the
// process is going down — confirm before calling it from anywhere else.
#[cfg(target_os = "macos")]
unsafe fn suspend_all_other_threads() {
    let mut thread_list: mach2::mach_types::thread_act_array_t = std::ptr::null_mut();
    let mut thread_count = 0;
    unsafe {
        let this_task = mach2::traps::current_task();
        mach2::task::task_threads(this_task, &raw mut thread_list, &raw mut thread_count);
    }

    let this_thread = unsafe { mach2::mach_init::mach_thread_self() };
    let panicking_thread = PANIC_THREAD_ID.load(Ordering::SeqCst);

    for index in 0..thread_count {
        let thread = unsafe { *thread_list.add(index as usize) };
        // Leave the thread doing the crash handling and the panicking thread running.
        if thread == this_thread || thread == panicking_thread {
            continue;
        }
        unsafe { mach2::thread_act::thread_suspend(thread) };
    }
}
126
/// State of the crash-handler server process; implements `minidumper::ServerHandler`.
pub struct CrashServer {
    /// Filled from the first message of kind `1`; required before a minidump can be written.
    initialization_params: OnceLock<InitCrashHandler>,
    /// Filled from a message of kind `2` when the client reports a panic.
    panic_info: OnceLock<CrashPanic>,
    /// Set to `true` when a client connects; shared with the watchdog thread in `crash_server`.
    has_connection: Arc<AtomicBool>,
}
132
/// Crash metadata that the server serializes to `<logs_dir>/<session_id>.json` once a
/// minidump request has completed.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct CrashInfo {
    /// Initialization data received from the client.
    pub init: InitCrashHandler,
    /// Panic details, if the client sent any before the dump was requested.
    pub panic: Option<CrashPanic>,
    /// `Debug` rendering of the error when minidump creation failed, `None` on success.
    pub minidump_error: Option<String>,
}
139
/// Data the client sends to the crash-handler server (message kind `1`) right after
/// connecting.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct InitCrashHandler {
    /// Also used as the file stem of the `.dmp` and `.json` files written to the logs dir.
    pub session_id: String,
    pub zed_version: String,
    pub release_channel: String,
    pub commit_sha: String,
    // pub gpu: String,
}
148
/// Panic details sent to the crash-handler server (message kind `2`) by `handle_panic`.
#[derive(Deserialize, Serialize, Debug, Clone)]
pub struct CrashPanic {
    /// The panic message as passed to `handle_panic`.
    pub message: String,
    /// Source location formatted as `file:line`, or empty when no location was available.
    pub span: String,
}
154
155impl minidumper::ServerHandler for CrashServer {
156 fn create_minidump_file(&self) -> Result<(File, PathBuf), io::Error> {
157 let err_message = "Missing initialization data";
158 let dump_path = paths::logs_dir()
159 .join(
160 &self
161 .initialization_params
162 .get()
163 .expect(err_message)
164 .session_id,
165 )
166 .with_extension("dmp");
167 let file = File::create(&dump_path)?;
168 Ok((file, dump_path))
169 }
170
171 fn on_minidump_created(&self, result: Result<MinidumpBinary, minidumper::Error>) -> LoopAction {
172 let minidump_error = match result {
173 Ok(mut md_bin) => {
174 use io::Write;
175 let _ = md_bin.file.flush();
176 None
177 }
178 Err(e) => Some(format!("{e:?}")),
179 };
180
181 let crash_info = CrashInfo {
182 init: self
183 .initialization_params
184 .get()
185 .expect("not initialized")
186 .clone(),
187 panic: self.panic_info.get().cloned(),
188 minidump_error,
189 };
190
191 let crash_data_path = paths::logs_dir()
192 .join(&crash_info.init.session_id)
193 .with_extension("json");
194
195 fs::write(crash_data_path, serde_json::to_vec(&crash_info).unwrap()).ok();
196
197 LoopAction::Exit
198 }
199
200 fn on_message(&self, kind: u32, buffer: Vec<u8>) {
201 match kind {
202 1 => {
203 let init_data =
204 serde_json::from_slice::<InitCrashHandler>(&buffer).expect("invalid init data");
205 self.initialization_params
206 .set(init_data)
207 .expect("already initialized");
208 }
209 2 => {
210 let panic_data =
211 serde_json::from_slice::<CrashPanic>(&buffer).expect("invalid panic data");
212 self.panic_info.set(panic_data).expect("already panicked");
213 }
214 _ => {
215 panic!("invalid message kind");
216 }
217 }
218 }
219
220 fn on_client_disconnected(&self, _clients: usize) -> LoopAction {
221 LoopAction::Exit
222 }
223
224 fn on_client_connected(&self, _clients: usize) -> LoopAction {
225 self.has_connection.store(true, Ordering::SeqCst);
226 LoopAction::Continue
227 }
228}
229
230pub fn handle_panic(message: String, span: Option<&Location>) {
231 let span = span
232 .map(|loc| format!("{}:{}", loc.file(), loc.line()))
233 .unwrap_or_default();
234
235 // wait 500ms for the crash handler process to start up
236 // if it's still not there just write panic info and no minidump
237 let retry_frequency = Duration::from_millis(100);
238 for _ in 0..5 {
239 if let Some(client) = CRASH_HANDLER.get() {
240 client
241 .send_message(
242 2,
243 serde_json::to_vec(&CrashPanic { message, span }).unwrap(),
244 )
245 .ok();
246 log::error!("triggering a crash to generate a minidump...");
247
248 #[cfg(target_os = "macos")]
249 PANIC_THREAD_ID.store(
250 unsafe { mach2::mach_init::mach_thread_self() },
251 Ordering::SeqCst,
252 );
253
254 #[cfg(target_os = "linux")]
255 CrashHandler.simulate_signal(crash_handler::Signal::Trap as u32);
256 #[cfg(not(target_os = "linux"))]
257 CrashHandler.simulate_exception(None);
258 break;
259 }
260 thread::sleep(retry_frequency);
261 }
262}
263
264pub fn crash_server(socket: &Path) {
265 let Ok(mut server) = minidumper::Server::with_name(socket) else {
266 log::info!("Couldn't create socket, there may already be a running crash server");
267 return;
268 };
269
270 let shutdown = Arc::new(AtomicBool::new(false));
271 let has_connection = Arc::new(AtomicBool::new(false));
272
273 std::thread::spawn({
274 let shutdown = shutdown.clone();
275 let has_connection = has_connection.clone();
276 move || {
277 std::thread::sleep(CRASH_HANDLER_CONNECT_TIMEOUT);
278 if !has_connection.load(Ordering::SeqCst) {
279 shutdown.store(true, Ordering::SeqCst);
280 }
281 }
282 });
283
284 server
285 .run(
286 Box::new(CrashServer {
287 initialization_params: OnceLock::new(),
288 panic_info: OnceLock::new(),
289 has_connection,
290 }),
291 &shutdown,
292 Some(CRASH_HANDLER_PING_TIMEOUT),
293 )
294 .expect("failed to run server");
295}