1use crash_handler::CrashHandler;
2use log::info;
3use minidumper::{Client, LoopAction, MinidumpBinary};
4use release_channel::{RELEASE_CHANNEL, ReleaseChannel};
5use serde::{Deserialize, Serialize};
6
7use std::{
8 env,
9 fs::{self, File},
10 io,
11 panic::Location,
12 path::{Path, PathBuf},
13 process::{self, Command},
14 sync::{
15 Arc, OnceLock,
16 atomic::{AtomicBool, Ordering},
17 },
18 thread,
19 time::Duration,
20};
21
/// Client connection to the crash handler process.
///
/// Set by `init` once the client has connected to the crash handler process and the
/// crash event handler has been attached; `handle_panic` polls this to find the client.
pub static CRASH_HANDLER: OnceLock<Arc<Client>> = OnceLock::new();
/// Set when the first minidump request is made to avoid generating duplicate crash reports.
///
/// The crash event callback installed by `init` flips this from `false` to `true` with a
/// compare-exchange and only requests a dump when that exchange succeeds.
pub static REQUESTED_MINIDUMP: AtomicBool = AtomicBool::new(false);
/// Timeout handed to `minidumper::Server::run` by `crash_server`.
// NOTE(review): presumably the period after which a client that has not pinged is
// considered stale (`init` pings every 10 seconds) — confirm against the minidumper docs.
const CRASH_HANDLER_PING_TIMEOUT: Duration = Duration::from_secs(60);
/// How long `crash_server` waits for a first client connection before it raises the
/// shutdown flag passed to the server loop.
const CRASH_HANDLER_CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
28
29pub async fn init(crash_init: InitCrashHandler) {
30 if *RELEASE_CHANNEL == ReleaseChannel::Dev && env::var("ZED_GENERATE_MINIDUMPS").is_err() {
31 return;
32 }
33
34 let exe = env::current_exe().expect("unable to find ourselves");
35 let zed_pid = process::id();
36 // TODO: we should be able to get away with using 1 crash-handler process per machine,
37 // but for now we append the PID of the current process which makes it unique per remote
38 // server or interactive zed instance. This solves an issue where occasionally the socket
39 // used by the crash handler isn't destroyed correctly which causes it to stay on the file
40 // system and block further attempts to initialize crash handlers with that socket path.
41 let socket_name = paths::temp_dir().join(format!("zed-crash-handler-{zed_pid}"));
42 #[allow(unused)]
43 let server_pid = Command::new(exe)
44 .arg("--crash-handler")
45 .arg(&socket_name)
46 .spawn()
47 .expect("unable to spawn server process")
48 .id();
49 info!("spawning crash handler process");
50
51 let mut elapsed = Duration::ZERO;
52 let retry_frequency = Duration::from_millis(100);
53 let mut maybe_client = None;
54 while maybe_client.is_none() {
55 if let Ok(client) = Client::with_name(socket_name.as_path()) {
56 maybe_client = Some(client);
57 info!("connected to crash handler process after {elapsed:?}");
58 break;
59 }
60 elapsed += retry_frequency;
61 smol::Timer::after(retry_frequency).await;
62 }
63 let client = maybe_client.unwrap();
64 client
65 .send_message(1, serde_json::to_vec(&crash_init).unwrap())
66 .unwrap();
67
68 let client = Arc::new(client);
69 let handler = crash_handler::CrashHandler::attach(unsafe {
70 let client = client.clone();
71 crash_handler::make_crash_event(move |crash_context: &crash_handler::CrashContext| {
72 // only request a minidump once
73 let res = if REQUESTED_MINIDUMP
74 .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
75 .is_ok()
76 {
77 #[cfg(target_os = "macos")]
78 suspend_all_other_threads();
79
80 client.ping().unwrap();
81 client.request_dump(crash_context).is_ok()
82 } else {
83 true
84 };
85 crash_handler::CrashEventResult::Handled(res)
86 })
87 })
88 .expect("failed to attach signal handler");
89
90 #[cfg(target_os = "linux")]
91 {
92 handler.set_ptracer(Some(server_pid));
93 }
94 CRASH_HANDLER.set(client.clone()).ok();
95 std::mem::forget(handler);
96 info!("crash handler registered");
97
98 loop {
99 client.ping().ok();
100 smol::Timer::after(Duration::from_secs(10)).await;
101 }
102}
103
/// Suspends every thread of the current Mach task except the calling one.
///
/// The thread list comes from `task_threads`; each entry that differs from
/// `mach_thread_self()` is passed to `thread_suspend`. Return codes of the Mach calls
/// are not inspected.
///
/// # Safety
///
/// Performs raw Mach calls and dereferences the thread array they fill in. The caller
/// must be prepared for every other thread in the process to stop running.
#[cfg(target_os = "macos")]
unsafe fn suspend_all_other_threads() {
    let mut thread_list: mach2::mach_types::thread_act_array_t = std::ptr::null_mut();
    let mut thread_count = 0;
    unsafe {
        let this_task = mach2::traps::current_task();
        mach2::task::task_threads(this_task, &raw mut thread_list, &raw mut thread_count);
    }
    let this_thread = unsafe { mach2::mach_init::mach_thread_self() };

    // `thread_count` stays 0 if `task_threads` filled nothing in, so the (possibly null)
    // list pointer is never read in that case.
    (0..thread_count)
        .map(|index| unsafe { *thread_list.add(index as usize) })
        .filter(|&thread| thread != this_thread)
        .for_each(|thread| {
            unsafe { mach2::thread_act::thread_suspend(thread) };
        });
}
120
/// State held by the crash handler server process; implements
/// `minidumper::ServerHandler`.
pub struct CrashServer {
    // Initialization data received from the client as message kind 1.
    initialization_params: OnceLock<InitCrashHandler>,
    // Panic details received from the client as message kind 2, if any.
    panic_info: OnceLock<CrashPanic>,
    // Set to true when a client connects; shared with the watchdog thread in
    // `crash_server`, which requests shutdown if no client has connected in time.
    has_connection: Arc<AtomicBool>,
}
126
/// Crash metadata that the crash handler server serializes to JSON and writes to
/// `<logs_dir>/<session_id>.json` after a minidump attempt.
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct CrashInfo {
    /// Initialization data the client sent when it connected.
    pub init: InitCrashHandler,
    /// Panic details, when a panic message was received before the dump was requested.
    pub panic: Option<CrashPanic>,
    /// Debug-formatted error if creating the minidump failed; `None` on success.
    pub minidump_error: Option<String>,
}
133
/// Initialization data that `init` sends to the crash handler server (message kind 1).
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct InitCrashHandler {
    /// Session identifier; also used as the file stem of the `.dmp` and `.json` files
    /// the server writes into the logs directory.
    pub session_id: String,
    /// Version string supplied by the caller of `init`.
    pub zed_version: String,
    /// Release channel name supplied by the caller of `init`.
    pub release_channel: String,
    /// Commit SHA supplied by the caller of `init`.
    pub commit_sha: String,
    // pub gpu: String,
}
142
/// Panic details that `handle_panic` sends to the crash handler server (message kind 2).
#[derive(Deserialize, Serialize, Debug, Clone)]
pub struct CrashPanic {
    /// The panic message.
    pub message: String,
    /// Panic location formatted as `file:line`, or an empty string when no location
    /// was available.
    pub span: String,
}
148
149impl minidumper::ServerHandler for CrashServer {
150 fn create_minidump_file(&self) -> Result<(File, PathBuf), io::Error> {
151 let err_message = "Missing initialization data";
152 let dump_path = paths::logs_dir()
153 .join(
154 &self
155 .initialization_params
156 .get()
157 .expect(err_message)
158 .session_id,
159 )
160 .with_extension("dmp");
161 let file = File::create(&dump_path)?;
162 Ok((file, dump_path))
163 }
164
165 fn on_minidump_created(&self, result: Result<MinidumpBinary, minidumper::Error>) -> LoopAction {
166 let minidump_error = match result {
167 Ok(mut md_bin) => {
168 use io::Write;
169 let _ = md_bin.file.flush();
170 None
171 }
172 Err(e) => Some(format!("{e:?}")),
173 };
174
175 let crash_info = CrashInfo {
176 init: self
177 .initialization_params
178 .get()
179 .expect("not initialized")
180 .clone(),
181 panic: self.panic_info.get().cloned(),
182 minidump_error,
183 };
184
185 let crash_data_path = paths::logs_dir()
186 .join(&crash_info.init.session_id)
187 .with_extension("json");
188
189 fs::write(crash_data_path, serde_json::to_vec(&crash_info).unwrap()).ok();
190
191 LoopAction::Exit
192 }
193
194 fn on_message(&self, kind: u32, buffer: Vec<u8>) {
195 match kind {
196 1 => {
197 let init_data =
198 serde_json::from_slice::<InitCrashHandler>(&buffer).expect("invalid init data");
199 self.initialization_params
200 .set(init_data)
201 .expect("already initialized");
202 }
203 2 => {
204 let panic_data =
205 serde_json::from_slice::<CrashPanic>(&buffer).expect("invalid panic data");
206 self.panic_info.set(panic_data).expect("already panicked");
207 }
208 _ => {
209 panic!("invalid message kind");
210 }
211 }
212 }
213
214 fn on_client_disconnected(&self, _clients: usize) -> LoopAction {
215 LoopAction::Exit
216 }
217
218 fn on_client_connected(&self, _clients: usize) -> LoopAction {
219 self.has_connection.store(true, Ordering::SeqCst);
220 LoopAction::Continue
221 }
222}
223
224pub fn handle_panic(message: String, span: Option<&Location>) {
225 let span = span
226 .map(|loc| format!("{}:{}", loc.file(), loc.line()))
227 .unwrap_or_default();
228
229 // wait 500ms for the crash handler process to start up
230 // if it's still not there just write panic info and no minidump
231 let retry_frequency = Duration::from_millis(100);
232 for _ in 0..5 {
233 if let Some(client) = CRASH_HANDLER.get() {
234 client
235 .send_message(
236 2,
237 serde_json::to_vec(&CrashPanic { message, span }).unwrap(),
238 )
239 .ok();
240 log::error!("triggering a crash to generate a minidump...");
241 #[cfg(target_os = "linux")]
242 CrashHandler.simulate_signal(crash_handler::Signal::Trap as u32);
243 #[cfg(not(target_os = "linux"))]
244 CrashHandler.simulate_exception(None);
245 break;
246 }
247 thread::sleep(retry_frequency);
248 }
249}
250
251pub fn crash_server(socket: &Path) {
252 let Ok(mut server) = minidumper::Server::with_name(socket) else {
253 log::info!("Couldn't create socket, there may already be a running crash server");
254 return;
255 };
256
257 let shutdown = Arc::new(AtomicBool::new(false));
258 let has_connection = Arc::new(AtomicBool::new(false));
259
260 std::thread::spawn({
261 let shutdown = shutdown.clone();
262 let has_connection = has_connection.clone();
263 move || {
264 std::thread::sleep(CRASH_HANDLER_CONNECT_TIMEOUT);
265 if !has_connection.load(Ordering::SeqCst) {
266 shutdown.store(true, Ordering::SeqCst);
267 }
268 }
269 });
270
271 server
272 .run(
273 Box::new(CrashServer {
274 initialization_params: OnceLock::new(),
275 panic_info: OnceLock::new(),
276 has_connection,
277 }),
278 &shutdown,
279 Some(CRASH_HANDLER_PING_TIMEOUT),
280 )
281 .expect("failed to run server");
282}