crashes.rs

  1use crash_handler::CrashHandler;
  2use log::info;
  3use minidumper::{Client, LoopAction, MinidumpBinary};
  4use release_channel::{RELEASE_CHANNEL, ReleaseChannel};
  5use serde::{Deserialize, Serialize};
  6
  7use std::{
  8    env,
  9    fs::{self, File},
 10    io,
 11    panic::Location,
 12    path::{Path, PathBuf},
 13    process::{self, Command},
 14    sync::{
 15        Arc, OnceLock,
 16        atomic::{AtomicBool, Ordering},
 17    },
 18    thread,
 19    time::Duration,
 20};
 21
/// Client connection to the crash handler process.
///
/// Set once the crash handler has initialized and the client has connected to it.
pub static CRASH_HANDLER: OnceLock<Arc<Client>> = OnceLock::new();
/// Set when the first minidump request is made to avoid generating duplicate crash reports.
pub static REQUESTED_MINIDUMP: AtomicBool = AtomicBool::new(false);
/// Passed to the crash server's `run` call as its timeout argument.
///
/// NOTE(review): presumably the period without client traffic after which the server treats the
/// client as stale; `init` pings every 10 seconds to stay well below it. Confirm against the
/// minidumper documentation.
const CRASH_HANDLER_PING_TIMEOUT: Duration = Duration::from_secs(60);
/// How long the crash server waits for its first client connection before it requests its own
/// shutdown.
const CRASH_HANDLER_CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
 28
 29pub async fn init(crash_init: InitCrashHandler) {
 30    if *RELEASE_CHANNEL == ReleaseChannel::Dev && env::var("ZED_GENERATE_MINIDUMPS").is_err() {
 31        return;
 32    }
 33
 34    let exe = env::current_exe().expect("unable to find ourselves");
 35    let zed_pid = process::id();
 36    // TODO: we should be able to get away with using 1 crash-handler process per machine,
 37    // but for now we append the PID of the current process which makes it unique per remote
 38    // server or interactive zed instance. This solves an issue where occasionally the socket
 39    // used by the crash handler isn't destroyed correctly which causes it to stay on the file
 40    // system and block further attempts to initialize crash handlers with that socket path.
 41    let socket_name = paths::temp_dir().join(format!("zed-crash-handler-{zed_pid}"));
 42    #[allow(unused)]
 43    let server_pid = Command::new(exe)
 44        .arg("--crash-handler")
 45        .arg(&socket_name)
 46        .spawn()
 47        .expect("unable to spawn server process")
 48        .id();
 49    info!("spawning crash handler process");
 50
 51    let mut elapsed = Duration::ZERO;
 52    let retry_frequency = Duration::from_millis(100);
 53    let mut maybe_client = None;
 54    while maybe_client.is_none() {
 55        if let Ok(client) = Client::with_name(socket_name.as_path()) {
 56            maybe_client = Some(client);
 57            info!("connected to crash handler process after {elapsed:?}");
 58            break;
 59        }
 60        elapsed += retry_frequency;
 61        smol::Timer::after(retry_frequency).await;
 62    }
 63    let client = maybe_client.unwrap();
 64    client
 65        .send_message(1, serde_json::to_vec(&crash_init).unwrap())
 66        .unwrap();
 67
 68    let client = Arc::new(client);
 69    let handler = crash_handler::CrashHandler::attach(unsafe {
 70        let client = client.clone();
 71        crash_handler::make_crash_event(move |crash_context: &crash_handler::CrashContext| {
 72            // only request a minidump once
 73            let res = if REQUESTED_MINIDUMP
 74                .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
 75                .is_ok()
 76            {
 77                #[cfg(target_os = "macos")]
 78                suspend_all_other_threads();
 79
 80                client.ping().unwrap();
 81                client.request_dump(crash_context).is_ok()
 82            } else {
 83                true
 84            };
 85            crash_handler::CrashEventResult::Handled(res)
 86        })
 87    })
 88    .expect("failed to attach signal handler");
 89
 90    #[cfg(target_os = "linux")]
 91    {
 92        handler.set_ptracer(Some(server_pid));
 93    }
 94    CRASH_HANDLER.set(client.clone()).ok();
 95    std::mem::forget(handler);
 96    info!("crash handler registered");
 97
 98    loop {
 99        client.ping().ok();
100        smol::Timer::after(Duration::from_secs(10)).await;
101    }
102}
103
/// Suspends every thread of the current task except the calling one.
///
/// The thread list comes from `task_threads`; each entry that differs from
/// `mach_thread_self()` is passed to `thread_suspend`. All return codes are ignored, and if
/// `task_threads` leaves the count at zero nothing is suspended.
///
/// # Safety
///
/// NOTE(review): calls raw Mach APIs and reads `count` entries from the array they return, so
/// it relies on `task_threads` reporting a consistent pointer/count pair. The suspended threads
/// are never resumed here; the only visible caller is the crash callback in `init`.
#[cfg(target_os = "macos")]
unsafe fn suspend_all_other_threads() {
    let mut thread_list: mach2::mach_types::thread_act_array_t = std::ptr::null_mut();
    let mut thread_count = 0;
    let this_thread = unsafe {
        let task = mach2::traps::current_task();
        mach2::task::task_threads(task, &raw mut thread_list, &raw mut thread_count);
        mach2::mach_init::mach_thread_self()
    };

    (0..thread_count)
        // The pointer is only dereferenced for indices below the reported count.
        .map(|index| unsafe { *thread_list.add(index as usize) })
        .filter(|&port| port != this_thread)
        .for_each(|port| unsafe {
            mach2::thread_act::thread_suspend(port);
        });
}
120
/// State held by the crash handler process; implements `minidumper::ServerHandler`.
pub struct CrashServer {
    /// Init data received in a message of kind `1`; can be set at most once.
    initialization_params: OnceLock<InitCrashHandler>,
    /// Panic details received in a message of kind `2`; can be set at most once.
    panic_info: OnceLock<CrashPanic>,
    /// Set to `true` when a client connects; read by the watchdog thread in `crash_server`.
    has_connection: Arc<AtomicBool>,
}
126
127#[derive(Debug, Deserialize, Serialize, Clone)]
128pub struct CrashInfo {
129    pub init: InitCrashHandler,
130    pub panic: Option<CrashPanic>,
131    pub minidump_error: Option<String>,
132}
133
134#[derive(Debug, Deserialize, Serialize, Clone)]
135pub struct InitCrashHandler {
136    pub session_id: String,
137    pub zed_version: String,
138    pub release_channel: String,
139    pub commit_sha: String,
140    // pub gpu: String,
141}
142
143#[derive(Deserialize, Serialize, Debug, Clone)]
144pub struct CrashPanic {
145    pub message: String,
146    pub span: String,
147}
148
149impl minidumper::ServerHandler for CrashServer {
150    fn create_minidump_file(&self) -> Result<(File, PathBuf), io::Error> {
151        let err_message = "Missing initialization data";
152        let dump_path = paths::logs_dir()
153            .join(
154                &self
155                    .initialization_params
156                    .get()
157                    .expect(err_message)
158                    .session_id,
159            )
160            .with_extension("dmp");
161        let file = File::create(&dump_path)?;
162        Ok((file, dump_path))
163    }
164
165    fn on_minidump_created(&self, result: Result<MinidumpBinary, minidumper::Error>) -> LoopAction {
166        let minidump_error = match result {
167            Ok(mut md_bin) => {
168                use io::Write;
169                let _ = md_bin.file.flush();
170                None
171            }
172            Err(e) => Some(format!("{e:?}")),
173        };
174
175        let crash_info = CrashInfo {
176            init: self
177                .initialization_params
178                .get()
179                .expect("not initialized")
180                .clone(),
181            panic: self.panic_info.get().cloned(),
182            minidump_error,
183        };
184
185        let crash_data_path = paths::logs_dir()
186            .join(&crash_info.init.session_id)
187            .with_extension("json");
188
189        fs::write(crash_data_path, serde_json::to_vec(&crash_info).unwrap()).ok();
190
191        LoopAction::Exit
192    }
193
194    fn on_message(&self, kind: u32, buffer: Vec<u8>) {
195        match kind {
196            1 => {
197                let init_data =
198                    serde_json::from_slice::<InitCrashHandler>(&buffer).expect("invalid init data");
199                self.initialization_params
200                    .set(init_data)
201                    .expect("already initialized");
202            }
203            2 => {
204                let panic_data =
205                    serde_json::from_slice::<CrashPanic>(&buffer).expect("invalid panic data");
206                self.panic_info.set(panic_data).expect("already panicked");
207            }
208            _ => {
209                panic!("invalid message kind");
210            }
211        }
212    }
213
214    fn on_client_disconnected(&self, _clients: usize) -> LoopAction {
215        LoopAction::Exit
216    }
217
218    fn on_client_connected(&self, _clients: usize) -> LoopAction {
219        self.has_connection.store(true, Ordering::SeqCst);
220        LoopAction::Continue
221    }
222}
223
224pub fn handle_panic(message: String, span: Option<&Location>) {
225    let span = span
226        .map(|loc| format!("{}:{}", loc.file(), loc.line()))
227        .unwrap_or_default();
228
229    // wait 500ms for the crash handler process to start up
230    // if it's still not there just write panic info and no minidump
231    let retry_frequency = Duration::from_millis(100);
232    for _ in 0..5 {
233        if let Some(client) = CRASH_HANDLER.get() {
234            client
235                .send_message(
236                    2,
237                    serde_json::to_vec(&CrashPanic { message, span }).unwrap(),
238                )
239                .ok();
240            log::error!("triggering a crash to generate a minidump...");
241            #[cfg(target_os = "linux")]
242            CrashHandler.simulate_signal(crash_handler::Signal::Trap as u32);
243            #[cfg(not(target_os = "linux"))]
244            CrashHandler.simulate_exception(None);
245            break;
246        }
247        thread::sleep(retry_frequency);
248    }
249}
250
251pub fn crash_server(socket: &Path) {
252    let Ok(mut server) = minidumper::Server::with_name(socket) else {
253        log::info!("Couldn't create socket, there may already be a running crash server");
254        return;
255    };
256
257    let shutdown = Arc::new(AtomicBool::new(false));
258    let has_connection = Arc::new(AtomicBool::new(false));
259
260    std::thread::spawn({
261        let shutdown = shutdown.clone();
262        let has_connection = has_connection.clone();
263        move || {
264            std::thread::sleep(CRASH_HANDLER_CONNECT_TIMEOUT);
265            if !has_connection.load(Ordering::SeqCst) {
266                shutdown.store(true, Ordering::SeqCst);
267            }
268        }
269    });
270
271    server
272        .run(
273            Box::new(CrashServer {
274                initialization_params: OnceLock::new(),
275                panic_info: OnceLock::new(),
276                has_connection,
277            }),
278            &shutdown,
279            Some(CRASH_HANDLER_PING_TIMEOUT),
280        )
281        .expect("failed to run server");
282}