telemetry.rs

  1mod event_coalescer;
  2
  3use crate::{ChannelId, TelemetrySettings};
  4use anyhow::Result;
  5use clock::SystemClock;
  6use collections::{HashMap, HashSet};
  7use futures::channel::mpsc;
  8use futures::{Future, StreamExt};
  9use gpui::{AppContext, BackgroundExecutor, Task};
 10use http_client::{self, AsyncBody, HttpClient, HttpClientWithUrl, Method, Request};
 11use parking_lot::Mutex;
 12use release_channel::ReleaseChannel;
 13use settings::{Settings, SettingsStore};
 14use sha2::{Digest, Sha256};
 15use std::fs::File;
 16use std::io::Write;
 17use std::time::Instant;
 18use std::{
 19    env, mem,
 20    path::PathBuf,
 21    sync::{Arc, LazyLock},
 22    time::Duration,
 23};
 24use telemetry_events::{
 25    AppEvent, AssistantEvent, CallEvent, EditEvent, Event, EventRequestBody, EventWrapper,
 26    InlineCompletionEvent,
 27};
 28use util::{ResultExt, TryFutureExt};
 29use worktree::{UpdatedEntriesSet, WorktreeId};
 30
 31use self::event_coalescer::EventCoalescer;
 32
 33pub struct Telemetry {
 34    clock: Arc<dyn SystemClock>,
 35    http_client: Arc<HttpClientWithUrl>,
 36    executor: BackgroundExecutor,
 37    state: Arc<Mutex<TelemetryState>>,
 38}
 39
 40struct TelemetryState {
 41    settings: TelemetrySettings,
 42    system_id: Option<Arc<str>>,       // Per system
 43    installation_id: Option<Arc<str>>, // Per app installation (different for dev, nightly, preview, and stable)
 44    session_id: Option<String>,        // Per app launch
 45    metrics_id: Option<Arc<str>>,      // Per logged-in user
 46    release_channel: Option<&'static str>,
 47    architecture: &'static str,
 48    events_queue: Vec<EventWrapper>,
 49    flush_events_task: Option<Task<()>>,
 50    log_file: Option<File>,
 51    is_staff: Option<bool>,
 52    first_event_date_time: Option<Instant>,
 53    event_coalescer: EventCoalescer,
 54    max_queue_size: usize,
 55    worktree_id_map: WorktreeIdMap,
 56
 57    os_name: String,
 58    app_version: String,
 59    os_version: Option<String>,
 60}
 61
 62#[derive(Debug)]
 63struct WorktreeIdMap(HashMap<String, ProjectCache>);
 64
 65#[derive(Debug)]
 66struct ProjectCache {
 67    name: String,
 68    worktree_ids_reported: HashSet<WorktreeId>,
 69}
 70
 71impl ProjectCache {
 72    fn new(name: String) -> Self {
 73        Self {
 74            name,
 75            worktree_ids_reported: HashSet::default(),
 76        }
 77    }
 78}
 79
 80#[cfg(debug_assertions)]
 81const MAX_QUEUE_LEN: usize = 5;
 82
 83#[cfg(not(debug_assertions))]
 84const MAX_QUEUE_LEN: usize = 50;
 85
 86#[cfg(debug_assertions)]
 87const FLUSH_INTERVAL: Duration = Duration::from_secs(1);
 88
 89#[cfg(not(debug_assertions))]
 90const FLUSH_INTERVAL: Duration = Duration::from_secs(60 * 5);
 91static ZED_CLIENT_CHECKSUM_SEED: LazyLock<Option<Vec<u8>>> = LazyLock::new(|| {
 92    option_env!("ZED_CLIENT_CHECKSUM_SEED")
 93        .map(|s| s.as_bytes().into())
 94        .or_else(|| {
 95            env::var("ZED_CLIENT_CHECKSUM_SEED")
 96                .ok()
 97                .map(|s| s.as_bytes().into())
 98        })
 99});
100
101pub fn os_name() -> String {
102    #[cfg(target_os = "macos")]
103    {
104        "macOS".to_string()
105    }
106    #[cfg(any(target_os = "linux", target_os = "freebsd"))]
107    {
108        format!("Linux {}", gpui::guess_compositor())
109    }
110
111    #[cfg(target_os = "windows")]
112    {
113        "Windows".to_string()
114    }
115}
116
117/// Note: This might do blocking IO! Only call from background threads
118pub fn os_version() -> String {
119    #[cfg(target_os = "macos")]
120    {
121        use cocoa::base::nil;
122        use cocoa::foundation::NSProcessInfo;
123
124        unsafe {
125            let process_info = cocoa::foundation::NSProcessInfo::processInfo(nil);
126            let version = process_info.operatingSystemVersion();
127            gpui::SemanticVersion::new(
128                version.majorVersion as usize,
129                version.minorVersion as usize,
130                version.patchVersion as usize,
131            )
132            .to_string()
133        }
134    }
135    #[cfg(any(target_os = "linux", target_os = "freebsd"))]
136    {
137        use std::path::Path;
138
139        let content = if let Ok(file) = std::fs::read_to_string(&Path::new("/etc/os-release")) {
140            file
141        } else if let Ok(file) = std::fs::read_to_string(&Path::new("/usr/lib/os-release")) {
142            file
143        } else {
144            log::error!("Failed to load /etc/os-release, /usr/lib/os-release");
145            "".to_string()
146        };
147        let mut name = "unknown".to_string();
148        let mut version = "unknown".to_string();
149
150        for line in content.lines() {
151            if line.starts_with("ID=") {
152                name = line.trim_start_matches("ID=").trim_matches('"').to_string();
153            }
154            if line.starts_with("VERSION_ID=") {
155                version = line
156                    .trim_start_matches("VERSION_ID=")
157                    .trim_matches('"')
158                    .to_string();
159            }
160        }
161
162        format!("{} {}", name, version)
163    }
164
165    #[cfg(target_os = "windows")]
166    {
167        let mut info = unsafe { std::mem::zeroed() };
168        let status = unsafe { windows::Wdk::System::SystemServices::RtlGetVersion(&mut info) };
169        if status.is_ok() {
170            gpui::SemanticVersion::new(
171                info.dwMajorVersion as _,
172                info.dwMinorVersion as _,
173                info.dwBuildNumber as _,
174            )
175            .to_string()
176        } else {
177            "unknown".to_string()
178        }
179    }
180}
181
182impl Telemetry {
183    pub fn new(
184        clock: Arc<dyn SystemClock>,
185        client: Arc<HttpClientWithUrl>,
186        cx: &mut AppContext,
187    ) -> Arc<Self> {
188        let release_channel =
189            ReleaseChannel::try_global(cx).map(|release_channel| release_channel.display_name());
190
191        TelemetrySettings::register(cx);
192
193        let state = Arc::new(Mutex::new(TelemetryState {
194            settings: *TelemetrySettings::get_global(cx),
195            architecture: env::consts::ARCH,
196            release_channel,
197            system_id: None,
198            installation_id: None,
199            session_id: None,
200            metrics_id: None,
201            events_queue: Vec::new(),
202            flush_events_task: None,
203            log_file: None,
204            is_staff: None,
205            first_event_date_time: None,
206            event_coalescer: EventCoalescer::new(clock.clone()),
207            max_queue_size: MAX_QUEUE_LEN,
208            worktree_id_map: WorktreeIdMap(HashMap::from_iter([
209                (
210                    "pnpm-lock.yaml".to_string(),
211                    ProjectCache::new("pnpm".to_string()),
212                ),
213                (
214                    "yarn.lock".to_string(),
215                    ProjectCache::new("yarn".to_string()),
216                ),
217                (
218                    "package.json".to_string(),
219                    ProjectCache::new("node".to_string()),
220                ),
221            ])),
222
223            os_version: None,
224            os_name: os_name(),
225            app_version: release_channel::AppVersion::global(cx).to_string(),
226        }));
227        Self::log_file_path();
228
229        cx.background_executor()
230            .spawn({
231                let state = state.clone();
232                let os_version = os_version();
233                state.lock().os_version = Some(os_version.clone());
234                async move {
235                    if let Some(tempfile) = File::create(Self::log_file_path()).log_err() {
236                        state.lock().log_file = Some(tempfile);
237                    }
238                }
239            })
240            .detach();
241
242        cx.observe_global::<SettingsStore>({
243            let state = state.clone();
244
245            move |cx| {
246                let mut state = state.lock();
247                state.settings = *TelemetrySettings::get_global(cx);
248            }
249        })
250        .detach();
251
252        let this = Arc::new(Self {
253            clock,
254            http_client: client,
255            executor: cx.background_executor().clone(),
256            state,
257        });
258
259        let (tx, mut rx) = mpsc::unbounded();
260        ::telemetry::init(tx);
261
262        cx.background_executor()
263            .spawn({
264                let this = Arc::downgrade(&this);
265                async move {
266                    while let Some(event) = rx.next().await {
267                        let Some(state) = this.upgrade() else { break };
268                        state.report_event(Event::Flexible(event))
269                    }
270                }
271            })
272            .detach();
273
274        // We should only ever have one instance of Telemetry, leak the subscription to keep it alive
275        // rather than store in TelemetryState, complicating spawn as subscriptions are not Send
276        std::mem::forget(cx.on_app_quit({
277            let this = this.clone();
278            move |_| this.shutdown_telemetry()
279        }));
280
281        this
282    }
283
284    #[cfg(any(test, feature = "test-support"))]
285    fn shutdown_telemetry(self: &Arc<Self>) -> impl Future<Output = ()> {
286        Task::ready(())
287    }
288
289    // Skip calling this function in tests.
290    // TestAppContext ends up calling this function on shutdown and it panics when trying to find the TelemetrySettings
291    #[cfg(not(any(test, feature = "test-support")))]
292    fn shutdown_telemetry(self: &Arc<Self>) -> impl Future<Output = ()> {
293        self.report_app_event("close".to_string());
294        // TODO: close final edit period and make sure it's sent
295        Task::ready(())
296    }
297
298    pub fn log_file_path() -> PathBuf {
299        paths::logs_dir().join("telemetry.log")
300    }
301
302    pub fn start(
303        self: &Arc<Self>,
304        system_id: Option<String>,
305        installation_id: Option<String>,
306        session_id: String,
307        cx: &AppContext,
308    ) {
309        let mut state = self.state.lock();
310        state.system_id = system_id.map(|id| id.into());
311        state.installation_id = installation_id.map(|id| id.into());
312        state.session_id = Some(session_id);
313        state.app_version = release_channel::AppVersion::global(cx).to_string();
314        state.os_name = os_name();
315    }
316
317    pub fn metrics_enabled(self: &Arc<Self>) -> bool {
318        let state = self.state.lock();
319        let enabled = state.settings.metrics;
320        drop(state);
321        enabled
322    }
323
324    pub fn set_authenticated_user_info(
325        self: &Arc<Self>,
326        metrics_id: Option<String>,
327        is_staff: bool,
328    ) {
329        let mut state = self.state.lock();
330
331        if !state.settings.metrics {
332            return;
333        }
334
335        let metrics_id: Option<Arc<str>> = metrics_id.map(|id| id.into());
336        state.metrics_id.clone_from(&metrics_id);
337        state.is_staff = Some(is_staff);
338        drop(state);
339    }
340
341    pub fn report_inline_completion_event(
342        self: &Arc<Self>,
343        provider: String,
344        suggestion_accepted: bool,
345        file_extension: Option<String>,
346    ) {
347        let event = Event::InlineCompletion(InlineCompletionEvent {
348            provider,
349            suggestion_accepted,
350            file_extension,
351        });
352
353        self.report_event(event)
354    }
355
356    pub fn report_assistant_event(self: &Arc<Self>, event: AssistantEvent) {
357        self.report_event(Event::Assistant(event));
358    }
359
360    pub fn report_call_event(
361        self: &Arc<Self>,
362        operation: &'static str,
363        room_id: Option<u64>,
364        channel_id: Option<ChannelId>,
365    ) {
366        let event = Event::Call(CallEvent {
367            operation: operation.to_string(),
368            room_id,
369            channel_id: channel_id.map(|cid| cid.0),
370        });
371
372        self.report_event(event)
373    }
374
375    pub fn report_app_event(self: &Arc<Self>, operation: String) -> Event {
376        let event = Event::App(AppEvent { operation });
377
378        self.report_event(event.clone());
379
380        event
381    }
382
383    pub fn log_edit_event(self: &Arc<Self>, environment: &'static str, is_via_ssh: bool) {
384        let mut state = self.state.lock();
385        let period_data = state.event_coalescer.log_event(environment);
386        drop(state);
387
388        if let Some((start, end, environment)) = period_data {
389            let event = Event::Edit(EditEvent {
390                duration: end
391                    .saturating_duration_since(start)
392                    .min(Duration::from_secs(60 * 60 * 24))
393                    .as_millis() as i64,
394                environment: environment.to_string(),
395                is_via_ssh,
396            });
397
398            self.report_event(event);
399        }
400    }
401
402    pub fn report_discovered_project_events(
403        self: &Arc<Self>,
404        worktree_id: WorktreeId,
405        updated_entries_set: &UpdatedEntriesSet,
406    ) {
407        let project_type_names: Vec<String> = {
408            let mut state = self.state.lock();
409            state
410                .worktree_id_map
411                .0
412                .iter_mut()
413                .filter_map(|(project_file_name, project_type_telemetry)| {
414                    if project_type_telemetry
415                        .worktree_ids_reported
416                        .contains(&worktree_id)
417                    {
418                        return None;
419                    }
420
421                    let project_file_found = updated_entries_set.iter().any(|(path, _, _)| {
422                        path.as_ref()
423                            .file_name()
424                            .and_then(|name| name.to_str())
425                            .map(|name_str| name_str == project_file_name)
426                            .unwrap_or(false)
427                    });
428
429                    if !project_file_found {
430                        return None;
431                    }
432
433                    project_type_telemetry
434                        .worktree_ids_reported
435                        .insert(worktree_id);
436
437                    Some(project_type_telemetry.name.clone())
438                })
439                .collect()
440        };
441
442        // Done on purpose to avoid calling `self.state.lock()` multiple times
443        for project_type_name in project_type_names {
444            self.report_app_event(format!("open {} project", project_type_name));
445        }
446    }
447
448    fn report_event(self: &Arc<Self>, event: Event) {
449        let mut state = self.state.lock();
450
451        if !state.settings.metrics {
452            return;
453        }
454
455        if state.flush_events_task.is_none() {
456            let this = self.clone();
457            let executor = self.executor.clone();
458            state.flush_events_task = Some(self.executor.spawn(async move {
459                executor.timer(FLUSH_INTERVAL).await;
460                this.flush_events();
461            }));
462        }
463
464        let date_time = self.clock.utc_now();
465
466        let milliseconds_since_first_event = match state.first_event_date_time {
467            Some(first_event_date_time) => date_time
468                .saturating_duration_since(first_event_date_time)
469                .min(Duration::from_secs(60 * 60 * 24))
470                .as_millis() as i64,
471            None => {
472                state.first_event_date_time = Some(date_time);
473                0
474            }
475        };
476
477        let signed_in = state.metrics_id.is_some();
478        state.events_queue.push(EventWrapper {
479            signed_in,
480            milliseconds_since_first_event,
481            event,
482        });
483
484        if state.installation_id.is_some() && state.events_queue.len() >= state.max_queue_size {
485            drop(state);
486            self.flush_events();
487        }
488    }
489
490    pub fn metrics_id(self: &Arc<Self>) -> Option<Arc<str>> {
491        self.state.lock().metrics_id.clone()
492    }
493
494    pub fn system_id(self: &Arc<Self>) -> Option<Arc<str>> {
495        self.state.lock().system_id.clone()
496    }
497
498    pub fn installation_id(self: &Arc<Self>) -> Option<Arc<str>> {
499        self.state.lock().installation_id.clone()
500    }
501
502    pub fn is_staff(self: &Arc<Self>) -> Option<bool> {
503        self.state.lock().is_staff
504    }
505
506    fn build_request(
507        self: &Arc<Self>,
508        // We take in the JSON bytes buffer so we can reuse the existing allocation.
509        mut json_bytes: Vec<u8>,
510        event_request: EventRequestBody,
511    ) -> Result<Request<AsyncBody>> {
512        json_bytes.clear();
513        serde_json::to_writer(&mut json_bytes, &event_request)?;
514
515        let checksum = calculate_json_checksum(&json_bytes).unwrap_or("".to_string());
516
517        Ok(Request::builder()
518            .method(Method::POST)
519            .uri(
520                self.http_client
521                    .build_zed_api_url("/telemetry/events", &[])?
522                    .as_ref(),
523            )
524            .header("Content-Type", "application/json")
525            .header("x-zed-checksum", checksum)
526            .body(json_bytes.into())?)
527    }
528
529    pub fn flush_events(self: &Arc<Self>) {
530        let mut state = self.state.lock();
531        state.first_event_date_time = None;
532        let mut events = mem::take(&mut state.events_queue);
533        state.flush_events_task.take();
534        drop(state);
535        if events.is_empty() {
536            return;
537        }
538
539        let this = self.clone();
540        self.executor
541            .spawn(
542                async move {
543                    let mut json_bytes = Vec::new();
544
545                    if let Some(file) = &mut this.state.lock().log_file {
546                        for event in &mut events {
547                            json_bytes.clear();
548                            serde_json::to_writer(&mut json_bytes, event)?;
549                            file.write_all(&json_bytes)?;
550                            file.write_all(b"\n")?;
551                        }
552                    }
553
554                    let request_body = {
555                        let state = this.state.lock();
556
557                        EventRequestBody {
558                            system_id: state.system_id.as_deref().map(Into::into),
559                            installation_id: state.installation_id.as_deref().map(Into::into),
560                            session_id: state.session_id.clone(),
561                            metrics_id: state.metrics_id.as_deref().map(Into::into),
562                            is_staff: state.is_staff,
563                            app_version: state.app_version.clone(),
564                            os_name: state.os_name.clone(),
565                            os_version: state.os_version.clone(),
566                            architecture: state.architecture.to_string(),
567
568                            release_channel: state.release_channel.map(Into::into),
569                            events,
570                        }
571                    };
572
573                    let request = this.build_request(json_bytes, request_body)?;
574                    let response = this.http_client.send(request).await?;
575                    if response.status() != 200 {
576                        log::error!("Failed to send events: HTTP {:?}", response.status());
577                    }
578                    anyhow::Ok(())
579                }
580                .log_err(),
581            )
582            .detach();
583    }
584}
585
586pub fn calculate_json_checksum(json: &impl AsRef<[u8]>) -> Option<String> {
587    let Some(checksum_seed) = &*ZED_CLIENT_CHECKSUM_SEED else {
588        return None;
589    };
590
591    let mut summer = Sha256::new();
592    summer.update(checksum_seed);
593    summer.update(json);
594    summer.update(checksum_seed);
595    let mut checksum = String::new();
596    for byte in summer.finalize().as_slice() {
597        use std::fmt::Write;
598        write!(&mut checksum, "{:02x}", byte).unwrap();
599    }
600
601    Some(checksum)
602}
603
604#[cfg(test)]
605mod tests {
606    use super::*;
607    use clock::FakeSystemClock;
608    use gpui::TestAppContext;
609    use http_client::FakeHttpClient;
610
611    #[gpui::test]
612    fn test_telemetry_flush_on_max_queue_size(cx: &mut TestAppContext) {
613        init_test(cx);
614        let clock = Arc::new(FakeSystemClock::new());
615        let http = FakeHttpClient::with_200_response();
616        let system_id = Some("system_id".to_string());
617        let installation_id = Some("installation_id".to_string());
618        let session_id = "session_id".to_string();
619
620        cx.update(|cx| {
621            let telemetry = Telemetry::new(clock.clone(), http, cx);
622
623            telemetry.state.lock().max_queue_size = 4;
624            telemetry.start(system_id, installation_id, session_id, cx);
625
626            assert!(is_empty_state(&telemetry));
627
628            let first_date_time = clock.utc_now();
629            let operation = "test".to_string();
630
631            let event = telemetry.report_app_event(operation.clone());
632            assert_eq!(
633                event,
634                Event::App(AppEvent {
635                    operation: operation.clone(),
636                })
637            );
638            assert_eq!(telemetry.state.lock().events_queue.len(), 1);
639            assert!(telemetry.state.lock().flush_events_task.is_some());
640            assert_eq!(
641                telemetry.state.lock().first_event_date_time,
642                Some(first_date_time)
643            );
644
645            clock.advance(Duration::from_millis(100));
646
647            let event = telemetry.report_app_event(operation.clone());
648            assert_eq!(
649                event,
650                Event::App(AppEvent {
651                    operation: operation.clone(),
652                })
653            );
654            assert_eq!(telemetry.state.lock().events_queue.len(), 2);
655            assert!(telemetry.state.lock().flush_events_task.is_some());
656            assert_eq!(
657                telemetry.state.lock().first_event_date_time,
658                Some(first_date_time)
659            );
660
661            clock.advance(Duration::from_millis(100));
662
663            let event = telemetry.report_app_event(operation.clone());
664            assert_eq!(
665                event,
666                Event::App(AppEvent {
667                    operation: operation.clone(),
668                })
669            );
670            assert_eq!(telemetry.state.lock().events_queue.len(), 3);
671            assert!(telemetry.state.lock().flush_events_task.is_some());
672            assert_eq!(
673                telemetry.state.lock().first_event_date_time,
674                Some(first_date_time)
675            );
676
677            clock.advance(Duration::from_millis(100));
678
679            // Adding a 4th event should cause a flush
680            let event = telemetry.report_app_event(operation.clone());
681            assert_eq!(
682                event,
683                Event::App(AppEvent {
684                    operation: operation.clone(),
685                })
686            );
687
688            assert!(is_empty_state(&telemetry));
689        });
690    }
691
692    #[gpui::test]
693    async fn test_telemetry_flush_on_flush_interval(
694        executor: BackgroundExecutor,
695        cx: &mut TestAppContext,
696    ) {
697        init_test(cx);
698        let clock = Arc::new(FakeSystemClock::new());
699        let http = FakeHttpClient::with_200_response();
700        let system_id = Some("system_id".to_string());
701        let installation_id = Some("installation_id".to_string());
702        let session_id = "session_id".to_string();
703
704        cx.update(|cx| {
705            let telemetry = Telemetry::new(clock.clone(), http, cx);
706            telemetry.state.lock().max_queue_size = 4;
707            telemetry.start(system_id, installation_id, session_id, cx);
708
709            assert!(is_empty_state(&telemetry));
710
711            let first_date_time = clock.utc_now();
712            let operation = "test".to_string();
713
714            let event = telemetry.report_app_event(operation.clone());
715            assert_eq!(
716                event,
717                Event::App(AppEvent {
718                    operation: operation.clone(),
719                })
720            );
721            assert_eq!(telemetry.state.lock().events_queue.len(), 1);
722            assert!(telemetry.state.lock().flush_events_task.is_some());
723            assert_eq!(
724                telemetry.state.lock().first_event_date_time,
725                Some(first_date_time)
726            );
727
728            let duration = Duration::from_millis(1);
729
730            // Test 1 millisecond before the flush interval limit is met
731            executor.advance_clock(FLUSH_INTERVAL - duration);
732
733            assert!(!is_empty_state(&telemetry));
734
735            // Test the exact moment the flush interval limit is met
736            executor.advance_clock(duration);
737
738            assert!(is_empty_state(&telemetry));
739        });
740    }
741
    // TODO:
    // - Test that the telemetry settings are respected
    // - Update FakeHttpClient to keep track of the number of requests and assert on it
745
746    fn init_test(cx: &mut TestAppContext) {
747        cx.update(|cx| {
748            let settings_store = SettingsStore::test(cx);
749            cx.set_global(settings_store);
750        });
751    }
752
753    fn is_empty_state(telemetry: &Telemetry) -> bool {
754        telemetry.state.lock().events_queue.is_empty()
755            && telemetry.state.lock().flush_events_task.is_none()
756            && telemetry.state.lock().first_event_date_time.is_none()
757    }
758}