telemetry.rs

  1mod event_coalescer;
  2
  3use crate::TelemetrySettings;
  4use anyhow::Result;
  5use clock::SystemClock;
  6use futures::channel::mpsc;
  7use futures::{Future, FutureExt, StreamExt};
  8use gpui::{App, AppContext as _, BackgroundExecutor, Task};
  9use http_client::{self, AsyncBody, HttpClient, HttpClientWithUrl, Method, Request};
 10use parking_lot::Mutex;
 11use regex::Regex;
 12use release_channel::ReleaseChannel;
 13use settings::{Settings, SettingsStore};
 14use sha2::{Digest, Sha256};
 15use std::collections::HashSet;
 16use std::fs::File;
 17use std::io::Write;
 18use std::sync::LazyLock;
 19use std::time::Instant;
 20use std::{env, mem, path::PathBuf, sync::Arc, time::Duration};
 21use telemetry_events::{AssistantEventData, AssistantPhase, Event, EventRequestBody, EventWrapper};
 22use util::{ResultExt, TryFutureExt};
 23use worktree::{UpdatedEntriesSet, WorktreeId};
 24
 25use self::event_coalescer::EventCoalescer;
 26
/// Client-side telemetry collector.
///
/// Events are queued in `state` and sent in batches over `http_client`;
/// timers and uploads run on `executor`.
pub struct Telemetry {
    /// Time source used to timestamp queued events.
    clock: Arc<dyn SystemClock>,
    /// HTTP client used to build the telemetry endpoint URL and send batches.
    http_client: Arc<HttpClientWithUrl>,
    /// Executor on which the flush timer and upload tasks are spawned.
    executor: BackgroundExecutor,
    /// Mutable bookkeeping shared with background tasks.
    state: Arc<Mutex<TelemetryState>>,
}
 33
/// Mutable telemetry state, guarded by the mutex in [`Telemetry`].
struct TelemetryState {
    /// Copy of the global `TelemetrySettings`, refreshed whenever the settings store changes.
    settings: TelemetrySettings,
    system_id: Option<Arc<str>>,       // Per system
    installation_id: Option<Arc<str>>, // Per app installation (different for dev, nightly, preview, and stable)
    session_id: Option<String>,        // Per app launch
    metrics_id: Option<Arc<str>>,      // Per logged-in user
    /// Display name of the release channel, when one is registered globally.
    release_channel: Option<&'static str>,
    /// CPU architecture, taken from `env::consts::ARCH`.
    architecture: &'static str,
    /// Events waiting to be sent by the next flush.
    events_queue: Vec<EventWrapper>,
    /// Pending timer task that triggers a flush after `FLUSH_INTERVAL`; cleared on flush.
    flush_events_task: Option<Task<()>>,
    /// Local log file to which flushed events are appended as JSON lines.
    log_file: Option<File>,
    is_staff: Option<bool>,
    /// Timestamp of the first event queued since the last flush.
    first_event_date_time: Option<Instant>,
    /// Coalesces edit events into periods (see `log_edit_event`).
    event_coalescer: EventCoalescer,
    /// Queue length at which a flush is triggered (once an installation id is known).
    max_queue_size: usize,
    /// File-name patterns used to detect project types, with per-type reporting caches.
    project_marker_patterns: ProjectMarkerPatterns,

    os_name: String,
    app_version: String,
    /// Filled in after construction; `None` until then.
    os_version: Option<String>,
}
 55
/// List of (file-name pattern, cache) pairs; a file name matching the pattern
/// marks a worktree as containing that project type.
#[derive(Debug)]
struct ProjectMarkerPatterns(Vec<(Regex, ProjectCache)>);
 58
/// Per-project-type record of which worktrees have already been reported.
#[derive(Debug)]
struct ProjectCache {
    /// Project type name sent with the "Project Opened" event.
    name: String,
    /// Worktrees for which this project type has already been reported.
    worktree_ids_reported: HashSet<WorktreeId>,
}
 64
 65impl ProjectCache {
 66    fn new(name: String) -> Self {
 67        Self {
 68            name,
 69            worktree_ids_reported: HashSet::default(),
 70        }
 71    }
 72}
 73
/// Default queue length that triggers a flush (debug builds use a small value).
#[cfg(debug_assertions)]
const MAX_QUEUE_LEN: usize = 5;

/// Default queue length that triggers a flush (release builds).
#[cfg(not(debug_assertions))]
const MAX_QUEUE_LEN: usize = 50;

/// Delay between the first queued event and the timed flush (debug builds).
#[cfg(debug_assertions)]
const FLUSH_INTERVAL: Duration = Duration::from_secs(1);

/// Delay between the first queued event and the timed flush (release builds): five minutes.
#[cfg(not(debug_assertions))]
const FLUSH_INTERVAL: Duration = Duration::from_secs(60 * 5);
 85static ZED_CLIENT_CHECKSUM_SEED: LazyLock<Option<Vec<u8>>> = LazyLock::new(|| {
 86    option_env!("ZED_CLIENT_CHECKSUM_SEED")
 87        .map(|s| s.as_bytes().into())
 88        .or_else(|| {
 89            env::var("ZED_CLIENT_CHECKSUM_SEED")
 90                .ok()
 91                .map(|s| s.as_bytes().into())
 92        })
 93});
 94
 95pub fn os_name() -> String {
 96    #[cfg(target_os = "macos")]
 97    {
 98        "macOS".to_string()
 99    }
100    #[cfg(any(target_os = "linux", target_os = "freebsd"))]
101    {
102        format!("Linux {}", gpui::guess_compositor())
103    }
104
105    #[cfg(target_os = "windows")]
106    {
107        "Windows".to_string()
108    }
109}
110
111/// Note: This might do blocking IO! Only call from background threads
112pub fn os_version() -> String {
113    #[cfg(target_os = "macos")]
114    {
115        use cocoa::base::nil;
116        use cocoa::foundation::NSProcessInfo;
117
118        unsafe {
119            let process_info = cocoa::foundation::NSProcessInfo::processInfo(nil);
120            let version = process_info.operatingSystemVersion();
121            gpui::SemanticVersion::new(
122                version.majorVersion as usize,
123                version.minorVersion as usize,
124                version.patchVersion as usize,
125            )
126            .to_string()
127        }
128    }
129    #[cfg(any(target_os = "linux", target_os = "freebsd"))]
130    {
131        use std::path::Path;
132
133        let content = if let Ok(file) = std::fs::read_to_string(&Path::new("/etc/os-release")) {
134            file
135        } else if let Ok(file) = std::fs::read_to_string(&Path::new("/usr/lib/os-release")) {
136            file
137        } else {
138            log::error!("Failed to load /etc/os-release, /usr/lib/os-release");
139            "".to_string()
140        };
141        let mut name = "unknown";
142        let mut version = "unknown";
143
144        for line in content.lines() {
145            match line.split_once('=') {
146                Some(("ID", val)) => name = val.trim_matches('"'),
147                Some(("VERSION_ID", val)) => version = val.trim_matches('"'),
148                _ => {}
149            }
150        }
151
152        format!("{} {}", name, version)
153    }
154
155    #[cfg(target_os = "windows")]
156    {
157        let mut info = unsafe { std::mem::zeroed() };
158        let status = unsafe { windows::Wdk::System::SystemServices::RtlGetVersion(&mut info) };
159        if status.is_ok() {
160            gpui::SemanticVersion::new(
161                info.dwMajorVersion as _,
162                info.dwMinorVersion as _,
163                info.dwBuildNumber as _,
164            )
165            .to_string()
166        } else {
167            "unknown".to_string()
168        }
169    }
170}
171
172impl Telemetry {
173    pub fn new(
174        clock: Arc<dyn SystemClock>,
175        client: Arc<HttpClientWithUrl>,
176        cx: &mut App,
177    ) -> Arc<Self> {
178        let release_channel =
179            ReleaseChannel::try_global(cx).map(|release_channel| release_channel.display_name());
180
181        TelemetrySettings::register(cx);
182
183        let state = Arc::new(Mutex::new(TelemetryState {
184            settings: *TelemetrySettings::get_global(cx),
185            architecture: env::consts::ARCH,
186            release_channel,
187            system_id: None,
188            installation_id: None,
189            session_id: None,
190            metrics_id: None,
191            events_queue: Vec::new(),
192            flush_events_task: None,
193            log_file: None,
194            is_staff: None,
195            first_event_date_time: None,
196            event_coalescer: EventCoalescer::new(clock.clone()),
197            max_queue_size: MAX_QUEUE_LEN,
198            project_marker_patterns: ProjectMarkerPatterns(vec![
199                (
200                    Regex::new(r"^pnpm-lock\.yaml$").unwrap(),
201                    ProjectCache::new("pnpm".to_string()),
202                ),
203                (
204                    Regex::new(r"^yarn\.lock$").unwrap(),
205                    ProjectCache::new("yarn".to_string()),
206                ),
207                (
208                    Regex::new(r"^package\.json$").unwrap(),
209                    ProjectCache::new("node".to_string()),
210                ),
211                (
212                    Regex::new(
213                        r"^(global\.json|Directory\.Build\.props|.*\.(csproj|fsproj|vbproj|sln))$",
214                    )
215                    .unwrap(),
216                    ProjectCache::new("dotnet".to_string()),
217                ),
218            ]),
219
220            os_version: None,
221            os_name: os_name(),
222            app_version: release_channel::AppVersion::global(cx).to_string(),
223        }));
224        Self::log_file_path();
225
226        cx.background_spawn({
227            let state = state.clone();
228            let os_version = os_version();
229            state.lock().os_version = Some(os_version);
230            async move {
231                if let Some(tempfile) = File::create(Self::log_file_path()).log_err() {
232                    state.lock().log_file = Some(tempfile);
233                }
234            }
235        })
236        .detach();
237
238        cx.observe_global::<SettingsStore>({
239            let state = state.clone();
240
241            move |cx| {
242                let mut state = state.lock();
243                state.settings = *TelemetrySettings::get_global(cx);
244            }
245        })
246        .detach();
247
248        let this = Arc::new(Self {
249            clock,
250            http_client: client,
251            executor: cx.background_executor().clone(),
252            state,
253        });
254
255        let (tx, mut rx) = mpsc::unbounded();
256        ::telemetry::init(tx);
257
258        cx.background_spawn({
259            let this = Arc::downgrade(&this);
260            async move {
261                while let Some(event) = rx.next().await {
262                    let Some(state) = this.upgrade() else { break };
263                    state.report_event(Event::Flexible(event))
264                }
265            }
266        })
267        .detach();
268
269        // We should only ever have one instance of Telemetry, leak the subscription to keep it alive
270        // rather than store in TelemetryState, complicating spawn as subscriptions are not Send
271        std::mem::forget(cx.on_app_quit({
272            let this = this.clone();
273            move |_| this.shutdown_telemetry()
274        }));
275
276        this
277    }
278
    /// Test builds: shutdown is a no-op that resolves immediately.
    #[cfg(any(test, feature = "test-support"))]
    fn shutdown_telemetry(self: &Arc<Self>) -> impl Future<Output = ()> + use<> {
        Task::ready(())
    }

    /// Emits the "App Closed" event when the app quits; the returned future is already resolved.
    // Skip calling this function in tests.
    // TestAppContext ends up calling this function on shutdown and it panics when trying to find the TelemetrySettings
    #[cfg(not(any(test, feature = "test-support")))]
    fn shutdown_telemetry(self: &Arc<Self>) -> impl Future<Output = ()> + use<> {
        telemetry::event!("App Closed");
        // TODO: close final edit period and make sure it's sent
        Task::ready(())
    }
292
    /// Path of the local telemetry log: `telemetry.log` inside the logs directory.
    pub fn log_file_path() -> PathBuf {
        paths::logs_dir().join("telemetry.log")
    }
296
    /// Returns `true` when a checksum seed is available, i.e. request checksums can be computed.
    pub fn has_checksum_seed(&self) -> bool {
        ZED_CLIENT_CHECKSUM_SEED.is_some()
    }
300
301    pub fn start(
302        self: &Arc<Self>,
303        system_id: Option<String>,
304        installation_id: Option<String>,
305        session_id: String,
306        cx: &App,
307    ) {
308        let mut state = self.state.lock();
309        state.system_id = system_id.map(|id| id.into());
310        state.installation_id = installation_id.map(|id| id.into());
311        state.session_id = Some(session_id);
312        state.app_version = release_channel::AppVersion::global(cx).to_string();
313        state.os_name = os_name();
314    }
315
316    pub fn metrics_enabled(self: &Arc<Self>) -> bool {
317        let state = self.state.lock();
318        let enabled = state.settings.metrics;
319        drop(state);
320        enabled
321    }
322
323    pub fn set_authenticated_user_info(
324        self: &Arc<Self>,
325        metrics_id: Option<String>,
326        is_staff: bool,
327    ) {
328        let mut state = self.state.lock();
329
330        if !state.settings.metrics {
331            return;
332        }
333
334        let metrics_id: Option<Arc<str>> = metrics_id.map(|id| id.into());
335        state.metrics_id.clone_from(&metrics_id);
336        state.is_staff = Some(is_staff);
337        drop(state);
338    }
339
    /// Emits a telemetry event describing an assistant interaction.
    ///
    /// The event name is chosen from `event.phase`; the remaining fields of
    /// `event` are attached as event properties.
    pub fn report_assistant_event(self: &Arc<Self>, event: AssistantEventData) {
        // Map the phase to the event name under which it is reported.
        let event_type = match event.phase {
            AssistantPhase::Response => "Assistant Responded",
            AssistantPhase::Invoked => "Assistant Invoked",
            AssistantPhase::Accepted => "Assistant Response Accepted",
            AssistantPhase::Rejected => "Assistant Response Rejected",
        };

        telemetry::event!(
            event_type,
            conversation_id = event.conversation_id,
            kind = event.kind,
            phase = event.phase,
            message_id = event.message_id,
            model = event.model,
            model_provider = event.model_provider,
            response_latency = event.response_latency,
            error_message = event.error_message,
            language_name = event.language_name,
        );
    }
361
362    pub fn log_edit_event(self: &Arc<Self>, environment: &'static str, is_via_ssh: bool) {
363        let mut state = self.state.lock();
364        let period_data = state.event_coalescer.log_event(environment);
365        drop(state);
366
367        if let Some((start, end, environment)) = period_data {
368            let duration = end
369                .saturating_duration_since(start)
370                .min(Duration::from_secs(60 * 60 * 24))
371                .as_millis() as i64;
372
373            telemetry::event!(
374                "Editor Edited",
375                duration = duration,
376                environment = environment,
377                is_via_ssh = is_via_ssh
378            );
379        }
380    }
381
382    pub fn report_discovered_project_events(
383        self: &Arc<Self>,
384        worktree_id: WorktreeId,
385        updated_entries_set: &UpdatedEntriesSet,
386    ) {
387        let project_type_names: Vec<String> = {
388            let mut state = self.state.lock();
389            state
390                .project_marker_patterns
391                .0
392                .iter_mut()
393                .filter_map(|(pattern, project_cache)| {
394                    if project_cache.worktree_ids_reported.contains(&worktree_id) {
395                        return None;
396                    }
397
398                    let project_file_found = updated_entries_set.iter().any(|(path, _, _)| {
399                        path.as_ref()
400                            .file_name()
401                            .and_then(|name| name.to_str())
402                            .map(|name_str| pattern.is_match(name_str))
403                            .unwrap_or(false)
404                    });
405
406                    if !project_file_found {
407                        return None;
408                    }
409
410                    project_cache.worktree_ids_reported.insert(worktree_id);
411
412                    Some(project_cache.name.clone())
413                })
414                .collect()
415        };
416
417        for project_type_name in project_type_names {
418            telemetry::event!("Project Opened", project_type = project_type_name);
419        }
420    }
421
422    fn report_event(self: &Arc<Self>, event: Event) {
423        let mut state = self.state.lock();
424        // RUST_LOG=telemetry=trace to debug telemetry events
425        log::trace!(target: "telemetry", "{:?}", event);
426
427        if !state.settings.metrics {
428            return;
429        }
430
431        if state.flush_events_task.is_none() {
432            let this = self.clone();
433            state.flush_events_task = Some(self.executor.spawn(async move {
434                this.executor.timer(FLUSH_INTERVAL).await;
435                this.flush_events().detach();
436            }));
437        }
438
439        let date_time = self.clock.utc_now();
440
441        let milliseconds_since_first_event = match state.first_event_date_time {
442            Some(first_event_date_time) => date_time
443                .saturating_duration_since(first_event_date_time)
444                .min(Duration::from_secs(60 * 60 * 24))
445                .as_millis() as i64,
446            None => {
447                state.first_event_date_time = Some(date_time);
448                0
449            }
450        };
451
452        let signed_in = state.metrics_id.is_some();
453        state.events_queue.push(EventWrapper {
454            signed_in,
455            milliseconds_since_first_event,
456            event,
457        });
458
459        if state.installation_id.is_some() && state.events_queue.len() >= state.max_queue_size {
460            drop(state);
461            self.flush_events().detach();
462        }
463    }
464
    /// Returns the stored metrics id of the signed-in user, if any.
    pub fn metrics_id(self: &Arc<Self>) -> Option<Arc<str>> {
        self.state.lock().metrics_id.clone()
    }

    /// Returns the stored system id, if any.
    pub fn system_id(self: &Arc<Self>) -> Option<Arc<str>> {
        self.state.lock().system_id.clone()
    }

    /// Returns the stored installation id, if any.
    pub fn installation_id(self: &Arc<Self>) -> Option<Arc<str>> {
        self.state.lock().installation_id.clone()
    }

    /// Returns the stored staff flag; `None` until user info has been set.
    pub fn is_staff(self: &Arc<Self>) -> Option<bool> {
        self.state.lock().is_staff
    }
480
481    fn build_request(
482        self: &Arc<Self>,
483        // We take in the JSON bytes buffer so we can reuse the existing allocation.
484        mut json_bytes: Vec<u8>,
485        event_request: &EventRequestBody,
486    ) -> Result<Request<AsyncBody>> {
487        json_bytes.clear();
488        serde_json::to_writer(&mut json_bytes, event_request)?;
489
490        let checksum = calculate_json_checksum(&json_bytes).unwrap_or_default();
491
492        Ok(Request::builder()
493            .method(Method::POST)
494            .uri(
495                self.http_client
496                    .build_zed_api_url("/telemetry/events", &[])?
497                    .as_ref(),
498            )
499            .header("Content-Type", "application/json")
500            .header("x-zed-checksum", checksum)
501            .body(json_bytes.into())?)
502    }
503
    /// Drains the event queue and sends its contents to the telemetry endpoint.
    ///
    /// The queue, the first-event timestamp and the pending flush timer are
    /// reset synchronously; the returned task appends the events to the local
    /// log file (if open) and then uploads them. Failures are only logged, so
    /// drained events are not re-queued. Returns a ready task when the queue
    /// was empty.
    pub fn flush_events(self: &Arc<Self>) -> Task<()> {
        let mut state = self.state.lock();
        state.first_event_date_time = None;
        let events = mem::take(&mut state.events_queue);
        // Dropping the timer task here lets the next reported event schedule a fresh one.
        state.flush_events_task.take();
        drop(state);
        if events.is_empty() {
            return Task::ready(());
        }

        let this = self.clone();
        self.executor.spawn(
            async move {
                let mut json_bytes = Vec::new();

                // The lock guard is a temporary of the `if let` scrutinee, so the state
                // stays locked while the events are written to the log file.
                if let Some(file) = &mut this.state.lock().log_file {
                    for event in &events {
                        json_bytes.clear();
                        serde_json::to_writer(&mut json_bytes, event)?;
                        file.write_all(&json_bytes)?;
                        file.write_all(b"\n")?;
                    }
                }

                // Snapshot the identifying metadata under the lock, then release it before sending.
                let request_body = {
                    let state = this.state.lock();

                    EventRequestBody {
                        system_id: state.system_id.as_deref().map(Into::into),
                        installation_id: state.installation_id.as_deref().map(Into::into),
                        session_id: state.session_id.clone(),
                        metrics_id: state.metrics_id.as_deref().map(Into::into),
                        is_staff: state.is_staff,
                        app_version: state.app_version.clone(),
                        os_name: state.os_name.clone(),
                        os_version: state.os_version.clone(),
                        architecture: state.architecture.to_string(),

                        release_channel: state.release_channel.map(Into::into),
                        events,
                    }
                };

                let request = this.build_request(json_bytes, &request_body)?;
                let response = this.http_client.send(request).await?;
                if response.status() != 200 {
                    log::error!("Failed to send events: HTTP {:?}", response.status());
                }
                anyhow::Ok(())
            }
            // Errors from serialization, file IO or the request are logged and discarded.
            .log_err()
            .map(|_| ()),
        )
    }
558}
559
560pub fn calculate_json_checksum(json: &impl AsRef<[u8]>) -> Option<String> {
561    let Some(checksum_seed) = &*ZED_CLIENT_CHECKSUM_SEED else {
562        return None;
563    };
564
565    let mut summer = Sha256::new();
566    summer.update(checksum_seed);
567    summer.update(json);
568    summer.update(checksum_seed);
569    let mut checksum = String::new();
570    for byte in summer.finalize().as_slice() {
571        use std::fmt::Write;
572        write!(&mut checksum, "{:02x}", byte).unwrap();
573    }
574
575    Some(checksum)
576}
577
578#[cfg(test)]
579mod tests {
580    use super::*;
581    use clock::FakeSystemClock;
582    use gpui::TestAppContext;
583    use http_client::FakeHttpClient;
584    use std::collections::HashMap;
585    use telemetry_events::FlexibleEvent;
586
587    #[gpui::test]
588    fn test_telemetry_flush_on_max_queue_size(cx: &mut TestAppContext) {
589        init_test(cx);
590        let clock = Arc::new(FakeSystemClock::new());
591        let http = FakeHttpClient::with_200_response();
592        let system_id = Some("system_id".to_string());
593        let installation_id = Some("installation_id".to_string());
594        let session_id = "session_id".to_string();
595
596        cx.update(|cx| {
597            let telemetry = Telemetry::new(clock.clone(), http, cx);
598
599            telemetry.state.lock().max_queue_size = 4;
600            telemetry.start(system_id, installation_id, session_id, cx);
601
602            assert!(is_empty_state(&telemetry));
603
604            let first_date_time = clock.utc_now();
605            let event_properties = HashMap::from_iter([(
606                "test_key".to_string(),
607                serde_json::Value::String("test_value".to_string()),
608            )]);
609
610            let event = FlexibleEvent {
611                event_type: "test".to_string(),
612                event_properties,
613            };
614
615            telemetry.report_event(Event::Flexible(event.clone()));
616            assert_eq!(telemetry.state.lock().events_queue.len(), 1);
617            assert!(telemetry.state.lock().flush_events_task.is_some());
618            assert_eq!(
619                telemetry.state.lock().first_event_date_time,
620                Some(first_date_time)
621            );
622
623            clock.advance(Duration::from_millis(100));
624
625            telemetry.report_event(Event::Flexible(event.clone()));
626            assert_eq!(telemetry.state.lock().events_queue.len(), 2);
627            assert!(telemetry.state.lock().flush_events_task.is_some());
628            assert_eq!(
629                telemetry.state.lock().first_event_date_time,
630                Some(first_date_time)
631            );
632
633            clock.advance(Duration::from_millis(100));
634
635            telemetry.report_event(Event::Flexible(event.clone()));
636            assert_eq!(telemetry.state.lock().events_queue.len(), 3);
637            assert!(telemetry.state.lock().flush_events_task.is_some());
638            assert_eq!(
639                telemetry.state.lock().first_event_date_time,
640                Some(first_date_time)
641            );
642
643            clock.advance(Duration::from_millis(100));
644
645            // Adding a 4th event should cause a flush
646            telemetry.report_event(Event::Flexible(event));
647            assert!(is_empty_state(&telemetry));
648        });
649    }
650
651    #[gpui::test]
652    async fn test_telemetry_flush_on_flush_interval(
653        executor: BackgroundExecutor,
654        cx: &mut TestAppContext,
655    ) {
656        init_test(cx);
657        let clock = Arc::new(FakeSystemClock::new());
658        let http = FakeHttpClient::with_200_response();
659        let system_id = Some("system_id".to_string());
660        let installation_id = Some("installation_id".to_string());
661        let session_id = "session_id".to_string();
662
663        cx.update(|cx| {
664            let telemetry = Telemetry::new(clock.clone(), http, cx);
665            telemetry.state.lock().max_queue_size = 4;
666            telemetry.start(system_id, installation_id, session_id, cx);
667
668            assert!(is_empty_state(&telemetry));
669            let first_date_time = clock.utc_now();
670
671            let event_properties = HashMap::from_iter([(
672                "test_key".to_string(),
673                serde_json::Value::String("test_value".to_string()),
674            )]);
675
676            let event = FlexibleEvent {
677                event_type: "test".to_string(),
678                event_properties,
679            };
680
681            telemetry.report_event(Event::Flexible(event));
682            assert_eq!(telemetry.state.lock().events_queue.len(), 1);
683            assert!(telemetry.state.lock().flush_events_task.is_some());
684            assert_eq!(
685                telemetry.state.lock().first_event_date_time,
686                Some(first_date_time)
687            );
688
689            let duration = Duration::from_millis(1);
690
691            // Test 1 millisecond before the flush interval limit is met
692            executor.advance_clock(FLUSH_INTERVAL - duration);
693
694            assert!(!is_empty_state(&telemetry));
695
696            // Test the exact moment the flush interval limit is met
697            executor.advance_clock(duration);
698
699            assert!(is_empty_state(&telemetry));
700        });
701    }
702
703    // TODO:
704    // Test settings
705    // Update FakeHTTPClient to keep track of the number of requests and assert on it
706
    /// Installs a test `SettingsStore` as a global so `Telemetry::new` can register and read its settings.
    fn init_test(cx: &mut TestAppContext) {
        cx.update(|cx| {
            let settings_store = SettingsStore::test(cx);
            cx.set_global(settings_store);
        });
    }
713
714    fn is_empty_state(telemetry: &Telemetry) -> bool {
715        telemetry.state.lock().events_queue.is_empty()
716            && telemetry.state.lock().flush_events_task.is_none()
717            && telemetry.state.lock().first_event_date_time.is_none()
718    }
719}