telemetry.rs

  1mod event_coalescer;
  2
  3use crate::TelemetrySettings;
  4use anyhow::Result;
  5use clock::SystemClock;
  6use futures::channel::mpsc;
  7use futures::{Future, StreamExt};
  8use gpui::{App, AppContext as _, BackgroundExecutor, Task};
  9use http_client::{self, AsyncBody, HttpClient, HttpClientWithUrl, Method, Request};
 10use parking_lot::Mutex;
 11use release_channel::ReleaseChannel;
 12use settings::{Settings, SettingsStore};
 13use sha2::{Digest, Sha256};
 14use std::collections::{HashMap, HashSet};
 15use std::fs::File;
 16use std::io::Write;
 17use std::sync::LazyLock;
 18use std::time::Instant;
 19use std::{env, mem, path::PathBuf, sync::Arc, time::Duration};
 20use telemetry_events::{AssistantEventData, AssistantPhase, Event, EventRequestBody, EventWrapper};
 21use util::{ResultExt, TryFutureExt};
 22use worktree::{UpdatedEntriesSet, WorktreeId};
 23
 24use self::event_coalescer::EventCoalescer;
 25
 26pub struct Telemetry {
 27    clock: Arc<dyn SystemClock>,
 28    http_client: Arc<HttpClientWithUrl>,
 29    executor: BackgroundExecutor,
 30    state: Arc<Mutex<TelemetryState>>,
 31}
 32
 33struct TelemetryState {
 34    settings: TelemetrySettings,
 35    system_id: Option<Arc<str>>,       // Per system
 36    installation_id: Option<Arc<str>>, // Per app installation (different for dev, nightly, preview, and stable)
 37    session_id: Option<String>,        // Per app launch
 38    metrics_id: Option<Arc<str>>,      // Per logged-in user
 39    release_channel: Option<&'static str>,
 40    architecture: &'static str,
 41    events_queue: Vec<EventWrapper>,
 42    flush_events_task: Option<Task<()>>,
 43    log_file: Option<File>,
 44    is_staff: Option<bool>,
 45    first_event_date_time: Option<Instant>,
 46    event_coalescer: EventCoalescer,
 47    max_queue_size: usize,
 48    worktree_id_map: WorktreeIdMap,
 49
 50    os_name: String,
 51    app_version: String,
 52    os_version: Option<String>,
 53}
 54
 55#[derive(Debug)]
 56struct WorktreeIdMap(HashMap<String, ProjectCache>);
 57
 58#[derive(Debug)]
 59struct ProjectCache {
 60    name: String,
 61    worktree_ids_reported: HashSet<WorktreeId>,
 62}
 63
 64impl ProjectCache {
 65    fn new(name: String) -> Self {
 66        Self {
 67            name,
 68            worktree_ids_reported: HashSet::default(),
 69        }
 70    }
 71}
 72
 73#[cfg(debug_assertions)]
 74const MAX_QUEUE_LEN: usize = 5;
 75
 76#[cfg(not(debug_assertions))]
 77const MAX_QUEUE_LEN: usize = 50;
 78
 79#[cfg(debug_assertions)]
 80const FLUSH_INTERVAL: Duration = Duration::from_secs(1);
 81
 82#[cfg(not(debug_assertions))]
 83const FLUSH_INTERVAL: Duration = Duration::from_secs(60 * 5);
 84static ZED_CLIENT_CHECKSUM_SEED: LazyLock<Option<Vec<u8>>> = LazyLock::new(|| {
 85    option_env!("ZED_CLIENT_CHECKSUM_SEED")
 86        .map(|s| s.as_bytes().into())
 87        .or_else(|| {
 88            env::var("ZED_CLIENT_CHECKSUM_SEED")
 89                .ok()
 90                .map(|s| s.as_bytes().into())
 91        })
 92});
 93
 94pub fn os_name() -> String {
 95    #[cfg(target_os = "macos")]
 96    {
 97        "macOS".to_string()
 98    }
 99    #[cfg(any(target_os = "linux", target_os = "freebsd"))]
100    {
101        format!("Linux {}", gpui::guess_compositor())
102    }
103
104    #[cfg(target_os = "windows")]
105    {
106        "Windows".to_string()
107    }
108}
109
110/// Note: This might do blocking IO! Only call from background threads
111pub fn os_version() -> String {
112    #[cfg(target_os = "macos")]
113    {
114        use cocoa::base::nil;
115        use cocoa::foundation::NSProcessInfo;
116
117        unsafe {
118            let process_info = cocoa::foundation::NSProcessInfo::processInfo(nil);
119            let version = process_info.operatingSystemVersion();
120            gpui::SemanticVersion::new(
121                version.majorVersion as usize,
122                version.minorVersion as usize,
123                version.patchVersion as usize,
124            )
125            .to_string()
126        }
127    }
128    #[cfg(any(target_os = "linux", target_os = "freebsd"))]
129    {
130        use std::path::Path;
131
132        let content = if let Ok(file) = std::fs::read_to_string(&Path::new("/etc/os-release")) {
133            file
134        } else if let Ok(file) = std::fs::read_to_string(&Path::new("/usr/lib/os-release")) {
135            file
136        } else {
137            log::error!("Failed to load /etc/os-release, /usr/lib/os-release");
138            "".to_string()
139        };
140        let mut name = "unknown".to_string();
141        let mut version = "unknown".to_string();
142
143        for line in content.lines() {
144            if line.starts_with("ID=") {
145                name = line.trim_start_matches("ID=").trim_matches('"').to_string();
146            }
147            if line.starts_with("VERSION_ID=") {
148                version = line
149                    .trim_start_matches("VERSION_ID=")
150                    .trim_matches('"')
151                    .to_string();
152            }
153        }
154
155        format!("{} {}", name, version)
156    }
157
158    #[cfg(target_os = "windows")]
159    {
160        let mut info = unsafe { std::mem::zeroed() };
161        let status = unsafe { windows::Wdk::System::SystemServices::RtlGetVersion(&mut info) };
162        if status.is_ok() {
163            gpui::SemanticVersion::new(
164                info.dwMajorVersion as _,
165                info.dwMinorVersion as _,
166                info.dwBuildNumber as _,
167            )
168            .to_string()
169        } else {
170            "unknown".to_string()
171        }
172    }
173}
174
175impl Telemetry {
176    pub fn new(
177        clock: Arc<dyn SystemClock>,
178        client: Arc<HttpClientWithUrl>,
179        cx: &mut App,
180    ) -> Arc<Self> {
181        let release_channel =
182            ReleaseChannel::try_global(cx).map(|release_channel| release_channel.display_name());
183
184        TelemetrySettings::register(cx);
185
186        let state = Arc::new(Mutex::new(TelemetryState {
187            settings: *TelemetrySettings::get_global(cx),
188            architecture: env::consts::ARCH,
189            release_channel,
190            system_id: None,
191            installation_id: None,
192            session_id: None,
193            metrics_id: None,
194            events_queue: Vec::new(),
195            flush_events_task: None,
196            log_file: None,
197            is_staff: None,
198            first_event_date_time: None,
199            event_coalescer: EventCoalescer::new(clock.clone()),
200            max_queue_size: MAX_QUEUE_LEN,
201            worktree_id_map: WorktreeIdMap(HashMap::from_iter([
202                (
203                    "pnpm-lock.yaml".to_string(),
204                    ProjectCache::new("pnpm".to_string()),
205                ),
206                (
207                    "yarn.lock".to_string(),
208                    ProjectCache::new("yarn".to_string()),
209                ),
210                (
211                    "package.json".to_string(),
212                    ProjectCache::new("node".to_string()),
213                ),
214            ])),
215
216            os_version: None,
217            os_name: os_name(),
218            app_version: release_channel::AppVersion::global(cx).to_string(),
219        }));
220        Self::log_file_path();
221
222        cx.background_spawn({
223            let state = state.clone();
224            let os_version = os_version();
225            state.lock().os_version = Some(os_version.clone());
226            async move {
227                if let Some(tempfile) = File::create(Self::log_file_path()).log_err() {
228                    state.lock().log_file = Some(tempfile);
229                }
230            }
231        })
232        .detach();
233
234        cx.observe_global::<SettingsStore>({
235            let state = state.clone();
236
237            move |cx| {
238                let mut state = state.lock();
239                state.settings = *TelemetrySettings::get_global(cx);
240            }
241        })
242        .detach();
243
244        let this = Arc::new(Self {
245            clock,
246            http_client: client,
247            executor: cx.background_executor().clone(),
248            state,
249        });
250
251        let (tx, mut rx) = mpsc::unbounded();
252        ::telemetry::init(tx);
253
254        cx.background_spawn({
255            let this = Arc::downgrade(&this);
256            async move {
257                while let Some(event) = rx.next().await {
258                    let Some(state) = this.upgrade() else { break };
259                    state.report_event(Event::Flexible(event))
260                }
261            }
262        })
263        .detach();
264
265        // We should only ever have one instance of Telemetry, leak the subscription to keep it alive
266        // rather than store in TelemetryState, complicating spawn as subscriptions are not Send
267        std::mem::forget(cx.on_app_quit({
268            let this = this.clone();
269            move |_| this.shutdown_telemetry()
270        }));
271
272        this
273    }
274
275    #[cfg(any(test, feature = "test-support"))]
276    fn shutdown_telemetry(self: &Arc<Self>) -> impl Future<Output = ()> + use<> {
277        Task::ready(())
278    }
279
280    // Skip calling this function in tests.
281    // TestAppContext ends up calling this function on shutdown and it panics when trying to find the TelemetrySettings
282    #[cfg(not(any(test, feature = "test-support")))]
283    fn shutdown_telemetry(self: &Arc<Self>) -> impl Future<Output = ()> + use<> {
284        telemetry::event!("App Closed");
285        // TODO: close final edit period and make sure it's sent
286        Task::ready(())
287    }
288
289    pub fn log_file_path() -> PathBuf {
290        paths::logs_dir().join("telemetry.log")
291    }
292
293    pub fn start(
294        self: &Arc<Self>,
295        system_id: Option<String>,
296        installation_id: Option<String>,
297        session_id: String,
298        cx: &App,
299    ) {
300        let mut state = self.state.lock();
301        state.system_id = system_id.map(|id| id.into());
302        state.installation_id = installation_id.map(|id| id.into());
303        state.session_id = Some(session_id);
304        state.app_version = release_channel::AppVersion::global(cx).to_string();
305        state.os_name = os_name();
306    }
307
308    pub fn metrics_enabled(self: &Arc<Self>) -> bool {
309        let state = self.state.lock();
310        let enabled = state.settings.metrics;
311        drop(state);
312        enabled
313    }
314
315    pub fn set_authenticated_user_info(
316        self: &Arc<Self>,
317        metrics_id: Option<String>,
318        is_staff: bool,
319    ) {
320        let mut state = self.state.lock();
321
322        if !state.settings.metrics {
323            return;
324        }
325
326        let metrics_id: Option<Arc<str>> = metrics_id.map(|id| id.into());
327        state.metrics_id.clone_from(&metrics_id);
328        state.is_staff = Some(is_staff);
329        drop(state);
330    }
331
332    pub fn report_assistant_event(self: &Arc<Self>, event: AssistantEventData) {
333        let event_type = match event.phase {
334            AssistantPhase::Response => "Assistant Responded",
335            AssistantPhase::Invoked => "Assistant Invoked",
336            AssistantPhase::Accepted => "Assistant Response Accepted",
337            AssistantPhase::Rejected => "Assistant Response Rejected",
338        };
339
340        telemetry::event!(
341            event_type,
342            conversation_id = event.conversation_id,
343            kind = event.kind,
344            phase = event.phase,
345            message_id = event.message_id,
346            model = event.model,
347            model_provider = event.model_provider,
348            response_latency = event.response_latency,
349            error_message = event.error_message,
350            language_name = event.language_name,
351        );
352    }
353
354    pub fn log_edit_event(self: &Arc<Self>, environment: &'static str, is_via_ssh: bool) {
355        let mut state = self.state.lock();
356        let period_data = state.event_coalescer.log_event(environment);
357        drop(state);
358
359        if let Some((start, end, environment)) = period_data {
360            let duration = end
361                .saturating_duration_since(start)
362                .min(Duration::from_secs(60 * 60 * 24))
363                .as_millis() as i64;
364
365            telemetry::event!(
366                "Editor Edited",
367                duration = duration,
368                environment = environment.to_string(),
369                is_via_ssh = is_via_ssh
370            );
371        }
372    }
373
374    pub fn report_discovered_project_events(
375        self: &Arc<Self>,
376        worktree_id: WorktreeId,
377        updated_entries_set: &UpdatedEntriesSet,
378    ) {
379        let project_type_names: Vec<String> = {
380            let mut state = self.state.lock();
381            state
382                .worktree_id_map
383                .0
384                .iter_mut()
385                .filter_map(|(project_file_name, project_type_telemetry)| {
386                    if project_type_telemetry
387                        .worktree_ids_reported
388                        .contains(&worktree_id)
389                    {
390                        return None;
391                    }
392
393                    let project_file_found = updated_entries_set.iter().any(|(path, _, _)| {
394                        path.as_ref()
395                            .file_name()
396                            .and_then(|name| name.to_str())
397                            .map(|name_str| name_str == project_file_name)
398                            .unwrap_or(false)
399                    });
400
401                    if !project_file_found {
402                        return None;
403                    }
404
405                    project_type_telemetry
406                        .worktree_ids_reported
407                        .insert(worktree_id);
408
409                    Some(project_type_telemetry.name.clone())
410                })
411                .collect()
412        };
413
414        for project_type_name in project_type_names {
415            telemetry::event!("Project Opened", project_type = project_type_name);
416        }
417    }
418
419    fn report_event(self: &Arc<Self>, event: Event) {
420        let mut state = self.state.lock();
421        // RUST_LOG=telemetry=trace to debug telemetry events
422        log::trace!(target: "telemetry", "{:?}", event);
423
424        if !state.settings.metrics {
425            return;
426        }
427
428        if state.flush_events_task.is_none() {
429            let this = self.clone();
430            let executor = self.executor.clone();
431            state.flush_events_task = Some(self.executor.spawn(async move {
432                executor.timer(FLUSH_INTERVAL).await;
433                this.flush_events();
434            }));
435        }
436
437        let date_time = self.clock.utc_now();
438
439        let milliseconds_since_first_event = match state.first_event_date_time {
440            Some(first_event_date_time) => date_time
441                .saturating_duration_since(first_event_date_time)
442                .min(Duration::from_secs(60 * 60 * 24))
443                .as_millis() as i64,
444            None => {
445                state.first_event_date_time = Some(date_time);
446                0
447            }
448        };
449
450        let signed_in = state.metrics_id.is_some();
451        state.events_queue.push(EventWrapper {
452            signed_in,
453            milliseconds_since_first_event,
454            event,
455        });
456
457        if state.installation_id.is_some() && state.events_queue.len() >= state.max_queue_size {
458            drop(state);
459            self.flush_events();
460        }
461    }
462
463    pub fn metrics_id(self: &Arc<Self>) -> Option<Arc<str>> {
464        self.state.lock().metrics_id.clone()
465    }
466
467    pub fn system_id(self: &Arc<Self>) -> Option<Arc<str>> {
468        self.state.lock().system_id.clone()
469    }
470
471    pub fn installation_id(self: &Arc<Self>) -> Option<Arc<str>> {
472        self.state.lock().installation_id.clone()
473    }
474
475    pub fn is_staff(self: &Arc<Self>) -> Option<bool> {
476        self.state.lock().is_staff
477    }
478
479    fn build_request(
480        self: &Arc<Self>,
481        // We take in the JSON bytes buffer so we can reuse the existing allocation.
482        mut json_bytes: Vec<u8>,
483        event_request: EventRequestBody,
484    ) -> Result<Request<AsyncBody>> {
485        json_bytes.clear();
486        serde_json::to_writer(&mut json_bytes, &event_request)?;
487
488        let checksum = calculate_json_checksum(&json_bytes).unwrap_or("".to_string());
489
490        Ok(Request::builder()
491            .method(Method::POST)
492            .uri(
493                self.http_client
494                    .build_zed_api_url("/telemetry/events", &[])?
495                    .as_ref(),
496            )
497            .header("Content-Type", "application/json")
498            .header("x-zed-checksum", checksum)
499            .body(json_bytes.into())?)
500    }
501
502    pub fn flush_events(self: &Arc<Self>) {
503        let mut state = self.state.lock();
504        state.first_event_date_time = None;
505        let mut events = mem::take(&mut state.events_queue);
506        state.flush_events_task.take();
507        drop(state);
508        if events.is_empty() {
509            return;
510        }
511
512        let this = self.clone();
513        self.executor
514            .spawn(
515                async move {
516                    let mut json_bytes = Vec::new();
517
518                    if let Some(file) = &mut this.state.lock().log_file {
519                        for event in &mut events {
520                            json_bytes.clear();
521                            serde_json::to_writer(&mut json_bytes, event)?;
522                            file.write_all(&json_bytes)?;
523                            file.write_all(b"\n")?;
524                        }
525                    }
526
527                    let request_body = {
528                        let state = this.state.lock();
529
530                        EventRequestBody {
531                            system_id: state.system_id.as_deref().map(Into::into),
532                            installation_id: state.installation_id.as_deref().map(Into::into),
533                            session_id: state.session_id.clone(),
534                            metrics_id: state.metrics_id.as_deref().map(Into::into),
535                            is_staff: state.is_staff,
536                            app_version: state.app_version.clone(),
537                            os_name: state.os_name.clone(),
538                            os_version: state.os_version.clone(),
539                            architecture: state.architecture.to_string(),
540
541                            release_channel: state.release_channel.map(Into::into),
542                            events,
543                        }
544                    };
545
546                    let request = this.build_request(json_bytes, request_body)?;
547                    let response = this.http_client.send(request).await?;
548                    if response.status() != 200 {
549                        log::error!("Failed to send events: HTTP {:?}", response.status());
550                    }
551                    anyhow::Ok(())
552                }
553                .log_err(),
554            )
555            .detach();
556    }
557}
558
559pub fn calculate_json_checksum(json: &impl AsRef<[u8]>) -> Option<String> {
560    let Some(checksum_seed) = &*ZED_CLIENT_CHECKSUM_SEED else {
561        return None;
562    };
563
564    let mut summer = Sha256::new();
565    summer.update(checksum_seed);
566    summer.update(json);
567    summer.update(checksum_seed);
568    let mut checksum = String::new();
569    for byte in summer.finalize().as_slice() {
570        use std::fmt::Write;
571        write!(&mut checksum, "{:02x}", byte).unwrap();
572    }
573
574    Some(checksum)
575}
576
#[cfg(test)]
mod tests {
    use super::*;
    use clock::FakeSystemClock;
    use gpui::TestAppContext;
    use http_client::FakeHttpClient;
    use telemetry_events::FlexibleEvent;

    /// Reaching `max_queue_size` flushes the queue without waiting for the timer.
    #[gpui::test]
    fn test_telemetry_flush_on_max_queue_size(cx: &mut TestAppContext) {
        init_test(cx);
        let clock = Arc::new(FakeSystemClock::new());
        let http = FakeHttpClient::with_200_response();
        let system_id = Some("system_id".to_string());
        let installation_id = Some("installation_id".to_string());
        let session_id = "session_id".to_string();

        cx.update(|cx| {
            let telemetry = Telemetry::new(clock.clone(), http, cx);

            telemetry.state.lock().max_queue_size = 4;
            telemetry.start(system_id, installation_id, session_id, cx);

            assert!(is_empty_state(&telemetry));

            let first_date_time = clock.utc_now();
            let event = test_event();

            // The first three events only accumulate in the queue.
            for expected_queue_len in 1..=3 {
                telemetry.report_event(Event::Flexible(event.clone()));
                assert_pending(&telemetry, expected_queue_len, first_date_time);

                clock.advance(Duration::from_millis(100));
            }

            // Adding a 4th event should cause a flush
            telemetry.report_event(Event::Flexible(event));
            assert!(is_empty_state(&telemetry));
        });
    }

    /// A queued event is flushed once `FLUSH_INTERVAL` has elapsed, and not before.
    #[gpui::test]
    async fn test_telemetry_flush_on_flush_interval(
        executor: BackgroundExecutor,
        cx: &mut TestAppContext,
    ) {
        init_test(cx);
        let clock = Arc::new(FakeSystemClock::new());
        let http = FakeHttpClient::with_200_response();
        let system_id = Some("system_id".to_string());
        let installation_id = Some("installation_id".to_string());
        let session_id = "session_id".to_string();

        cx.update(|cx| {
            let telemetry = Telemetry::new(clock.clone(), http, cx);
            telemetry.state.lock().max_queue_size = 4;
            telemetry.start(system_id, installation_id, session_id, cx);

            assert!(is_empty_state(&telemetry));
            let first_date_time = clock.utc_now();

            telemetry.report_event(Event::Flexible(test_event()));
            assert_pending(&telemetry, 1, first_date_time);

            let duration = Duration::from_millis(1);

            // Test 1 millisecond before the flush interval limit is met
            executor.advance_clock(FLUSH_INTERVAL - duration);

            assert!(!is_empty_state(&telemetry));

            // Test the exact moment the flush interval limit is met
            executor.advance_clock(duration);

            assert!(is_empty_state(&telemetry));
        });
    }

    // TODO:
    // Test settings
    // Update FakeHTTPClient to keep track of the number of requests and assert on it

    /// Installs a test `SettingsStore` as a global.
    fn init_test(cx: &mut TestAppContext) {
        cx.update(|cx| {
            let settings_store = SettingsStore::test(cx);
            cx.set_global(settings_store);
        });
    }

    /// Builds the flexible event used by the tests above.
    fn test_event() -> FlexibleEvent {
        let event_properties = HashMap::from_iter([(
            "test_key".to_string(),
            serde_json::Value::String("test_value".to_string()),
        )]);

        FlexibleEvent {
            event_type: "test".to_string(),
            event_properties,
        }
    }

    /// Asserts that `expected_queue_len` events are queued, a flush timer is
    /// armed, and the batch started at `first_event_date_time`.
    fn assert_pending(
        telemetry: &Telemetry,
        expected_queue_len: usize,
        first_event_date_time: Instant,
    ) {
        let state = telemetry.state.lock();
        assert_eq!(state.events_queue.len(), expected_queue_len);
        assert!(state.flush_events_task.is_some());
        assert_eq!(state.first_event_date_time, Some(first_event_date_time));
    }

    /// True when nothing is queued, no flush timer is armed and no batch has
    /// been started.
    fn is_empty_state(telemetry: &Telemetry) -> bool {
        let state = telemetry.state.lock();
        state.events_queue.is_empty()
            && state.flush_events_task.is_none()
            && state.first_event_date_time.is_none()
    }
}