telemetry.rs

  1mod event_coalescer;
  2
  3use crate::TelemetrySettings;
  4use anyhow::Result;
  5use clock::SystemClock;
  6use futures::channel::mpsc;
  7use futures::{Future, FutureExt, StreamExt};
  8use gpui::{App, AppContext as _, BackgroundExecutor, Task};
  9use http_client::{self, AsyncBody, HttpClient, HttpClientWithUrl, Method, Request};
 10use parking_lot::Mutex;
 11use release_channel::ReleaseChannel;
 12use settings::{Settings, SettingsStore};
 13use sha2::{Digest, Sha256};
 14use std::collections::{HashMap, HashSet};
 15use std::fs::File;
 16use std::io::Write;
 17use std::sync::LazyLock;
 18use std::time::Instant;
 19use std::{env, mem, path::PathBuf, sync::Arc, time::Duration};
 20use telemetry_events::{AssistantEventData, AssistantPhase, Event, EventRequestBody, EventWrapper};
 21use util::{ResultExt, TryFutureExt};
 22use worktree::{UpdatedEntriesSet, WorktreeId};
 23
 24use self::event_coalescer::EventCoalescer;
 25
/// Handle through which the client records and uploads telemetry events.
///
/// The handle itself is immutable; everything that changes over time lives in
/// the mutex-guarded [`TelemetryState`] so it can be shared with the background
/// tasks spawned by the methods on this type.
pub struct Telemetry {
    /// Time source used to timestamp reported events.
    clock: Arc<dyn SystemClock>,
    /// Client used to build the events URL and send event batches.
    http_client: Arc<HttpClientWithUrl>,
    /// Executor on which the flush timer and uploads are spawned.
    executor: BackgroundExecutor,
    /// Mutable telemetry state shared with spawned tasks.
    state: Arc<Mutex<TelemetryState>>,
}
 32
/// Mutable state behind [`Telemetry`]'s mutex.
struct TelemetryState {
    /// Cached copy of the telemetry settings; `metrics` gates event reporting.
    settings: TelemetrySettings,
    /// Per system
    system_id: Option<Arc<str>>,
    /// Per app installation (different for dev, nightly, preview, and stable)
    installation_id: Option<Arc<str>>,
    /// Per app launch
    session_id: Option<String>,
    /// Per logged-in user
    metrics_id: Option<Arc<str>>,
    /// Display name of the release channel, when one is globally registered.
    release_channel: Option<&'static str>,
    /// CPU architecture, taken from `env::consts::ARCH`.
    architecture: &'static str,
    /// Events recorded since the last flush.
    events_queue: Vec<EventWrapper>,
    /// Timer task that flushes the queue after `FLUSH_INTERVAL`; `None` when
    /// no flush is scheduled.
    flush_events_task: Option<Task<()>>,
    /// Local log file that flushed events are appended to, once it was created.
    log_file: Option<File>,
    /// Whether the signed-in user is staff, once known.
    is_staff: Option<bool>,
    /// Timestamp of the first event in the current batch; cleared on flush.
    first_event_date_time: Option<Instant>,
    /// Coalesces edit activity into periods (see `log_edit_event`).
    event_coalescer: EventCoalescer,
    /// Queue length at which an immediate flush is triggered.
    max_queue_size: usize,
    /// Per project-file bookkeeping for "Project Opened" events.
    worktree_id_map: WorktreeIdMap,

    /// Operating-system label sent with every request.
    os_name: String,
    /// Application version sent with every request.
    app_version: String,
    /// Operating-system version; `None` until it has been resolved.
    os_version: Option<String>,
}
 54
/// Maps a project marker file name (for example `"package.json"`) to the
/// [`ProjectCache`] tracking which worktrees were already reported for it.
#[derive(Debug)]
struct WorktreeIdMap(HashMap<String, ProjectCache>);
 57
/// Reporting state for one detectable project type.
#[derive(Debug)]
struct ProjectCache {
    /// Project type label sent as the `project_type` of "Project Opened" events.
    name: String,
    /// Worktrees for which this project type has already been reported.
    worktree_ids_reported: HashSet<WorktreeId>,
}
 63
 64impl ProjectCache {
 65    fn new(name: String) -> Self {
 66        Self {
 67            name,
 68            worktree_ids_reported: HashSet::default(),
 69        }
 70    }
 71}
 72
 73#[cfg(debug_assertions)]
 74const MAX_QUEUE_LEN: usize = 5;
 75
 76#[cfg(not(debug_assertions))]
 77const MAX_QUEUE_LEN: usize = 50;
 78
 79#[cfg(debug_assertions)]
 80const FLUSH_INTERVAL: Duration = Duration::from_secs(1);
 81
 82#[cfg(not(debug_assertions))]
 83const FLUSH_INTERVAL: Duration = Duration::from_secs(60 * 5);
 84static ZED_CLIENT_CHECKSUM_SEED: LazyLock<Option<Vec<u8>>> = LazyLock::new(|| {
 85    option_env!("ZED_CLIENT_CHECKSUM_SEED")
 86        .map(|s| s.as_bytes().into())
 87        .or_else(|| {
 88            env::var("ZED_CLIENT_CHECKSUM_SEED")
 89                .ok()
 90                .map(|s| s.as_bytes().into())
 91        })
 92});
 93
 94pub fn os_name() -> String {
 95    #[cfg(target_os = "macos")]
 96    {
 97        "macOS".to_string()
 98    }
 99    #[cfg(any(target_os = "linux", target_os = "freebsd"))]
100    {
101        format!("Linux {}", gpui::guess_compositor())
102    }
103
104    #[cfg(target_os = "windows")]
105    {
106        "Windows".to_string()
107    }
108}
109
110/// Note: This might do blocking IO! Only call from background threads
111pub fn os_version() -> String {
112    #[cfg(target_os = "macos")]
113    {
114        use cocoa::base::nil;
115        use cocoa::foundation::NSProcessInfo;
116
117        unsafe {
118            let process_info = cocoa::foundation::NSProcessInfo::processInfo(nil);
119            let version = process_info.operatingSystemVersion();
120            gpui::SemanticVersion::new(
121                version.majorVersion as usize,
122                version.minorVersion as usize,
123                version.patchVersion as usize,
124            )
125            .to_string()
126        }
127    }
128    #[cfg(any(target_os = "linux", target_os = "freebsd"))]
129    {
130        use std::path::Path;
131
132        let content = if let Ok(file) = std::fs::read_to_string(&Path::new("/etc/os-release")) {
133            file
134        } else if let Ok(file) = std::fs::read_to_string(&Path::new("/usr/lib/os-release")) {
135            file
136        } else {
137            log::error!("Failed to load /etc/os-release, /usr/lib/os-release");
138            "".to_string()
139        };
140        let mut name = "unknown";
141        let mut version = "unknown";
142
143        for line in content.lines() {
144            match line.split_once('=') {
145                Some(("ID", val)) => name = val.trim_matches('"'),
146                Some(("VERSION_ID", val)) => version = val.trim_matches('"'),
147                _ => {}
148            }
149        }
150
151        format!("{} {}", name, version)
152    }
153
154    #[cfg(target_os = "windows")]
155    {
156        let mut info = unsafe { std::mem::zeroed() };
157        let status = unsafe { windows::Wdk::System::SystemServices::RtlGetVersion(&mut info) };
158        if status.is_ok() {
159            gpui::SemanticVersion::new(
160                info.dwMajorVersion as _,
161                info.dwMinorVersion as _,
162                info.dwBuildNumber as _,
163            )
164            .to_string()
165        } else {
166            "unknown".to_string()
167        }
168    }
169}
170
171impl Telemetry {
172    pub fn new(
173        clock: Arc<dyn SystemClock>,
174        client: Arc<HttpClientWithUrl>,
175        cx: &mut App,
176    ) -> Arc<Self> {
177        let release_channel =
178            ReleaseChannel::try_global(cx).map(|release_channel| release_channel.display_name());
179
180        TelemetrySettings::register(cx);
181
182        let state = Arc::new(Mutex::new(TelemetryState {
183            settings: *TelemetrySettings::get_global(cx),
184            architecture: env::consts::ARCH,
185            release_channel,
186            system_id: None,
187            installation_id: None,
188            session_id: None,
189            metrics_id: None,
190            events_queue: Vec::new(),
191            flush_events_task: None,
192            log_file: None,
193            is_staff: None,
194            first_event_date_time: None,
195            event_coalescer: EventCoalescer::new(clock.clone()),
196            max_queue_size: MAX_QUEUE_LEN,
197            worktree_id_map: WorktreeIdMap(HashMap::from_iter([
198                (
199                    "pnpm-lock.yaml".to_string(),
200                    ProjectCache::new("pnpm".to_string()),
201                ),
202                (
203                    "yarn.lock".to_string(),
204                    ProjectCache::new("yarn".to_string()),
205                ),
206                (
207                    "package.json".to_string(),
208                    ProjectCache::new("node".to_string()),
209                ),
210            ])),
211
212            os_version: None,
213            os_name: os_name(),
214            app_version: release_channel::AppVersion::global(cx).to_string(),
215        }));
216        Self::log_file_path();
217
218        cx.background_spawn({
219            let state = state.clone();
220            let os_version = os_version();
221            state.lock().os_version = Some(os_version);
222            async move {
223                if let Some(tempfile) = File::create(Self::log_file_path()).log_err() {
224                    state.lock().log_file = Some(tempfile);
225                }
226            }
227        })
228        .detach();
229
230        cx.observe_global::<SettingsStore>({
231            let state = state.clone();
232
233            move |cx| {
234                let mut state = state.lock();
235                state.settings = *TelemetrySettings::get_global(cx);
236            }
237        })
238        .detach();
239
240        let this = Arc::new(Self {
241            clock,
242            http_client: client,
243            executor: cx.background_executor().clone(),
244            state,
245        });
246
247        let (tx, mut rx) = mpsc::unbounded();
248        ::telemetry::init(tx);
249
250        cx.background_spawn({
251            let this = Arc::downgrade(&this);
252            async move {
253                while let Some(event) = rx.next().await {
254                    let Some(state) = this.upgrade() else { break };
255                    state.report_event(Event::Flexible(event))
256                }
257            }
258        })
259        .detach();
260
261        // We should only ever have one instance of Telemetry, leak the subscription to keep it alive
262        // rather than store in TelemetryState, complicating spawn as subscriptions are not Send
263        std::mem::forget(cx.on_app_quit({
264            let this = this.clone();
265            move |_| this.shutdown_telemetry()
266        }));
267
268        this
269    }
270
271    #[cfg(any(test, feature = "test-support"))]
272    fn shutdown_telemetry(self: &Arc<Self>) -> impl Future<Output = ()> + use<> {
273        Task::ready(())
274    }
275
276    // Skip calling this function in tests.
277    // TestAppContext ends up calling this function on shutdown and it panics when trying to find the TelemetrySettings
278    #[cfg(not(any(test, feature = "test-support")))]
279    fn shutdown_telemetry(self: &Arc<Self>) -> impl Future<Output = ()> + use<> {
280        telemetry::event!("App Closed");
281        // TODO: close final edit period and make sure it's sent
282        Task::ready(())
283    }
284
285    pub fn log_file_path() -> PathBuf {
286        paths::logs_dir().join("telemetry.log")
287    }
288
289    pub fn has_checksum_seed(&self) -> bool {
290        ZED_CLIENT_CHECKSUM_SEED.is_some()
291    }
292
293    pub fn start(
294        self: &Arc<Self>,
295        system_id: Option<String>,
296        installation_id: Option<String>,
297        session_id: String,
298        cx: &App,
299    ) {
300        let mut state = self.state.lock();
301        state.system_id = system_id.map(|id| id.into());
302        state.installation_id = installation_id.map(|id| id.into());
303        state.session_id = Some(session_id);
304        state.app_version = release_channel::AppVersion::global(cx).to_string();
305        state.os_name = os_name();
306    }
307
308    pub fn metrics_enabled(self: &Arc<Self>) -> bool {
309        let state = self.state.lock();
310        let enabled = state.settings.metrics;
311        drop(state);
312        enabled
313    }
314
315    pub fn set_authenticated_user_info(
316        self: &Arc<Self>,
317        metrics_id: Option<String>,
318        is_staff: bool,
319    ) {
320        let mut state = self.state.lock();
321
322        if !state.settings.metrics {
323            return;
324        }
325
326        let metrics_id: Option<Arc<str>> = metrics_id.map(|id| id.into());
327        state.metrics_id.clone_from(&metrics_id);
328        state.is_staff = Some(is_staff);
329        drop(state);
330    }
331
332    pub fn report_assistant_event(self: &Arc<Self>, event: AssistantEventData) {
333        let event_type = match event.phase {
334            AssistantPhase::Response => "Assistant Responded",
335            AssistantPhase::Invoked => "Assistant Invoked",
336            AssistantPhase::Accepted => "Assistant Response Accepted",
337            AssistantPhase::Rejected => "Assistant Response Rejected",
338        };
339
340        telemetry::event!(
341            event_type,
342            conversation_id = event.conversation_id,
343            kind = event.kind,
344            phase = event.phase,
345            message_id = event.message_id,
346            model = event.model,
347            model_provider = event.model_provider,
348            response_latency = event.response_latency,
349            error_message = event.error_message,
350            language_name = event.language_name,
351        );
352    }
353
354    pub fn log_edit_event(self: &Arc<Self>, environment: &'static str, is_via_ssh: bool) {
355        let mut state = self.state.lock();
356        let period_data = state.event_coalescer.log_event(environment);
357        drop(state);
358
359        if let Some((start, end, environment)) = period_data {
360            let duration = end
361                .saturating_duration_since(start)
362                .min(Duration::from_secs(60 * 60 * 24))
363                .as_millis() as i64;
364
365            telemetry::event!(
366                "Editor Edited",
367                duration = duration,
368                environment = environment,
369                is_via_ssh = is_via_ssh
370            );
371        }
372    }
373
374    pub fn report_discovered_project_events(
375        self: &Arc<Self>,
376        worktree_id: WorktreeId,
377        updated_entries_set: &UpdatedEntriesSet,
378    ) {
379        let project_type_names: Vec<String> = {
380            let mut state = self.state.lock();
381            state
382                .worktree_id_map
383                .0
384                .iter_mut()
385                .filter_map(|(project_file_name, project_type_telemetry)| {
386                    if project_type_telemetry
387                        .worktree_ids_reported
388                        .contains(&worktree_id)
389                    {
390                        return None;
391                    }
392
393                    let project_file_found = updated_entries_set.iter().any(|(path, _, _)| {
394                        path.as_ref()
395                            .file_name()
396                            .and_then(|name| name.to_str())
397                            .map(|name_str| name_str == project_file_name)
398                            .unwrap_or(false)
399                    });
400
401                    if !project_file_found {
402                        return None;
403                    }
404
405                    project_type_telemetry
406                        .worktree_ids_reported
407                        .insert(worktree_id);
408
409                    Some(project_type_telemetry.name.clone())
410                })
411                .collect()
412        };
413
414        for project_type_name in project_type_names {
415            telemetry::event!("Project Opened", project_type = project_type_name);
416        }
417    }
418
419    fn report_event(self: &Arc<Self>, event: Event) {
420        let mut state = self.state.lock();
421        // RUST_LOG=telemetry=trace to debug telemetry events
422        log::trace!(target: "telemetry", "{:?}", event);
423
424        if !state.settings.metrics {
425            return;
426        }
427
428        if state.flush_events_task.is_none() {
429            let this = self.clone();
430            state.flush_events_task = Some(self.executor.spawn(async move {
431                this.executor.timer(FLUSH_INTERVAL).await;
432                this.flush_events().detach();
433            }));
434        }
435
436        let date_time = self.clock.utc_now();
437
438        let milliseconds_since_first_event = match state.first_event_date_time {
439            Some(first_event_date_time) => date_time
440                .saturating_duration_since(first_event_date_time)
441                .min(Duration::from_secs(60 * 60 * 24))
442                .as_millis() as i64,
443            None => {
444                state.first_event_date_time = Some(date_time);
445                0
446            }
447        };
448
449        let signed_in = state.metrics_id.is_some();
450        state.events_queue.push(EventWrapper {
451            signed_in,
452            milliseconds_since_first_event,
453            event,
454        });
455
456        if state.installation_id.is_some() && state.events_queue.len() >= state.max_queue_size {
457            drop(state);
458            self.flush_events().detach();
459        }
460    }
461
462    pub fn metrics_id(self: &Arc<Self>) -> Option<Arc<str>> {
463        self.state.lock().metrics_id.clone()
464    }
465
466    pub fn system_id(self: &Arc<Self>) -> Option<Arc<str>> {
467        self.state.lock().system_id.clone()
468    }
469
470    pub fn installation_id(self: &Arc<Self>) -> Option<Arc<str>> {
471        self.state.lock().installation_id.clone()
472    }
473
474    pub fn is_staff(self: &Arc<Self>) -> Option<bool> {
475        self.state.lock().is_staff
476    }
477
478    fn build_request(
479        self: &Arc<Self>,
480        // We take in the JSON bytes buffer so we can reuse the existing allocation.
481        mut json_bytes: Vec<u8>,
482        event_request: &EventRequestBody,
483    ) -> Result<Request<AsyncBody>> {
484        json_bytes.clear();
485        serde_json::to_writer(&mut json_bytes, event_request)?;
486
487        let checksum = calculate_json_checksum(&json_bytes).unwrap_or_default();
488
489        Ok(Request::builder()
490            .method(Method::POST)
491            .uri(
492                self.http_client
493                    .build_zed_api_url("/telemetry/events", &[])?
494                    .as_ref(),
495            )
496            .header("Content-Type", "application/json")
497            .header("x-zed-checksum", checksum)
498            .body(json_bytes.into())?)
499    }
500
501    pub fn flush_events(self: &Arc<Self>) -> Task<()> {
502        let mut state = self.state.lock();
503        state.first_event_date_time = None;
504        let events = mem::take(&mut state.events_queue);
505        state.flush_events_task.take();
506        drop(state);
507        if events.is_empty() {
508            return Task::ready(());
509        }
510
511        let this = self.clone();
512        self.executor.spawn(
513            async move {
514                let mut json_bytes = Vec::new();
515
516                if let Some(file) = &mut this.state.lock().log_file {
517                    for event in &events {
518                        json_bytes.clear();
519                        serde_json::to_writer(&mut json_bytes, event)?;
520                        file.write_all(&json_bytes)?;
521                        file.write_all(b"\n")?;
522                    }
523                }
524
525                let request_body = {
526                    let state = this.state.lock();
527
528                    EventRequestBody {
529                        system_id: state.system_id.as_deref().map(Into::into),
530                        installation_id: state.installation_id.as_deref().map(Into::into),
531                        session_id: state.session_id.clone(),
532                        metrics_id: state.metrics_id.as_deref().map(Into::into),
533                        is_staff: state.is_staff,
534                        app_version: state.app_version.clone(),
535                        os_name: state.os_name.clone(),
536                        os_version: state.os_version.clone(),
537                        architecture: state.architecture.to_string(),
538
539                        release_channel: state.release_channel.map(Into::into),
540                        events,
541                    }
542                };
543
544                let request = this.build_request(json_bytes, &request_body)?;
545                let response = this.http_client.send(request).await?;
546                if response.status() != 200 {
547                    log::error!("Failed to send events: HTTP {:?}", response.status());
548                }
549                anyhow::Ok(())
550            }
551            .log_err()
552            .map(|_| ()),
553        )
554    }
555}
556
557pub fn calculate_json_checksum(json: &impl AsRef<[u8]>) -> Option<String> {
558    let Some(checksum_seed) = &*ZED_CLIENT_CHECKSUM_SEED else {
559        return None;
560    };
561
562    let mut summer = Sha256::new();
563    summer.update(checksum_seed);
564    summer.update(json);
565    summer.update(checksum_seed);
566    let mut checksum = String::new();
567    for byte in summer.finalize().as_slice() {
568        use std::fmt::Write;
569        write!(&mut checksum, "{:02x}", byte).unwrap();
570    }
571
572    Some(checksum)
573}
574
575#[cfg(test)]
576mod tests {
577    use super::*;
578    use clock::FakeSystemClock;
579    use gpui::TestAppContext;
580    use http_client::FakeHttpClient;
581    use telemetry_events::FlexibleEvent;
582
583    #[gpui::test]
584    fn test_telemetry_flush_on_max_queue_size(cx: &mut TestAppContext) {
585        init_test(cx);
586        let clock = Arc::new(FakeSystemClock::new());
587        let http = FakeHttpClient::with_200_response();
588        let system_id = Some("system_id".to_string());
589        let installation_id = Some("installation_id".to_string());
590        let session_id = "session_id".to_string();
591
592        cx.update(|cx| {
593            let telemetry = Telemetry::new(clock.clone(), http, cx);
594
595            telemetry.state.lock().max_queue_size = 4;
596            telemetry.start(system_id, installation_id, session_id, cx);
597
598            assert!(is_empty_state(&telemetry));
599
600            let first_date_time = clock.utc_now();
601            let event_properties = HashMap::from_iter([(
602                "test_key".to_string(),
603                serde_json::Value::String("test_value".to_string()),
604            )]);
605
606            let event = FlexibleEvent {
607                event_type: "test".to_string(),
608                event_properties,
609            };
610
611            telemetry.report_event(Event::Flexible(event.clone()));
612            assert_eq!(telemetry.state.lock().events_queue.len(), 1);
613            assert!(telemetry.state.lock().flush_events_task.is_some());
614            assert_eq!(
615                telemetry.state.lock().first_event_date_time,
616                Some(first_date_time)
617            );
618
619            clock.advance(Duration::from_millis(100));
620
621            telemetry.report_event(Event::Flexible(event.clone()));
622            assert_eq!(telemetry.state.lock().events_queue.len(), 2);
623            assert!(telemetry.state.lock().flush_events_task.is_some());
624            assert_eq!(
625                telemetry.state.lock().first_event_date_time,
626                Some(first_date_time)
627            );
628
629            clock.advance(Duration::from_millis(100));
630
631            telemetry.report_event(Event::Flexible(event.clone()));
632            assert_eq!(telemetry.state.lock().events_queue.len(), 3);
633            assert!(telemetry.state.lock().flush_events_task.is_some());
634            assert_eq!(
635                telemetry.state.lock().first_event_date_time,
636                Some(first_date_time)
637            );
638
639            clock.advance(Duration::from_millis(100));
640
641            // Adding a 4th event should cause a flush
642            telemetry.report_event(Event::Flexible(event));
643            assert!(is_empty_state(&telemetry));
644        });
645    }
646
647    #[gpui::test]
648    async fn test_telemetry_flush_on_flush_interval(
649        executor: BackgroundExecutor,
650        cx: &mut TestAppContext,
651    ) {
652        init_test(cx);
653        let clock = Arc::new(FakeSystemClock::new());
654        let http = FakeHttpClient::with_200_response();
655        let system_id = Some("system_id".to_string());
656        let installation_id = Some("installation_id".to_string());
657        let session_id = "session_id".to_string();
658
659        cx.update(|cx| {
660            let telemetry = Telemetry::new(clock.clone(), http, cx);
661            telemetry.state.lock().max_queue_size = 4;
662            telemetry.start(system_id, installation_id, session_id, cx);
663
664            assert!(is_empty_state(&telemetry));
665            let first_date_time = clock.utc_now();
666
667            let event_properties = HashMap::from_iter([(
668                "test_key".to_string(),
669                serde_json::Value::String("test_value".to_string()),
670            )]);
671
672            let event = FlexibleEvent {
673                event_type: "test".to_string(),
674                event_properties,
675            };
676
677            telemetry.report_event(Event::Flexible(event));
678            assert_eq!(telemetry.state.lock().events_queue.len(), 1);
679            assert!(telemetry.state.lock().flush_events_task.is_some());
680            assert_eq!(
681                telemetry.state.lock().first_event_date_time,
682                Some(first_date_time)
683            );
684
685            let duration = Duration::from_millis(1);
686
687            // Test 1 millisecond before the flush interval limit is met
688            executor.advance_clock(FLUSH_INTERVAL - duration);
689
690            assert!(!is_empty_state(&telemetry));
691
692            // Test the exact moment the flush interval limit is met
693            executor.advance_clock(duration);
694
695            assert!(is_empty_state(&telemetry));
696        });
697    }
698
    // TODO:
    // - Test settings
    // - Update `FakeHttpClient` to keep track of the number of requests and assert on it
702
703    fn init_test(cx: &mut TestAppContext) {
704        cx.update(|cx| {
705            let settings_store = SettingsStore::test(cx);
706            cx.set_global(settings_store);
707        });
708    }
709
710    fn is_empty_state(telemetry: &Telemetry) -> bool {
711        telemetry.state.lock().events_queue.is_empty()
712            && telemetry.state.lock().flush_events_task.is_none()
713            && telemetry.state.lock().first_event_date_time.is_none()
714    }
715}