telemetry.rs

  1mod event_coalescer;
  2
  3use crate::TelemetrySettings;
  4use anyhow::Result;
  5use clock::SystemClock;
  6use futures::channel::mpsc;
  7use futures::{Future, StreamExt};
  8use gpui::{App, BackgroundExecutor, Task};
  9use http_client::{self, AsyncBody, HttpClient, HttpClientWithUrl, Method, Request};
 10use parking_lot::Mutex;
 11use release_channel::ReleaseChannel;
 12use settings::{Settings, SettingsStore};
 13use sha2::{Digest, Sha256};
 14use std::collections::{HashMap, HashSet};
 15use std::fs::File;
 16use std::io::Write;
 17use std::sync::LazyLock;
 18use std::time::Instant;
 19use std::{env, mem, path::PathBuf, sync::Arc, time::Duration};
 20use telemetry_events::{AssistantEvent, AssistantPhase, Event, EventRequestBody, EventWrapper};
 21use util::{ResultExt, TryFutureExt};
 22use worktree::{UpdatedEntriesSet, WorktreeId};
 23
 24use self::event_coalescer::EventCoalescer;
 25
 26pub struct Telemetry {
 27    clock: Arc<dyn SystemClock>,
 28    http_client: Arc<HttpClientWithUrl>,
 29    executor: BackgroundExecutor,
 30    state: Arc<Mutex<TelemetryState>>,
 31}
 32
 33struct TelemetryState {
 34    settings: TelemetrySettings,
 35    system_id: Option<Arc<str>>,       // Per system
 36    installation_id: Option<Arc<str>>, // Per app installation (different for dev, nightly, preview, and stable)
 37    session_id: Option<String>,        // Per app launch
 38    metrics_id: Option<Arc<str>>,      // Per logged-in user
 39    release_channel: Option<&'static str>,
 40    architecture: &'static str,
 41    events_queue: Vec<EventWrapper>,
 42    flush_events_task: Option<Task<()>>,
 43    log_file: Option<File>,
 44    is_staff: Option<bool>,
 45    first_event_date_time: Option<Instant>,
 46    event_coalescer: EventCoalescer,
 47    max_queue_size: usize,
 48    worktree_id_map: WorktreeIdMap,
 49
 50    os_name: String,
 51    app_version: String,
 52    os_version: Option<String>,
 53}
 54
 55#[derive(Debug)]
 56struct WorktreeIdMap(HashMap<String, ProjectCache>);
 57
 58#[derive(Debug)]
 59struct ProjectCache {
 60    name: String,
 61    worktree_ids_reported: HashSet<WorktreeId>,
 62}
 63
 64impl ProjectCache {
 65    fn new(name: String) -> Self {
 66        Self {
 67            name,
 68            worktree_ids_reported: HashSet::default(),
 69        }
 70    }
 71}
 72
 73#[cfg(debug_assertions)]
 74const MAX_QUEUE_LEN: usize = 5;
 75
 76#[cfg(not(debug_assertions))]
 77const MAX_QUEUE_LEN: usize = 50;
 78
 79#[cfg(debug_assertions)]
 80const FLUSH_INTERVAL: Duration = Duration::from_secs(1);
 81
 82#[cfg(not(debug_assertions))]
 83const FLUSH_INTERVAL: Duration = Duration::from_secs(60 * 5);
 84static ZED_CLIENT_CHECKSUM_SEED: LazyLock<Option<Vec<u8>>> = LazyLock::new(|| {
 85    option_env!("ZED_CLIENT_CHECKSUM_SEED")
 86        .map(|s| s.as_bytes().into())
 87        .or_else(|| {
 88            env::var("ZED_CLIENT_CHECKSUM_SEED")
 89                .ok()
 90                .map(|s| s.as_bytes().into())
 91        })
 92});
 93
 94pub fn os_name() -> String {
 95    #[cfg(target_os = "macos")]
 96    {
 97        "macOS".to_string()
 98    }
 99    #[cfg(any(target_os = "linux", target_os = "freebsd"))]
100    {
101        format!("Linux {}", gpui::guess_compositor())
102    }
103
104    #[cfg(target_os = "windows")]
105    {
106        "Windows".to_string()
107    }
108}
109
110/// Note: This might do blocking IO! Only call from background threads
111pub fn os_version() -> String {
112    #[cfg(target_os = "macos")]
113    {
114        use cocoa::base::nil;
115        use cocoa::foundation::NSProcessInfo;
116
117        unsafe {
118            let process_info = cocoa::foundation::NSProcessInfo::processInfo(nil);
119            let version = process_info.operatingSystemVersion();
120            gpui::SemanticVersion::new(
121                version.majorVersion as usize,
122                version.minorVersion as usize,
123                version.patchVersion as usize,
124            )
125            .to_string()
126        }
127    }
128    #[cfg(any(target_os = "linux", target_os = "freebsd"))]
129    {
130        use std::path::Path;
131
132        let content = if let Ok(file) = std::fs::read_to_string(&Path::new("/etc/os-release")) {
133            file
134        } else if let Ok(file) = std::fs::read_to_string(&Path::new("/usr/lib/os-release")) {
135            file
136        } else {
137            log::error!("Failed to load /etc/os-release, /usr/lib/os-release");
138            "".to_string()
139        };
140        let mut name = "unknown".to_string();
141        let mut version = "unknown".to_string();
142
143        for line in content.lines() {
144            if line.starts_with("ID=") {
145                name = line.trim_start_matches("ID=").trim_matches('"').to_string();
146            }
147            if line.starts_with("VERSION_ID=") {
148                version = line
149                    .trim_start_matches("VERSION_ID=")
150                    .trim_matches('"')
151                    .to_string();
152            }
153        }
154
155        format!("{} {}", name, version)
156    }
157
158    #[cfg(target_os = "windows")]
159    {
160        let mut info = unsafe { std::mem::zeroed() };
161        let status = unsafe { windows::Wdk::System::SystemServices::RtlGetVersion(&mut info) };
162        if status.is_ok() {
163            gpui::SemanticVersion::new(
164                info.dwMajorVersion as _,
165                info.dwMinorVersion as _,
166                info.dwBuildNumber as _,
167            )
168            .to_string()
169        } else {
170            "unknown".to_string()
171        }
172    }
173}
174
175impl Telemetry {
176    pub fn new(
177        clock: Arc<dyn SystemClock>,
178        client: Arc<HttpClientWithUrl>,
179        cx: &mut App,
180    ) -> Arc<Self> {
181        let release_channel =
182            ReleaseChannel::try_global(cx).map(|release_channel| release_channel.display_name());
183
184        TelemetrySettings::register(cx);
185
186        let state = Arc::new(Mutex::new(TelemetryState {
187            settings: *TelemetrySettings::get_global(cx),
188            architecture: env::consts::ARCH,
189            release_channel,
190            system_id: None,
191            installation_id: None,
192            session_id: None,
193            metrics_id: None,
194            events_queue: Vec::new(),
195            flush_events_task: None,
196            log_file: None,
197            is_staff: None,
198            first_event_date_time: None,
199            event_coalescer: EventCoalescer::new(clock.clone()),
200            max_queue_size: MAX_QUEUE_LEN,
201            worktree_id_map: WorktreeIdMap(HashMap::from_iter([
202                (
203                    "pnpm-lock.yaml".to_string(),
204                    ProjectCache::new("pnpm".to_string()),
205                ),
206                (
207                    "yarn.lock".to_string(),
208                    ProjectCache::new("yarn".to_string()),
209                ),
210                (
211                    "package.json".to_string(),
212                    ProjectCache::new("node".to_string()),
213                ),
214            ])),
215
216            os_version: None,
217            os_name: os_name(),
218            app_version: release_channel::AppVersion::global(cx).to_string(),
219        }));
220        Self::log_file_path();
221
222        cx.background_executor()
223            .spawn({
224                let state = state.clone();
225                let os_version = os_version();
226                state.lock().os_version = Some(os_version.clone());
227                async move {
228                    if let Some(tempfile) = File::create(Self::log_file_path()).log_err() {
229                        state.lock().log_file = Some(tempfile);
230                    }
231                }
232            })
233            .detach();
234
235        cx.observe_global::<SettingsStore>({
236            let state = state.clone();
237
238            move |cx| {
239                let mut state = state.lock();
240                state.settings = *TelemetrySettings::get_global(cx);
241            }
242        })
243        .detach();
244
245        let this = Arc::new(Self {
246            clock,
247            http_client: client,
248            executor: cx.background_executor().clone(),
249            state,
250        });
251
252        let (tx, mut rx) = mpsc::unbounded();
253        ::telemetry::init(tx);
254
255        cx.background_executor()
256            .spawn({
257                let this = Arc::downgrade(&this);
258                async move {
259                    while let Some(event) = rx.next().await {
260                        let Some(state) = this.upgrade() else { break };
261                        state.report_event(Event::Flexible(event))
262                    }
263                }
264            })
265            .detach();
266
267        // We should only ever have one instance of Telemetry, leak the subscription to keep it alive
268        // rather than store in TelemetryState, complicating spawn as subscriptions are not Send
269        std::mem::forget(cx.on_app_quit({
270            let this = this.clone();
271            move |_| this.shutdown_telemetry()
272        }));
273
274        this
275    }
276
277    #[cfg(any(test, feature = "test-support"))]
278    fn shutdown_telemetry(self: &Arc<Self>) -> impl Future<Output = ()> {
279        Task::ready(())
280    }
281
282    // Skip calling this function in tests.
283    // TestAppContext ends up calling this function on shutdown and it panics when trying to find the TelemetrySettings
284    #[cfg(not(any(test, feature = "test-support")))]
285    fn shutdown_telemetry(self: &Arc<Self>) -> impl Future<Output = ()> {
286        telemetry::event!("App Closed");
287        // TODO: close final edit period and make sure it's sent
288        Task::ready(())
289    }
290
291    pub fn log_file_path() -> PathBuf {
292        paths::logs_dir().join("telemetry.log")
293    }
294
295    pub fn start(
296        self: &Arc<Self>,
297        system_id: Option<String>,
298        installation_id: Option<String>,
299        session_id: String,
300        cx: &App,
301    ) {
302        let mut state = self.state.lock();
303        state.system_id = system_id.map(|id| id.into());
304        state.installation_id = installation_id.map(|id| id.into());
305        state.session_id = Some(session_id);
306        state.app_version = release_channel::AppVersion::global(cx).to_string();
307        state.os_name = os_name();
308    }
309
310    pub fn metrics_enabled(self: &Arc<Self>) -> bool {
311        let state = self.state.lock();
312        let enabled = state.settings.metrics;
313        drop(state);
314        enabled
315    }
316
317    pub fn set_authenticated_user_info(
318        self: &Arc<Self>,
319        metrics_id: Option<String>,
320        is_staff: bool,
321    ) {
322        let mut state = self.state.lock();
323
324        if !state.settings.metrics {
325            return;
326        }
327
328        let metrics_id: Option<Arc<str>> = metrics_id.map(|id| id.into());
329        state.metrics_id.clone_from(&metrics_id);
330        state.is_staff = Some(is_staff);
331        drop(state);
332    }
333
334    pub fn report_assistant_event(self: &Arc<Self>, event: AssistantEvent) {
335        let event_type = match event.phase {
336            AssistantPhase::Response => "Assistant Responded",
337            AssistantPhase::Invoked => "Assistant Invoked",
338            AssistantPhase::Accepted => "Assistant Response Accepted",
339            AssistantPhase::Rejected => "Assistant Response Rejected",
340        };
341
342        telemetry::event!(
343            event_type,
344            conversation_id = event.conversation_id,
345            kind = event.kind,
346            phase = event.phase,
347            message_id = event.message_id,
348            model = event.model,
349            model_provider = event.model_provider,
350            response_latency = event.response_latency,
351            error_message = event.error_message,
352            language_name = event.language_name,
353        );
354    }
355
356    pub fn log_edit_event(self: &Arc<Self>, environment: &'static str, is_via_ssh: bool) {
357        let mut state = self.state.lock();
358        let period_data = state.event_coalescer.log_event(environment);
359        drop(state);
360
361        if let Some((start, end, environment)) = period_data {
362            let duration = end
363                .saturating_duration_since(start)
364                .min(Duration::from_secs(60 * 60 * 24))
365                .as_millis() as i64;
366
367            telemetry::event!(
368                "Editor Edited",
369                duration = duration,
370                environment = environment.to_string(),
371                is_via_ssh = is_via_ssh
372            );
373        }
374    }
375
376    pub fn report_discovered_project_events(
377        self: &Arc<Self>,
378        worktree_id: WorktreeId,
379        updated_entries_set: &UpdatedEntriesSet,
380    ) {
381        let project_type_names: Vec<String> = {
382            let mut state = self.state.lock();
383            state
384                .worktree_id_map
385                .0
386                .iter_mut()
387                .filter_map(|(project_file_name, project_type_telemetry)| {
388                    if project_type_telemetry
389                        .worktree_ids_reported
390                        .contains(&worktree_id)
391                    {
392                        return None;
393                    }
394
395                    let project_file_found = updated_entries_set.iter().any(|(path, _, _)| {
396                        path.as_ref()
397                            .file_name()
398                            .and_then(|name| name.to_str())
399                            .map(|name_str| name_str == project_file_name)
400                            .unwrap_or(false)
401                    });
402
403                    if !project_file_found {
404                        return None;
405                    }
406
407                    project_type_telemetry
408                        .worktree_ids_reported
409                        .insert(worktree_id);
410
411                    Some(project_type_telemetry.name.clone())
412                })
413                .collect()
414        };
415
416        for project_type_name in project_type_names {
417            telemetry::event!("Project Opened", project_type = project_type_name);
418        }
419    }
420
421    fn report_event(self: &Arc<Self>, event: Event) {
422        let mut state = self.state.lock();
423
424        if !state.settings.metrics {
425            return;
426        }
427
428        if state.flush_events_task.is_none() {
429            let this = self.clone();
430            let executor = self.executor.clone();
431            state.flush_events_task = Some(self.executor.spawn(async move {
432                executor.timer(FLUSH_INTERVAL).await;
433                this.flush_events();
434            }));
435        }
436
437        let date_time = self.clock.utc_now();
438
439        let milliseconds_since_first_event = match state.first_event_date_time {
440            Some(first_event_date_time) => date_time
441                .saturating_duration_since(first_event_date_time)
442                .min(Duration::from_secs(60 * 60 * 24))
443                .as_millis() as i64,
444            None => {
445                state.first_event_date_time = Some(date_time);
446                0
447            }
448        };
449
450        let signed_in = state.metrics_id.is_some();
451        state.events_queue.push(EventWrapper {
452            signed_in,
453            milliseconds_since_first_event,
454            event,
455        });
456
457        if state.installation_id.is_some() && state.events_queue.len() >= state.max_queue_size {
458            drop(state);
459            self.flush_events();
460        }
461    }
462
463    pub fn metrics_id(self: &Arc<Self>) -> Option<Arc<str>> {
464        self.state.lock().metrics_id.clone()
465    }
466
467    pub fn system_id(self: &Arc<Self>) -> Option<Arc<str>> {
468        self.state.lock().system_id.clone()
469    }
470
471    pub fn installation_id(self: &Arc<Self>) -> Option<Arc<str>> {
472        self.state.lock().installation_id.clone()
473    }
474
475    pub fn is_staff(self: &Arc<Self>) -> Option<bool> {
476        self.state.lock().is_staff
477    }
478
479    fn build_request(
480        self: &Arc<Self>,
481        // We take in the JSON bytes buffer so we can reuse the existing allocation.
482        mut json_bytes: Vec<u8>,
483        event_request: EventRequestBody,
484    ) -> Result<Request<AsyncBody>> {
485        json_bytes.clear();
486        serde_json::to_writer(&mut json_bytes, &event_request)?;
487
488        let checksum = calculate_json_checksum(&json_bytes).unwrap_or("".to_string());
489
490        Ok(Request::builder()
491            .method(Method::POST)
492            .uri(
493                self.http_client
494                    .build_zed_api_url("/telemetry/events", &[])?
495                    .as_ref(),
496            )
497            .header("Content-Type", "application/json")
498            .header("x-zed-checksum", checksum)
499            .body(json_bytes.into())?)
500    }
501
502    pub fn flush_events(self: &Arc<Self>) {
503        let mut state = self.state.lock();
504        state.first_event_date_time = None;
505        let mut events = mem::take(&mut state.events_queue);
506        state.flush_events_task.take();
507        drop(state);
508        if events.is_empty() {
509            return;
510        }
511
512        let this = self.clone();
513        self.executor
514            .spawn(
515                async move {
516                    let mut json_bytes = Vec::new();
517
518                    if let Some(file) = &mut this.state.lock().log_file {
519                        for event in &mut events {
520                            json_bytes.clear();
521                            serde_json::to_writer(&mut json_bytes, event)?;
522                            file.write_all(&json_bytes)?;
523                            file.write_all(b"\n")?;
524                        }
525                    }
526
527                    let request_body = {
528                        let state = this.state.lock();
529
530                        EventRequestBody {
531                            system_id: state.system_id.as_deref().map(Into::into),
532                            installation_id: state.installation_id.as_deref().map(Into::into),
533                            session_id: state.session_id.clone(),
534                            metrics_id: state.metrics_id.as_deref().map(Into::into),
535                            is_staff: state.is_staff,
536                            app_version: state.app_version.clone(),
537                            os_name: state.os_name.clone(),
538                            os_version: state.os_version.clone(),
539                            architecture: state.architecture.to_string(),
540
541                            release_channel: state.release_channel.map(Into::into),
542                            events,
543                        }
544                    };
545
546                    let request = this.build_request(json_bytes, request_body)?;
547                    let response = this.http_client.send(request).await?;
548                    if response.status() != 200 {
549                        log::error!("Failed to send events: HTTP {:?}", response.status());
550                    }
551                    anyhow::Ok(())
552                }
553                .log_err(),
554            )
555            .detach();
556    }
557}
558
559pub fn calculate_json_checksum(json: &impl AsRef<[u8]>) -> Option<String> {
560    let Some(checksum_seed) = &*ZED_CLIENT_CHECKSUM_SEED else {
561        return None;
562    };
563
564    let mut summer = Sha256::new();
565    summer.update(checksum_seed);
566    summer.update(json);
567    summer.update(checksum_seed);
568    let mut checksum = String::new();
569    for byte in summer.finalize().as_slice() {
570        use std::fmt::Write;
571        write!(&mut checksum, "{:02x}", byte).unwrap();
572    }
573
574    Some(checksum)
575}
576
577#[cfg(test)]
578mod tests {
579    use super::*;
580    use clock::FakeSystemClock;
581    use gpui::TestAppContext;
582    use http_client::FakeHttpClient;
583    use telemetry_events::FlexibleEvent;
584
585    #[gpui::test]
586    fn test_telemetry_flush_on_max_queue_size(cx: &mut TestAppContext) {
587        init_test(cx);
588        let clock = Arc::new(FakeSystemClock::new());
589        let http = FakeHttpClient::with_200_response();
590        let system_id = Some("system_id".to_string());
591        let installation_id = Some("installation_id".to_string());
592        let session_id = "session_id".to_string();
593
594        cx.update(|cx| {
595            let telemetry = Telemetry::new(clock.clone(), http, cx);
596
597            telemetry.state.lock().max_queue_size = 4;
598            telemetry.start(system_id, installation_id, session_id, cx);
599
600            assert!(is_empty_state(&telemetry));
601
602            let first_date_time = clock.utc_now();
603            let event_properties = HashMap::from_iter([(
604                "test_key".to_string(),
605                serde_json::Value::String("test_value".to_string()),
606            )]);
607
608            let event = FlexibleEvent {
609                event_type: "test".to_string(),
610                event_properties,
611            };
612
613            telemetry.report_event(Event::Flexible(event.clone()));
614            assert_eq!(telemetry.state.lock().events_queue.len(), 1);
615            assert!(telemetry.state.lock().flush_events_task.is_some());
616            assert_eq!(
617                telemetry.state.lock().first_event_date_time,
618                Some(first_date_time)
619            );
620
621            clock.advance(Duration::from_millis(100));
622
623            telemetry.report_event(Event::Flexible(event.clone()));
624            assert_eq!(telemetry.state.lock().events_queue.len(), 2);
625            assert!(telemetry.state.lock().flush_events_task.is_some());
626            assert_eq!(
627                telemetry.state.lock().first_event_date_time,
628                Some(first_date_time)
629            );
630
631            clock.advance(Duration::from_millis(100));
632
633            telemetry.report_event(Event::Flexible(event.clone()));
634            assert_eq!(telemetry.state.lock().events_queue.len(), 3);
635            assert!(telemetry.state.lock().flush_events_task.is_some());
636            assert_eq!(
637                telemetry.state.lock().first_event_date_time,
638                Some(first_date_time)
639            );
640
641            clock.advance(Duration::from_millis(100));
642
643            // Adding a 4th event should cause a flush
644            telemetry.report_event(Event::Flexible(event));
645            assert!(is_empty_state(&telemetry));
646        });
647    }
648
649    #[gpui::test]
650    async fn test_telemetry_flush_on_flush_interval(
651        executor: BackgroundExecutor,
652        cx: &mut TestAppContext,
653    ) {
654        init_test(cx);
655        let clock = Arc::new(FakeSystemClock::new());
656        let http = FakeHttpClient::with_200_response();
657        let system_id = Some("system_id".to_string());
658        let installation_id = Some("installation_id".to_string());
659        let session_id = "session_id".to_string();
660
661        cx.update(|cx| {
662            let telemetry = Telemetry::new(clock.clone(), http, cx);
663            telemetry.state.lock().max_queue_size = 4;
664            telemetry.start(system_id, installation_id, session_id, cx);
665
666            assert!(is_empty_state(&telemetry));
667            let first_date_time = clock.utc_now();
668
669            let event_properties = HashMap::from_iter([(
670                "test_key".to_string(),
671                serde_json::Value::String("test_value".to_string()),
672            )]);
673
674            let event = FlexibleEvent {
675                event_type: "test".to_string(),
676                event_properties,
677            };
678
679            telemetry.report_event(Event::Flexible(event));
680            assert_eq!(telemetry.state.lock().events_queue.len(), 1);
681            assert!(telemetry.state.lock().flush_events_task.is_some());
682            assert_eq!(
683                telemetry.state.lock().first_event_date_time,
684                Some(first_date_time)
685            );
686
687            let duration = Duration::from_millis(1);
688
689            // Test 1 millisecond before the flush interval limit is met
690            executor.advance_clock(FLUSH_INTERVAL - duration);
691
692            assert!(!is_empty_state(&telemetry));
693
694            // Test the exact moment the flush interval limit is met
695            executor.advance_clock(duration);
696
697            assert!(is_empty_state(&telemetry));
698        });
699    }
700
701    // TODO:
702    // Test settings
703    // Update FakeHTTPClient to keep track of the number of requests and assert on it
704
705    fn init_test(cx: &mut TestAppContext) {
706        cx.update(|cx| {
707            let settings_store = SettingsStore::test(cx);
708            cx.set_global(settings_store);
709        });
710    }
711
712    fn is_empty_state(telemetry: &Telemetry) -> bool {
713        telemetry.state.lock().events_queue.is_empty()
714            && telemetry.state.lock().flush_events_task.is_none()
715            && telemetry.state.lock().first_event_date_time.is_none()
716    }
717}