Do not report same project type multiple times for same worktree (#32785)

Joseph T. Lyons created

Follow-up to: https://github.com/zed-industries/zed/pull/32769

Now that the project type identification telemetry can look for multiple
files in order to identify the project type, we need to make sure we
still only send a single event for a given worktree.

Also, simplifies project detection telemetry code

Release Notes:

- N/A

Change summary

crates/client/src/telemetry.rs | 111 +++++++++++++++--------------------
crates/project/src/project.rs  |   2 
2 files changed, 48 insertions(+), 65 deletions(-)

Detailed changes

crates/client/src/telemetry.rs 🔗

@@ -46,31 +46,13 @@ struct TelemetryState {
     first_event_date_time: Option<Instant>,
     event_coalescer: EventCoalescer,
     max_queue_size: usize,
-    project_marker_patterns: ProjectMarkerPatterns,
+    worktrees_with_project_type_events_sent: HashSet<WorktreeId>,
 
     os_name: String,
     app_version: String,
     os_version: Option<String>,
 }
 
-#[derive(Debug)]
-struct ProjectMarkerPatterns(Vec<(Regex, ProjectCache)>);
-
-#[derive(Debug)]
-struct ProjectCache {
-    name: String,
-    worktree_ids_reported: HashSet<WorktreeId>,
-}
-
-impl ProjectCache {
-    fn new(name: String) -> Self {
-        Self {
-            name,
-            worktree_ids_reported: HashSet::default(),
-        }
-    }
-}
-
 #[cfg(debug_assertions)]
 const MAX_QUEUE_LEN: usize = 5;
 
@@ -92,6 +74,10 @@ static ZED_CLIENT_CHECKSUM_SEED: LazyLock<Option<Vec<u8>>> = LazyLock::new(|| {
         })
 });
 
+static DOTNET_PROJECT_FILES_REGEX: LazyLock<Regex> = LazyLock::new(|| {
+    Regex::new(r"^(global\.json|Directory\.Build\.props|.*\.(csproj|fsproj|vbproj|sln))$").unwrap()
+});
+
 pub fn os_name() -> String {
     #[cfg(target_os = "macos")]
     {
@@ -195,27 +181,7 @@ impl Telemetry {
             first_event_date_time: None,
             event_coalescer: EventCoalescer::new(clock.clone()),
             max_queue_size: MAX_QUEUE_LEN,
-            project_marker_patterns: ProjectMarkerPatterns(vec![
-                (
-                    Regex::new(r"^pnpm-lock\.yaml$").unwrap(),
-                    ProjectCache::new("pnpm".to_string()),
-                ),
-                (
-                    Regex::new(r"^yarn\.lock$").unwrap(),
-                    ProjectCache::new("yarn".to_string()),
-                ),
-                (
-                    Regex::new(r"^package\.json$").unwrap(),
-                    ProjectCache::new("node".to_string()),
-                ),
-                (
-                    Regex::new(
-                        r"^(global\.json|Directory\.Build\.props|.*\.(csproj|fsproj|vbproj|sln))$",
-                    )
-                    .unwrap(),
-                    ProjectCache::new("dotnet".to_string()),
-                ),
-            ]),
+            worktrees_with_project_type_events_sent: HashSet::new(),
 
             os_version: None,
             os_name: os_name(),
@@ -379,7 +345,7 @@ impl Telemetry {
         }
     }
 
-    pub fn report_discovered_project_events(
+    pub fn report_discovered_project_type_events(
         self: &Arc<Self>,
         worktree_id: WorktreeId,
         updated_entries_set: &UpdatedEntriesSet,
@@ -397,32 +363,40 @@ impl Telemetry {
         updated_entries_set: &UpdatedEntriesSet,
     ) -> Vec<String> {
         let mut state = self.state.lock();
-        state
-            .project_marker_patterns
-            .0
-            .iter_mut()
-            .filter_map(|(pattern, project_cache)| {
-                if project_cache.worktree_ids_reported.contains(&worktree_id) {
-                    return None;
-                }
+        let mut project_names: HashSet<String> = HashSet::new();
 
-                let project_file_found = updated_entries_set.iter().any(|(path, _, _)| {
-                    path.as_ref()
-                        .file_name()
-                        .and_then(|name| name.to_str())
-                        .map(|name_str| pattern.is_match(name_str))
-                        .unwrap_or(false)
-                });
+        if state
+            .worktrees_with_project_type_events_sent
+            .contains(&worktree_id)
+        {
+            return project_names.into_iter().collect();
+        }
 
-                if !project_file_found {
-                    return None;
-                }
+        for (path, _, _) in updated_entries_set.iter() {
+            let Some(file_name) = path.file_name().and_then(|f| f.to_str()) else {
+                continue;
+            };
 
-                project_cache.worktree_ids_reported.insert(worktree_id);
+            if file_name == "pnpm-lock.yaml" {
+                project_names.insert("pnpm".to_string());
+            } else if file_name == "yarn.lock" {
+                project_names.insert("yarn".to_string());
+            } else if file_name == "package.json" {
+                project_names.insert("node".to_string());
+            } else if DOTNET_PROJECT_FILES_REGEX.is_match(file_name) {
+                project_names.insert("dotnet".to_string());
+            }
+        }
 
-                Some(project_cache.name.clone())
-            })
-            .collect()
+        if !project_names.is_empty() {
+            state
+                .worktrees_with_project_type_events_sent
+                .insert(worktree_id);
+        }
+
+        let mut project_names_vec: Vec<String> = project_names.into_iter().collect();
+        project_names_vec.sort();
+        project_names_vec
     }
 
     fn report_event(self: &Arc<Self>, event: Event) {
@@ -786,7 +760,16 @@ mod tests {
         test_project_discovery_helper(telemetry.clone(), vec!["file.csproj"], vec!["dotnet"], 3);
         test_project_discovery_helper(telemetry.clone(), vec!["file.fsproj"], vec!["dotnet"], 4);
         test_project_discovery_helper(telemetry.clone(), vec!["file.vbproj"], vec!["dotnet"], 5);
-        test_project_discovery_helper(telemetry, vec!["file.sln"], vec!["dotnet"], 6);
+        test_project_discovery_helper(telemetry.clone(), vec!["file.sln"], vec!["dotnet"], 6);
+
+        // Each worktree should only send a single project type event, even when
+        // encountering multiple files associated with that project type
+        test_project_discovery_helper(
+            telemetry,
+            vec!["global.json", "Directory.Build.props"],
+            vec!["dotnet"],
+            7,
+        );
     }
 
     // TODO:

crates/project/src/project.rs 🔗

@@ -2945,7 +2945,7 @@ impl Project {
             WorktreeStoreEvent::WorktreeUpdatedEntries(worktree_id, changes) => {
                 self.client()
                     .telemetry()
-                    .report_discovered_project_events(*worktree_id, changes);
+                    .report_discovered_project_type_events(*worktree_id, changes);
                 cx.emit(Event::WorktreeUpdatedEntries(*worktree_id, changes.clone()))
             }
             WorktreeStoreEvent::WorktreeDeletedEntry(worktree_id, id) => {