Avoid redundant FS scans when LSPs changed watched files (#2663)

Max Brunsfeld created

Release Notes:

- Fixed a performance problem that could occur when a language server
requested to watch a set of files (preview only).

Change summary

crates/fs/src/fs.rs                  |  11 +
crates/project/src/project_tests.rs  |   9 +
crates/project/src/worktree.rs       | 189 +++++++++++++++--------------
crates/project/src/worktree_tests.rs |  17 ++
4 files changed, 136 insertions(+), 90 deletions(-)

Detailed changes

crates/fs/src/fs.rs 🔗

@@ -388,6 +388,7 @@ struct FakeFsState {
     event_txs: Vec<smol::channel::Sender<Vec<fsevent::Event>>>,
     events_paused: bool,
     buffered_events: Vec<fsevent::Event>,
+    metadata_call_count: usize,
     read_dir_call_count: usize,
 }
 
@@ -538,6 +539,7 @@ impl FakeFs {
                 buffered_events: Vec::new(),
                 events_paused: false,
                 read_dir_call_count: 0,
+                metadata_call_count: 0,
             }),
         })
     }
@@ -774,10 +776,16 @@ impl FakeFs {
         result
     }
 
+    /// How many `read_dir` calls have been issued.
     pub fn read_dir_call_count(&self) -> usize {
         self.state.lock().read_dir_call_count
     }
 
+    /// How many `metadata` calls have been issued.
+    pub fn metadata_call_count(&self) -> usize {
+        self.state.lock().metadata_call_count
+    }
+
     async fn simulate_random_delay(&self) {
         self.executor
             .upgrade()
@@ -1098,7 +1106,8 @@ impl Fs for FakeFs {
     async fn metadata(&self, path: &Path) -> Result<Option<Metadata>> {
         self.simulate_random_delay().await;
         let path = normalize_path(path);
-        let state = self.state.lock();
+        let mut state = self.state.lock();
+        state.metadata_call_count += 1;
         if let Some((mut entry, _)) = state.try_read_path(&path, false) {
             let is_symlink = entry.lock().is_symlink();
             if is_symlink {

crates/project/src/project_tests.rs 🔗

@@ -596,6 +596,8 @@ async fn test_reporting_fs_changes_to_language_servers(cx: &mut gpui::TestAppCon
         );
     });
 
+    let prev_read_dir_count = fs.read_dir_call_count();
+
     // Keep track of the FS events reported to the language server.
     let fake_server = fake_servers.next().await.unwrap();
     let file_changes = Arc::new(Mutex::new(Vec::new()));
@@ -607,6 +609,12 @@ async fn test_reporting_fs_changes_to_language_servers(cx: &mut gpui::TestAppCon
                 register_options: serde_json::to_value(
                     lsp::DidChangeWatchedFilesRegistrationOptions {
                         watchers: vec![
+                            lsp::FileSystemWatcher {
+                                glob_pattern: lsp::GlobPattern::String(
+                                    "/the-root/Cargo.toml".to_string(),
+                                ),
+                                kind: None,
+                            },
                             lsp::FileSystemWatcher {
                                 glob_pattern: lsp::GlobPattern::String(
                                     "/the-root/src/*.{rs,c}".to_string(),
@@ -638,6 +646,7 @@ async fn test_reporting_fs_changes_to_language_servers(cx: &mut gpui::TestAppCon
 
     cx.foreground().run_until_parked();
     assert_eq!(mem::take(&mut *file_changes.lock()), &[]);
+    assert_eq!(fs.read_dir_call_count() - prev_read_dir_count, 4);
 
     // Now the language server has asked us to watch an ignored directory path,
     // so we recursively load it.

crates/project/src/worktree.rs 🔗

@@ -3071,17 +3071,20 @@ impl BackgroundScanner {
 
                 path_prefix = self.path_prefixes_to_scan_rx.recv().fuse() => {
                     let Ok(path_prefix) = path_prefix else { break };
+                    log::trace!("adding path prefix {:?}", path_prefix);
 
-                    self.forcibly_load_paths(&[path_prefix.clone()]).await;
+                    let did_scan = self.forcibly_load_paths(&[path_prefix.clone()]).await;
+                    if did_scan {
+                        let abs_path =
+                        {
+                            let mut state = self.state.lock();
+                            state.path_prefixes_to_scan.insert(path_prefix.clone());
+                            state.snapshot.abs_path.join(&path_prefix)
+                        };
 
-                    let abs_path =
-                    {
-                        let mut state = self.state.lock();
-                        state.path_prefixes_to_scan.insert(path_prefix.clone());
-                        state.snapshot.abs_path.join(path_prefix)
-                    };
-                    if let Some(abs_path) = self.fs.canonicalize(&abs_path).await.log_err() {
-                        self.process_events(vec![abs_path]).await;
+                        if let Some(abs_path) = self.fs.canonicalize(&abs_path).await.log_err() {
+                            self.process_events(vec![abs_path]).await;
+                        }
                     }
                 }
 
@@ -3097,10 +3100,13 @@ impl BackgroundScanner {
         }
     }
 
-    async fn process_scan_request(&self, request: ScanRequest, scanning: bool) -> bool {
+    async fn process_scan_request(&self, mut request: ScanRequest, scanning: bool) -> bool {
         log::debug!("rescanning paths {:?}", request.relative_paths);
 
-        let root_path = self.forcibly_load_paths(&request.relative_paths).await;
+        request.relative_paths.sort_unstable();
+        self.forcibly_load_paths(&request.relative_paths).await;
+
+        let root_path = self.state.lock().snapshot.abs_path.clone();
         let root_canonical_path = match self.fs.canonicalize(&root_path).await {
             Ok(path) => path,
             Err(err) => {
@@ -3108,10 +3114,9 @@ impl BackgroundScanner {
                 return false;
             }
         };
-
         let abs_paths = request
             .relative_paths
-            .into_iter()
+            .iter()
             .map(|path| {
                 if path.file_name().is_some() {
                     root_canonical_path.join(path)
@@ -3120,12 +3125,19 @@ impl BackgroundScanner {
                 }
             })
             .collect::<Vec<_>>();
-        self.reload_entries_for_paths(root_path, root_canonical_path, abs_paths, None)
-            .await;
+
+        self.reload_entries_for_paths(
+            root_path,
+            root_canonical_path,
+            &request.relative_paths,
+            abs_paths,
+            None,
+        )
+        .await;
         self.send_status_update(scanning, Some(request.done))
     }
 
-    async fn process_events(&mut self, abs_paths: Vec<PathBuf>) {
+    async fn process_events(&mut self, mut abs_paths: Vec<PathBuf>) {
         log::debug!("received fs events {:?}", abs_paths);
 
         let root_path = self.state.lock().snapshot.abs_path.clone();
@@ -3137,25 +3149,61 @@ impl BackgroundScanner {
             }
         };
 
-        let (scan_job_tx, scan_job_rx) = channel::unbounded();
-        let paths = self
-            .reload_entries_for_paths(
+        let mut relative_paths = Vec::with_capacity(abs_paths.len());
+        let mut unloaded_relative_paths = Vec::new();
+        abs_paths.sort_unstable();
+        abs_paths.dedup_by(|a, b| a.starts_with(&b));
+        abs_paths.retain(|abs_path| {
+            let snapshot = &self.state.lock().snapshot;
+            {
+                let relative_path: Arc<Path> =
+                    if let Ok(path) = abs_path.strip_prefix(&root_canonical_path) {
+                        path.into()
+                    } else {
+                        log::error!(
+                        "ignoring event {abs_path:?} outside of root path {root_canonical_path:?}",
+                    );
+                        return false;
+                    };
+
+                let parent_dir_is_loaded = relative_path.parent().map_or(true, |parent| {
+                    snapshot
+                        .entry_for_path(parent)
+                        .map_or(false, |entry| entry.kind == EntryKind::Dir)
+                });
+                if !parent_dir_is_loaded {
+                    log::debug!("ignoring event {relative_path:?} within unloaded directory");
+                    unloaded_relative_paths.push(relative_path);
+                    return false;
+                }
+
+                relative_paths.push(relative_path);
+                true
+            }
+        });
+
+        if !relative_paths.is_empty() {
+            let (scan_job_tx, scan_job_rx) = channel::unbounded();
+            self.reload_entries_for_paths(
                 root_path,
                 root_canonical_path,
+                &relative_paths,
                 abs_paths,
                 Some(scan_job_tx.clone()),
             )
             .await;
-        drop(scan_job_tx);
-        self.scan_dirs(false, scan_job_rx).await;
+            drop(scan_job_tx);
+            self.scan_dirs(false, scan_job_rx).await;
 
-        let (scan_job_tx, scan_job_rx) = channel::unbounded();
-        self.update_ignore_statuses(scan_job_tx).await;
-        self.scan_dirs(false, scan_job_rx).await;
+            let (scan_job_tx, scan_job_rx) = channel::unbounded();
+            self.update_ignore_statuses(scan_job_tx).await;
+            self.scan_dirs(false, scan_job_rx).await;
+        }
 
         {
             let mut state = self.state.lock();
-            state.reload_repositories(&paths, self.fs.as_ref());
+            relative_paths.extend(unloaded_relative_paths);
+            state.reload_repositories(&relative_paths, self.fs.as_ref());
             state.snapshot.completed_scan_id = state.snapshot.scan_id;
             for (_, entry_id) in mem::take(&mut state.removed_entry_ids) {
                 state.scanned_dirs.remove(&entry_id);
@@ -3165,12 +3213,11 @@ impl BackgroundScanner {
         self.send_status_update(false, None);
     }
 
-    async fn forcibly_load_paths(&self, paths: &[Arc<Path>]) -> Arc<Path> {
-        let root_path;
+    async fn forcibly_load_paths(&self, paths: &[Arc<Path>]) -> bool {
         let (scan_job_tx, mut scan_job_rx) = channel::unbounded();
         {
             let mut state = self.state.lock();
-            root_path = state.snapshot.abs_path.clone();
+            let root_path = state.snapshot.abs_path.clone();
             for path in paths {
                 for ancestor in path.ancestors() {
                     if let Some(entry) = state.snapshot.entry_for_path(ancestor) {
@@ -3201,8 +3248,8 @@ impl BackgroundScanner {
         while let Some(job) = scan_job_rx.next().await {
             self.scan_dir(&job).await.log_err();
         }
-        self.state.lock().paths_to_scan.clear();
-        root_path
+
+        mem::take(&mut self.state.lock().paths_to_scan).len() > 0
     }
 
     async fn scan_dirs(
@@ -3475,7 +3522,7 @@ impl BackgroundScanner {
                             .expect("channel is unbounded");
                     }
                 } else {
-                    log::debug!("defer scanning directory {:?} {:?}", entry.path, entry.kind);
+                    log::debug!("defer scanning directory {:?}", entry.path);
                     entry.kind = EntryKind::UnloadedDir;
                 }
             }
@@ -3490,26 +3537,10 @@ impl BackgroundScanner {
         &self,
         root_abs_path: Arc<Path>,
         root_canonical_path: PathBuf,
-        mut abs_paths: Vec<PathBuf>,
+        relative_paths: &[Arc<Path>],
+        abs_paths: Vec<PathBuf>,
         scan_queue_tx: Option<Sender<ScanJob>>,
-    ) -> Vec<Arc<Path>> {
-        let mut event_paths = Vec::<Arc<Path>>::with_capacity(abs_paths.len());
-        abs_paths.sort_unstable();
-        abs_paths.dedup_by(|a, b| a.starts_with(&b));
-        abs_paths.retain(|abs_path| {
-            if let Ok(path) = abs_path.strip_prefix(&root_canonical_path) {
-                event_paths.push(path.into());
-                true
-            } else {
-                log::error!(
-                    "unexpected event {:?} for root path {:?}",
-                    abs_path,
-                    root_canonical_path
-                );
-                false
-            }
-        });
-
+    ) {
         let metadata = futures::future::join_all(
             abs_paths
                 .iter()
@@ -3538,30 +3569,15 @@ impl BackgroundScanner {
         // Remove any entries for paths that no longer exist or are being recursively
         // refreshed. Do this before adding any new entries, so that renames can be
         // detected regardless of the order of the paths.
-        for (path, metadata) in event_paths.iter().zip(metadata.iter()) {
+        for (path, metadata) in relative_paths.iter().zip(metadata.iter()) {
             if matches!(metadata, Ok(None)) || doing_recursive_update {
                 log::trace!("remove path {:?}", path);
                 state.remove_path(path);
             }
         }
 
-        for (path, metadata) in event_paths.iter().zip(metadata.iter()) {
-            if let (Some(parent), true) = (path.parent(), doing_recursive_update) {
-                if state
-                    .snapshot
-                    .entry_for_path(parent)
-                    .map_or(true, |entry| entry.kind != EntryKind::Dir)
-                {
-                    log::debug!(
-                        "ignoring event {path:?} within unloaded directory {:?}",
-                        parent
-                    );
-                    continue;
-                }
-            }
-
+        for (path, metadata) in relative_paths.iter().zip(metadata.iter()) {
             let abs_path: Arc<Path> = root_abs_path.join(&path).into();
-
             match metadata {
                 Ok(Some((metadata, canonical_path))) => {
                     let ignore_stack = state
@@ -3624,12 +3640,10 @@ impl BackgroundScanner {
 
         util::extend_sorted(
             &mut state.changed_paths,
-            event_paths.iter().cloned(),
+            relative_paths.iter().cloned(),
             usize::MAX,
             Ord::cmp,
         );
-
-        event_paths
     }
 
     fn remove_repo_path(&self, path: &Path, snapshot: &mut LocalSnapshot) -> Option<()> {
@@ -3760,25 +3774,22 @@ impl BackgroundScanner {
 
                 // Scan any directories that were previously ignored and weren't
                 // previously scanned.
-                if was_ignored
-                    && !entry.is_ignored
-                    && !entry.is_external
-                    && entry.kind == EntryKind::UnloadedDir
-                {
-                    job.scan_queue
-                        .try_send(ScanJob {
-                            abs_path: abs_path.clone(),
-                            path: entry.path.clone(),
-                            ignore_stack: child_ignore_stack.clone(),
-                            scan_queue: job.scan_queue.clone(),
-                            ancestor_inodes: self
-                                .state
-                                .lock()
-                                .snapshot
-                                .ancestor_inodes_for_path(&entry.path),
-                            is_external: false,
-                        })
-                        .unwrap();
+                if was_ignored && !entry.is_ignored && entry.kind.is_unloaded() {
+                    let state = self.state.lock();
+                    if state.should_scan_directory(&entry) {
+                        job.scan_queue
+                            .try_send(ScanJob {
+                                abs_path: abs_path.clone(),
+                                path: entry.path.clone(),
+                                ignore_stack: child_ignore_stack.clone(),
+                                scan_queue: job.scan_queue.clone(),
+                                ancestor_inodes: state
+                                    .snapshot
+                                    .ancestor_inodes_for_path(&entry.path),
+                                is_external: false,
+                            })
+                            .unwrap();
+                    }
                 }
 
                 job.ignore_queue

crates/project/src/worktree_tests.rs 🔗

@@ -454,6 +454,10 @@ async fn test_open_gitignored_files(cx: &mut TestAppContext) {
                         "b1.js": "b1",
                         "b2.js": "b2",
                     },
+                    "c": {
+                        "c1.js": "c1",
+                        "c2.js": "c2",
+                    }
                 },
             },
             "two": {
@@ -521,6 +525,7 @@ async fn test_open_gitignored_files(cx: &mut TestAppContext) {
                 (Path::new("one/node_modules/b"), true),
                 (Path::new("one/node_modules/b/b1.js"), true),
                 (Path::new("one/node_modules/b/b2.js"), true),
+                (Path::new("one/node_modules/c"), true),
                 (Path::new("two"), false),
                 (Path::new("two/x.js"), false),
                 (Path::new("two/y.js"), false),
@@ -564,6 +569,7 @@ async fn test_open_gitignored_files(cx: &mut TestAppContext) {
                 (Path::new("one/node_modules/b"), true),
                 (Path::new("one/node_modules/b/b1.js"), true),
                 (Path::new("one/node_modules/b/b2.js"), true),
+                (Path::new("one/node_modules/c"), true),
                 (Path::new("two"), false),
                 (Path::new("two/x.js"), false),
                 (Path::new("two/y.js"), false),
@@ -578,6 +584,17 @@ async fn test_open_gitignored_files(cx: &mut TestAppContext) {
         // Only the newly-expanded directory is scanned.
         assert_eq!(fs.read_dir_call_count() - prev_read_dir_count, 1);
     });
+
+    // No work happens when files and directories change within an unloaded directory.
+    let prev_fs_call_count = fs.read_dir_call_count() + fs.metadata_call_count();
+    fs.create_dir("/root/one/node_modules/c/lib".as_ref())
+        .await
+        .unwrap();
+    cx.foreground().run_until_parked();
+    assert_eq!(
+        fs.read_dir_call_count() + fs.metadata_call_count() - prev_fs_call_count,
+        0
+    );
 }
 
 #[gpui::test]