Remove expensive-to-clone fields from worktree's LocalSnapshot (#2497)

Max Brunsfeld created

This fixes performance problems that @nathansobo and I have seen in some
cases, when a large number of files changed on disk. A lot of time was
being spent in `worktree::LocalSnapshot::clone`. I think this may have
been because of needing to clone the `removed_entry_ids` map. This
structure is only really used when *mutating* the `LocalSnapshot` in the
background scanner, so I moved it off of the snapshots.

Change summary

crates/project/src/worktree.rs            | 311 +++++++++++++-----------
crates/project_panel/src/project_panel.rs |   2 
2 files changed, 173 insertions(+), 140 deletions(-)

Detailed changes

crates/project/src/worktree.rs 🔗

@@ -120,25 +120,6 @@ pub struct Snapshot {
     completed_scan_id: usize,
 }
 
-impl Snapshot {
-    pub fn repo_for(&self, path: &Path) -> Option<RepositoryEntry> {
-        let mut max_len = 0;
-        let mut current_candidate = None;
-        for (work_directory, repo) in (&self.repository_entries).iter() {
-            if path.starts_with(&work_directory.0) {
-                if work_directory.0.as_os_str().len() >= max_len {
-                    current_candidate = Some(repo);
-                    max_len = work_directory.0.as_os_str().len();
-                } else {
-                    break;
-                }
-            }
-        }
-
-        current_candidate.map(|entry| entry.to_owned())
-    }
-}
-
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct RepositoryEntry {
     pub(crate) work_directory: WorkDirectoryEntry,
@@ -246,7 +227,7 @@ impl RepositoryEntry {
             work_directory_id: self.work_directory_id().to_proto(),
             branch: self.branch.as_ref().map(|str| str.to_string()),
             removed_repo_paths: removed_statuses,
-            updated_statuses: updated_statuses,
+            updated_statuses,
         }
     }
 }
@@ -326,13 +307,22 @@ impl<'a> From<ProjectEntryId> for WorkDirectoryEntry {
 
 #[derive(Debug, Clone)]
 pub struct LocalSnapshot {
-    ignores_by_parent_abs_path: HashMap<Arc<Path>, (Arc<Gitignore>, bool)>, // (gitignore, needs_update)
-    // The ProjectEntryId corresponds to the entry for the .git dir
-    // work_directory_id
+    snapshot: Snapshot,
+    /// All of the gitignore files in the worktree, indexed by their relative path.
+    /// The boolean indicates whether the gitignore needs to be updated.
+    ignores_by_parent_abs_path: HashMap<Arc<Path>, (Arc<Gitignore>, bool)>,
+    /// All of the git repositories in the worktree, indexed by the project entry
+    /// id of their parent directory.
     git_repositories: TreeMap<ProjectEntryId, LocalRepositoryEntry>,
+}
+
+pub struct LocalMutableSnapshot {
+    snapshot: LocalSnapshot,
+    /// The ids of all of the entries that were removed from the snapshot
+    /// as part of the current update. These entry ids may be re-used
+    /// if the same inode is discovered at a new path, or if the given
+    /// path is re-created after being deleted.
     removed_entry_ids: HashMap<u64, ProjectEntryId>,
-    next_entry_id: Arc<AtomicUsize>,
-    snapshot: Snapshot,
 }
 
 #[derive(Debug, Clone)]
@@ -366,6 +356,20 @@ impl DerefMut for LocalSnapshot {
     }
 }
 
+impl Deref for LocalMutableSnapshot {
+    type Target = LocalSnapshot;
+
+    fn deref(&self) -> &Self::Target {
+        &self.snapshot
+    }
+}
+
+impl DerefMut for LocalMutableSnapshot {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.snapshot
+    }
+}
+
 enum ScanState {
     Started,
     Updated {
@@ -416,9 +420,7 @@ impl Worktree {
 
             let mut snapshot = LocalSnapshot {
                 ignores_by_parent_abs_path: Default::default(),
-                removed_entry_ids: Default::default(),
                 git_repositories: Default::default(),
-                next_entry_id,
                 snapshot: Snapshot {
                     id: WorktreeId::from_usize(cx.model_id()),
                     abs_path: abs_path.clone(),
@@ -437,7 +439,7 @@ impl Worktree {
                     Entry::new(
                         Arc::from(Path::new("")),
                         &metadata,
-                        &snapshot.next_entry_id,
+                        &next_entry_id,
                         snapshot.root_char_bag,
                     ),
                     fs.as_ref(),
@@ -481,6 +483,7 @@ impl Worktree {
                     let events = fs.watch(&abs_path, Duration::from_millis(100)).await;
                     BackgroundScanner::new(
                         snapshot,
+                        next_entry_id,
                         fs,
                         scan_states_tx,
                         background,
@@ -901,7 +904,7 @@ impl LocalWorktree {
 
         let mut index_task = None;
 
-        if let Some(repo) = snapshot.repo_for(&path) {
+        if let Some(repo) = snapshot.repository_for_path(&path) {
             let repo_path = repo.work_directory.relativize(self, &path).unwrap();
             if let Some(repo) = self.git_repositories.get(&*repo.work_directory) {
                 let repo = repo.repo_ptr.to_owned();
@@ -1228,8 +1231,6 @@ impl LocalWorktree {
                     let mut share_tx = Some(share_tx);
                     let mut prev_snapshot = LocalSnapshot {
                         ignores_by_parent_abs_path: Default::default(),
-                        removed_entry_ids: Default::default(),
-                        next_entry_id: Default::default(),
                         git_repositories: Default::default(),
                         snapshot: Snapshot {
                             id: WorktreeId(worktree_id as usize),
@@ -1637,9 +1638,27 @@ impl Snapshot {
             .map(|(path, entry)| (&path.0, entry))
     }
 
+    /// Get the repository whose work directory contains the given path.
+    pub fn repository_for_path(&self, path: &Path) -> Option<RepositoryEntry> {
+        let mut max_len = 0;
+        let mut current_candidate = None;
+        for (work_directory, repo) in (&self.repository_entries).iter() {
+            if path.starts_with(&work_directory.0) {
+                if work_directory.0.as_os_str().len() >= max_len {
+                    current_candidate = Some(repo);
+                    max_len = work_directory.0.as_os_str().len();
+                } else {
+                    break;
+                }
+            }
+        }
+
+        current_candidate.map(|entry| entry.to_owned())
+    }
+
     /// Given an ordered iterator of entries, returns an iterator of those entries,
     /// along with their containing git repository.
-    pub fn entries_with_repos<'a>(
+    pub fn entries_with_repositories<'a>(
         &'a self,
         entries: impl 'a + Iterator<Item = &'a Entry>,
     ) -> impl 'a + Iterator<Item = (&'a Entry, Option<&'a RepositoryEntry>)> {
@@ -1913,8 +1932,6 @@ impl LocalSnapshot {
             }
         }
 
-        self.reuse_entry_id(&mut entry);
-
         if entry.kind == EntryKind::PendingDir {
             if let Some(existing_entry) =
                 self.entries_by_path.get(&PathKey(entry.path.clone()), &())
@@ -1943,60 +1960,6 @@ impl LocalSnapshot {
         entry
     }
 
-    fn populate_dir(
-        &mut self,
-        parent_path: Arc<Path>,
-        entries: impl IntoIterator<Item = Entry>,
-        ignore: Option<Arc<Gitignore>>,
-        fs: &dyn Fs,
-    ) {
-        let mut parent_entry = if let Some(parent_entry) =
-            self.entries_by_path.get(&PathKey(parent_path.clone()), &())
-        {
-            parent_entry.clone()
-        } else {
-            log::warn!(
-                "populating a directory {:?} that has been removed",
-                parent_path
-            );
-            return;
-        };
-
-        match parent_entry.kind {
-            EntryKind::PendingDir => {
-                parent_entry.kind = EntryKind::Dir;
-            }
-            EntryKind::Dir => {}
-            _ => return,
-        }
-
-        if let Some(ignore) = ignore {
-            self.ignores_by_parent_abs_path
-                .insert(self.abs_path.join(&parent_path).into(), (ignore, false));
-        }
-
-        if parent_path.file_name() == Some(&DOT_GIT) {
-            self.build_repo(parent_path, fs);
-        }
-
-        let mut entries_by_path_edits = vec![Edit::Insert(parent_entry)];
-        let mut entries_by_id_edits = Vec::new();
-
-        for mut entry in entries {
-            self.reuse_entry_id(&mut entry);
-            entries_by_id_edits.push(Edit::Insert(PathEntry {
-                id: entry.id,
-                path: entry.path.clone(),
-                is_ignored: entry.is_ignored,
-                scan_id: self.scan_id,
-            }));
-            entries_by_path_edits.push(Edit::Insert(entry));
-        }
-
-        self.entries_by_path.edit(entries_by_path_edits, &());
-        self.entries_by_id.edit(entries_by_id_edits, &());
-    }
-
     fn build_repo(&mut self, parent_path: Arc<Path>, fs: &dyn Fs) -> Option<()> {
         let abs_path = self.abs_path.join(&parent_path);
         let work_dir: Arc<Path> = parent_path.parent().unwrap().into();
@@ -2044,6 +2007,46 @@ impl LocalSnapshot {
 
         Some(())
     }
+
+    fn ancestor_inodes_for_path(&self, path: &Path) -> TreeSet<u64> {
+        let mut inodes = TreeSet::default();
+        for ancestor in path.ancestors().skip(1) {
+            if let Some(entry) = self.entry_for_path(ancestor) {
+                inodes.insert(entry.inode);
+            }
+        }
+        inodes
+    }
+
+    fn ignore_stack_for_abs_path(&self, abs_path: &Path, is_dir: bool) -> Arc<IgnoreStack> {
+        let mut new_ignores = Vec::new();
+        for ancestor in abs_path.ancestors().skip(1) {
+            if let Some((ignore, _)) = self.ignores_by_parent_abs_path.get(ancestor) {
+                new_ignores.push((ancestor, Some(ignore.clone())));
+            } else {
+                new_ignores.push((ancestor, None));
+            }
+        }
+
+        let mut ignore_stack = IgnoreStack::none();
+        for (parent_abs_path, ignore) in new_ignores.into_iter().rev() {
+            if ignore_stack.is_abs_path_ignored(parent_abs_path, true) {
+                ignore_stack = IgnoreStack::all();
+                break;
+            } else if let Some(ignore) = ignore {
+                ignore_stack = ignore_stack.append(parent_abs_path.into(), ignore);
+            }
+        }
+
+        if ignore_stack.is_abs_path_ignored(abs_path, is_dir) {
+            ignore_stack = IgnoreStack::all();
+        }
+
+        ignore_stack
+    }
+}
+
+impl LocalMutableSnapshot {
     fn reuse_entry_id(&mut self, entry: &mut Entry) {
         if let Some(removed_entry_id) = self.removed_entry_ids.remove(&entry.inode) {
             entry.id = removed_entry_id;
@@ -2052,6 +2055,66 @@ impl LocalSnapshot {
         }
     }
 
+    fn insert_entry(&mut self, mut entry: Entry, fs: &dyn Fs) -> Entry {
+        self.reuse_entry_id(&mut entry);
+        self.snapshot.insert_entry(entry, fs)
+    }
+
+    fn populate_dir(
+        &mut self,
+        parent_path: Arc<Path>,
+        entries: impl IntoIterator<Item = Entry>,
+        ignore: Option<Arc<Gitignore>>,
+        fs: &dyn Fs,
+    ) {
+        let mut parent_entry = if let Some(parent_entry) =
+            self.entries_by_path.get(&PathKey(parent_path.clone()), &())
+        {
+            parent_entry.clone()
+        } else {
+            log::warn!(
+                "populating a directory {:?} that has been removed",
+                parent_path
+            );
+            return;
+        };
+
+        match parent_entry.kind {
+            EntryKind::PendingDir => {
+                parent_entry.kind = EntryKind::Dir;
+            }
+            EntryKind::Dir => {}
+            _ => return,
+        }
+
+        if let Some(ignore) = ignore {
+            let abs_parent_path = self.abs_path.join(&parent_path).into();
+            self.ignores_by_parent_abs_path
+                .insert(abs_parent_path, (ignore, false));
+        }
+
+        if parent_path.file_name() == Some(&DOT_GIT) {
+            self.build_repo(parent_path, fs);
+        }
+
+        let mut entries_by_path_edits = vec![Edit::Insert(parent_entry)];
+        let mut entries_by_id_edits = Vec::new();
+
+        for mut entry in entries {
+            self.reuse_entry_id(&mut entry);
+            entries_by_id_edits.push(Edit::Insert(PathEntry {
+                id: entry.id,
+                path: entry.path.clone(),
+                is_ignored: entry.is_ignored,
+                scan_id: self.scan_id,
+            }));
+            entries_by_path_edits.push(Edit::Insert(entry));
+        }
+
+        self.entries_by_path.edit(entries_by_path_edits, &());
+        self.entries_by_id.edit(entries_by_id_edits, &());
+    }
+
     fn remove_path(&mut self, path: &Path) {
         let mut new_entries;
         let removed_entries;
@@ -2084,43 +2147,6 @@ impl LocalSnapshot {
             }
         }
     }
-
-    fn ancestor_inodes_for_path(&self, path: &Path) -> TreeSet<u64> {
-        let mut inodes = TreeSet::default();
-        for ancestor in path.ancestors().skip(1) {
-            if let Some(entry) = self.entry_for_path(ancestor) {
-                inodes.insert(entry.inode);
-            }
-        }
-        inodes
-    }
-
-    fn ignore_stack_for_abs_path(&self, abs_path: &Path, is_dir: bool) -> Arc<IgnoreStack> {
-        let mut new_ignores = Vec::new();
-        for ancestor in abs_path.ancestors().skip(1) {
-            if let Some((ignore, _)) = self.ignores_by_parent_abs_path.get(ancestor) {
-                new_ignores.push((ancestor, Some(ignore.clone())));
-            } else {
-                new_ignores.push((ancestor, None));
-            }
-        }
-
-        let mut ignore_stack = IgnoreStack::none();
-        for (parent_abs_path, ignore) in new_ignores.into_iter().rev() {
-            if ignore_stack.is_abs_path_ignored(parent_abs_path, true) {
-                ignore_stack = IgnoreStack::all();
-                break;
-            } else if let Some(ignore) = ignore {
-                ignore_stack = ignore_stack.append(parent_abs_path.into(), ignore);
-            }
-        }
-
-        if ignore_stack.is_abs_path_ignored(abs_path, is_dir) {
-            ignore_stack = IgnoreStack::all();
-        }
-
-        ignore_stack
-    }
 }
 
 async fn build_gitignore(abs_path: &Path, fs: &dyn Fs) -> Result<Gitignore> {
@@ -2565,12 +2591,13 @@ impl<'a> sum_tree::Dimension<'a, EntrySummary> for PathKey {
 }
 
 struct BackgroundScanner {
-    snapshot: Mutex<LocalSnapshot>,
+    snapshot: Mutex<LocalMutableSnapshot>,
     fs: Arc<dyn Fs>,
     status_updates_tx: UnboundedSender<ScanState>,
     executor: Arc<executor::Background>,
     refresh_requests_rx: channel::Receiver<(Vec<PathBuf>, barrier::Sender)>,
     prev_state: Mutex<BackgroundScannerState>,
+    next_entry_id: Arc<AtomicUsize>,
     finished_initial_scan: bool,
 }
 
@@ -2582,6 +2609,7 @@ struct BackgroundScannerState {
 impl BackgroundScanner {
     fn new(
         snapshot: LocalSnapshot,
+        next_entry_id: Arc<AtomicUsize>,
         fs: Arc<dyn Fs>,
         status_updates_tx: UnboundedSender<ScanState>,
         executor: Arc<executor::Background>,
@@ -2592,11 +2620,15 @@ impl BackgroundScanner {
             status_updates_tx,
             executor,
             refresh_requests_rx,
+            next_entry_id,
             prev_state: Mutex::new(BackgroundScannerState {
                 snapshot: snapshot.snapshot.clone(),
                 event_paths: Default::default(),
             }),
-            snapshot: Mutex::new(snapshot),
+            snapshot: Mutex::new(LocalMutableSnapshot {
+                snapshot,
+                removed_entry_ids: Default::default(),
+            }),
             finished_initial_scan: false,
         }
     }
@@ -2750,10 +2782,7 @@ impl BackgroundScanner {
                 .is_some()
         });
         snapshot.snapshot.repository_entries = git_repository_entries;
-
-        snapshot.removed_entry_ids.clear();
         snapshot.completed_scan_id = snapshot.scan_id;
-
         drop(snapshot);
 
         self.send_status_update(false, None);
@@ -2864,7 +2893,7 @@ impl BackgroundScanner {
             (
                 snapshot.abs_path().clone(),
                 snapshot.root_char_bag,
-                snapshot.next_entry_id.clone(),
+                self.next_entry_id.clone(),
             )
         };
         let mut child_paths = self.fs.read_dir(&job.abs_path).await?;
@@ -3036,7 +3065,7 @@ impl BackgroundScanner {
                     let mut fs_entry = Entry::new(
                         path.clone(),
                         &metadata,
-                        snapshot.next_entry_id.as_ref(),
+                        self.next_entry_id.as_ref(),
                         snapshot.root_char_bag,
                     );
                     fs_entry.is_ignored = ignore_stack.is_all();
@@ -3076,7 +3105,7 @@ impl BackgroundScanner {
             .any(|component| component.as_os_str() == *DOT_GIT)
         {
             let scan_id = snapshot.scan_id;
-            let repo = snapshot.repo_for(&path)?;
+            let repo = snapshot.repository_for_path(&path)?;
 
             let repo_path = repo.work_directory.relativize(&snapshot, &path)?;
 
@@ -3152,7 +3181,7 @@ impl BackgroundScanner {
                 return None;
             }
 
-            let repo = snapshot.repo_for(&path)?;
+            let repo = snapshot.repository_for_path(&path)?;
 
             let work_dir = repo.work_directory(snapshot)?;
             let work_dir_id = repo.work_directory.clone();
@@ -4063,9 +4092,9 @@ mod tests {
         tree.read_with(cx, |tree, _cx| {
             let tree = tree.as_local().unwrap();
 
-            assert!(tree.repo_for("c.txt".as_ref()).is_none());
+            assert!(tree.repository_for_path("c.txt".as_ref()).is_none());
 
-            let entry = tree.repo_for("dir1/src/b.txt".as_ref()).unwrap();
+            let entry = tree.repository_for_path("dir1/src/b.txt".as_ref()).unwrap();
             assert_eq!(
                 entry
                     .work_directory(tree)
@@ -4073,7 +4102,9 @@ mod tests {
                 Some(Path::new("dir1").to_owned())
             );
 
-            let entry = tree.repo_for("dir1/deps/dep1/src/a.txt".as_ref()).unwrap();
+            let entry = tree
+                .repository_for_path("dir1/deps/dep1/src/a.txt".as_ref())
+                .unwrap();
             assert_eq!(
                 entry
                     .work_directory(tree)
@@ -4084,7 +4115,7 @@ mod tests {
             let entries = tree.files(false, 0);
 
             let paths_with_repos = tree
-                .entries_with_repos(entries)
+                .entries_with_repositories(entries)
                 .map(|(entry, repo)| {
                     (
                         entry.path.as_ref(),
@@ -4137,7 +4168,9 @@ mod tests {
         tree.read_with(cx, |tree, _cx| {
             let tree = tree.as_local().unwrap();
 
-            assert!(tree.repo_for("dir1/src/b.txt".as_ref()).is_none());
+            assert!(tree
+                .repository_for_path("dir1/src/b.txt".as_ref())
+                .is_none());
         });
     }
 

crates/project_panel/src/project_panel.rs 🔗

@@ -1011,7 +1011,7 @@ impl ProjectPanel {
 
                 let entry_range = range.start.saturating_sub(ix)..end_ix - ix;
                 for (entry, repo) in
-                    snapshot.entries_with_repos(visible_worktree_entries[entry_range].iter())
+                    snapshot.entries_with_repositories(visible_worktree_entries[entry_range].iter())
                 {
                     let status = (entry.path.parent().is_some() && !entry.is_ignored)
                         .then(|| repo.and_then(|repo| repo.status_for_path(&snapshot, &entry.path)))