Fix git repository state corruption when work dir's metadata is updated (#16926)

Max Brunsfeld created

Fixes https://github.com/zed-industries/zed/issues/13176

Release Notes:

- Fixed an issue where git state would stop updating if the root
directory of a git repository was updated in certain ways

Change summary

crates/fs/src/fs.rs                   |  29 ++++++++
crates/worktree/src/worktree.rs       | 105 ++++++++++++++++++++--------
crates/worktree/src/worktree_tests.rs | 104 ++++++++++++++++++++++++----
3 files changed, 191 insertions(+), 47 deletions(-)

Detailed changes

crates/fs/src/fs.rs 🔗

@@ -846,6 +846,35 @@ impl FakeFs {
         state.next_mtime = next_mtime;
     }
 
+    pub async fn touch_path(&self, path: impl AsRef<Path>) {
+        let mut state = self.state.lock();
+        let path = path.as_ref();
+        let new_mtime = state.next_mtime;
+        let new_inode = state.next_inode;
+        state.next_inode += 1;
+        state.next_mtime += Duration::from_nanos(1);
+        state
+            .write_path(path, move |entry| {
+                match entry {
+                    btree_map::Entry::Vacant(e) => {
+                        e.insert(Arc::new(Mutex::new(FakeFsEntry::File {
+                            inode: new_inode,
+                            mtime: new_mtime,
+                            content: Vec::new(),
+                        })));
+                    }
+                    btree_map::Entry::Occupied(mut e) => match &mut *e.get_mut().lock() {
+                        FakeFsEntry::File { mtime, .. } => *mtime = new_mtime,
+                        FakeFsEntry::Dir { mtime, .. } => *mtime = new_mtime,
+                        FakeFsEntry::Symlink { .. } => {}
+                    },
+                }
+                Ok(())
+            })
+            .unwrap();
+        state.emit_event([path.to_path_buf()]);
+    }
+
     pub async fn insert_file(&self, path: impl AsRef<Path>, content: Vec<u8>) {
         self.write_file_internal(path, content).unwrap()
     }

crates/worktree/src/worktree.rs 🔗

@@ -41,7 +41,8 @@ use settings::{Settings, SettingsLocation, SettingsStore};
 use smol::channel::{self, Sender};
 use std::{
     any::Any,
-    cmp::{self, Ordering},
+    cmp::Ordering,
+    collections::hash_map,
     convert::TryFrom,
     ffi::OsStr,
     fmt,
@@ -299,7 +300,7 @@ struct BackgroundScannerState {
     /// as part of the current update. These entry ids may be re-used
     /// if the same inode is discovered at a new path, or if the given
     /// path is re-created after being deleted.
-    removed_entry_ids: HashMap<(u64, SystemTime), ProjectEntryId>,
+    removed_entries: HashMap<u64, Entry>,
     changed_paths: Vec<Arc<Path>>,
     prev_snapshot: Snapshot,
 }
@@ -1022,7 +1023,7 @@ impl LocalWorktree {
                         scanned_dirs: Default::default(),
                         path_prefixes_to_scan: Default::default(),
                         paths_to_scan: Default::default(),
-                        removed_entry_ids: Default::default(),
+                        removed_entries: Default::default(),
                         changed_paths: Default::default(),
                     }),
                     phase: BackgroundScannerPhase::InitialScan,
@@ -2636,6 +2637,26 @@ impl LocalSnapshot {
         }
     }
 
+    #[cfg(test)]
+    fn check_git_invariants(&self) {
+        let dotgit_paths = self
+            .git_repositories
+            .iter()
+            .map(|repo| repo.1.git_dir_path.clone())
+            .collect::<HashSet<_>>();
+        let work_dir_paths = self
+            .repository_entries
+            .iter()
+            .map(|repo| repo.0.clone().0)
+            .collect::<HashSet<_>>();
+        assert_eq!(dotgit_paths.len(), work_dir_paths.len());
+        assert_eq!(self.repository_entries.iter().count(), work_dir_paths.len());
+        assert_eq!(self.git_repositories.iter().count(), work_dir_paths.len());
+        for (_, entry) in self.repository_entries.iter() {
+            self.git_repositories.get(&entry.work_directory).unwrap();
+        }
+    }
+
     #[cfg(test)]
     pub fn entries_without_ids(&self, include_ignored: bool) -> Vec<(&Path, u64, bool)> {
         let mut paths = Vec::new();
@@ -2704,8 +2725,17 @@ impl BackgroundScannerState {
 
     fn reuse_entry_id(&mut self, entry: &mut Entry) {
         if let Some(mtime) = entry.mtime {
-            if let Some(removed_entry_id) = self.removed_entry_ids.remove(&(entry.inode, mtime)) {
-                entry.id = removed_entry_id;
+            // If an entry with the same inode was removed from the worktree during this scan,
+            // then it *might* represent the same file or directory. But the OS might also have
+            // re-used the inode for a completely different file or directory.
+            //
+            // Conditionally reuse the old entry's id:
+            // * if the mtime is the same, the file was probably been renamed.
+            // * if the path is the same, the file may just have been updated
+            if let Some(removed_entry) = self.removed_entries.remove(&entry.inode) {
+                if removed_entry.mtime == Some(mtime) || removed_entry.path == entry.path {
+                    entry.id = removed_entry.id;
+                }
             } else if let Some(existing_entry) = self.snapshot.entry_for_path(&entry.path) {
                 entry.id = existing_entry.id;
             }
@@ -2797,30 +2827,47 @@ impl BackgroundScannerState {
         }
         self.snapshot.entries_by_path = new_entries;
 
-        let mut entries_by_id_edits = Vec::new();
+        let mut removed_ids = Vec::with_capacity(removed_entries.summary().count);
         for entry in removed_entries.cursor::<()>() {
-            if let Some(mtime) = entry.mtime {
-                let removed_entry_id = self
-                    .removed_entry_ids
-                    .entry((entry.inode, mtime))
-                    .or_insert(entry.id);
-                *removed_entry_id = cmp::max(*removed_entry_id, entry.id);
+            match self.removed_entries.entry(entry.inode) {
+                hash_map::Entry::Occupied(mut e) => {
+                    let prev_removed_entry = e.get_mut();
+                    if entry.id > prev_removed_entry.id {
+                        *prev_removed_entry = entry.clone();
+                    }
+                }
+                hash_map::Entry::Vacant(e) => {
+                    e.insert(entry.clone());
+                }
             }
-            entries_by_id_edits.push(Edit::Remove(entry.id));
-        }
-        self.snapshot.entries_by_id.edit(entries_by_id_edits, &());
 
-        if path.file_name() == Some(&GITIGNORE) {
-            let abs_parent_path = self.snapshot.abs_path.join(path.parent().unwrap());
-            if let Some((_, needs_update)) = self
-                .snapshot
-                .ignores_by_parent_abs_path
-                .get_mut(abs_parent_path.as_path())
-            {
-                *needs_update = true;
+            if entry.path.file_name() == Some(&GITIGNORE) {
+                let abs_parent_path = self.snapshot.abs_path.join(entry.path.parent().unwrap());
+                if let Some((_, needs_update)) = self
+                    .snapshot
+                    .ignores_by_parent_abs_path
+                    .get_mut(abs_parent_path.as_path())
+                {
+                    *needs_update = true;
+                }
+            }
+
+            if let Err(ix) = removed_ids.binary_search(&entry.id) {
+                removed_ids.insert(ix, entry.id);
             }
         }
 
+        self.snapshot.entries_by_id.edit(
+            removed_ids.iter().map(|&id| Edit::Remove(id)).collect(),
+            &(),
+        );
+        self.snapshot
+            .git_repositories
+            .retain(|id, _| removed_ids.binary_search(&id).is_err());
+        self.snapshot
+            .repository_entries
+            .retain(|repo_path, _| !repo_path.0.starts_with(path));
+
         #[cfg(test)]
         self.snapshot.check_invariants(false);
     }
@@ -3722,11 +3769,14 @@ impl BackgroundScanner {
         {
             let mut state = self.state.lock();
             state.snapshot.completed_scan_id = state.snapshot.scan_id;
-            for (_, entry_id) in mem::take(&mut state.removed_entry_ids) {
-                state.scanned_dirs.remove(&entry_id);
+            for (_, entry) in mem::take(&mut state.removed_entries) {
+                state.scanned_dirs.remove(&entry.id);
             }
         }
 
+        #[cfg(test)]
+        self.state.lock().snapshot.check_git_invariants();
+
         self.send_status_update(false, None);
     }
 
@@ -4139,7 +4189,6 @@ impl BackgroundScanner {
 
                     let is_dir = fs_entry.is_dir();
                     fs_entry.is_ignored = ignore_stack.is_abs_path_ignored(&abs_path, is_dir);
-
                     fs_entry.is_external = is_external;
                     fs_entry.is_private = self.is_path_private(path);
 
@@ -4168,7 +4217,6 @@ impl BackgroundScanner {
                     self.remove_repo_path(path, &mut state.snapshot);
                 }
                 Err(err) => {
-                    // TODO - create a special 'error' entry in the entries tree to mark this
                     log::error!("error reading file {abs_path:?} on event: {err:#}");
                 }
             }
@@ -4198,9 +4246,6 @@ impl BackgroundScanner {
             }
         }
 
-        // TODO statuses
-        // Track when a .git is removed and iterate over the file system there
-
         Some(())
     }
 

crates/worktree/src/worktree_tests.rs 🔗

@@ -1212,6 +1212,76 @@ async fn test_create_directory_during_initial_scan(cx: &mut TestAppContext) {
     );
 }
 
+#[gpui::test]
+async fn test_bump_mtime_of_git_repo_workdir(cx: &mut TestAppContext) {
+    init_test(cx);
+
+    // Create a worktree with a git directory.
+    let fs = FakeFs::new(cx.background_executor.clone());
+    fs.insert_tree(
+        "/root",
+        json!({
+            ".git": {},
+            "a.txt": "",
+            "b":  {
+                "c.txt": "",
+            },
+        }),
+    )
+    .await;
+
+    let tree = Worktree::local(
+        "/root".as_ref(),
+        true,
+        fs.clone(),
+        Default::default(),
+        &mut cx.to_async(),
+    )
+    .await
+    .unwrap();
+    cx.executor().run_until_parked();
+
+    let (old_entry_ids, old_mtimes) = tree.read_with(cx, |tree, _| {
+        (
+            tree.entries(true, 0).map(|e| e.id).collect::<Vec<_>>(),
+            tree.entries(true, 0).map(|e| e.mtime).collect::<Vec<_>>(),
+        )
+    });
+
+    // Regression test: after the directory is scanned, touch the git repo's
+    // working directory, bumping its mtime. That directory keeps its project
+    // entry id after the directories are re-scanned.
+    fs.touch_path("/root").await;
+    cx.executor().run_until_parked();
+
+    let (new_entry_ids, new_mtimes) = tree.read_with(cx, |tree, _| {
+        (
+            tree.entries(true, 0).map(|e| e.id).collect::<Vec<_>>(),
+            tree.entries(true, 0).map(|e| e.mtime).collect::<Vec<_>>(),
+        )
+    });
+    assert_eq!(new_entry_ids, old_entry_ids);
+    assert_ne!(new_mtimes, old_mtimes);
+
+    // Regression test: changes to the git repository should still be
+    // detected.
+    fs.set_status_for_repo_via_git_operation(
+        &Path::new("/root/.git"),
+        &[(Path::new("b/c.txt"), GitFileStatus::Modified)],
+    );
+    cx.executor().run_until_parked();
+
+    let snapshot = tree.read_with(cx, |tree, _| tree.snapshot());
+    check_propagated_statuses(
+        &snapshot,
+        &[
+            (Path::new(""), Some(GitFileStatus::Modified)),
+            (Path::new("a.txt"), None),
+            (Path::new("b/c.txt"), Some(GitFileStatus::Modified)),
+        ],
+    );
+}
+
 #[gpui::test]
 async fn test_create_dir_all_on_create_entry(cx: &mut TestAppContext) {
     init_test(cx);
@@ -2409,25 +2479,25 @@ async fn test_propagate_git_statuses(cx: &mut TestAppContext) {
             (Path::new("f/no-status.txt"), None),
         ],
     );
+}
 
-    #[track_caller]
-    fn check_propagated_statuses(
-        snapshot: &Snapshot,
-        expected_statuses: &[(&Path, Option<GitFileStatus>)],
-    ) {
-        let mut entries = expected_statuses
+#[track_caller]
+fn check_propagated_statuses(
+    snapshot: &Snapshot,
+    expected_statuses: &[(&Path, Option<GitFileStatus>)],
+) {
+    let mut entries = expected_statuses
+        .iter()
+        .map(|(path, _)| snapshot.entry_for_path(path).unwrap().clone())
+        .collect::<Vec<_>>();
+    snapshot.propagate_git_statuses(&mut entries);
+    assert_eq!(
+        entries
             .iter()
-            .map(|(path, _)| snapshot.entry_for_path(path).unwrap().clone())
-            .collect::<Vec<_>>();
-        snapshot.propagate_git_statuses(&mut entries);
-        assert_eq!(
-            entries
-                .iter()
-                .map(|e| (e.path.as_ref(), e.git_status))
-                .collect::<Vec<_>>(),
-            expected_statuses
-        );
-    }
+            .map(|e| (e.path.as_ref(), e.git_status))
+            .collect::<Vec<_>>(),
+        expected_statuses
+    );
 }
 
 #[track_caller]