path_trie.rs

  1use std::{
  2    collections::{BTreeMap, btree_map::Entry},
  3    ffi::OsStr,
  4    ops::ControlFlow,
  5    path::{Path, PathBuf},
  6    sync::Arc,
  7};
  8
  9/// [RootPathTrie] is a workhorse of [super::ManifestTree]. It is responsible for determining the closest known entry for a given path.
 10/// It also determines how much of a given path is unexplored, thus letting callers fill in that gap if needed.
 11/// Conceptually, it allows one to annotate Worktree entries with arbitrary extra metadata and run closest-ancestor searches.
 12///
 13/// A path is unexplored when the closest ancestor of a path is not the path itself; that means that we have not yet ran the scan on that path.
 14/// For example, if there's a project root at path `python/project` and we query for a path `python/project/subdir/another_subdir/file.py`, there is
 15/// a known root at `python/project` and the unexplored part is `subdir/another_subdir` - we need to run a scan on these 2 directories.
 16pub(super) struct RootPathTrie<Label> {
 17    worktree_relative_path: Arc<Path>,
 18    labels: BTreeMap<Label, LabelPresence>,
 19    children: BTreeMap<Arc<OsStr>, RootPathTrie<Label>>,
 20}
 21
 22/// Label presence is a marker that allows to optimize searches within [RootPathTrie]; node label can be:
 23/// - Present; we know there's definitely a project root at this node.
 24/// - Known Absent - we know there's definitely no project root at this node and none of it's ancestors are Present (descendants can be present though!).
 25///   The distinction is there to optimize searching; when we encounter a node with unknown status, we don't need to look at it's full path
 26///   to the root of the worktree; it's sufficient to explore only the path between last node with a KnownAbsent state and the directory of a path, since we run searches
 27///   from the leaf up to the root of the worktree.
 28///
 29/// In practical terms, it means that by storing label presence we don't need to do a project discovery on a given folder more than once
 30/// (unless the node is invalidated, which can happen when FS entries are renamed/removed).
 31///
 32/// Storing absent nodes allows us to recognize which paths have already been scanned for a project root unsuccessfully. This way we don't need to run
 33/// such scan more than once.
 34#[derive(Clone, Copy, Debug, PartialOrd, PartialEq, Ord, Eq)]
 35pub(super) enum LabelPresence {
 36    KnownAbsent,
 37    Present,
 38}
 39
 40impl<Label: Ord + Clone> RootPathTrie<Label> {
 41    pub(super) fn new() -> Self {
 42        Self::new_with_key(Arc::from(Path::new("")))
 43    }
 44    fn new_with_key(worktree_relative_path: Arc<Path>) -> Self {
 45        RootPathTrie {
 46            worktree_relative_path,
 47            labels: Default::default(),
 48            children: Default::default(),
 49        }
 50    }
 51    // Internal implementation of inner that allows one to visit descendants of insertion point for a node.
 52    fn insert_inner(
 53        &mut self,
 54        path: &TriePath,
 55        value: Label,
 56        presence: LabelPresence,
 57    ) -> &mut Self {
 58        let mut current = self;
 59
 60        let mut path_so_far = PathBuf::new();
 61        for key in path.0.iter() {
 62            path_so_far.push(Path::new(key));
 63            current = match current.children.entry(key.clone()) {
 64                Entry::Vacant(vacant_entry) => vacant_entry
 65                    .insert(RootPathTrie::new_with_key(Arc::from(path_so_far.as_path()))),
 66                Entry::Occupied(occupied_entry) => occupied_entry.into_mut(),
 67            };
 68        }
 69        let _previous_value = current.labels.insert(value, presence);
 70        debug_assert_eq!(_previous_value, None);
 71        current
 72    }
 73    pub(super) fn insert(&mut self, path: &TriePath, value: Label, presence: LabelPresence) {
 74        self.insert_inner(path, value, presence);
 75    }
 76
 77    pub(super) fn walk<'a>(
 78        &'a self,
 79        path: &TriePath,
 80        callback: &mut dyn for<'b> FnMut(
 81            &'b Arc<Path>,
 82            &'a BTreeMap<Label, LabelPresence>,
 83        ) -> ControlFlow<()>,
 84    ) {
 85        let mut current = self;
 86        for key in path.0.iter() {
 87            if !current.labels.is_empty()
 88                && (callback)(&current.worktree_relative_path, &current.labels).is_break()
 89            {
 90                return;
 91            };
 92            current = match current.children.get(key) {
 93                Some(child) => child,
 94                None => return,
 95            };
 96        }
 97        if !current.labels.is_empty() {
 98            let _ = (callback)(&current.worktree_relative_path, &current.labels);
 99        }
100    }
101
102    pub(super) fn remove(&mut self, path: &TriePath) {
103        let mut current = self;
104        for path in path.0.iter().take(path.0.len().saturating_sub(1)) {
105            current = match current.children.get_mut(path) {
106                Some(child) => child,
107                None => return,
108            };
109        }
110        if let Some(final_entry_name) = path.0.last() {
111            current.children.remove(final_entry_name);
112        }
113    }
114}
115
116/// [TriePath] is a [Path] preprocessed for amortizing the cost of doing multiple lookups in distinct [RootPathTrie]s.
117#[derive(Clone)]
118pub(super) struct TriePath(Arc<[Arc<OsStr>]>);
119
120impl From<&Path> for TriePath {
121    fn from(value: &Path) -> Self {
122        TriePath(value.components().map(|c| c.as_os_str().into()).collect())
123    }
124}
125
126#[cfg(test)]
127mod tests {
128    use std::collections::BTreeSet;
129
130    use super::*;
131
132    #[test]
133    fn test_insert_and_lookup() {
134        let mut trie = RootPathTrie::<()>::new();
135        trie.insert(
136            &TriePath::from(Path::new("a/b/c")),
137            (),
138            LabelPresence::Present,
139        );
140
141        trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, nodes| {
142            assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
143            assert_eq!(path.as_ref(), Path::new("a/b/c"));
144            ControlFlow::Continue(())
145        });
146        // Now let's annotate a parent with "Known missing" node.
147        trie.insert(
148            &TriePath::from(Path::new("a")),
149            (),
150            LabelPresence::KnownAbsent,
151        );
152
153        // Ensure that we walk from the root to the leaf.
154        let mut visited_paths = BTreeSet::new();
155        trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, nodes| {
156            if path.as_ref() == Path::new("a/b/c") {
157                assert_eq!(
158                    visited_paths,
159                    BTreeSet::from_iter([Arc::from(Path::new("a/"))])
160                );
161                assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
162            } else if path.as_ref() == Path::new("a/") {
163                assert!(visited_paths.is_empty());
164                assert_eq!(nodes.get(&()), Some(&LabelPresence::KnownAbsent));
165            } else {
166                panic!("Unknown path");
167            }
168            // Assert that we only ever visit a path once.
169            assert!(visited_paths.insert(path.clone()));
170            ControlFlow::Continue(())
171        });
172
173        // One can also pass a path whose prefix is in the tree, but not that path itself.
174        let mut visited_paths = BTreeSet::new();
175        trie.walk(
176            &TriePath::from(Path::new("a/b/c/d/e/f/g")),
177            &mut |path, nodes| {
178                if path.as_ref() == Path::new("a/b/c") {
179                    assert_eq!(
180                        visited_paths,
181                        BTreeSet::from_iter([Arc::from(Path::new("a/"))])
182                    );
183                    assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
184                } else if path.as_ref() == Path::new("a/") {
185                    assert!(visited_paths.is_empty());
186                    assert_eq!(nodes.get(&()), Some(&LabelPresence::KnownAbsent));
187                } else {
188                    panic!("Unknown path");
189                }
190                // Assert that we only ever visit a path once.
191                assert!(visited_paths.insert(path.clone()));
192                ControlFlow::Continue(())
193            },
194        );
195
196        // Test breaking from the tree-walk.
197        let mut visited_paths = BTreeSet::new();
198        trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, nodes| {
199            if path.as_ref() == Path::new("a/") {
200                assert!(visited_paths.is_empty());
201                assert_eq!(nodes.get(&()), Some(&LabelPresence::KnownAbsent));
202            } else {
203                panic!("Unknown path");
204            }
205            // Assert that we only ever visit a path once.
206            assert!(visited_paths.insert(path.clone()));
207            ControlFlow::Break(())
208        });
209        assert_eq!(visited_paths.len(), 1);
210
211        // Entry removal.
212        trie.insert(
213            &TriePath::from(Path::new("a/b")),
214            (),
215            LabelPresence::KnownAbsent,
216        );
217        let mut visited_paths = BTreeSet::new();
218        trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, _nodes| {
219            // Assert that we only ever visit a path once.
220            assert!(visited_paths.insert(path.clone()));
221            ControlFlow::Continue(())
222        });
223        assert_eq!(visited_paths.len(), 3);
224        trie.remove(&TriePath::from(Path::new("a/b/")));
225        let mut visited_paths = BTreeSet::new();
226        trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, _nodes| {
227            // Assert that we only ever visit a path once.
228            assert!(visited_paths.insert(path.clone()));
229            ControlFlow::Continue(())
230        });
231        assert_eq!(visited_paths.len(), 1);
232        assert_eq!(
233            visited_paths.into_iter().next().unwrap().as_ref(),
234            Path::new("a/")
235        );
236    }
237
238    #[test]
239    fn path_to_a_root_can_contain_multiple_known_nodes() {
240        let mut trie = RootPathTrie::<()>::new();
241        trie.insert(
242            &TriePath::from(Path::new("a/b")),
243            (),
244            LabelPresence::Present,
245        );
246        trie.insert(&TriePath::from(Path::new("a")), (), LabelPresence::Present);
247        let mut visited_paths = BTreeSet::new();
248        trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, nodes| {
249            assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
250            if path.as_ref() != Path::new("a") && path.as_ref() != Path::new("a/b") {
251                panic!("Unexpected path: {}", path.as_ref().display());
252            }
253            assert!(visited_paths.insert(path.clone()));
254            ControlFlow::Continue(())
255        });
256        assert_eq!(visited_paths.len(), 2);
257    }
258}