path_trie.rs

  1use std::{
  2    collections::{btree_map::Entry, BTreeMap},
  3    ffi::OsStr,
  4    ops::ControlFlow,
  5    path::{Path, PathBuf},
  6    sync::Arc,
  7};
  8
  9/// [RootPathTrie] is a workhorse of [super::ProjectTree]. It is responsible for determining the closest known project root for a given path.
 10/// It also determines how much of a given path is unexplored, thus letting callers fill in that gap if needed.
 11/// Conceptually, it allows one to annotate Worktree entries with arbitrary extra metadata and run closest-ancestor searches.
 12///
 13/// A path is unexplored when the closest ancestor of a path is not the path itself; that means that we have not yet ran the scan on that path.
 14/// For example, if there's a project root at path `python/project` and we query for a path `python/project/subdir/another_subdir/file.py`, there is
 15/// a known root at `python/project` and the unexplored part is `subdir/another_subdir` - we need to run a scan on these 2 directories.
 16pub(super) struct RootPathTrie<Label> {
 17    worktree_relative_path: Arc<Path>,
 18    labels: BTreeMap<Label, LabelPresence>,
 19    children: BTreeMap<Arc<OsStr>, RootPathTrie<Label>>,
 20}
 21
 22/// Label presence is a marker that allows to optimize searches within [RootPathTrie]; node label can be:
 23/// - Present; we know there's definitely a project root at this node and it is the only label of that kind on the path to the root of a worktree
 24/// (none of it's ancestors or descendants can contain the same present label)
 25/// - Known Absent - we know there's definitely no project root at this node and none of it's ancestors are Present (descendants can be present though!).
 26/// - Forbidden - we know there's definitely no project root at this node and none of it's ancestors or descendants can be Present.
 27/// The distinction is there to optimize searching; when we encounter a node with unknown status, we don't need to look at it's full path
 28/// to the root of the worktree; it's sufficient to explore only the path between last node with a KnownAbsent state and the directory of a path, since we run searches
 29/// from the leaf up to the root of the worktree. When any of the ancestors is forbidden, we don't need to look at the node or its ancestors.
 30/// When there's a present labeled node on the path to the root, we don't need to ask the adapter to run the search at all.
 31///
 32/// In practical terms, it means that by storing label presence we don't need to do a project discovery on a given folder more than once
 33/// (unless the node is invalidated, which can happen when FS entries are renamed/removed).
 34///
 35/// Storing project absence allows us to recognize which paths have already been scanned for a project root unsuccessfully. This way we don't need to run
 36/// such scan more than once.
 37#[derive(Clone, Copy, Debug, PartialOrd, PartialEq, Ord, Eq)]
 38pub(super) enum LabelPresence {
 39    KnownAbsent,
 40    Present,
 41}
 42
 43impl<Label: Ord + Clone> RootPathTrie<Label> {
 44    pub(super) fn new() -> Self {
 45        Self::new_with_key(Arc::from(Path::new("")))
 46    }
 47    fn new_with_key(worktree_relative_path: Arc<Path>) -> Self {
 48        RootPathTrie {
 49            worktree_relative_path,
 50            labels: Default::default(),
 51            children: Default::default(),
 52        }
 53    }
 54    // Internal implementation of inner that allows one to visit descendants of insertion point for a node.
 55    fn insert_inner(
 56        &mut self,
 57        path: &TriePath,
 58        value: Label,
 59        presence: LabelPresence,
 60    ) -> &mut Self {
 61        let mut current = self;
 62
 63        let mut path_so_far = PathBuf::new();
 64        for key in path.0.iter() {
 65            path_so_far.push(Path::new(key));
 66            current = match current.children.entry(key.clone()) {
 67                Entry::Vacant(vacant_entry) => vacant_entry
 68                    .insert(RootPathTrie::new_with_key(Arc::from(path_so_far.as_path()))),
 69                Entry::Occupied(occupied_entry) => occupied_entry.into_mut(),
 70            };
 71        }
 72        let _previous_value = current.labels.insert(value, presence);
 73        debug_assert_eq!(_previous_value, None);
 74        current
 75    }
 76    pub(super) fn insert(&mut self, path: &TriePath, value: Label, presence: LabelPresence) {
 77        self.insert_inner(path, value, presence);
 78    }
 79
 80    pub(super) fn walk<'a>(
 81        &'a self,
 82        path: &TriePath,
 83        callback: &mut dyn for<'b> FnMut(
 84            &'b Arc<Path>,
 85            &'a BTreeMap<Label, LabelPresence>,
 86        ) -> ControlFlow<()>,
 87    ) {
 88        let mut current = self;
 89        for key in path.0.iter() {
 90            if !current.labels.is_empty() {
 91                if (callback)(&current.worktree_relative_path, &current.labels).is_break() {
 92                    return;
 93                };
 94            }
 95            current = match current.children.get(key) {
 96                Some(child) => child,
 97                None => return,
 98            };
 99        }
100        if !current.labels.is_empty() {
101            (callback)(&current.worktree_relative_path, &current.labels);
102        }
103    }
104
105    pub(super) fn remove(&mut self, path: &TriePath) {
106        debug_assert_ne!(path.0.len(), 0);
107        let mut current = self;
108        for path in path.0.iter().take(path.0.len().saturating_sub(1)) {
109            current = match current.children.get_mut(path) {
110                Some(child) => child,
111                None => return,
112            };
113        }
114        if let Some(final_entry_name) = path.0.last() {
115            current.children.remove(final_entry_name);
116        }
117    }
118}
119
120/// [TriePath] is a [Path] preprocessed for amortizing the cost of doing multiple lookups in distinct [RootPathTrie]s.
121#[derive(Clone)]
122pub(super) struct TriePath(Arc<[Arc<OsStr>]>);
123
124impl From<&Path> for TriePath {
125    fn from(value: &Path) -> Self {
126        TriePath(value.components().map(|c| c.as_os_str().into()).collect())
127    }
128}
129
130#[cfg(test)]
131mod tests {
132    use std::collections::BTreeSet;
133
134    use super::*;
135
136    #[test]
137    fn test_insert_and_lookup() {
138        let mut trie = RootPathTrie::<()>::new();
139        trie.insert(
140            &TriePath::from(Path::new("a/b/c")),
141            (),
142            LabelPresence::Present,
143        );
144
145        trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, nodes| {
146            assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
147            assert_eq!(path.as_ref(), Path::new("a/b/c"));
148            ControlFlow::Continue(())
149        });
150        // Now let's annotate a parent with "Known missing" node.
151        trie.insert(
152            &TriePath::from(Path::new("a")),
153            (),
154            LabelPresence::KnownAbsent,
155        );
156
157        // Ensure that we walk from the root to the leaf.
158        let mut visited_paths = BTreeSet::new();
159        trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, nodes| {
160            if path.as_ref() == Path::new("a/b/c") {
161                assert_eq!(
162                    visited_paths,
163                    BTreeSet::from_iter([Arc::from(Path::new("a/"))])
164                );
165                assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
166            } else if path.as_ref() == Path::new("a/") {
167                assert!(visited_paths.is_empty());
168                assert_eq!(nodes.get(&()), Some(&LabelPresence::KnownAbsent));
169            } else {
170                panic!("Unknown path");
171            }
172            // Assert that we only ever visit a path once.
173            assert!(visited_paths.insert(path.clone()));
174            ControlFlow::Continue(())
175        });
176
177        // One can also pass a path whose prefix is in the tree, but not that path itself.
178        let mut visited_paths = BTreeSet::new();
179        trie.walk(
180            &TriePath::from(Path::new("a/b/c/d/e/f/g")),
181            &mut |path, nodes| {
182                if path.as_ref() == Path::new("a/b/c") {
183                    assert_eq!(
184                        visited_paths,
185                        BTreeSet::from_iter([Arc::from(Path::new("a/"))])
186                    );
187                    assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
188                } else if path.as_ref() == Path::new("a/") {
189                    assert!(visited_paths.is_empty());
190                    assert_eq!(nodes.get(&()), Some(&LabelPresence::KnownAbsent));
191                } else {
192                    panic!("Unknown path");
193                }
194                // Assert that we only ever visit a path once.
195                assert!(visited_paths.insert(path.clone()));
196                ControlFlow::Continue(())
197            },
198        );
199
200        // Test breaking from the tree-walk.
201        let mut visited_paths = BTreeSet::new();
202        trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, nodes| {
203            if path.as_ref() == Path::new("a/") {
204                assert!(visited_paths.is_empty());
205                assert_eq!(nodes.get(&()), Some(&LabelPresence::KnownAbsent));
206            } else {
207                panic!("Unknown path");
208            }
209            // Assert that we only ever visit a path once.
210            assert!(visited_paths.insert(path.clone()));
211            ControlFlow::Break(())
212        });
213        assert_eq!(visited_paths.len(), 1);
214
215        // Entry removal.
216        trie.insert(
217            &TriePath::from(Path::new("a/b")),
218            (),
219            LabelPresence::KnownAbsent,
220        );
221        let mut visited_paths = BTreeSet::new();
222        trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, _nodes| {
223            // Assert that we only ever visit a path once.
224            assert!(visited_paths.insert(path.clone()));
225            ControlFlow::Continue(())
226        });
227        assert_eq!(visited_paths.len(), 3);
228        trie.remove(&TriePath::from(Path::new("a/b/")));
229        let mut visited_paths = BTreeSet::new();
230        trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, _nodes| {
231            // Assert that we only ever visit a path once.
232            assert!(visited_paths.insert(path.clone()));
233            ControlFlow::Continue(())
234        });
235        assert_eq!(visited_paths.len(), 1);
236        assert_eq!(
237            visited_paths.into_iter().next().unwrap().as_ref(),
238            Path::new("a/")
239        );
240    }
241}