1use std::{
  2    collections::{BTreeMap, btree_map::Entry},
  3    ops::ControlFlow,
  4    sync::Arc,
  5};
  6
  7use util::rel_path::RelPath;
  8
  9/// [RootPathTrie] is a workhorse of [super::ManifestTree]. It is responsible for determining the closest known entry for a given path.
 10/// It also determines how much of a given path is unexplored, thus letting callers fill in that gap if needed.
 11/// Conceptually, it allows one to annotate Worktree entries with arbitrary extra metadata and run closest-ancestor searches.
 12///
 13/// A path is unexplored when the closest ancestor of a path is not the path itself; that means that we have not yet ran the scan on that path.
 14/// For example, if there's a project root at path `python/project` and we query for a path `python/project/subdir/another_subdir/file.py`, there is
 15/// a known root at `python/project` and the unexplored part is `subdir/another_subdir` - we need to run a scan on these 2 directories.
 16pub(super) struct RootPathTrie<Label> {
 17    worktree_relative_path: Arc<RelPath>,
 18    labels: BTreeMap<Label, LabelPresence>,
 19    children: BTreeMap<Arc<str>, RootPathTrie<Label>>,
 20}
 21
 22/// Label presence is a marker that allows to optimize searches within [RootPathTrie]; node label can be:
 23/// - Present; we know there's definitely a project root at this node.
 24/// - Known Absent - we know there's definitely no project root at this node and none of it's ancestors are Present (descendants can be present though!).
 25///   The distinction is there to optimize searching; when we encounter a node with unknown status, we don't need to look at it's full path
 26///   to the root of the worktree; it's sufficient to explore only the path between last node with a KnownAbsent state and the directory of a path, since we run searches
 27///   from the leaf up to the root of the worktree.
 28///
 29/// In practical terms, it means that by storing label presence we don't need to do a project discovery on a given folder more than once
 30/// (unless the node is invalidated, which can happen when FS entries are renamed/removed).
 31///
 32/// Storing absent nodes allows us to recognize which paths have already been scanned for a project root unsuccessfully. This way we don't need to run
 33/// such scan more than once.
 34#[derive(Clone, Copy, Debug, PartialOrd, PartialEq, Ord, Eq)]
 35pub(super) enum LabelPresence {
 36    KnownAbsent,
 37    Present,
 38}
 39
 40impl<Label: Ord + Clone> RootPathTrie<Label> {
 41    pub(super) fn new() -> Self {
 42        Self::new_with_key(Arc::from(RelPath::empty()))
 43    }
 44
 45    fn new_with_key(worktree_relative_path: Arc<RelPath>) -> Self {
 46        RootPathTrie {
 47            worktree_relative_path,
 48            labels: Default::default(),
 49            children: Default::default(),
 50        }
 51    }
 52
 53    // Internal implementation of inner that allows one to visit descendants of insertion point for a node.
 54    fn insert_inner(
 55        &mut self,
 56        path: &TriePath,
 57        value: Label,
 58        presence: LabelPresence,
 59    ) -> &mut Self {
 60        let mut current = self;
 61
 62        let mut path_so_far = <Arc<RelPath>>::from(RelPath::empty());
 63        for key in path.0.iter() {
 64            path_so_far = path_so_far.join(RelPath::unix(key.as_ref()).unwrap());
 65            current = match current.children.entry(key.clone()) {
 66                Entry::Vacant(vacant_entry) => {
 67                    vacant_entry.insert(RootPathTrie::new_with_key(path_so_far.clone()))
 68                }
 69                Entry::Occupied(occupied_entry) => occupied_entry.into_mut(),
 70            };
 71        }
 72        let _previous_value = current.labels.insert(value, presence);
 73        debug_assert_eq!(_previous_value, None);
 74        current
 75    }
 76
 77    pub(super) fn insert(&mut self, path: &TriePath, value: Label, presence: LabelPresence) {
 78        self.insert_inner(path, value, presence);
 79    }
 80
 81    pub(super) fn walk<'a>(
 82        &'a self,
 83        path: &TriePath,
 84        callback: &mut dyn for<'b> FnMut(
 85            &'b Arc<RelPath>,
 86            &'a BTreeMap<Label, LabelPresence>,
 87        ) -> ControlFlow<()>,
 88    ) {
 89        let mut current = self;
 90        for key in path.0.iter() {
 91            if !current.labels.is_empty()
 92                && (callback)(¤t.worktree_relative_path, ¤t.labels).is_break()
 93            {
 94                return;
 95            };
 96            current = match current.children.get(key) {
 97                Some(child) => child,
 98                None => return,
 99            };
100        }
101        if !current.labels.is_empty() {
102            let _ = (callback)(¤t.worktree_relative_path, ¤t.labels);
103        }
104    }
105
106    pub(super) fn remove(&mut self, path: &TriePath) {
107        let mut current = self;
108        for path in path.0.iter().take(path.0.len().saturating_sub(1)) {
109            current = match current.children.get_mut(path) {
110                Some(child) => child,
111                None => return,
112            };
113        }
114        if let Some(final_entry_name) = path.0.last() {
115            current.children.remove(final_entry_name);
116        }
117    }
118}
119
120/// [TriePath] is a [Path] preprocessed for amortizing the cost of doing multiple lookups in distinct [RootPathTrie]s.
121#[derive(Clone)]
122pub(super) struct TriePath(Arc<[Arc<str>]>);
123
124impl TriePath {
125    fn new(value: &RelPath) -> Self {
126        TriePath(
127            value
128                .components()
129                .map(|component| component.into())
130                .collect(),
131        )
132    }
133}
134
135impl From<&RelPath> for TriePath {
136    fn from(value: &RelPath) -> Self {
137        Self::new(value)
138    }
139}
140
141#[cfg(test)]
142mod tests {
143    use std::collections::BTreeSet;
144
145    use util::rel_path::rel_path;
146
147    use super::*;
148
149    #[test]
150    fn test_insert_and_lookup() {
151        let mut trie = RootPathTrie::<()>::new();
152        trie.insert(
153            &TriePath::new(rel_path("a/b/c")),
154            (),
155            LabelPresence::Present,
156        );
157
158        trie.walk(&TriePath::new(rel_path("a/b/c")), &mut |path, nodes| {
159            assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
160            assert_eq!(path.as_unix_str(), "a/b/c");
161            ControlFlow::Continue(())
162        });
163        // Now let's annotate a parent with "Known missing" node.
164        trie.insert(
165            &TriePath::new(rel_path("a")),
166            (),
167            LabelPresence::KnownAbsent,
168        );
169
170        // Ensure that we walk from the root to the leaf.
171        let mut visited_paths = BTreeSet::new();
172        trie.walk(&TriePath::new(rel_path("a/b/c")), &mut |path, nodes| {
173            if path.as_unix_str() == "a/b/c" {
174                assert_eq!(visited_paths, BTreeSet::from_iter([rel_path("a").into()]));
175                assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
176            } else if path.as_unix_str() == "a" {
177                assert!(visited_paths.is_empty());
178                assert_eq!(nodes.get(&()), Some(&LabelPresence::KnownAbsent));
179            } else {
180                panic!("Unknown path");
181            }
182            // Assert that we only ever visit a path once.
183            assert!(visited_paths.insert(path.clone()));
184            ControlFlow::Continue(())
185        });
186
187        // One can also pass a path whose prefix is in the tree, but not that path itself.
188        let mut visited_paths = BTreeSet::new();
189        trie.walk(
190            &TriePath::new(rel_path("a/b/c/d/e/f/g")),
191            &mut |path, nodes| {
192                if path.as_unix_str() == "a/b/c" {
193                    assert_eq!(visited_paths, BTreeSet::from_iter([rel_path("a").into()]));
194                    assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
195                } else if path.as_unix_str() == "a" {
196                    assert!(visited_paths.is_empty());
197                    assert_eq!(nodes.get(&()), Some(&LabelPresence::KnownAbsent));
198                } else {
199                    panic!("Unknown path");
200                }
201                // Assert that we only ever visit a path once.
202                assert!(visited_paths.insert(path.clone()));
203                ControlFlow::Continue(())
204            },
205        );
206
207        // Test breaking from the tree-walk.
208        let mut visited_paths = BTreeSet::new();
209        trie.walk(&TriePath::new(rel_path("a/b/c")), &mut |path, nodes| {
210            if path.as_unix_str() == "a" {
211                assert!(visited_paths.is_empty());
212                assert_eq!(nodes.get(&()), Some(&LabelPresence::KnownAbsent));
213            } else {
214                panic!("Unknown path");
215            }
216            // Assert that we only ever visit a path once.
217            assert!(visited_paths.insert(path.clone()));
218            ControlFlow::Break(())
219        });
220        assert_eq!(visited_paths.len(), 1);
221
222        // Entry removal.
223        trie.insert(
224            &TriePath::new(rel_path("a/b")),
225            (),
226            LabelPresence::KnownAbsent,
227        );
228        let mut visited_paths = BTreeSet::new();
229        trie.walk(&TriePath::new(rel_path("a/b/c")), &mut |path, _nodes| {
230            // Assert that we only ever visit a path once.
231            assert!(visited_paths.insert(path.clone()));
232            ControlFlow::Continue(())
233        });
234        assert_eq!(visited_paths.len(), 3);
235        trie.remove(&TriePath::new(rel_path("a/b")));
236        let mut visited_paths = BTreeSet::new();
237        trie.walk(&TriePath::new(rel_path("a/b/c")), &mut |path, _nodes| {
238            // Assert that we only ever visit a path once.
239            assert!(visited_paths.insert(path.clone()));
240            ControlFlow::Continue(())
241        });
242        assert_eq!(visited_paths.len(), 1);
243        assert_eq!(
244            visited_paths.into_iter().next().unwrap(),
245            rel_path("a").into()
246        );
247    }
248
249    #[test]
250    fn path_to_a_root_can_contain_multiple_known_nodes() {
251        let mut trie = RootPathTrie::<()>::new();
252        trie.insert(&TriePath::new(rel_path("a/b")), (), LabelPresence::Present);
253        trie.insert(&TriePath::new(rel_path("a")), (), LabelPresence::Present);
254        let mut visited_paths = BTreeSet::new();
255        trie.walk(&TriePath::new(rel_path("a/b/c")), &mut |path, nodes| {
256            assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
257            if path.as_unix_str() != "a" && path.as_unix_str() != "a/b" {
258                panic!("Unexpected path: {}", path.as_unix_str());
259            }
260            assert!(visited_paths.insert(path.clone()));
261            ControlFlow::Continue(())
262        });
263        assert_eq!(visited_paths.len(), 2);
264    }
265}