1use std::{
2 collections::{BTreeMap, btree_map::Entry},
3 ffi::OsStr,
4 ops::ControlFlow,
5 path::{Path, PathBuf},
6 sync::Arc,
7};
8
9/// [RootPathTrie] is a workhorse of [super::ManifestTree]. It is responsible for determining the closest known entry for a given path.
10/// It also determines how much of a given path is unexplored, thus letting callers fill in that gap if needed.
11/// Conceptually, it allows one to annotate Worktree entries with arbitrary extra metadata and run closest-ancestor searches.
12///
13/// A path is unexplored when the closest ancestor of a path is not the path itself; that means that we have not yet ran the scan on that path.
14/// For example, if there's a project root at path `python/project` and we query for a path `python/project/subdir/another_subdir/file.py`, there is
15/// a known root at `python/project` and the unexplored part is `subdir/another_subdir` - we need to run a scan on these 2 directories.
16pub(super) struct RootPathTrie<Label> {
17 worktree_relative_path: Arc<Path>,
18 labels: BTreeMap<Label, LabelPresence>,
19 children: BTreeMap<Arc<OsStr>, RootPathTrie<Label>>,
20}
21
22/// Label presence is a marker that allows to optimize searches within [RootPathTrie]; node label can be:
23/// - Present; we know there's definitely a project root at this node.
24/// - Known Absent - we know there's definitely no project root at this node and none of it's ancestors are Present (descendants can be present though!).
25/// The distinction is there to optimize searching; when we encounter a node with unknown status, we don't need to look at it's full path
26/// to the root of the worktree; it's sufficient to explore only the path between last node with a KnownAbsent state and the directory of a path, since we run searches
27/// from the leaf up to the root of the worktree.
28///
29/// In practical terms, it means that by storing label presence we don't need to do a project discovery on a given folder more than once
30/// (unless the node is invalidated, which can happen when FS entries are renamed/removed).
31///
32/// Storing absent nodes allows us to recognize which paths have already been scanned for a project root unsuccessfully. This way we don't need to run
33/// such scan more than once.
34#[derive(Clone, Copy, Debug, PartialOrd, PartialEq, Ord, Eq)]
35pub(super) enum LabelPresence {
36 KnownAbsent,
37 Present,
38}
39
40impl<Label: Ord + Clone> RootPathTrie<Label> {
41 pub(super) fn new() -> Self {
42 Self::new_with_key(Arc::from(Path::new("")))
43 }
44 fn new_with_key(worktree_relative_path: Arc<Path>) -> Self {
45 RootPathTrie {
46 worktree_relative_path,
47 labels: Default::default(),
48 children: Default::default(),
49 }
50 }
51 // Internal implementation of inner that allows one to visit descendants of insertion point for a node.
52 fn insert_inner(
53 &mut self,
54 path: &TriePath,
55 value: Label,
56 presence: LabelPresence,
57 ) -> &mut Self {
58 let mut current = self;
59
60 let mut path_so_far = PathBuf::new();
61 for key in path.0.iter() {
62 path_so_far.push(Path::new(key));
63 current = match current.children.entry(key.clone()) {
64 Entry::Vacant(vacant_entry) => vacant_entry
65 .insert(RootPathTrie::new_with_key(Arc::from(path_so_far.as_path()))),
66 Entry::Occupied(occupied_entry) => occupied_entry.into_mut(),
67 };
68 }
69 let _previous_value = current.labels.insert(value, presence);
70 debug_assert_eq!(_previous_value, None);
71 current
72 }
73 pub(super) fn insert(&mut self, path: &TriePath, value: Label, presence: LabelPresence) {
74 self.insert_inner(path, value, presence);
75 }
76
77 pub(super) fn walk<'a>(
78 &'a self,
79 path: &TriePath,
80 callback: &mut dyn for<'b> FnMut(
81 &'b Arc<Path>,
82 &'a BTreeMap<Label, LabelPresence>,
83 ) -> ControlFlow<()>,
84 ) {
85 let mut current = self;
86 for key in path.0.iter() {
87 if !current.labels.is_empty()
88 && (callback)(¤t.worktree_relative_path, ¤t.labels).is_break()
89 {
90 return;
91 };
92 current = match current.children.get(key) {
93 Some(child) => child,
94 None => return,
95 };
96 }
97 if !current.labels.is_empty() {
98 let _ = (callback)(¤t.worktree_relative_path, ¤t.labels);
99 }
100 }
101
102 pub(super) fn remove(&mut self, path: &TriePath) {
103 let mut current = self;
104 for path in path.0.iter().take(path.0.len().saturating_sub(1)) {
105 current = match current.children.get_mut(path) {
106 Some(child) => child,
107 None => return,
108 };
109 }
110 if let Some(final_entry_name) = path.0.last() {
111 current.children.remove(final_entry_name);
112 }
113 }
114}
115
116/// [TriePath] is a [Path] preprocessed for amortizing the cost of doing multiple lookups in distinct [RootPathTrie]s.
117#[derive(Clone)]
118pub(super) struct TriePath(Arc<[Arc<OsStr>]>);
119
120impl From<&Path> for TriePath {
121 fn from(value: &Path) -> Self {
122 TriePath(value.components().map(|c| c.as_os_str().into()).collect())
123 }
124}
125
126#[cfg(test)]
127mod tests {
128 use std::collections::BTreeSet;
129
130 use super::*;
131
132 #[test]
133 fn test_insert_and_lookup() {
134 let mut trie = RootPathTrie::<()>::new();
135 trie.insert(
136 &TriePath::from(Path::new("a/b/c")),
137 (),
138 LabelPresence::Present,
139 );
140
141 trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, nodes| {
142 assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
143 assert_eq!(path.as_ref(), Path::new("a/b/c"));
144 ControlFlow::Continue(())
145 });
146 // Now let's annotate a parent with "Known missing" node.
147 trie.insert(
148 &TriePath::from(Path::new("a")),
149 (),
150 LabelPresence::KnownAbsent,
151 );
152
153 // Ensure that we walk from the root to the leaf.
154 let mut visited_paths = BTreeSet::new();
155 trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, nodes| {
156 if path.as_ref() == Path::new("a/b/c") {
157 assert_eq!(
158 visited_paths,
159 BTreeSet::from_iter([Arc::from(Path::new("a/"))])
160 );
161 assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
162 } else if path.as_ref() == Path::new("a/") {
163 assert!(visited_paths.is_empty());
164 assert_eq!(nodes.get(&()), Some(&LabelPresence::KnownAbsent));
165 } else {
166 panic!("Unknown path");
167 }
168 // Assert that we only ever visit a path once.
169 assert!(visited_paths.insert(path.clone()));
170 ControlFlow::Continue(())
171 });
172
173 // One can also pass a path whose prefix is in the tree, but not that path itself.
174 let mut visited_paths = BTreeSet::new();
175 trie.walk(
176 &TriePath::from(Path::new("a/b/c/d/e/f/g")),
177 &mut |path, nodes| {
178 if path.as_ref() == Path::new("a/b/c") {
179 assert_eq!(
180 visited_paths,
181 BTreeSet::from_iter([Arc::from(Path::new("a/"))])
182 );
183 assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
184 } else if path.as_ref() == Path::new("a/") {
185 assert!(visited_paths.is_empty());
186 assert_eq!(nodes.get(&()), Some(&LabelPresence::KnownAbsent));
187 } else {
188 panic!("Unknown path");
189 }
190 // Assert that we only ever visit a path once.
191 assert!(visited_paths.insert(path.clone()));
192 ControlFlow::Continue(())
193 },
194 );
195
196 // Test breaking from the tree-walk.
197 let mut visited_paths = BTreeSet::new();
198 trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, nodes| {
199 if path.as_ref() == Path::new("a/") {
200 assert!(visited_paths.is_empty());
201 assert_eq!(nodes.get(&()), Some(&LabelPresence::KnownAbsent));
202 } else {
203 panic!("Unknown path");
204 }
205 // Assert that we only ever visit a path once.
206 assert!(visited_paths.insert(path.clone()));
207 ControlFlow::Break(())
208 });
209 assert_eq!(visited_paths.len(), 1);
210
211 // Entry removal.
212 trie.insert(
213 &TriePath::from(Path::new("a/b")),
214 (),
215 LabelPresence::KnownAbsent,
216 );
217 let mut visited_paths = BTreeSet::new();
218 trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, _nodes| {
219 // Assert that we only ever visit a path once.
220 assert!(visited_paths.insert(path.clone()));
221 ControlFlow::Continue(())
222 });
223 assert_eq!(visited_paths.len(), 3);
224 trie.remove(&TriePath::from(Path::new("a/b/")));
225 let mut visited_paths = BTreeSet::new();
226 trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, _nodes| {
227 // Assert that we only ever visit a path once.
228 assert!(visited_paths.insert(path.clone()));
229 ControlFlow::Continue(())
230 });
231 assert_eq!(visited_paths.len(), 1);
232 assert_eq!(
233 visited_paths.into_iter().next().unwrap().as_ref(),
234 Path::new("a/")
235 );
236 }
237
238 #[test]
239 fn path_to_a_root_can_contain_multiple_known_nodes() {
240 let mut trie = RootPathTrie::<()>::new();
241 trie.insert(
242 &TriePath::from(Path::new("a/b")),
243 (),
244 LabelPresence::Present,
245 );
246 trie.insert(&TriePath::from(Path::new("a")), (), LabelPresence::Present);
247 let mut visited_paths = BTreeSet::new();
248 trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, nodes| {
249 assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
250 if path.as_ref() != Path::new("a") && path.as_ref() != Path::new("a/b") {
251 panic!("Unexpected path: {}", path.as_ref().display());
252 }
253 assert!(visited_paths.insert(path.clone()));
254 ControlFlow::Continue(())
255 });
256 assert_eq!(visited_paths.len(), 2);
257 }
258}