1use std::{
2 collections::{BTreeMap, btree_map::Entry},
3 ops::ControlFlow,
4 sync::Arc,
5};
6
7use util::rel_path::RelPath;
8
9/// [RootPathTrie] is a workhorse of [super::ManifestTree]. It is responsible for determining the closest known entry for a given path.
10/// It also determines how much of a given path is unexplored, thus letting callers fill in that gap if needed.
11/// Conceptually, it allows one to annotate Worktree entries with arbitrary extra metadata and run closest-ancestor searches.
12///
13/// A path is unexplored when the closest ancestor of a path is not the path itself; that means that we have not yet ran the scan on that path.
14/// For example, if there's a project root at path `python/project` and we query for a path `python/project/subdir/another_subdir/file.py`, there is
15/// a known root at `python/project` and the unexplored part is `subdir/another_subdir` - we need to run a scan on these 2 directories.
16pub(super) struct RootPathTrie<Label> {
17 worktree_relative_path: Arc<RelPath>,
18 labels: BTreeMap<Label, LabelPresence>,
19 children: BTreeMap<Arc<str>, RootPathTrie<Label>>,
20}
21
22/// Label presence is a marker that allows to optimize searches within [RootPathTrie]; node label can be:
23/// - Present; we know there's definitely a project root at this node.
24/// - Known Absent - we know there's definitely no project root at this node and none of it's ancestors are Present (descendants can be present though!).
25/// The distinction is there to optimize searching; when we encounter a node with unknown status, we don't need to look at it's full path
26/// to the root of the worktree; it's sufficient to explore only the path between last node with a KnownAbsent state and the directory of a path, since we run searches
27/// from the leaf up to the root of the worktree.
28///
29/// In practical terms, it means that by storing label presence we don't need to do a project discovery on a given folder more than once
30/// (unless the node is invalidated, which can happen when FS entries are renamed/removed).
31///
32/// Storing absent nodes allows us to recognize which paths have already been scanned for a project root unsuccessfully. This way we don't need to run
33/// such scan more than once.
34#[derive(Clone, Copy, Debug, PartialOrd, PartialEq, Ord, Eq)]
35pub(super) enum LabelPresence {
36 KnownAbsent,
37 Present,
38}
39
40impl<Label: Ord + Clone> RootPathTrie<Label> {
41 pub(super) fn new() -> Self {
42 Self::new_with_key(Arc::from(RelPath::empty()))
43 }
44
45 fn new_with_key(worktree_relative_path: Arc<RelPath>) -> Self {
46 RootPathTrie {
47 worktree_relative_path,
48 labels: Default::default(),
49 children: Default::default(),
50 }
51 }
52
53 // Internal implementation of inner that allows one to visit descendants of insertion point for a node.
54 fn insert_inner(
55 &mut self,
56 path: &TriePath,
57 value: Label,
58 presence: LabelPresence,
59 ) -> &mut Self {
60 let mut current = self;
61
62 let mut path_so_far = <Arc<RelPath>>::from(RelPath::empty());
63 for key in path.0.iter() {
64 path_so_far = path_so_far.join(RelPath::unix(key.as_ref()).unwrap());
65 current = match current.children.entry(key.clone()) {
66 Entry::Vacant(vacant_entry) => {
67 vacant_entry.insert(RootPathTrie::new_with_key(path_so_far.clone()))
68 }
69 Entry::Occupied(occupied_entry) => occupied_entry.into_mut(),
70 };
71 }
72 let _previous_value = current.labels.insert(value, presence);
73 debug_assert_eq!(_previous_value, None);
74 current
75 }
76
77 pub(super) fn insert(&mut self, path: &TriePath, value: Label, presence: LabelPresence) {
78 self.insert_inner(path, value, presence);
79 }
80
81 pub(super) fn walk<'a>(
82 &'a self,
83 path: &TriePath,
84 callback: &mut dyn for<'b> FnMut(
85 &'b Arc<RelPath>,
86 &'a BTreeMap<Label, LabelPresence>,
87 ) -> ControlFlow<()>,
88 ) {
89 let mut current = self;
90 for key in path.0.iter() {
91 if !current.labels.is_empty()
92 && (callback)(¤t.worktree_relative_path, ¤t.labels).is_break()
93 {
94 return;
95 };
96 current = match current.children.get(key) {
97 Some(child) => child,
98 None => return,
99 };
100 }
101 if !current.labels.is_empty() {
102 let _ = (callback)(¤t.worktree_relative_path, ¤t.labels);
103 }
104 }
105
106 pub(super) fn remove(&mut self, path: &TriePath) {
107 let mut current = self;
108 for path in path.0.iter().take(path.0.len().saturating_sub(1)) {
109 current = match current.children.get_mut(path) {
110 Some(child) => child,
111 None => return,
112 };
113 }
114 if let Some(final_entry_name) = path.0.last() {
115 current.children.remove(final_entry_name);
116 }
117 }
118}
119
120/// [TriePath] is a [Path] preprocessed for amortizing the cost of doing multiple lookups in distinct [RootPathTrie]s.
121#[derive(Clone)]
122pub(super) struct TriePath(Arc<[Arc<str>]>);
123
124impl TriePath {
125 fn new(value: &RelPath) -> Self {
126 TriePath(
127 value
128 .components()
129 .map(|component| component.into())
130 .collect(),
131 )
132 }
133}
134
135impl From<&RelPath> for TriePath {
136 fn from(value: &RelPath) -> Self {
137 Self::new(value)
138 }
139}
140
141#[cfg(test)]
142mod tests {
143 use std::collections::BTreeSet;
144
145 use util::rel_path::rel_path;
146
147 use super::*;
148
149 #[test]
150 fn test_insert_and_lookup() {
151 let mut trie = RootPathTrie::<()>::new();
152 trie.insert(
153 &TriePath::new(rel_path("a/b/c")),
154 (),
155 LabelPresence::Present,
156 );
157
158 trie.walk(&TriePath::new(rel_path("a/b/c")), &mut |path, nodes| {
159 assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
160 assert_eq!(path.as_unix_str(), "a/b/c");
161 ControlFlow::Continue(())
162 });
163 // Now let's annotate a parent with "Known missing" node.
164 trie.insert(
165 &TriePath::new(rel_path("a")),
166 (),
167 LabelPresence::KnownAbsent,
168 );
169
170 // Ensure that we walk from the root to the leaf.
171 let mut visited_paths = BTreeSet::new();
172 trie.walk(&TriePath::new(rel_path("a/b/c")), &mut |path, nodes| {
173 if path.as_unix_str() == "a/b/c" {
174 assert_eq!(visited_paths, BTreeSet::from_iter([rel_path("a").into()]));
175 assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
176 } else if path.as_unix_str() == "a" {
177 assert!(visited_paths.is_empty());
178 assert_eq!(nodes.get(&()), Some(&LabelPresence::KnownAbsent));
179 } else {
180 panic!("Unknown path");
181 }
182 // Assert that we only ever visit a path once.
183 assert!(visited_paths.insert(path.clone()));
184 ControlFlow::Continue(())
185 });
186
187 // One can also pass a path whose prefix is in the tree, but not that path itself.
188 let mut visited_paths = BTreeSet::new();
189 trie.walk(
190 &TriePath::new(rel_path("a/b/c/d/e/f/g")),
191 &mut |path, nodes| {
192 if path.as_unix_str() == "a/b/c" {
193 assert_eq!(visited_paths, BTreeSet::from_iter([rel_path("a").into()]));
194 assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
195 } else if path.as_unix_str() == "a" {
196 assert!(visited_paths.is_empty());
197 assert_eq!(nodes.get(&()), Some(&LabelPresence::KnownAbsent));
198 } else {
199 panic!("Unknown path");
200 }
201 // Assert that we only ever visit a path once.
202 assert!(visited_paths.insert(path.clone()));
203 ControlFlow::Continue(())
204 },
205 );
206
207 // Test breaking from the tree-walk.
208 let mut visited_paths = BTreeSet::new();
209 trie.walk(&TriePath::new(rel_path("a/b/c")), &mut |path, nodes| {
210 if path.as_unix_str() == "a" {
211 assert!(visited_paths.is_empty());
212 assert_eq!(nodes.get(&()), Some(&LabelPresence::KnownAbsent));
213 } else {
214 panic!("Unknown path");
215 }
216 // Assert that we only ever visit a path once.
217 assert!(visited_paths.insert(path.clone()));
218 ControlFlow::Break(())
219 });
220 assert_eq!(visited_paths.len(), 1);
221
222 // Entry removal.
223 trie.insert(
224 &TriePath::new(rel_path("a/b")),
225 (),
226 LabelPresence::KnownAbsent,
227 );
228 let mut visited_paths = BTreeSet::new();
229 trie.walk(&TriePath::new(rel_path("a/b/c")), &mut |path, _nodes| {
230 // Assert that we only ever visit a path once.
231 assert!(visited_paths.insert(path.clone()));
232 ControlFlow::Continue(())
233 });
234 assert_eq!(visited_paths.len(), 3);
235 trie.remove(&TriePath::new(rel_path("a/b")));
236 let mut visited_paths = BTreeSet::new();
237 trie.walk(&TriePath::new(rel_path("a/b/c")), &mut |path, _nodes| {
238 // Assert that we only ever visit a path once.
239 assert!(visited_paths.insert(path.clone()));
240 ControlFlow::Continue(())
241 });
242 assert_eq!(visited_paths.len(), 1);
243 assert_eq!(
244 visited_paths.into_iter().next().unwrap(),
245 rel_path("a").into()
246 );
247 }
248
249 #[test]
250 fn path_to_a_root_can_contain_multiple_known_nodes() {
251 let mut trie = RootPathTrie::<()>::new();
252 trie.insert(&TriePath::new(rel_path("a/b")), (), LabelPresence::Present);
253 trie.insert(&TriePath::new(rel_path("a")), (), LabelPresence::Present);
254 let mut visited_paths = BTreeSet::new();
255 trie.walk(&TriePath::new(rel_path("a/b/c")), &mut |path, nodes| {
256 assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
257 if path.as_unix_str() != "a" && path.as_unix_str() != "a/b" {
258 panic!("Unexpected path: {}", path.as_unix_str());
259 }
260 assert!(visited_paths.insert(path.clone()));
261 ControlFlow::Continue(())
262 });
263 assert_eq!(visited_paths.len(), 2);
264 }
265}