1use std::{
2 collections::{btree_map::Entry, BTreeMap},
3 ffi::OsStr,
4 ops::ControlFlow,
5 path::{Path, PathBuf},
6 sync::Arc,
7};
8
9/// [RootPathTrie] is a workhorse of [super::ProjectTree]. It is responsible for determining the closest known project root for a given path.
10/// It also determines how much of a given path is unexplored, thus letting callers fill in that gap if needed.
11/// Conceptually, it allows one to annotate Worktree entries with arbitrary extra metadata and run closest-ancestor searches.
12///
13/// A path is unexplored when the closest ancestor of a path is not the path itself; that means that we have not yet ran the scan on that path.
14/// For example, if there's a project root at path `python/project` and we query for a path `python/project/subdir/another_subdir/file.py`, there is
15/// a known root at `python/project` and the unexplored part is `subdir/another_subdir` - we need to run a scan on these 2 directories.
16pub(super) struct RootPathTrie<Label> {
17 worktree_relative_path: Arc<Path>,
18 labels: BTreeMap<Label, LabelPresence>,
19 children: BTreeMap<Arc<OsStr>, RootPathTrie<Label>>,
20}
21
22/// Label presence is a marker that allows to optimize searches within [RootPathTrie]; node label can be:
23/// - Present; we know there's definitely a project root at this node and it is the only label of that kind on the path to the root of a worktree
24/// (none of it's ancestors or descendants can contain the same present label)
25/// - Known Absent - we know there's definitely no project root at this node and none of it's ancestors are Present (descendants can be present though!).
26/// - Forbidden - we know there's definitely no project root at this node and none of it's ancestors or descendants can be Present.
27/// The distinction is there to optimize searching; when we encounter a node with unknown status, we don't need to look at it's full path
28/// to the root of the worktree; it's sufficient to explore only the path between last node with a KnownAbsent state and the directory of a path, since we run searches
29/// from the leaf up to the root of the worktree. When any of the ancestors is forbidden, we don't need to look at the node or its ancestors.
30/// When there's a present labeled node on the path to the root, we don't need to ask the adapter to run the search at all.
31///
32/// In practical terms, it means that by storing label presence we don't need to do a project discovery on a given folder more than once
33/// (unless the node is invalidated, which can happen when FS entries are renamed/removed).
34///
35/// Storing project absence allows us to recognize which paths have already been scanned for a project root unsuccessfully. This way we don't need to run
36/// such scan more than once.
37#[derive(Clone, Copy, Debug, PartialOrd, PartialEq, Ord, Eq)]
38pub(super) enum LabelPresence {
39 KnownAbsent,
40 Present,
41}
42
43impl<Label: Ord + Clone> RootPathTrie<Label> {
44 pub(super) fn new() -> Self {
45 Self::new_with_key(Arc::from(Path::new("")))
46 }
47 fn new_with_key(worktree_relative_path: Arc<Path>) -> Self {
48 RootPathTrie {
49 worktree_relative_path,
50 labels: Default::default(),
51 children: Default::default(),
52 }
53 }
54 // Internal implementation of inner that allows one to visit descendants of insertion point for a node.
55 fn insert_inner(
56 &mut self,
57 path: &TriePath,
58 value: Label,
59 presence: LabelPresence,
60 ) -> &mut Self {
61 let mut current = self;
62
63 let mut path_so_far = PathBuf::new();
64 for key in path.0.iter() {
65 path_so_far.push(Path::new(key));
66 current = match current.children.entry(key.clone()) {
67 Entry::Vacant(vacant_entry) => vacant_entry
68 .insert(RootPathTrie::new_with_key(Arc::from(path_so_far.as_path()))),
69 Entry::Occupied(occupied_entry) => occupied_entry.into_mut(),
70 };
71 }
72 let _previous_value = current.labels.insert(value, presence);
73 debug_assert_eq!(_previous_value, None);
74 current
75 }
76 pub(super) fn insert(&mut self, path: &TriePath, value: Label, presence: LabelPresence) {
77 self.insert_inner(path, value, presence);
78 }
79
80 pub(super) fn walk<'a>(
81 &'a self,
82 path: &TriePath,
83 callback: &mut dyn for<'b> FnMut(
84 &'b Arc<Path>,
85 &'a BTreeMap<Label, LabelPresence>,
86 ) -> ControlFlow<()>,
87 ) {
88 let mut current = self;
89 for key in path.0.iter() {
90 if !current.labels.is_empty() {
91 if (callback)(¤t.worktree_relative_path, ¤t.labels).is_break() {
92 return;
93 };
94 }
95 current = match current.children.get(key) {
96 Some(child) => child,
97 None => return,
98 };
99 }
100 if !current.labels.is_empty() {
101 (callback)(¤t.worktree_relative_path, ¤t.labels);
102 }
103 }
104
105 pub(super) fn remove(&mut self, path: &TriePath) {
106 debug_assert_ne!(path.0.len(), 0);
107 let mut current = self;
108 for path in path.0.iter().take(path.0.len().saturating_sub(1)) {
109 current = match current.children.get_mut(path) {
110 Some(child) => child,
111 None => return,
112 };
113 }
114 if let Some(final_entry_name) = path.0.last() {
115 current.children.remove(final_entry_name);
116 }
117 }
118}
119
120/// [TriePath] is a [Path] preprocessed for amortizing the cost of doing multiple lookups in distinct [RootPathTrie]s.
121#[derive(Clone)]
122pub(super) struct TriePath(Arc<[Arc<OsStr>]>);
123
124impl From<&Path> for TriePath {
125 fn from(value: &Path) -> Self {
126 TriePath(value.components().map(|c| c.as_os_str().into()).collect())
127 }
128}
129
130#[cfg(test)]
131mod tests {
132 use std::collections::BTreeSet;
133
134 use super::*;
135
136 #[test]
137 fn test_insert_and_lookup() {
138 let mut trie = RootPathTrie::<()>::new();
139 trie.insert(
140 &TriePath::from(Path::new("a/b/c")),
141 (),
142 LabelPresence::Present,
143 );
144
145 trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, nodes| {
146 assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
147 assert_eq!(path.as_ref(), Path::new("a/b/c"));
148 ControlFlow::Continue(())
149 });
150 // Now let's annotate a parent with "Known missing" node.
151 trie.insert(
152 &TriePath::from(Path::new("a")),
153 (),
154 LabelPresence::KnownAbsent,
155 );
156
157 // Ensure that we walk from the root to the leaf.
158 let mut visited_paths = BTreeSet::new();
159 trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, nodes| {
160 if path.as_ref() == Path::new("a/b/c") {
161 assert_eq!(
162 visited_paths,
163 BTreeSet::from_iter([Arc::from(Path::new("a/"))])
164 );
165 assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
166 } else if path.as_ref() == Path::new("a/") {
167 assert!(visited_paths.is_empty());
168 assert_eq!(nodes.get(&()), Some(&LabelPresence::KnownAbsent));
169 } else {
170 panic!("Unknown path");
171 }
172 // Assert that we only ever visit a path once.
173 assert!(visited_paths.insert(path.clone()));
174 ControlFlow::Continue(())
175 });
176
177 // One can also pass a path whose prefix is in the tree, but not that path itself.
178 let mut visited_paths = BTreeSet::new();
179 trie.walk(
180 &TriePath::from(Path::new("a/b/c/d/e/f/g")),
181 &mut |path, nodes| {
182 if path.as_ref() == Path::new("a/b/c") {
183 assert_eq!(
184 visited_paths,
185 BTreeSet::from_iter([Arc::from(Path::new("a/"))])
186 );
187 assert_eq!(nodes.get(&()), Some(&LabelPresence::Present));
188 } else if path.as_ref() == Path::new("a/") {
189 assert!(visited_paths.is_empty());
190 assert_eq!(nodes.get(&()), Some(&LabelPresence::KnownAbsent));
191 } else {
192 panic!("Unknown path");
193 }
194 // Assert that we only ever visit a path once.
195 assert!(visited_paths.insert(path.clone()));
196 ControlFlow::Continue(())
197 },
198 );
199
200 // Test breaking from the tree-walk.
201 let mut visited_paths = BTreeSet::new();
202 trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, nodes| {
203 if path.as_ref() == Path::new("a/") {
204 assert!(visited_paths.is_empty());
205 assert_eq!(nodes.get(&()), Some(&LabelPresence::KnownAbsent));
206 } else {
207 panic!("Unknown path");
208 }
209 // Assert that we only ever visit a path once.
210 assert!(visited_paths.insert(path.clone()));
211 ControlFlow::Break(())
212 });
213 assert_eq!(visited_paths.len(), 1);
214
215 // Entry removal.
216 trie.insert(
217 &TriePath::from(Path::new("a/b")),
218 (),
219 LabelPresence::KnownAbsent,
220 );
221 let mut visited_paths = BTreeSet::new();
222 trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, _nodes| {
223 // Assert that we only ever visit a path once.
224 assert!(visited_paths.insert(path.clone()));
225 ControlFlow::Continue(())
226 });
227 assert_eq!(visited_paths.len(), 3);
228 trie.remove(&TriePath::from(Path::new("a/b/")));
229 let mut visited_paths = BTreeSet::new();
230 trie.walk(&TriePath::from(Path::new("a/b/c")), &mut |path, _nodes| {
231 // Assert that we only ever visit a path once.
232 assert!(visited_paths.insert(path.clone()));
233 ControlFlow::Continue(())
234 });
235 assert_eq!(visited_paths.len(), 1);
236 assert_eq!(
237 visited_paths.into_iter().next().unwrap().as_ref(),
238 Path::new("a/")
239 );
240 }
241}