1use gpui::BackgroundExecutor;
2use nucleo::pattern::{AtomKind, CaseMatching, Normalization, Pattern};
3use std::{
4 cmp::{self, Ordering},
5 sync::{
6 Arc,
7 atomic::{self, AtomicBool},
8 },
9};
10use util::{paths::PathStyle, rel_path::RelPath};
11
12use crate::{CharBag, matcher};
13
14#[derive(Clone, Debug)]
15pub struct PathMatchCandidate<'a> {
16 pub is_dir: bool,
17 pub path: &'a RelPath,
18 pub char_bag: CharBag,
19}
20
21#[derive(Clone, Debug)]
22pub struct PathMatch {
23 pub score: f64,
24 /// Guarenteed to be sorted, chars from the start of the string
25 pub positions: Vec<usize>,
26 pub worktree_id: usize,
27 pub path: Arc<RelPath>,
28 pub path_prefix: Arc<RelPath>,
29 pub is_dir: bool,
30 /// Number of steps removed from a shared parent with the relative path
31 /// Used to order closer paths first in the search list
32 pub distance_to_relative_ancestor: usize,
33}
34
35// This has only one implementation. It's here to invert dependencies so fuzzy
36// does not need to depend on project. Though we also use it to make testing easier.
37pub trait PathMatchCandidateSet<'a>: Send + Sync {
38 type Candidates: Iterator<Item = PathMatchCandidate<'a>>;
39 fn id(&self) -> usize;
40 fn len(&self) -> usize;
41 fn is_empty(&self) -> bool {
42 self.len() == 0
43 }
44 fn root_is_file(&self) -> bool;
45 fn prefix(&self) -> Arc<RelPath>;
46 fn candidates(&'a self, start: usize) -> Self::Candidates;
47 fn path_style(&self) -> PathStyle;
48}
49
50impl PartialEq for PathMatch {
51 fn eq(&self, other: &Self) -> bool {
52 self.cmp(other).is_eq()
53 }
54}
55
56impl Eq for PathMatch {}
57
58impl PartialOrd for PathMatch {
59 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
60 Some(self.cmp(other))
61 }
62}
63
64impl Ord for PathMatch {
65 fn cmp(&self, other: &Self) -> Ordering {
66 println!(
67 "{:?}: {}, {:?} {}",
68 self.path, self.score, other.path, other.score
69 );
70 self.score
71 .total_cmp(&other.score)
72 .reverse()
73 .then_with(|| self.worktree_id.cmp(&other.worktree_id))
74 .then_with(|| {
75 other
76 .distance_to_relative_ancestor
77 .cmp(&self.distance_to_relative_ancestor)
78 })
79 .then_with(|| {
80 self.distance_from_end()
81 .total_cmp(&other.distance_from_end())
82 })
83 // see shorter_over_lexicographical test for an example of why we want this
84 .then_with(|| {
85 self.path
86 .as_unix_str()
87 .chars()
88 .count()
89 .cmp(&other.path.as_unix_str().chars().count())
90 })
91 .then_with(|| self.path.cmp(&other.path))
92 }
93}
94
95impl PathMatch {
96 fn distance_from_end(&self) -> f32 {
97 let len = self.path_prefix.as_unix_str().chars().count()
98 + 1
99 + self.path.as_unix_str().chars().count(); // add one for path separator
100 dbg!(&self.path, &self.path_prefix);
101 self.positions
102 .iter()
103 .map(|p| (dbg!(len) - dbg!(p)) as f32 / 1000.0)
104 .sum()
105 }
106}
107
108pub fn match_fixed_path_set(
109 candidates: Vec<PathMatchCandidate>,
110 worktree_id: usize,
111 worktree_root_name: Option<Arc<RelPath>>,
112 query: &str,
113 smart_case: bool,
114 max_results: usize,
115 path_style: PathStyle,
116) -> Vec<PathMatch> {
117 let mut config = nucleo::Config::DEFAULT;
118 config.set_match_paths();
119 let mut matcher = matcher::get_matcher(config);
120 let pattern = Pattern::new(
121 query,
122 if smart_case {
123 CaseMatching::Smart
124 } else {
125 CaseMatching::Ignore
126 },
127 Normalization::Smart,
128 AtomKind::Fuzzy,
129 );
130
131 let mut results = Vec::with_capacity(candidates.len());
132 path_match_helper(
133 &mut matcher,
134 &pattern,
135 candidates.into_iter(),
136 worktree_id,
137 &worktree_root_name
138 .clone()
139 .unwrap_or(RelPath::empty().into()),
140 &None,
141 path_style,
142 &AtomicBool::new(false),
143 &mut results,
144 )
145 .ok();
146 matcher::return_matcher(matcher);
147 util::truncate_to_bottom_n_sorted(&mut results, max_results);
148 for r in &mut results {
149 r.positions.sort();
150 }
151 results
152}
153
154struct Cancelled;
155
156fn path_match_helper<'a>(
157 matcher: &mut nucleo::Matcher,
158 pattern: &Pattern,
159 candidates: impl Iterator<Item = PathMatchCandidate<'a>>,
160 worktree_id: usize,
161 path_prefix: &Arc<RelPath>,
162 relative_to: &Option<Arc<RelPath>>,
163 path_style: PathStyle,
164 cancel_flag: &AtomicBool,
165 results: &mut Vec<PathMatch>,
166) -> std::result::Result<(), Cancelled> {
167 let mut candidate_buf = path_prefix.display(path_style).to_string();
168 if !path_prefix.is_empty() {
169 candidate_buf.push_str(path_style.separator());
170 }
171 let path_prefix_len = candidate_buf.len();
172 for c in candidates {
173 if cancel_flag.load(std::sync::atomic::Ordering::Relaxed) {
174 return Err(Cancelled);
175 }
176 let mut indices = Vec::new();
177 let mut buf = Vec::new();
178 candidate_buf.truncate(path_prefix_len);
179 candidate_buf.push_str(c.path.as_unix_str());
180 // TODO: need to convert indices/positions from char offsets to byte offsets.
181 if let Some(score) = pattern.indices(
182 nucleo::Utf32Str::new(dbg!(&candidate_buf), &mut buf),
183 matcher,
184 &mut indices,
185 ) {
186 // TODO: walk both in order for better perf
187 let positions: Vec<_> = candidate_buf
188 .char_indices()
189 .enumerate()
190 .filter_map(|(char_offset, (byte_offset, _))| {
191 indices
192 .contains(&(char_offset as u32))
193 .then_some(byte_offset)
194 })
195 .collect();
196
197 results.push(PathMatch {
198 score: score as f64,
199 worktree_id,
200 positions,
201 is_dir: c.is_dir,
202 path: c.path.into(),
203 path_prefix: Arc::clone(&path_prefix),
204 distance_to_relative_ancestor: relative_to
205 .as_ref()
206 .map_or(usize::MAX, |relative_to| {
207 distance_between_paths(c.path, relative_to.as_ref())
208 }),
209 })
210 };
211 }
212 Ok(())
213}
214
215/// Query should contain spaces if you want it to be matched out of order
216/// for example: 'audio Cargo' matching 'audio/Cargo.toml'
217pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
218 candidate_sets: &'a [Set],
219 query: &str,
220 relative_to: &Option<Arc<RelPath>>,
221 smart_case: bool,
222 max_results: usize,
223 cancel_flag: &AtomicBool,
224 executor: BackgroundExecutor,
225) -> Vec<PathMatch> {
226 let path_count: usize = candidate_sets.iter().map(|s| s.len()).sum();
227 if path_count == 0 {
228 return Vec::new();
229 }
230 dbg!(relative_to);
231
232 let path_style = candidate_sets[0].path_style();
233
234 let query = if path_style.is_windows() {
235 query.replace('\\', "/")
236 } else {
237 query.to_owned()
238 };
239
240 let pattern = Pattern::new(
241 &query,
242 if smart_case {
243 CaseMatching::Smart
244 } else {
245 CaseMatching::Ignore
246 },
247 Normalization::Smart,
248 AtomKind::Fuzzy,
249 );
250
251 let num_cpus = executor.num_cpus().min(path_count);
252 let segment_size = path_count.div_ceil(num_cpus);
253 let mut segment_results = (0..num_cpus)
254 .map(|_| Vec::with_capacity(max_results))
255 .collect::<Vec<_>>();
256
257 let mut config = nucleo::Config::DEFAULT;
258 config.set_match_paths();
259 let mut matchers = matcher::get_matchers(num_cpus, config);
260
261 // This runs num_cpu parallel searches. Each search is going through all candidate sets
262 // Each parallel search goes through one segment of the every candidate set. The segments are
263 // not overlapping.
264 executor
265 .scoped(|scope| {
266 for (segment_idx, (results, matcher)) in segment_results
267 .iter_mut()
268 .zip(matchers.iter_mut())
269 .enumerate()
270 {
271 let relative_to = relative_to.clone();
272 let pattern = pattern.clone();
273 scope.spawn(async move {
274 let segment_start = segment_idx * segment_size;
275 let segment_end = segment_start + segment_size;
276
277 let mut tree_start = 0;
278 for candidate_set in candidate_sets {
279 let tree_end = tree_start + candidate_set.len();
280
281 if tree_start < segment_end && segment_start < tree_end {
282 let start = cmp::max(tree_start, segment_start) - tree_start;
283 let end = cmp::min(tree_end, segment_end) - tree_start;
284 let candidates = candidate_set.candidates(start).take(end - start);
285
286 let worktree_id = candidate_set.id();
287 let mut prefix = candidate_set
288 .prefix()
289 .as_unix_str()
290 .chars()
291 .collect::<Vec<_>>();
292 if !candidate_set.root_is_file() && !prefix.is_empty() {
293 prefix.push('/');
294 }
295
296 if path_match_helper(
297 matcher,
298 &pattern,
299 candidates,
300 worktree_id,
301 &candidate_set.prefix(),
302 &relative_to,
303 path_style,
304 cancel_flag,
305 results,
306 )
307 .is_err()
308 {
309 break;
310 }
311 }
312 if tree_end >= segment_end {
313 break;
314 }
315 tree_start = tree_end;
316 }
317 })
318 }
319 })
320 .await;
321
322 if cancel_flag.load(atomic::Ordering::Acquire) {
323 return Vec::new();
324 }
325
326 matcher::return_matchers(matchers);
327
328 let mut results = segment_results.concat();
329 util::truncate_to_bottom_n_sorted(&mut results, max_results);
330 for r in &mut results {
331 r.positions.sort();
332 }
333
334 results
335}
336
337/// Compute the distance from a given path to some other path
338/// If there is no shared path, returns usize::MAX
339fn distance_between_paths(path: &RelPath, relative_to: &RelPath) -> usize {
340 let mut path_components = path.components();
341 let mut relative_components = relative_to.components();
342
343 while path_components
344 .next()
345 .zip(relative_components.next())
346 .map(|(path_component, relative_component)| path_component == relative_component)
347 .unwrap_or_default()
348 {}
349 path_components.count() + relative_components.count() + 1
350}
351
352#[cfg(test)]
353mod tests {
354 use std::sync::{Arc, atomic::AtomicBool};
355
356 use gpui::TestAppContext;
357 use util::{paths::PathStyle, rel_path::RelPath};
358
359 use crate::{CharBag, PathMatchCandidate, PathMatchCandidateSet};
360
361 use super::distance_between_paths;
362
363 #[test]
364 fn test_distance_between_paths_empty() {
365 distance_between_paths(RelPath::empty(), RelPath::empty());
366 }
367
368 struct TestCandidateSet<'a> {
369 prefix: Arc<RelPath>,
370 candidates: Vec<PathMatchCandidate<'a>>,
371 path_style: PathStyle,
372 }
373
374 impl<'a> PathMatchCandidateSet<'a> for TestCandidateSet<'a> {
375 type Candidates = std::vec::IntoIter<PathMatchCandidate<'a>>;
376
377 fn id(&self) -> usize {
378 0
379 }
380 fn len(&self) -> usize {
381 self.candidates.len()
382 }
383 fn is_empty(&self) -> bool {
384 self.candidates.is_empty()
385 }
386 fn root_is_file(&self) -> bool {
387 true // TODO: swap this
388 }
389 fn prefix(&self) -> Arc<RelPath> {
390 self.prefix.clone()
391 }
392 fn candidates(&self, start: usize) -> Self::Candidates {
393 self.candidates[start..]
394 .iter()
395 .cloned()
396 .collect::<Vec<_>>()
397 .into_iter()
398 }
399 fn path_style(&self) -> PathStyle {
400 self.path_style
401 }
402 }
403
404 async fn path_matches(
405 cx: &mut TestAppContext,
406 candidates: &'static [&'static str],
407 query: &'static str,
408 prefix: Option<&str>,
409 ) -> Vec<String> {
410 let set = TestCandidateSet {
411 prefix: RelPath::unix(prefix.unwrap_or_default()).unwrap().into(),
412 candidates: candidates
413 .into_iter()
414 .map(|s| PathMatchCandidate {
415 is_dir: false,
416 path: RelPath::unix(s).unwrap().into(),
417 char_bag: CharBag::from_iter(s.to_lowercase().chars()),
418 })
419 .collect(),
420 path_style: PathStyle::Windows,
421 };
422 let candidate_sets = vec![set];
423
424 let cancellation_flag = AtomicBool::new(false);
425 let executor = cx.background_executor.clone();
426 let matches = cx
427 .foreground_executor
428 .spawn(async move {
429 super::match_path_sets(
430 candidate_sets.as_slice(),
431 query,
432 &None,
433 false,
434 100,
435 &cancellation_flag,
436 executor,
437 )
438 .await
439 })
440 .await;
441
442 matches
443 .iter()
444 .map(|s| s.path.as_unix_str().to_string())
445 .collect::<Vec<_>>()
446 }
447
448 #[gpui::test]
449 async fn test_dir_paths(cx: &mut TestAppContext) {
450 const CANDIDATES: &'static [&'static str] = &[
451 "gpui_even_more/Cargo.toml",
452 "gpui_more/Cargo.toml",
453 "gpui/Cargo.toml",
454 ];
455
456 assert_eq!(
457 path_matches(cx, CANDIDATES, "toml gpui", None).await,
458 [
459 "gpui/Cargo.toml",
460 "gpui_more/Cargo.toml",
461 "gpui_even_more/Cargo.toml",
462 ]
463 );
464
465 assert_eq!(
466 path_matches(cx, CANDIDATES, "gpui more", None).await,
467 ["gpui_more/Cargo.toml", "gpui_even_more/Cargo.toml",]
468 );
469 }
470 #[gpui::test]
471 async fn test_more_dir_paths(cx: &mut TestAppContext) {
472 const CANDIDATES: &'static [&'static str] = &[
473 "crates/gpui_macros/Cargo.toml",
474 "crates/gpui_tokio/Cargo.toml",
475 "crates/gpui/Cargo.toml",
476 ];
477
478 assert_eq!(
479 path_matches(cx, CANDIDATES, "toml gpui", None).await,
480 [
481 "crates/gpui/Cargo.toml",
482 "crates/gpui_tokio/Cargo.toml",
483 "crates/gpui_macros/Cargo.toml"
484 ]
485 );
486 }
487
488 #[gpui::test]
489 async fn denoise(cx: &mut TestAppContext) {
490 const CANDIDATES: &'static [&'static str] = &[
491 "crates/debug_adapter_extension/Cargo.toml",
492 "crates/debugger_tools/Cargo.toml",
493 "crates/debugger_ui/Cargo.toml",
494 "crates/deepseek/Cargo.toml",
495 "crates/denoise/Cargo.toml",
496 ];
497
498 assert_eq!(
499 path_matches(cx, CANDIDATES, "toml de", None).await,
500 [
501 "crates/denoise/Cargo.toml",
502 "crates/deepseek/Cargo.toml",
503 "crates/debugger_ui/Cargo.toml",
504 "crates/debugger_tools/Cargo.toml",
505 "crates/debug_adapter_extension/Cargo.toml",
506 ]
507 );
508 }
509
510 #[gpui::test]
511 async fn test_path_matcher(cx: &mut TestAppContext) {
512 const CANDIDATES: &'static [&'static str] = &[
513 "blue", "red", "purple", "pink", "green", "yellow", "magenta", "orange", "ocean",
514 "navy", "brown",
515 ];
516 assert_eq!(path_matches(cx, CANDIDATES, "bl", None).await, ["blue"]);
517 }
518
519 #[gpui::test]
520 async fn shorter_over_lexicographical(cx: &mut TestAppContext) {
521 const CANDIDATES: &'static [&'static str] = &["qr", "qqqqqqqqqqqq"];
522 assert_eq!(
523 path_matches(cx, CANDIDATES, "q", None).await,
524 ["qr", "qqqqqqqqqqqq"]
525 );
526 }
527 // TODO: add perf test on zed repo
528
529 #[gpui::test]
530 async fn prefer_single_word_match_to_multiple_fragments(cx: &mut TestAppContext) {
531 const CANDIDATES: &'static [&'static str] = &[
532 "crates/theme_importer/README.md",
533 "extensions/test-extension/README.md",
534 "extensions/slash-commands-example/README.md",
535 "crates/livekit_api/vendored/protocol/README.md",
536 "crates/assistant_tools/src/read_file_tool/description.md",
537 ];
538 assert_eq!(path_matches(cx, CANDIDATES, "read", None).await, CANDIDATES);
539 }
540
541 #[gpui::test]
542 async fn paprika(cx: &mut TestAppContext) {
543 const CANDIDATES: &'static [&'static str] = &["bar/neat.txt", "foo/bar.txt"];
544
545 assert_eq!(
546 path_matches(cx, CANDIDATES, "bar", None).await,
547 ["foo/bar.txt", "bar/neat.txt"]
548 );
549 }
550
551 #[gpui::test]
552 async fn aubergine(cx: &mut TestAppContext) {
553 const CANDIDATES: &'static [&'static str] =
554 &["vim_mode_setting/Cargo.toml", "vim/Cargo.toml"];
555 assert_eq!(
556 path_matches(cx, CANDIDATES, "Cargo.to vim", Some("crates")).await,
557 [CANDIDATES[1], CANDIDATES[0]]
558 );
559 }
560}