paths.rs

  1use gpui::BackgroundExecutor;
  2use nucleo::pattern::{AtomKind, CaseMatching, Normalization, Pattern};
  3use std::{
  4    cmp::{self, Ordering},
  5    sync::{
  6        Arc,
  7        atomic::{self, AtomicBool},
  8    },
  9};
 10use util::{paths::PathStyle, rel_path::RelPath};
 11
 12use crate::{CharBag, matcher};
 13
 14#[derive(Clone, Debug)]
 15pub struct PathMatchCandidate<'a> {
 16    pub is_dir: bool,
 17    pub path: &'a RelPath,
 18    pub char_bag: CharBag,
 19}
 20
 21#[derive(Clone, Debug)]
 22pub struct PathMatch {
 23    pub score: f64,
 24    /// Guarenteed to be sorted, chars from the start of the string
 25    pub positions: Vec<usize>,
 26    pub worktree_id: usize,
 27    pub path: Arc<RelPath>,
 28    pub path_prefix: Arc<RelPath>,
 29    pub is_dir: bool,
 30    /// Number of steps removed from a shared parent with the relative path
 31    /// Used to order closer paths first in the search list
 32    pub distance_to_relative_ancestor: usize,
 33}
 34
 35// This has only one implementation. It's here to invert dependencies so fuzzy
 36// does not need to depend on project. Though we also use it to make testing easier.
 37pub trait PathMatchCandidateSet<'a>: Send + Sync {
 38    type Candidates: Iterator<Item = PathMatchCandidate<'a>>;
 39    fn id(&self) -> usize;
 40    fn len(&self) -> usize;
 41    fn is_empty(&self) -> bool {
 42        self.len() == 0
 43    }
 44    fn root_is_file(&self) -> bool;
 45    fn prefix(&self) -> Arc<RelPath>;
 46    fn candidates(&'a self, start: usize) -> Self::Candidates;
 47    fn path_style(&self) -> PathStyle;
 48}
 49
 50impl PartialEq for PathMatch {
 51    fn eq(&self, other: &Self) -> bool {
 52        self.cmp(other).is_eq()
 53    }
 54}
 55
 56impl Eq for PathMatch {}
 57
 58impl PartialOrd for PathMatch {
 59    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
 60        Some(self.cmp(other))
 61    }
 62}
 63
 64impl Ord for PathMatch {
 65    fn cmp(&self, other: &Self) -> Ordering {
 66        println!(
 67            "{:?}: {}, {:?} {}",
 68            self.path, self.score, other.path, other.score
 69        );
 70        self.score
 71            .total_cmp(&other.score)
 72            .reverse()
 73            .then_with(|| self.worktree_id.cmp(&other.worktree_id))
 74            .then_with(|| {
 75                other
 76                    .distance_to_relative_ancestor
 77                    .cmp(&self.distance_to_relative_ancestor)
 78            })
 79            .then_with(|| {
 80                self.distance_from_end()
 81                    .total_cmp(&other.distance_from_end())
 82            })
 83            // see shorter_over_lexicographical test for an example of why we want this
 84            .then_with(|| {
 85                self.path
 86                    .as_unix_str()
 87                    .chars()
 88                    .count()
 89                    .cmp(&other.path.as_unix_str().chars().count())
 90            })
 91            .then_with(|| self.path.cmp(&other.path))
 92    }
 93}
 94
 95impl PathMatch {
 96    fn distance_from_end(&self) -> f32 {
 97        let len = self.path_prefix.as_unix_str().chars().count()
 98            + 1
 99            + self.path.as_unix_str().chars().count(); // add one for path separator
100        dbg!(&self.path, &self.path_prefix);
101        self.positions
102            .iter()
103            .map(|p| (dbg!(len) - dbg!(p)) as f32 / 1000.0)
104            .sum()
105    }
106}
107
108pub fn match_fixed_path_set(
109    candidates: Vec<PathMatchCandidate>,
110    worktree_id: usize,
111    worktree_root_name: Option<Arc<RelPath>>,
112    query: &str,
113    smart_case: bool,
114    max_results: usize,
115    path_style: PathStyle,
116) -> Vec<PathMatch> {
117    let mut config = nucleo::Config::DEFAULT;
118    config.set_match_paths();
119    let mut matcher = matcher::get_matcher(config);
120    let pattern = Pattern::new(
121        query,
122        if smart_case {
123            CaseMatching::Smart
124        } else {
125            CaseMatching::Ignore
126        },
127        Normalization::Smart,
128        AtomKind::Fuzzy,
129    );
130
131    let mut results = Vec::with_capacity(candidates.len());
132    path_match_helper(
133        &mut matcher,
134        &pattern,
135        candidates.into_iter(),
136        worktree_id,
137        &worktree_root_name
138            .clone()
139            .unwrap_or(RelPath::empty().into()),
140        &None,
141        path_style,
142        &AtomicBool::new(false),
143        &mut results,
144    )
145    .ok();
146    matcher::return_matcher(matcher);
147    util::truncate_to_bottom_n_sorted(&mut results, max_results);
148    for r in &mut results {
149        r.positions.sort();
150    }
151    results
152}
153
154struct Cancelled;
155
156fn path_match_helper<'a>(
157    matcher: &mut nucleo::Matcher,
158    pattern: &Pattern,
159    candidates: impl Iterator<Item = PathMatchCandidate<'a>>,
160    worktree_id: usize,
161    path_prefix: &Arc<RelPath>,
162    relative_to: &Option<Arc<RelPath>>,
163    path_style: PathStyle,
164    cancel_flag: &AtomicBool,
165    results: &mut Vec<PathMatch>,
166) -> std::result::Result<(), Cancelled> {
167    let mut candidate_buf = path_prefix.display(path_style).to_string();
168    if !path_prefix.is_empty() {
169        candidate_buf.push_str(path_style.separator());
170    }
171    let path_prefix_len = candidate_buf.len();
172    for c in candidates {
173        if cancel_flag.load(std::sync::atomic::Ordering::Relaxed) {
174            return Err(Cancelled);
175        }
176        let mut indices = Vec::new();
177        let mut buf = Vec::new();
178        candidate_buf.truncate(path_prefix_len);
179        candidate_buf.push_str(c.path.as_unix_str());
180        // TODO: need to convert indices/positions from char offsets to byte offsets.
181        if let Some(score) = pattern.indices(
182            nucleo::Utf32Str::new(dbg!(&candidate_buf), &mut buf),
183            matcher,
184            &mut indices,
185        ) {
186            // TODO: walk both in order for better perf
187            let positions: Vec<_> = candidate_buf
188                .char_indices()
189                .enumerate()
190                .filter_map(|(char_offset, (byte_offset, _))| {
191                    indices
192                        .contains(&(char_offset as u32))
193                        .then_some(byte_offset)
194                })
195                .collect();
196
197            results.push(PathMatch {
198                score: score as f64,
199                worktree_id,
200                positions,
201                is_dir: c.is_dir,
202                path: c.path.into(),
203                path_prefix: Arc::clone(&path_prefix),
204                distance_to_relative_ancestor: relative_to
205                    .as_ref()
206                    .map_or(usize::MAX, |relative_to| {
207                        distance_between_paths(c.path, relative_to.as_ref())
208                    }),
209            })
210        };
211    }
212    Ok(())
213}
214
215/// Query should contain spaces if you want it to be matched out of order
216/// for example: 'audio Cargo' matching 'audio/Cargo.toml'
217pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
218    candidate_sets: &'a [Set],
219    query: &str,
220    relative_to: &Option<Arc<RelPath>>,
221    smart_case: bool,
222    max_results: usize,
223    cancel_flag: &AtomicBool,
224    executor: BackgroundExecutor,
225) -> Vec<PathMatch> {
226    let path_count: usize = candidate_sets.iter().map(|s| s.len()).sum();
227    if path_count == 0 {
228        return Vec::new();
229    }
230    dbg!(relative_to);
231
232    let path_style = candidate_sets[0].path_style();
233
234    let query = if path_style.is_windows() {
235        query.replace('\\', "/")
236    } else {
237        query.to_owned()
238    };
239
240    let pattern = Pattern::new(
241        &query,
242        if smart_case {
243            CaseMatching::Smart
244        } else {
245            CaseMatching::Ignore
246        },
247        Normalization::Smart,
248        AtomKind::Fuzzy,
249    );
250
251    let num_cpus = executor.num_cpus().min(path_count);
252    let segment_size = path_count.div_ceil(num_cpus);
253    let mut segment_results = (0..num_cpus)
254        .map(|_| Vec::with_capacity(max_results))
255        .collect::<Vec<_>>();
256
257    let mut config = nucleo::Config::DEFAULT;
258    config.set_match_paths();
259    let mut matchers = matcher::get_matchers(num_cpus, config);
260
261    // This runs num_cpu parallel searches. Each search is going through all candidate sets
262    // Each parallel search goes through one segment of the every candidate set. The segments are
263    // not overlapping.
264    executor
265        .scoped(|scope| {
266            for (segment_idx, (results, matcher)) in segment_results
267                .iter_mut()
268                .zip(matchers.iter_mut())
269                .enumerate()
270            {
271                let relative_to = relative_to.clone();
272                let pattern = pattern.clone();
273                scope.spawn(async move {
274                    let segment_start = segment_idx * segment_size;
275                    let segment_end = segment_start + segment_size;
276
277                    let mut tree_start = 0;
278                    for candidate_set in candidate_sets {
279                        let tree_end = tree_start + candidate_set.len();
280
281                        if tree_start < segment_end && segment_start < tree_end {
282                            let start = cmp::max(tree_start, segment_start) - tree_start;
283                            let end = cmp::min(tree_end, segment_end) - tree_start;
284                            let candidates = candidate_set.candidates(start).take(end - start);
285
286                            let worktree_id = candidate_set.id();
287                            let mut prefix = candidate_set
288                                .prefix()
289                                .as_unix_str()
290                                .chars()
291                                .collect::<Vec<_>>();
292                            if !candidate_set.root_is_file() && !prefix.is_empty() {
293                                prefix.push('/');
294                            }
295
296                            if path_match_helper(
297                                matcher,
298                                &pattern,
299                                candidates,
300                                worktree_id,
301                                &candidate_set.prefix(),
302                                &relative_to,
303                                path_style,
304                                cancel_flag,
305                                results,
306                            )
307                            .is_err()
308                            {
309                                break;
310                            }
311                        }
312                        if tree_end >= segment_end {
313                            break;
314                        }
315                        tree_start = tree_end;
316                    }
317                })
318            }
319        })
320        .await;
321
322    if cancel_flag.load(atomic::Ordering::Acquire) {
323        return Vec::new();
324    }
325
326    matcher::return_matchers(matchers);
327
328    let mut results = segment_results.concat();
329    util::truncate_to_bottom_n_sorted(&mut results, max_results);
330    for r in &mut results {
331        r.positions.sort();
332    }
333
334    results
335}
336
337/// Compute the distance from a given path to some other path
338/// If there is no shared path, returns usize::MAX
339fn distance_between_paths(path: &RelPath, relative_to: &RelPath) -> usize {
340    let mut path_components = path.components();
341    let mut relative_components = relative_to.components();
342
343    while path_components
344        .next()
345        .zip(relative_components.next())
346        .map(|(path_component, relative_component)| path_component == relative_component)
347        .unwrap_or_default()
348    {}
349    path_components.count() + relative_components.count() + 1
350}
351
352#[cfg(test)]
353mod tests {
354    use std::sync::{Arc, atomic::AtomicBool};
355
356    use gpui::TestAppContext;
357    use util::{paths::PathStyle, rel_path::RelPath};
358
359    use crate::{CharBag, PathMatchCandidate, PathMatchCandidateSet};
360
361    use super::distance_between_paths;
362
363    #[test]
364    fn test_distance_between_paths_empty() {
365        distance_between_paths(RelPath::empty(), RelPath::empty());
366    }
367
368    struct TestCandidateSet<'a> {
369        prefix: Arc<RelPath>,
370        candidates: Vec<PathMatchCandidate<'a>>,
371        path_style: PathStyle,
372    }
373
374    impl<'a> PathMatchCandidateSet<'a> for TestCandidateSet<'a> {
375        type Candidates = std::vec::IntoIter<PathMatchCandidate<'a>>;
376
377        fn id(&self) -> usize {
378            0
379        }
380        fn len(&self) -> usize {
381            self.candidates.len()
382        }
383        fn is_empty(&self) -> bool {
384            self.candidates.is_empty()
385        }
386        fn root_is_file(&self) -> bool {
387            true // TODO: swap this
388        }
389        fn prefix(&self) -> Arc<RelPath> {
390            self.prefix.clone()
391        }
392        fn candidates(&self, start: usize) -> Self::Candidates {
393            self.candidates[start..]
394                .iter()
395                .cloned()
396                .collect::<Vec<_>>()
397                .into_iter()
398        }
399        fn path_style(&self) -> PathStyle {
400            self.path_style
401        }
402    }
403
404    async fn path_matches(
405        cx: &mut TestAppContext,
406        candidates: &'static [&'static str],
407        query: &'static str,
408        prefix: Option<&str>,
409    ) -> Vec<String> {
410        let set = TestCandidateSet {
411            prefix: RelPath::unix(prefix.unwrap_or_default()).unwrap().into(),
412            candidates: candidates
413                .into_iter()
414                .map(|s| PathMatchCandidate {
415                    is_dir: false,
416                    path: RelPath::unix(s).unwrap().into(),
417                    char_bag: CharBag::from_iter(s.to_lowercase().chars()),
418                })
419                .collect(),
420            path_style: PathStyle::Windows,
421        };
422        let candidate_sets = vec![set];
423
424        let cancellation_flag = AtomicBool::new(false);
425        let executor = cx.background_executor.clone();
426        let matches = cx
427            .foreground_executor
428            .spawn(async move {
429                super::match_path_sets(
430                    candidate_sets.as_slice(),
431                    query,
432                    &None,
433                    false,
434                    100,
435                    &cancellation_flag,
436                    executor,
437                )
438                .await
439            })
440            .await;
441
442        matches
443            .iter()
444            .map(|s| s.path.as_unix_str().to_string())
445            .collect::<Vec<_>>()
446    }
447
448    #[gpui::test]
449    async fn test_dir_paths(cx: &mut TestAppContext) {
450        const CANDIDATES: &'static [&'static str] = &[
451            "gpui_even_more/Cargo.toml",
452            "gpui_more/Cargo.toml",
453            "gpui/Cargo.toml",
454        ];
455
456        assert_eq!(
457            path_matches(cx, CANDIDATES, "toml gpui", None).await,
458            [
459                "gpui/Cargo.toml",
460                "gpui_more/Cargo.toml",
461                "gpui_even_more/Cargo.toml",
462            ]
463        );
464
465        assert_eq!(
466            path_matches(cx, CANDIDATES, "gpui more", None).await,
467            ["gpui_more/Cargo.toml", "gpui_even_more/Cargo.toml",]
468        );
469    }
470    #[gpui::test]
471    async fn test_more_dir_paths(cx: &mut TestAppContext) {
472        const CANDIDATES: &'static [&'static str] = &[
473            "crates/gpui_macros/Cargo.toml",
474            "crates/gpui_tokio/Cargo.toml",
475            "crates/gpui/Cargo.toml",
476        ];
477
478        assert_eq!(
479            path_matches(cx, CANDIDATES, "toml gpui", None).await,
480            [
481                "crates/gpui/Cargo.toml",
482                "crates/gpui_tokio/Cargo.toml",
483                "crates/gpui_macros/Cargo.toml"
484            ]
485        );
486    }
487
488    #[gpui::test]
489    async fn denoise(cx: &mut TestAppContext) {
490        const CANDIDATES: &'static [&'static str] = &[
491            "crates/debug_adapter_extension/Cargo.toml",
492            "crates/debugger_tools/Cargo.toml",
493            "crates/debugger_ui/Cargo.toml",
494            "crates/deepseek/Cargo.toml",
495            "crates/denoise/Cargo.toml",
496        ];
497
498        assert_eq!(
499            path_matches(cx, CANDIDATES, "toml de", None).await,
500            [
501                "crates/denoise/Cargo.toml",
502                "crates/deepseek/Cargo.toml",
503                "crates/debugger_ui/Cargo.toml",
504                "crates/debugger_tools/Cargo.toml",
505                "crates/debug_adapter_extension/Cargo.toml",
506            ]
507        );
508    }
509
510    #[gpui::test]
511    async fn test_path_matcher(cx: &mut TestAppContext) {
512        const CANDIDATES: &'static [&'static str] = &[
513            "blue", "red", "purple", "pink", "green", "yellow", "magenta", "orange", "ocean",
514            "navy", "brown",
515        ];
516        assert_eq!(path_matches(cx, CANDIDATES, "bl", None).await, ["blue"]);
517    }
518
519    #[gpui::test]
520    async fn shorter_over_lexicographical(cx: &mut TestAppContext) {
521        const CANDIDATES: &'static [&'static str] = &["qr", "qqqqqqqqqqqq"];
522        assert_eq!(
523            path_matches(cx, CANDIDATES, "q", None).await,
524            ["qr", "qqqqqqqqqqqq"]
525        );
526    }
527    // TODO: add perf test on zed repo
528
529    #[gpui::test]
530    async fn prefer_single_word_match_to_multiple_fragments(cx: &mut TestAppContext) {
531        const CANDIDATES: &'static [&'static str] = &[
532            "crates/theme_importer/README.md",
533            "extensions/test-extension/README.md",
534            "extensions/slash-commands-example/README.md",
535            "crates/livekit_api/vendored/protocol/README.md",
536            "crates/assistant_tools/src/read_file_tool/description.md",
537        ];
538        assert_eq!(path_matches(cx, CANDIDATES, "read", None).await, CANDIDATES);
539    }
540
541    #[gpui::test]
542    async fn paprika(cx: &mut TestAppContext) {
543        const CANDIDATES: &'static [&'static str] = &["bar/neat.txt", "foo/bar.txt"];
544
545        assert_eq!(
546            path_matches(cx, CANDIDATES, "bar", None).await,
547            ["foo/bar.txt", "bar/neat.txt"]
548        );
549    }
550
551    #[gpui::test]
552    async fn aubergine(cx: &mut TestAppContext) {
553        const CANDIDATES: &'static [&'static str] =
554            &["vim_mode_setting/Cargo.toml", "vim/Cargo.toml"];
555        assert_eq!(
556            path_matches(cx, CANDIDATES, "Cargo.to vim", Some("crates")).await,
557            [CANDIDATES[1], CANDIDATES[0]]
558        );
559    }
560}