fuzzy_nucleo: Optimize path matching with CharBag prefilter and add benchmarks (#54112)

Finn Eitreim created

This PR was originally a part of
https://github.com/zed-industries/zed/pull/53551 so there's more info
about its motivation there.

- Add a CharBag prefilter on path candidates to skip irrelevant entries
before invoking nucleo's matcher.
- Use binary_search on sorted matched char indices when reconstructing
byte positions (perf improvement).
- Add a criterion benchmark comparing `fuzzy_nucleo` path matching
against the existing fuzzy crate.

Performance Chart:

| Benchmark | Size | Nucleo (before) | Nucleo (after) | Fuzzy | Before/Fuzzy | After/Fuzzy |

|-----------|-----:|----------------:|---------------:|------:|-------------:|------------:|
  | 1-word | 100 | 14.14 µs | 9.12 µs | 9.06 µs | 1.56x | 1.01x |
  | 1-word | 1,000 | 164.37 µs | 114.11 µs | 110.43 µs | 1.49x | 1.03x |
  | 1-word | 10,000 | 1.83 ms | 1.39 ms | 1.41 ms | 1.30x | 0.99x |
  | 2-word | 100 | 12.83 µs | 3.51 µs | 979 ns | 13.10x | 3.59x |
  | 2-word | 1,000 | 131.65 µs | 33.46 µs | 6.37 µs | 20.67x | 5.25x |
  | 2-word | 10,000 | 1.24 ms | 338.84 µs | 52.46 µs | 23.64x | 6.46x |

Exact Current State:
| query | size | nucleo | fuzzy | nucleo/fuzzy |
  |---|---:|---:|---:|---:|
  | 1-word | 100 | 8.62 µs | 9.22 µs | 0.93× |
  | 1-word | 1000 | 102 µs | 111 µs | 0.92× |
  | 1-word | 10000 | 1.13 ms | 1.28 ms | 0.88× |
  | 2-word | 100 | 3.48 µs | 0.98 µs | 3.55× |
  | 2-word | 1000 | 29.9 µs | 6.39 µs | 4.68× |
  | 2-word | 10000 | 271 µs | 53.4 µs | 5.08× |
  | 4-word | 100 | 0.85 µs | 0.53 µs | 1.60× |
  | 4-word | 1000 | 2.99 µs | 1.66 µs | 1.80× |
  | 4-word | 10000 | 20.1 µs | 9.14 µs | 2.20× |

Self-Review Checklist:

- [x] I've reviewed my own diff for quality, security, and reliability
- [x] Unsafe blocks (if any) have justifying comments
- [x] The content is consistent with the [UI/UX
checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist)
- [x] Tests cover the new/changed behavior
- [x] Performance impact has been considered and is acceptable

Release Notes:

- fuzzy_nucleo: improved the performance of path matching

Change summary

Cargo.lock                                     |   2 
crates/file_finder/src/file_finder.rs          |  23 +
crates/fuzzy_nucleo/Cargo.toml                 |   8 
crates/fuzzy_nucleo/benches/match_benchmark.rs | 253 ++++++++++++++++++++
crates/fuzzy_nucleo/src/fuzzy_nucleo.rs        |  50 +++
crates/fuzzy_nucleo/src/matcher.rs             |  23 +
crates/fuzzy_nucleo/src/paths.rs               | 136 ++++++++--
crates/project/src/project.rs                  |   1 
8 files changed, 451 insertions(+), 45 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -6772,6 +6772,8 @@ dependencies = [
 name = "fuzzy_nucleo"
 version = "0.1.0"
 dependencies = [
+ "criterion",
+ "fuzzy",
  "gpui",
  "nucleo",
  "util",

crates/file_finder/src/file_finder.rs 🔗

@@ -698,13 +698,18 @@ fn matching_history_items<'a>(
         .into_iter()
         .chain(currently_opened)
         .map(|found_path| {
-            let candidate = PathMatchCandidate {
-                is_dir: false, // You can't open directories as project items
-                path: &found_path.project.path,
-                // Only match history items names, otherwise their paths may match too many queries, producing false positives.
-                // E.g. `foo` would match both `something/foo/bar.rs` and `something/foo/foo.rs` and if the former is a history item,
-                // it would be shown first always, despite the latter being a better match.
-            };
+            // Only match history items names, otherwise their paths may match too many queries,
+            // producing false positives. E.g. `foo` would match both `something/foo/bar.rs` and
+            // `something/foo/foo.rs` and if the former is a history item, it would be shown first
+            // always, despite the latter being a better match.
+            let candidate = PathMatchCandidate::new(
+                &found_path.project.path,
+                false,
+                worktree_name_by_id
+                    .as_ref()
+                    .and_then(|m| m.get(&found_path.project.worktree_id))
+                    .map(|prefix| prefix.as_ref()),
+            );
             candidates_paths.insert(&found_path.project, found_path);
             (found_path.project.worktree_id, candidate)
         })
@@ -731,7 +736,7 @@ fn matching_history_items<'a>(
                 worktree.to_usize(),
                 worktree_root_name,
                 query.path_query(),
-                false,
+                fuzzy_nucleo::Case::Ignore,
                 max_results,
                 path_style,
             )
@@ -914,7 +919,7 @@ impl FileFinderDelegate {
                 candidate_sets.as_slice(),
                 query.path_query(),
                 &relative_to,
-                false,
+                fuzzy_nucleo::Case::Ignore,
                 100,
                 &cancel_flag,
                 cx.background_executor().clone(),

crates/fuzzy_nucleo/Cargo.toml 🔗

@@ -13,9 +13,15 @@ path = "src/fuzzy_nucleo.rs"
 doctest = false
 
 [dependencies]
+fuzzy.workspace = true
 nucleo.workspace = true
 gpui.workspace = true
 util.workspace = true
 
 [dev-dependencies]
-util = {workspace = true, features = ["test-support"]}
+criterion.workspace = true
+util = { workspace = true, features = ["test-support"] }
+
+[[bench]]
+name = "match_benchmark"
+harness = false

crates/fuzzy_nucleo/benches/match_benchmark.rs 🔗

@@ -0,0 +1,253 @@
+use criterion::{BatchSize, BenchmarkId, Criterion, criterion_group, criterion_main};
+use fuzzy::CharBag;
+use util::{paths::PathStyle, rel_path::RelPath};
+
+const DIRS: &[&str] = &[
+    "src",
+    "crates/gpui/src",
+    "crates/editor/src",
+    "crates/fuzzy_nucleo/src",
+    "crates/workspace/src",
+    "crates/project/src",
+    "crates/language/src",
+    "crates/terminal/src",
+    "crates/assistant/src",
+    "crates/theme/src",
+    "tests/integration",
+    "tests/unit",
+    "docs/architecture",
+    "scripts",
+    "assets/icons",
+    "assets/fonts",
+    "crates/git/src",
+    "crates/rpc/src",
+    "crates/settings/src",
+    "crates/diagnostics/src",
+    "crates/search/src",
+    "crates/collab/src",
+    "crates/db/src",
+    "crates/lsp/src",
+];
+
+const FILENAMES: &[&str] = &[
+    "parser.rs",
+    "main.rs",
+    "executor.rs",
+    "editor.rs",
+    "strings.rs",
+    "workspace.rs",
+    "project.rs",
+    "buffer.rs",
+    "colors.rs",
+    "panel.rs",
+    "renderer.rs",
+    "dispatcher.rs",
+    "matcher.rs",
+    "paths.rs",
+    "context.rs",
+    "toolbar.rs",
+    "statusbar.rs",
+    "keymap.rs",
+    "config.rs",
+    "settings.rs",
+    "diagnostics.rs",
+    "completion.rs",
+    "hover.rs",
+    "references.rs",
+    "inlay_hints.rs",
+    "git_blame.rs",
+    "terminal.rs",
+    "search.rs",
+    "replace.rs",
+    "outline.rs",
+    "breadcrumbs.rs",
+    "tab_bar.rs",
+    "Cargo.toml",
+    "README.md",
+    "build.sh",
+    "LICENSE",
+    "overview.md",
+    "string_helpers.rs",
+    "test_helpers.rs",
+    "fixtures.json",
+    "schema.sql",
+];
+
+const QUERY_WORDS: &[&str] = &[
+    "par",
+    "edi",
+    "buf",
+    "set",
+    "mat",
+    "con",
+    "ren",
+    "dis",
+    "sea",
+    "ter",
+    "col",
+    "hov",
+    "out",
+    "rep",
+    "key",
+    "too",
+    "pan",
+    "str",
+    "dia",
+    "com",
+    "executor",
+    "workspace",
+    "settings",
+    "terminal",
+    "breadcrumbs",
+    "git_blame",
+    "fixtures",
+    "schema",
+    "config",
+    "toolbar",
+];
+
+/// Deterministic query generation from QUERY_WORDS using a simple LCG.
+/// Returns `count` queries of each arity: 1, 2, and 4 space-separated words.
+fn generate_queries(count: usize) -> (Vec<String>, Vec<String>, Vec<String>) {
+    let mut state: u64 = 0xDEAD_BEEF;
+    let mut next = || -> usize {
+        // LCG: simple, fast, deterministic
+        state = state.wrapping_mul(6364136223846793005).wrapping_add(1);
+        (state >> 33) as usize
+    };
+    let mut n_word = |n: usize| -> Vec<String> {
+        (0..count)
+            .map(|_| {
+                (0..n)
+                    .map(|_| QUERY_WORDS[next() % QUERY_WORDS.len()])
+                    .collect::<Vec<_>>()
+                    .join(" ")
+            })
+            .collect()
+    };
+
+    (n_word(1), n_word(2), n_word(4))
+}
+
+fn generate_path_strings(count: usize) -> &'static [String] {
+    let paths: Box<[String]> = (0..count)
+        .map(|id| {
+            let dir = DIRS[id % DIRS.len()];
+            let file = FILENAMES[id / DIRS.len() % FILENAMES.len()];
+            format!("{dir}/{file}")
+        })
+        .collect();
+    Box::leak(paths)
+}
+
+fn generate_nucleo_path_candidates(
+    paths: &'static [String],
+) -> Vec<fuzzy_nucleo::PathMatchCandidate<'static>> {
+    paths
+        .iter()
+        .map(|path| {
+            fuzzy_nucleo::PathMatchCandidate::new(RelPath::unix(path).unwrap(), false, None)
+        })
+        .collect()
+}
+
+fn generate_fuzzy_path_candidates(
+    paths: &'static [String],
+) -> Vec<fuzzy::PathMatchCandidate<'static>> {
+    paths
+        .iter()
+        .map(|path| fuzzy::PathMatchCandidate {
+            is_dir: false,
+            path: RelPath::unix(path).unwrap(),
+            char_bag: CharBag::from(path.as_str()),
+        })
+        .collect()
+}
+
+fn capitalize_each_word(query: &str) -> String {
+    query
+        .split_whitespace()
+        .map(|w| {
+            let mut chars = w.chars();
+            match chars.next() {
+                Some(c) => c.to_ascii_uppercase().to_string() + chars.as_str(),
+                None => String::new(),
+            }
+        })
+        .collect::<Vec<_>>()
+        .join(" ")
+}
+
+fn bench_path_matching(criterion: &mut Criterion) {
+    let sizes = [100, 1000, 10_000];
+    let all_path_strings = sizes.map(generate_path_strings);
+    let query_count = 200;
+    let (q1, q2, q4) = generate_queries(query_count);
+    let q1_upper: Vec<String> = q1.iter().map(|q| capitalize_each_word(q)).collect();
+    let q2_upper: Vec<String> = q2.iter().map(|q| capitalize_each_word(q)).collect();
+    let q4_upper: Vec<String> = q4.iter().map(|q| capitalize_each_word(q)).collect();
+
+    for (label, queries, case) in [
+        ("path/1-word", &q1, fuzzy_nucleo::Case::Ignore),
+        ("path/2-word", &q2, fuzzy_nucleo::Case::Ignore),
+        ("path/4-word", &q4, fuzzy_nucleo::Case::Ignore),
+        ("path_smart/1-word", &q1_upper, fuzzy_nucleo::Case::Smart),
+        ("path_smart/2-word", &q2_upper, fuzzy_nucleo::Case::Smart),
+        ("path_smart/4-word", &q4_upper, fuzzy_nucleo::Case::Smart),
+    ] {
+        let mut group = criterion.benchmark_group(label);
+        for (size_index, &size) in sizes.iter().enumerate() {
+            let path_strings = all_path_strings[size_index];
+
+            let mut query_idx = 0usize;
+            group.bench_function(BenchmarkId::new("nucleo", size), |b| {
+                b.iter_batched(
+                    || {
+                        let query = queries[query_idx % queries.len()].as_str();
+                        query_idx += 1;
+                        (generate_nucleo_path_candidates(path_strings), query)
+                    },
+                    |(candidates, query)| {
+                        fuzzy_nucleo::match_fixed_path_set(
+                            candidates,
+                            0,
+                            None,
+                            query,
+                            case,
+                            size,
+                            PathStyle::Posix,
+                        )
+                    },
+                    BatchSize::SmallInput,
+                )
+            });
+
+            let mut query_idx = 0usize;
+            group.bench_function(BenchmarkId::new("fuzzy", size), |b| {
+                b.iter_batched(
+                    || {
+                        let query = queries[query_idx % queries.len()].as_str();
+                        query_idx += 1;
+                        (generate_fuzzy_path_candidates(path_strings), query)
+                    },
+                    |(candidates, query)| {
+                        fuzzy::match_fixed_path_set(
+                            candidates,
+                            0,
+                            None,
+                            query,
+                            false,
+                            size,
+                            PathStyle::Posix,
+                        )
+                    },
+                    BatchSize::SmallInput,
+                )
+            });
+        }
+        group.finish();
+    }
+}
+
+criterion_group!(benches, bench_path_matching);
+criterion_main!(benches);

crates/fuzzy_nucleo/src/fuzzy_nucleo.rs 🔗

@@ -3,3 +3,53 @@ mod paths;
 pub use paths::{
     PathMatch, PathMatchCandidate, PathMatchCandidateSet, match_fixed_path_set, match_path_sets,
 };
+
+pub(crate) struct Cancelled;
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum Case {
+    Smart,
+    Ignore,
+}
+
+impl Case {
+    pub fn from_smart(smart: bool) -> Self {
+        if smart { Self::Smart } else { Self::Ignore }
+    }
+
+    pub fn is_smart(self) -> bool {
+        matches!(self, Self::Smart)
+    }
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum LengthPenalty {
+    On,
+    Off,
+}
+
+impl LengthPenalty {
+    pub fn from_bool(on: bool) -> Self {
+        if on { Self::On } else { Self::Off }
+    }
+
+    pub fn is_on(self) -> bool {
+        matches!(self, Self::On)
+    }
+}
+
+/// Reconstruct byte-offset match positions from a list of matched char offsets
+/// that is already sorted ascending and deduplicated.
+pub(crate) fn positions_from_sorted(s: &str, sorted_char_indices: &[u32]) -> Vec<usize> {
+    let mut iter = sorted_char_indices.iter().copied().peekable();
+    let mut out = Vec::with_capacity(sorted_char_indices.len());
+    for (char_offset, (byte_offset, _)) in s.char_indices().enumerate() {
+        if iter.peek().is_none() {
+            break;
+        }
+        if iter.next_if(|&m| m == char_offset as u32).is_some() {
+            out.push(byte_offset);
+        }
+    }
+    out
+}

crates/fuzzy_nucleo/src/matcher.rs 🔗

@@ -4,8 +4,15 @@ static MATCHERS: Mutex<Vec<nucleo::Matcher>> = Mutex::new(Vec::new());
 
 pub const LENGTH_PENALTY: f64 = 0.01;
 
+fn pool_cap() -> usize {
+    std::thread::available_parallelism()
+        .map(|n| n.get())
+        .unwrap_or(8)
+        .max(1)
+}
+
 pub fn get_matcher(config: nucleo::Config) -> nucleo::Matcher {
-    let mut matchers = MATCHERS.lock().unwrap();
+    let mut matchers = MATCHERS.lock().unwrap_or_else(|e| e.into_inner());
     match matchers.pop() {
         Some(mut matcher) => {
             matcher.config = config;
@@ -16,12 +23,15 @@ pub fn get_matcher(config: nucleo::Config) -> nucleo::Matcher {
 }
 
 pub fn return_matcher(matcher: nucleo::Matcher) {
-    MATCHERS.lock().unwrap().push(matcher);
+    let mut pool = MATCHERS.lock().unwrap_or_else(|e| e.into_inner());
+    if pool.len() < pool_cap() {
+        pool.push(matcher);
+    }
 }
 
 pub fn get_matchers(n: usize, config: nucleo::Config) -> Vec<nucleo::Matcher> {
     let mut matchers: Vec<_> = {
-        let mut pool = MATCHERS.lock().unwrap();
+        let mut pool = MATCHERS.lock().unwrap_or_else(|e| e.into_inner());
         let available = pool.len().min(n);
         pool.drain(..available)
             .map(|mut matcher| {
@@ -34,6 +44,9 @@ pub fn get_matchers(n: usize, config: nucleo::Config) -> Vec<nucleo::Matcher> {
     matchers
 }
 
-pub fn return_matchers(mut matchers: Vec<nucleo::Matcher>) {
-    MATCHERS.lock().unwrap().append(&mut matchers);
+pub fn return_matchers(matchers: Vec<nucleo::Matcher>) {
+    let cap = pool_cap();
+    let mut pool = MATCHERS.lock().unwrap_or_else(|e| e.into_inner());
+    let space = cap.saturating_sub(pool.len());
+    pool.extend(matchers.into_iter().take(space));
 }

crates/fuzzy_nucleo/src/paths.rs 🔗

@@ -11,12 +11,35 @@ use util::{paths::PathStyle, rel_path::RelPath};
 use nucleo::Utf32Str;
 use nucleo::pattern::{Atom, AtomKind, CaseMatching, Normalization};
 
+use fuzzy::CharBag;
+
 use crate::matcher::{self, LENGTH_PENALTY};
+use crate::{Cancelled, Case, positions_from_sorted};
 
 #[derive(Clone, Debug)]
 pub struct PathMatchCandidate<'a> {
     pub is_dir: bool,
     pub path: &'a RelPath,
+    pub char_bag: CharBag,
+}
+
+impl<'a> PathMatchCandidate<'a> {
+    /// Build a candidate whose prefilter bag covers both the worktree prefix and the path.
+    /// Pass `None` when matching against paths that have no worktree prefix.
+    pub fn new(path: &'a RelPath, is_dir: bool, path_prefix: Option<&RelPath>) -> Self {
+        let mut char_bag = CharBag::default();
+        if let Some(prefix) = path_prefix
+            && !prefix.is_empty()
+        {
+            char_bag.extend(prefix.as_unix_str().chars().map(|c| c.to_ascii_lowercase()));
+        }
+        char_bag.extend(path.as_unix_str().chars().map(|c| c.to_ascii_lowercase()));
+        Self {
+            is_dir,
+            path,
+            char_bag,
+        }
+    }
 }
 
 #[derive(Clone, Debug)]
@@ -62,8 +85,7 @@ impl PartialOrd for PathMatch {
 impl Ord for PathMatch {
     fn cmp(&self, other: &Self) -> Ordering {
         self.score
-            .partial_cmp(&other.score)
-            .unwrap_or(Ordering::Equal)
+            .total_cmp(&other.score)
             .then_with(|| self.worktree_id.cmp(&other.worktree_id))
             .then_with(|| {
                 other
@@ -74,18 +96,47 @@ impl Ord for PathMatch {
     }
 }
 
-fn make_atoms(query: &str, smart_case: bool) -> Vec<Atom> {
-    let case = if smart_case {
-        CaseMatching::Smart
-    } else {
-        CaseMatching::Ignore
-    };
+// Path matching is always case-insensitive at the nucleo level. `Case::Smart`
+// is honored as a *scoring hint*: when the query contains uppercase, candidates
+// whose matched characters disagree in case are downranked by a factor per
+// mismatch rather than dropped. This keeps `"Editor: Backspace"` matching
+// `"editor: backspace"` while still preferring exact-case hits.
+const SMART_CASE_PENALTY_PER_MISMATCH: f64 = 0.9;
+
+pub(crate) fn make_atoms(query: &str) -> Vec<Atom> {
     query
         .split_whitespace()
-        .map(|word| Atom::new(word, case, Normalization::Smart, AtomKind::Fuzzy, false))
+        .map(|word| {
+            Atom::new(
+                word,
+                CaseMatching::Ignore,
+                Normalization::Smart,
+                AtomKind::Fuzzy,
+                false,
+            )
+        })
         .collect()
 }
 
+// Only populated when we will actually charge a smart-case penalty, so the hot
+// path can iterate a plain `&[Atom]` and ignore this slice entirely.
+fn make_source_words(query: &str, case: Case) -> Option<Vec<Vec<char>>> {
+    (case.is_smart() && query.chars().any(|c| c.is_uppercase())).then(|| {
+        query
+            .split_whitespace()
+            .map(|word| word.chars().collect())
+            .collect()
+    })
+}
+
+fn case_penalty(mismatches: u32) -> f64 {
+    if mismatches == 0 {
+        1.0
+    } else {
+        SMART_CASE_PENALTY_PER_MISMATCH.powi(mismatches as i32)
+    }
+}
+
 pub(crate) fn distance_between_paths(path: &RelPath, relative_to: &RelPath) -> usize {
     let mut path_components = path.components();
     let mut relative_components = relative_to.components();
@@ -121,11 +172,12 @@ fn get_filename_match_bonus(
     }
     total_score as f64 / filename.len().max(1) as f64
 }
-struct Cancelled;
 
 fn path_match_helper<'a>(
     matcher: &mut nucleo::Matcher,
     atoms: &[Atom],
+    source_words: Option<&[Vec<char>]>,
+    query_bag: CharBag,
     candidates: impl Iterator<Item = PathMatchCandidate<'a>>,
     results: &mut Vec<PathMatch>,
     worktree_id: usize,
@@ -146,6 +198,7 @@ fn path_match_helper<'a>(
     let mut buf = Vec::new();
     let mut matched_chars: Vec<u32> = Vec::new();
     let mut atom_matched_chars = Vec::new();
+    let mut candidate_chars: Vec<char> = Vec::new();
     for candidate in candidates {
         buf.clear();
         matched_chars.clear();
@@ -153,6 +206,10 @@ fn path_match_helper<'a>(
             return Err(Cancelled);
         }
 
+        if !candidate.char_bag.is_superset(query_bag) {
+            continue;
+        }
+
         candidate_buf.truncate(path_prefix_len);
         if root_is_file {
             candidate_buf.push_str(path_prefix.as_unix_str());
@@ -162,18 +219,36 @@ fn path_match_helper<'a>(
 
         let haystack = Utf32Str::new(&candidate_buf, &mut buf);
 
+        if source_words.is_some() {
+            candidate_chars.clear();
+            candidate_chars.extend(candidate_buf.chars());
+        }
+
         let mut total_score: u32 = 0;
+        let mut case_mismatches: u32 = 0;
         let mut all_matched = true;
 
-        for atom in atoms {
+        for (atom_idx, atom) in atoms.iter().enumerate() {
             atom_matched_chars.clear();
-            if let Some(score) = atom.indices(haystack, matcher, &mut atom_matched_chars) {
-                total_score = total_score.saturating_add(score as u32);
-                matched_chars.extend_from_slice(&atom_matched_chars);
-            } else {
+            let Some(score) = atom.indices(haystack, matcher, &mut atom_matched_chars) else {
                 all_matched = false;
                 break;
+            };
+            total_score = total_score.saturating_add(score as u32);
+            if let Some(source_words) = source_words {
+                let query_chars = &source_words[atom_idx];
+                if query_chars.len() == atom_matched_chars.len() {
+                    for (&query_char, &pos) in query_chars.iter().zip(&atom_matched_chars) {
+                        if let Some(&candidate_char) = candidate_chars.get(pos as usize)
+                            && candidate_char != query_char
+                            && candidate_char.eq_ignore_ascii_case(&query_char)
+                        {
+                            case_mismatches += 1;
+                        }
+                    }
+                }
             }
+            matched_chars.extend_from_slice(&atom_matched_chars);
         }
 
         if all_matched && !atoms.is_empty() {
@@ -182,17 +257,9 @@ fn path_match_helper<'a>(
 
             let length_penalty = candidate_buf.len() as f64 * LENGTH_PENALTY;
             let filename_bonus = get_filename_match_bonus(&candidate_buf, atoms, matcher);
-            let adjusted_score = total_score as f64 + filename_bonus - length_penalty;
-            let mut positions: Vec<usize> = candidate_buf
-                .char_indices()
-                .enumerate()
-                .filter_map(|(char_offset, (byte_offset, _))| {
-                    matched_chars
-                        .contains(&(char_offset as u32))
-                        .then_some(byte_offset)
-                })
-                .collect();
-            positions.sort_unstable();
+            let positive = (total_score as f64 + filename_bonus) * case_penalty(case_mismatches);
+            let adjusted_score = positive - length_penalty;
+            let positions = positions_from_sorted(&candidate_buf, &matched_chars);
 
             results.push(PathMatch {
                 score: adjusted_score,
@@ -225,7 +292,7 @@ pub fn match_fixed_path_set(
     worktree_id: usize,
     worktree_root_name: Option<Arc<RelPath>>,
     query: &str,
-    smart_case: bool,
+    case: Case,
     max_results: usize,
     path_style: PathStyle,
 ) -> Vec<PathMatch> {
@@ -233,7 +300,9 @@ pub fn match_fixed_path_set(
     config.set_match_paths();
     let mut matcher = matcher::get_matcher(config);
 
-    let atoms = make_atoms(query, smart_case);
+    let atoms = make_atoms(query);
+    let source_words = make_source_words(query, case);
+    let query_bag = CharBag::from(query);
 
     let root_is_file = worktree_root_name.is_some() && candidates.iter().all(|c| c.path.is_empty());
 
@@ -244,6 +313,8 @@ pub fn match_fixed_path_set(
     path_match_helper(
         &mut matcher,
         &atoms,
+        source_words.as_deref(),
+        query_bag,
         candidates.into_iter(),
         &mut results,
         worktree_id,
@@ -263,7 +334,7 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
     candidate_sets: &'a [Set],
     query: &str,
     relative_to: &Option<Arc<RelPath>>,
-    smart_case: bool,
+    case: Case,
     max_results: usize,
     cancel_flag: &AtomicBool,
     executor: BackgroundExecutor,
@@ -281,7 +352,9 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
         query.to_owned()
     };
 
-    let atoms = make_atoms(&query, smart_case);
+    let atoms = make_atoms(&query);
+    let source_words = make_source_words(&query, case);
+    let query_bag = CharBag::from(query.as_str());
 
     let num_cpus = executor.num_cpus().min(path_count);
     let segment_size = path_count.div_ceil(num_cpus);
@@ -299,6 +372,7 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
                 .enumerate()
             {
                 let atoms = atoms.clone();
+                let source_words = source_words.clone();
                 let relative_to = relative_to.clone();
                 scope.spawn(async move {
                     let segment_start = segment_idx * segment_size;
@@ -316,6 +390,8 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>(
                             if path_match_helper(
                                 matcher,
                                 &atoms,
+                                source_words.as_deref(),
+                                query_bag,
                                 candidates,
                                 results,
                                 candidate_set.id(),

crates/project/src/project.rs 🔗

@@ -6439,6 +6439,7 @@ impl<'a> Iterator for PathMatchCandidateSetNucleoIter<'a> {
             .map(|entry| fuzzy_nucleo::PathMatchCandidate {
                 is_dir: entry.kind.is_dir(),
                 path: &entry.path,
+                char_bag: entry.char_bag,
             })
     }
 }