fuzzy_nucleo: Add strings module and route several pickers through it (#54123)

Finn Eitreim and Yara created

Stacked on top of #54112
This is part 2 of 3 towards #51197
More details from the original PR #53551

This PR includes the changes from #54112. I'm not sure how to avoid
that; my understanding is that once that PR is merged, this one can be
rebased onto main and the diff will be correct. You can also view a
version of this PR that reflects its own changes more directly here:
https://github.com/feitreim/zed/pull/1

## Changes

In this PR I added more general string matching functionality to
`fuzzy_nucleo`. In order to have proper testing for it, I also changed
the command palette, tab switching picker, branch picker, and recent
projects picker to use this new implementation. I think the command
palette change in particular is awesome: it's super nice to vaguely
gesture at the command I want and have it pop right up.

The main change here, and the departure from
https://github.com/zed-industries/zed/pull/37123, is realizing that the
primary reason for the regressions is how nucleo handles smart case.
The old `fuzzy` crate only uses the smart case argument to score
things differently, while nucleo actually filters on case: e.g., with
smart case, the query "Apple" wouldn't match "apple". To get around
this we always pass `CaseMatching::Ignore` to nucleo and implement the
same score modifications from `fuzzy` in our code.

There is a performance cost to that, of course, but from my testing it
is roughly constant rather than growing with the number of candidates:
a query might take 35 µs instead of 25 µs, but a query that takes
800 µs will only take 820 µs.

Benchmark:
| kind | query | size | nucleo | fuzzy | nucleo/fuzzy |
|---|---|---:|---:|---:|---:|
| string | 1-word | 100 | 9.15 µs | 24.6 µs | 0.37× |
| string | 1-word | 1000 | 150.2 µs | 207.2 µs | 0.72× |
| string | 1-word | 10000 | 1.34 ms | 2.07 ms | 0.65× |
| string | 2-word | 100 | 5.16 µs | 2.94 µs | 1.75× |
| string | 2-word | 1000 | 29.0 µs | 11.0 µs | 2.63× |
| string | 2-word | 10000 | 210.6 µs | 55.5 µs | 3.79× |
| string | 4-word | 100 | 2.57 µs | 2.33 µs | 1.10× |
| string | 4-word | 1000 | 6.98 µs | 5.85 µs | 1.19× |
| string | 4-word | 10000 | 20.0 µs | 12.0 µs | 1.66× |

When I added the 4-word queries to the benchmarks, I was really
concerned that the performance would be awful, making it unsuitable for
the command palette especially. However, I think that, due to the
CharBag pre-filtering kicking in when the query is longer, the
performance is actually way better than in the 2-word case.

Video:


https://github.com/user-attachments/assets/3cd7221b-424f-4fd3-8df1-5543dcc340a3

Self-Review Checklist:

- [x] I've reviewed my own diff for quality, security, and reliability
- [x] Unsafe blocks (if any) have justifying comments
- [x] The content is consistent with the [UI/UX
checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist)
- [x] Tests cover the new/changed behavior
- [x] Performance impact has been considered and is acceptable

Release Notes:

- Improved fuzzy matching in the command palette, branch picker, tab
switcher, and recent projects picker to support multi-word queries.

---------

Co-authored-by: Yara <git@yara.blue>

Change summary

Cargo.lock                                            |   8 
crates/command_palette/Cargo.toml                     |   2 
crates/command_palette/src/command_palette.rs         |  10 
crates/fuzzy_nucleo/Cargo.toml                        |   1 
crates/fuzzy_nucleo/benches/match_benchmark.rs        |  89 +
crates/fuzzy_nucleo/src/fuzzy_nucleo.rs               |  11 
crates/fuzzy_nucleo/src/strings.rs                    | 741 +++++++++++++
crates/git_ui/Cargo.toml                              |   1 
crates/git_ui/src/branch_picker.rs                    |   8 
crates/recent_projects/Cargo.toml                     |   2 
crates/recent_projects/src/recent_projects.rs         |  50 
crates/recent_projects/src/sidebar_recent_projects.rs |  21 
crates/recent_projects/src/wsl_picker.rs              |  24 
crates/tab_switcher/Cargo.toml                        |   3 
crates/tab_switcher/src/tab_switcher.rs               |  12 
typos.toml                                            |   3 
16 files changed, 896 insertions(+), 90 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -3340,7 +3340,7 @@ dependencies = [
  "command_palette_hooks",
  "db",
  "editor",
- "fuzzy",
+ "fuzzy_nucleo",
  "go_to_line",
  "gpui",
  "language",
@@ -7323,6 +7323,7 @@ dependencies = [
  "fs",
  "futures 0.3.32",
  "fuzzy",
+ "fuzzy_nucleo",
  "git",
  "gpui",
  "indoc",
@@ -14293,7 +14294,7 @@ dependencies = [
  "extension_host",
  "fs",
  "futures 0.3.32",
- "fuzzy",
+ "fuzzy_nucleo",
  "gpui",
  "http_client",
  "indoc",
@@ -17429,7 +17430,7 @@ dependencies = [
  "collections",
  "ctor",
  "editor",
- "fuzzy",
+ "fuzzy_nucleo",
  "gpui",
  "menu",
  "picker",
@@ -17438,7 +17439,6 @@ dependencies = [
  "serde",
  "serde_json",
  "settings",
- "smol",
  "theme",
  "theme_settings",
  "ui",

crates/command_palette/Cargo.toml 🔗

@@ -21,7 +21,7 @@ client.workspace = true
 collections.workspace = true
 command_palette_hooks.workspace = true
 db.workspace = true
-fuzzy.workspace = true
+fuzzy_nucleo.workspace = true
 gpui.workspace = true
 menu.workspace = true
 log.workspace = true

crates/command_palette/src/command_palette.rs 🔗

@@ -13,7 +13,7 @@ use command_palette_hooks::{
     GlobalCommandPaletteInterceptor,
 };
 
-use fuzzy::{StringMatch, StringMatchCandidate};
+use fuzzy_nucleo::{StringMatch, StringMatchCandidate};
 use gpui::{
     Action, App, Context, DismissEvent, Entity, EventEmitter, FocusHandle, Focusable,
     ParentElement, Render, Styled, Task, WeakEntity, Window,
@@ -326,7 +326,7 @@ impl CommandPaletteDelegate {
             });
             new_matches.push(StringMatch {
                 candidate_id: commands.len() - 1,
-                string,
+                string: string.into(),
                 positions,
                 score: 0.0,
             })
@@ -474,11 +474,11 @@ impl PickerDelegate for CommandPaletteDelegate {
                     .map(|(ix, command)| StringMatchCandidate::new(ix, &command.name))
                     .collect::<Vec<_>>();
 
-                let matches = fuzzy::match_strings(
+                let matches = fuzzy_nucleo::match_strings_async(
                     &candidates,
                     &query,
-                    true,
-                    true,
+                    fuzzy_nucleo::Case::Smart,
+                    fuzzy_nucleo::LengthPenalty::On,
                     10000,
                     &Default::default(),
                     executor,

crates/fuzzy_nucleo/Cargo.toml 🔗

@@ -20,6 +20,7 @@ util.workspace = true
 
 [dev-dependencies]
 criterion.workspace = true
+gpui = { workspace = true, features = ["test-support"] }
 util = { workspace = true, features = ["test-support"] }
 
 [[bench]]

crates/fuzzy_nucleo/benches/match_benchmark.rs 🔗

@@ -1,5 +1,6 @@
 use criterion::{BatchSize, BenchmarkId, Criterion, criterion_group, criterion_main};
 use fuzzy::CharBag;
+use std::sync::atomic::AtomicBool;
 use util::{paths::PathStyle, rel_path::RelPath};
 
 const DIRS: &[&str] = &[
@@ -129,6 +130,92 @@ fn generate_queries(count: usize) -> (Vec<String>, Vec<String>, Vec<String>) {
     (n_word(1), n_word(2), n_word(4))
 }
 
+fn generate_candidates(count: usize) -> Vec<fuzzy_nucleo::StringMatchCandidate> {
+    (0..count)
+        .map(|id| {
+            let dir = DIRS[id % DIRS.len()];
+            let file = FILENAMES[id / DIRS.len() % FILENAMES.len()];
+            fuzzy_nucleo::StringMatchCandidate::new(id, &format!("{dir}/{file}"))
+        })
+        .collect()
+}
+
+fn to_fuzzy_candidates(
+    candidates: &[fuzzy_nucleo::StringMatchCandidate],
+) -> Vec<fuzzy::StringMatchCandidate> {
+    candidates
+        .iter()
+        .map(|c| fuzzy::StringMatchCandidate::new(c.id, c.string.as_ref()))
+        .collect()
+}
+
+fn bench_string_matching(criterion: &mut Criterion) {
+    let cancel = AtomicBool::new(false);
+
+    let dispatcher = std::sync::Arc::new(gpui::TestDispatcher::new(0));
+    let background_executor = gpui::BackgroundExecutor::new(dispatcher.clone());
+    let foreground_executor = gpui::ForegroundExecutor::new(dispatcher);
+
+    let sizes = [100, 1000, 10_000];
+    let query_count = 200;
+    let (q1, q2, q4) = generate_queries(query_count);
+
+    for (label, queries) in [("1-word", &q1), ("2-word", &q2), ("4-word", &q4)] {
+        let mut group = criterion.benchmark_group(label);
+        for size in sizes {
+            let candidates = generate_candidates(size);
+            let fuzzy_candidates = to_fuzzy_candidates(&candidates);
+
+            let mut query_idx = 0usize;
+            group.bench_function(BenchmarkId::new("nucleo", size), |b| {
+                b.iter_batched(
+                    || {
+                        let query = queries[query_idx % queries.len()].as_str();
+                        query_idx += 1;
+                        query
+                    },
+                    |query| {
+                        foreground_executor.block_on(fuzzy_nucleo::match_strings_async(
+                            &candidates,
+                            query,
+                            fuzzy_nucleo::Case::Ignore,
+                            fuzzy_nucleo::LengthPenalty::On,
+                            size,
+                            &cancel,
+                            background_executor.clone(),
+                        ))
+                    },
+                    BatchSize::SmallInput,
+                )
+            });
+
+            let mut query_idx = 0usize;
+            group.bench_function(BenchmarkId::new("fuzzy", size), |b| {
+                b.iter_batched(
+                    || {
+                        let query = queries[query_idx % queries.len()].as_str();
+                        query_idx += 1;
+                        query
+                    },
+                    |query| {
+                        foreground_executor.block_on(fuzzy::match_strings(
+                            &fuzzy_candidates,
+                            query,
+                            false,
+                            true,
+                            size,
+                            &cancel,
+                            background_executor.clone(),
+                        ))
+                    },
+                    BatchSize::SmallInput,
+                )
+            });
+        }
+        group.finish();
+    }
+}
+
 fn generate_path_strings(count: usize) -> &'static [String] {
     let paths: Box<[String]> = (0..count)
         .map(|id| {
@@ -249,5 +336,5 @@ fn bench_path_matching(criterion: &mut Criterion) {
     }
 }
 
-criterion_group!(benches, bench_path_matching);
+criterion_group!(benches, bench_string_matching, bench_path_matching);
 criterion_main!(benches);

crates/fuzzy_nucleo/src/fuzzy_nucleo.rs 🔗

@@ -1,8 +1,11 @@
 mod matcher;
 mod paths;
+mod strings;
+
 pub use paths::{
     PathMatch, PathMatchCandidate, PathMatchCandidateSet, match_fixed_path_set, match_path_sets,
 };
+pub use strings::{StringMatch, StringMatchCandidate, match_strings, match_strings_async};
 
 pub(crate) struct Cancelled;
 
@@ -13,8 +16,12 @@ pub enum Case {
 }
 
 impl Case {
-    pub fn from_smart(smart: bool) -> Self {
-        if smart { Self::Smart } else { Self::Ignore }
+    pub fn smart_if_uppercase_in(query: &str) -> Self {
+        if query.chars().any(|c| c.is_uppercase()) {
+            Self::Smart
+        } else {
+            Self::Ignore
+        }
     }
 
     pub fn is_smart(self) -> bool {

crates/fuzzy_nucleo/src/strings.rs 🔗

@@ -0,0 +1,741 @@
+use std::{
+    borrow::Borrow,
+    cmp::Ordering,
+    iter,
+    ops::Range,
+    sync::atomic::{self, AtomicBool},
+};
+
+use gpui::{BackgroundExecutor, SharedString};
+use nucleo::Utf32Str;
+use nucleo::pattern::{Atom, AtomKind, CaseMatching, Normalization};
+
+use crate::{
+    Cancelled, Case, LengthPenalty,
+    matcher::{self, LENGTH_PENALTY},
+    positions_from_sorted,
+};
+use fuzzy::CharBag;
+
+// String matching is always case-insensitive at the nucleo level — using
+// `CaseMatching::Smart` there would reject queries whose capitalization
+// doesn't match the candidate, breaking pickers like the command palette
+// (`"Editor: Backspace"` against the action named `"editor: backspace"`).
+// `Case::Smart` is still honored as a *scoring hint*: when the query
+// contains uppercase, candidates whose matched characters disagree in case
+// are downranked rather than dropped.
+const SMART_CASE_PENALTY_PER_MISMATCH: f64 = 0.9;
+
+struct Query {
+    atoms: Vec<Atom>,
+    source_words: Option<Vec<Vec<char>>>,
+    char_bag: CharBag,
+}
+
+impl Query {
+    fn build(query: &str, case: Case) -> Option<Self> {
+        let mut atoms = Vec::new();
+        let mut source_words = Vec::new();
+        let wants_case_penalty = case.is_smart() && query.chars().any(|c| c.is_uppercase());
+
+        for word in query.split_whitespace() {
+            atoms.push(Atom::new(
+                word,
+                CaseMatching::Ignore,
+                Normalization::Smart,
+                AtomKind::Fuzzy,
+                false,
+            ));
+            if wants_case_penalty {
+                source_words.push(word.chars().collect());
+            }
+        }
+
+        if atoms.is_empty() {
+            return None;
+        }
+
+        Some(Query {
+            atoms,
+            source_words: wants_case_penalty.then_some(source_words),
+            char_bag: CharBag::from(query),
+        })
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct StringMatchCandidate {
+    pub id: usize,
+    pub string: SharedString,
+    char_bag: CharBag,
+}
+
+impl StringMatchCandidate {
+    pub fn new(id: usize, string: impl ToString) -> Self {
+        Self::from_shared(id, SharedString::new(string.to_string()))
+    }
+
+    pub fn from_shared(id: usize, string: SharedString) -> Self {
+        let char_bag = CharBag::from(string.as_ref());
+        Self {
+            id,
+            string,
+            char_bag,
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct StringMatch {
+    pub candidate_id: usize,
+    pub score: f64,
+    pub positions: Vec<usize>,
+    pub string: SharedString,
+}
+
+impl StringMatch {
+    pub fn ranges(&self) -> impl '_ + Iterator<Item = Range<usize>> {
+        let mut positions = self.positions.iter().peekable();
+        iter::from_fn(move || {
+            let start = *positions.next()?;
+            let char_len = self.char_len_at_index(start)?;
+            let mut end = start + char_len;
+            while let Some(next_start) = positions.peek() {
+                if end == **next_start {
+                    let Some(char_len) = self.char_len_at_index(end) else {
+                        break;
+                    };
+                    end += char_len;
+                    positions.next();
+                } else {
+                    break;
+                }
+            }
+            Some(start..end)
+        })
+    }
+
+    fn char_len_at_index(&self, ix: usize) -> Option<usize> {
+        self.string
+            .get(ix..)
+            .and_then(|slice| slice.chars().next().map(|c| c.len_utf8()))
+    }
+}
+
+impl PartialEq for StringMatch {
+    fn eq(&self, other: &Self) -> bool {
+        self.cmp(other).is_eq()
+    }
+}
+
+impl Eq for StringMatch {}
+
+impl PartialOrd for StringMatch {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for StringMatch {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.score
+            .total_cmp(&other.score)
+            .then_with(|| self.candidate_id.cmp(&other.candidate_id))
+    }
+}
+
+pub async fn match_strings_async<T>(
+    candidates: &[T],
+    query: &str,
+    case: Case,
+    length_penalty: LengthPenalty,
+    max_results: usize,
+    cancel_flag: &AtomicBool,
+    executor: BackgroundExecutor,
+) -> Vec<StringMatch>
+where
+    T: Borrow<StringMatchCandidate> + Sync,
+{
+    if candidates.is_empty() || max_results == 0 {
+        return Vec::new();
+    }
+
+    let Some(query) = Query::build(query, case) else {
+        return empty_query_results(candidates, max_results);
+    };
+
+    let num_cpus = executor.num_cpus().min(candidates.len());
+    let segment_size = candidates.len().div_ceil(num_cpus);
+    let mut segment_results = (0..num_cpus)
+        .map(|_| Vec::with_capacity(max_results.min(candidates.len())))
+        .collect::<Vec<_>>();
+
+    let config = nucleo::Config::DEFAULT;
+    let mut matchers = matcher::get_matchers(num_cpus, config);
+
+    executor
+        .scoped(|scope| {
+            for (segment_idx, (results, matcher)) in segment_results
+                .iter_mut()
+                .zip(matchers.iter_mut())
+                .enumerate()
+            {
+                let query = &query;
+                scope.spawn(async move {
+                    let segment_start = segment_idx * segment_size;
+                    let segment_end = (segment_start + segment_size).min(candidates.len());
+
+                    match_string_helper(
+                        &candidates[segment_start..segment_end],
+                        query,
+                        matcher,
+                        length_penalty,
+                        results,
+                        cancel_flag,
+                    )
+                    .ok();
+                });
+            }
+        })
+        .await;
+
+    matcher::return_matchers(matchers);
+
+    if cancel_flag.load(atomic::Ordering::Acquire) {
+        return Vec::new();
+    }
+
+    let mut results = segment_results.concat();
+    util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a));
+    results
+}
+
+pub fn match_strings<T>(
+    candidates: &[T],
+    query: &str,
+    case: Case,
+    length_penalty: LengthPenalty,
+    max_results: usize,
+) -> Vec<StringMatch>
+where
+    T: Borrow<StringMatchCandidate>,
+{
+    if candidates.is_empty() || max_results == 0 {
+        return Vec::new();
+    }
+
+    let Some(query) = Query::build(query, case) else {
+        return empty_query_results(candidates, max_results);
+    };
+
+    let config = nucleo::Config::DEFAULT;
+    let mut matcher = matcher::get_matcher(config);
+    let mut results = Vec::with_capacity(max_results.min(candidates.len()));
+
+    match_string_helper(
+        candidates,
+        &query,
+        &mut matcher,
+        length_penalty,
+        &mut results,
+        &AtomicBool::new(false),
+    )
+    .ok();
+
+    matcher::return_matcher(matcher);
+    util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a));
+    results
+}
+
+fn empty_query_results<T: Borrow<StringMatchCandidate>>(
+    candidates: &[T],
+    max_results: usize,
+) -> Vec<StringMatch> {
+    candidates
+        .iter()
+        .take(max_results)
+        .map(|candidate| {
+            let borrowed = candidate.borrow();
+            StringMatch {
+                candidate_id: borrowed.id,
+                score: 0.,
+                positions: Vec::new(),
+                string: borrowed.string.clone(),
+            }
+        })
+        .collect()
+}
+
+fn match_string_helper<T>(
+    candidates: &[T],
+    query: &Query,
+    matcher: &mut nucleo::Matcher,
+    length_penalty: LengthPenalty,
+    results: &mut Vec<StringMatch>,
+    cancel_flag: &AtomicBool,
+) -> Result<(), Cancelled>
+where
+    T: Borrow<StringMatchCandidate>,
+{
+    let mut buf = Vec::new();
+    let mut matched_chars: Vec<u32> = Vec::new();
+    let mut atom_matched_chars = Vec::new();
+    let mut candidate_chars: Vec<char> = Vec::new();
+
+    for candidate in candidates {
+        buf.clear();
+        matched_chars.clear();
+        if cancel_flag.load(atomic::Ordering::Relaxed) {
+            return Err(Cancelled);
+        }
+
+        let borrowed = candidate.borrow();
+
+        if !borrowed.char_bag.is_superset(query.char_bag) {
+            continue;
+        }
+
+        let haystack: Utf32Str = Utf32Str::new(&borrowed.string, &mut buf);
+
+        if query.source_words.is_some() {
+            candidate_chars.clear();
+            candidate_chars.extend(borrowed.string.chars());
+        }
+
+        let mut total_score: u32 = 0;
+        let mut case_mismatches: u32 = 0;
+        let mut all_matched = true;
+
+        for (atom_idx, atom) in query.atoms.iter().enumerate() {
+            atom_matched_chars.clear();
+            let Some(score) = atom.indices(haystack, matcher, &mut atom_matched_chars) else {
+                all_matched = false;
+                break;
+            };
+            total_score = total_score.saturating_add(score as u32);
+            if let Some(source_words) = query.source_words.as_deref() {
+                let query_chars = &source_words[atom_idx];
+                if query_chars.len() == atom_matched_chars.len() {
+                    for (&query_char, &pos) in query_chars.iter().zip(&atom_matched_chars) {
+                        if let Some(&candidate_char) = candidate_chars.get(pos as usize)
+                            && candidate_char != query_char
+                            && candidate_char.eq_ignore_ascii_case(&query_char)
+                        {
+                            case_mismatches += 1;
+                        }
+                    }
+                }
+            }
+            matched_chars.extend_from_slice(&atom_matched_chars);
+        }
+
+        if all_matched {
+            matched_chars.sort_unstable();
+            matched_chars.dedup();
+
+            let positive = total_score as f64 * case_penalty(case_mismatches);
+            let adjusted_score =
+                positive - length_penalty_for(borrowed.string.as_ref(), length_penalty);
+            let positions = positions_from_sorted(borrowed.string.as_ref(), &matched_chars);
+
+            results.push(StringMatch {
+                candidate_id: borrowed.id,
+                score: adjusted_score,
+                positions,
+                string: borrowed.string.clone(),
+            });
+        }
+    }
+    Ok(())
+}
+
+#[inline]
+fn case_penalty(mismatches: u32) -> f64 {
+    if mismatches == 0 {
+        1.0
+    } else {
+        SMART_CASE_PENALTY_PER_MISMATCH.powi(mismatches as i32)
+    }
+}
+
+#[inline]
+fn length_penalty_for(s: &str, length_penalty: LengthPenalty) -> f64 {
+    if length_penalty.is_on() {
+        s.len() as f64 * LENGTH_PENALTY
+    } else {
+        0.0
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use gpui::BackgroundExecutor;
+
+    fn candidates(strings: &[&str]) -> Vec<StringMatchCandidate> {
+        strings
+            .iter()
+            .enumerate()
+            .map(|(id, s)| StringMatchCandidate::new(id, s))
+            .collect()
+    }
+
+    #[gpui::test]
+    async fn test_basic_match(executor: BackgroundExecutor) {
+        let cs = candidates(&["hello", "world", "help"]);
+        let cancel = AtomicBool::new(false);
+        let results = match_strings_async(
+            &cs,
+            "hel",
+            Case::Ignore,
+            LengthPenalty::Off,
+            10,
+            &cancel,
+            executor,
+        )
+        .await;
+        let matched: Vec<&str> = results.iter().map(|m| m.string.as_ref()).collect();
+        assert!(matched.contains(&"hello"));
+        assert!(matched.contains(&"help"));
+        assert!(!matched.contains(&"world"));
+    }
+
+    #[gpui::test]
+    async fn test_multi_word_query(executor: BackgroundExecutor) {
+        let cs = candidates(&[
+            "src/lib/parser.rs",
+            "src/bin/main.rs",
+            "tests/parser_test.rs",
+        ]);
+        let cancel = AtomicBool::new(false);
+        let results = match_strings_async(
+            &cs,
+            "src parser",
+            Case::Ignore,
+            LengthPenalty::Off,
+            10,
+            &cancel,
+            executor,
+        )
+        .await;
+        assert_eq!(results.len(), 1);
+        assert_eq!(results[0].string, "src/lib/parser.rs");
+    }
+
+    #[gpui::test]
+    async fn test_empty_query_returns_all(executor: BackgroundExecutor) {
+        let cs = candidates(&["alpha", "beta", "gamma"]);
+        let cancel = AtomicBool::new(false);
+        let results = match_strings_async(
+            &cs,
+            "",
+            Case::Ignore,
+            LengthPenalty::Off,
+            10,
+            &cancel,
+            executor,
+        )
+        .await;
+        assert_eq!(results.len(), 3);
+        assert!(results.iter().all(|m| m.score == 0.0));
+    }
+
+    #[gpui::test]
+    async fn test_whitespace_only_query_returns_all(executor: BackgroundExecutor) {
+        let cs = candidates(&["alpha", "beta", "gamma"]);
+        let cancel = AtomicBool::new(false);
+        let results = match_strings_async(
+            &cs,
+            "   \t\n",
+            Case::Ignore,
+            LengthPenalty::Off,
+            10,
+            &cancel,
+            executor,
+        )
+        .await;
+        assert_eq!(results.len(), 3);
+    }
+
+    #[gpui::test]
+    async fn test_empty_candidates(executor: BackgroundExecutor) {
+        let cs: Vec<StringMatchCandidate> = vec![];
+        let cancel = AtomicBool::new(false);
+        let results = match_strings_async(
+            &cs,
+            "query",
+            Case::Ignore,
+            LengthPenalty::Off,
+            10,
+            &cancel,
+            executor,
+        )
+        .await;
+        assert!(results.is_empty());
+    }
+
+    #[gpui::test]
+    async fn test_cancellation(executor: BackgroundExecutor) {
+        let cs = candidates(&["hello", "world"]);
+        let cancel = AtomicBool::new(true);
+        let results = match_strings_async(
+            &cs,
+            "hel",
+            Case::Ignore,
+            LengthPenalty::Off,
+            10,
+            &cancel,
+            executor,
+        )
+        .await;
+        assert!(results.is_empty());
+    }
+
+    #[gpui::test]
+    async fn test_max_results_limit(executor: BackgroundExecutor) {
+        let cs = candidates(&["ab", "abc", "abcd", "abcde"]);
+        let cancel = AtomicBool::new(false);
+        let results = match_strings_async(
+            &cs,
+            "ab",
+            Case::Ignore,
+            LengthPenalty::Off,
+            2,
+            &cancel,
+            executor,
+        )
+        .await;
+        assert_eq!(results.len(), 2);
+    }
+
+    #[gpui::test]
+    async fn test_scoring_order(executor: BackgroundExecutor) {
+        let cs = candidates(&[
+            "some_very_long_variable_name_fuzzy",
+            "fuzzy",
+            "a_fuzzy_thing",
+        ]);
+        let cancel = AtomicBool::new(false);
+        let results = match_strings_async(
+            &cs,
+            "fuzzy",
+            Case::Ignore,
+            LengthPenalty::Off,
+            10,
+            &cancel,
+            executor.clone(),
+        )
+        .await;
+
+        let ordered = matches!(
+            (
+                results[0].string.as_ref(),
+                results[1].string.as_ref(),
+                results[2].string.as_ref()
+            ),
+            (
+                "fuzzy",
+                "a_fuzzy_thing",
+                "some_very_long_variable_name_fuzzy"
+            )
+        );
+        assert!(ordered, "matches are not in the proper order.");
+
+        let results_penalty = match_strings_async(
+            &cs,
+            "fuzzy",
+            Case::Ignore,
+            LengthPenalty::On,
+            10,
+            &cancel,
+            executor,
+        )
+        .await;
+        let greater = results[2].score > results_penalty[2].score;
+        assert!(greater, "penalize length not affecting long candidates");
+    }
+
+    #[gpui::test]
+    async fn test_utf8_positions(executor: BackgroundExecutor) {
+        let cs = candidates(&["café"]);
+        let cancel = AtomicBool::new(false);
+        let results = match_strings_async(
+            &cs,
+            "caf",
+            Case::Ignore,
+            LengthPenalty::Off,
+            10,
+            &cancel,
+            executor,
+        )
+        .await;
+        assert_eq!(results.len(), 1);
+        let m = &results[0];
+        assert_eq!(m.positions, vec![0, 1, 2]);
+        for &pos in &m.positions {
+            assert!(m.string.is_char_boundary(pos));
+        }
+    }
+
+    #[gpui::test]
+    async fn test_smart_case(executor: BackgroundExecutor) {
+        let cs = candidates(&["FooBar", "foobar", "FOOBAR"]);
+        let cancel = AtomicBool::new(false);
+
+        let case_insensitive = match_strings_async(
+            &cs,
+            "foobar",
+            Case::Ignore,
+            LengthPenalty::Off,
+            10,
+            &cancel,
+            executor.clone(),
+        )
+        .await;
+        assert_eq!(case_insensitive.len(), 3);
+
+        let smart = match_strings_async(
+            &cs,
+            "FooBar",
+            Case::Smart,
+            LengthPenalty::Off,
+            10,
+            &cancel,
+            executor,
+        )
+        .await;
+        assert!(smart.iter().any(|m| m.string == "FooBar"));
+        let foobar_score = smart.iter().find(|m| m.string == "FooBar").map(|m| m.score);
+        let lower_score = smart.iter().find(|m| m.string == "foobar").map(|m| m.score);
+        if let (Some(exact), Some(lower)) = (foobar_score, lower_score) {
+            assert!(exact >= lower);
+        }
+    }
+
+    #[gpui::test]
+    async fn test_smart_case_does_not_flip_order_when_length_penalty_on(
+        executor: BackgroundExecutor,
+    ) {
+        // Regression for the sign bug: with a length penalty large enough to push
+        // `total_score - length_penalty` negative, case mismatches used to make
+        // scores *better* (less negative). Exact-case match must still rank first.
+        let cs = candidates(&[
+            "aaaaaaaaaaaaaaaaaaaaaaaaaaaa_FooBar",
+            "aaaaaaaaaaaaaaaaaaaaaaaaaaaa_foobar",
+        ]);
+        let cancel = AtomicBool::new(false);
+        let results = match_strings_async(
+            &cs,
+            "FooBar",
+            Case::Smart,
+            LengthPenalty::On,
+            10,
+            &cancel,
+            executor,
+        )
+        .await;
+        let exact = results
+            .iter()
+            .find(|m| m.string.as_ref() == "aaaaaaaaaaaaaaaaaaaaaaaaaaaa_FooBar")
+            .map(|m| m.score)
+            .expect("exact-case candidate should match");
+        let mismatch = results
+            .iter()
+            .find(|m| m.string.as_ref() == "aaaaaaaaaaaaaaaaaaaaaaaaaaaa_foobar")
+            .map(|m| m.score)
+            .expect("mismatch-case candidate should match");
+        assert!(
+            exact >= mismatch,
+            "exact-case score ({exact}) should be >= mismatch-case score ({mismatch})"
+        );
+    }
+
+    #[gpui::test]
+    async fn test_char_bag_prefilter(executor: BackgroundExecutor) {
+        let cs = candidates(&["abcdef", "abc", "def", "aabbcc"]);
+        let cancel = AtomicBool::new(false);
+        let results = match_strings_async(
+            &cs,
+            "abc",
+            Case::Ignore,
+            LengthPenalty::Off,
+            10,
+            &cancel,
+            executor,
+        )
+        .await;
+        let matched: Vec<&str> = results.iter().map(|m| m.string.as_ref()).collect();
+        assert!(matched.contains(&"abcdef"));
+        assert!(matched.contains(&"abc"));
+        assert!(matched.contains(&"aabbcc"));
+        assert!(!matched.contains(&"def"));
+    }
+
+    #[test]
+    fn test_sync_basic_match() {
+        let cs = candidates(&["hello", "world", "help"]);
+        let results = match_strings(&cs, "hel", Case::Ignore, LengthPenalty::Off, 10);
+        let matched: Vec<&str> = results.iter().map(|m| m.string.as_ref()).collect();
+        assert!(matched.contains(&"hello"));
+        assert!(matched.contains(&"help"));
+        assert!(!matched.contains(&"world"));
+    }
+
+    #[test]
+    fn test_sync_empty_query_returns_all() {
+        let cs = candidates(&["alpha", "beta", "gamma"]);
+        let results = match_strings(&cs, "", Case::Ignore, LengthPenalty::Off, 10);
+        assert_eq!(results.len(), 3);
+    }
+
+    #[test]
+    fn test_sync_whitespace_only_query_returns_all() {
+        let cs = candidates(&["alpha", "beta", "gamma"]);
+        let results = match_strings(&cs, "  ", Case::Ignore, LengthPenalty::Off, 10);
+        assert_eq!(results.len(), 3);
+    }
+
+    #[test]
+    fn test_sync_max_results() {
+        let cs = candidates(&["ab", "abc", "abcd", "abcde"]);
+        let results = match_strings(&cs, "ab", Case::Ignore, LengthPenalty::Off, 2);
+        assert_eq!(results.len(), 2);
+    }
+
+    #[gpui::test]
+    async fn test_empty_query_respects_max_results(executor: BackgroundExecutor) {
+        let cs = candidates(&["alpha", "beta", "gamma", "delta"]);
+        let cancel = AtomicBool::new(false);
+        let results = match_strings_async(
+            &cs,
+            "",
+            Case::Ignore,
+            LengthPenalty::Off,
+            2,
+            &cancel,
+            executor,
+        )
+        .await;
+        assert_eq!(results.len(), 2);
+    }
+
+    #[gpui::test]
+    async fn test_multi_word_with_nonmatching_word(executor: BackgroundExecutor) {
+        let cs = candidates(&["src/parser.rs", "src/main.rs"]);
+        let cancel = AtomicBool::new(false);
+        let results = match_strings_async(
+            &cs,
+            "src xyzzy",
+            Case::Ignore,
+            LengthPenalty::Off,
+            10,
+            &cancel,
+            executor,
+        )
+        .await;
+        assert!(
+            results.is_empty(),
+            "no candidate contains 'xyzzy', so nothing should match"
+        );
+    }
+}

crates/git_ui/Cargo.toml 🔗

@@ -30,6 +30,7 @@ file_icons.workspace = true
 fs.workspace = true
 futures.workspace = true
 fuzzy.workspace = true
+fuzzy_nucleo.workspace = true
 git.workspace = true
 gpui.workspace = true
 itertools.workspace = true

crates/git_ui/src/branch_picker.rs 🔗

@@ -1,6 +1,6 @@
 use anyhow::Context as _;
 use editor::Editor;
-use fuzzy::StringMatchCandidate;
+use fuzzy_nucleo::StringMatchCandidate;
 
 use collections::HashSet;
 use git::repository::Branch;
@@ -737,11 +737,11 @@ impl PickerDelegate for BranchListDelegate {
                     .enumerate()
                     .map(|(ix, branch)| StringMatchCandidate::new(ix, branch.name()))
                     .collect::<Vec<StringMatchCandidate>>();
-                let mut matches: Vec<Entry> = fuzzy::match_strings(
+                let mut matches: Vec<Entry> = fuzzy_nucleo::match_strings_async(
                     &candidates,
                     &query,
-                    true,
-                    true,
+                    fuzzy_nucleo::Case::Smart,
+                    fuzzy_nucleo::LengthPenalty::On,
                     10000,
                     &Default::default(),
                     cx.background_executor().clone(),

crates/recent_projects/Cargo.toml 🔗

@@ -26,7 +26,7 @@ editor.workspace = true
 extension_host.workspace = true
 fs.workspace = true
 futures.workspace = true
-fuzzy.workspace = true
+fuzzy_nucleo.workspace = true
 gpui.workspace = true
 language.workspace = true
 log.workspace = true

crates/recent_projects/src/recent_projects.rs 🔗

@@ -22,7 +22,7 @@ pub use remote_connection::{RemoteConnectionModal, connect};
 pub use remote_connections::{navigate_to_positions, open_remote_project};
 
 use disconnected_overlay::DisconnectedOverlay;
-use fuzzy::{StringMatch, StringMatchCandidate};
+use fuzzy_nucleo::{StringMatch, StringMatchCandidate, match_strings};
 use gpui::{
     Action, AnyElement, App, Context, DismissEvent, Entity, EventEmitter, FocusHandle, Focusable,
     Subscription, Task, WeakEntity, Window, actions, px,
@@ -937,7 +937,7 @@ impl PickerDelegate for RecentProjectsDelegate {
         cx: &mut Context<Picker<Self>>,
     ) -> gpui::Task<()> {
         let query = query.trim_start();
-        let smart_case = query.chars().any(|c| c.is_uppercase());
+        let case = fuzzy_nucleo::Case::smart_if_uppercase_in(query);
         let is_empty_query = query.is_empty();
 
         let folder_matches = if self.open_folders.is_empty() {
@@ -950,15 +950,13 @@ impl PickerDelegate for RecentProjectsDelegate {
                 .map(|(id, folder)| StringMatchCandidate::new(id, folder.name.as_ref()))
                 .collect();
 
-            smol::block_on(fuzzy::match_strings(
+            match_strings(
                 &candidates,
                 query,
-                smart_case,
-                true,
+                case,
+                fuzzy_nucleo::LengthPenalty::On,
                 100,
-                &Default::default(),
-                cx.background_executor().clone(),
-            ))
+            )
         };
 
         let project_group_candidates: Vec<_> = self
@@ -976,21 +974,13 @@ impl PickerDelegate for RecentProjectsDelegate {
             })
             .collect();
 
-        let mut project_group_matches = smol::block_on(fuzzy::match_strings(
+        let project_group_matches = match_strings(
             &project_group_candidates,
             query,
-            smart_case,
-            true,
+            case,
+            fuzzy_nucleo::LengthPenalty::On,
             100,
-            &Default::default(),
-            cx.background_executor().clone(),
-        ));
-        project_group_matches.sort_unstable_by(|a, b| {
-            b.score
-                .partial_cmp(&a.score)
-                .unwrap_or(std::cmp::Ordering::Equal)
-                .then_with(|| a.candidate_id.cmp(&b.candidate_id))
-        });
+        );
 
         // Build candidates for recent projects (not current, not sibling, not open folder)
         let recent_candidates: Vec<_> = self
@@ -1008,21 +998,13 @@ impl PickerDelegate for RecentProjectsDelegate {
             })
             .collect();
 
-        let mut recent_matches = smol::block_on(fuzzy::match_strings(
+        let recent_matches = match_strings(
             &recent_candidates,
             query,
-            smart_case,
-            true,
+            case,
+            fuzzy_nucleo::LengthPenalty::On,
             100,
-            &Default::default(),
-            cx.background_executor().clone(),
-        ));
-        recent_matches.sort_unstable_by(|a, b| {
-            b.score
-                .partial_cmp(&a.score)
-                .unwrap_or(std::cmp::Ordering::Equal)
-                .then_with(|| a.candidate_id.cmp(&b.candidate_id))
-        });
+        );
 
         let mut entries = Vec::new();
 
@@ -1058,7 +1040,7 @@ impl PickerDelegate for RecentProjectsDelegate {
                         candidate_id: id,
                         score: 0.0,
                         positions: Vec::new(),
-                        string: String::new(),
+                        string: Default::default(),
                     }));
                 }
             } else {
@@ -1084,7 +1066,7 @@ impl PickerDelegate for RecentProjectsDelegate {
                             candidate_id: id,
                             score: 0.0,
                             positions: Vec::new(),
-                            string: String::new(),
+                            string: Default::default(),
                         }));
                     }
                 }

crates/recent_projects/src/sidebar_recent_projects.rs 🔗

@@ -1,7 +1,7 @@
 use std::sync::Arc;
 
 use chrono::{DateTime, Utc};
-use fuzzy::{StringMatch, StringMatchCandidate};
+use fuzzy_nucleo::{StringMatch, StringMatchCandidate, match_strings};
 use gpui::{
     Action, AnyElement, App, Context, DismissEvent, Entity, EventEmitter, FocusHandle, Focusable,
     Subscription, Task, WeakEntity, Window,
@@ -194,7 +194,7 @@ impl PickerDelegate for SidebarRecentProjectsDelegate {
         cx: &mut Context<Picker<Self>>,
     ) -> Task<()> {
         let query = query.trim_start();
-        let smart_case = query.chars().any(|c| c.is_uppercase());
+        let case = fuzzy_nucleo::Case::smart_if_uppercase_in(query);
         let is_empty_query = query.is_empty();
 
         let current_workspace_id = self
@@ -234,22 +234,13 @@ impl PickerDelegate for SidebarRecentProjectsDelegate {
                 })
                 .collect();
         } else {
-            let mut matches = smol::block_on(fuzzy::match_strings(
+            self.filtered_workspaces = match_strings(
                 &candidates,
                 query,
-                smart_case,
-                true,
+                case,
+                fuzzy_nucleo::LengthPenalty::On,
                 100,
-                &Default::default(),
-                cx.background_executor().clone(),
-            ));
-            matches.sort_unstable_by(|a, b| {
-                b.score
-                    .partial_cmp(&a.score)
-                    .unwrap_or(std::cmp::Ordering::Equal)
-                    .then_with(|| a.candidate_id.cmp(&b.candidate_id))
-            });
-            self.filtered_workspaces = matches;
+            );
         }
 
         self.selected_index = 0;

crates/recent_projects/src/wsl_picker.rs 🔗

@@ -24,7 +24,7 @@ pub struct WslPickerDismissed;
 pub(crate) struct WslPickerDelegate {
     selected_index: usize,
     distro_list: Option<Vec<String>>,
-    matches: Vec<fuzzy::StringMatch>,
+    matches: Vec<fuzzy_nucleo::StringMatch>,
 }
 
 impl WslPickerDelegate {
@@ -39,7 +39,7 @@ impl WslPickerDelegate {
     pub fn selected_distro(&self) -> Option<String> {
         self.matches
             .get(self.selected_index)
-            .map(|m| m.string.clone())
+            .map(|m| m.string.to_string())
     }
 }
 
@@ -101,9 +101,9 @@ impl picker::PickerDelegate for WslPickerDelegate {
         &mut self,
         query: String,
         _window: &mut Window,
-        cx: &mut Context<Picker<Self>>,
+        _cx: &mut Context<Picker<Self>>,
     ) -> Task<()> {
-        use fuzzy::StringMatchCandidate;
+        use fuzzy_nucleo::StringMatchCandidate;
 
         let needs_fetch = self.distro_list.is_none();
         if needs_fetch {
@@ -121,16 +121,14 @@ impl picker::PickerDelegate for WslPickerDelegate {
                 .collect::<Vec<_>>();
 
             let query = query.trim_start();
-            let smart_case = query.chars().any(|c| c.is_uppercase());
-            self.matches = smol::block_on(fuzzy::match_strings(
-                candidates.as_slice(),
+            let case = fuzzy_nucleo::Case::smart_if_uppercase_in(query);
+            self.matches = fuzzy_nucleo::match_strings(
+                &candidates,
                 query,
-                smart_case,
-                true,
+                case,
+                fuzzy_nucleo::LengthPenalty::On,
                 100,
-                &Default::default(),
-                cx.background_executor().clone(),
-            ));
+            );
             self.matches.sort_unstable_by_key(|m| m.candidate_id);
 
             self.selected_index = self
@@ -150,7 +148,7 @@ impl picker::PickerDelegate for WslPickerDelegate {
         if let Some(distro) = self.matches.get(self.selected_index) {
             cx.emit(WslDistroSelected {
                 secondary,
-                distro: distro.string.clone(),
+                distro: distro.string.to_string(),
             });
         }
     }

crates/tab_switcher/Cargo.toml 🔗

@@ -15,7 +15,7 @@ doctest = false
 [dependencies]
 collections.workspace = true
 editor.workspace = true
-fuzzy.workspace = true
+fuzzy_nucleo.workspace = true
 gpui.workspace = true
 menu.workspace = true
 picker.workspace = true
@@ -23,7 +23,6 @@ project.workspace = true
 schemars.workspace = true
 serde.workspace = true
 settings.workspace = true
-smol.workspace = true
 ui.workspace = true
 util.workspace = true
 workspace.workspace = true

crates/tab_switcher/src/tab_switcher.rs 🔗

@@ -5,7 +5,7 @@ use collections::{HashMap, HashSet};
 use editor::items::{
     entry_diagnostic_aware_icon_decoration_and_color, entry_git_aware_label_color,
 };
-use fuzzy::StringMatchCandidate;
+use fuzzy_nucleo::StringMatchCandidate;
 use gpui::{
     Action, AnyElement, App, Context, DismissEvent, Entity, EntityId, EventEmitter, FocusHandle,
     Focusable, Modifiers, ModifiersChangedEvent, MouseButton, MouseUpEvent, ParentElement, Point,
@@ -441,15 +441,13 @@ impl TabSwitcherDelegate {
                     ))
                 })
                 .collect::<Vec<_>>();
-            smol::block_on(fuzzy::match_strings(
+            fuzzy_nucleo::match_strings(
                 &candidates,
                 &query,
-                true,
-                true,
+                fuzzy_nucleo::Case::Smart,
+                fuzzy_nucleo::LengthPenalty::On,
                 10000,
-                &Default::default(),
-                cx.background_executor().clone(),
-            ))
+            )
             .into_iter()
             .map(|m| all_items[m.candidate_id].clone())
             .collect()

typos.toml 🔗

@@ -24,8 +24,9 @@ extend-exclude = [
     "crates/livekit_api/",
     # Vim makes heavy use of partial typing tables.
     "crates/vim/",
-    # Editor and file finder rely on partial typing and custom in-string syntax.
+    # Editor, file finder, and fuzzy matching rely on partial typing and custom in-string syntax.
     "crates/file_finder/src/file_finder_tests.rs",
+    "crates/fuzzy_nucleo/src/strings.rs",
     "crates/editor/src/editor_tests.rs",
     "crates/editor/src/edit_prediction_tests.rs",
     # There are some names in the test data that are incorrectly flagged as typos.