From 68541960a7c41548060fcfcb1c644c56a1b31166 Mon Sep 17 00:00:00 2001 From: Finn Eitreim <48069764+feitreim@users.noreply.github.com> Date: Mon, 20 Apr 2026 08:41:29 -0400 Subject: [PATCH] fuzzy_nucleo: Add strings module and route several pickers through it (#54123) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stacked on top of #54112 This is part 2 of 3 towards #51197 More details from the original PR #53551 This PR includes the changes from #54112; I'm not sure how to avoid that. My understanding is that after that one is merged, this PR can be rebased onto main and everything will be correct. You can also view a version of this that reflects the changes more directly here: https://github.com/feitreim/zed/pull/1 ## Changes In this PR I added more general string-matching functionality to `fuzzy_nucleo`. In order to have proper testing for this, I also changed the command palette, tab switching picker, branch picker, and recent projects picker to use this new implementation. I think the command palette change in particular is awesome: it is super nice to vaguely gesture at the command I want and have it pop right up. The main change here, and the departure from https://github.com/zed-industries/zed/pull/37123, is realizing that the primary reason for the regressions is actually how nucleo handles smart case: the old `fuzzy` crate only uses the smart case argument to score things differently, while nucleo actually filters on case, e.g. with smart case the query "Apple" wouldn't match "apple". To get around this we always pass `CaseMatching::Ignore` to nucleo and implement the same score modifications from fuzzy in our code. There is a performance cost to that, of course, but from my testing it is fairly static rather than growing with the number of candidates: a query might take 35 µs instead of 25 µs, but a query that takes 800 µs will only take 820 µs.
Benchmark: | kind | query | size | nucleo | fuzzy | nucleo/fuzzy | |---|---|---:|---:|---:|---:| | string | 1-word | 100 | 9.15 µs | 24.6 µs | 0.37× | | string | 1-word | 1000 | 150.2 µs | 207.2 µs | 0.72× | | string | 1-word | 10000 | 1.34 ms | 2.07 ms | 0.65× | | string | 2-word | 100 | 5.16 µs | 2.94 µs | 1.75× | | string | 2-word | 1000 | 29.0 µs | 11.0 µs | 2.63× | | string | 2-word | 10000 | 210.6 µs | 55.5 µs | 3.79× | | string | 4-word | 100 | 2.57 µs | 2.33 µs | 1.10× | | string | 4-word | 1000 | 6.98 µs | 5.85 µs | 1.19× | | string | 4-word | 10000 | 20.0 µs | 12.0 µs | 1.66× | When I added the 4-word queries to the benchmarks, I was actually really concerned that the performance would be awful, making it unsuitable for the command palette especially. However, I think that because the CharBag pre-filter rejects more candidates when the query is longer, the performance is actually much better than in the 2-word case. Video: https://github.com/user-attachments/assets/3cd7221b-424f-4fd3-8df1-5543dcc340a3 Self-Review Checklist: - [x] I've reviewed my own diff for quality, security, and reliability - [x] Unsafe blocks (if any) have justifying comments - [x] The content is consistent with the [UI/UX checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist) - [x] Tests cover the new/changed behavior - [x] Performance impact has been considered and is acceptable Release Notes: - Improved fuzzy matching in the command palette, branch picker, tab switcher, and recent projects picker to support multi-word queries.
--------- Co-authored-by: Yara --- Cargo.lock | 8 +- crates/command_palette/Cargo.toml | 2 +- crates/command_palette/src/command_palette.rs | 10 +- crates/fuzzy_nucleo/Cargo.toml | 1 + .../fuzzy_nucleo/benches/match_benchmark.rs | 89 ++- crates/fuzzy_nucleo/src/fuzzy_nucleo.rs | 11 +- crates/fuzzy_nucleo/src/strings.rs | 741 ++++++++++++++++++ crates/git_ui/Cargo.toml | 1 + crates/git_ui/src/branch_picker.rs | 8 +- crates/recent_projects/Cargo.toml | 2 +- crates/recent_projects/src/recent_projects.rs | 50 +- .../src/sidebar_recent_projects.rs | 21 +- crates/recent_projects/src/wsl_picker.rs | 24 +- crates/tab_switcher/Cargo.toml | 3 +- crates/tab_switcher/src/tab_switcher.rs | 12 +- typos.toml | 3 +- 16 files changed, 896 insertions(+), 90 deletions(-) create mode 100644 crates/fuzzy_nucleo/src/strings.rs diff --git a/Cargo.lock b/Cargo.lock index b16363d091696d26016338cd62bbcb5ec8f5a447..78166ddc04990b2998e2a64cf831f415efd7ed61 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3340,7 +3340,7 @@ dependencies = [ "command_palette_hooks", "db", "editor", - "fuzzy", + "fuzzy_nucleo", "go_to_line", "gpui", "language", @@ -7323,6 +7323,7 @@ dependencies = [ "fs", "futures 0.3.32", "fuzzy", + "fuzzy_nucleo", "git", "gpui", "indoc", @@ -14293,7 +14294,7 @@ dependencies = [ "extension_host", "fs", "futures 0.3.32", - "fuzzy", + "fuzzy_nucleo", "gpui", "http_client", "indoc", @@ -17429,7 +17430,7 @@ dependencies = [ "collections", "ctor", "editor", - "fuzzy", + "fuzzy_nucleo", "gpui", "menu", "picker", @@ -17438,7 +17439,6 @@ dependencies = [ "serde", "serde_json", "settings", - "smol", "theme", "theme_settings", "ui", diff --git a/crates/command_palette/Cargo.toml b/crates/command_palette/Cargo.toml index df9da6f67e5c2c2e7d91b2ece0245c352e4190b7..1b2af52662cf98bcaf6976f2ae1d4192f1d95c00 100644 --- a/crates/command_palette/Cargo.toml +++ b/crates/command_palette/Cargo.toml @@ -21,7 +21,7 @@ client.workspace = true collections.workspace = true command_palette_hooks.workspace = 
true db.workspace = true -fuzzy.workspace = true +fuzzy_nucleo.workspace = true gpui.workspace = true menu.workspace = true log.workspace = true diff --git a/crates/command_palette/src/command_palette.rs b/crates/command_palette/src/command_palette.rs index 4a80740c3765f25ee878a60fa061c17e3a795b5f..5756dcd888a6f05deae5aa1c74366a929c758401 100644 --- a/crates/command_palette/src/command_palette.rs +++ b/crates/command_palette/src/command_palette.rs @@ -13,7 +13,7 @@ use command_palette_hooks::{ GlobalCommandPaletteInterceptor, }; -use fuzzy::{StringMatch, StringMatchCandidate}; +use fuzzy_nucleo::{StringMatch, StringMatchCandidate}; use gpui::{ Action, App, Context, DismissEvent, Entity, EventEmitter, FocusHandle, Focusable, ParentElement, Render, Styled, Task, WeakEntity, Window, @@ -326,7 +326,7 @@ impl CommandPaletteDelegate { }); new_matches.push(StringMatch { candidate_id: commands.len() - 1, - string, + string: string.into(), positions, score: 0.0, }) @@ -474,11 +474,11 @@ impl PickerDelegate for CommandPaletteDelegate { .map(|(ix, command)| StringMatchCandidate::new(ix, &command.name)) .collect::>(); - let matches = fuzzy::match_strings( + let matches = fuzzy_nucleo::match_strings_async( &candidates, &query, - true, - true, + fuzzy_nucleo::Case::Smart, + fuzzy_nucleo::LengthPenalty::On, 10000, &Default::default(), executor, diff --git a/crates/fuzzy_nucleo/Cargo.toml b/crates/fuzzy_nucleo/Cargo.toml index b2152035ff317aeee5a675e07db1b923213db2f5..2f9a1b9ec39beed607ccbc2779a968ead2a33b44 100644 --- a/crates/fuzzy_nucleo/Cargo.toml +++ b/crates/fuzzy_nucleo/Cargo.toml @@ -20,6 +20,7 @@ util.workspace = true [dev-dependencies] criterion.workspace = true +gpui = { workspace = true, features = ["test-support"] } util = { workspace = true, features = ["test-support"] } [[bench]] diff --git a/crates/fuzzy_nucleo/benches/match_benchmark.rs b/crates/fuzzy_nucleo/benches/match_benchmark.rs index 
3aab6e756fcb944e04e218bc286fe59cc70496a9..8f6eedce491613d332f6b2a5fce928e0c8acaad9 100644 --- a/crates/fuzzy_nucleo/benches/match_benchmark.rs +++ b/crates/fuzzy_nucleo/benches/match_benchmark.rs @@ -1,5 +1,6 @@ use criterion::{BatchSize, BenchmarkId, Criterion, criterion_group, criterion_main}; use fuzzy::CharBag; +use std::sync::atomic::AtomicBool; use util::{paths::PathStyle, rel_path::RelPath}; const DIRS: &[&str] = &[ @@ -129,6 +130,92 @@ fn generate_queries(count: usize) -> (Vec, Vec, Vec) { (n_word(1), n_word(2), n_word(4)) } +fn generate_candidates(count: usize) -> Vec { + (0..count) + .map(|id| { + let dir = DIRS[id % DIRS.len()]; + let file = FILENAMES[id / DIRS.len() % FILENAMES.len()]; + fuzzy_nucleo::StringMatchCandidate::new(id, &format!("{dir}/{file}")) + }) + .collect() +} + +fn to_fuzzy_candidates( + candidates: &[fuzzy_nucleo::StringMatchCandidate], +) -> Vec { + candidates + .iter() + .map(|c| fuzzy::StringMatchCandidate::new(c.id, c.string.as_ref())) + .collect() +} + +fn bench_string_matching(criterion: &mut Criterion) { + let cancel = AtomicBool::new(false); + + let dispatcher = std::sync::Arc::new(gpui::TestDispatcher::new(0)); + let background_executor = gpui::BackgroundExecutor::new(dispatcher.clone()); + let foreground_executor = gpui::ForegroundExecutor::new(dispatcher); + + let sizes = [100, 1000, 10_000]; + let query_count = 200; + let (q1, q2, q4) = generate_queries(query_count); + + for (label, queries) in [("1-word", &q1), ("2-word", &q2), ("4-word", &q4)] { + let mut group = criterion.benchmark_group(label); + for size in sizes { + let candidates = generate_candidates(size); + let fuzzy_candidates = to_fuzzy_candidates(&candidates); + + let mut query_idx = 0usize; + group.bench_function(BenchmarkId::new("nucleo", size), |b| { + b.iter_batched( + || { + let query = queries[query_idx % queries.len()].as_str(); + query_idx += 1; + query + }, + |query| { + foreground_executor.block_on(fuzzy_nucleo::match_strings_async( + &candidates, + 
query, + fuzzy_nucleo::Case::Ignore, + fuzzy_nucleo::LengthPenalty::On, + size, + &cancel, + background_executor.clone(), + )) + }, + BatchSize::SmallInput, + ) + }); + + let mut query_idx = 0usize; + group.bench_function(BenchmarkId::new("fuzzy", size), |b| { + b.iter_batched( + || { + let query = queries[query_idx % queries.len()].as_str(); + query_idx += 1; + query + }, + |query| { + foreground_executor.block_on(fuzzy::match_strings( + &fuzzy_candidates, + query, + false, + true, + size, + &cancel, + background_executor.clone(), + )) + }, + BatchSize::SmallInput, + ) + }); + } + group.finish(); + } +} + fn generate_path_strings(count: usize) -> &'static [String] { let paths: Box<[String]> = (0..count) .map(|id| { @@ -249,5 +336,5 @@ fn bench_path_matching(criterion: &mut Criterion) { } } -criterion_group!(benches, bench_path_matching); +criterion_group!(benches, bench_string_matching, bench_path_matching); criterion_main!(benches); diff --git a/crates/fuzzy_nucleo/src/fuzzy_nucleo.rs b/crates/fuzzy_nucleo/src/fuzzy_nucleo.rs index dcc9edf37d4bf3575dd95cb78a57aa7eb14e0ede..a6b32f6e1cc1b9deb0b28348f5e59a2ce8d55667 100644 --- a/crates/fuzzy_nucleo/src/fuzzy_nucleo.rs +++ b/crates/fuzzy_nucleo/src/fuzzy_nucleo.rs @@ -1,8 +1,11 @@ mod matcher; mod paths; +mod strings; + pub use paths::{ PathMatch, PathMatchCandidate, PathMatchCandidateSet, match_fixed_path_set, match_path_sets, }; +pub use strings::{StringMatch, StringMatchCandidate, match_strings, match_strings_async}; pub(crate) struct Cancelled; @@ -13,8 +16,12 @@ pub enum Case { } impl Case { - pub fn from_smart(smart: bool) -> Self { - if smart { Self::Smart } else { Self::Ignore } + pub fn smart_if_uppercase_in(query: &str) -> Self { + if query.chars().any(|c| c.is_uppercase()) { + Self::Smart + } else { + Self::Ignore + } } pub fn is_smart(self) -> bool { diff --git a/crates/fuzzy_nucleo/src/strings.rs b/crates/fuzzy_nucleo/src/strings.rs new file mode 100644 index 
0000000000000000000000000000000000000000..8596f513b53d25b3a23a03683849f4c279b14946 --- /dev/null +++ b/crates/fuzzy_nucleo/src/strings.rs @@ -0,0 +1,741 @@ +use std::{ + borrow::Borrow, + cmp::Ordering, + iter, + ops::Range, + sync::atomic::{self, AtomicBool}, +}; + +use gpui::{BackgroundExecutor, SharedString}; +use nucleo::Utf32Str; +use nucleo::pattern::{Atom, AtomKind, CaseMatching, Normalization}; + +use crate::{ + Cancelled, Case, LengthPenalty, + matcher::{self, LENGTH_PENALTY}, + positions_from_sorted, +}; +use fuzzy::CharBag; + +// String matching is always case-insensitive at the nucleo level — using +// `CaseMatching::Smart` there would reject queries whose capitalization +// doesn't match the candidate, breaking pickers like the command palette +// (`"Editor: Backspace"` against the action named `"editor: backspace"`). +// `Case::Smart` is still honored as a *scoring hint*: when the query +// contains uppercase, candidates whose matched characters disagree in case +// are downranked rather than dropped. 
+const SMART_CASE_PENALTY_PER_MISMATCH: f64 = 0.9; + +struct Query { + atoms: Vec, + source_words: Option>>, + char_bag: CharBag, +} + +impl Query { + fn build(query: &str, case: Case) -> Option { + let mut atoms = Vec::new(); + let mut source_words = Vec::new(); + let wants_case_penalty = case.is_smart() && query.chars().any(|c| c.is_uppercase()); + + for word in query.split_whitespace() { + atoms.push(Atom::new( + word, + CaseMatching::Ignore, + Normalization::Smart, + AtomKind::Fuzzy, + false, + )); + if wants_case_penalty { + source_words.push(word.chars().collect()); + } + } + + if atoms.is_empty() { + return None; + } + + Some(Query { + atoms, + source_words: wants_case_penalty.then_some(source_words), + char_bag: CharBag::from(query), + }) + } +} + +#[derive(Clone, Debug)] +pub struct StringMatchCandidate { + pub id: usize, + pub string: SharedString, + char_bag: CharBag, +} + +impl StringMatchCandidate { + pub fn new(id: usize, string: impl ToString) -> Self { + Self::from_shared(id, SharedString::new(string.to_string())) + } + + pub fn from_shared(id: usize, string: SharedString) -> Self { + let char_bag = CharBag::from(string.as_ref()); + Self { + id, + string, + char_bag, + } + } +} + +#[derive(Clone, Debug)] +pub struct StringMatch { + pub candidate_id: usize, + pub score: f64, + pub positions: Vec, + pub string: SharedString, +} + +impl StringMatch { + pub fn ranges(&self) -> impl '_ + Iterator> { + let mut positions = self.positions.iter().peekable(); + iter::from_fn(move || { + let start = *positions.next()?; + let char_len = self.char_len_at_index(start)?; + let mut end = start + char_len; + while let Some(next_start) = positions.peek() { + if end == **next_start { + let Some(char_len) = self.char_len_at_index(end) else { + break; + }; + end += char_len; + positions.next(); + } else { + break; + } + } + Some(start..end) + }) + } + + fn char_len_at_index(&self, ix: usize) -> Option { + self.string + .get(ix..) 
+ .and_then(|slice| slice.chars().next().map(|c| c.len_utf8())) + } +} + +impl PartialEq for StringMatch { + fn eq(&self, other: &Self) -> bool { + self.cmp(other).is_eq() + } +} + +impl Eq for StringMatch {} + +impl PartialOrd for StringMatch { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for StringMatch { + fn cmp(&self, other: &Self) -> Ordering { + self.score + .total_cmp(&other.score) + .then_with(|| self.candidate_id.cmp(&other.candidate_id)) + } +} + +pub async fn match_strings_async( + candidates: &[T], + query: &str, + case: Case, + length_penalty: LengthPenalty, + max_results: usize, + cancel_flag: &AtomicBool, + executor: BackgroundExecutor, +) -> Vec +where + T: Borrow + Sync, +{ + if candidates.is_empty() || max_results == 0 { + return Vec::new(); + } + + let Some(query) = Query::build(query, case) else { + return empty_query_results(candidates, max_results); + }; + + let num_cpus = executor.num_cpus().min(candidates.len()); + let segment_size = candidates.len().div_ceil(num_cpus); + let mut segment_results = (0..num_cpus) + .map(|_| Vec::with_capacity(max_results.min(candidates.len()))) + .collect::>(); + + let config = nucleo::Config::DEFAULT; + let mut matchers = matcher::get_matchers(num_cpus, config); + + executor + .scoped(|scope| { + for (segment_idx, (results, matcher)) in segment_results + .iter_mut() + .zip(matchers.iter_mut()) + .enumerate() + { + let query = &query; + scope.spawn(async move { + let segment_start = segment_idx * segment_size; + let segment_end = (segment_start + segment_size).min(candidates.len()); + + match_string_helper( + &candidates[segment_start..segment_end], + query, + matcher, + length_penalty, + results, + cancel_flag, + ) + .ok(); + }); + } + }) + .await; + + matcher::return_matchers(matchers); + + if cancel_flag.load(atomic::Ordering::Acquire) { + return Vec::new(); + } + + let mut results = segment_results.concat(); + util::truncate_to_bottom_n_sorted_by(&mut 
results, max_results, &|a, b| b.cmp(a)); + results +} + +pub fn match_strings( + candidates: &[T], + query: &str, + case: Case, + length_penalty: LengthPenalty, + max_results: usize, +) -> Vec +where + T: Borrow, +{ + if candidates.is_empty() || max_results == 0 { + return Vec::new(); + } + + let Some(query) = Query::build(query, case) else { + return empty_query_results(candidates, max_results); + }; + + let config = nucleo::Config::DEFAULT; + let mut matcher = matcher::get_matcher(config); + let mut results = Vec::with_capacity(max_results.min(candidates.len())); + + match_string_helper( + candidates, + &query, + &mut matcher, + length_penalty, + &mut results, + &AtomicBool::new(false), + ) + .ok(); + + matcher::return_matcher(matcher); + util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a)); + results +} + +fn empty_query_results>( + candidates: &[T], + max_results: usize, +) -> Vec { + candidates + .iter() + .take(max_results) + .map(|candidate| { + let borrowed = candidate.borrow(); + StringMatch { + candidate_id: borrowed.id, + score: 0., + positions: Vec::new(), + string: borrowed.string.clone(), + } + }) + .collect() +} + +fn match_string_helper( + candidates: &[T], + query: &Query, + matcher: &mut nucleo::Matcher, + length_penalty: LengthPenalty, + results: &mut Vec, + cancel_flag: &AtomicBool, +) -> Result<(), Cancelled> +where + T: Borrow, +{ + let mut buf = Vec::new(); + let mut matched_chars: Vec = Vec::new(); + let mut atom_matched_chars = Vec::new(); + let mut candidate_chars: Vec = Vec::new(); + + for candidate in candidates { + buf.clear(); + matched_chars.clear(); + if cancel_flag.load(atomic::Ordering::Relaxed) { + return Err(Cancelled); + } + + let borrowed = candidate.borrow(); + + if !borrowed.char_bag.is_superset(query.char_bag) { + continue; + } + + let haystack: Utf32Str = Utf32Str::new(&borrowed.string, &mut buf); + + if query.source_words.is_some() { + candidate_chars.clear(); + 
candidate_chars.extend(borrowed.string.chars()); + } + + let mut total_score: u32 = 0; + let mut case_mismatches: u32 = 0; + let mut all_matched = true; + + for (atom_idx, atom) in query.atoms.iter().enumerate() { + atom_matched_chars.clear(); + let Some(score) = atom.indices(haystack, matcher, &mut atom_matched_chars) else { + all_matched = false; + break; + }; + total_score = total_score.saturating_add(score as u32); + if let Some(source_words) = query.source_words.as_deref() { + let query_chars = &source_words[atom_idx]; + if query_chars.len() == atom_matched_chars.len() { + for (&query_char, &pos) in query_chars.iter().zip(&atom_matched_chars) { + if let Some(&candidate_char) = candidate_chars.get(pos as usize) + && candidate_char != query_char + && candidate_char.eq_ignore_ascii_case(&query_char) + { + case_mismatches += 1; + } + } + } + } + matched_chars.extend_from_slice(&atom_matched_chars); + } + + if all_matched { + matched_chars.sort_unstable(); + matched_chars.dedup(); + + let positive = total_score as f64 * case_penalty(case_mismatches); + let adjusted_score = + positive - length_penalty_for(borrowed.string.as_ref(), length_penalty); + let positions = positions_from_sorted(borrowed.string.as_ref(), &matched_chars); + + results.push(StringMatch { + candidate_id: borrowed.id, + score: adjusted_score, + positions, + string: borrowed.string.clone(), + }); + } + } + Ok(()) +} + +#[inline] +fn case_penalty(mismatches: u32) -> f64 { + if mismatches == 0 { + 1.0 + } else { + SMART_CASE_PENALTY_PER_MISMATCH.powi(mismatches as i32) + } +} + +#[inline] +fn length_penalty_for(s: &str, length_penalty: LengthPenalty) -> f64 { + if length_penalty.is_on() { + s.len() as f64 * LENGTH_PENALTY + } else { + 0.0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + use gpui::BackgroundExecutor; + + fn candidates(strings: &[&str]) -> Vec { + strings + .iter() + .enumerate() + .map(|(id, s)| StringMatchCandidate::new(id, s)) + .collect() + } + + #[gpui::test] + async fn 
test_basic_match(executor: BackgroundExecutor) { + let cs = candidates(&["hello", "world", "help"]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "hel", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor, + ) + .await; + let matched: Vec<&str> = results.iter().map(|m| m.string.as_ref()).collect(); + assert!(matched.contains(&"hello")); + assert!(matched.contains(&"help")); + assert!(!matched.contains(&"world")); + } + + #[gpui::test] + async fn test_multi_word_query(executor: BackgroundExecutor) { + let cs = candidates(&[ + "src/lib/parser.rs", + "src/bin/main.rs", + "tests/parser_test.rs", + ]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "src parser", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor, + ) + .await; + assert_eq!(results.len(), 1); + assert_eq!(results[0].string, "src/lib/parser.rs"); + } + + #[gpui::test] + async fn test_empty_query_returns_all(executor: BackgroundExecutor) { + let cs = candidates(&["alpha", "beta", "gamma"]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor, + ) + .await; + assert_eq!(results.len(), 3); + assert!(results.iter().all(|m| m.score == 0.0)); + } + + #[gpui::test] + async fn test_whitespace_only_query_returns_all(executor: BackgroundExecutor) { + let cs = candidates(&["alpha", "beta", "gamma"]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + " \t\n", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor, + ) + .await; + assert_eq!(results.len(), 3); + } + + #[gpui::test] + async fn test_empty_candidates(executor: BackgroundExecutor) { + let cs: Vec = vec![]; + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "query", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor, + ) + .await; + 
assert!(results.is_empty()); + } + + #[gpui::test] + async fn test_cancellation(executor: BackgroundExecutor) { + let cs = candidates(&["hello", "world"]); + let cancel = AtomicBool::new(true); + let results = match_strings_async( + &cs, + "hel", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor, + ) + .await; + assert!(results.is_empty()); + } + + #[gpui::test] + async fn test_max_results_limit(executor: BackgroundExecutor) { + let cs = candidates(&["ab", "abc", "abcd", "abcde"]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "ab", + Case::Ignore, + LengthPenalty::Off, + 2, + &cancel, + executor, + ) + .await; + assert_eq!(results.len(), 2); + } + + #[gpui::test] + async fn test_scoring_order(executor: BackgroundExecutor) { + let cs = candidates(&[ + "some_very_long_variable_name_fuzzy", + "fuzzy", + "a_fuzzy_thing", + ]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "fuzzy", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor.clone(), + ) + .await; + + let ordered = matches!( + ( + results[0].string.as_ref(), + results[1].string.as_ref(), + results[2].string.as_ref() + ), + ( + "fuzzy", + "a_fuzzy_thing", + "some_very_long_variable_name_fuzzy" + ) + ); + assert!(ordered, "matches are not in the proper order."); + + let results_penalty = match_strings_async( + &cs, + "fuzzy", + Case::Ignore, + LengthPenalty::On, + 10, + &cancel, + executor, + ) + .await; + let greater = results[2].score > results_penalty[2].score; + assert!(greater, "penalize length not affecting long candidates"); + } + + #[gpui::test] + async fn test_utf8_positions(executor: BackgroundExecutor) { + let cs = candidates(&["café"]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "caf", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor, + ) + .await; + assert_eq!(results.len(), 1); + let m = &results[0]; + assert_eq!(m.positions, vec![0, 
1, 2]); + for &pos in &m.positions { + assert!(m.string.is_char_boundary(pos)); + } + } + + #[gpui::test] + async fn test_smart_case(executor: BackgroundExecutor) { + let cs = candidates(&["FooBar", "foobar", "FOOBAR"]); + let cancel = AtomicBool::new(false); + + let case_insensitive = match_strings_async( + &cs, + "foobar", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor.clone(), + ) + .await; + assert_eq!(case_insensitive.len(), 3); + + let smart = match_strings_async( + &cs, + "FooBar", + Case::Smart, + LengthPenalty::Off, + 10, + &cancel, + executor, + ) + .await; + assert!(smart.iter().any(|m| m.string == "FooBar")); + let foobar_score = smart.iter().find(|m| m.string == "FooBar").map(|m| m.score); + let lower_score = smart.iter().find(|m| m.string == "foobar").map(|m| m.score); + if let (Some(exact), Some(lower)) = (foobar_score, lower_score) { + assert!(exact >= lower); + } + } + + #[gpui::test] + async fn test_smart_case_does_not_flip_order_when_length_penalty_on( + executor: BackgroundExecutor, + ) { + // Regression for the sign bug: with a length penalty large enough to push + // `total_score - length_penalty` negative, case mismatches used to make + // scores *better* (less negative). Exact-case match must still rank first. 
+ let cs = candidates(&[ + "aaaaaaaaaaaaaaaaaaaaaaaaaaaa_FooBar", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaa_foobar", + ]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "FooBar", + Case::Smart, + LengthPenalty::On, + 10, + &cancel, + executor, + ) + .await; + let exact = results + .iter() + .find(|m| m.string.as_ref() == "aaaaaaaaaaaaaaaaaaaaaaaaaaaa_FooBar") + .map(|m| m.score) + .expect("exact-case candidate should match"); + let mismatch = results + .iter() + .find(|m| m.string.as_ref() == "aaaaaaaaaaaaaaaaaaaaaaaaaaaa_foobar") + .map(|m| m.score) + .expect("mismatch-case candidate should match"); + assert!( + exact >= mismatch, + "exact-case score ({exact}) should be >= mismatch-case score ({mismatch})" + ); + } + + #[gpui::test] + async fn test_char_bag_prefilter(executor: BackgroundExecutor) { + let cs = candidates(&["abcdef", "abc", "def", "aabbcc"]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "abc", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor, + ) + .await; + let matched: Vec<&str> = results.iter().map(|m| m.string.as_ref()).collect(); + assert!(matched.contains(&"abcdef")); + assert!(matched.contains(&"abc")); + assert!(matched.contains(&"aabbcc")); + assert!(!matched.contains(&"def")); + } + + #[test] + fn test_sync_basic_match() { + let cs = candidates(&["hello", "world", "help"]); + let results = match_strings(&cs, "hel", Case::Ignore, LengthPenalty::Off, 10); + let matched: Vec<&str> = results.iter().map(|m| m.string.as_ref()).collect(); + assert!(matched.contains(&"hello")); + assert!(matched.contains(&"help")); + assert!(!matched.contains(&"world")); + } + + #[test] + fn test_sync_empty_query_returns_all() { + let cs = candidates(&["alpha", "beta", "gamma"]); + let results = match_strings(&cs, "", Case::Ignore, LengthPenalty::Off, 10); + assert_eq!(results.len(), 3); + } + + #[test] + fn test_sync_whitespace_only_query_returns_all() { + let cs = 
candidates(&["alpha", "beta", "gamma"]); + let results = match_strings(&cs, " ", Case::Ignore, LengthPenalty::Off, 10); + assert_eq!(results.len(), 3); + } + + #[test] + fn test_sync_max_results() { + let cs = candidates(&["ab", "abc", "abcd", "abcde"]); + let results = match_strings(&cs, "ab", Case::Ignore, LengthPenalty::Off, 2); + assert_eq!(results.len(), 2); + } + + #[gpui::test] + async fn test_empty_query_respects_max_results(executor: BackgroundExecutor) { + let cs = candidates(&["alpha", "beta", "gamma", "delta"]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "", + Case::Ignore, + LengthPenalty::Off, + 2, + &cancel, + executor, + ) + .await; + assert_eq!(results.len(), 2); + } + + #[gpui::test] + async fn test_multi_word_with_nonmatching_word(executor: BackgroundExecutor) { + let cs = candidates(&["src/parser.rs", "src/main.rs"]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "src xyzzy", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor, + ) + .await; + assert!( + results.is_empty(), + "no candidate contains 'xyzzy', so nothing should match" + ); + } +} diff --git a/crates/git_ui/Cargo.toml b/crates/git_ui/Cargo.toml index 3aef05d909b095f99a0e6db245f83d737a5c106e..5a9350f8aec7ae4239628fc26f54168f74f286f8 100644 --- a/crates/git_ui/Cargo.toml +++ b/crates/git_ui/Cargo.toml @@ -30,6 +30,7 @@ file_icons.workspace = true fs.workspace = true futures.workspace = true fuzzy.workspace = true +fuzzy_nucleo.workspace = true git.workspace = true gpui.workspace = true itertools.workspace = true diff --git a/crates/git_ui/src/branch_picker.rs b/crates/git_ui/src/branch_picker.rs index bf1780539bad9e22cdeb6cc54ab2bdb1874f10de..a2cf0cb65865b07268cf79234b3892ae261b990b 100644 --- a/crates/git_ui/src/branch_picker.rs +++ b/crates/git_ui/src/branch_picker.rs @@ -1,6 +1,6 @@ use anyhow::Context as _; use editor::Editor; -use fuzzy::StringMatchCandidate; +use 
fuzzy_nucleo::StringMatchCandidate; use collections::HashSet; use git::repository::Branch; @@ -737,11 +737,11 @@ impl PickerDelegate for BranchListDelegate { .enumerate() .map(|(ix, branch)| StringMatchCandidate::new(ix, branch.name())) .collect::>(); - let mut matches: Vec = fuzzy::match_strings( + let mut matches: Vec = fuzzy_nucleo::match_strings_async( &candidates, &query, - true, - true, + fuzzy_nucleo::Case::Smart, + fuzzy_nucleo::LengthPenalty::On, 10000, &Default::default(), cx.background_executor().clone(), diff --git a/crates/recent_projects/Cargo.toml b/crates/recent_projects/Cargo.toml index a2aa9f78a2a5edaf13a4f23f52f3695de636850f..fbb7bb31a939c26dc581dc2e415c946cafadeef1 100644 --- a/crates/recent_projects/Cargo.toml +++ b/crates/recent_projects/Cargo.toml @@ -26,7 +26,7 @@ editor.workspace = true extension_host.workspace = true fs.workspace = true futures.workspace = true -fuzzy.workspace = true +fuzzy_nucleo.workspace = true gpui.workspace = true language.workspace = true log.workspace = true diff --git a/crates/recent_projects/src/recent_projects.rs b/crates/recent_projects/src/recent_projects.rs index 045815800286e5d3787241939d57159aabcc5b77..21aa2d6e0b9387c654bdd2424eb3b81aab2ecfec 100644 --- a/crates/recent_projects/src/recent_projects.rs +++ b/crates/recent_projects/src/recent_projects.rs @@ -22,7 +22,7 @@ pub use remote_connection::{RemoteConnectionModal, connect}; pub use remote_connections::{navigate_to_positions, open_remote_project}; use disconnected_overlay::DisconnectedOverlay; -use fuzzy::{StringMatch, StringMatchCandidate}; +use fuzzy_nucleo::{StringMatch, StringMatchCandidate, match_strings}; use gpui::{ Action, AnyElement, App, Context, DismissEvent, Entity, EventEmitter, FocusHandle, Focusable, Subscription, Task, WeakEntity, Window, actions, px, @@ -937,7 +937,7 @@ impl PickerDelegate for RecentProjectsDelegate { cx: &mut Context>, ) -> gpui::Task<()> { let query = query.trim_start(); - let smart_case = query.chars().any(|c| 
c.is_uppercase()); + let case = fuzzy_nucleo::Case::smart_if_uppercase_in(query); let is_empty_query = query.is_empty(); let folder_matches = if self.open_folders.is_empty() { @@ -950,15 +950,13 @@ impl PickerDelegate for RecentProjectsDelegate { .map(|(id, folder)| StringMatchCandidate::new(id, folder.name.as_ref())) .collect(); - smol::block_on(fuzzy::match_strings( + match_strings( &candidates, query, - smart_case, - true, + case, + fuzzy_nucleo::LengthPenalty::On, 100, - &Default::default(), - cx.background_executor().clone(), - )) + ) }; let project_group_candidates: Vec<_> = self @@ -976,21 +974,13 @@ impl PickerDelegate for RecentProjectsDelegate { }) .collect(); - let mut project_group_matches = smol::block_on(fuzzy::match_strings( + let project_group_matches = match_strings( &project_group_candidates, query, - smart_case, - true, + case, + fuzzy_nucleo::LengthPenalty::On, 100, - &Default::default(), - cx.background_executor().clone(), - )); - project_group_matches.sort_unstable_by(|a, b| { - b.score - .partial_cmp(&a.score) - .unwrap_or(std::cmp::Ordering::Equal) - .then_with(|| a.candidate_id.cmp(&b.candidate_id)) - }); + ); // Build candidates for recent projects (not current, not sibling, not open folder) let recent_candidates: Vec<_> = self @@ -1008,21 +998,13 @@ impl PickerDelegate for RecentProjectsDelegate { }) .collect(); - let mut recent_matches = smol::block_on(fuzzy::match_strings( + let recent_matches = match_strings( &recent_candidates, query, - smart_case, - true, + case, + fuzzy_nucleo::LengthPenalty::On, 100, - &Default::default(), - cx.background_executor().clone(), - )); - recent_matches.sort_unstable_by(|a, b| { - b.score - .partial_cmp(&a.score) - .unwrap_or(std::cmp::Ordering::Equal) - .then_with(|| a.candidate_id.cmp(&b.candidate_id)) - }); + ); let mut entries = Vec::new(); @@ -1058,7 +1040,7 @@ impl PickerDelegate for RecentProjectsDelegate { candidate_id: id, score: 0.0, positions: Vec::new(), - string: String::new(), + string: 
Default::default(), })); } } else { @@ -1084,7 +1066,7 @@ impl PickerDelegate for RecentProjectsDelegate { candidate_id: id, score: 0.0, positions: Vec::new(), - string: String::new(), + string: Default::default(), })); } } diff --git a/crates/recent_projects/src/sidebar_recent_projects.rs b/crates/recent_projects/src/sidebar_recent_projects.rs index f197ed3cead41e1fb3786e5b5a99727b5ebcf6b9..65ed83831a8bd208cf5e38ba27e98bbec28100b4 100644 --- a/crates/recent_projects/src/sidebar_recent_projects.rs +++ b/crates/recent_projects/src/sidebar_recent_projects.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use chrono::{DateTime, Utc}; -use fuzzy::{StringMatch, StringMatchCandidate}; +use fuzzy_nucleo::{StringMatch, StringMatchCandidate, match_strings}; use gpui::{ Action, AnyElement, App, Context, DismissEvent, Entity, EventEmitter, FocusHandle, Focusable, Subscription, Task, WeakEntity, Window, @@ -194,7 +194,7 @@ impl PickerDelegate for SidebarRecentProjectsDelegate { cx: &mut Context>, ) -> Task<()> { let query = query.trim_start(); - let smart_case = query.chars().any(|c| c.is_uppercase()); + let case = fuzzy_nucleo::Case::smart_if_uppercase_in(query); let is_empty_query = query.is_empty(); let current_workspace_id = self @@ -234,22 +234,13 @@ impl PickerDelegate for SidebarRecentProjectsDelegate { }) .collect(); } else { - let mut matches = smol::block_on(fuzzy::match_strings( + self.filtered_workspaces = match_strings( &candidates, query, - smart_case, - true, + case, + fuzzy_nucleo::LengthPenalty::On, 100, - &Default::default(), - cx.background_executor().clone(), - )); - matches.sort_unstable_by(|a, b| { - b.score - .partial_cmp(&a.score) - .unwrap_or(std::cmp::Ordering::Equal) - .then_with(|| a.candidate_id.cmp(&b.candidate_id)) - }); - self.filtered_workspaces = matches; + ); } self.selected_index = 0; diff --git a/crates/recent_projects/src/wsl_picker.rs b/crates/recent_projects/src/wsl_picker.rs index 
c53dd7c3fb68bc087216764536506f85117ffb36..e0930fde365c11ecfb5e7e9e05fdf3682351acca 100644 --- a/crates/recent_projects/src/wsl_picker.rs +++ b/crates/recent_projects/src/wsl_picker.rs @@ -24,7 +24,7 @@ pub struct WslPickerDismissed; pub(crate) struct WslPickerDelegate { selected_index: usize, distro_list: Option>, - matches: Vec, + matches: Vec, } impl WslPickerDelegate { @@ -39,7 +39,7 @@ impl WslPickerDelegate { pub fn selected_distro(&self) -> Option { self.matches .get(self.selected_index) - .map(|m| m.string.clone()) + .map(|m| m.string.to_string()) } } @@ -101,9 +101,9 @@ impl picker::PickerDelegate for WslPickerDelegate { &mut self, query: String, _window: &mut Window, - cx: &mut Context>, + _cx: &mut Context>, ) -> Task<()> { - use fuzzy::StringMatchCandidate; + use fuzzy_nucleo::StringMatchCandidate; let needs_fetch = self.distro_list.is_none(); if needs_fetch { @@ -121,16 +121,14 @@ impl picker::PickerDelegate for WslPickerDelegate { .collect::>(); let query = query.trim_start(); - let smart_case = query.chars().any(|c| c.is_uppercase()); - self.matches = smol::block_on(fuzzy::match_strings( - candidates.as_slice(), + let case = fuzzy_nucleo::Case::smart_if_uppercase_in(query); + self.matches = fuzzy_nucleo::match_strings( + &candidates, query, - smart_case, - true, + case, + fuzzy_nucleo::LengthPenalty::On, 100, - &Default::default(), - cx.background_executor().clone(), - )); + ); self.matches.sort_unstable_by_key(|m| m.candidate_id); self.selected_index = self @@ -150,7 +148,7 @@ impl picker::PickerDelegate for WslPickerDelegate { if let Some(distro) = self.matches.get(self.selected_index) { cx.emit(WslDistroSelected { secondary, - distro: distro.string.clone(), + distro: distro.string.to_string(), }); } } diff --git a/crates/tab_switcher/Cargo.toml b/crates/tab_switcher/Cargo.toml index 8855c8869ab52260be668c45c20e5af7a869433f..5f0238ebc239c592068a4ee940caf621d388d3c3 100644 --- a/crates/tab_switcher/Cargo.toml +++ b/crates/tab_switcher/Cargo.toml @@ 
-15,7 +15,7 @@ doctest = false [dependencies] collections.workspace = true editor.workspace = true -fuzzy.workspace = true +fuzzy_nucleo.workspace = true gpui.workspace = true menu.workspace = true picker.workspace = true @@ -23,7 +23,6 @@ project.workspace = true schemars.workspace = true serde.workspace = true settings.workspace = true -smol.workspace = true ui.workspace = true util.workspace = true workspace.workspace = true diff --git a/crates/tab_switcher/src/tab_switcher.rs b/crates/tab_switcher/src/tab_switcher.rs index d1e19ea4faee8d8259d06e2c24875faac7a0117c..ac4087bb96b2ff36794b47f71f486fb7a5ee64ea 100644 --- a/crates/tab_switcher/src/tab_switcher.rs +++ b/crates/tab_switcher/src/tab_switcher.rs @@ -5,7 +5,7 @@ use collections::{HashMap, HashSet}; use editor::items::{ entry_diagnostic_aware_icon_decoration_and_color, entry_git_aware_label_color, }; -use fuzzy::StringMatchCandidate; +use fuzzy_nucleo::StringMatchCandidate; use gpui::{ Action, AnyElement, App, Context, DismissEvent, Entity, EntityId, EventEmitter, FocusHandle, Focusable, Modifiers, ModifiersChangedEvent, MouseButton, MouseUpEvent, ParentElement, Point, @@ -441,15 +441,13 @@ impl TabSwitcherDelegate { )) }) .collect::>(); - smol::block_on(fuzzy::match_strings( + fuzzy_nucleo::match_strings( &candidates, &query, - true, - true, + fuzzy_nucleo::Case::Smart, + fuzzy_nucleo::LengthPenalty::On, 10000, - &Default::default(), - cx.background_executor().clone(), - )) + ) .into_iter() .map(|m| all_items[m.candidate_id].clone()) .collect() diff --git a/typos.toml b/typos.toml index f2cd6d18be3f0c134d339c8533254e2619309408..22823e6b2d90d95cab03444c192a591228632980 100644 --- a/typos.toml +++ b/typos.toml @@ -24,8 +24,9 @@ extend-exclude = [ "crates/livekit_api/", # Vim makes heavy use of partial typing tables. "crates/vim/", - # Editor and file finder rely on partial typing and custom in-string syntax. + # Editor, file finder, and fuzzy matching rely on partial typing and custom in-string syntax. 
"crates/file_finder/src/file_finder_tests.rs", + "crates/fuzzy_nucleo/src/strings.rs", "crates/editor/src/editor_tests.rs", "crates/editor/src/edit_prediction_tests.rs", # There are some names in the test data that are incorrectly flagged as typos.