diff --git a/Cargo.lock b/Cargo.lock index b16363d091696d26016338cd62bbcb5ec8f5a447..78166ddc04990b2998e2a64cf831f415efd7ed61 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3340,7 +3340,7 @@ dependencies = [ "command_palette_hooks", "db", "editor", - "fuzzy", + "fuzzy_nucleo", "go_to_line", "gpui", "language", @@ -7323,6 +7323,7 @@ dependencies = [ "fs", "futures 0.3.32", "fuzzy", + "fuzzy_nucleo", "git", "gpui", "indoc", @@ -14293,7 +14294,7 @@ dependencies = [ "extension_host", "fs", "futures 0.3.32", - "fuzzy", + "fuzzy_nucleo", "gpui", "http_client", "indoc", @@ -17429,7 +17430,7 @@ dependencies = [ "collections", "ctor", "editor", - "fuzzy", + "fuzzy_nucleo", "gpui", "menu", "picker", @@ -17438,7 +17439,6 @@ dependencies = [ "serde", "serde_json", "settings", - "smol", "theme", "theme_settings", "ui", diff --git a/crates/command_palette/Cargo.toml b/crates/command_palette/Cargo.toml index df9da6f67e5c2c2e7d91b2ece0245c352e4190b7..1b2af52662cf98bcaf6976f2ae1d4192f1d95c00 100644 --- a/crates/command_palette/Cargo.toml +++ b/crates/command_palette/Cargo.toml @@ -21,7 +21,7 @@ client.workspace = true collections.workspace = true command_palette_hooks.workspace = true db.workspace = true -fuzzy.workspace = true +fuzzy_nucleo.workspace = true gpui.workspace = true menu.workspace = true log.workspace = true diff --git a/crates/command_palette/src/command_palette.rs b/crates/command_palette/src/command_palette.rs index 4a80740c3765f25ee878a60fa061c17e3a795b5f..5756dcd888a6f05deae5aa1c74366a929c758401 100644 --- a/crates/command_palette/src/command_palette.rs +++ b/crates/command_palette/src/command_palette.rs @@ -13,7 +13,7 @@ use command_palette_hooks::{ GlobalCommandPaletteInterceptor, }; -use fuzzy::{StringMatch, StringMatchCandidate}; +use fuzzy_nucleo::{StringMatch, StringMatchCandidate}; use gpui::{ Action, App, Context, DismissEvent, Entity, EventEmitter, FocusHandle, Focusable, ParentElement, Render, Styled, Task, WeakEntity, Window, @@ -326,7 +326,7 @@ impl CommandPaletteDelegate { }); new_matches.push(StringMatch { candidate_id: commands.len() - 1, - string, + string: string.into(), positions, score: 0.0, }) @@ -474,11 +474,11 @@ impl PickerDelegate for CommandPaletteDelegate { .map(|(ix, command)| StringMatchCandidate::new(ix, &command.name)) .collect::>(); - let matches = fuzzy::match_strings( + let matches = fuzzy_nucleo::match_strings_async( &candidates, &query, - true, - true, + fuzzy_nucleo::Case::Smart, + fuzzy_nucleo::LengthPenalty::On, 10000, &Default::default(), executor, diff --git a/crates/fuzzy_nucleo/Cargo.toml b/crates/fuzzy_nucleo/Cargo.toml index b2152035ff317aeee5a675e07db1b923213db2f5..2f9a1b9ec39beed607ccbc2779a968ead2a33b44 100644 --- a/crates/fuzzy_nucleo/Cargo.toml +++ b/crates/fuzzy_nucleo/Cargo.toml @@ -20,6 +20,7 @@ util.workspace = true [dev-dependencies] criterion.workspace = true +gpui = { workspace = true, features = ["test-support"] } util = { workspace = true, features = ["test-support"] } [[bench]] diff --git a/crates/fuzzy_nucleo/benches/match_benchmark.rs b/crates/fuzzy_nucleo/benches/match_benchmark.rs index 3aab6e756fcb944e04e218bc286fe59cc70496a9..8f6eedce491613d332f6b2a5fce928e0c8acaad9 100644 --- a/crates/fuzzy_nucleo/benches/match_benchmark.rs +++ b/crates/fuzzy_nucleo/benches/match_benchmark.rs @@ -1,5 +1,6 @@ use criterion::{BatchSize, BenchmarkId, Criterion, criterion_group, criterion_main}; use fuzzy::CharBag; +use std::sync::atomic::AtomicBool; use util::{paths::PathStyle, rel_path::RelPath}; const DIRS: &[&str] = &[ @@ -129,6 +130,92 @@ fn generate_queries(count: usize) -> (Vec, Vec, Vec) { (n_word(1), n_word(2), n_word(4)) } +fn generate_candidates(count: usize) -> Vec { + (0..count) + .map(|id| { + let dir = DIRS[id % DIRS.len()]; + let file = FILENAMES[id / DIRS.len() % FILENAMES.len()]; + fuzzy_nucleo::StringMatchCandidate::new(id, &format!("{dir}/{file}")) + }) + .collect() +} + +fn to_fuzzy_candidates( + candidates: &[fuzzy_nucleo::StringMatchCandidate], +) -> Vec { + candidates + .iter() + .map(|c| fuzzy::StringMatchCandidate::new(c.id, c.string.as_ref())) + .collect() +} + +fn bench_string_matching(criterion: &mut Criterion) { + let cancel = AtomicBool::new(false); + + let dispatcher = std::sync::Arc::new(gpui::TestDispatcher::new(0)); + let background_executor = gpui::BackgroundExecutor::new(dispatcher.clone()); + let foreground_executor = gpui::ForegroundExecutor::new(dispatcher); + + let sizes = [100, 1000, 10_000]; + let query_count = 200; + let (q1, q2, q4) = generate_queries(query_count); + + for (label, queries) in [("1-word", &q1), ("2-word", &q2), ("4-word", &q4)] { + let mut group = criterion.benchmark_group(label); + for size in sizes { + let candidates = generate_candidates(size); + let fuzzy_candidates = to_fuzzy_candidates(&candidates); + + let mut query_idx = 0usize; + group.bench_function(BenchmarkId::new("nucleo", size), |b| { + b.iter_batched( + || { + let query = queries[query_idx % queries.len()].as_str(); + query_idx += 1; + query + }, + |query| { + foreground_executor.block_on(fuzzy_nucleo::match_strings_async( + &candidates, + query, + fuzzy_nucleo::Case::Ignore, + fuzzy_nucleo::LengthPenalty::On, + size, + &cancel, + background_executor.clone(), + )) + }, + BatchSize::SmallInput, + ) + }); + + let mut query_idx = 0usize; + group.bench_function(BenchmarkId::new("fuzzy", size), |b| { + b.iter_batched( + || { + let query = queries[query_idx % queries.len()].as_str(); + query_idx += 1; + query + }, + |query| { + foreground_executor.block_on(fuzzy::match_strings( + &fuzzy_candidates, + query, + false, + true, + size, + &cancel, + background_executor.clone(), + )) + }, + BatchSize::SmallInput, + ) + }); + } + group.finish(); + } +} + fn generate_path_strings(count: usize) -> &'static [String] { let paths: Box<[String]> = (0..count) .map(|id| { @@ -249,5 +336,5 @@ fn bench_path_matching(criterion: &mut Criterion) { } } -criterion_group!(benches, bench_path_matching); +criterion_group!(benches, bench_string_matching, bench_path_matching); criterion_main!(benches); diff --git a/crates/fuzzy_nucleo/src/fuzzy_nucleo.rs b/crates/fuzzy_nucleo/src/fuzzy_nucleo.rs index dcc9edf37d4bf3575dd95cb78a57aa7eb14e0ede..a6b32f6e1cc1b9deb0b28348f5e59a2ce8d55667 100644 --- a/crates/fuzzy_nucleo/src/fuzzy_nucleo.rs +++ b/crates/fuzzy_nucleo/src/fuzzy_nucleo.rs @@ -1,8 +1,11 @@ mod matcher; mod paths; +mod strings; + pub use paths::{ PathMatch, PathMatchCandidate, PathMatchCandidateSet, match_fixed_path_set, match_path_sets, }; +pub use strings::{StringMatch, StringMatchCandidate, match_strings, match_strings_async}; pub(crate) struct Cancelled; @@ -13,8 +16,12 @@ pub enum Case { } impl Case { - pub fn from_smart(smart: bool) -> Self { - if smart { Self::Smart } else { Self::Ignore } + pub fn smart_if_uppercase_in(query: &str) -> Self { + if query.chars().any(|c| c.is_uppercase()) { + Self::Smart + } else { + Self::Ignore + } } pub fn is_smart(self) -> bool { diff --git a/crates/fuzzy_nucleo/src/strings.rs b/crates/fuzzy_nucleo/src/strings.rs new file mode 100644 index 0000000000000000000000000000000000000000..8596f513b53d25b3a23a03683849f4c279b14946 --- /dev/null +++ b/crates/fuzzy_nucleo/src/strings.rs @@ -0,0 +1,741 @@ +use std::{ + borrow::Borrow, + cmp::Ordering, + iter, + ops::Range, + sync::atomic::{self, AtomicBool}, +}; + +use gpui::{BackgroundExecutor, SharedString}; +use nucleo::Utf32Str; +use nucleo::pattern::{Atom, AtomKind, CaseMatching, Normalization}; + +use crate::{ + Cancelled, Case, LengthPenalty, + matcher::{self, LENGTH_PENALTY}, + positions_from_sorted, +}; +use fuzzy::CharBag; + +// String matching is always case-insensitive at the nucleo level — using +// `CaseMatching::Smart` there would reject queries whose capitalization +// doesn't match the candidate, breaking pickers like the command palette +// (`"Editor: Backspace"` against the action named `"editor: backspace"`). +// `Case::Smart` is still honored as a *scoring hint*: when the query +// contains uppercase, candidates whose matched characters disagree in case +// are downranked rather than dropped. +const SMART_CASE_PENALTY_PER_MISMATCH: f64 = 0.9; + +struct Query { + atoms: Vec, + source_words: Option>>, + char_bag: CharBag, +} + +impl Query { + fn build(query: &str, case: Case) -> Option { + let mut atoms = Vec::new(); + let mut source_words = Vec::new(); + let wants_case_penalty = case.is_smart() && query.chars().any(|c| c.is_uppercase()); + + for word in query.split_whitespace() { + atoms.push(Atom::new( + word, + CaseMatching::Ignore, + Normalization::Smart, + AtomKind::Fuzzy, + false, + )); + if wants_case_penalty { + source_words.push(word.chars().collect()); + } + } + + if atoms.is_empty() { + return None; + } + + Some(Query { + atoms, + source_words: wants_case_penalty.then_some(source_words), + char_bag: CharBag::from(query), + }) + } +} + +#[derive(Clone, Debug)] +pub struct StringMatchCandidate { + pub id: usize, + pub string: SharedString, + char_bag: CharBag, +} + +impl StringMatchCandidate { + pub fn new(id: usize, string: impl ToString) -> Self { + Self::from_shared(id, SharedString::new(string.to_string())) + } + + pub fn from_shared(id: usize, string: SharedString) -> Self { + let char_bag = CharBag::from(string.as_ref()); + Self { + id, + string, + char_bag, + } + } +} + +#[derive(Clone, Debug)] +pub struct StringMatch { + pub candidate_id: usize, + pub score: f64, + pub positions: Vec, + pub string: SharedString, +} + +impl StringMatch { + pub fn ranges(&self) -> impl '_ + Iterator> { + let mut positions = self.positions.iter().peekable(); + iter::from_fn(move || { + let start = *positions.next()?; + let char_len = self.char_len_at_index(start)?; + let mut end = start + char_len; + while let Some(next_start) = positions.peek() { + if end == **next_start { + let Some(char_len) = self.char_len_at_index(end) else { + break; + }; + end += char_len; + positions.next(); + } else { + break; + } + } + Some(start..end) + }) + } + + fn char_len_at_index(&self, ix: usize) -> Option { + self.string + .get(ix..) + .and_then(|slice| slice.chars().next().map(|c| c.len_utf8())) + } +} + +impl PartialEq for StringMatch { + fn eq(&self, other: &Self) -> bool { + self.cmp(other).is_eq() + } +} + +impl Eq for StringMatch {} + +impl PartialOrd for StringMatch { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for StringMatch { + fn cmp(&self, other: &Self) -> Ordering { + self.score + .total_cmp(&other.score) + .then_with(|| self.candidate_id.cmp(&other.candidate_id)) + } +} + +pub async fn match_strings_async( + candidates: &[T], + query: &str, + case: Case, + length_penalty: LengthPenalty, + max_results: usize, + cancel_flag: &AtomicBool, + executor: BackgroundExecutor, +) -> Vec +where + T: Borrow + Sync, +{ + if candidates.is_empty() || max_results == 0 { + return Vec::new(); + } + + let Some(query) = Query::build(query, case) else { + return empty_query_results(candidates, max_results); + }; + + let num_cpus = executor.num_cpus().min(candidates.len()); + let segment_size = candidates.len().div_ceil(num_cpus); + let mut segment_results = (0..num_cpus) + .map(|_| Vec::with_capacity(max_results.min(candidates.len()))) + .collect::>(); + + let config = nucleo::Config::DEFAULT; + let mut matchers = matcher::get_matchers(num_cpus, config); + + executor + .scoped(|scope| { + for (segment_idx, (results, matcher)) in segment_results + .iter_mut() + .zip(matchers.iter_mut()) + .enumerate() + { + let query = &query; + scope.spawn(async move { + let segment_start = segment_idx * segment_size; + let segment_end = (segment_start + segment_size).min(candidates.len()); + + match_string_helper( + &candidates[segment_start..segment_end], + query, + matcher, + length_penalty, + results, + cancel_flag, + ) + .ok(); + }); + } + }) + .await; + + matcher::return_matchers(matchers); + + if cancel_flag.load(atomic::Ordering::Acquire) { + return Vec::new(); + } + + let mut results = segment_results.concat(); + util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a)); + results +} + +pub fn match_strings( + candidates: &[T], + query: &str, + case: Case, + length_penalty: LengthPenalty, + max_results: usize, +) -> Vec +where + T: Borrow, +{ + if candidates.is_empty() || max_results == 0 { + return Vec::new(); + } + + let Some(query) = Query::build(query, case) else { + return empty_query_results(candidates, max_results); + }; + + let config = nucleo::Config::DEFAULT; + let mut matcher = matcher::get_matcher(config); + let mut results = Vec::with_capacity(max_results.min(candidates.len())); + + match_string_helper( + candidates, + &query, + &mut matcher, + length_penalty, + &mut results, + &AtomicBool::new(false), + ) + .ok(); + + matcher::return_matcher(matcher); + util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a)); + results +} + +fn empty_query_results>( + candidates: &[T], + max_results: usize, +) -> Vec { + candidates + .iter() + .take(max_results) + .map(|candidate| { + let borrowed = candidate.borrow(); + StringMatch { + candidate_id: borrowed.id, + score: 0., + positions: Vec::new(), + string: borrowed.string.clone(), + } + }) + .collect() +} + +fn match_string_helper( + candidates: &[T], + query: &Query, + matcher: &mut nucleo::Matcher, + length_penalty: LengthPenalty, + results: &mut Vec, + cancel_flag: &AtomicBool, +) -> Result<(), Cancelled> +where + T: Borrow, +{ + let mut buf = Vec::new(); + let mut matched_chars: Vec = Vec::new(); + let mut atom_matched_chars = Vec::new(); + let mut candidate_chars: Vec = Vec::new(); + + for candidate in candidates { + buf.clear(); + matched_chars.clear(); + if cancel_flag.load(atomic::Ordering::Relaxed) { + return Err(Cancelled); + } + + let borrowed = candidate.borrow(); + + if !borrowed.char_bag.is_superset(query.char_bag) { + continue; + } + + let haystack: Utf32Str = Utf32Str::new(&borrowed.string, &mut buf); + + if query.source_words.is_some() { + candidate_chars.clear(); + candidate_chars.extend(borrowed.string.chars()); + } + + let mut total_score: u32 = 0; + let mut case_mismatches: u32 = 0; + let mut all_matched = true; + + for (atom_idx, atom) in query.atoms.iter().enumerate() { + atom_matched_chars.clear(); + let Some(score) = atom.indices(haystack, matcher, &mut atom_matched_chars) else { + all_matched = false; + break; + }; + total_score = total_score.saturating_add(score as u32); + if let Some(source_words) = query.source_words.as_deref() { + let query_chars = &source_words[atom_idx]; + if query_chars.len() == atom_matched_chars.len() { + for (&query_char, &pos) in query_chars.iter().zip(&atom_matched_chars) { + if let Some(&candidate_char) = candidate_chars.get(pos as usize) + && candidate_char != query_char + && candidate_char.eq_ignore_ascii_case(&query_char) + { + case_mismatches += 1; + } + } + } + } + matched_chars.extend_from_slice(&atom_matched_chars); + } + + if all_matched { + matched_chars.sort_unstable(); + matched_chars.dedup(); + + let positive = total_score as f64 * case_penalty(case_mismatches); + let adjusted_score = + positive - length_penalty_for(borrowed.string.as_ref(), length_penalty); + let positions = positions_from_sorted(borrowed.string.as_ref(), &matched_chars); + + results.push(StringMatch { + candidate_id: borrowed.id, + score: adjusted_score, + positions, + string: borrowed.string.clone(), + }); + } + } + Ok(()) +} + +#[inline] +fn case_penalty(mismatches: u32) -> f64 { + if mismatches == 0 { + 1.0 + } else { + SMART_CASE_PENALTY_PER_MISMATCH.powi(mismatches as i32) + } +} + +#[inline] +fn length_penalty_for(s: &str, length_penalty: LengthPenalty) -> f64 { + if length_penalty.is_on() { + s.len() as f64 * LENGTH_PENALTY + } else { + 0.0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + use gpui::BackgroundExecutor; + + fn candidates(strings: &[&str]) -> Vec { + strings + .iter() + .enumerate() + .map(|(id, s)| StringMatchCandidate::new(id, s)) + .collect() + } + + #[gpui::test] + async fn test_basic_match(executor: BackgroundExecutor) { + let cs = candidates(&["hello", "world", "help"]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "hel", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor, + ) + .await; + let matched: Vec<&str> = results.iter().map(|m| m.string.as_ref()).collect(); + assert!(matched.contains(&"hello")); + assert!(matched.contains(&"help")); + assert!(!matched.contains(&"world")); + } + + #[gpui::test] + async fn test_multi_word_query(executor: BackgroundExecutor) { + let cs = candidates(&[ + "src/lib/parser.rs", + "src/bin/main.rs", + "tests/parser_test.rs", + ]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "src parser", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor, + ) + .await; + assert_eq!(results.len(), 1); + assert_eq!(results[0].string, "src/lib/parser.rs"); + } + + #[gpui::test] + async fn test_empty_query_returns_all(executor: BackgroundExecutor) { + let cs = candidates(&["alpha", "beta", "gamma"]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor, + ) + .await; + assert_eq!(results.len(), 3); + assert!(results.iter().all(|m| m.score == 0.0)); + } + + #[gpui::test] + async fn test_whitespace_only_query_returns_all(executor: BackgroundExecutor) { + let cs = candidates(&["alpha", "beta", "gamma"]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + " \t\n", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor, + ) + .await; + assert_eq!(results.len(), 3); + } + + #[gpui::test] + async fn test_empty_candidates(executor: BackgroundExecutor) { + let cs: Vec = vec![]; + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "query", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor, + ) + .await; + assert!(results.is_empty()); + } + + #[gpui::test] + async fn test_cancellation(executor: BackgroundExecutor) { + let cs = candidates(&["hello", "world"]); + let cancel = AtomicBool::new(true); + let results = match_strings_async( + &cs, + "hel", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor, + ) + .await; + assert!(results.is_empty()); + } + + #[gpui::test] + async fn test_max_results_limit(executor: BackgroundExecutor) { + let cs = candidates(&["ab", "abc", "abcd", "abcde"]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "ab", + Case::Ignore, + LengthPenalty::Off, + 2, + &cancel, + executor, + ) + .await; + assert_eq!(results.len(), 2); + } + + #[gpui::test] + async fn test_scoring_order(executor: BackgroundExecutor) { + let cs = candidates(&[ + "some_very_long_variable_name_fuzzy", + "fuzzy", + "a_fuzzy_thing", + ]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "fuzzy", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor.clone(), + ) + .await; + + let ordered = matches!( + ( + results[0].string.as_ref(), + results[1].string.as_ref(), + results[2].string.as_ref() + ), + ( + "fuzzy", + "a_fuzzy_thing", + "some_very_long_variable_name_fuzzy" + ) + ); + assert!(ordered, "matches are not in the proper order."); + + let results_penalty = match_strings_async( + &cs, + "fuzzy", + Case::Ignore, + LengthPenalty::On, + 10, + &cancel, + executor, + ) + .await; + let greater = results[2].score > results_penalty[2].score; + assert!(greater, "penalize length not affecting long candidates"); + } + + #[gpui::test] + async fn test_utf8_positions(executor: BackgroundExecutor) { + let cs = candidates(&["café"]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "caf", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor, + ) + .await; + assert_eq!(results.len(), 1); + let m = &results[0]; + assert_eq!(m.positions, vec![0, 1, 2]); + for &pos in &m.positions { + assert!(m.string.is_char_boundary(pos)); + } + } + + #[gpui::test] + async fn test_smart_case(executor: BackgroundExecutor) { + let cs = candidates(&["FooBar", "foobar", "FOOBAR"]); + let cancel = AtomicBool::new(false); + + let case_insensitive = match_strings_async( + &cs, + "foobar", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor.clone(), + ) + .await; + assert_eq!(case_insensitive.len(), 3); + + let smart = match_strings_async( + &cs, + "FooBar", + Case::Smart, + LengthPenalty::Off, + 10, + &cancel, + executor, + ) + .await; + assert!(smart.iter().any(|m| m.string == "FooBar")); + let foobar_score = smart.iter().find(|m| m.string == "FooBar").map(|m| m.score); + let lower_score = smart.iter().find(|m| m.string == "foobar").map(|m| m.score); + if let (Some(exact), Some(lower)) = (foobar_score, lower_score) { + assert!(exact >= lower); + } + } + + #[gpui::test] + async fn test_smart_case_does_not_flip_order_when_length_penalty_on( + executor: BackgroundExecutor, + ) { + // Regression for the sign bug: with a length penalty large enough to push + // `total_score - length_penalty` negative, case mismatches used to make + // scores *better* (less negative). Exact-case match must still rank first. + let cs = candidates(&[ + "aaaaaaaaaaaaaaaaaaaaaaaaaaaa_FooBar", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaa_foobar", + ]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "FooBar", + Case::Smart, + LengthPenalty::On, + 10, + &cancel, + executor, + ) + .await; + let exact = results + .iter() + .find(|m| m.string.as_ref() == "aaaaaaaaaaaaaaaaaaaaaaaaaaaa_FooBar") + .map(|m| m.score) + .expect("exact-case candidate should match"); + let mismatch = results + .iter() + .find(|m| m.string.as_ref() == "aaaaaaaaaaaaaaaaaaaaaaaaaaaa_foobar") + .map(|m| m.score) + .expect("mismatch-case candidate should match"); + assert!( + exact >= mismatch, + "exact-case score ({exact}) should be >= mismatch-case score ({mismatch})" + ); + } + + #[gpui::test] + async fn test_char_bag_prefilter(executor: BackgroundExecutor) { + let cs = candidates(&["abcdef", "abc", "def", "aabbcc"]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "abc", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor, + ) + .await; + let matched: Vec<&str> = results.iter().map(|m| m.string.as_ref()).collect(); + assert!(matched.contains(&"abcdef")); + assert!(matched.contains(&"abc")); + assert!(matched.contains(&"aabbcc")); + assert!(!matched.contains(&"def")); + } + + #[test] + fn test_sync_basic_match() { + let cs = candidates(&["hello", "world", "help"]); + let results = match_strings(&cs, "hel", Case::Ignore, LengthPenalty::Off, 10); + let matched: Vec<&str> = results.iter().map(|m| m.string.as_ref()).collect(); + assert!(matched.contains(&"hello")); + assert!(matched.contains(&"help")); + assert!(!matched.contains(&"world")); + } + + #[test] + fn test_sync_empty_query_returns_all() { + let cs = candidates(&["alpha", "beta", "gamma"]); + let results = match_strings(&cs, "", Case::Ignore, LengthPenalty::Off, 10); + assert_eq!(results.len(), 3); + } + + #[test] + fn test_sync_whitespace_only_query_returns_all() { + let cs = candidates(&["alpha", "beta", "gamma"]); + let results = match_strings(&cs, " ", Case::Ignore, LengthPenalty::Off, 10); + assert_eq!(results.len(), 3); + } + + #[test] + fn test_sync_max_results() { + let cs = candidates(&["ab", "abc", "abcd", "abcde"]); + let results = match_strings(&cs, "ab", Case::Ignore, LengthPenalty::Off, 2); + assert_eq!(results.len(), 2); + } + + #[gpui::test] + async fn test_empty_query_respects_max_results(executor: BackgroundExecutor) { + let cs = candidates(&["alpha", "beta", "gamma", "delta"]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "", + Case::Ignore, + LengthPenalty::Off, + 2, + &cancel, + executor, + ) + .await; + assert_eq!(results.len(), 2); + } + + #[gpui::test] + async fn test_multi_word_with_nonmatching_word(executor: BackgroundExecutor) { + let cs = candidates(&["src/parser.rs", "src/main.rs"]); + let cancel = AtomicBool::new(false); + let results = match_strings_async( + &cs, + "src xyzzy", + Case::Ignore, + LengthPenalty::Off, + 10, + &cancel, + executor, + ) + .await; + assert!( + results.is_empty(), + "no candidate contains 'xyzzy', so nothing should match" + ); + } +} diff --git a/crates/git_ui/Cargo.toml b/crates/git_ui/Cargo.toml index 3aef05d909b095f99a0e6db245f83d737a5c106e..5a9350f8aec7ae4239628fc26f54168f74f286f8 100644 --- a/crates/git_ui/Cargo.toml +++ b/crates/git_ui/Cargo.toml @@ -30,6 +30,7 @@ file_icons.workspace = true fs.workspace = true futures.workspace = true fuzzy.workspace = true +fuzzy_nucleo.workspace = true git.workspace = true gpui.workspace = true itertools.workspace = true diff --git a/crates/git_ui/src/branch_picker.rs b/crates/git_ui/src/branch_picker.rs index bf1780539bad9e22cdeb6cc54ab2bdb1874f10de..a2cf0cb65865b07268cf79234b3892ae261b990b 100644 --- a/crates/git_ui/src/branch_picker.rs +++ b/crates/git_ui/src/branch_picker.rs @@ -1,6 +1,6 @@ use anyhow::Context as _; use editor::Editor; -use fuzzy::StringMatchCandidate; +use fuzzy_nucleo::StringMatchCandidate; use collections::HashSet; use git::repository::Branch; @@ -737,11 +737,11 @@ impl PickerDelegate for BranchListDelegate { .enumerate() .map(|(ix, branch)| StringMatchCandidate::new(ix, branch.name())) .collect::>(); - let mut matches: Vec = fuzzy::match_strings( + let mut matches: Vec = fuzzy_nucleo::match_strings_async( &candidates, &query, - true, - true, + fuzzy_nucleo::Case::Smart, + fuzzy_nucleo::LengthPenalty::On, 10000, &Default::default(), cx.background_executor().clone(), diff --git a/crates/recent_projects/Cargo.toml b/crates/recent_projects/Cargo.toml index a2aa9f78a2a5edaf13a4f23f52f3695de636850f..fbb7bb31a939c26dc581dc2e415c946cafadeef1 100644 --- a/crates/recent_projects/Cargo.toml +++ b/crates/recent_projects/Cargo.toml @@ -26,7 +26,7 @@ editor.workspace = true extension_host.workspace = true fs.workspace = true futures.workspace = true -fuzzy.workspace = true +fuzzy_nucleo.workspace = true gpui.workspace = true language.workspace = true log.workspace = true diff --git a/crates/recent_projects/src/recent_projects.rs b/crates/recent_projects/src/recent_projects.rs index 045815800286e5d3787241939d57159aabcc5b77..21aa2d6e0b9387c654bdd2424eb3b81aab2ecfec 100644 --- a/crates/recent_projects/src/recent_projects.rs +++ b/crates/recent_projects/src/recent_projects.rs @@ -22,7 +22,7 @@ pub use remote_connection::{RemoteConnectionModal, connect}; pub use remote_connections::{navigate_to_positions, open_remote_project}; use disconnected_overlay::DisconnectedOverlay; -use fuzzy::{StringMatch, StringMatchCandidate}; +use fuzzy_nucleo::{StringMatch, StringMatchCandidate, match_strings}; use gpui::{ Action, AnyElement, App, Context, DismissEvent, Entity, EventEmitter, FocusHandle, Focusable, Subscription, Task, WeakEntity, Window, actions, px, @@ -937,7 +937,7 @@ impl PickerDelegate for RecentProjectsDelegate { cx: &mut Context>, ) -> gpui::Task<()> { let query = query.trim_start(); - let smart_case = query.chars().any(|c| c.is_uppercase()); + let case = fuzzy_nucleo::Case::smart_if_uppercase_in(query); let is_empty_query = query.is_empty(); let folder_matches = if self.open_folders.is_empty() { @@ -950,15 +950,13 @@ impl PickerDelegate for RecentProjectsDelegate { .map(|(id, folder)| StringMatchCandidate::new(id, folder.name.as_ref())) .collect(); - smol::block_on(fuzzy::match_strings( + match_strings( &candidates, query, - smart_case, - true, + case, + fuzzy_nucleo::LengthPenalty::On, 100, - &Default::default(), - cx.background_executor().clone(), - )) + ) }; let project_group_candidates: Vec<_> = self @@ -976,21 +974,13 @@ impl PickerDelegate for RecentProjectsDelegate { }) .collect(); - let mut project_group_matches = smol::block_on(fuzzy::match_strings( + let project_group_matches = match_strings( &project_group_candidates, query, - smart_case, - true, + case, + fuzzy_nucleo::LengthPenalty::On, 100, - &Default::default(), - cx.background_executor().clone(), - )); - project_group_matches.sort_unstable_by(|a, b| { - b.score - .partial_cmp(&a.score) - .unwrap_or(std::cmp::Ordering::Equal) - .then_with(|| a.candidate_id.cmp(&b.candidate_id)) - }); + ); // Build candidates for recent projects (not current, not sibling, not open folder) let recent_candidates: Vec<_> = self @@ -1008,21 +998,13 @@ impl PickerDelegate for RecentProjectsDelegate { }) .collect(); - let mut recent_matches = smol::block_on(fuzzy::match_strings( + let recent_matches = match_strings( &recent_candidates, query, - smart_case, - true, + case, + fuzzy_nucleo::LengthPenalty::On, 100, - &Default::default(), - cx.background_executor().clone(), - )); - recent_matches.sort_unstable_by(|a, b| { - b.score - .partial_cmp(&a.score) - .unwrap_or(std::cmp::Ordering::Equal) - .then_with(|| a.candidate_id.cmp(&b.candidate_id)) - }); + ); let mut entries = Vec::new(); @@ -1058,7 +1040,7 @@ impl PickerDelegate for RecentProjectsDelegate { candidate_id: id, score: 0.0, positions: Vec::new(), - string: String::new(), + string: Default::default(), })); } } else { @@ -1084,7 +1066,7 @@ impl PickerDelegate for RecentProjectsDelegate { candidate_id: id, score: 0.0, positions: Vec::new(), - string: String::new(), + string: Default::default(), })); } } diff --git a/crates/recent_projects/src/sidebar_recent_projects.rs b/crates/recent_projects/src/sidebar_recent_projects.rs index f197ed3cead41e1fb3786e5b5a99727b5ebcf6b9..65ed83831a8bd208cf5e38ba27e98bbec28100b4 100644 --- a/crates/recent_projects/src/sidebar_recent_projects.rs +++ b/crates/recent_projects/src/sidebar_recent_projects.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use chrono::{DateTime, Utc}; -use fuzzy::{StringMatch, StringMatchCandidate}; +use fuzzy_nucleo::{StringMatch, StringMatchCandidate, match_strings}; use gpui::{ Action, AnyElement, App, Context, DismissEvent, Entity, EventEmitter, FocusHandle, Focusable, Subscription, Task, WeakEntity, Window, @@ -194,7 +194,7 @@ impl PickerDelegate for SidebarRecentProjectsDelegate { cx: &mut Context>, ) -> Task<()> { let query = query.trim_start(); - let smart_case = query.chars().any(|c| c.is_uppercase()); + let case = fuzzy_nucleo::Case::smart_if_uppercase_in(query); let is_empty_query = query.is_empty(); let current_workspace_id = self @@ -234,22 +234,13 @@ impl PickerDelegate for SidebarRecentProjectsDelegate { }) .collect(); } else { - let mut matches = smol::block_on(fuzzy::match_strings( + self.filtered_workspaces = match_strings( &candidates, query, - smart_case, - true, + case, + fuzzy_nucleo::LengthPenalty::On, 100, - &Default::default(), - cx.background_executor().clone(), - )); - matches.sort_unstable_by(|a, b| { - b.score - .partial_cmp(&a.score) - .unwrap_or(std::cmp::Ordering::Equal) - .then_with(|| a.candidate_id.cmp(&b.candidate_id)) - }); - self.filtered_workspaces = matches; + ); } self.selected_index = 0; diff --git a/crates/recent_projects/src/wsl_picker.rs b/crates/recent_projects/src/wsl_picker.rs index c53dd7c3fb68bc087216764536506f85117ffb36..e0930fde365c11ecfb5e7e9e05fdf3682351acca 100644 --- a/crates/recent_projects/src/wsl_picker.rs +++ b/crates/recent_projects/src/wsl_picker.rs @@ -24,7 +24,7 @@ pub struct WslPickerDismissed; pub(crate) struct WslPickerDelegate { selected_index: usize, distro_list: Option>, - matches: Vec, + matches: Vec, } impl WslPickerDelegate { @@ -39,7 +39,7 @@ impl WslPickerDelegate { pub fn selected_distro(&self) -> Option { self.matches .get(self.selected_index) - .map(|m| m.string.clone()) + .map(|m| m.string.to_string()) } } @@ -101,9 +101,9 @@ impl picker::PickerDelegate for WslPickerDelegate { &mut self, query: String, _window: &mut Window, - cx: &mut Context>, + _cx: &mut Context>, ) -> Task<()> { - use fuzzy::StringMatchCandidate; + use fuzzy_nucleo::StringMatchCandidate; let needs_fetch = self.distro_list.is_none(); if needs_fetch { @@ -121,16 +121,14 @@ impl picker::PickerDelegate for WslPickerDelegate { .collect::>(); let query = query.trim_start(); - let smart_case = query.chars().any(|c| c.is_uppercase()); - self.matches = smol::block_on(fuzzy::match_strings( - candidates.as_slice(), + let case = fuzzy_nucleo::Case::smart_if_uppercase_in(query); + self.matches = fuzzy_nucleo::match_strings( + &candidates, query, - smart_case, - true, + case, + fuzzy_nucleo::LengthPenalty::On, 100, - &Default::default(), - cx.background_executor().clone(), - )); + ); self.matches.sort_unstable_by_key(|m| m.candidate_id); self.selected_index = self @@ -150,7 +148,7 @@ impl picker::PickerDelegate for WslPickerDelegate { if let Some(distro) = self.matches.get(self.selected_index) { cx.emit(WslDistroSelected { secondary, - distro: distro.string.clone(), + distro: distro.string.to_string(), }); } } diff --git a/crates/tab_switcher/Cargo.toml b/crates/tab_switcher/Cargo.toml index 8855c8869ab52260be668c45c20e5af7a869433f..5f0238ebc239c592068a4ee940caf621d388d3c3 100644 --- a/crates/tab_switcher/Cargo.toml +++ b/crates/tab_switcher/Cargo.toml @@ -15,7 +15,7 @@ doctest = false [dependencies] collections.workspace = true editor.workspace = true -fuzzy.workspace = true +fuzzy_nucleo.workspace = true gpui.workspace = true menu.workspace = true picker.workspace = true @@ -23,7 +23,6 @@ project.workspace = true schemars.workspace = true serde.workspace = true settings.workspace = true -smol.workspace = true ui.workspace = true util.workspace = true workspace.workspace = true diff --git a/crates/tab_switcher/src/tab_switcher.rs b/crates/tab_switcher/src/tab_switcher.rs index d1e19ea4faee8d8259d06e2c24875faac7a0117c..ac4087bb96b2ff36794b47f71f486fb7a5ee64ea 100644 --- a/crates/tab_switcher/src/tab_switcher.rs +++ b/crates/tab_switcher/src/tab_switcher.rs @@ -5,7 +5,7 @@ use collections::{HashMap, HashSet}; use editor::items::{ entry_diagnostic_aware_icon_decoration_and_color, entry_git_aware_label_color, }; -use fuzzy::StringMatchCandidate; +use fuzzy_nucleo::StringMatchCandidate; use gpui::{ Action, AnyElement, App, Context, DismissEvent, Entity, EntityId, EventEmitter, FocusHandle, Focusable, Modifiers, ModifiersChangedEvent, MouseButton, MouseUpEvent, ParentElement, Point, @@ -441,15 +441,13 @@ impl TabSwitcherDelegate { )) }) .collect::>(); - smol::block_on(fuzzy::match_strings( + fuzzy_nucleo::match_strings( &candidates, &query, - true, - true, + fuzzy_nucleo::Case::Smart, + fuzzy_nucleo::LengthPenalty::On, 10000, - &Default::default(), - cx.background_executor().clone(), - )) + ) .into_iter() .map(|m| all_items[m.candidate_id].clone()) .collect() diff --git a/typos.toml b/typos.toml index f2cd6d18be3f0c134d339c8533254e2619309408..22823e6b2d90d95cab03444c192a591228632980 100644 --- a/typos.toml +++ b/typos.toml @@ -24,8 +24,9 @@ extend-exclude = [ "crates/livekit_api/", # Vim makes heavy use of partial typing tables. "crates/vim/", - # Editor and file finder rely on partial typing and custom in-string syntax. + # Editor, file finder, and fuzzy matching rely on partial typing and custom in-string syntax. "crates/file_finder/src/file_finder_tests.rs", + "crates/fuzzy_nucleo/src/strings.rs", "crates/editor/src/editor_tests.rs", "crates/editor/src/edit_prediction_tests.rs", # There are some names in the test data that are incorrectly flagged as typos.