From 642dab82e5209dd3e9421b0dc2c1b47fe481df9e Mon Sep 17 00:00:00 2001 From: Peter Tripp Date: Wed, 1 Jan 2025 17:04:37 +0000 Subject: [PATCH] Fix for extension search crash (revert #22524; revert #22525) (#22543) Revert "Improve fuzzy match performance and fix corner case that omits results (#22524)" This reverts commit 6ef5d8f74809938f968d0ab86c7c12163c5e0940. Revert "Check cancel in multithreaded fuzzy matching (#22525)" This reverts commit 51ac2d366789fea42e9d1ee9294fe6065e128fe8. Fuzzy matching implemented in: - https://github.com/zed-industries/zed/pull/22524 - https://github.com/zed-industries/zed/pull/22525 Caused a panic in the extension store search: - Closes: https://github.com/zed-industries/zed/issues/22541 cc: @mgsloan Release Notes: - N/A --- crates/fuzzy/src/matcher.rs | 41 +++++++++++++++++++------- crates/fuzzy/src/paths.rs | 59 +++++++++++++++++++++++-------------- crates/fuzzy/src/strings.rs | 39 +++++++++++++++++------- crates/util/src/util.rs | 22 +------------- 4 files changed, 96 insertions(+), 65 deletions(-) diff --git a/crates/fuzzy/src/matcher.rs b/crates/fuzzy/src/matcher.rs index 1b039c16f507bab1a8865ad7e917c2e7d70b0b0b..ae56b84f1ed692f3d9fe9193169b3ab950f5f070 100644 --- a/crates/fuzzy/src/matcher.rs +++ b/crates/fuzzy/src/matcher.rs @@ -14,6 +14,7 @@ pub struct Matcher<'a> { lowercase_query: &'a [char], query_char_bag: CharBag, smart_case: bool, + max_results: usize, min_score: f64, match_positions: Vec, last_positions: Vec, @@ -21,6 +22,11 @@ pub struct Matcher<'a> { best_position_matrix: Vec, } +pub trait Match: Ord { + fn score(&self) -> f64; + fn set_positions(&mut self, positions: Vec); +} + pub trait MatchCandidate { fn has_chars(&self, bag: CharBag) -> bool; fn to_string(&self) -> Cow<'_, str>; @@ -32,6 +38,7 @@ impl<'a> Matcher<'a> { lowercase_query: &'a [char], query_char_bag: CharBag, smart_case: bool, + max_results: usize, ) -> Self { Self { query, @@ -43,11 +50,10 @@ impl<'a> Matcher<'a> { score_matrix: Vec::new(), best_position_matrix: Vec::new(), smart_case, + max_results, } } - /// Filter and score fuzzy match candidates. Results are returned unsorted, in the same order as - /// the input candidates. pub fn match_candidates( &mut self, prefix: &[char], @@ -57,7 +63,8 @@ impl<'a> Matcher<'a> { cancel_flag: &AtomicBool, build_match: F, ) where - F: Fn(&C, f64, &Vec) -> R, + R: Match, + F: Fn(&C, f64) -> R, { let mut candidate_chars = Vec::new(); let mut lowercase_candidate_chars = Vec::new(); @@ -96,7 +103,20 @@ impl<'a> Matcher<'a> { ); if score > 0.0 { - results.push(build_match(&candidate, score, &self.match_positions)); + let mut mat = build_match(&candidate, score); + if let Err(i) = results.binary_search_by(|m| mat.cmp(m)) { + if results.len() < self.max_results { + mat.set_positions(self.match_positions.clone()); + results.insert(i, mat); + } else if i < results.len() { + results.pop(); + mat.set_positions(self.match_positions.clone()); + results.insert(i, mat); + } + if results.len() == self.max_results { + self.min_score = results.last().unwrap().score(); + } + } } } } @@ -305,18 +325,18 @@ mod tests { #[test] fn test_get_last_positions() { let mut query: &[char] = &['d', 'c']; - let mut matcher = Matcher::new(query, query, query.into(), false); + let mut matcher = Matcher::new(query, query, query.into(), false, 10); let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']); assert!(!result); query = &['c', 'd']; - let mut matcher = Matcher::new(query, query, query.into(), false); + let mut matcher = Matcher::new(query, query, query.into(), false, 10); let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']); assert!(result); assert_eq!(matcher.last_positions, vec![2, 4]); query = &['z', '/', 'z', 'f']; - let mut matcher = Matcher::new(query, query, query.into(), false); + let mut matcher = Matcher::new(query, query, query.into(), false, 10); let result = matcher.find_last_positions(&['z', 'e', 'd', '/'], &['z', 'e', 'd', '/', 'f']); assert!(result); assert_eq!(matcher.last_positions, vec![0, 3, 4, 8]); @@ -431,7 +451,7 @@ mod tests { }); } - let mut matcher = Matcher::new(&query, &lowercase_query, query_chars, smart_case); + let mut matcher = Matcher::new(&query, &lowercase_query, query_chars, smart_case, 100); let cancel_flag = AtomicBool::new(false); let mut results = Vec::new(); @@ -442,17 +462,16 @@ mod tests { path_entries.into_iter(), &mut results, &cancel_flag, - |candidate, score, positions| PathMatch { + |candidate, score| PathMatch { score, worktree_id: 0, - positions: positions.clone(), + positions: Vec::new(), path: Arc::from(candidate.path), path_prefix: "".into(), distance_to_relative_ancestor: usize::MAX, is_dir: false, }, ); - results.sort_by(|a, b| b.cmp(a)); results .into_iter() diff --git a/crates/fuzzy/src/paths.rs b/crates/fuzzy/src/paths.rs index bc3e399dc233cf9481339c753926b229f514c3db..2b4eec98ef08152b2b7b7dd906b018a390d7bcec 100644 --- a/crates/fuzzy/src/paths.rs +++ b/crates/fuzzy/src/paths.rs @@ -3,14 +3,11 @@ use std::{ borrow::Cow, cmp::{self, Ordering}, path::Path, - sync::{ - atomic::{self, AtomicBool}, - Arc, - }, + sync::{atomic::AtomicBool, Arc}, }; use crate::{ - matcher::{MatchCandidate, Matcher}, + matcher::{Match, MatchCandidate, Matcher}, CharBag, }; @@ -45,6 +42,16 @@ pub trait PathMatchCandidateSet<'a>: Send + Sync { fn candidates(&'a self, start: usize) -> Self::Candidates; } +impl Match for PathMatch { + fn score(&self) -> f64 { + self.score + } + + fn set_positions(&mut self, positions: Vec) { + self.positions = positions; + } +} + impl<'a> MatchCandidate for PathMatchCandidate<'a> { fn has_chars(&self, bag: CharBag) -> bool { self.char_bag.is_superset(bag) @@ -95,7 +102,13 @@ pub fn match_fixed_path_set( let query = query.chars().collect::>(); let query_char_bag = CharBag::from(&lowercase_query[..]); - let mut matcher = Matcher::new(&query, &lowercase_query, query_char_bag, smart_case); + let mut matcher = Matcher::new( + &query, + &lowercase_query, + query_char_bag, + smart_case, + max_results, + ); let mut results = Vec::new(); matcher.match_candidates( @@ -104,17 +117,16 @@ pub fn match_fixed_path_set( candidates.into_iter(), &mut results, &AtomicBool::new(false), - |candidate, score, positions| PathMatch { + |candidate, score| PathMatch { score, worktree_id, - positions: positions.clone(), + positions: Vec::new(), is_dir: candidate.is_dir, path: Arc::from(candidate.path), path_prefix: Arc::default(), distance_to_relative_ancestor: usize::MAX, }, ); - util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a)); results } @@ -152,15 +164,16 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>( scope.spawn(async move { let segment_start = segment_idx * segment_size; let segment_end = segment_start + segment_size; - let mut matcher = - Matcher::new(query, lowercase_query, query_char_bag, smart_case); + let mut matcher = Matcher::new( + query, + lowercase_query, + query_char_bag, + smart_case, + max_results, + ); let mut tree_start = 0; for candidate_set in candidate_sets { - if cancel_flag.load(atomic::Ordering::Relaxed) { - break; - } - let tree_end = tree_start + candidate_set.len(); if tree_start < segment_end && segment_start < tree_end { @@ -180,10 +193,10 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>( candidates, results, cancel_flag, - |candidate, score, positions| PathMatch { + |candidate, score| PathMatch { score, worktree_id, - positions: positions.clone(), + positions: Vec::new(), path: Arc::from(candidate.path), is_dir: candidate.is_dir, path_prefix: candidate_set.prefix(), @@ -209,12 +222,14 @@ pub async fn match_path_sets<'a, Set: PathMatchCandidateSet<'a>>( }) .await; - if cancel_flag.load(atomic::Ordering::Relaxed) { - return Vec::new(); + let mut results = Vec::new(); + for segment_result in segment_results { + if results.is_empty() { + results = segment_result; + } else { + util::extend_sorted(&mut results, segment_result, max_results, |a, b| b.cmp(a)); + } } - - let mut results = segment_results.concat(); - util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a)); results } diff --git a/crates/fuzzy/src/strings.rs b/crates/fuzzy/src/strings.rs index 458278739ab2a8868794f1a640cac12ac908b546..e5fa6d7e6154f17700c84633301e548517553136 100644 --- a/crates/fuzzy/src/strings.rs +++ b/crates/fuzzy/src/strings.rs @@ -1,5 +1,5 @@ use crate::{ - matcher::{MatchCandidate, Matcher}, + matcher::{Match, MatchCandidate, Matcher}, CharBag, }; use gpui::BackgroundExecutor; @@ -8,7 +8,7 @@ use std::{ cmp::{self, Ordering}, iter, ops::Range, - sync::atomic::{self, AtomicBool}, + sync::atomic::AtomicBool, }; #[derive(Clone, Debug)] @@ -46,6 +46,16 @@ pub struct StringMatch { pub string: String, } +impl Match for StringMatch { + fn score(&self) -> f64 { + self.score + } + + fn set_positions(&mut self, positions: Vec) { + self.positions = positions; + } +} + impl StringMatch { pub fn ranges(&self) -> impl '_ + Iterator> { let mut positions = self.positions.iter().peekable(); @@ -157,8 +167,13 @@ pub async fn match_strings( scope.spawn(async move { let segment_start = cmp::min(segment_idx * segment_size, candidates.len()); let segment_end = cmp::min(segment_start + segment_size, candidates.len()); - let mut matcher = - Matcher::new(query, lowercase_query, query_char_bag, smart_case); + let mut matcher = Matcher::new( + query, + lowercase_query, + query_char_bag, + smart_case, + max_results, + ); matcher.match_candidates( &[], @@ -166,10 +181,10 @@ pub async fn match_strings( candidates[segment_start..segment_end].iter(), results, cancel_flag, - |candidate, score, positions| StringMatch { + |candidate, score| StringMatch { candidate_id: candidate.id, score, - positions: positions.clone(), + positions: Vec::new(), string: candidate.string.to_string(), }, ); @@ -178,11 +193,13 @@ pub async fn match_strings( }) .await; - if cancel_flag.load(atomic::Ordering::Relaxed) { - return Vec::new(); + let mut results = Vec::new(); + for segment_result in segment_results { + if results.is_empty() { + results = segment_result; + } else { + util::extend_sorted(&mut results, segment_result, max_results, |a, b| b.cmp(a)); + } } - - let mut results = segment_results.concat(); - util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a)); results } diff --git a/crates/util/src/util.rs b/crates/util/src/util.rs index 7df46eda27f69e3056ed91d95a864266befa3a7a..a7619586941175d64054a67ac2721927c87e75e3 100644 --- a/crates/util/src/util.rs +++ b/crates/util/src/util.rs @@ -8,6 +8,7 @@ pub mod test; use anyhow::{anyhow, Context as _, Result}; use futures::Future; + use itertools::Either; use regex::Regex; use std::sync::{LazyLock, OnceLock}; @@ -110,27 +111,6 @@ where } } -pub fn truncate_to_bottom_n_sorted_by(items: &mut Vec, limit: usize, compare: &F) -where - F: Fn(&T, &T) -> Ordering, -{ - if limit == 0 { - items.truncate(0); - } - if items.len() < limit { - return; - } - // When limit is near to items.len() it may be more efficient to sort the whole list and - // truncate, rather than always doing selection first as is done below. It's hard to analyze - // where the threshold for this should be since the quickselect style algorithm used by - // `select_nth_unstable_by` makes the prefix partially sorted, and so its work is not wasted - - // the expected number of comparisons needed by `sort_by` is less than it is for some arbitrary - // unsorted input. - items.select_nth_unstable_by(limit, compare); - items.truncate(limit); - items.sort_by(compare); -} - #[cfg(unix)] pub fn load_shell_from_passwd() -> Result<()> { let buflen = match unsafe { libc::sysconf(libc::_SC_GETPW_R_SIZE_MAX) } {