Detailed changes
@@ -2040,6 +2040,10 @@ dependencies = [
"slab",
]
+[[package]]
+name = "fuzzy"
+version = "0.1.0"
+
[[package]]
name = "generator"
version = "0.6.23"
@@ -5912,6 +5916,7 @@ dependencies = [
"env_logger",
"fsevent",
"futures",
+ "fuzzy",
"gpui",
"http-auth-basic",
"ignore",
@@ -1,5 +1,5 @@
[workspace]
-members = ["fsevent", "gpui", "gpui_macros", "server", "sum_tree", "zed", "zrpc"]
+members = ["fsevent", "fuzzy", "gpui", "gpui_macros", "server", "sum_tree", "zed", "zrpc"]
default-members = ["zed"]
[patch.crates-io]
@@ -0,0 +1,6 @@
+[package]
+name = "fuzzy"
+version = "0.1.0"
+edition = "2018"
+
+[dependencies]
@@ -0,0 +1,614 @@
+mod char_bag;
+
+use std::{
+ borrow::Cow,
+ cmp::Ordering,
+ path::Path,
+ sync::atomic::{self, AtomicBool},
+ sync::Arc,
+};
+
+pub use char_bag::CharBag;
+
+const BASE_DISTANCE_PENALTY: f64 = 0.6; // Per-char score for a match that does not follow a separator/word boundary; used unchanged for the first query char.
+const ADDITIONAL_DISTANCE_PENALTY: f64 = 0.05; // Deducted from the base score once per char of gap beyond the first between the search start and the match.
+const MIN_DISTANCE_PENALTY: f64 = 0.2; // Floor for the distance-penalised per-char score.
+
+pub struct Matcher<'a> { // Scoring state for one query; the scratch buffers below are reused for every candidate.
+ query: &'a [char], // Query chars as supplied by the caller; compared with original-case candidate chars for the case penalty.
+ lowercase_query: &'a [char], // Caller-supplied lowercased query; compared with lowercased candidate chars while scoring.
+ query_char_bag: CharBag, // Passed to `MatchCandidate::has_chars` to skip candidates before any scoring.
+ smart_case: bool, // When true, a candidate char that differs from the query char as typed gets a 0.001 score factor.
+ max_results: usize, // Cap on the length of the results vector being filled.
+ min_score: f64, // 0.0 until a results vector is full, then the score of its last entry; used to prune the search.
+ match_positions: Vec<usize>, // Byte offset of each matched query char for the most recently scored candidate.
+ last_positions: Vec<usize>, // Per query char, the last char index (counting prefix then candidate) searched for it.
+ score_matrix: Vec<Option<f64>>, // Memo table indexed by `query_idx * (prefix len + candidate len) + path_idx`.
+ best_position_matrix: Vec<usize>, // Indexed like `score_matrix`; best match position recorded for that cell.
+}
+
+trait Match: Ord { // A scored result that `Matcher` keeps in a sorted results vector.
+ fn score(&self) -> f64; // The score assigned to this match.
+ fn set_positions(&mut self, positions: Vec<usize>); // Stores the byte offsets of the matched query chars.
+}
+
+trait MatchCandidate { // Something a query can be scored against.
+ fn has_chars(&self, bag: CharBag) -> bool; // Pre-filter: candidates returning false are skipped without being scored.
+ fn to_string<'a>(&'a self) -> Cow<'a, str>; // The text whose chars are scored against the query.
+}
+
+#[derive(Clone, Debug)]
+pub struct PathMatchCandidate<'a> { // A borrowed path offered to `Matcher::match_paths`.
+ pub path: &'a Arc<Path>, // The path to score; cloned into the resulting `PathMatch` on a hit.
+ pub char_bag: CharBag, // Checked with `is_superset` against the query's bag before scoring.
+}
+
+#[derive(Clone, Debug)]
+pub struct PathMatch { // A path that matched the query, as built by `Matcher::match_paths`.
+ pub score: f64, // Match score; results vectors are kept in descending score order.
+ pub positions: Vec<usize>, // Byte offsets of the matched chars, counted over the prefix chars followed by the path chars.
+ pub tree_id: usize, // The `tree_id` that was passed to `match_paths`.
+ pub path: Arc<Path>, // Clone of the candidate's path.
+ pub path_prefix: Arc<str>, // The `path_prefix` that was passed to `match_paths`.
+}
+
+#[derive(Clone, Debug)]
+pub struct StringMatchCandidate { // An owned string offered to `Matcher::match_strings`.
+ pub string: String, // The text to score; copied into the resulting `StringMatch` on a hit.
+ pub char_bag: CharBag, // Checked with `is_superset` against the query's bag before scoring.
+}
+
+/// Lets `Matcher` rank `PathMatch` values and attach match positions to them.
+impl Match for PathMatch {
+    /// Returns the stored score.
+    fn score(&self) -> f64 {
+        let Self { score, .. } = self;
+        *score
+    }
+
+    /// Replaces the stored positions with `positions`.
+    fn set_positions(&mut self, positions: Vec<usize>) {
+        let _previous = std::mem::replace(&mut self.positions, positions);
+    }
+}
+
+/// Lets `Matcher` rank `StringMatch` values and attach match positions to them.
+impl Match for StringMatch {
+    /// Returns the stored score.
+    fn score(&self) -> f64 {
+        let Self { score, .. } = self;
+        *score
+    }
+
+    /// Replaces the stored positions with `positions`.
+    fn set_positions(&mut self, positions: Vec<usize>) {
+        let _previous = std::mem::replace(&mut self.positions, positions);
+    }
+}
+
+/// Path candidates are scored on the lossy UTF-8 rendering of their path.
+impl<'a> MatchCandidate for PathMatchCandidate<'a> {
+    /// True when this candidate's char bag is a superset of `bag`.
+    fn has_chars(&self, bag: CharBag) -> bool {
+        let own_bag = self.char_bag;
+        own_bag.is_superset(bag)
+    }
+
+    /// The path as text; non-UTF-8 parts are converted lossily.
+    fn to_string(&self) -> Cow<'a, str> {
+        Path::to_string_lossy(self.path)
+    }
+}
+
+/// String candidates are scored on their `string` field, borrowed without copying.
+impl<'a> MatchCandidate for &'a StringMatchCandidate {
+    /// True when this candidate's char bag is a superset of `bag`.
+    fn has_chars(&self, bag: CharBag) -> bool {
+        let own_bag = self.char_bag;
+        own_bag.is_superset(bag)
+    }
+
+    /// Borrows the candidate's string.
+    fn to_string(&self) -> Cow<'a, str> {
+        Cow::Borrowed(self.string.as_str())
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct StringMatch { // A string that matched the query, as built by `Matcher::match_strings`.
+ pub score: f64, // Match score; results vectors are kept in descending score order.
+ pub positions: Vec<usize>, // Byte offsets of the matched chars within `string`.
+ pub string: String, // Copy of the candidate's string.
+}
+
+/// Equality is defined through [`Ord::cmp`] so that `==`, `partial_cmp` and `cmp` always
+/// agree, as the standard library requires of types implementing `Eq` and `Ord`.
+impl PartialEq for StringMatch {
+    /// Returns true exactly when `cmp` reports `Ordering::Equal`.
+    ///
+    /// Comparing only the scores would call two matches with different strings equal
+    /// while `cmp` orders them, and would make a NaN-scored match unequal to itself.
+    fn eq(&self, other: &Self) -> bool {
+        self.cmp(other) == Ordering::Equal
+    }
+}
+
+impl Eq for StringMatch {} // Marker impl: `Ord` has `Eq` as a supertrait, so it must exist for the `Ord` impl to compile.
+
+/// The partial order is the total order from `Ord`, so it never yields `None`.
+impl PartialOrd for StringMatch {
+    /// Delegates to `Ord::cmp`.
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        let ordering = Ord::cmp(self, other);
+        Some(ordering)
+    }
+}
+
+/// Orders matches by score, then by string.
+impl Ord for StringMatch {
+    /// Compares scores first; incomparable scores (NaN) count as equal. Ties are broken
+    /// by comparing the matched strings.
+    fn cmp(&self, other: &Self) -> Ordering {
+        let by_score = self
+            .score
+            .partial_cmp(&other.score)
+            .unwrap_or(Ordering::Equal);
+        match by_score {
+            Ordering::Equal => self.string.cmp(&other.string),
+            decided => decided,
+        }
+    }
+}
+
+/// Equality is defined through [`Ord::cmp`] so that `==`, `partial_cmp` and `cmp` always
+/// agree, as the standard library requires of types implementing `Eq` and `Ord`.
+impl PartialEq for PathMatch {
+    /// Returns true exactly when `cmp` reports `Ordering::Equal`.
+    ///
+    /// Comparing only the scores would call two matches for different paths equal while
+    /// `cmp` orders them, and would make a NaN-scored match unequal to itself.
+    fn eq(&self, other: &Self) -> bool {
+        self.cmp(other) == Ordering::Equal
+    }
+}
+
+impl Eq for PathMatch {} // Marker impl: `Ord` has `Eq` as a supertrait, so it must exist for the `Ord` impl to compile.
+
+/// The partial order is the total order from `Ord`, so it never yields `None`.
+impl PartialOrd for PathMatch {
+    /// Delegates to `Ord::cmp`.
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        let ordering = Ord::cmp(self, other);
+        Some(ordering)
+    }
+}
+
+/// Orders matches by score, then by tree id, then by the address of the shared path.
+impl Ord for PathMatch {
+    /// Compares scores first; incomparable scores (NaN) count as equal. Ties are broken
+    /// by `tree_id` and finally by the pointer held in `path`, so the last tie-break
+    /// depends on allocation identity rather than on the path's contents.
+    fn cmp(&self, other: &Self) -> Ordering {
+        let by_score = self
+            .score
+            .partial_cmp(&other.score)
+            .unwrap_or(Ordering::Equal);
+        if by_score != Ordering::Equal {
+            return by_score;
+        }
+
+        let by_tree = self.tree_id.cmp(&other.tree_id);
+        if by_tree != Ordering::Equal {
+            return by_tree;
+        }
+
+        Arc::as_ptr(&self.path).cmp(&Arc::as_ptr(&other.path))
+    }
+}
+
+impl<'a> Matcher<'a> {
+    /// Creates a matcher for a single query.
+    ///
+    /// * `query` - the query characters as typed; used for the case-mismatch penalty.
+    /// * `lowercase_query` - the lowercased query; it is indexed in parallel with `query`,
+    ///   so both slices are expected to have the same length.
+    /// * `query_char_bag` - handed to each candidate's `has_chars` as a cheap pre-filter.
+    /// * `smart_case` - when true, candidate characters that differ from the typed query
+    ///   character are heavily penalised.
+    /// * `max_results` - upper bound on the number of entries kept in a results vector.
+    pub fn new(
+        query: &'a [char],
+        lowercase_query: &'a [char],
+        query_char_bag: CharBag,
+        smart_case: bool,
+        max_results: usize,
+    ) -> Self {
+        Self {
+            query,
+            lowercase_query,
+            query_char_bag,
+            min_score: 0.0,
+            last_positions: vec![0; query.len()],
+            match_positions: vec![0; query.len()],
+            score_matrix: Vec::new(),
+            best_position_matrix: Vec::new(),
+            smart_case,
+            max_results,
+        }
+    }
+
+    /// Scores every string in `candidates` and merges the hits into `results`.
+    ///
+    /// `results` is kept sorted by descending score and never grows beyond
+    /// `max_results`. Matching stops early once `cancel_flag` is observed to be set.
+    pub fn match_strings(
+        &mut self,
+        candidates: &[StringMatchCandidate],
+        results: &mut Vec<StringMatch>,
+        cancel_flag: &AtomicBool,
+    ) {
+        self.match_internal(
+            &[],
+            &[],
+            candidates.iter(),
+            results,
+            cancel_flag,
+            |candidate, score| StringMatch {
+                score,
+                positions: Vec::new(),
+                string: candidate.string.clone(),
+            },
+        )
+    }
+
+    /// Scores every path in `path_entries` and merges the hits into `results`.
+    ///
+    /// `path_prefix` is treated as text that precedes each candidate path: its characters
+    /// take part in matching, and reported positions are byte offsets counted over the
+    /// prefix followed by the path. `tree_id` and `path_prefix` are copied into every
+    /// produced `PathMatch`. `results` is kept sorted by descending score and capped at
+    /// `max_results`; matching stops early once `cancel_flag` is observed to be set.
+    pub fn match_paths(
+        &mut self,
+        tree_id: usize,
+        path_prefix: Arc<str>,
+        path_entries: impl Iterator<Item = PathMatchCandidate<'a>>,
+        results: &mut Vec<PathMatch>,
+        cancel_flag: &AtomicBool,
+    ) {
+        let prefix = path_prefix.chars().collect::<Vec<_>>();
+        let lowercase_prefix = prefix
+            .iter()
+            .map(|c| c.to_ascii_lowercase())
+            .collect::<Vec<_>>();
+        self.match_internal(
+            &prefix,
+            &lowercase_prefix,
+            path_entries,
+            results,
+            cancel_flag,
+            |candidate, score| PathMatch {
+                score,
+                tree_id,
+                positions: Vec::new(),
+                path: Arc::clone(candidate.path),
+                path_prefix: Arc::clone(&path_prefix),
+            },
+        )
+    }
+
+    /// Shared driver behind `match_strings` and `match_paths`.
+    ///
+    /// For each candidate that passes the char-bag filter it computes a score and, when
+    /// the score is positive, inserts `build_match(candidate, score)` into `results` at
+    /// its sorted position (descending). Once `results` holds `max_results` entries the
+    /// lowest-scoring entry is evicted to make room, and its score becomes the pruning
+    /// threshold `min_score`.
+    fn match_internal<C: MatchCandidate, R, F>(
+        &mut self,
+        prefix: &[char],
+        lowercase_prefix: &[char],
+        candidates: impl Iterator<Item = C>,
+        results: &mut Vec<R>,
+        cancel_flag: &AtomicBool,
+        build_match: F,
+    ) where
+        R: Match,
+        F: Fn(&C, f64) -> R,
+    {
+        // Scratch buffers reused for every candidate to avoid reallocating.
+        let mut candidate_chars = Vec::new();
+        let mut lowercase_candidate_chars = Vec::new();
+
+        for candidate in candidates {
+            if !candidate.has_chars(self.query_char_bag) {
+                continue;
+            }
+
+            if cancel_flag.load(atomic::Ordering::Relaxed) {
+                break;
+            }
+
+            candidate_chars.clear();
+            lowercase_candidate_chars.clear();
+            for c in candidate.to_string().chars() {
+                candidate_chars.push(c);
+                lowercase_candidate_chars.push(c.to_ascii_lowercase());
+            }
+
+            if !self.find_last_positions(lowercase_prefix, &lowercase_candidate_chars) {
+                continue;
+            }
+
+            // Reset the memo tables for this candidate's dimensions.
+            let matrix_len = self.query.len() * (prefix.len() + candidate_chars.len());
+            self.score_matrix.clear();
+            self.score_matrix.resize(matrix_len, None);
+            self.best_position_matrix.clear();
+            self.best_position_matrix.resize(matrix_len, 0);
+
+            let score = self.score_match(
+                &candidate_chars,
+                &lowercase_candidate_chars,
+                prefix,
+                lowercase_prefix,
+            );
+
+            if score > 0.0 {
+                let mut mat = build_match(&candidate, score);
+                // The comparator is reversed on purpose: `results` is sorted descending.
+                if let Err(i) = results.binary_search_by(|m| mat.cmp(m)) {
+                    let has_room = results.len() < self.max_results;
+                    // When full, only accept a match that outranks an existing entry.
+                    if has_room || i < results.len() {
+                        if !has_room {
+                            results.pop();
+                        }
+                        mat.set_positions(self.match_positions.clone());
+                        results.insert(i, mat);
+                    }
+                    if results.len() == self.max_results {
+                        // `results` is empty here when `max_results` is 0; there is then
+                        // no threshold to record, and nothing to unwrap.
+                        if let Some(lowest) = results.last() {
+                            self.min_score = lowest.score();
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /// Records, for every query character, the right-most index at which it can still be
+    /// matched while leaving room for the query characters that follow it.
+    ///
+    /// `prefix` and `path` are the lowercased prefix and candidate; indices count the
+    /// prefix characters first and the candidate characters after them. The scan runs
+    /// from the last query character to the first, consuming `path` and then `prefix`
+    /// from the back. Returns false when some query character cannot be placed, which
+    /// means the candidate cannot match at all.
+    ///
+    /// The lowercased query is used because both inputs are lowercased; comparing the
+    /// query as typed would reject every candidate whenever the query contains an
+    /// uppercase character.
+    fn find_last_positions(&mut self, prefix: &[char], path: &[char]) -> bool {
+        let mut path = path.iter();
+        let mut prefix_iter = prefix.iter();
+        // Copy the slice reference out so `last_positions` can be borrowed mutably.
+        let lowercase_query = self.lowercase_query;
+        for (query_char, last_position) in lowercase_query
+            .iter()
+            .zip(self.last_positions.iter_mut())
+            .rev()
+        {
+            if let Some(j) = path.rposition(|c| c == query_char) {
+                *last_position = j + prefix.len();
+            } else if let Some(j) = prefix_iter.rposition(|c| c == query_char) {
+                *last_position = j;
+            } else {
+                return false;
+            }
+        }
+        true
+    }
+
+    /// Scores one candidate and, on success, fills `match_positions`.
+    ///
+    /// `path` holds the candidate's original characters and `path_cased` its lowercased
+    /// characters; `prefix` / `lowercase_prefix` are the same pair for the prefix.
+    /// Returns 0.0 when the candidate does not match. Otherwise returns the recursive
+    /// score multiplied by the query length and stores, for each query character, the
+    /// byte offset of its best match position.
+    fn score_match(
+        &mut self,
+        path: &[char],
+        path_cased: &[char],
+        prefix: &[char],
+        lowercase_prefix: &[char],
+    ) -> f64 {
+        let score = self.recursive_score_match(
+            path,
+            path_cased,
+            prefix,
+            lowercase_prefix,
+            0,
+            0,
+            self.query.len() as f64,
+        ) * self.query.len() as f64;
+
+        if score <= 0.0 {
+            return 0.0;
+        }
+
+        // Walk the best-position table row by row, converting char indices into byte
+        // offsets as we go (positions only ever move forward).
+        let path_len = prefix.len() + path.len();
+        let mut cur_start = 0;
+        let mut byte_ix = 0;
+        let mut char_ix = 0;
+        for i in 0..self.query.len() {
+            let match_char_ix = self.best_position_matrix[i * path_len + cur_start];
+            while char_ix < match_char_ix {
+                // The closure only runs once `char_ix` is past the prefix, so the
+                // subtraction cannot underflow.
+                let ch = prefix
+                    .get(char_ix)
+                    .or_else(|| path.get(char_ix - prefix.len()))
+                    .unwrap();
+                byte_ix += ch.len_utf8();
+                char_ix += 1;
+            }
+            cur_start = match_char_ix + 1;
+            self.match_positions[i] = byte_ix;
+        }
+
+        score
+    }
+
+    /// Best score for matching `query[query_idx..]` at char indices `>= path_idx`.
+    ///
+    /// Returns 1.0 once the whole query is consumed and 0.0 when no placement exists.
+    /// Results are memoised in `score_matrix`, and the position chosen for `query_idx`
+    /// is stored in `best_position_matrix`. `cur_score` is the running score of the
+    /// placements made so far; it is only used to abandon branches that cannot reach
+    /// `min_score`.
+    fn recursive_score_match(
+        &mut self,
+        path: &[char],
+        path_cased: &[char],
+        prefix: &[char],
+        lowercase_prefix: &[char],
+        query_idx: usize,
+        path_idx: usize,
+        cur_score: f64,
+    ) -> f64 {
+        if query_idx == self.query.len() {
+            return 1.0;
+        }
+
+        let path_len = prefix.len() + path.len();
+        let memo_ix = query_idx * path_len + path_idx;
+
+        if let Some(memoized) = self.score_matrix[memo_ix] {
+            return memoized;
+        }
+
+        let mut score = 0.0;
+        let mut best_position = 0;
+
+        let query_char = self.lowercase_query[query_idx];
+        let limit = self.last_positions[query_idx];
+
+        let mut last_slash = 0;
+        for j in path_idx..=limit {
+            let path_char = if j < prefix.len() {
+                lowercase_prefix[j]
+            } else {
+                path_cased[j - prefix.len()]
+            };
+            let is_path_sep = path_char == '/' || path_char == '\\';
+
+            if query_idx == 0 && is_path_sep {
+                last_slash = j;
+            }
+
+            // A query char matches itself; additionally `_` and `\` in the query match a
+            // path separator. The inner parentheses matter: without them `&&` binds
+            // tighter than `||` and a `\` in the query would match every character.
+            let is_match = query_char == path_char
+                || (is_path_sep && (query_char == '_' || query_char == '\\'));
+            if !is_match {
+                continue;
+            }
+
+            let curr = if j < prefix.len() {
+                prefix[j]
+            } else {
+                path[j - prefix.len()]
+            };
+
+            let mut char_score = 1.0;
+            if j > path_idx {
+                let last = if j - 1 < prefix.len() {
+                    prefix[j - 1]
+                } else {
+                    path[j - 1 - prefix.len()]
+                };
+
+                if last == '/' {
+                    char_score = 0.9;
+                } else if last == '-' || last == '_' || last == ' ' || last.is_numeric() {
+                    char_score = 0.8;
+                } else if last.is_lowercase() && curr.is_uppercase() {
+                    char_score = 0.8;
+                } else if last == '.' {
+                    char_score = 0.7;
+                } else if query_idx == 0 {
+                    char_score = BASE_DISTANCE_PENALTY;
+                } else {
+                    char_score = MIN_DISTANCE_PENALTY.max(
+                        BASE_DISTANCE_PENALTY
+                            - (j - path_idx - 1) as f64 * ADDITIONAL_DISTANCE_PENALTY,
+                    );
+                }
+            }
+
+            // Apply a severe penalty if the case doesn't match.
+            // This will make the exact matches have higher score than the case-insensitive
+            // and the path insensitive matches.
+            if (self.smart_case || curr == '/') && self.query[query_idx] != curr {
+                char_score *= 0.001;
+            }
+
+            let mut multiplier = char_score;
+
+            // Scale the score based on how deep within the path we found the match.
+            if query_idx == 0 {
+                multiplier /= ((prefix.len() + path.len()) - last_slash) as f64;
+            }
+
+            let mut next_score = 1.0;
+            if self.min_score > 0.0 {
+                next_score = cur_score * multiplier;
+                // Scores only decrease. If we can't pass the previous best, bail.
+                if next_score < self.min_score {
+                    // Ensure that score is non-zero so we use it in the memo table.
+                    if score == 0.0 {
+                        score = 1e-18;
+                    }
+                    continue;
+                }
+            }
+
+            let new_score = self.recursive_score_match(
+                path,
+                path_cased,
+                prefix,
+                lowercase_prefix,
+                query_idx + 1,
+                j + 1,
+                next_score,
+            ) * multiplier;
+
+            if new_score > score {
+                score = new_score;
+                best_position = j;
+                // Optimization: can't score better than 1.
+                if new_score == 1.0 {
+                    break;
+                }
+            }
+        }
+
+        // The table is pre-filled with 0, so a best position of 0 needs no write.
+        if best_position != 0 {
+            self.best_position_matrix[memo_ix] = best_position;
+        }
+
+        self.score_matrix[memo_ix] = Some(score);
+        score
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::path::PathBuf;
+
+    /// `find_last_positions` must reject out-of-order queries and otherwise report the
+    /// right-most usable index of every query char (prefix indices first).
+    #[test]
+    fn test_get_last_positions() {
+        let prefix = ['a', 'b', 'c'];
+        let path = ['b', 'd', 'e', 'f'];
+
+        let query: &[char] = &['d', 'c'];
+        let mut matcher = Matcher::new(query, query, query.into(), false, 10);
+        let found = matcher.find_last_positions(&prefix, &path);
+        assert!(!found);
+
+        let query: &[char] = &['c', 'd'];
+        let mut matcher = Matcher::new(query, query, query.into(), false, 10);
+        let found = matcher.find_last_positions(&prefix, &path);
+        assert!(found);
+        assert_eq!(matcher.last_positions, vec![2, 4]);
+
+        let query: &[char] = &['z', '/', 'z', 'f'];
+        let mut matcher = Matcher::new(query, query, query.into(), false, 10);
+        let found =
+            matcher.find_last_positions(&['z', 'e', 'd', '/'], &['z', 'e', 'd', '/', 'f']);
+        assert!(found);
+        assert_eq!(matcher.last_positions, vec![0, 3, 4, 8]);
+    }
+
+    /// Checks ranking and reported byte positions for ASCII paths.
+    #[test]
+    fn test_match_path_entries() {
+        let paths = vec![
+            "",
+            "a",
+            "ab",
+            "abC",
+            "abcd",
+            "alphabravocharlie",
+            "AlphaBravoCharlie",
+            "thisisatestdir",
+            "/////ThisIsATestDir",
+            "/this/is/a/test/dir",
+            "/test/tiatd",
+        ];
+
+        let abc_matches = match_query("abc", false, &paths);
+        assert_eq!(
+            abc_matches,
+            vec![
+                ("abC", vec![0, 1, 2]),
+                ("abcd", vec![0, 1, 2]),
+                ("AlphaBravoCharlie", vec![0, 5, 10]),
+                ("alphabravocharlie", vec![4, 5, 10]),
+            ]
+        );
+
+        let slash_matches = match_query("t/i/a/t/d", false, &paths);
+        assert_eq!(
+            slash_matches,
+            vec![("/this/is/a/test/dir", vec![1, 5, 6, 8, 9, 10, 11, 15, 16])]
+        );
+
+        let tiatd_matches = match_query("tiatd", false, &paths);
+        assert_eq!(
+            tiatd_matches,
+            vec![
+                ("/test/tiatd", vec![6, 7, 8, 9, 10]),
+                ("/this/is/a/test/dir", vec![1, 6, 9, 11, 16]),
+                ("/////ThisIsATestDir", vec![5, 9, 11, 12, 16]),
+                ("thisisatestdir", vec![0, 2, 6, 7, 11]),
+            ]
+        );
+    }
+
+    /// Positions are byte offsets, so multi-byte characters shift later matches.
+    #[test]
+    fn test_match_multibyte_path_entries() {
+        let paths = vec!["aαbβ/cγdδ", "αβγδ/bcde", "c1️⃣2️⃣3️⃣/d4️⃣5️⃣6️⃣/e7️⃣8️⃣9️⃣/f", "/d/🆒/h"];
+        assert_eq!("1️⃣".len(), 7);
+
+        let bcd_matches = match_query("bcd", false, &paths);
+        assert_eq!(
+            bcd_matches,
+            vec![
+                ("αβγδ/bcde", vec![9, 10, 11]),
+                ("aαbβ/cγdδ", vec![3, 7, 10]),
+            ]
+        );
+
+        let cde_matches = match_query("cde", false, &paths);
+        assert_eq!(
+            cde_matches,
+            vec![
+                ("αβγδ/bcde", vec![10, 11, 12]),
+                ("c1️⃣2️⃣3️⃣/d4️⃣5️⃣6️⃣/e7️⃣8️⃣9️⃣/f", vec![0, 23, 46]),
+            ]
+        );
+    }
+
+    /// Runs `query` against `paths` with an empty prefix and returns each hit as the
+    /// original path string paired with its match positions, in result order.
+    fn match_query<'a>(
+        query: &str,
+        smart_case: bool,
+        paths: &Vec<&'a str>,
+    ) -> Vec<(&'a str, Vec<usize>)> {
+        let lowercase_query = query.to_lowercase().chars().collect::<Vec<_>>();
+        let query = query.chars().collect::<Vec<_>>();
+        let query_chars = CharBag::from(&lowercase_query[..]);
+
+        let path_arcs: Vec<Arc<Path>> = paths
+            .iter()
+            .map(|path| Arc::from(PathBuf::from(path)))
+            .collect();
+        let path_entries = paths
+            .iter()
+            .zip(path_arcs.iter())
+            .map(|(path, arc)| {
+                let lowercase_path = path.to_lowercase().chars().collect::<Vec<_>>();
+                PathMatchCandidate {
+                    char_bag: CharBag::from(lowercase_path.as_slice()),
+                    path: arc,
+                }
+            })
+            .collect::<Vec<_>>();
+
+        let mut matcher = Matcher::new(&query, &lowercase_query, query_chars, smart_case, 100);
+
+        let cancel_flag = AtomicBool::new(false);
+        let mut results = Vec::new();
+        matcher.match_paths(
+            0,
+            "".into(),
+            path_entries.into_iter(),
+            &mut results,
+            &cancel_flag,
+        );
+
+        let mut matched = Vec::with_capacity(results.len());
+        for result in results {
+            let original = paths
+                .iter()
+                .copied()
+                .find(|p| result.path.as_ref() == Path::new(p))
+                .unwrap();
+            matched.push((original, result.positions));
+        }
+        matched
+    }
+}
@@ -3,8 +3,6 @@ name = "sum_tree"
version = "0.1.0"
edition = "2018"
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
[dependencies]
arrayvec = "0.7.1"
@@ -28,6 +28,7 @@ dirs = "3.0"
easy-parallel = "3.1.0"
fsevent = { path = "../fsevent" }
futures = "0.3"
+fuzzy = { path = "../fuzzy" }
gpui = { path = "../gpui" }
http-auth-basic = "0.1.3"
ignore = "0.4"
@@ -1,10 +1,10 @@
use crate::{
editor::{self, Editor},
+ fuzzy::PathMatch,
project::Project,
settings::Settings,
util,
workspace::Workspace,
- worktree::PathMatch,
};
use gpui::{
action,
@@ -1,158 +1,14 @@
-mod char_bag;
-
-use crate::util;
+use crate::{
+ util,
+ worktree::{EntryKind, Snapshot},
+};
use gpui::executor;
use std::{
- borrow::Cow,
- cmp::Ordering,
- path::Path,
- sync::atomic::{self, AtomicBool},
- sync::Arc,
+ cmp,
+ sync::{atomic::AtomicBool, Arc},
};
-pub use char_bag::CharBag;
-
-const BASE_DISTANCE_PENALTY: f64 = 0.6;
-const ADDITIONAL_DISTANCE_PENALTY: f64 = 0.05;
-const MIN_DISTANCE_PENALTY: f64 = 0.2;
-
-pub struct Matcher<'a> {
- query: &'a [char],
- lowercase_query: &'a [char],
- query_char_bag: CharBag,
- smart_case: bool,
- max_results: usize,
- min_score: f64,
- match_positions: Vec<usize>,
- last_positions: Vec<usize>,
- score_matrix: Vec<Option<f64>>,
- best_position_matrix: Vec<usize>,
-}
-
-trait Match: Ord {
- fn score(&self) -> f64;
- fn set_positions(&mut self, positions: Vec<usize>);
-}
-
-trait MatchCandidate {
- fn has_chars(&self, bag: CharBag) -> bool;
- fn to_string<'a>(&'a self) -> Cow<'a, str>;
-}
-
-#[derive(Clone, Debug)]
-pub struct PathMatchCandidate<'a> {
- pub path: &'a Arc<Path>,
- pub char_bag: CharBag,
-}
-
-#[derive(Clone, Debug)]
-pub struct PathMatch {
- pub score: f64,
- pub positions: Vec<usize>,
- pub tree_id: usize,
- pub path: Arc<Path>,
- pub path_prefix: Arc<str>,
-}
-
-#[derive(Clone, Debug)]
-pub struct StringMatchCandidate {
- pub string: String,
- pub char_bag: CharBag,
-}
-
-impl Match for PathMatch {
- fn score(&self) -> f64 {
- self.score
- }
-
- fn set_positions(&mut self, positions: Vec<usize>) {
- self.positions = positions;
- }
-}
-
-impl Match for StringMatch {
- fn score(&self) -> f64 {
- self.score
- }
-
- fn set_positions(&mut self, positions: Vec<usize>) {
- self.positions = positions;
- }
-}
-
-impl<'a> MatchCandidate for PathMatchCandidate<'a> {
- fn has_chars(&self, bag: CharBag) -> bool {
- self.char_bag.is_superset(bag)
- }
-
- fn to_string(&self) -> Cow<'a, str> {
- self.path.to_string_lossy()
- }
-}
-
-impl<'a> MatchCandidate for &'a StringMatchCandidate {
- fn has_chars(&self, bag: CharBag) -> bool {
- self.char_bag.is_superset(bag)
- }
-
- fn to_string(&self) -> Cow<'a, str> {
- self.string.as_str().into()
- }
-}
-
-#[derive(Clone, Debug)]
-pub struct StringMatch {
- pub score: f64,
- pub positions: Vec<usize>,
- pub string: String,
-}
-
-impl PartialEq for StringMatch {
- fn eq(&self, other: &Self) -> bool {
- self.score.eq(&other.score)
- }
-}
-
-impl Eq for StringMatch {}
-
-impl PartialOrd for StringMatch {
- fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
- Some(self.cmp(other))
- }
-}
-
-impl Ord for StringMatch {
- fn cmp(&self, other: &Self) -> Ordering {
- self.score
- .partial_cmp(&other.score)
- .unwrap_or(Ordering::Equal)
- .then_with(|| self.string.cmp(&other.string))
- }
-}
-
-impl PartialEq for PathMatch {
- fn eq(&self, other: &Self) -> bool {
- self.score.eq(&other.score)
- }
-}
-
-impl Eq for PathMatch {}
-
-impl PartialOrd for PathMatch {
- fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
- Some(self.cmp(other))
- }
-}
-
-impl Ord for PathMatch {
- fn cmp(&self, other: &Self) -> Ordering {
- self.score
- .partial_cmp(&other.score)
- .unwrap_or(Ordering::Equal)
- .then_with(|| self.tree_id.cmp(&other.tree_id))
- .then_with(|| Arc::as_ptr(&self.path).cmp(&Arc::as_ptr(&other.path)))
- }
-}
+pub use fuzzy::*;
pub async fn match_strings(
candidates: &[StringMatchCandidate],
@@ -210,463 +66,110 @@ pub async fn match_strings(
results
}
-impl<'a> Matcher<'a> {
- pub fn new(
- query: &'a [char],
- lowercase_query: &'a [char],
- query_char_bag: CharBag,
- smart_case: bool,
- max_results: usize,
- ) -> Self {
- Self {
- query,
- lowercase_query,
- query_char_bag,
- min_score: 0.0,
- last_positions: vec![0; query.len()],
- match_positions: vec![0; query.len()],
- score_matrix: Vec::new(),
- best_position_matrix: Vec::new(),
- smart_case,
- max_results,
- }
- }
-
- pub fn match_strings(
- &mut self,
- candidates: &[StringMatchCandidate],
- results: &mut Vec<StringMatch>,
- cancel_flag: &AtomicBool,
- ) {
- self.match_internal(
- &[],
- &[],
- candidates.iter(),
- results,
- cancel_flag,
- |candidate, score| StringMatch {
- score,
- positions: Vec::new(),
- string: candidate.string.to_string(),
- },
- )
- }
-
- pub fn match_paths(
- &mut self,
- tree_id: usize,
- path_prefix: Arc<str>,
- path_entries: impl Iterator<Item = PathMatchCandidate<'a>>,
- results: &mut Vec<PathMatch>,
- cancel_flag: &AtomicBool,
- ) {
- let prefix = path_prefix.chars().collect::<Vec<_>>();
- let lowercase_prefix = prefix
- .iter()
- .map(|c| c.to_ascii_lowercase())
- .collect::<Vec<_>>();
- self.match_internal(
- &prefix,
- &lowercase_prefix,
- path_entries,
- results,
- cancel_flag,
- |candidate, score| PathMatch {
- score,
- tree_id,
- positions: Vec::new(),
- path: candidate.path.clone(),
- path_prefix: path_prefix.clone(),
- },
- )
- }
-
- fn match_internal<C: MatchCandidate, R, F>(
- &mut self,
- prefix: &[char],
- lowercase_prefix: &[char],
- candidates: impl Iterator<Item = C>,
- results: &mut Vec<R>,
- cancel_flag: &AtomicBool,
- build_match: F,
- ) where
- R: Match,
- F: Fn(&C, f64) -> R,
- {
- let mut candidate_chars = Vec::new();
- let mut lowercase_candidate_chars = Vec::new();
-
- for candidate in candidates {
- if !candidate.has_chars(self.query_char_bag) {
- continue;
- }
-
- if cancel_flag.load(atomic::Ordering::Relaxed) {
- break;
- }
-
- candidate_chars.clear();
- lowercase_candidate_chars.clear();
- for c in candidate.to_string().chars() {
- candidate_chars.push(c);
- lowercase_candidate_chars.push(c.to_ascii_lowercase());
- }
-
- if !self.find_last_positions(&lowercase_prefix, &lowercase_candidate_chars) {
- continue;
- }
-
- let matrix_len = self.query.len() * (prefix.len() + candidate_chars.len());
- self.score_matrix.clear();
- self.score_matrix.resize(matrix_len, None);
- self.best_position_matrix.clear();
- self.best_position_matrix.resize(matrix_len, 0);
-
- let score = self.score_match(
- &candidate_chars,
- &lowercase_candidate_chars,
- &prefix,
- &lowercase_prefix,
- );
-
- if score > 0.0 {
- let mut mat = build_match(&candidate, score);
- if let Err(i) = results.binary_search_by(|m| mat.cmp(&m)) {
- if results.len() < self.max_results {
- mat.set_positions(self.match_positions.clone());
- results.insert(i, mat);
- } else if i < results.len() {
- results.pop();
- mat.set_positions(self.match_positions.clone());
- results.insert(i, mat);
- }
- if results.len() == self.max_results {
- self.min_score = results.last().unwrap().score();
- }
- }
- }
- }
- }
-
- fn find_last_positions(&mut self, prefix: &[char], path: &[char]) -> bool {
- let mut path = path.iter();
- let mut prefix_iter = prefix.iter();
- for (i, char) in self.query.iter().enumerate().rev() {
- if let Some(j) = path.rposition(|c| c == char) {
- self.last_positions[i] = j + prefix.len();
- } else if let Some(j) = prefix_iter.rposition(|c| c == char) {
- self.last_positions[i] = j;
- } else {
- return false;
- }
- }
- true
- }
-
- fn score_match(
- &mut self,
- path: &[char],
- path_cased: &[char],
- prefix: &[char],
- lowercase_prefix: &[char],
- ) -> f64 {
- let score = self.recursive_score_match(
- path,
- path_cased,
- prefix,
- lowercase_prefix,
- 0,
- 0,
- self.query.len() as f64,
- ) * self.query.len() as f64;
-
- if score <= 0.0 {
- return 0.0;
- }
-
- let path_len = prefix.len() + path.len();
- let mut cur_start = 0;
- let mut byte_ix = 0;
- let mut char_ix = 0;
- for i in 0..self.query.len() {
- let match_char_ix = self.best_position_matrix[i * path_len + cur_start];
- while char_ix < match_char_ix {
- let ch = prefix
- .get(char_ix)
- .or_else(|| path.get(char_ix - prefix.len()))
- .unwrap();
- byte_ix += ch.len_utf8();
- char_ix += 1;
- }
- cur_start = match_char_ix + 1;
- self.match_positions[i] = byte_ix;
- }
-
- score
+pub async fn match_paths(
+ snapshots: &[Snapshot],
+ query: &str,
+ include_ignored: bool,
+ smart_case: bool,
+ max_results: usize,
+ cancel_flag: &AtomicBool,
+ background: Arc<executor::Background>,
+) -> Vec<PathMatch> {
+ let path_count: usize = if include_ignored {
+ snapshots.iter().map(Snapshot::file_count).sum()
+ } else {
+ snapshots.iter().map(Snapshot::visible_file_count).sum()
+ };
+ if path_count == 0 {
+ return Vec::new();
}
- fn recursive_score_match(
- &mut self,
- path: &[char],
- path_cased: &[char],
- prefix: &[char],
- lowercase_prefix: &[char],
- query_idx: usize,
- path_idx: usize,
- cur_score: f64,
- ) -> f64 {
- if query_idx == self.query.len() {
- return 1.0;
- }
-
- let path_len = prefix.len() + path.len();
-
- if let Some(memoized) = self.score_matrix[query_idx * path_len + path_idx] {
- return memoized;
- }
-
- let mut score = 0.0;
- let mut best_position = 0;
-
- let query_char = self.lowercase_query[query_idx];
- let limit = self.last_positions[query_idx];
-
- let mut last_slash = 0;
- for j in path_idx..=limit {
- let path_char = if j < prefix.len() {
- lowercase_prefix[j]
- } else {
- path_cased[j - prefix.len()]
- };
- let is_path_sep = path_char == '/' || path_char == '\\';
-
- if query_idx == 0 && is_path_sep {
- last_slash = j;
- }
-
- if query_char == path_char || (is_path_sep && query_char == '_' || query_char == '\\') {
- let curr = if j < prefix.len() {
- prefix[j]
- } else {
- path[j - prefix.len()]
- };
-
- let mut char_score = 1.0;
- if j > path_idx {
- let last = if j - 1 < prefix.len() {
- prefix[j - 1]
- } else {
- path[j - 1 - prefix.len()]
- };
-
- if last == '/' {
- char_score = 0.9;
- } else if last == '-' || last == '_' || last == ' ' || last.is_numeric() {
- char_score = 0.8;
- } else if last.is_lowercase() && curr.is_uppercase() {
- char_score = 0.8;
- } else if last == '.' {
- char_score = 0.7;
- } else if query_idx == 0 {
- char_score = BASE_DISTANCE_PENALTY;
- } else {
- char_score = MIN_DISTANCE_PENALTY.max(
- BASE_DISTANCE_PENALTY
- - (j - path_idx - 1) as f64 * ADDITIONAL_DISTANCE_PENALTY,
- );
- }
- }
+ let lowercase_query = query.to_lowercase().chars().collect::<Vec<_>>();
+ let query = query.chars().collect::<Vec<_>>();
- // Apply a severe penalty if the case doesn't match.
- // This will make the exact matches have higher score than the case-insensitive and the
- // path insensitive matches.
- if (self.smart_case || curr == '/') && self.query[query_idx] != curr {
- char_score *= 0.001;
- }
+ let lowercase_query = &lowercase_query;
+ let query = &query;
+ let query_char_bag = CharBag::from(&lowercase_query[..]);
- let mut multiplier = char_score;
+ let num_cpus = background.num_cpus().min(path_count);
+ let segment_size = (path_count + num_cpus - 1) / num_cpus;
+ let mut segment_results = (0..num_cpus)
+ .map(|_| Vec::with_capacity(max_results))
+ .collect::<Vec<_>>();
- // Scale the score based on how deep within the path we found the match.
- if query_idx == 0 {
- multiplier /= ((prefix.len() + path.len()) - last_slash) as f64;
- }
+ background
+ .scoped(|scope| {
+ for (segment_idx, results) in segment_results.iter_mut().enumerate() {
+ scope.spawn(async move {
+ let segment_start = segment_idx * segment_size;
+ let segment_end = segment_start + segment_size;
+ let mut matcher = Matcher::new(
+ query,
+ lowercase_query,
+ query_char_bag,
+ smart_case,
+ max_results,
+ );
- let mut next_score = 1.0;
- if self.min_score > 0.0 {
- next_score = cur_score * multiplier;
- // Scores only decrease. If we can't pass the previous best, bail
- if next_score < self.min_score {
- // Ensure that score is non-zero so we use it in the memo table.
- if score == 0.0 {
- score = 1e-18;
+ let mut tree_start = 0;
+ for snapshot in snapshots {
+ let tree_end = if include_ignored {
+ tree_start + snapshot.file_count()
+ } else {
+ tree_start + snapshot.visible_file_count()
+ };
+
+ if tree_start < segment_end && segment_start < tree_end {
+ let path_prefix: Arc<str> =
+ if snapshot.root_entry().map_or(false, |e| e.is_file()) {
+ snapshot.root_name().into()
+ } else if snapshots.len() > 1 {
+ format!("{}/", snapshot.root_name()).into()
+ } else {
+ "".into()
+ };
+
+ let start = cmp::max(tree_start, segment_start) - tree_start;
+ let end = cmp::min(tree_end, segment_end) - tree_start;
+ let paths = snapshot
+ .files(include_ignored, start)
+ .take(end - start)
+ .map(|entry| {
+ if let EntryKind::File(char_bag) = entry.kind {
+ PathMatchCandidate {
+ path: &entry.path,
+ char_bag,
+ }
+ } else {
+ unreachable!()
+ }
+ });
+
+ matcher.match_paths(
+ snapshot.id(),
+ path_prefix,
+ paths,
+ results,
+ &cancel_flag,
+ );
}
- continue;
- }
- }
-
- let new_score = self.recursive_score_match(
- path,
- path_cased,
- prefix,
- lowercase_prefix,
- query_idx + 1,
- j + 1,
- next_score,
- ) * multiplier;
-
- if new_score > score {
- score = new_score;
- best_position = j;
- // Optimization: can't score better than 1.
- if new_score == 1.0 {
- break;
+ if tree_end >= segment_end {
+ break;
+ }
+ tree_start = tree_end;
}
- }
+ })
}
- }
-
- if best_position != 0 {
- self.best_position_matrix[query_idx * path_len + path_idx] = best_position;
- }
-
- self.score_matrix[query_idx * path_len + path_idx] = Some(score);
- score
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
- use std::path::PathBuf;
-
- #[test]
- fn test_get_last_positions() {
- let mut query: &[char] = &['d', 'c'];
- let mut matcher = Matcher::new(query, query, query.into(), false, 10);
- let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']);
- assert_eq!(result, false);
-
- query = &['c', 'd'];
- let mut matcher = Matcher::new(query, query, query.into(), false, 10);
- let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']);
- assert_eq!(result, true);
- assert_eq!(matcher.last_positions, vec![2, 4]);
-
- query = &['z', '/', 'z', 'f'];
- let mut matcher = Matcher::new(query, query, query.into(), false, 10);
- let result = matcher.find_last_positions(&['z', 'e', 'd', '/'], &['z', 'e', 'd', '/', 'f']);
- assert_eq!(result, true);
- assert_eq!(matcher.last_positions, vec![0, 3, 4, 8]);
- }
-
- #[test]
- fn test_match_path_entries() {
- let paths = vec![
- "",
- "a",
- "ab",
- "abC",
- "abcd",
- "alphabravocharlie",
- "AlphaBravoCharlie",
- "thisisatestdir",
- "/////ThisIsATestDir",
- "/this/is/a/test/dir",
- "/test/tiatd",
- ];
-
- assert_eq!(
- match_query("abc", false, &paths),
- vec![
- ("abC", vec![0, 1, 2]),
- ("abcd", vec![0, 1, 2]),
- ("AlphaBravoCharlie", vec![0, 5, 10]),
- ("alphabravocharlie", vec![4, 5, 10]),
- ]
- );
- assert_eq!(
- match_query("t/i/a/t/d", false, &paths),
- vec![("/this/is/a/test/dir", vec![1, 5, 6, 8, 9, 10, 11, 15, 16]),]
- );
-
- assert_eq!(
- match_query("tiatd", false, &paths),
- vec![
- ("/test/tiatd", vec![6, 7, 8, 9, 10]),
- ("/this/is/a/test/dir", vec![1, 6, 9, 11, 16]),
- ("/////ThisIsATestDir", vec![5, 9, 11, 12, 16]),
- ("thisisatestdir", vec![0, 2, 6, 7, 11]),
- ]
- );
- }
-
- #[test]
- fn test_match_multibyte_path_entries() {
- let paths = vec!["aαbβ/cγdδ", "αβγδ/bcde", "c1️⃣2️⃣3️⃣/d4️⃣5️⃣6️⃣/e7️⃣8️⃣9️⃣/f", "/d/🆒/h"];
- assert_eq!("1️⃣".len(), 7);
- assert_eq!(
- match_query("bcd", false, &paths),
- vec![
- ("αβγδ/bcde", vec![9, 10, 11]),
- ("aαbβ/cγdδ", vec![3, 7, 10]),
- ]
- );
- assert_eq!(
- match_query("cde", false, &paths),
- vec![
- ("αβγδ/bcde", vec![10, 11, 12]),
- ("c1️⃣2️⃣3️⃣/d4️⃣5️⃣6️⃣/e7️⃣8️⃣9️⃣/f", vec![0, 23, 46]),
- ]
- );
- }
-
- fn match_query<'a>(
- query: &str,
- smart_case: bool,
- paths: &Vec<&'a str>,
- ) -> Vec<(&'a str, Vec<usize>)> {
- let lowercase_query = query.to_lowercase().chars().collect::<Vec<_>>();
- let query = query.chars().collect::<Vec<_>>();
- let query_chars = CharBag::from(&lowercase_query[..]);
+ })
+ .await;
- let path_arcs = paths
- .iter()
- .map(|path| Arc::from(PathBuf::from(path)))
- .collect::<Vec<_>>();
- let mut path_entries = Vec::new();
- for (i, path) in paths.iter().enumerate() {
- let lowercase_path = path.to_lowercase().chars().collect::<Vec<_>>();
- let char_bag = CharBag::from(lowercase_path.as_slice());
- path_entries.push(PathMatchCandidate {
- char_bag,
- path: path_arcs.get(i).unwrap(),
- });
+ let mut results = Vec::new();
+ for segment_result in segment_results {
+ if results.is_empty() {
+ results = segment_result;
+ } else {
+ util::extend_sorted(&mut results, segment_result, max_results, |a, b| b.cmp(&a));
}
-
- let mut matcher = Matcher::new(&query, &lowercase_query, query_chars, smart_case, 100);
-
- let cancel_flag = AtomicBool::new(false);
- let mut results = Vec::new();
- matcher.match_paths(
- 0,
- "".into(),
- path_entries.into_iter(),
- &mut results,
- &cancel_flag,
- );
-
- results
- .into_iter()
- .map(|result| {
- (
- paths
- .iter()
- .copied()
- .find(|p| result.path.as_ref() == Path::new(p))
- .unwrap(),
- result.positions,
- )
- })
- .collect()
}
+ results
}
@@ -1,17 +1,16 @@
use crate::{
fs::Fs,
- fuzzy::{CharBag, Matcher, PathMatchCandidate},
+ fuzzy::{self, PathMatch},
language::LanguageRegistry,
rpc::Client,
- util::{self, TryFutureExt as _},
- worktree::{self, EntryKind, PathMatch, Snapshot, Worktree},
+ util::TryFutureExt as _,
+ worktree::{self, Worktree},
AppState,
};
use anyhow::Result;
use futures::Future;
use gpui::{AppContext, Entity, ModelContext, ModelHandle, Task};
use std::{
- cmp,
path::Path,
sync::{atomic::AtomicBool, Arc},
};
@@ -191,7 +190,7 @@ impl Project {
pub fn match_paths<'a>(
&self,
- query: &str,
+ query: &'a str,
include_ignored: bool,
smart_case: bool,
max_results: usize,
@@ -203,111 +202,19 @@ impl Project {
.iter()
.map(|worktree| worktree.read(cx).snapshot())
.collect::<Vec<_>>();
-
- let path_count: usize = if include_ignored {
- snapshots.iter().map(Snapshot::file_count).sum()
- } else {
- snapshots.iter().map(Snapshot::visible_file_count).sum()
- };
-
- let lowercase_query = query.to_lowercase().chars().collect::<Vec<_>>();
- let query = query.chars().collect::<Vec<_>>();
- let query_char_bag = CharBag::from(&lowercase_query[..]);
-
let background = cx.background().clone();
async move {
- if path_count == 0 {
- return Vec::new();
- }
-
- let num_cpus = background.num_cpus().min(path_count);
- let segment_size = (path_count + num_cpus - 1) / num_cpus;
- let mut segment_results = (0..num_cpus)
- .map(|_| Vec::with_capacity(max_results))
- .collect::<Vec<_>>();
-
- let lowercase_query = &lowercase_query;
- let query = &query;
- let snapshots = snapshots.as_slice();
-
- background
- .scoped(|scope| {
- for (segment_idx, results) in segment_results.iter_mut().enumerate() {
- scope.spawn(async move {
- let segment_start = segment_idx * segment_size;
- let segment_end = segment_start + segment_size;
- let mut matcher = Matcher::new(
- query,
- lowercase_query,
- query_char_bag,
- smart_case,
- max_results,
- );
-
- let mut tree_start = 0;
- for snapshot in snapshots {
- let tree_end = if include_ignored {
- tree_start + snapshot.file_count()
- } else {
- tree_start + snapshot.visible_file_count()
- };
-
- if tree_start < segment_end && segment_start < tree_end {
- let path_prefix: Arc<str> =
- if snapshot.root_entry().map_or(false, |e| e.is_file()) {
- snapshot.root_name().into()
- } else if snapshots.len() > 1 {
- format!("{}/", snapshot.root_name()).into()
- } else {
- "".into()
- };
-
- let start = cmp::max(tree_start, segment_start) - tree_start;
- let end = cmp::min(tree_end, segment_end) - tree_start;
- let paths = snapshot
- .files(include_ignored, start)
- .take(end - start)
- .map(|entry| {
- if let EntryKind::File(char_bag) = entry.kind {
- PathMatchCandidate {
- path: &entry.path,
- char_bag,
- }
- } else {
- unreachable!()
- }
- });
-
- matcher.match_paths(
- snapshot.id(),
- path_prefix,
- paths,
- results,
- &cancel_flag,
- );
- }
- if tree_end >= segment_end {
- break;
- }
- tree_start = tree_end;
- }
- })
- }
- })
- .await;
-
- let mut results = Vec::new();
- for segment_result in segment_results {
- if results.is_empty() {
- results = segment_result;
- } else {
- util::extend_sorted(&mut results, segment_result, max_results, |a, b| {
- b.cmp(&a)
- });
- }
- }
- results
+ fuzzy::match_paths(
+ snapshots.as_slice(),
+ query,
+ include_ignored,
+ smart_case,
+ max_results,
+ cancel_flag,
+ background,
+ )
+ .await
}
}
}
@@ -4,7 +4,6 @@ use self::ignore::IgnoreStack;
use crate::{
editor::{self, Buffer, History, Operation, Rope},
fs::{self, Fs},
- fuzzy,
fuzzy::CharBag,
language::{Language, LanguageRegistry},
rpc::{self, proto, Status},
@@ -14,7 +13,6 @@ use crate::{
use ::ignore::gitignore::{Gitignore, GitignoreBuilder};
use anyhow::{anyhow, Result};
use futures::{Stream, StreamExt};
-pub use fuzzy::PathMatch;
use gpui::{
executor, AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, MutableAppContext,
Task, UpgradeModelHandle, WeakModelHandle,