strings.rs

  1use crate::{
  2    matcher::{MatchCandidate, Matcher},
  3    CharBag,
  4};
  5use gpui::BackgroundExecutor;
  6use std::{
  7    borrow::Cow,
  8    cmp::{self, Ordering},
  9    iter,
 10    ops::Range,
 11    sync::atomic::{self, AtomicBool},
 12};
 13
 14#[derive(Clone, Debug)]
 15pub struct StringMatchCandidate {
 16    pub id: usize,
 17    pub string: String,
 18    pub char_bag: CharBag,
 19}
 20
 21impl StringMatchCandidate {
 22    pub fn new(id: usize, string: &str) -> Self {
 23        Self {
 24            id,
 25            string: string.into(),
 26            char_bag: string.into(),
 27        }
 28    }
 29}
 30
 31impl<'a> MatchCandidate for &'a StringMatchCandidate {
 32    fn has_chars(&self, bag: CharBag) -> bool {
 33        self.char_bag.is_superset(bag)
 34    }
 35
 36    fn to_string(&self) -> Cow<'a, str> {
 37        self.string.as_str().into()
 38    }
 39}
 40
 41#[derive(Clone, Debug)]
 42pub struct StringMatch {
 43    pub candidate_id: usize,
 44    pub score: f64,
 45    pub positions: Vec<usize>,
 46    pub string: String,
 47}
 48
 49impl StringMatch {
 50    pub fn ranges(&self) -> impl '_ + Iterator<Item = Range<usize>> {
 51        let mut positions = self.positions.iter().peekable();
 52        iter::from_fn(move || {
 53            if let Some(start) = positions.next().copied() {
 54                let Some(char_len) = self.char_len_at_index(start) else {
 55                    log::error!(
 56                        "Invariant violation: Index {start} out of range or not on a utf-8 boundary in string {:?}",
 57                        self.string
 58                    );
 59                    return None;
 60                };
 61                let mut end = start + char_len;
 62                while let Some(next_start) = positions.peek() {
 63                    if end == **next_start {
 64                        let Some(char_len) = self.char_len_at_index(end) else {
 65                            log::error!(
 66                                "Invariant violation: Index {end} out of range or not on a utf-8 boundary in string {:?}",
 67                                self.string
 68                            );
 69                            return None;
 70                        };
 71                        end += char_len;
 72                        positions.next();
 73                    } else {
 74                        break;
 75                    }
 76                }
 77
 78                return Some(start..end);
 79            }
 80            None
 81        })
 82    }
 83
 84    /// Gets the byte length of the utf-8 character at a byte offset. If the index is out of range
 85    /// or not on a utf-8 boundary then None is returned.
 86    fn char_len_at_index(&self, ix: usize) -> Option<usize> {
 87        self.string
 88            .get(ix..)
 89            .and_then(|slice| slice.chars().next().map(|char| char.len_utf8()))
 90    }
 91}
 92
 93impl PartialEq for StringMatch {
 94    fn eq(&self, other: &Self) -> bool {
 95        self.cmp(other).is_eq()
 96    }
 97}
 98
 99impl Eq for StringMatch {}
100
101impl PartialOrd for StringMatch {
102    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
103        Some(self.cmp(other))
104    }
105}
106
107impl Ord for StringMatch {
108    fn cmp(&self, other: &Self) -> Ordering {
109        self.score
110            .partial_cmp(&other.score)
111            .unwrap_or(Ordering::Equal)
112            .then_with(|| self.candidate_id.cmp(&other.candidate_id))
113    }
114}
115
116pub async fn match_strings(
117    candidates: &[StringMatchCandidate],
118    query: &str,
119    smart_case: bool,
120    max_results: usize,
121    cancel_flag: &AtomicBool,
122    executor: BackgroundExecutor,
123) -> Vec<StringMatch> {
124    if candidates.is_empty() || max_results == 0 {
125        return Default::default();
126    }
127
128    if query.is_empty() {
129        return candidates
130            .iter()
131            .map(|candidate| StringMatch {
132                candidate_id: candidate.id,
133                score: 0.,
134                positions: Default::default(),
135                string: candidate.string.clone(),
136            })
137            .collect();
138    }
139
140    let lowercase_query = query.to_lowercase().chars().collect::<Vec<_>>();
141    let query = query.chars().collect::<Vec<_>>();
142
143    let lowercase_query = &lowercase_query;
144    let query = &query;
145    let query_char_bag = CharBag::from(&lowercase_query[..]);
146
147    let num_cpus = executor.num_cpus().min(candidates.len());
148    let segment_size = candidates.len().div_ceil(num_cpus);
149    let mut segment_results = (0..num_cpus)
150        .map(|_| Vec::with_capacity(max_results.min(candidates.len())))
151        .collect::<Vec<_>>();
152
153    executor
154        .scoped(|scope| {
155            for (segment_idx, results) in segment_results.iter_mut().enumerate() {
156                let cancel_flag = &cancel_flag;
157                scope.spawn(async move {
158                    let segment_start = cmp::min(segment_idx * segment_size, candidates.len());
159                    let segment_end = cmp::min(segment_start + segment_size, candidates.len());
160                    let mut matcher =
161                        Matcher::new(query, lowercase_query, query_char_bag, smart_case);
162
163                    matcher.match_candidates(
164                        &[],
165                        &[],
166                        candidates[segment_start..segment_end].iter(),
167                        results,
168                        cancel_flag,
169                        |candidate, score, positions| StringMatch {
170                            candidate_id: candidate.id,
171                            score,
172                            positions: positions.clone(),
173                            string: candidate.string.to_string(),
174                        },
175                    );
176                });
177            }
178        })
179        .await;
180
181    if cancel_flag.load(atomic::Ordering::Relaxed) {
182        return Vec::new();
183    }
184
185    let mut results = segment_results.concat();
186    util::truncate_to_bottom_n_sorted_by(&mut results, max_results, &|a, b| b.cmp(a));
187    results
188}