search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use fancy_regex::{Captures, Regex, RegexBuilder};
  5use gpui::Entity;
  6use language::{Buffer, BufferSnapshot, CharKind};
  7use smol::future::yield_now;
  8use std::{
  9    borrow::Cow,
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::Path,
 13    sync::{Arc, LazyLock},
 14};
 15use text::Anchor;
 16use util::paths::PathMatcher;
 17
 18#[derive(Debug)]
 19pub enum SearchResult {
 20    Buffer {
 21        buffer: Entity<Buffer>,
 22        ranges: Vec<Range<Anchor>>,
 23    },
 24    LimitReached,
 25}
 26
 27#[derive(Clone, Copy, PartialEq)]
 28pub enum SearchInputKind {
 29    Query,
 30    Include,
 31    Exclude,
 32}
 33
 34#[derive(Clone, Debug)]
 35pub struct SearchInputs {
 36    query: Arc<str>,
 37    files_to_include: PathMatcher,
 38    files_to_exclude: PathMatcher,
 39    buffers: Option<Vec<Entity<Buffer>>>,
 40}
 41
 42impl SearchInputs {
 43    pub fn as_str(&self) -> &str {
 44        self.query.as_ref()
 45    }
 46    pub fn files_to_include(&self) -> &PathMatcher {
 47        &self.files_to_include
 48    }
 49    pub fn files_to_exclude(&self) -> &PathMatcher {
 50        &self.files_to_exclude
 51    }
 52    pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
 53        &self.buffers
 54    }
 55}
 56#[derive(Clone, Debug)]
 57pub enum SearchQuery {
 58    Text {
 59        search: AhoCorasick,
 60        replacement: Option<String>,
 61        whole_word: bool,
 62        case_sensitive: bool,
 63        include_ignored: bool,
 64        inner: SearchInputs,
 65    },
 66
 67    Regex {
 68        regex: Regex,
 69        replacement: Option<String>,
 70        multiline: bool,
 71        whole_word: bool,
 72        case_sensitive: bool,
 73        include_ignored: bool,
 74        one_match_per_line: bool,
 75        inner: SearchInputs,
 76    },
 77}
 78
 79static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
 80    RegexBuilder::new(r"\B")
 81        .build()
 82        .expect("Failed to create WORD_MATCH_TEST")
 83});
 84
 85impl SearchQuery {
 86    pub fn text(
 87        query: impl ToString,
 88        whole_word: bool,
 89        case_sensitive: bool,
 90        include_ignored: bool,
 91        files_to_include: PathMatcher,
 92        files_to_exclude: PathMatcher,
 93        buffers: Option<Vec<Entity<Buffer>>>,
 94    ) -> Result<Self> {
 95        let query = query.to_string();
 96        if !case_sensitive && !query.is_ascii() {
 97            // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
 98            // Fallback to regex search as recommended by
 99            // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
100            return Self::regex(
101                regex::escape(&query),
102                whole_word,
103                case_sensitive,
104                include_ignored,
105                false,
106                files_to_include,
107                files_to_exclude,
108                buffers,
109            );
110        }
111        let search = AhoCorasickBuilder::new()
112            .ascii_case_insensitive(!case_sensitive)
113            .build([&query])?;
114        let inner = SearchInputs {
115            query: query.into(),
116            files_to_exclude,
117            files_to_include,
118            buffers,
119        };
120        Ok(Self::Text {
121            search,
122            replacement: None,
123            whole_word,
124            case_sensitive,
125            include_ignored,
126            inner,
127        })
128    }
129
130    pub fn regex(
131        query: impl ToString,
132        whole_word: bool,
133        case_sensitive: bool,
134        include_ignored: bool,
135        one_match_per_line: bool,
136        files_to_include: PathMatcher,
137        files_to_exclude: PathMatcher,
138        buffers: Option<Vec<Entity<Buffer>>>,
139    ) -> Result<Self> {
140        let mut query = query.to_string();
141        let initial_query = Arc::from(query.as_str());
142        if whole_word {
143            let mut word_query = String::new();
144            if let Some(first) = query.get(0..1) {
145                if WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x) {
146                    word_query.push_str("\\b");
147                }
148            }
149            word_query.push_str(&query);
150            if let Some(last) = query.get(query.len() - 1..) {
151                if WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x) {
152                    word_query.push_str("\\b");
153                }
154            }
155            query = word_query
156        }
157
158        let multiline = query.contains('\n') || query.contains("\\n");
159        let regex = RegexBuilder::new(&query)
160            .case_insensitive(!case_sensitive)
161            .build()?;
162        let inner = SearchInputs {
163            query: initial_query,
164            files_to_exclude,
165            files_to_include,
166            buffers,
167        };
168        Ok(Self::Regex {
169            regex,
170            replacement: None,
171            multiline,
172            whole_word,
173            case_sensitive,
174            include_ignored,
175            inner,
176            one_match_per_line,
177        })
178    }
179
180    pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
181        if message.regex {
182            Self::regex(
183                message.query,
184                message.whole_word,
185                message.case_sensitive,
186                message.include_ignored,
187                false,
188                deserialize_path_matches(&message.files_to_include)?,
189                deserialize_path_matches(&message.files_to_exclude)?,
190                None, // search opened only don't need search remote
191            )
192        } else {
193            Self::text(
194                message.query,
195                message.whole_word,
196                message.case_sensitive,
197                message.include_ignored,
198                deserialize_path_matches(&message.files_to_include)?,
199                deserialize_path_matches(&message.files_to_exclude)?,
200                None, // search opened only don't need search remote
201            )
202        }
203    }
204
205    pub fn with_replacement(mut self, new_replacement: String) -> Self {
206        match self {
207            Self::Text {
208                ref mut replacement,
209                ..
210            }
211            | Self::Regex {
212                ref mut replacement,
213                ..
214            } => {
215                *replacement = Some(new_replacement);
216                self
217            }
218        }
219    }
220
221    pub fn to_proto(&self) -> proto::SearchQuery {
222        proto::SearchQuery {
223            query: self.as_str().to_string(),
224            regex: self.is_regex(),
225            whole_word: self.whole_word(),
226            case_sensitive: self.case_sensitive(),
227            include_ignored: self.include_ignored(),
228            files_to_include: self.files_to_include().sources().join(","),
229            files_to_exclude: self.files_to_exclude().sources().join(","),
230        }
231    }
232
233    pub(crate) fn detect(
234        &self,
235        mut reader: BufReader<Box<dyn Read + Send + Sync>>,
236    ) -> Result<bool> {
237        if self.as_str().is_empty() {
238            return Ok(false);
239        }
240
241        match self {
242            Self::Text { search, .. } => {
243                let mat = search.stream_find_iter(reader).next();
244                match mat {
245                    Some(Ok(_)) => Ok(true),
246                    Some(Err(err)) => Err(err.into()),
247                    None => Ok(false),
248                }
249            }
250            Self::Regex {
251                regex, multiline, ..
252            } => {
253                if *multiline {
254                    let mut text = String::new();
255                    if let Err(err) = reader.read_to_string(&mut text) {
256                        Err(err.into())
257                    } else {
258                        Ok(regex.find(&text)?.is_some())
259                    }
260                } else {
261                    for line in reader.lines() {
262                        let line = line?;
263                        if regex.find(&line)?.is_some() {
264                            return Ok(true);
265                        }
266                    }
267                    Ok(false)
268                }
269            }
270        }
271    }
272    /// Returns the replacement text for this `SearchQuery`.
273    pub fn replacement(&self) -> Option<&str> {
274        match self {
275            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
276                replacement.as_deref()
277            }
278        }
279    }
280    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
281    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
282        match self {
283            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
284            SearchQuery::Regex {
285                regex, replacement, ..
286            } => {
287                if let Some(replacement) = replacement {
288                    static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
289                        LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
290                    let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
291                        replacement,
292                        |c: &Captures| match c.get(0).unwrap().as_str() {
293                            r"\\" => "\\",
294                            r"\n" => "\n",
295                            r"\t" => "\t",
296                            x => unreachable!("Unexpected escape sequence: {}", x),
297                        },
298                    );
299                    Some(regex.replace(text, replacement))
300                } else {
301                    None
302                }
303            }
304        }
305    }
306
307    pub async fn search(
308        &self,
309        buffer: &BufferSnapshot,
310        subrange: Option<Range<usize>>,
311    ) -> Vec<Range<usize>> {
312        const YIELD_INTERVAL: usize = 20000;
313
314        if self.as_str().is_empty() {
315            return Default::default();
316        }
317
318        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
319        let rope = if let Some(range) = subrange {
320            buffer.as_rope().slice(range)
321        } else {
322            buffer.as_rope().clone()
323        };
324
325        let mut matches = Vec::new();
326        match self {
327            Self::Text {
328                search, whole_word, ..
329            } => {
330                for (ix, mat) in search
331                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
332                    .enumerate()
333                {
334                    if (ix + 1) % YIELD_INTERVAL == 0 {
335                        yield_now().await;
336                    }
337
338                    let mat = mat.unwrap();
339                    if *whole_word {
340                        let classifier = buffer.char_classifier_at(range_offset + mat.start());
341
342                        let prev_kind = rope
343                            .reversed_chars_at(mat.start())
344                            .next()
345                            .map(|c| classifier.kind(c));
346                        let start_kind =
347                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
348                        let end_kind =
349                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
350                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
351                        if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
352                            || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
353                        {
354                            continue;
355                        }
356                    }
357                    matches.push(mat.start()..mat.end())
358                }
359            }
360
361            Self::Regex {
362                regex, multiline, ..
363            } => {
364                if *multiline {
365                    let text = rope.to_string();
366                    for (ix, mat) in regex.find_iter(&text).enumerate() {
367                        if (ix + 1) % YIELD_INTERVAL == 0 {
368                            yield_now().await;
369                        }
370
371                        if let Ok(mat) = mat {
372                            matches.push(mat.start()..mat.end());
373                        }
374                    }
375                } else {
376                    let mut line = String::new();
377                    let mut line_offset = 0;
378                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
379                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
380                            yield_now().await;
381                        }
382
383                        for (newline_ix, text) in chunk.split('\n').enumerate() {
384                            if newline_ix > 0 {
385                                for mat in regex.find_iter(&line).flatten() {
386                                    let start = line_offset + mat.start();
387                                    let end = line_offset + mat.end();
388                                    matches.push(start..end);
389                                }
390
391                                line_offset += line.len() + 1;
392                                line.clear();
393                            }
394                            line.push_str(text);
395                        }
396                    }
397                }
398            }
399        }
400
401        matches
402    }
403
404    pub fn is_empty(&self) -> bool {
405        self.as_str().is_empty()
406    }
407
408    pub fn as_str(&self) -> &str {
409        self.as_inner().as_str()
410    }
411
412    pub fn whole_word(&self) -> bool {
413        match self {
414            Self::Text { whole_word, .. } => *whole_word,
415            Self::Regex { whole_word, .. } => *whole_word,
416        }
417    }
418
419    pub fn case_sensitive(&self) -> bool {
420        match self {
421            Self::Text { case_sensitive, .. } => *case_sensitive,
422            Self::Regex { case_sensitive, .. } => *case_sensitive,
423        }
424    }
425
426    pub fn include_ignored(&self) -> bool {
427        match self {
428            Self::Text {
429                include_ignored, ..
430            } => *include_ignored,
431            Self::Regex {
432                include_ignored, ..
433            } => *include_ignored,
434        }
435    }
436
437    pub fn is_regex(&self) -> bool {
438        matches!(self, Self::Regex { .. })
439    }
440
441    pub fn files_to_include(&self) -> &PathMatcher {
442        self.as_inner().files_to_include()
443    }
444
445    pub fn files_to_exclude(&self) -> &PathMatcher {
446        self.as_inner().files_to_exclude()
447    }
448
449    pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
450        self.as_inner().buffers.as_ref()
451    }
452
453    pub fn is_opened_only(&self) -> bool {
454        self.as_inner().buffers.is_some()
455    }
456
457    pub fn filters_path(&self) -> bool {
458        !(self.files_to_exclude().sources().is_empty()
459            && self.files_to_include().sources().is_empty())
460    }
461
462    pub fn file_matches(&self, file_path: &Path) -> bool {
463        let mut path = file_path.to_path_buf();
464        loop {
465            if self.files_to_exclude().is_match(&path) {
466                return false;
467            } else if self.files_to_include().sources().is_empty()
468                || self.files_to_include().is_match(&path)
469            {
470                return true;
471            } else if !path.pop() {
472                return false;
473            }
474        }
475    }
476    pub fn as_inner(&self) -> &SearchInputs {
477        match self {
478            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
479        }
480    }
481
482    /// Whether this search should replace only one match per line, instead of
483    /// all matches.
484    /// Returns `None` for text searches, as only regex searches support this
485    /// option.
486    pub fn one_match_per_line(&self) -> Option<bool> {
487        match self {
488            Self::Regex {
489                one_match_per_line, ..
490            } => Some(*one_match_per_line),
491            Self::Text { .. } => None,
492        }
493    }
494}
495
496pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
497    let globs = glob_set
498        .split(',')
499        .map(str::trim)
500        .filter(|&glob_str| (!glob_str.is_empty()))
501        .map(|glob_str| glob_str.to_owned())
502        .collect::<Vec<_>>();
503    Ok(PathMatcher::new(&globs)?)
504}
505
#[cfg(test)]
mod tests {
    use super::*;

    /// Every plain path must build a matcher, and that matcher must match
    /// the path it was built from.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed ones accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}