search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use fancy_regex::{Captures, Regex, RegexBuilder};
  5use gpui::Entity;
  6use language::{Buffer, BufferSnapshot, CharKind};
  7use smol::future::yield_now;
  8use std::{
  9    borrow::Cow,
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::Path,
 13    sync::{Arc, LazyLock},
 14};
 15use text::Anchor;
 16use util::paths::PathMatcher;
 17
/// One item produced by a search.
#[derive(Debug)]
pub enum SearchResult {
    /// A buffer together with ranges inside it.
    Buffer {
        /// The buffer the ranges refer to.
        buffer: Entity<Buffer>,
        /// Anchor ranges within `buffer`; presumably one per match (the producer is not
        /// visible in this file — confirm against the caller).
        ranges: Vec<Range<Anchor>>,
    },
    /// NOTE(review): presumably signals that the search stopped because a result limit was
    /// hit; the code emitting this variant is not in this file.
    LimitReached,
}
 26
 27#[derive(Clone, Copy, PartialEq)]
 28pub enum SearchInputKind {
 29    Query,
 30    Include,
 31    Exclude,
 32}
 33
/// The user-supplied inputs shared by both kinds of `SearchQuery`.
#[derive(Clone, Debug)]
pub struct SearchInputs {
    /// The query text as handed to the constructor (for regex queries this is the text
    /// before `\c`/`\C` items are stripped and before `\b` is added for whole-word search).
    query: Arc<str>,
    /// Patterns a path must match to be searched; an empty set means "everything".
    files_to_include: PathMatcher,
    /// Patterns that exclude a path from the search.
    files_to_exclude: PathMatcher,
    /// Whether include/exclude patterns are matched against fully qualified project paths.
    match_full_paths: bool,
    /// When `Some`, the query is treated as "opened only" (see `SearchQuery::is_opened_only`).
    buffers: Option<Vec<Entity<Buffer>>>,
}
 42
 43impl SearchInputs {
 44    pub fn as_str(&self) -> &str {
 45        self.query.as_ref()
 46    }
 47    pub fn files_to_include(&self) -> &PathMatcher {
 48        &self.files_to_include
 49    }
 50    pub fn files_to_exclude(&self) -> &PathMatcher {
 51        &self.files_to_exclude
 52    }
 53    pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
 54        &self.buffers
 55    }
 56}
/// A ready-to-run search: either a literal text search or a regular-expression search.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search backed by an Aho-Corasick automaton.
    Text {
        /// Automaton built from the single query string.
        search: AhoCorasick,
        /// Replacement text, set through `with_replacement`.
        replacement: Option<String>,
        /// Whether matches glued to adjacent word characters are rejected.
        whole_word: bool,
        /// Whether the automaton was built case-sensitively.
        case_sensitive: bool,
        /// Stored and exposed via `include_ignored()`; not consulted in this file.
        include_ignored: bool,
        /// Query text and path filters.
        inner: SearchInputs,
    },

    /// Regular-expression search backed by `fancy_regex`.
    Regex {
        /// The compiled pattern (after `\c`/`\C` stripping and optional `\b` wrapping).
        regex: Regex,
        /// Replacement template, set through `with_replacement`.
        replacement: Option<String>,
        /// True when the pattern contains a newline or the two characters `\n`; such
        /// queries are matched against the whole text instead of line by line.
        multiline: bool,
        /// Whether `\b` anchors were requested around the pattern.
        whole_word: bool,
        /// Effective case sensitivity (a `\c`/`\C` item in the query overrides the argument).
        case_sensitive: bool,
        /// Stored and exposed via `include_ignored()`; not consulted in this file.
        include_ignored: bool,
        /// When true, line-by-line search keeps only the first match of each line.
        one_match_per_line: bool,
        /// Query text and path filters.
        inner: SearchInputs,
    },
}
 79
 80static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
 81    RegexBuilder::new(r"\B")
 82        .build()
 83        .expect("Failed to create WORD_MATCH_TEST")
 84});
 85
 86impl SearchQuery {
 87    /// Create a text query
 88    ///
 89    /// If `match_full_paths` is true, include/exclude patterns will always be matched against fully qualified project paths beginning with a project root.
 90    /// If `match_full_paths` is false, patterns will be matched against full paths only when the project has multiple roots.
 91    pub fn text(
 92        query: impl ToString,
 93        whole_word: bool,
 94        case_sensitive: bool,
 95        include_ignored: bool,
 96        files_to_include: PathMatcher,
 97        files_to_exclude: PathMatcher,
 98        match_full_paths: bool,
 99        buffers: Option<Vec<Entity<Buffer>>>,
100    ) -> Result<Self> {
101        let query = query.to_string();
102        if !case_sensitive && !query.is_ascii() {
103            // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
104            // Fallback to regex search as recommended by
105            // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
106            return Self::regex(
107                regex::escape(&query),
108                whole_word,
109                case_sensitive,
110                include_ignored,
111                false,
112                files_to_include,
113                files_to_exclude,
114                false,
115                buffers,
116            );
117        }
118        let search = AhoCorasickBuilder::new()
119            .ascii_case_insensitive(!case_sensitive)
120            .build([&query])?;
121        let inner = SearchInputs {
122            query: query.into(),
123            files_to_exclude,
124            files_to_include,
125            match_full_paths,
126            buffers,
127        };
128        Ok(Self::Text {
129            search,
130            replacement: None,
131            whole_word,
132            case_sensitive,
133            include_ignored,
134            inner,
135        })
136    }
137
138    /// Create a regex query
139    ///
140    /// If `match_full_paths` is true, include/exclude patterns will be matched against fully qualified project paths
141    /// beginning with a project root name. If false, they will be matched against project-relative paths (which don't start
142    /// with their respective project root).
143    pub fn regex(
144        query: impl ToString,
145        whole_word: bool,
146        mut case_sensitive: bool,
147        include_ignored: bool,
148        one_match_per_line: bool,
149        files_to_include: PathMatcher,
150        files_to_exclude: PathMatcher,
151        match_full_paths: bool,
152        buffers: Option<Vec<Entity<Buffer>>>,
153    ) -> Result<Self> {
154        let mut query = query.to_string();
155        let initial_query = Arc::from(query.as_str());
156
157        if let Some((case_sensitive_from_pattern, new_query)) =
158            Self::case_sensitive_from_pattern(&query)
159        {
160            case_sensitive = case_sensitive_from_pattern;
161            query = new_query
162        }
163
164        if whole_word {
165            let mut word_query = String::new();
166            if let Some(first) = query.get(0..1)
167                && WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x)
168            {
169                word_query.push_str("\\b");
170            }
171            word_query.push_str(&query);
172            if let Some(last) = query.get(query.len() - 1..)
173                && WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x)
174            {
175                word_query.push_str("\\b");
176            }
177            query = word_query
178        }
179
180        let multiline = query.contains('\n') || query.contains("\\n");
181        let regex = RegexBuilder::new(&query)
182            .case_insensitive(!case_sensitive)
183            .build()?;
184        let inner = SearchInputs {
185            query: initial_query,
186            files_to_exclude,
187            files_to_include,
188            match_full_paths,
189            buffers,
190        };
191        Ok(Self::Regex {
192            regex,
193            replacement: None,
194            multiline,
195            whole_word,
196            case_sensitive,
197            include_ignored,
198            inner,
199            one_match_per_line,
200        })
201    }
202
203    /// Extracts case sensitivity settings from pattern items in the provided
204    /// query and returns the same query, with the pattern items removed.
205    ///
206    /// The following pattern modifiers are supported:
207    ///
208    /// - `\c` (case_sensitive: false)
209    /// - `\C` (case_sensitive: true)
210    ///
211    /// If no pattern item were found, `None` will be returned.
212    fn case_sensitive_from_pattern(query: &str) -> Option<(bool, String)> {
213        if !(query.contains("\\c") || query.contains("\\C")) {
214            return None;
215        }
216
217        let mut was_escaped = false;
218        let mut new_query = String::new();
219        let mut is_case_sensitive = None;
220
221        for c in query.chars() {
222            if was_escaped {
223                if c == 'c' {
224                    is_case_sensitive = Some(false);
225                } else if c == 'C' {
226                    is_case_sensitive = Some(true);
227                } else {
228                    new_query.push('\\');
229                    new_query.push(c);
230                }
231                was_escaped = false
232            } else if c == '\\' {
233                was_escaped = true
234            } else {
235                new_query.push(c);
236            }
237        }
238
239        is_case_sensitive.map(|c| (c, new_query))
240    }
241
242    pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
243        let files_to_include = if message.files_to_include.is_empty() {
244            message
245                .files_to_include_legacy
246                .split(',')
247                .map(str::trim)
248                .filter(|&glob_str| !glob_str.is_empty())
249                .map(|s| s.to_string())
250                .collect()
251        } else {
252            message.files_to_include
253        };
254
255        let files_to_exclude = if message.files_to_exclude.is_empty() {
256            message
257                .files_to_exclude_legacy
258                .split(',')
259                .map(str::trim)
260                .filter(|&glob_str| !glob_str.is_empty())
261                .map(|s| s.to_string())
262                .collect()
263        } else {
264            message.files_to_exclude
265        };
266
267        if message.regex {
268            Self::regex(
269                message.query,
270                message.whole_word,
271                message.case_sensitive,
272                message.include_ignored,
273                false,
274                PathMatcher::new(files_to_include)?,
275                PathMatcher::new(files_to_exclude)?,
276                message.match_full_paths,
277                None, // search opened only don't need search remote
278            )
279        } else {
280            Self::text(
281                message.query,
282                message.whole_word,
283                message.case_sensitive,
284                message.include_ignored,
285                PathMatcher::new(files_to_include)?,
286                PathMatcher::new(files_to_exclude)?,
287                false,
288                None, // search opened only don't need search remote
289            )
290        }
291    }
292
293    pub fn with_replacement(mut self, new_replacement: String) -> Self {
294        match self {
295            Self::Text {
296                ref mut replacement,
297                ..
298            }
299            | Self::Regex {
300                ref mut replacement,
301                ..
302            } => {
303                *replacement = Some(new_replacement);
304                self
305            }
306        }
307    }
308
309    pub fn to_proto(&self) -> proto::SearchQuery {
310        let files_to_include = self.files_to_include().sources().to_vec();
311        let files_to_exclude = self.files_to_exclude().sources().to_vec();
312        proto::SearchQuery {
313            query: self.as_str().to_string(),
314            regex: self.is_regex(),
315            whole_word: self.whole_word(),
316            case_sensitive: self.case_sensitive(),
317            include_ignored: self.include_ignored(),
318            files_to_include: files_to_include.clone(),
319            files_to_exclude: files_to_exclude.clone(),
320            match_full_paths: self.match_full_paths(),
321            // Populate legacy fields for backwards compatibility
322            files_to_include_legacy: files_to_include.join(","),
323            files_to_exclude_legacy: files_to_exclude.join(","),
324        }
325    }
326
327    pub(crate) fn detect(
328        &self,
329        mut reader: BufReader<Box<dyn Read + Send + Sync>>,
330    ) -> Result<bool> {
331        if self.as_str().is_empty() {
332            return Ok(false);
333        }
334
335        match self {
336            Self::Text { search, .. } => {
337                let mat = search.stream_find_iter(reader).next();
338                match mat {
339                    Some(Ok(_)) => Ok(true),
340                    Some(Err(err)) => Err(err.into()),
341                    None => Ok(false),
342                }
343            }
344            Self::Regex {
345                regex, multiline, ..
346            } => {
347                if *multiline {
348                    let mut text = String::new();
349                    if let Err(err) = reader.read_to_string(&mut text) {
350                        Err(err.into())
351                    } else {
352                        Ok(regex.find(&text)?.is_some())
353                    }
354                } else {
355                    for line in reader.lines() {
356                        let line = line?;
357                        if regex.find(&line)?.is_some() {
358                            return Ok(true);
359                        }
360                    }
361                    Ok(false)
362                }
363            }
364        }
365    }
366    /// Returns the replacement text for this `SearchQuery`.
367    pub fn replacement(&self) -> Option<&str> {
368        match self {
369            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
370                replacement.as_deref()
371            }
372        }
373    }
374    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
375    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
376        match self {
377            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
378            SearchQuery::Regex {
379                regex, replacement, ..
380            } => {
381                if let Some(replacement) = replacement {
382                    static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
383                        LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
384                    let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
385                        replacement,
386                        |c: &Captures| match c.get(0).unwrap().as_str() {
387                            r"\\" => "\\",
388                            r"\n" => "\n",
389                            r"\t" => "\t",
390                            x => unreachable!("Unexpected escape sequence: {}", x),
391                        },
392                    );
393                    Some(regex.replace(text, replacement))
394                } else {
395                    None
396                }
397            }
398        }
399    }
400
401    pub async fn search(
402        &self,
403        buffer: &BufferSnapshot,
404        subrange: Option<Range<usize>>,
405    ) -> Vec<Range<usize>> {
406        const YIELD_INTERVAL: usize = 20000;
407
408        if self.as_str().is_empty() {
409            return Default::default();
410        }
411
412        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
413        let rope = if let Some(range) = subrange {
414            buffer.as_rope().slice(range)
415        } else {
416            buffer.as_rope().clone()
417        };
418
419        let mut matches = Vec::new();
420        match self {
421            Self::Text {
422                search, whole_word, ..
423            } => {
424                for (ix, mat) in search
425                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
426                    .enumerate()
427                {
428                    if (ix + 1) % YIELD_INTERVAL == 0 {
429                        yield_now().await;
430                    }
431
432                    let mat = mat.unwrap();
433                    if *whole_word {
434                        let classifier = buffer.char_classifier_at(range_offset + mat.start());
435
436                        let prev_kind = rope
437                            .reversed_chars_at(mat.start())
438                            .next()
439                            .map(|c| classifier.kind(c));
440                        let start_kind =
441                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
442                        let end_kind =
443                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
444                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
445                        if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
446                            || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
447                        {
448                            continue;
449                        }
450                    }
451                    matches.push(mat.start()..mat.end())
452                }
453            }
454
455            Self::Regex {
456                regex, multiline, ..
457            } => {
458                if *multiline {
459                    let text = rope.to_string();
460                    for (ix, mat) in regex.find_iter(&text).enumerate() {
461                        if (ix + 1) % YIELD_INTERVAL == 0 {
462                            yield_now().await;
463                        }
464
465                        if let Ok(mat) = mat {
466                            matches.push(mat.start()..mat.end());
467                        }
468                    }
469                } else {
470                    let mut line = String::new();
471                    let mut line_offset = 0;
472                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
473                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
474                            yield_now().await;
475                        }
476
477                        for (newline_ix, text) in chunk.split('\n').enumerate() {
478                            if newline_ix > 0 {
479                                for mat in regex.find_iter(&line).flatten() {
480                                    let start = line_offset + mat.start();
481                                    let end = line_offset + mat.end();
482                                    matches.push(start..end);
483                                    if self.one_match_per_line() == Some(true) {
484                                        break;
485                                    }
486                                }
487
488                                line_offset += line.len() + 1;
489                                line.clear();
490                            }
491                            line.push_str(text);
492                        }
493                    }
494                }
495            }
496        }
497
498        matches
499    }
500
501    pub fn is_empty(&self) -> bool {
502        self.as_str().is_empty()
503    }
504
505    pub fn as_str(&self) -> &str {
506        self.as_inner().as_str()
507    }
508
509    pub fn whole_word(&self) -> bool {
510        match self {
511            Self::Text { whole_word, .. } => *whole_word,
512            Self::Regex { whole_word, .. } => *whole_word,
513        }
514    }
515
516    pub fn case_sensitive(&self) -> bool {
517        match self {
518            Self::Text { case_sensitive, .. } => *case_sensitive,
519            Self::Regex { case_sensitive, .. } => *case_sensitive,
520        }
521    }
522
523    pub fn include_ignored(&self) -> bool {
524        match self {
525            Self::Text {
526                include_ignored, ..
527            } => *include_ignored,
528            Self::Regex {
529                include_ignored, ..
530            } => *include_ignored,
531        }
532    }
533
534    pub fn is_regex(&self) -> bool {
535        matches!(self, Self::Regex { .. })
536    }
537
538    pub fn files_to_include(&self) -> &PathMatcher {
539        self.as_inner().files_to_include()
540    }
541
542    pub fn files_to_exclude(&self) -> &PathMatcher {
543        self.as_inner().files_to_exclude()
544    }
545
546    pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
547        self.as_inner().buffers.as_ref()
548    }
549
550    pub fn is_opened_only(&self) -> bool {
551        self.as_inner().buffers.is_some()
552    }
553
554    pub fn filters_path(&self) -> bool {
555        !(self.files_to_exclude().sources().is_empty()
556            && self.files_to_include().sources().is_empty())
557    }
558
559    pub fn match_full_paths(&self) -> bool {
560        self.as_inner().match_full_paths
561    }
562
563    /// Check match full paths to determine whether you're required to pass a fully qualified
564    /// project path (starts with a project root).
565    pub fn match_path(&self, file_path: &Path) -> bool {
566        let mut path = file_path.to_path_buf();
567        loop {
568            if self.files_to_exclude().is_match(&path) {
569                return false;
570            } else if self.files_to_include().sources().is_empty()
571                || self.files_to_include().is_match(&path)
572            {
573                return true;
574            } else if !path.pop() {
575                return false;
576            }
577        }
578    }
579    pub fn as_inner(&self) -> &SearchInputs {
580        match self {
581            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
582        }
583    }
584
585    /// Whether this search should replace only one match per line, instead of
586    /// all matches.
587    /// Returns `None` for text searches, as only regex searches support this
588    /// option.
589    pub fn one_match_per_line(&self) -> Option<bool> {
590        match self {
591            Self::Regex {
592                one_match_per_line, ..
593            } => Some(*one_match_per_line),
594            Self::Text { .. } => None,
595        }
596    }
597}
598
599#[cfg(test)]
600mod tests {
601    use super::*;
602
603    #[test]
604    fn path_matcher_creation_for_valid_paths() {
605        for valid_path in [
606            "file",
607            "Cargo.toml",
608            ".DS_Store",
609            "~/dir/another_dir/",
610            "./dir/file",
611            "dir/[a-z].txt",
612            "../dir/filé",
613        ] {
614            let path_matcher = PathMatcher::new(&[valid_path.to_owned()]).unwrap_or_else(|e| {
615                panic!("Valid path {valid_path} should be accepted, but got: {e}")
616            });
617            assert!(
618                path_matcher.is_match(valid_path),
619                "Path matcher for valid path {valid_path} should match itself"
620            )
621        }
622    }
623
624    #[test]
625    fn path_matcher_creation_for_globs() {
626        for invalid_glob in ["dir/[].txt", "dir/[a-z.txt", "dir/{file"] {
627            match PathMatcher::new(&[invalid_glob.to_owned()]) {
628                Ok(_) => panic!("Invalid glob {invalid_glob} should not be accepted"),
629                Err(_expected) => {}
630            }
631        }
632
633        for valid_glob in [
634            "dir/?ile",
635            "dir/*.txt",
636            "dir/**/file",
637            "dir/[a-z].txt",
638            "{dir,file}",
639        ] {
640            match PathMatcher::new(&[valid_glob.to_owned()]) {
641                Ok(_expected) => {}
642                Err(e) => panic!("Valid glob should be accepted, but got: {e}"),
643            }
644        }
645    }
646
647    #[test]
648    fn test_case_sensitive_pattern_items() {
649        let case_sensitive = false;
650        let search_query = SearchQuery::regex(
651            "test\\C",
652            false,
653            case_sensitive,
654            false,
655            false,
656            Default::default(),
657            Default::default(),
658            false,
659            None,
660        )
661        .expect("Should be able to create a regex SearchQuery");
662
663        assert_eq!(
664            search_query.case_sensitive(),
665            true,
666            "Case sensitivity should be enabled when \\C pattern item is present in the query."
667        );
668
669        let case_sensitive = true;
670        let search_query = SearchQuery::regex(
671            "test\\c",
672            true,
673            case_sensitive,
674            false,
675            false,
676            Default::default(),
677            Default::default(),
678            false,
679            None,
680        )
681        .expect("Should be able to create a regex SearchQuery");
682
683        assert_eq!(
684            search_query.case_sensitive(),
685            false,
686            "Case sensitivity should be disabled when \\c pattern item is present, even if initially set to true."
687        );
688
689        let case_sensitive = false;
690        let search_query = SearchQuery::regex(
691            "test\\c\\C",
692            false,
693            case_sensitive,
694            false,
695            false,
696            Default::default(),
697            Default::default(),
698            false,
699            None,
700        )
701        .expect("Should be able to create a regex SearchQuery");
702
703        assert_eq!(
704            search_query.case_sensitive(),
705            true,
706            "Case sensitivity should be enabled when \\C is the last pattern item, even after a \\c."
707        );
708
709        let case_sensitive = false;
710        let search_query = SearchQuery::regex(
711            "tests\\\\C",
712            false,
713            case_sensitive,
714            false,
715            false,
716            Default::default(),
717            Default::default(),
718            false,
719            None,
720        )
721        .expect("Should be able to create a regex SearchQuery");
722
723        assert_eq!(
724            search_query.case_sensitive(),
725            false,
726            "Case sensitivity should not be enabled when \\C pattern item is preceded by a backslash."
727        );
728    }
729}