search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use fancy_regex::{Captures, Regex, RegexBuilder};
  5use gpui::Entity;
  6use itertools::Itertools as _;
  7use language::{Buffer, BufferSnapshot, CharKind};
  8use smol::future::yield_now;
  9use std::{
 10    borrow::Cow,
 11    io::{BufRead, BufReader, Read},
 12    ops::Range,
 13    sync::{Arc, LazyLock},
 14};
 15use text::Anchor;
 16use util::{
 17    paths::{PathMatcher, PathStyle},
 18    rel_path::RelPath,
 19};
 20
/// Value describing one outcome of a search.
#[derive(Debug)]
pub enum SearchResult {
    /// A buffer entity together with a list of anchor ranges inside it.
    // NOTE(review): presumably `ranges` are the match locations in `buffer` — confirm
    // against the code that constructs this variant.
    Buffer {
        buffer: Entity<Buffer>,
        ranges: Vec<Range<Anchor>>,
    },
    /// Data-less marker variant.
    // NOTE(review): by its name this signals that a result limit was hit; the limit itself
    // is not enforced anywhere in this file.
    LimitReached,
}
 29
/// Identifies one of the three textual inputs that make up a search: the query itself,
/// the include filter, or the exclude filter.
// NOTE(review): not referenced elsewhere in this file; presumably used by UI code to tell
// the input fields apart — confirm against callers.
#[derive(Clone, Copy, PartialEq)]
pub enum SearchInputKind {
    Query,
    Include,
    Exclude,
}
 36
/// The inputs shared by every kind of [`SearchQuery`]: the query string as supplied to the
/// constructor plus the path filters and buffer restriction.
#[derive(Clone, Debug)]
pub struct SearchInputs {
    // Query text as passed to `SearchQuery::text` / `SearchQuery::regex`, before any
    // rewriting (case modifiers, `\b` wrapping) is applied to build the matcher.
    query: Arc<str>,
    // Paths must match this matcher to be searched (an empty matcher accepts everything,
    // see `SearchQuery::match_path`).
    files_to_include: PathMatcher,
    // Paths matching this matcher are never searched.
    files_to_exclude: PathMatcher,
    // Whether include/exclude patterns are meant to be matched against fully qualified
    // project paths; only stored and exposed here, interpreted by callers.
    match_full_paths: bool,
    // When `Some`, the search is restricted to these buffers (see `is_opened_only`).
    buffers: Option<Vec<Entity<Buffer>>>,
}
 45
 46impl SearchInputs {
 47    pub fn as_str(&self) -> &str {
 48        self.query.as_ref()
 49    }
 50    pub fn files_to_include(&self) -> &PathMatcher {
 51        &self.files_to_include
 52    }
 53    pub fn files_to_exclude(&self) -> &PathMatcher {
 54        &self.files_to_exclude
 55    }
 56    pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
 57        &self.buffers
 58    }
 59}
/// A compiled search query: either a literal text search or a regular-expression search.
///
/// Both variants carry the flags they were built with, an optional replacement string
/// (unset at construction, see `with_replacement`) and the shared [`SearchInputs`].
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal search backed by an Aho-Corasick automaton built from the query string.
    Text {
        search: AhoCorasick,
        replacement: Option<String>,
        whole_word: bool,
        case_sensitive: bool,
        include_ignored: bool,
        inner: SearchInputs,
    },
    /// Regular-expression search backed by a `fancy_regex::Regex`.
    Regex {
        regex: Regex,
        replacement: Option<String>,
        // True when the pattern contains a newline or a literal `\n` escape; such queries
        // are matched against whole texts instead of line by line.
        multiline: bool,
        whole_word: bool,
        case_sensitive: bool,
        include_ignored: bool,
        // When true, line-by-line searching stops after the first match on each line.
        one_match_per_line: bool,
        inner: SearchInputs,
    },
}
 81
// Lazily compiled `\B` (non-word-boundary) regex. `SearchQuery::regex` runs it against the
// first and last character of a whole-word query: if `\B` does NOT match, that edge is a
// word character and a `\b` anchor is added on that side of the pattern.
static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
    RegexBuilder::new(r"\B")
        .build()
        .expect("Failed to create WORD_MATCH_TEST")
});
 87
 88impl SearchQuery {
 89    /// Create a text query
 90    ///
 91    /// If `match_full_paths` is true, include/exclude patterns will always be matched against fully qualified project paths beginning with a project root.
 92    /// If `match_full_paths` is false, patterns will be matched against full paths only when the project has multiple roots.
 93    pub fn text(
 94        query: impl ToString,
 95        whole_word: bool,
 96        case_sensitive: bool,
 97        include_ignored: bool,
 98        files_to_include: PathMatcher,
 99        files_to_exclude: PathMatcher,
100        match_full_paths: bool,
101        buffers: Option<Vec<Entity<Buffer>>>,
102    ) -> Result<Self> {
103        let query = query.to_string();
104        if !case_sensitive && !query.is_ascii() {
105            // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
106            // Fallback to regex search as recommended by
107            // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
108            return Self::regex(
109                regex::escape(&query),
110                whole_word,
111                case_sensitive,
112                include_ignored,
113                false,
114                files_to_include,
115                files_to_exclude,
116                false,
117                buffers,
118            );
119        }
120        let search = AhoCorasickBuilder::new()
121            .ascii_case_insensitive(!case_sensitive)
122            .build([&query])?;
123        let inner = SearchInputs {
124            query: query.into(),
125            files_to_exclude,
126            files_to_include,
127            match_full_paths,
128            buffers,
129        };
130        Ok(Self::Text {
131            search,
132            replacement: None,
133            whole_word,
134            case_sensitive,
135            include_ignored,
136            inner,
137        })
138    }
139
140    /// Create a regex query
141    ///
142    /// If `match_full_paths` is true, include/exclude patterns will be matched against fully qualified project paths
143    /// beginning with a project root name. If false, they will be matched against project-relative paths (which don't start
144    /// with their respective project root).
145    pub fn regex(
146        query: impl ToString,
147        whole_word: bool,
148        mut case_sensitive: bool,
149        include_ignored: bool,
150        one_match_per_line: bool,
151        files_to_include: PathMatcher,
152        files_to_exclude: PathMatcher,
153        match_full_paths: bool,
154        buffers: Option<Vec<Entity<Buffer>>>,
155    ) -> Result<Self> {
156        let mut query = query.to_string();
157        let initial_query = Arc::from(query.as_str());
158
159        if let Some((case_sensitive_from_pattern, new_query)) =
160            Self::case_sensitive_from_pattern(&query)
161        {
162            case_sensitive = case_sensitive_from_pattern;
163            query = new_query
164        }
165
166        if whole_word {
167            let mut word_query = String::new();
168            if let Some(first) = query.get(0..1)
169                && WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x)
170            {
171                word_query.push_str("\\b");
172            }
173            word_query.push_str(&query);
174            if let Some(last) = query.get(query.len() - 1..)
175                && WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x)
176            {
177                word_query.push_str("\\b");
178            }
179            query = word_query
180        }
181
182        let multiline = query.contains('\n') || query.contains("\\n");
183        let regex = RegexBuilder::new(&query)
184            .case_insensitive(!case_sensitive)
185            .build()?;
186        let inner = SearchInputs {
187            query: initial_query,
188            files_to_exclude,
189            files_to_include,
190            match_full_paths,
191            buffers,
192        };
193        Ok(Self::Regex {
194            regex,
195            replacement: None,
196            multiline,
197            whole_word,
198            case_sensitive,
199            include_ignored,
200            inner,
201            one_match_per_line,
202        })
203    }
204
205    /// Extracts case sensitivity settings from pattern items in the provided
206    /// query and returns the same query, with the pattern items removed.
207    ///
208    /// The following pattern modifiers are supported:
209    ///
210    /// - `\c` (case_sensitive: false)
211    /// - `\C` (case_sensitive: true)
212    ///
213    /// If no pattern item were found, `None` will be returned.
214    fn case_sensitive_from_pattern(query: &str) -> Option<(bool, String)> {
215        if !(query.contains("\\c") || query.contains("\\C")) {
216            return None;
217        }
218
219        let mut was_escaped = false;
220        let mut new_query = String::new();
221        let mut is_case_sensitive = None;
222
223        for c in query.chars() {
224            if was_escaped {
225                if c == 'c' {
226                    is_case_sensitive = Some(false);
227                } else if c == 'C' {
228                    is_case_sensitive = Some(true);
229                } else {
230                    new_query.push('\\');
231                    new_query.push(c);
232                }
233                was_escaped = false
234            } else if c == '\\' {
235                was_escaped = true
236            } else {
237                new_query.push(c);
238            }
239        }
240
241        is_case_sensitive.map(|c| (c, new_query))
242    }
243
244    pub fn from_proto(message: proto::SearchQuery, path_style: PathStyle) -> Result<Self> {
245        let files_to_include = if message.files_to_include.is_empty() {
246            message
247                .files_to_include_legacy
248                .split(',')
249                .map(str::trim)
250                .filter(|&glob_str| !glob_str.is_empty())
251                .map(|s| s.to_string())
252                .collect()
253        } else {
254            message.files_to_include
255        };
256
257        let files_to_exclude = if message.files_to_exclude.is_empty() {
258            message
259                .files_to_exclude_legacy
260                .split(',')
261                .map(str::trim)
262                .filter(|&glob_str| !glob_str.is_empty())
263                .map(|s| s.to_string())
264                .collect()
265        } else {
266            message.files_to_exclude
267        };
268
269        if message.regex {
270            Self::regex(
271                message.query,
272                message.whole_word,
273                message.case_sensitive,
274                message.include_ignored,
275                false,
276                PathMatcher::new(files_to_include, path_style)?,
277                PathMatcher::new(files_to_exclude, path_style)?,
278                message.match_full_paths,
279                None, // search opened only don't need search remote
280            )
281        } else {
282            Self::text(
283                message.query,
284                message.whole_word,
285                message.case_sensitive,
286                message.include_ignored,
287                PathMatcher::new(files_to_include, path_style)?,
288                PathMatcher::new(files_to_exclude, path_style)?,
289                false,
290                None, // search opened only don't need search remote
291            )
292        }
293    }
294
295    pub fn with_replacement(mut self, new_replacement: String) -> Self {
296        match self {
297            Self::Text {
298                ref mut replacement,
299                ..
300            }
301            | Self::Regex {
302                ref mut replacement,
303                ..
304            } => {
305                *replacement = Some(new_replacement);
306                self
307            }
308        }
309    }
310
311    pub fn to_proto(&self) -> proto::SearchQuery {
312        let mut files_to_include = self.files_to_include().sources();
313        let mut files_to_exclude = self.files_to_exclude().sources();
314        proto::SearchQuery {
315            query: self.as_str().to_string(),
316            regex: self.is_regex(),
317            whole_word: self.whole_word(),
318            case_sensitive: self.case_sensitive(),
319            include_ignored: self.include_ignored(),
320            files_to_include: files_to_include.clone().map(ToOwned::to_owned).collect(),
321            files_to_exclude: files_to_exclude.clone().map(ToOwned::to_owned).collect(),
322            match_full_paths: self.match_full_paths(),
323            // Populate legacy fields for backwards compatibility
324            files_to_include_legacy: files_to_include.join(","),
325            files_to_exclude_legacy: files_to_exclude.join(","),
326        }
327    }
328
329    pub(crate) async fn detect(
330        &self,
331        mut reader: BufReader<Box<dyn Read + Send + Sync>>,
332    ) -> Result<bool> {
333        let query_str = self.as_str();
334        let needle_len = query_str.len();
335        if needle_len == 0 {
336            return Ok(false);
337        }
338        if self.as_str().is_empty() {
339            return Ok(false);
340        }
341
342        let mut text = String::new();
343        let mut bytes_read = 0;
344        // Yield from this function every 128 bytes scanned.
345        const YIELD_THRESHOLD: usize = 128;
346        match self {
347            Self::Text { search, .. } => {
348                if query_str.contains('\n') {
349                    reader.read_to_string(&mut text)?;
350                    Ok(search.is_match(&text))
351                } else {
352                    // Yield from this function every 128 bytes scanned.
353                    const YIELD_THRESHOLD: usize = 128;
354                    while reader.read_line(&mut text)? > 0 {
355                        if search.is_match(&text) {
356                            return Ok(true);
357                        }
358                        bytes_read += text.len();
359                        if bytes_read >= YIELD_THRESHOLD {
360                            bytes_read = 0;
361                            smol::future::yield_now().await;
362                        }
363                        text.clear();
364                    }
365                    Ok(false)
366                }
367            }
368            Self::Regex {
369                regex, multiline, ..
370            } => {
371                if *multiline {
372                    if let Err(err) = reader.read_to_string(&mut text) {
373                        Err(err.into())
374                    } else {
375                        Ok(regex.is_match(&text)?)
376                    }
377                } else {
378                    while reader.read_line(&mut text)? > 0 {
379                        if regex.is_match(&text)? {
380                            return Ok(true);
381                        }
382                        bytes_read += text.len();
383                        if bytes_read >= YIELD_THRESHOLD {
384                            bytes_read = 0;
385                            smol::future::yield_now().await;
386                        }
387                        text.clear();
388                    }
389                    Ok(false)
390                }
391            }
392        }
393    }
394    /// Returns the replacement text for this `SearchQuery`.
395    pub fn replacement(&self) -> Option<&str> {
396        match self {
397            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
398                replacement.as_deref()
399            }
400        }
401    }
402    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
403    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
404        match self {
405            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
406            SearchQuery::Regex {
407                regex, replacement, ..
408            } => {
409                if let Some(replacement) = replacement {
410                    static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
411                        LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
412                    let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
413                        replacement,
414                        |c: &Captures| match c.get(0).unwrap().as_str() {
415                            r"\\" => "\\",
416                            r"\n" => "\n",
417                            r"\t" => "\t",
418                            x => unreachable!("Unexpected escape sequence: {}", x),
419                        },
420                    );
421                    Some(regex.replace(text, replacement))
422                } else {
423                    None
424                }
425            }
426        }
427    }
428
    /// Finds every match of this query in `buffer`, optionally limited to `subrange`.
    ///
    /// The returned ranges are offsets into the rope that was actually searched: when
    /// `subrange` is given the rope is sliced first, so the offsets are relative to the
    /// start of that slice (its start is only added back when looking up the character
    /// classifier). An empty query produces no matches.
    ///
    /// Text queries stream the rope through the Aho-Corasick automaton and, for whole-word
    /// searches, drop matches whose first or last character continues a word. Multiline
    /// regexes are run over the full text; other regexes are run line by line. Regex match
    /// errors are skipped rather than reported.
    pub async fn search(
        &self,
        buffer: &BufferSnapshot,
        subrange: Option<Range<usize>>,
    ) -> Vec<Range<usize>> {
        // Number of loop iterations (matches or chunks) between cooperative yields.
        const YIELD_INTERVAL: usize = 20000;

        if self.as_str().is_empty() {
            return Default::default();
        }

        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
        let rope = if let Some(range) = subrange {
            buffer.as_rope().slice(range)
        } else {
            buffer.as_rope().clone()
        };

        let mut matches = Vec::new();
        match self {
            Self::Text {
                search, whole_word, ..
            } => {
                for (ix, mat) in search
                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
                    .enumerate()
                {
                    if (ix + 1) % YIELD_INTERVAL == 0 {
                        yield_now().await;
                    }

                    // Panics if the streaming reader reports an error.
                    let mat = mat.unwrap();
                    if *whole_word {
                        // The classifier is looked up by buffer offset, hence `range_offset`.
                        let classifier = buffer.char_classifier_at(range_offset + mat.start());

                        // Character kinds just outside and just inside each end of the match.
                        let prev_kind = rope
                            .reversed_chars_at(mat.start())
                            .next()
                            .map(|c| classifier.kind(c));
                        let start_kind =
                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
                        let end_kind =
                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
                        // Reject the match when a word character sits on both sides of either
                        // boundary, i.e. the match starts or ends in the middle of a word.
                        if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
                            || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
                        {
                            continue;
                        }
                    }
                    matches.push(mat.start()..mat.end())
                }
            }

            Self::Regex {
                regex, multiline, ..
            } => {
                if *multiline {
                    // Matches may span lines, so materialize the whole rope as one string.
                    // `one_match_per_line` is not consulted on this path.
                    let text = rope.to_string();
                    for (ix, mat) in regex.find_iter(&text).enumerate() {
                        if (ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        if let Ok(mat) = mat {
                            matches.push(mat.start()..mat.end());
                        }
                    }
                } else {
                    // `line` accumulates the current line across chunk boundaries and
                    // `line_offset` is the offset of its first byte within the rope.
                    let mut line = String::new();
                    let mut line_offset = 0;
                    // The extra "\n" chunk flushes the final line, which has no terminator.
                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        for (newline_ix, text) in chunk.split('\n').enumerate() {
                            // Every piece after the first follows a newline, which means the
                            // line accumulated so far is complete and can be searched.
                            if newline_ix > 0 {
                                for mat in regex.find_iter(&line).flatten() {
                                    let start = line_offset + mat.start();
                                    let end = line_offset + mat.end();
                                    matches.push(start..end);
                                    if self.one_match_per_line() == Some(true) {
                                        break;
                                    }
                                }

                                // +1 accounts for the newline that ended the line.
                                line_offset += line.len() + 1;
                                line.clear();
                            }
                            line.push_str(text);
                        }
                    }
                }
            }
        }

        matches
    }
528
529    pub fn is_empty(&self) -> bool {
530        self.as_str().is_empty()
531    }
532
533    pub fn as_str(&self) -> &str {
534        self.as_inner().as_str()
535    }
536
537    pub fn whole_word(&self) -> bool {
538        match self {
539            Self::Text { whole_word, .. } => *whole_word,
540            Self::Regex { whole_word, .. } => *whole_word,
541        }
542    }
543
544    pub fn case_sensitive(&self) -> bool {
545        match self {
546            Self::Text { case_sensitive, .. } => *case_sensitive,
547            Self::Regex { case_sensitive, .. } => *case_sensitive,
548        }
549    }
550
551    pub fn include_ignored(&self) -> bool {
552        match self {
553            Self::Text {
554                include_ignored, ..
555            } => *include_ignored,
556            Self::Regex {
557                include_ignored, ..
558            } => *include_ignored,
559        }
560    }
561
562    pub fn is_regex(&self) -> bool {
563        matches!(self, Self::Regex { .. })
564    }
565
566    pub fn files_to_include(&self) -> &PathMatcher {
567        self.as_inner().files_to_include()
568    }
569
570    pub fn files_to_exclude(&self) -> &PathMatcher {
571        self.as_inner().files_to_exclude()
572    }
573
574    pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
575        self.as_inner().buffers.as_ref()
576    }
577
578    pub fn is_opened_only(&self) -> bool {
579        self.as_inner().buffers.is_some()
580    }
581
582    pub fn filters_path(&self) -> bool {
583        !(self.files_to_exclude().sources().next().is_none()
584            && self.files_to_include().sources().next().is_none())
585    }
586
587    pub fn match_full_paths(&self) -> bool {
588        self.as_inner().match_full_paths
589    }
590
591    /// Check match full paths to determine whether you're required to pass a fully qualified
592    /// project path (starts with a project root).
593    pub fn match_path(&self, file_path: &RelPath) -> bool {
594        let mut path = file_path.to_rel_path_buf();
595        loop {
596            if self.files_to_exclude().is_match(&path) {
597                return false;
598            } else if self.files_to_include().sources().next().is_none()
599                || self.files_to_include().is_match(&path)
600            {
601                return true;
602            } else if !path.pop() {
603                return false;
604            }
605        }
606    }
607    pub fn as_inner(&self) -> &SearchInputs {
608        match self {
609            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
610        }
611    }
612
613    /// Whether this search should replace only one match per line, instead of
614    /// all matches.
615    /// Returns `None` for text searches, as only regex searches support this
616    /// option.
617    pub fn one_match_per_line(&self) -> Option<bool> {
618        match self {
619            Self::Regex {
620                one_match_per_line, ..
621            } => Some(*one_match_per_line),
622            Self::Text { .. } => None,
623        }
624    }
625}
626
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a regex query for `pattern` with no path filters and no buffer restriction,
    /// panicking if construction fails.
    fn regex_query(pattern: &str, whole_word: bool, case_sensitive: bool) -> SearchQuery {
        SearchQuery::regex(
            pattern,
            whole_word,
            case_sensitive,
            false,
            false,
            Default::default(),
            Default::default(),
            false,
            None,
        )
        .expect("Should be able to create a regex SearchQuery")
    }

    /// Plain paths are accepted as patterns and every such pattern matches itself.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
        ];
        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()], PathStyle::local())
            {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            let rel_path = RelPath::new(valid_path.as_ref(), PathStyle::local()).unwrap();
            assert!(
                path_matcher.is_match(&rel_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs are rejected while well-formed ones are accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()], PathStyle::local()).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted")
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()], PathStyle::local()) {
                panic!("Valid glob should be accepted, but got: {e}")
            }
        }
    }

    /// `\c` / `\C` pattern items override the case sensitivity passed to the constructor.
    #[test]
    fn test_case_sensitive_pattern_items() {
        let upper_modifier = regex_query("test\\C", false, false);
        assert_eq!(
            upper_modifier.case_sensitive(),
            true,
            "Case sensitivity should be enabled when \\C pattern item is present in the query."
        );

        let lower_modifier = regex_query("test\\c", true, true);
        assert_eq!(
            lower_modifier.case_sensitive(),
            false,
            "Case sensitivity should be disabled when \\c pattern item is present, even if initially set to true."
        );

        let both_modifiers = regex_query("test\\c\\C", false, false);
        assert_eq!(
            both_modifiers.case_sensitive(),
            true,
            "Case sensitivity should be enabled when \\C is the last pattern item, even after a \\c."
        );

        let escaped_backslash = regex_query("tests\\\\C", false, false);
        assert_eq!(
            escaped_backslash.case_sensitive(),
            false,
            "Case sensitivity should not be enabled when \\C pattern item is preceded by a backslash."
        );
    }
}