search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use fancy_regex::{Captures, Regex, RegexBuilder};
  5use gpui::Entity;
  6use itertools::Itertools as _;
  7use language::{Buffer, BufferSnapshot, CharKind};
  8use smol::future::yield_now;
  9use std::{
 10    borrow::Cow,
 11    io::{BufRead, BufReader, Read},
 12    ops::Range,
 13    sync::{Arc, LazyLock},
 14};
 15use text::Anchor;
 16use util::{
 17    paths::{PathMatcher, PathStyle},
 18    rel_path::RelPath,
 19};
 20
/// One item of search output.
///
/// NOTE(review): the producer of these values is not in this file; the variant
/// descriptions below are inferred from the names and field types — confirm
/// against the code that emits them.
#[derive(Debug)]
pub enum SearchResult {
    /// A buffer together with a list of anchor ranges inside it
    /// (presumably the locations of the matches — TODO confirm).
    Buffer {
        buffer: Entity<Buffer>,
        ranges: Vec<Range<Anchor>>,
    },
    /// Presumably signals that the search stopped because a result limit was hit
    /// — TODO confirm against the producer.
    LimitReached,
}
 29
/// Names one of three search inputs.
///
/// NOTE(review): this enum is not used elsewhere in this file; the variants
/// presumably correspond to the query text and the include/exclude path
/// patterns — confirm against callers.
#[derive(Clone, Copy, PartialEq)]
pub enum SearchInputKind {
    Query,
    Include,
    Exclude,
}
 36
/// The inputs shared by both kinds of [`SearchQuery`]: the query text plus the
/// path filters and buffer restriction it was created with.
#[derive(Clone, Debug)]
pub struct SearchInputs {
    // The query text handed to the constructor (for regex queries this is the
    // text before any `\c`/`\C` stripping or `\b` wrapping).
    query: Arc<str>,
    // Patterns a path must match to be searched; an empty matcher accepts every path.
    files_to_include: PathMatcher,
    // Patterns that exclude a path from the search.
    files_to_exclude: PathMatcher,
    // Stored as given and exposed via `SearchQuery::match_full_paths`; not
    // interpreted inside this file.
    match_full_paths: bool,
    // When `Some`, `SearchQuery::is_opened_only` reports true.
    buffers: Option<Vec<Entity<Buffer>>>,
}
 45
 46impl SearchInputs {
 47    pub fn as_str(&self) -> &str {
 48        self.query.as_ref()
 49    }
 50    pub fn files_to_include(&self) -> &PathMatcher {
 51        &self.files_to_include
 52    }
 53    pub fn files_to_exclude(&self) -> &PathMatcher {
 54        &self.files_to_exclude
 55    }
 56    pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
 57        &self.buffers
 58    }
 59}
/// A compiled search query: either a literal text search or a regex search.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search backed by an Aho-Corasick automaton.
    Text {
        // Automaton built from the query text in `SearchQuery::text`.
        search: AhoCorasick,
        // Replacement text; `None` until `with_replacement` is called.
        replacement: Option<String>,
        // When true, `search` skips matches that continue a word on either side.
        whole_word: bool,
        case_sensitive: bool,
        // Stored and exposed via `include_ignored()`; not interpreted in this file.
        include_ignored: bool,
        inner: SearchInputs,
    },
    /// Regular-expression search.
    Regex {
        // Compiled pattern (after `\c`/`\C` extraction and optional `\b` wrapping).
        regex: Regex,
        // Replacement text; `None` until `with_replacement` is called.
        replacement: Option<String>,
        // True when the pattern contains a newline or the two characters `\n`;
        // such queries are matched against the whole text instead of line by line.
        multiline: bool,
        whole_word: bool,
        case_sensitive: bool,
        // Stored and exposed via `include_ignored()`; not interpreted in this file.
        include_ignored: bool,
        // When true, the line-by-line search keeps only the first match of each line.
        one_match_per_line: bool,
        inner: SearchInputs,
    },
}
 81
 82static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
 83    RegexBuilder::new(r"\B")
 84        .build()
 85        .expect("Failed to create WORD_MATCH_TEST")
 86});
 87
 88impl SearchQuery {
 89    /// Create a text query
 90    ///
 91    /// If `match_full_paths` is true, include/exclude patterns will always be matched against fully qualified project paths beginning with a project root.
 92    /// If `match_full_paths` is false, patterns will be matched against full paths only when the project has multiple roots.
 93    pub fn text(
 94        query: impl ToString,
 95        whole_word: bool,
 96        case_sensitive: bool,
 97        include_ignored: bool,
 98        files_to_include: PathMatcher,
 99        files_to_exclude: PathMatcher,
100        match_full_paths: bool,
101        buffers: Option<Vec<Entity<Buffer>>>,
102    ) -> Result<Self> {
103        let query = query.to_string();
104        if !case_sensitive && !query.is_ascii() {
105            // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
106            // Fallback to regex search as recommended by
107            // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
108            return Self::regex(
109                regex::escape(&query),
110                whole_word,
111                case_sensitive,
112                include_ignored,
113                false,
114                files_to_include,
115                files_to_exclude,
116                false,
117                buffers,
118            );
119        }
120        let search = AhoCorasickBuilder::new()
121            .ascii_case_insensitive(!case_sensitive)
122            .build([&query])?;
123        let inner = SearchInputs {
124            query: query.into(),
125            files_to_exclude,
126            files_to_include,
127            match_full_paths,
128            buffers,
129        };
130        Ok(Self::Text {
131            search,
132            replacement: None,
133            whole_word,
134            case_sensitive,
135            include_ignored,
136            inner,
137        })
138    }
139
140    /// Create a regex query
141    ///
142    /// If `match_full_paths` is true, include/exclude patterns will be matched against fully qualified project paths
143    /// beginning with a project root name. If false, they will be matched against project-relative paths (which don't start
144    /// with their respective project root).
145    pub fn regex(
146        query: impl ToString,
147        whole_word: bool,
148        mut case_sensitive: bool,
149        include_ignored: bool,
150        one_match_per_line: bool,
151        files_to_include: PathMatcher,
152        files_to_exclude: PathMatcher,
153        match_full_paths: bool,
154        buffers: Option<Vec<Entity<Buffer>>>,
155    ) -> Result<Self> {
156        let mut query = query.to_string();
157        let initial_query = Arc::from(query.as_str());
158
159        if let Some((case_sensitive_from_pattern, new_query)) =
160            Self::case_sensitive_from_pattern(&query)
161        {
162            case_sensitive = case_sensitive_from_pattern;
163            query = new_query
164        }
165
166        if whole_word {
167            let mut word_query = String::new();
168            if let Some(first) = query.get(0..1)
169                && WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x)
170            {
171                word_query.push_str("\\b");
172            }
173            word_query.push_str(&query);
174            if let Some(last) = query.get(query.len() - 1..)
175                && WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x)
176            {
177                word_query.push_str("\\b");
178            }
179            query = word_query
180        }
181
182        let multiline = query.contains('\n') || query.contains("\\n");
183        let regex = RegexBuilder::new(&query)
184            .case_insensitive(!case_sensitive)
185            .build()?;
186        let inner = SearchInputs {
187            query: initial_query,
188            files_to_exclude,
189            files_to_include,
190            match_full_paths,
191            buffers,
192        };
193        Ok(Self::Regex {
194            regex,
195            replacement: None,
196            multiline,
197            whole_word,
198            case_sensitive,
199            include_ignored,
200            inner,
201            one_match_per_line,
202        })
203    }
204
205    /// Extracts case sensitivity settings from pattern items in the provided
206    /// query and returns the same query, with the pattern items removed.
207    ///
208    /// The following pattern modifiers are supported:
209    ///
210    /// - `\c` (case_sensitive: false)
211    /// - `\C` (case_sensitive: true)
212    ///
213    /// If no pattern item were found, `None` will be returned.
214    fn case_sensitive_from_pattern(query: &str) -> Option<(bool, String)> {
215        if !(query.contains("\\c") || query.contains("\\C")) {
216            return None;
217        }
218
219        let mut was_escaped = false;
220        let mut new_query = String::new();
221        let mut is_case_sensitive = None;
222
223        for c in query.chars() {
224            if was_escaped {
225                if c == 'c' {
226                    is_case_sensitive = Some(false);
227                } else if c == 'C' {
228                    is_case_sensitive = Some(true);
229                } else {
230                    new_query.push('\\');
231                    new_query.push(c);
232                }
233                was_escaped = false
234            } else if c == '\\' {
235                was_escaped = true
236            } else {
237                new_query.push(c);
238            }
239        }
240
241        is_case_sensitive.map(|c| (c, new_query))
242    }
243
244    pub fn from_proto(message: proto::SearchQuery, path_style: PathStyle) -> Result<Self> {
245        let files_to_include = if message.files_to_include.is_empty() {
246            message
247                .files_to_include_legacy
248                .split(',')
249                .map(str::trim)
250                .filter(|&glob_str| !glob_str.is_empty())
251                .map(|s| s.to_string())
252                .collect()
253        } else {
254            message.files_to_include
255        };
256
257        let files_to_exclude = if message.files_to_exclude.is_empty() {
258            message
259                .files_to_exclude_legacy
260                .split(',')
261                .map(str::trim)
262                .filter(|&glob_str| !glob_str.is_empty())
263                .map(|s| s.to_string())
264                .collect()
265        } else {
266            message.files_to_exclude
267        };
268
269        if message.regex {
270            Self::regex(
271                message.query,
272                message.whole_word,
273                message.case_sensitive,
274                message.include_ignored,
275                false,
276                PathMatcher::new(files_to_include, path_style)?,
277                PathMatcher::new(files_to_exclude, path_style)?,
278                message.match_full_paths,
279                None, // search opened only don't need search remote
280            )
281        } else {
282            Self::text(
283                message.query,
284                message.whole_word,
285                message.case_sensitive,
286                message.include_ignored,
287                PathMatcher::new(files_to_include, path_style)?,
288                PathMatcher::new(files_to_exclude, path_style)?,
289                false,
290                None, // search opened only don't need search remote
291            )
292        }
293    }
294
295    pub fn with_replacement(mut self, new_replacement: String) -> Self {
296        match self {
297            Self::Text {
298                ref mut replacement,
299                ..
300            }
301            | Self::Regex {
302                ref mut replacement,
303                ..
304            } => {
305                *replacement = Some(new_replacement);
306                self
307            }
308        }
309    }
310
311    pub fn to_proto(&self) -> proto::SearchQuery {
312        let mut files_to_include = self.files_to_include().sources();
313        let mut files_to_exclude = self.files_to_exclude().sources();
314        proto::SearchQuery {
315            query: self.as_str().to_string(),
316            regex: self.is_regex(),
317            whole_word: self.whole_word(),
318            case_sensitive: self.case_sensitive(),
319            include_ignored: self.include_ignored(),
320            files_to_include: files_to_include.clone().map(ToOwned::to_owned).collect(),
321            files_to_exclude: files_to_exclude.clone().map(ToOwned::to_owned).collect(),
322            match_full_paths: self.match_full_paths(),
323            // Populate legacy fields for backwards compatibility
324            files_to_include_legacy: files_to_include.join(","),
325            files_to_exclude_legacy: files_to_exclude.join(","),
326        }
327    }
328
329    pub(crate) fn detect(
330        &self,
331        mut reader: BufReader<Box<dyn Read + Send + Sync>>,
332    ) -> Result<bool> {
333        if self.as_str().is_empty() {
334            return Ok(false);
335        }
336
337        match self {
338            Self::Text { search, .. } => {
339                let mat = search.stream_find_iter(reader).next();
340                match mat {
341                    Some(Ok(_)) => Ok(true),
342                    Some(Err(err)) => Err(err.into()),
343                    None => Ok(false),
344                }
345            }
346            Self::Regex {
347                regex, multiline, ..
348            } => {
349                if *multiline {
350                    let mut text = String::new();
351                    if let Err(err) = reader.read_to_string(&mut text) {
352                        Err(err.into())
353                    } else {
354                        Ok(regex.find(&text)?.is_some())
355                    }
356                } else {
357                    for line in reader.lines() {
358                        let line = line?;
359                        if regex.find(&line)?.is_some() {
360                            return Ok(true);
361                        }
362                    }
363                    Ok(false)
364                }
365            }
366        }
367    }
368    /// Returns the replacement text for this `SearchQuery`.
369    pub fn replacement(&self) -> Option<&str> {
370        match self {
371            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
372                replacement.as_deref()
373            }
374        }
375    }
376    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
377    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
378        match self {
379            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
380            SearchQuery::Regex {
381                regex, replacement, ..
382            } => {
383                if let Some(replacement) = replacement {
384                    static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
385                        LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
386                    let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
387                        replacement,
388                        |c: &Captures| match c.get(0).unwrap().as_str() {
389                            r"\\" => "\\",
390                            r"\n" => "\n",
391                            r"\t" => "\t",
392                            x => unreachable!("Unexpected escape sequence: {}", x),
393                        },
394                    );
395                    Some(regex.replace(text, replacement))
396                } else {
397                    None
398                }
399            }
400        }
401    }
402
403    pub async fn search(
404        &self,
405        buffer: &BufferSnapshot,
406        subrange: Option<Range<usize>>,
407    ) -> Vec<Range<usize>> {
408        const YIELD_INTERVAL: usize = 20000;
409
410        if self.as_str().is_empty() {
411            return Default::default();
412        }
413
414        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
415        let rope = if let Some(range) = subrange {
416            buffer.as_rope().slice(range)
417        } else {
418            buffer.as_rope().clone()
419        };
420
421        let mut matches = Vec::new();
422        match self {
423            Self::Text {
424                search, whole_word, ..
425            } => {
426                for (ix, mat) in search
427                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
428                    .enumerate()
429                {
430                    if (ix + 1) % YIELD_INTERVAL == 0 {
431                        yield_now().await;
432                    }
433
434                    let mat = mat.unwrap();
435                    if *whole_word {
436                        let classifier = buffer.char_classifier_at(range_offset + mat.start());
437
438                        let prev_kind = rope
439                            .reversed_chars_at(mat.start())
440                            .next()
441                            .map(|c| classifier.kind(c));
442                        let start_kind =
443                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
444                        let end_kind =
445                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
446                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
447                        if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
448                            || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
449                        {
450                            continue;
451                        }
452                    }
453                    matches.push(mat.start()..mat.end())
454                }
455            }
456
457            Self::Regex {
458                regex, multiline, ..
459            } => {
460                if *multiline {
461                    let text = rope.to_string();
462                    for (ix, mat) in regex.find_iter(&text).enumerate() {
463                        if (ix + 1) % YIELD_INTERVAL == 0 {
464                            yield_now().await;
465                        }
466
467                        if let Ok(mat) = mat {
468                            matches.push(mat.start()..mat.end());
469                        }
470                    }
471                } else {
472                    let mut line = String::new();
473                    let mut line_offset = 0;
474                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
475                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
476                            yield_now().await;
477                        }
478
479                        for (newline_ix, text) in chunk.split('\n').enumerate() {
480                            if newline_ix > 0 {
481                                for mat in regex.find_iter(&line).flatten() {
482                                    let start = line_offset + mat.start();
483                                    let end = line_offset + mat.end();
484                                    matches.push(start..end);
485                                    if self.one_match_per_line() == Some(true) {
486                                        break;
487                                    }
488                                }
489
490                                line_offset += line.len() + 1;
491                                line.clear();
492                            }
493                            line.push_str(text);
494                        }
495                    }
496                }
497            }
498        }
499
500        matches
501    }
502
503    pub fn is_empty(&self) -> bool {
504        self.as_str().is_empty()
505    }
506
507    pub fn as_str(&self) -> &str {
508        self.as_inner().as_str()
509    }
510
511    pub fn whole_word(&self) -> bool {
512        match self {
513            Self::Text { whole_word, .. } => *whole_word,
514            Self::Regex { whole_word, .. } => *whole_word,
515        }
516    }
517
518    pub fn case_sensitive(&self) -> bool {
519        match self {
520            Self::Text { case_sensitive, .. } => *case_sensitive,
521            Self::Regex { case_sensitive, .. } => *case_sensitive,
522        }
523    }
524
525    pub fn include_ignored(&self) -> bool {
526        match self {
527            Self::Text {
528                include_ignored, ..
529            } => *include_ignored,
530            Self::Regex {
531                include_ignored, ..
532            } => *include_ignored,
533        }
534    }
535
536    pub fn is_regex(&self) -> bool {
537        matches!(self, Self::Regex { .. })
538    }
539
540    pub fn files_to_include(&self) -> &PathMatcher {
541        self.as_inner().files_to_include()
542    }
543
544    pub fn files_to_exclude(&self) -> &PathMatcher {
545        self.as_inner().files_to_exclude()
546    }
547
548    pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
549        self.as_inner().buffers.as_ref()
550    }
551
552    pub fn is_opened_only(&self) -> bool {
553        self.as_inner().buffers.is_some()
554    }
555
556    pub fn filters_path(&self) -> bool {
557        !(self.files_to_exclude().sources().next().is_none()
558            && self.files_to_include().sources().next().is_none())
559    }
560
561    pub fn match_full_paths(&self) -> bool {
562        self.as_inner().match_full_paths
563    }
564
565    /// Check match full paths to determine whether you're required to pass a fully qualified
566    /// project path (starts with a project root).
567    pub fn match_path(&self, file_path: &RelPath) -> bool {
568        let mut path = file_path.to_rel_path_buf();
569        loop {
570            if self.files_to_exclude().is_match(&path) {
571                return false;
572            } else if self.files_to_include().sources().next().is_none()
573                || self.files_to_include().is_match(&path)
574            {
575                return true;
576            } else if !path.pop() {
577                return false;
578            }
579        }
580    }
581    pub fn as_inner(&self) -> &SearchInputs {
582        match self {
583            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
584        }
585    }
586
587    /// Whether this search should replace only one match per line, instead of
588    /// all matches.
589    /// Returns `None` for text searches, as only regex searches support this
590    /// option.
591    pub fn one_match_per_line(&self) -> Option<bool> {
592        match self {
593            Self::Regex {
594                one_match_per_line, ..
595            } => Some(*one_match_per_line),
596            Self::Text { .. } => None,
597        }
598    }
599}
600
601#[cfg(test)]
602mod tests {
603    use super::*;
604
605    #[test]
606    fn path_matcher_creation_for_valid_paths() {
607        for valid_path in [
608            "file",
609            "Cargo.toml",
610            ".DS_Store",
611            "~/dir/another_dir/",
612            "./dir/file",
613            "dir/[a-z].txt",
614        ] {
615            let path_matcher = PathMatcher::new(&[valid_path.to_owned()], PathStyle::local())
616                .unwrap_or_else(|e| {
617                    panic!("Valid path {valid_path} should be accepted, but got: {e}")
618                });
619            assert!(
620                path_matcher
621                    .is_match(&RelPath::new(valid_path.as_ref(), PathStyle::local()).unwrap()),
622                "Path matcher for valid path {valid_path} should match itself"
623            )
624        }
625    }
626
627    #[test]
628    fn path_matcher_creation_for_globs() {
629        for invalid_glob in ["dir/[].txt", "dir/[a-z.txt", "dir/{file"] {
630            match PathMatcher::new(&[invalid_glob.to_owned()], PathStyle::local()) {
631                Ok(_) => panic!("Invalid glob {invalid_glob} should not be accepted"),
632                Err(_expected) => {}
633            }
634        }
635
636        for valid_glob in [
637            "dir/?ile",
638            "dir/*.txt",
639            "dir/**/file",
640            "dir/[a-z].txt",
641            "{dir,file}",
642        ] {
643            match PathMatcher::new(&[valid_glob.to_owned()], PathStyle::local()) {
644                Ok(_expected) => {}
645                Err(e) => panic!("Valid glob should be accepted, but got: {e}"),
646            }
647        }
648    }
649
650    #[test]
651    fn test_case_sensitive_pattern_items() {
652        let case_sensitive = false;
653        let search_query = SearchQuery::regex(
654            "test\\C",
655            false,
656            case_sensitive,
657            false,
658            false,
659            Default::default(),
660            Default::default(),
661            false,
662            None,
663        )
664        .expect("Should be able to create a regex SearchQuery");
665
666        assert_eq!(
667            search_query.case_sensitive(),
668            true,
669            "Case sensitivity should be enabled when \\C pattern item is present in the query."
670        );
671
672        let case_sensitive = true;
673        let search_query = SearchQuery::regex(
674            "test\\c",
675            true,
676            case_sensitive,
677            false,
678            false,
679            Default::default(),
680            Default::default(),
681            false,
682            None,
683        )
684        .expect("Should be able to create a regex SearchQuery");
685
686        assert_eq!(
687            search_query.case_sensitive(),
688            false,
689            "Case sensitivity should be disabled when \\c pattern item is present, even if initially set to true."
690        );
691
692        let case_sensitive = false;
693        let search_query = SearchQuery::regex(
694            "test\\c\\C",
695            false,
696            case_sensitive,
697            false,
698            false,
699            Default::default(),
700            Default::default(),
701            false,
702            None,
703        )
704        .expect("Should be able to create a regex SearchQuery");
705
706        assert_eq!(
707            search_query.case_sensitive(),
708            true,
709            "Case sensitivity should be enabled when \\C is the last pattern item, even after a \\c."
710        );
711
712        let case_sensitive = false;
713        let search_query = SearchQuery::regex(
714            "tests\\\\C",
715            false,
716            case_sensitive,
717            false,
718            false,
719            Default::default(),
720            Default::default(),
721            false,
722            None,
723        )
724        .expect("Should be able to create a regex SearchQuery");
725
726        assert_eq!(
727            search_query.case_sensitive(),
728            false,
729            "Case sensitivity should not be enabled when \\C pattern item is preceded by a backslash."
730        );
731    }
732}