search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use fancy_regex::{Captures, Regex, RegexBuilder};
  5use futures::{AsyncBufRead, AsyncBufReadExt, AsyncReadExt};
  6use gpui::Entity;
  7use itertools::Itertools as _;
  8use language::{Buffer, BufferSnapshot, CharKind};
  9use smol::future::yield_now;
 10use std::{
 11    borrow::Cow,
 12    ops::Range,
 13    sync::{Arc, LazyLock},
 14};
 15use text::Anchor;
 16use util::{
 17    paths::{PathMatcher, PathStyle},
 18    rel_path::RelPath,
 19};
 20
 21#[derive(Debug)]
 22pub enum SearchResult {
 23    Buffer {
 24        buffer: Entity<Buffer>,
 25        ranges: Vec<Range<Anchor>>,
 26    },
 27    LimitReached,
 28}
 29
 30#[derive(Clone, Copy, PartialEq)]
 31pub enum SearchInputKind {
 32    Query,
 33    Include,
 34    Exclude,
 35}
 36
 37#[derive(Clone, Debug)]
 38pub struct SearchInputs {
 39    query: Arc<str>,
 40    files_to_include: PathMatcher,
 41    files_to_exclude: PathMatcher,
 42    match_full_paths: bool,
 43    buffers: Option<Vec<Entity<Buffer>>>,
 44}
 45
 46impl SearchInputs {
 47    pub fn as_str(&self) -> &str {
 48        self.query.as_ref()
 49    }
 50    pub fn files_to_include(&self) -> &PathMatcher {
 51        &self.files_to_include
 52    }
 53    pub fn files_to_exclude(&self) -> &PathMatcher {
 54        &self.files_to_exclude
 55    }
 56    pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
 57        &self.buffers
 58    }
 59}
 60#[derive(Clone, Debug)]
 61pub enum SearchQuery {
 62    Text {
 63        search: AhoCorasick,
 64        replacement: Option<String>,
 65        whole_word: bool,
 66        case_sensitive: bool,
 67        include_ignored: bool,
 68        inner: SearchInputs,
 69    },
 70    Regex {
 71        regex: Regex,
 72        replacement: Option<String>,
 73        multiline: bool,
 74        whole_word: bool,
 75        case_sensitive: bool,
 76        include_ignored: bool,
 77        one_match_per_line: bool,
 78        inner: SearchInputs,
 79    },
 80}
 81
 82static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
 83    RegexBuilder::new(r"\B")
 84        .build()
 85        .expect("Failed to create WORD_MATCH_TEST")
 86});
 87
 88impl SearchQuery {
 89    /// Create a text query
 90    ///
 91    /// If `match_full_paths` is true, include/exclude patterns will always be matched against fully qualified project paths beginning with a project root.
 92    /// If `match_full_paths` is false, patterns will be matched against full paths only when the project has multiple roots.
 93    pub fn text(
 94        query: impl ToString,
 95        whole_word: bool,
 96        case_sensitive: bool,
 97        include_ignored: bool,
 98        files_to_include: PathMatcher,
 99        files_to_exclude: PathMatcher,
100        match_full_paths: bool,
101        buffers: Option<Vec<Entity<Buffer>>>,
102    ) -> Result<Self> {
103        let query = query.to_string();
104        if !case_sensitive && !query.is_ascii() {
105            // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
106            // Fallback to regex search as recommended by
107            // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
108            return Self::regex(
109                regex::escape(&query),
110                whole_word,
111                case_sensitive,
112                include_ignored,
113                false,
114                files_to_include,
115                files_to_exclude,
116                false,
117                buffers,
118            );
119        }
120        let search = AhoCorasickBuilder::new()
121            .ascii_case_insensitive(!case_sensitive)
122            .build([&query])?;
123        let inner = SearchInputs {
124            query: query.into(),
125            files_to_exclude,
126            files_to_include,
127            match_full_paths,
128            buffers,
129        };
130        Ok(Self::Text {
131            search,
132            replacement: None,
133            whole_word,
134            case_sensitive,
135            include_ignored,
136            inner,
137        })
138    }
139
140    /// Create a regex query
141    ///
142    /// If `match_full_paths` is true, include/exclude patterns will be matched against fully qualified project paths
143    /// beginning with a project root name. If false, they will be matched against project-relative paths (which don't start
144    /// with their respective project root).
145    pub fn regex(
146        query: impl ToString,
147        whole_word: bool,
148        mut case_sensitive: bool,
149        include_ignored: bool,
150        one_match_per_line: bool,
151        files_to_include: PathMatcher,
152        files_to_exclude: PathMatcher,
153        match_full_paths: bool,
154        buffers: Option<Vec<Entity<Buffer>>>,
155    ) -> Result<Self> {
156        let mut query = query.to_string();
157        let initial_query = Arc::from(query.as_str());
158
159        if let Some((case_sensitive_from_pattern, new_query)) =
160            Self::case_sensitive_from_pattern(&query)
161        {
162            case_sensitive = case_sensitive_from_pattern;
163            query = new_query
164        }
165
166        if whole_word {
167            let mut word_query = String::new();
168            if let Some(first) = query.get(0..1)
169                && WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x)
170            {
171                word_query.push_str("\\b");
172            }
173            word_query.push_str(&query);
174            if let Some(last) = query.get(query.len() - 1..)
175                && WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x)
176            {
177                word_query.push_str("\\b");
178            }
179            query = word_query
180        }
181
182        let multiline = query.contains('\n') || query.contains("\\n");
183        let regex = RegexBuilder::new(&query)
184            .case_insensitive(!case_sensitive)
185            .build()?;
186        let inner = SearchInputs {
187            query: initial_query,
188            files_to_exclude,
189            files_to_include,
190            match_full_paths,
191            buffers,
192        };
193        Ok(Self::Regex {
194            regex,
195            replacement: None,
196            multiline,
197            whole_word,
198            case_sensitive,
199            include_ignored,
200            inner,
201            one_match_per_line,
202        })
203    }
204
205    /// Extracts case sensitivity settings from pattern items in the provided
206    /// query and returns the same query, with the pattern items removed.
207    ///
208    /// The following pattern modifiers are supported:
209    ///
210    /// - `\c` (case_sensitive: false)
211    /// - `\C` (case_sensitive: true)
212    ///
213    /// If no pattern item were found, `None` will be returned.
214    fn case_sensitive_from_pattern(query: &str) -> Option<(bool, String)> {
215        if !(query.contains("\\c") || query.contains("\\C")) {
216            return None;
217        }
218
219        let mut was_escaped = false;
220        let mut new_query = String::new();
221        let mut is_case_sensitive = None;
222
223        for c in query.chars() {
224            if was_escaped {
225                if c == 'c' {
226                    is_case_sensitive = Some(false);
227                } else if c == 'C' {
228                    is_case_sensitive = Some(true);
229                } else {
230                    new_query.push('\\');
231                    new_query.push(c);
232                }
233                was_escaped = false
234            } else if c == '\\' {
235                was_escaped = true
236            } else {
237                new_query.push(c);
238            }
239        }
240
241        is_case_sensitive.map(|c| (c, new_query))
242    }
243
244    pub fn from_proto(message: proto::SearchQuery, path_style: PathStyle) -> Result<Self> {
245        let files_to_include = if message.files_to_include.is_empty() {
246            message
247                .files_to_include_legacy
248                .split(',')
249                .map(str::trim)
250                .filter(|&glob_str| !glob_str.is_empty())
251                .map(|s| s.to_string())
252                .collect()
253        } else {
254            message.files_to_include
255        };
256
257        let files_to_exclude = if message.files_to_exclude.is_empty() {
258            message
259                .files_to_exclude_legacy
260                .split(',')
261                .map(str::trim)
262                .filter(|&glob_str| !glob_str.is_empty())
263                .map(|s| s.to_string())
264                .collect()
265        } else {
266            message.files_to_exclude
267        };
268
269        if message.regex {
270            Self::regex(
271                message.query,
272                message.whole_word,
273                message.case_sensitive,
274                message.include_ignored,
275                false,
276                PathMatcher::new(files_to_include, path_style)?,
277                PathMatcher::new(files_to_exclude, path_style)?,
278                message.match_full_paths,
279                None, // search opened only don't need search remote
280            )
281        } else {
282            Self::text(
283                message.query,
284                message.whole_word,
285                message.case_sensitive,
286                message.include_ignored,
287                PathMatcher::new(files_to_include, path_style)?,
288                PathMatcher::new(files_to_exclude, path_style)?,
289                false,
290                None, // search opened only don't need search remote
291            )
292        }
293    }
294
295    pub fn with_replacement(mut self, new_replacement: String) -> Self {
296        match self {
297            Self::Text {
298                ref mut replacement,
299                ..
300            }
301            | Self::Regex {
302                ref mut replacement,
303                ..
304            } => {
305                *replacement = Some(new_replacement);
306                self
307            }
308        }
309    }
310
311    pub fn to_proto(&self) -> proto::SearchQuery {
312        let mut files_to_include = self.files_to_include().sources();
313        let mut files_to_exclude = self.files_to_exclude().sources();
314        proto::SearchQuery {
315            query: self.as_str().to_string(),
316            regex: self.is_regex(),
317            whole_word: self.whole_word(),
318            case_sensitive: self.case_sensitive(),
319            include_ignored: self.include_ignored(),
320            files_to_include: files_to_include.clone().map(ToOwned::to_owned).collect(),
321            files_to_exclude: files_to_exclude.clone().map(ToOwned::to_owned).collect(),
322            match_full_paths: self.match_full_paths(),
323            // Populate legacy fields for backwards compatibility
324            files_to_include_legacy: files_to_include.join(","),
325            files_to_exclude_legacy: files_to_exclude.join(","),
326        }
327    }
328
329    pub(crate) async fn detect(&self, mut reader: impl AsyncBufRead + Unpin) -> Result<bool> {
330        let query_str = self.as_str();
331        let needle_len = query_str.len();
332        if needle_len == 0 {
333            return Ok(false);
334        }
335
336        let mut text = String::new();
337        match self {
338            Self::Text { search, .. } => {
339                if query_str.contains('\n') {
340                    reader.read_to_string(&mut text).await?;
341                    Ok(search.find(&text).is_some())
342                } else {
343                    while reader.read_line(&mut text).await? > 0 {
344                        if search.find(&text).is_some() {
345                            return Ok(true);
346                        }
347                        text.clear();
348                    }
349                    Ok(false)
350                }
351            }
352            Self::Regex {
353                regex, multiline, ..
354            } => {
355                if *multiline {
356                    reader.read_to_string(&mut text).await?;
357                    Ok(regex.find(&text)?.is_some())
358                } else {
359                    while reader.read_line(&mut text).await? > 0 {
360                        if regex.find(&text)?.is_some() {
361                            return Ok(true);
362                        }
363                    }
364                    Ok(false)
365                }
366            }
367        }
368    }
369    /// Returns the replacement text for this `SearchQuery`.
370    pub fn replacement(&self) -> Option<&str> {
371        match self {
372            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
373                replacement.as_deref()
374            }
375        }
376    }
377    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
378    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
379        match self {
380            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
381            SearchQuery::Regex {
382                regex, replacement, ..
383            } => {
384                if let Some(replacement) = replacement {
385                    static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
386                        LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
387                    let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
388                        replacement,
389                        |c: &Captures| match c.get(0).unwrap().as_str() {
390                            r"\\" => "\\",
391                            r"\n" => "\n",
392                            r"\t" => "\t",
393                            x => unreachable!("Unexpected escape sequence: {}", x),
394                        },
395                    );
396                    Some(regex.replace(text, replacement))
397                } else {
398                    None
399                }
400            }
401        }
402    }
403
404    pub async fn search(
405        &self,
406        buffer: &BufferSnapshot,
407        subrange: Option<Range<usize>>,
408    ) -> Vec<Range<usize>> {
409        const YIELD_INTERVAL: usize = 20000;
410
411        if self.as_str().is_empty() {
412            return Default::default();
413        }
414
415        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
416        let rope = if let Some(range) = subrange {
417            buffer.as_rope().slice(range)
418        } else {
419            buffer.as_rope().clone()
420        };
421
422        let mut matches = Vec::new();
423        match self {
424            Self::Text {
425                search, whole_word, ..
426            } => {
427                for (ix, mat) in search
428                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
429                    .enumerate()
430                {
431                    if (ix + 1) % YIELD_INTERVAL == 0 {
432                        yield_now().await;
433                    }
434
435                    let mat = mat.unwrap();
436                    if *whole_word {
437                        let classifier = buffer.char_classifier_at(range_offset + mat.start());
438
439                        let prev_kind = rope
440                            .reversed_chars_at(mat.start())
441                            .next()
442                            .map(|c| classifier.kind(c));
443                        let start_kind =
444                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
445                        let end_kind =
446                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
447                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
448                        if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
449                            || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
450                        {
451                            continue;
452                        }
453                    }
454                    matches.push(mat.start()..mat.end())
455                }
456            }
457
458            Self::Regex {
459                regex, multiline, ..
460            } => {
461                if *multiline {
462                    let text = rope.to_string();
463                    for (ix, mat) in regex.find_iter(&text).enumerate() {
464                        if (ix + 1) % YIELD_INTERVAL == 0 {
465                            yield_now().await;
466                        }
467
468                        if let Ok(mat) = mat {
469                            matches.push(mat.start()..mat.end());
470                        }
471                    }
472                } else {
473                    let mut line = String::new();
474                    let mut line_offset = 0;
475                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
476                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
477                            yield_now().await;
478                        }
479
480                        for (newline_ix, text) in chunk.split('\n').enumerate() {
481                            if newline_ix > 0 {
482                                for mat in regex.find_iter(&line).flatten() {
483                                    let start = line_offset + mat.start();
484                                    let end = line_offset + mat.end();
485                                    matches.push(start..end);
486                                    if self.one_match_per_line() == Some(true) {
487                                        break;
488                                    }
489                                }
490
491                                line_offset += line.len() + 1;
492                                line.clear();
493                            }
494                            line.push_str(text);
495                        }
496                    }
497                }
498            }
499        }
500
501        matches
502    }
503
504    pub fn is_empty(&self) -> bool {
505        self.as_str().is_empty()
506    }
507
508    pub fn as_str(&self) -> &str {
509        self.as_inner().as_str()
510    }
511
512    pub fn whole_word(&self) -> bool {
513        match self {
514            Self::Text { whole_word, .. } => *whole_word,
515            Self::Regex { whole_word, .. } => *whole_word,
516        }
517    }
518
519    pub fn case_sensitive(&self) -> bool {
520        match self {
521            Self::Text { case_sensitive, .. } => *case_sensitive,
522            Self::Regex { case_sensitive, .. } => *case_sensitive,
523        }
524    }
525
526    pub fn include_ignored(&self) -> bool {
527        match self {
528            Self::Text {
529                include_ignored, ..
530            } => *include_ignored,
531            Self::Regex {
532                include_ignored, ..
533            } => *include_ignored,
534        }
535    }
536
537    pub fn is_regex(&self) -> bool {
538        matches!(self, Self::Regex { .. })
539    }
540
541    pub fn files_to_include(&self) -> &PathMatcher {
542        self.as_inner().files_to_include()
543    }
544
545    pub fn files_to_exclude(&self) -> &PathMatcher {
546        self.as_inner().files_to_exclude()
547    }
548
549    pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
550        self.as_inner().buffers.as_ref()
551    }
552
553    pub fn is_opened_only(&self) -> bool {
554        self.as_inner().buffers.is_some()
555    }
556
557    pub fn filters_path(&self) -> bool {
558        !(self.files_to_exclude().sources().next().is_none()
559            && self.files_to_include().sources().next().is_none())
560    }
561
562    pub fn match_full_paths(&self) -> bool {
563        self.as_inner().match_full_paths
564    }
565
566    /// Check match full paths to determine whether you're required to pass a fully qualified
567    /// project path (starts with a project root).
568    pub fn match_path(&self, file_path: &RelPath) -> bool {
569        let mut path = file_path.to_rel_path_buf();
570        loop {
571            if self.files_to_exclude().is_match(&path) {
572                return false;
573            } else if self.files_to_include().sources().next().is_none()
574                || self.files_to_include().is_match(&path)
575            {
576                return true;
577            } else if !path.pop() {
578                return false;
579            }
580        }
581    }
582    pub fn as_inner(&self) -> &SearchInputs {
583        match self {
584            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
585        }
586    }
587
588    /// Whether this search should replace only one match per line, instead of
589    /// all matches.
590    /// Returns `None` for text searches, as only regex searches support this
591    /// option.
592    pub fn one_match_per_line(&self) -> Option<bool> {
593        match self {
594            Self::Regex {
595                one_match_per_line, ..
596            } => Some(*one_match_per_line),
597            Self::Text { .. } => None,
598        }
599    }
600}
601
602#[cfg(test)]
603mod tests {
604    use super::*;
605
606    #[test]
607    fn path_matcher_creation_for_valid_paths() {
608        for valid_path in [
609            "file",
610            "Cargo.toml",
611            ".DS_Store",
612            "~/dir/another_dir/",
613            "./dir/file",
614            "dir/[a-z].txt",
615        ] {
616            let path_matcher = PathMatcher::new(&[valid_path.to_owned()], PathStyle::local())
617                .unwrap_or_else(|e| {
618                    panic!("Valid path {valid_path} should be accepted, but got: {e}")
619                });
620            assert!(
621                path_matcher
622                    .is_match(&RelPath::new(valid_path.as_ref(), PathStyle::local()).unwrap()),
623                "Path matcher for valid path {valid_path} should match itself"
624            )
625        }
626    }
627
628    #[test]
629    fn path_matcher_creation_for_globs() {
630        for invalid_glob in ["dir/[].txt", "dir/[a-z.txt", "dir/{file"] {
631            match PathMatcher::new(&[invalid_glob.to_owned()], PathStyle::local()) {
632                Ok(_) => panic!("Invalid glob {invalid_glob} should not be accepted"),
633                Err(_expected) => {}
634            }
635        }
636
637        for valid_glob in [
638            "dir/?ile",
639            "dir/*.txt",
640            "dir/**/file",
641            "dir/[a-z].txt",
642            "{dir,file}",
643        ] {
644            match PathMatcher::new(&[valid_glob.to_owned()], PathStyle::local()) {
645                Ok(_expected) => {}
646                Err(e) => panic!("Valid glob should be accepted, but got: {e}"),
647            }
648        }
649    }
650
651    #[test]
652    fn test_case_sensitive_pattern_items() {
653        let case_sensitive = false;
654        let search_query = SearchQuery::regex(
655            "test\\C",
656            false,
657            case_sensitive,
658            false,
659            false,
660            Default::default(),
661            Default::default(),
662            false,
663            None,
664        )
665        .expect("Should be able to create a regex SearchQuery");
666
667        assert_eq!(
668            search_query.case_sensitive(),
669            true,
670            "Case sensitivity should be enabled when \\C pattern item is present in the query."
671        );
672
673        let case_sensitive = true;
674        let search_query = SearchQuery::regex(
675            "test\\c",
676            true,
677            case_sensitive,
678            false,
679            false,
680            Default::default(),
681            Default::default(),
682            false,
683            None,
684        )
685        .expect("Should be able to create a regex SearchQuery");
686
687        assert_eq!(
688            search_query.case_sensitive(),
689            false,
690            "Case sensitivity should be disabled when \\c pattern item is present, even if initially set to true."
691        );
692
693        let case_sensitive = false;
694        let search_query = SearchQuery::regex(
695            "test\\c\\C",
696            false,
697            case_sensitive,
698            false,
699            false,
700            Default::default(),
701            Default::default(),
702            false,
703            None,
704        )
705        .expect("Should be able to create a regex SearchQuery");
706
707        assert_eq!(
708            search_query.case_sensitive(),
709            true,
710            "Case sensitivity should be enabled when \\C is the last pattern item, even after a \\c."
711        );
712
713        let case_sensitive = false;
714        let search_query = SearchQuery::regex(
715            "tests\\\\C",
716            false,
717            case_sensitive,
718            false,
719            false,
720            Default::default(),
721            Default::default(),
722            false,
723            None,
724        )
725        .expect("Should be able to create a regex SearchQuery");
726
727        assert_eq!(
728            search_query.case_sensitive(),
729            false,
730            "Case sensitivity should not be enabled when \\C pattern item is preceded by a backslash."
731        );
732    }
733}