1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use fancy_regex::{Captures, Regex, RegexBuilder};
  5use gpui::Entity;
  6use language::{Buffer, BufferSnapshot, CharKind};
  7use smol::future::yield_now;
  8use std::{
  9    borrow::Cow,
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::Path,
 13    sync::{Arc, LazyLock},
 14};
 15use text::Anchor;
 16use util::paths::{PathMatcher, PathStyle};
 17
/// One item produced while running a search.
#[derive(Debug)]
pub enum SearchResult {
    /// Matches found inside a single buffer.
    Buffer {
        /// The buffer the matches belong to.
        buffer: Entity<Buffer>,
        /// Anchor ranges of the individual matches within `buffer`.
        ranges: Vec<Range<Anchor>>,
    },
    /// NOTE(review): presumably signals that the search stopped early because a
    /// result cap was hit — the producer is not visible in this file; confirm.
    LimitReached,
}
 26
/// Names the three textual inputs a search is configured with.
///
/// NOTE(review): not used within this file; presumably consumed by UI code to
/// tell the input fields apart — confirm against callers.
#[derive(Clone, Copy, PartialEq)]
pub enum SearchInputKind {
    /// The search query itself.
    Query,
    /// The "files to include" filter.
    Include,
    /// The "files to exclude" filter.
    Exclude,
}
 33
/// The user-provided inputs shared by every [`SearchQuery`] variant.
#[derive(Clone, Debug)]
pub struct SearchInputs {
    /// The query text exactly as it was handed to the constructor (before any
    /// rewriting such as pattern-item stripping or whole-word anchoring).
    query: Arc<str>,
    /// Paths must match this matcher to be searched (an empty matcher accepts all).
    files_to_include: PathMatcher,
    /// Paths matching this matcher are never searched.
    files_to_exclude: PathMatcher,
    /// Whether include/exclude patterns are meant to be matched against fully
    /// qualified project paths (starting with a project root).
    match_full_paths: bool,
    /// When `Some`, the set of buffers associated with the query; see
    /// [`SearchQuery::is_opened_only`].
    buffers: Option<Vec<Entity<Buffer>>>,
}
 42
 43impl SearchInputs {
 44    pub fn as_str(&self) -> &str {
 45        self.query.as_ref()
 46    }
 47    pub fn files_to_include(&self) -> &PathMatcher {
 48        &self.files_to_include
 49    }
 50    pub fn files_to_exclude(&self) -> &PathMatcher {
 51        &self.files_to_exclude
 52    }
 53    pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
 54        &self.buffers
 55    }
 56}
/// A compiled search query: either a literal text search or a regex search.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search backed by an Aho-Corasick automaton.
    Text {
        /// Automaton built from the query text.
        search: AhoCorasick,
        /// Replacement text, if one was set via `with_replacement`.
        replacement: Option<String>,
        /// Only accept matches that are delimited by word boundaries.
        whole_word: bool,
        /// Whether matching distinguishes upper and lower case.
        case_sensitive: bool,
        /// NOTE(review): presumably whether ignored (e.g. gitignored) files are
        /// searched too — only stored and round-tripped here; confirm with callers.
        include_ignored: bool,
        /// The raw inputs this query was built from.
        inner: SearchInputs,
    },
    /// Regular-expression search.
    Regex {
        /// The compiled pattern (after pattern-item stripping and optional
        /// whole-word anchoring).
        regex: Regex,
        /// Replacement template, if one was set via `with_replacement`.
        replacement: Option<String>,
        /// True when the pattern contains a newline (literal or `\n`), in which
        /// case matching runs over the whole text instead of line by line.
        multiline: bool,
        /// Whether `\b` anchors were requested for the pattern.
        whole_word: bool,
        /// Effective case sensitivity (may have been overridden by `\c` / `\C`).
        case_sensitive: bool,
        /// NOTE(review): presumably whether ignored (e.g. gitignored) files are
        /// searched too — only stored and round-tripped here; confirm with callers.
        include_ignored: bool,
        /// Stop after the first match on each line (line-by-line mode only).
        one_match_per_line: bool,
        /// The raw inputs this query was built from.
        inner: SearchInputs,
    },
}
 78
 79static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
 80    RegexBuilder::new(r"\B")
 81        .build()
 82        .expect("Failed to create WORD_MATCH_TEST")
 83});
 84
 85impl SearchQuery {
 86    /// Create a text query
 87    ///
 88    /// If `match_full_paths` is true, include/exclude patterns will always be matched against fully qualified project paths beginning with a project root.
 89    /// If `match_full_paths` is false, patterns will be matched against full paths only when the project has multiple roots.
 90    pub fn text(
 91        query: impl ToString,
 92        whole_word: bool,
 93        case_sensitive: bool,
 94        include_ignored: bool,
 95        files_to_include: PathMatcher,
 96        files_to_exclude: PathMatcher,
 97        match_full_paths: bool,
 98        buffers: Option<Vec<Entity<Buffer>>>,
 99    ) -> Result<Self> {
100        let query = query.to_string();
101        if !case_sensitive && !query.is_ascii() {
102            // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
103            // Fallback to regex search as recommended by
104            // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
105            return Self::regex(
106                regex::escape(&query),
107                whole_word,
108                case_sensitive,
109                include_ignored,
110                false,
111                files_to_include,
112                files_to_exclude,
113                false,
114                buffers,
115            );
116        }
117        let search = AhoCorasickBuilder::new()
118            .ascii_case_insensitive(!case_sensitive)
119            .build([&query])?;
120        let inner = SearchInputs {
121            query: query.into(),
122            files_to_exclude,
123            files_to_include,
124            match_full_paths,
125            buffers,
126        };
127        Ok(Self::Text {
128            search,
129            replacement: None,
130            whole_word,
131            case_sensitive,
132            include_ignored,
133            inner,
134        })
135    }
136
137    /// Create a regex query
138    ///
139    /// If `match_full_paths` is true, include/exclude patterns will be matched against fully qualified project paths
140    /// beginning with a project root name. If false, they will be matched against project-relative paths (which don't start
141    /// with their respective project root).
142    pub fn regex(
143        query: impl ToString,
144        whole_word: bool,
145        mut case_sensitive: bool,
146        include_ignored: bool,
147        one_match_per_line: bool,
148        files_to_include: PathMatcher,
149        files_to_exclude: PathMatcher,
150        match_full_paths: bool,
151        buffers: Option<Vec<Entity<Buffer>>>,
152    ) -> Result<Self> {
153        let mut query = query.to_string();
154        let initial_query = Arc::from(query.as_str());
155
156        if let Some((case_sensitive_from_pattern, new_query)) =
157            Self::case_sensitive_from_pattern(&query)
158        {
159            case_sensitive = case_sensitive_from_pattern;
160            query = new_query
161        }
162
163        if whole_word {
164            let mut word_query = String::new();
165            if let Some(first) = query.get(0..1)
166                && WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x)
167            {
168                word_query.push_str("\\b");
169            }
170            word_query.push_str(&query);
171            if let Some(last) = query.get(query.len() - 1..)
172                && WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x)
173            {
174                word_query.push_str("\\b");
175            }
176            query = word_query
177        }
178
179        let multiline = query.contains('\n') || query.contains("\\n");
180        let regex = RegexBuilder::new(&query)
181            .case_insensitive(!case_sensitive)
182            .build()?;
183        let inner = SearchInputs {
184            query: initial_query,
185            files_to_exclude,
186            files_to_include,
187            match_full_paths,
188            buffers,
189        };
190        Ok(Self::Regex {
191            regex,
192            replacement: None,
193            multiline,
194            whole_word,
195            case_sensitive,
196            include_ignored,
197            inner,
198            one_match_per_line,
199        })
200    }
201
202    /// Extracts case sensitivity settings from pattern items in the provided
203    /// query and returns the same query, with the pattern items removed.
204    ///
205    /// The following pattern modifiers are supported:
206    ///
207    /// - `\c` (case_sensitive: false)
208    /// - `\C` (case_sensitive: true)
209    ///
210    /// If no pattern item were found, `None` will be returned.
211    fn case_sensitive_from_pattern(query: &str) -> Option<(bool, String)> {
212        if !(query.contains("\\c") || query.contains("\\C")) {
213            return None;
214        }
215
216        let mut was_escaped = false;
217        let mut new_query = String::new();
218        let mut is_case_sensitive = None;
219
220        for c in query.chars() {
221            if was_escaped {
222                if c == 'c' {
223                    is_case_sensitive = Some(false);
224                } else if c == 'C' {
225                    is_case_sensitive = Some(true);
226                } else {
227                    new_query.push('\\');
228                    new_query.push(c);
229                }
230                was_escaped = false
231            } else if c == '\\' {
232                was_escaped = true
233            } else {
234                new_query.push(c);
235            }
236        }
237
238        is_case_sensitive.map(|c| (c, new_query))
239    }
240
241    pub fn from_proto(message: proto::SearchQuery, path_style: PathStyle) -> Result<Self> {
242        let files_to_include = if message.files_to_include.is_empty() {
243            message
244                .files_to_include_legacy
245                .split(',')
246                .map(str::trim)
247                .filter(|&glob_str| !glob_str.is_empty())
248                .map(|s| s.to_string())
249                .collect()
250        } else {
251            message.files_to_include
252        };
253
254        let files_to_exclude = if message.files_to_exclude.is_empty() {
255            message
256                .files_to_exclude_legacy
257                .split(',')
258                .map(str::trim)
259                .filter(|&glob_str| !glob_str.is_empty())
260                .map(|s| s.to_string())
261                .collect()
262        } else {
263            message.files_to_exclude
264        };
265
266        if message.regex {
267            Self::regex(
268                message.query,
269                message.whole_word,
270                message.case_sensitive,
271                message.include_ignored,
272                false,
273                PathMatcher::new(files_to_include, path_style)?,
274                PathMatcher::new(files_to_exclude, path_style)?,
275                message.match_full_paths,
276                None, // search opened only don't need search remote
277            )
278        } else {
279            Self::text(
280                message.query,
281                message.whole_word,
282                message.case_sensitive,
283                message.include_ignored,
284                PathMatcher::new(files_to_include, path_style)?,
285                PathMatcher::new(files_to_exclude, path_style)?,
286                false,
287                None, // search opened only don't need search remote
288            )
289        }
290    }
291
292    pub fn with_replacement(mut self, new_replacement: String) -> Self {
293        match self {
294            Self::Text {
295                ref mut replacement,
296                ..
297            }
298            | Self::Regex {
299                ref mut replacement,
300                ..
301            } => {
302                *replacement = Some(new_replacement);
303                self
304            }
305        }
306    }
307
308    pub fn to_proto(&self) -> proto::SearchQuery {
309        let files_to_include = self.files_to_include().sources().to_vec();
310        let files_to_exclude = self.files_to_exclude().sources().to_vec();
311        proto::SearchQuery {
312            query: self.as_str().to_string(),
313            regex: self.is_regex(),
314            whole_word: self.whole_word(),
315            case_sensitive: self.case_sensitive(),
316            include_ignored: self.include_ignored(),
317            files_to_include: files_to_include.clone(),
318            files_to_exclude: files_to_exclude.clone(),
319            match_full_paths: self.match_full_paths(),
320            // Populate legacy fields for backwards compatibility
321            files_to_include_legacy: files_to_include.join(","),
322            files_to_exclude_legacy: files_to_exclude.join(","),
323        }
324    }
325
326    pub(crate) fn detect(
327        &self,
328        mut reader: BufReader<Box<dyn Read + Send + Sync>>,
329    ) -> Result<bool> {
330        if self.as_str().is_empty() {
331            return Ok(false);
332        }
333
334        match self {
335            Self::Text { search, .. } => {
336                let mat = search.stream_find_iter(reader).next();
337                match mat {
338                    Some(Ok(_)) => Ok(true),
339                    Some(Err(err)) => Err(err.into()),
340                    None => Ok(false),
341                }
342            }
343            Self::Regex {
344                regex, multiline, ..
345            } => {
346                if *multiline {
347                    let mut text = String::new();
348                    if let Err(err) = reader.read_to_string(&mut text) {
349                        Err(err.into())
350                    } else {
351                        Ok(regex.find(&text)?.is_some())
352                    }
353                } else {
354                    for line in reader.lines() {
355                        let line = line?;
356                        if regex.find(&line)?.is_some() {
357                            return Ok(true);
358                        }
359                    }
360                    Ok(false)
361                }
362            }
363        }
364    }
365    /// Returns the replacement text for this `SearchQuery`.
366    pub fn replacement(&self) -> Option<&str> {
367        match self {
368            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
369                replacement.as_deref()
370            }
371        }
372    }
373    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
374    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
375        match self {
376            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
377            SearchQuery::Regex {
378                regex, replacement, ..
379            } => {
380                if let Some(replacement) = replacement {
381                    static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
382                        LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
383                    let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
384                        replacement,
385                        |c: &Captures| match c.get(0).unwrap().as_str() {
386                            r"\\" => "\\",
387                            r"\n" => "\n",
388                            r"\t" => "\t",
389                            x => unreachable!("Unexpected escape sequence: {}", x),
390                        },
391                    );
392                    Some(regex.replace(text, replacement))
393                } else {
394                    None
395                }
396            }
397        }
398    }
399
    /// Finds every match of this query in `buffer`, optionally restricted to `subrange`.
    ///
    /// The returned byte ranges are relative to the start of the searched text: when a
    /// `subrange` is given they are measured from `subrange.start`, not from the start of
    /// the buffer. An empty query produces no matches.
    ///
    /// The future yields back to the executor at regular intervals (every
    /// `YIELD_INTERVAL` matches, or chunks in line-by-line regex mode).
    pub async fn search(
        &self,
        buffer: &BufferSnapshot,
        subrange: Option<Range<usize>>,
    ) -> Vec<Range<usize>> {
        const YIELD_INTERVAL: usize = 20000;

        if self.as_str().is_empty() {
            return Default::default();
        }

        // `range_offset` converts slice-relative offsets back into buffer offsets; it is
        // only needed for looking up the character classifier below.
        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
        let rope = if let Some(range) = subrange {
            buffer.as_rope().slice(range)
        } else {
            buffer.as_rope().clone()
        };

        let mut matches = Vec::new();
        match self {
            Self::Text {
                search, whole_word, ..
            } => {
                for (ix, mat) in search
                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
                    .enumerate()
                {
                    if (ix + 1) % YIELD_INTERVAL == 0 {
                        yield_now().await;
                    }

                    // NOTE(review): presumably reading from the in-memory rope cannot
                    // fail, which is why the stream error is unwrapped — confirm.
                    let mat = mat.unwrap();
                    if *whole_word {
                        let classifier = buffer.char_classifier_at(range_offset + mat.start());

                        // Kinds of the characters just outside and just inside each end
                        // of the match; the outer ones are `None` at the edges of the rope.
                        let prev_kind = rope
                            .reversed_chars_at(mat.start())
                            .next()
                            .map(|c| classifier.kind(c));
                        let start_kind =
                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
                        let end_kind =
                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
                        // Reject the match if a word continues across either of its ends.
                        if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
                            || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
                        {
                            continue;
                        }
                    }
                    matches.push(mat.start()..mat.end())
                }
            }

            Self::Regex {
                regex, multiline, ..
            } => {
                if *multiline {
                    // Multiline patterns need the whole text in one contiguous string.
                    let text = rope.to_string();
                    for (ix, mat) in regex.find_iter(&text).enumerate() {
                        if (ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        // Matches for which the regex engine reports an error are skipped.
                        if let Ok(mat) = mat {
                            matches.push(mat.start()..mat.end());
                        }
                    }
                } else {
                    // Line-by-line mode: a line may span several rope chunks, so its
                    // pieces are accumulated in `line` until a newline is seen.
                    // `line_offset` is the offset of the current line's first byte.
                    let mut line = String::new();
                    let mut line_offset = 0;
                    // The extra "\n" chunk flushes the final line even when the text
                    // does not end with a newline.
                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        for (newline_ix, text) in chunk.split('\n').enumerate() {
                            // Every piece after the first one follows a newline, which
                            // means the line accumulated so far is complete.
                            if newline_ix > 0 {
                                // `flatten` drops matches that resulted in an error.
                                for mat in regex.find_iter(&line).flatten() {
                                    let start = line_offset + mat.start();
                                    let end = line_offset + mat.end();
                                    matches.push(start..end);
                                    if self.one_match_per_line() == Some(true) {
                                        break;
                                    }
                                }

                                // +1 accounts for the newline that ended the line.
                                line_offset += line.len() + 1;
                                line.clear();
                            }
                            line.push_str(text);
                        }
                    }
                }
            }
        }

        matches
    }
499
500    pub fn is_empty(&self) -> bool {
501        self.as_str().is_empty()
502    }
503
504    pub fn as_str(&self) -> &str {
505        self.as_inner().as_str()
506    }
507
508    pub fn whole_word(&self) -> bool {
509        match self {
510            Self::Text { whole_word, .. } => *whole_word,
511            Self::Regex { whole_word, .. } => *whole_word,
512        }
513    }
514
515    pub fn case_sensitive(&self) -> bool {
516        match self {
517            Self::Text { case_sensitive, .. } => *case_sensitive,
518            Self::Regex { case_sensitive, .. } => *case_sensitive,
519        }
520    }
521
522    pub fn include_ignored(&self) -> bool {
523        match self {
524            Self::Text {
525                include_ignored, ..
526            } => *include_ignored,
527            Self::Regex {
528                include_ignored, ..
529            } => *include_ignored,
530        }
531    }
532
533    pub fn is_regex(&self) -> bool {
534        matches!(self, Self::Regex { .. })
535    }
536
537    pub fn files_to_include(&self) -> &PathMatcher {
538        self.as_inner().files_to_include()
539    }
540
541    pub fn files_to_exclude(&self) -> &PathMatcher {
542        self.as_inner().files_to_exclude()
543    }
544
545    pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
546        self.as_inner().buffers.as_ref()
547    }
548
549    pub fn is_opened_only(&self) -> bool {
550        self.as_inner().buffers.is_some()
551    }
552
553    pub fn filters_path(&self) -> bool {
554        !(self.files_to_exclude().sources().is_empty()
555            && self.files_to_include().sources().is_empty())
556    }
557
558    pub fn match_full_paths(&self) -> bool {
559        self.as_inner().match_full_paths
560    }
561
562    /// Check match full paths to determine whether you're required to pass a fully qualified
563    /// project path (starts with a project root).
564    pub fn match_path(&self, file_path: &Path) -> bool {
565        let mut path = file_path.to_path_buf();
566        loop {
567            if self.files_to_exclude().is_match(&path) {
568                return false;
569            } else if self.files_to_include().sources().is_empty()
570                || self.files_to_include().is_match(&path)
571            {
572                return true;
573            } else if !path.pop() {
574                return false;
575            }
576        }
577    }
578    pub fn as_inner(&self) -> &SearchInputs {
579        match self {
580            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
581        }
582    }
583
584    /// Whether this search should replace only one match per line, instead of
585    /// all matches.
586    /// Returns `None` for text searches, as only regex searches support this
587    /// option.
588    pub fn one_match_per_line(&self) -> Option<bool> {
589        match self {
590            Self::Regex {
591                one_match_per_line, ..
592            } => Some(*one_match_per_line),
593            Self::Text { .. } => None,
594        }
595    }
596}
597
598#[cfg(test)]
599mod tests {
600    use super::*;
601
602    #[test]
603    fn path_matcher_creation_for_valid_paths() {
604        for valid_path in [
605            "file",
606            "Cargo.toml",
607            ".DS_Store",
608            "~/dir/another_dir/",
609            "./dir/file",
610            "dir/[a-z].txt",
611            "../dir/filé",
612        ] {
613            let path_matcher = PathMatcher::new(&[valid_path.to_owned()], PathStyle::local())
614                .unwrap_or_else(|e| {
615                    panic!("Valid path {valid_path} should be accepted, but got: {e}")
616                });
617            assert!(
618                path_matcher.is_match(valid_path),
619                "Path matcher for valid path {valid_path} should match itself"
620            )
621        }
622    }
623
624    #[test]
625    fn path_matcher_creation_for_globs() {
626        for invalid_glob in ["dir/[].txt", "dir/[a-z.txt", "dir/{file"] {
627            match PathMatcher::new(&[invalid_glob.to_owned()], PathStyle::local()) {
628                Ok(_) => panic!("Invalid glob {invalid_glob} should not be accepted"),
629                Err(_expected) => {}
630            }
631        }
632
633        for valid_glob in [
634            "dir/?ile",
635            "dir/*.txt",
636            "dir/**/file",
637            "dir/[a-z].txt",
638            "{dir,file}",
639        ] {
640            match PathMatcher::new(&[valid_glob.to_owned()], PathStyle::local()) {
641                Ok(_expected) => {}
642                Err(e) => panic!("Valid glob should be accepted, but got: {e}"),
643            }
644        }
645    }
646
647    #[test]
648    fn test_case_sensitive_pattern_items() {
649        let case_sensitive = false;
650        let search_query = SearchQuery::regex(
651            "test\\C",
652            false,
653            case_sensitive,
654            false,
655            false,
656            Default::default(),
657            Default::default(),
658            false,
659            None,
660        )
661        .expect("Should be able to create a regex SearchQuery");
662
663        assert_eq!(
664            search_query.case_sensitive(),
665            true,
666            "Case sensitivity should be enabled when \\C pattern item is present in the query."
667        );
668
669        let case_sensitive = true;
670        let search_query = SearchQuery::regex(
671            "test\\c",
672            true,
673            case_sensitive,
674            false,
675            false,
676            Default::default(),
677            Default::default(),
678            false,
679            None,
680        )
681        .expect("Should be able to create a regex SearchQuery");
682
683        assert_eq!(
684            search_query.case_sensitive(),
685            false,
686            "Case sensitivity should be disabled when \\c pattern item is present, even if initially set to true."
687        );
688
689        let case_sensitive = false;
690        let search_query = SearchQuery::regex(
691            "test\\c\\C",
692            false,
693            case_sensitive,
694            false,
695            false,
696            Default::default(),
697            Default::default(),
698            false,
699            None,
700        )
701        .expect("Should be able to create a regex SearchQuery");
702
703        assert_eq!(
704            search_query.case_sensitive(),
705            true,
706            "Case sensitivity should be enabled when \\C is the last pattern item, even after a \\c."
707        );
708
709        let case_sensitive = false;
710        let search_query = SearchQuery::regex(
711            "tests\\\\C",
712            false,
713            case_sensitive,
714            false,
715            false,
716            Default::default(),
717            Default::default(),
718            false,
719            None,
720        )
721        .expect("Should be able to create a regex SearchQuery");
722
723        assert_eq!(
724            search_query.case_sensitive(),
725            false,
726            "Case sensitivity should not be enabled when \\C pattern item is preceded by a backslash."
727        );
728    }
729}