search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use fancy_regex::{Captures, Regex, RegexBuilder};
  5use gpui::Entity;
  6use language::{Buffer, BufferSnapshot, CharKind};
  7use smol::future::yield_now;
  8use std::{
  9    borrow::Cow,
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::Path,
 13    sync::{Arc, LazyLock},
 14};
 15use text::Anchor;
 16use util::paths::PathMatcher;
 17
 18#[derive(Debug)]
 19pub enum SearchResult {
 20    Buffer {
 21        buffer: Entity<Buffer>,
 22        ranges: Vec<Range<Anchor>>,
 23    },
 24    LimitReached,
 25}
 26
 27#[derive(Clone, Copy, PartialEq)]
 28pub enum SearchInputKind {
 29    Query,
 30    Include,
 31    Exclude,
 32}
 33
 34#[derive(Clone, Debug)]
 35pub struct SearchInputs {
 36    query: Arc<str>,
 37    files_to_include: PathMatcher,
 38    files_to_exclude: PathMatcher,
 39    match_full_paths: bool,
 40    buffers: Option<Vec<Entity<Buffer>>>,
 41}
 42
 43impl SearchInputs {
 44    pub fn as_str(&self) -> &str {
 45        self.query.as_ref()
 46    }
 47    pub fn files_to_include(&self) -> &PathMatcher {
 48        &self.files_to_include
 49    }
 50    pub fn files_to_exclude(&self) -> &PathMatcher {
 51        &self.files_to_exclude
 52    }
 53    pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
 54        &self.buffers
 55    }
 56}
 57#[derive(Clone, Debug)]
 58pub enum SearchQuery {
 59    Text {
 60        search: AhoCorasick,
 61        replacement: Option<String>,
 62        whole_word: bool,
 63        case_sensitive: bool,
 64        include_ignored: bool,
 65        inner: SearchInputs,
 66    },
 67
 68    Regex {
 69        regex: Regex,
 70        replacement: Option<String>,
 71        multiline: bool,
 72        whole_word: bool,
 73        case_sensitive: bool,
 74        include_ignored: bool,
 75        one_match_per_line: bool,
 76        inner: SearchInputs,
 77    },
 78}
 79
 80static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
 81    RegexBuilder::new(r"\B")
 82        .build()
 83        .expect("Failed to create WORD_MATCH_TEST")
 84});
 85
 86impl SearchQuery {
 87    /// Create a text query
 88    ///
 89    /// If `match_full_paths` is true, include/exclude patterns will always be matched against fully qualified project paths beginning with a project root.
 90    /// If `match_full_paths` is false, patterns will be matched against full paths only when the project has multiple roots.
 91    pub fn text(
 92        query: impl ToString,
 93        whole_word: bool,
 94        case_sensitive: bool,
 95        include_ignored: bool,
 96        files_to_include: PathMatcher,
 97        files_to_exclude: PathMatcher,
 98        match_full_paths: bool,
 99        buffers: Option<Vec<Entity<Buffer>>>,
100    ) -> Result<Self> {
101        let query = query.to_string();
102        if !case_sensitive && !query.is_ascii() {
103            // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
104            // Fallback to regex search as recommended by
105            // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
106            return Self::regex(
107                regex::escape(&query),
108                whole_word,
109                case_sensitive,
110                include_ignored,
111                false,
112                files_to_include,
113                files_to_exclude,
114                false,
115                buffers,
116            );
117        }
118        let search = AhoCorasickBuilder::new()
119            .ascii_case_insensitive(!case_sensitive)
120            .build([&query])?;
121        let inner = SearchInputs {
122            query: query.into(),
123            files_to_exclude,
124            files_to_include,
125            match_full_paths,
126            buffers,
127        };
128        Ok(Self::Text {
129            search,
130            replacement: None,
131            whole_word,
132            case_sensitive,
133            include_ignored,
134            inner,
135        })
136    }
137
138    /// Create a regex query
139    ///
140    /// If `match_full_paths` is true, include/exclude patterns will be matched against fully qualified project paths
141    /// beginning with a project root name. If false, they will be matched against project-relative paths (which don't start
142    /// with their respective project root).
143    pub fn regex(
144        query: impl ToString,
145        whole_word: bool,
146        case_sensitive: bool,
147        include_ignored: bool,
148        one_match_per_line: bool,
149        files_to_include: PathMatcher,
150        files_to_exclude: PathMatcher,
151        match_full_paths: bool,
152        buffers: Option<Vec<Entity<Buffer>>>,
153    ) -> Result<Self> {
154        let mut query = query.to_string();
155        let initial_query = Arc::from(query.as_str());
156        if whole_word {
157            let mut word_query = String::new();
158            if let Some(first) = query.get(0..1)
159                && WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x) {
160                    word_query.push_str("\\b");
161                }
162            word_query.push_str(&query);
163            if let Some(last) = query.get(query.len() - 1..)
164                && WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x) {
165                    word_query.push_str("\\b");
166                }
167            query = word_query
168        }
169
170        let multiline = query.contains('\n') || query.contains("\\n");
171        let regex = RegexBuilder::new(&query)
172            .case_insensitive(!case_sensitive)
173            .build()?;
174        let inner = SearchInputs {
175            query: initial_query,
176            files_to_exclude,
177            files_to_include,
178            match_full_paths,
179            buffers,
180        };
181        Ok(Self::Regex {
182            regex,
183            replacement: None,
184            multiline,
185            whole_word,
186            case_sensitive,
187            include_ignored,
188            inner,
189            one_match_per_line,
190        })
191    }
192
193    pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
194        let files_to_include = if message.files_to_include.is_empty() {
195            message
196                .files_to_include_legacy
197                .split(',')
198                .map(str::trim)
199                .filter(|&glob_str| !glob_str.is_empty())
200                .map(|s| s.to_string())
201                .collect()
202        } else {
203            message.files_to_include
204        };
205
206        let files_to_exclude = if message.files_to_exclude.is_empty() {
207            message
208                .files_to_exclude_legacy
209                .split(',')
210                .map(str::trim)
211                .filter(|&glob_str| !glob_str.is_empty())
212                .map(|s| s.to_string())
213                .collect()
214        } else {
215            message.files_to_exclude
216        };
217
218        if message.regex {
219            Self::regex(
220                message.query,
221                message.whole_word,
222                message.case_sensitive,
223                message.include_ignored,
224                false,
225                PathMatcher::new(files_to_include)?,
226                PathMatcher::new(files_to_exclude)?,
227                message.match_full_paths,
228                None, // search opened only don't need search remote
229            )
230        } else {
231            Self::text(
232                message.query,
233                message.whole_word,
234                message.case_sensitive,
235                message.include_ignored,
236                PathMatcher::new(files_to_include)?,
237                PathMatcher::new(files_to_exclude)?,
238                false,
239                None, // search opened only don't need search remote
240            )
241        }
242    }
243
244    pub fn with_replacement(mut self, new_replacement: String) -> Self {
245        match self {
246            Self::Text {
247                ref mut replacement,
248                ..
249            }
250            | Self::Regex {
251                ref mut replacement,
252                ..
253            } => {
254                *replacement = Some(new_replacement);
255                self
256            }
257        }
258    }
259
260    pub fn to_proto(&self) -> proto::SearchQuery {
261        let files_to_include = self.files_to_include().sources().to_vec();
262        let files_to_exclude = self.files_to_exclude().sources().to_vec();
263        proto::SearchQuery {
264            query: self.as_str().to_string(),
265            regex: self.is_regex(),
266            whole_word: self.whole_word(),
267            case_sensitive: self.case_sensitive(),
268            include_ignored: self.include_ignored(),
269            files_to_include: files_to_include.clone(),
270            files_to_exclude: files_to_exclude.clone(),
271            match_full_paths: self.match_full_paths(),
272            // Populate legacy fields for backwards compatibility
273            files_to_include_legacy: files_to_include.join(","),
274            files_to_exclude_legacy: files_to_exclude.join(","),
275        }
276    }
277
278    pub(crate) fn detect(
279        &self,
280        mut reader: BufReader<Box<dyn Read + Send + Sync>>,
281    ) -> Result<bool> {
282        if self.as_str().is_empty() {
283            return Ok(false);
284        }
285
286        match self {
287            Self::Text { search, .. } => {
288                let mat = search.stream_find_iter(reader).next();
289                match mat {
290                    Some(Ok(_)) => Ok(true),
291                    Some(Err(err)) => Err(err.into()),
292                    None => Ok(false),
293                }
294            }
295            Self::Regex {
296                regex, multiline, ..
297            } => {
298                if *multiline {
299                    let mut text = String::new();
300                    if let Err(err) = reader.read_to_string(&mut text) {
301                        Err(err.into())
302                    } else {
303                        Ok(regex.find(&text)?.is_some())
304                    }
305                } else {
306                    for line in reader.lines() {
307                        let line = line?;
308                        if regex.find(&line)?.is_some() {
309                            return Ok(true);
310                        }
311                    }
312                    Ok(false)
313                }
314            }
315        }
316    }
317    /// Returns the replacement text for this `SearchQuery`.
318    pub fn replacement(&self) -> Option<&str> {
319        match self {
320            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
321                replacement.as_deref()
322            }
323        }
324    }
325    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
326    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
327        match self {
328            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
329            SearchQuery::Regex {
330                regex, replacement, ..
331            } => {
332                if let Some(replacement) = replacement {
333                    static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
334                        LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
335                    let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
336                        replacement,
337                        |c: &Captures| match c.get(0).unwrap().as_str() {
338                            r"\\" => "\\",
339                            r"\n" => "\n",
340                            r"\t" => "\t",
341                            x => unreachable!("Unexpected escape sequence: {}", x),
342                        },
343                    );
344                    Some(regex.replace(text, replacement))
345                } else {
346                    None
347                }
348            }
349        }
350    }
351
352    pub async fn search(
353        &self,
354        buffer: &BufferSnapshot,
355        subrange: Option<Range<usize>>,
356    ) -> Vec<Range<usize>> {
357        const YIELD_INTERVAL: usize = 20000;
358
359        if self.as_str().is_empty() {
360            return Default::default();
361        }
362
363        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
364        let rope = if let Some(range) = subrange {
365            buffer.as_rope().slice(range)
366        } else {
367            buffer.as_rope().clone()
368        };
369
370        let mut matches = Vec::new();
371        match self {
372            Self::Text {
373                search, whole_word, ..
374            } => {
375                for (ix, mat) in search
376                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
377                    .enumerate()
378                {
379                    if (ix + 1) % YIELD_INTERVAL == 0 {
380                        yield_now().await;
381                    }
382
383                    let mat = mat.unwrap();
384                    if *whole_word {
385                        let classifier = buffer.char_classifier_at(range_offset + mat.start());
386
387                        let prev_kind = rope
388                            .reversed_chars_at(mat.start())
389                            .next()
390                            .map(|c| classifier.kind(c));
391                        let start_kind =
392                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
393                        let end_kind =
394                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
395                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
396                        if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
397                            || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
398                        {
399                            continue;
400                        }
401                    }
402                    matches.push(mat.start()..mat.end())
403                }
404            }
405
406            Self::Regex {
407                regex, multiline, ..
408            } => {
409                if *multiline {
410                    let text = rope.to_string();
411                    for (ix, mat) in regex.find_iter(&text).enumerate() {
412                        if (ix + 1) % YIELD_INTERVAL == 0 {
413                            yield_now().await;
414                        }
415
416                        if let Ok(mat) = mat {
417                            matches.push(mat.start()..mat.end());
418                        }
419                    }
420                } else {
421                    let mut line = String::new();
422                    let mut line_offset = 0;
423                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
424                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
425                            yield_now().await;
426                        }
427
428                        for (newline_ix, text) in chunk.split('\n').enumerate() {
429                            if newline_ix > 0 {
430                                for mat in regex.find_iter(&line).flatten() {
431                                    let start = line_offset + mat.start();
432                                    let end = line_offset + mat.end();
433                                    matches.push(start..end);
434                                    if self.one_match_per_line() == Some(true) {
435                                        break;
436                                    }
437                                }
438
439                                line_offset += line.len() + 1;
440                                line.clear();
441                            }
442                            line.push_str(text);
443                        }
444                    }
445                }
446            }
447        }
448
449        matches
450    }
451
452    pub fn is_empty(&self) -> bool {
453        self.as_str().is_empty()
454    }
455
456    pub fn as_str(&self) -> &str {
457        self.as_inner().as_str()
458    }
459
460    pub fn whole_word(&self) -> bool {
461        match self {
462            Self::Text { whole_word, .. } => *whole_word,
463            Self::Regex { whole_word, .. } => *whole_word,
464        }
465    }
466
467    pub fn case_sensitive(&self) -> bool {
468        match self {
469            Self::Text { case_sensitive, .. } => *case_sensitive,
470            Self::Regex { case_sensitive, .. } => *case_sensitive,
471        }
472    }
473
474    pub fn include_ignored(&self) -> bool {
475        match self {
476            Self::Text {
477                include_ignored, ..
478            } => *include_ignored,
479            Self::Regex {
480                include_ignored, ..
481            } => *include_ignored,
482        }
483    }
484
485    pub fn is_regex(&self) -> bool {
486        matches!(self, Self::Regex { .. })
487    }
488
489    pub fn files_to_include(&self) -> &PathMatcher {
490        self.as_inner().files_to_include()
491    }
492
493    pub fn files_to_exclude(&self) -> &PathMatcher {
494        self.as_inner().files_to_exclude()
495    }
496
497    pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
498        self.as_inner().buffers.as_ref()
499    }
500
501    pub fn is_opened_only(&self) -> bool {
502        self.as_inner().buffers.is_some()
503    }
504
505    pub fn filters_path(&self) -> bool {
506        !(self.files_to_exclude().sources().is_empty()
507            && self.files_to_include().sources().is_empty())
508    }
509
510    pub fn match_full_paths(&self) -> bool {
511        self.as_inner().match_full_paths
512    }
513
514    /// Check match full paths to determine whether you're required to pass a fully qualified
515    /// project path (starts with a project root).
516    pub fn match_path(&self, file_path: &Path) -> bool {
517        let mut path = file_path.to_path_buf();
518        loop {
519            if self.files_to_exclude().is_match(&path) {
520                return false;
521            } else if self.files_to_include().sources().is_empty()
522                || self.files_to_include().is_match(&path)
523            {
524                return true;
525            } else if !path.pop() {
526                return false;
527            }
528        }
529    }
530    pub fn as_inner(&self) -> &SearchInputs {
531        match self {
532            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
533        }
534    }
535
536    /// Whether this search should replace only one match per line, instead of
537    /// all matches.
538    /// Returns `None` for text searches, as only regex searches support this
539    /// option.
540    pub fn one_match_per_line(&self) -> Option<bool> {
541        match self {
542            Self::Regex {
543                one_match_per_line, ..
544            } => Some(*one_match_per_line),
545            Self::Text { .. } => None,
546        }
547    }
548}
549
#[cfg(test)]
mod tests {
    use super::*;

    /// Every listed path must build into a matcher, and that matcher must match the path itself.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed globs must be accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}