search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use fancy_regex::{Captures, Regex, RegexBuilder};
  5use gpui::Entity;
  6use language::{Buffer, BufferSnapshot, CharKind};
  7use smol::future::yield_now;
  8use std::{
  9    borrow::Cow,
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::Path,
 13    sync::{Arc, LazyLock},
 14};
 15use text::Anchor;
 16use util::paths::PathMatcher;
 17
/// One item produced while running a search.
#[derive(Debug)]
pub enum SearchResult {
    /// A buffer handle together with a list of anchor ranges inside it.
    // NOTE(review): presumably `ranges` are the match locations in `buffer` — confirm against the producer.
    Buffer {
        buffer: Entity<Buffer>,
        ranges: Vec<Range<Anchor>>,
    },
    /// Marker variant carrying no data.
    // NOTE(review): presumably emitted when a result cap is hit — confirm against the producer.
    LimitReached,
}
 26
 27#[derive(Clone, Copy, PartialEq)]
 28pub enum SearchInputKind {
 29    Query,
 30    Include,
 31    Exclude,
 32}
 33
/// The user-supplied inputs shared by every kind of [`SearchQuery`].
#[derive(Clone, Debug)]
pub struct SearchInputs {
    /// The query string as originally passed to the constructor
    /// (before any whole-word anchors are added for regex queries).
    query: Arc<str>,
    /// Matcher for paths that should be searched.
    files_to_include: PathMatcher,
    /// Matcher for paths that should be skipped.
    files_to_exclude: PathMatcher,
    /// Whether include/exclude patterns are matched against fully qualified project paths.
    match_full_paths: bool,
    /// Optional explicit set of buffers attached to the query.
    // NOTE(review): presumably restricts the search to these open buffers — confirm against callers.
    buffers: Option<Vec<Entity<Buffer>>>,
}
 42
 43impl SearchInputs {
 44    pub fn as_str(&self) -> &str {
 45        self.query.as_ref()
 46    }
 47    pub fn files_to_include(&self) -> &PathMatcher {
 48        &self.files_to_include
 49    }
 50    pub fn files_to_exclude(&self) -> &PathMatcher {
 51        &self.files_to_exclude
 52    }
 53    pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
 54        &self.buffers
 55    }
 56}
/// A compiled search query: either a literal-text search or a regex search.
///
/// Both variants carry the option flags they were built with plus the shared
/// [`SearchInputs`] (`inner`).
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search backed by an Aho-Corasick automaton.
    Text {
        /// Automaton built from the single query string.
        search: AhoCorasick,
        /// Replacement text, if one was set via `with_replacement`.
        replacement: Option<String>,
        whole_word: bool,
        case_sensitive: bool,
        include_ignored: bool,
        inner: SearchInputs,
    },

    /// Regular-expression search backed by `fancy_regex`.
    Regex {
        /// Compiled pattern (includes `\b` anchors when whole-word matching added them).
        regex: Regex,
        /// Replacement text, if one was set via `with_replacement`.
        replacement: Option<String>,
        /// True when the pattern text contains a newline or the two-character sequence `\n`;
        /// such queries are matched against whole text instead of line by line.
        multiline: bool,
        whole_word: bool,
        case_sensitive: bool,
        include_ignored: bool,
        /// When set, line-by-line searching keeps only the first match of each line.
        one_match_per_line: bool,
        inner: SearchInputs,
    },
}
 79
 80static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
 81    RegexBuilder::new(r"\B")
 82        .build()
 83        .expect("Failed to create WORD_MATCH_TEST")
 84});
 85
 86impl SearchQuery {
 87    /// Create a text query
 88    ///
 89    /// If `match_full_paths` is true, include/exclude patterns will always be matched against fully qualified project paths beginning with a project root.
 90    /// If `match_full_paths` is false, patterns will be matched against full paths only when the project has multiple roots.
 91    pub fn text(
 92        query: impl ToString,
 93        whole_word: bool,
 94        case_sensitive: bool,
 95        include_ignored: bool,
 96        files_to_include: PathMatcher,
 97        files_to_exclude: PathMatcher,
 98        match_full_paths: bool,
 99        buffers: Option<Vec<Entity<Buffer>>>,
100    ) -> Result<Self> {
101        let query = query.to_string();
102        if !case_sensitive && !query.is_ascii() {
103            // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
104            // Fallback to regex search as recommended by
105            // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
106            return Self::regex(
107                regex::escape(&query),
108                whole_word,
109                case_sensitive,
110                include_ignored,
111                false,
112                files_to_include,
113                files_to_exclude,
114                false,
115                buffers,
116            );
117        }
118        let search = AhoCorasickBuilder::new()
119            .ascii_case_insensitive(!case_sensitive)
120            .build([&query])?;
121        let inner = SearchInputs {
122            query: query.into(),
123            files_to_exclude,
124            files_to_include,
125            match_full_paths,
126            buffers,
127        };
128        Ok(Self::Text {
129            search,
130            replacement: None,
131            whole_word,
132            case_sensitive,
133            include_ignored,
134            inner,
135        })
136    }
137
138    /// Create a regex query
139    ///
140    /// If `match_full_paths` is true, include/exclude patterns will be matched against fully qualified project paths
141    /// beginning with a project root name. If false, they will be matched against project-relative paths (which don't start
142    /// with their respective project root).
143    pub fn regex(
144        query: impl ToString,
145        whole_word: bool,
146        case_sensitive: bool,
147        include_ignored: bool,
148        one_match_per_line: bool,
149        files_to_include: PathMatcher,
150        files_to_exclude: PathMatcher,
151        match_full_paths: bool,
152        buffers: Option<Vec<Entity<Buffer>>>,
153    ) -> Result<Self> {
154        let mut query = query.to_string();
155        let initial_query = Arc::from(query.as_str());
156        if whole_word {
157            let mut word_query = String::new();
158            if let Some(first) = query.get(0..1)
159                && WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x)
160            {
161                word_query.push_str("\\b");
162            }
163            word_query.push_str(&query);
164            if let Some(last) = query.get(query.len() - 1..)
165                && WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x)
166            {
167                word_query.push_str("\\b");
168            }
169            query = word_query
170        }
171
172        let multiline = query.contains('\n') || query.contains("\\n");
173        let regex = RegexBuilder::new(&query)
174            .case_insensitive(!case_sensitive)
175            .build()?;
176        let inner = SearchInputs {
177            query: initial_query,
178            files_to_exclude,
179            files_to_include,
180            match_full_paths,
181            buffers,
182        };
183        Ok(Self::Regex {
184            regex,
185            replacement: None,
186            multiline,
187            whole_word,
188            case_sensitive,
189            include_ignored,
190            inner,
191            one_match_per_line,
192        })
193    }
194
195    pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
196        let files_to_include = if message.files_to_include.is_empty() {
197            message
198                .files_to_include_legacy
199                .split(',')
200                .map(str::trim)
201                .filter(|&glob_str| !glob_str.is_empty())
202                .map(|s| s.to_string())
203                .collect()
204        } else {
205            message.files_to_include
206        };
207
208        let files_to_exclude = if message.files_to_exclude.is_empty() {
209            message
210                .files_to_exclude_legacy
211                .split(',')
212                .map(str::trim)
213                .filter(|&glob_str| !glob_str.is_empty())
214                .map(|s| s.to_string())
215                .collect()
216        } else {
217            message.files_to_exclude
218        };
219
220        if message.regex {
221            Self::regex(
222                message.query,
223                message.whole_word,
224                message.case_sensitive,
225                message.include_ignored,
226                false,
227                PathMatcher::new(files_to_include)?,
228                PathMatcher::new(files_to_exclude)?,
229                message.match_full_paths,
230                None, // search opened only don't need search remote
231            )
232        } else {
233            Self::text(
234                message.query,
235                message.whole_word,
236                message.case_sensitive,
237                message.include_ignored,
238                PathMatcher::new(files_to_include)?,
239                PathMatcher::new(files_to_exclude)?,
240                false,
241                None, // search opened only don't need search remote
242            )
243        }
244    }
245
246    pub fn with_replacement(mut self, new_replacement: String) -> Self {
247        match self {
248            Self::Text {
249                ref mut replacement,
250                ..
251            }
252            | Self::Regex {
253                ref mut replacement,
254                ..
255            } => {
256                *replacement = Some(new_replacement);
257                self
258            }
259        }
260    }
261
262    pub fn to_proto(&self) -> proto::SearchQuery {
263        let files_to_include = self.files_to_include().sources().to_vec();
264        let files_to_exclude = self.files_to_exclude().sources().to_vec();
265        proto::SearchQuery {
266            query: self.as_str().to_string(),
267            regex: self.is_regex(),
268            whole_word: self.whole_word(),
269            case_sensitive: self.case_sensitive(),
270            include_ignored: self.include_ignored(),
271            files_to_include: files_to_include.clone(),
272            files_to_exclude: files_to_exclude.clone(),
273            match_full_paths: self.match_full_paths(),
274            // Populate legacy fields for backwards compatibility
275            files_to_include_legacy: files_to_include.join(","),
276            files_to_exclude_legacy: files_to_exclude.join(","),
277        }
278    }
279
280    pub(crate) fn detect(
281        &self,
282        mut reader: BufReader<Box<dyn Read + Send + Sync>>,
283    ) -> Result<bool> {
284        if self.as_str().is_empty() {
285            return Ok(false);
286        }
287
288        match self {
289            Self::Text { search, .. } => {
290                let mat = search.stream_find_iter(reader).next();
291                match mat {
292                    Some(Ok(_)) => Ok(true),
293                    Some(Err(err)) => Err(err.into()),
294                    None => Ok(false),
295                }
296            }
297            Self::Regex {
298                regex, multiline, ..
299            } => {
300                if *multiline {
301                    let mut text = String::new();
302                    if let Err(err) = reader.read_to_string(&mut text) {
303                        Err(err.into())
304                    } else {
305                        Ok(regex.find(&text)?.is_some())
306                    }
307                } else {
308                    for line in reader.lines() {
309                        let line = line?;
310                        if regex.find(&line)?.is_some() {
311                            return Ok(true);
312                        }
313                    }
314                    Ok(false)
315                }
316            }
317        }
318    }
319    /// Returns the replacement text for this `SearchQuery`.
320    pub fn replacement(&self) -> Option<&str> {
321        match self {
322            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
323                replacement.as_deref()
324            }
325        }
326    }
327    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
328    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
329        match self {
330            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
331            SearchQuery::Regex {
332                regex, replacement, ..
333            } => {
334                if let Some(replacement) = replacement {
335                    static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
336                        LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
337                    let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
338                        replacement,
339                        |c: &Captures| match c.get(0).unwrap().as_str() {
340                            r"\\" => "\\",
341                            r"\n" => "\n",
342                            r"\t" => "\t",
343                            x => unreachable!("Unexpected escape sequence: {}", x),
344                        },
345                    );
346                    Some(regex.replace(text, replacement))
347                } else {
348                    None
349                }
350            }
351        }
352    }
353
354    pub async fn search(
355        &self,
356        buffer: &BufferSnapshot,
357        subrange: Option<Range<usize>>,
358    ) -> Vec<Range<usize>> {
359        const YIELD_INTERVAL: usize = 20000;
360
361        if self.as_str().is_empty() {
362            return Default::default();
363        }
364
365        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
366        let rope = if let Some(range) = subrange {
367            buffer.as_rope().slice(range)
368        } else {
369            buffer.as_rope().clone()
370        };
371
372        let mut matches = Vec::new();
373        match self {
374            Self::Text {
375                search, whole_word, ..
376            } => {
377                for (ix, mat) in search
378                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
379                    .enumerate()
380                {
381                    if (ix + 1) % YIELD_INTERVAL == 0 {
382                        yield_now().await;
383                    }
384
385                    let mat = mat.unwrap();
386                    if *whole_word {
387                        let classifier = buffer.char_classifier_at(range_offset + mat.start());
388
389                        let prev_kind = rope
390                            .reversed_chars_at(mat.start())
391                            .next()
392                            .map(|c| classifier.kind(c));
393                        let start_kind =
394                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
395                        let end_kind =
396                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
397                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
398                        if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
399                            || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
400                        {
401                            continue;
402                        }
403                    }
404                    matches.push(mat.start()..mat.end())
405                }
406            }
407
408            Self::Regex {
409                regex, multiline, ..
410            } => {
411                if *multiline {
412                    let text = rope.to_string();
413                    for (ix, mat) in regex.find_iter(&text).enumerate() {
414                        if (ix + 1) % YIELD_INTERVAL == 0 {
415                            yield_now().await;
416                        }
417
418                        if let Ok(mat) = mat {
419                            matches.push(mat.start()..mat.end());
420                        }
421                    }
422                } else {
423                    let mut line = String::new();
424                    let mut line_offset = 0;
425                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
426                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
427                            yield_now().await;
428                        }
429
430                        for (newline_ix, text) in chunk.split('\n').enumerate() {
431                            if newline_ix > 0 {
432                                for mat in regex.find_iter(&line).flatten() {
433                                    let start = line_offset + mat.start();
434                                    let end = line_offset + mat.end();
435                                    matches.push(start..end);
436                                    if self.one_match_per_line() == Some(true) {
437                                        break;
438                                    }
439                                }
440
441                                line_offset += line.len() + 1;
442                                line.clear();
443                            }
444                            line.push_str(text);
445                        }
446                    }
447                }
448            }
449        }
450
451        matches
452    }
453
454    pub fn is_empty(&self) -> bool {
455        self.as_str().is_empty()
456    }
457
458    pub fn as_str(&self) -> &str {
459        self.as_inner().as_str()
460    }
461
462    pub fn whole_word(&self) -> bool {
463        match self {
464            Self::Text { whole_word, .. } => *whole_word,
465            Self::Regex { whole_word, .. } => *whole_word,
466        }
467    }
468
469    pub fn case_sensitive(&self) -> bool {
470        match self {
471            Self::Text { case_sensitive, .. } => *case_sensitive,
472            Self::Regex { case_sensitive, .. } => *case_sensitive,
473        }
474    }
475
476    pub fn include_ignored(&self) -> bool {
477        match self {
478            Self::Text {
479                include_ignored, ..
480            } => *include_ignored,
481            Self::Regex {
482                include_ignored, ..
483            } => *include_ignored,
484        }
485    }
486
487    pub fn is_regex(&self) -> bool {
488        matches!(self, Self::Regex { .. })
489    }
490
491    pub fn files_to_include(&self) -> &PathMatcher {
492        self.as_inner().files_to_include()
493    }
494
495    pub fn files_to_exclude(&self) -> &PathMatcher {
496        self.as_inner().files_to_exclude()
497    }
498
499    pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
500        self.as_inner().buffers.as_ref()
501    }
502
503    pub fn is_opened_only(&self) -> bool {
504        self.as_inner().buffers.is_some()
505    }
506
507    pub fn filters_path(&self) -> bool {
508        !(self.files_to_exclude().sources().is_empty()
509            && self.files_to_include().sources().is_empty())
510    }
511
512    pub fn match_full_paths(&self) -> bool {
513        self.as_inner().match_full_paths
514    }
515
516    /// Check match full paths to determine whether you're required to pass a fully qualified
517    /// project path (starts with a project root).
518    pub fn match_path(&self, file_path: &Path) -> bool {
519        let mut path = file_path.to_path_buf();
520        loop {
521            if self.files_to_exclude().is_match(&path) {
522                return false;
523            } else if self.files_to_include().sources().is_empty()
524                || self.files_to_include().is_match(&path)
525            {
526                return true;
527            } else if !path.pop() {
528                return false;
529            }
530        }
531    }
532    pub fn as_inner(&self) -> &SearchInputs {
533        match self {
534            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
535        }
536    }
537
538    /// Whether this search should replace only one match per line, instead of
539    /// all matches.
540    /// Returns `None` for text searches, as only regex searches support this
541    /// option.
542    pub fn one_match_per_line(&self) -> Option<bool> {
543        match self {
544            Self::Regex {
545                one_match_per_line, ..
546            } => Some(*one_match_per_line),
547            Self::Text { .. } => None,
548        }
549    }
550}
551
#[cfg(test)]
mod tests {
    use super::*;

    /// Every plain path must build a matcher, and that matcher must match the path itself.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        const VALID_PATHS: [&str; 7] = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in VALID_PATHS {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected; well-formed globs must be accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}