search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use fancy_regex::{Captures, Regex, RegexBuilder};
  5use gpui::Entity;
  6use language::{Buffer, BufferSnapshot, CharKind};
  7use smol::future::yield_now;
  8use std::{
  9    borrow::Cow,
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::Path,
 13    sync::{Arc, LazyLock},
 14};
 15use text::Anchor;
 16use util::paths::PathMatcher;
 17
/// A single item produced while running a search.
#[derive(Debug)]
pub enum SearchResult {
    /// Matches located inside one buffer.
    Buffer {
        /// The buffer the matches belong to.
        buffer: Entity<Buffer>,
        /// Anchor ranges of the individual matches.
        ranges: Vec<Range<Anchor>>,
    },
    /// NOTE(review): presumably signals that the search stopped because a
    /// result limit was hit — confirm against the producer of this enum.
    LimitReached,
}
 26
/// Identifies one of the three textual inputs of a search.
///
/// NOTE(review): the variants appear to mirror the query text and the
/// include/exclude path filters stored in `SearchInputs` — confirm with callers.
#[derive(Clone, Copy, PartialEq)]
pub enum SearchInputKind {
    Query,
    Include,
    Exclude,
}
 33
/// The user-supplied inputs shared by both text and regex queries.
#[derive(Clone, Debug)]
pub struct SearchInputs {
    /// The query string as originally provided (before any whole-word wrapping).
    query: Arc<str>,
    /// Paths accepted by the search; an empty matcher accepts every path.
    files_to_include: PathMatcher,
    /// Paths rejected by the search; checked before `files_to_include`.
    files_to_exclude: PathMatcher,
    /// Optional explicit set of buffers; `Some` marks an "opened only" search.
    buffers: Option<Vec<Entity<Buffer>>>,
}
 41
 42impl SearchInputs {
 43    pub fn as_str(&self) -> &str {
 44        self.query.as_ref()
 45    }
 46    pub fn files_to_include(&self) -> &PathMatcher {
 47        &self.files_to_include
 48    }
 49    pub fn files_to_exclude(&self) -> &PathMatcher {
 50        &self.files_to_exclude
 51    }
 52    pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
 53        &self.buffers
 54    }
 55}
/// A compiled search query: either a literal text search or a regex search.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search backed by an Aho-Corasick automaton.
    Text {
        /// Automaton built from the single query string.
        search: AhoCorasick,
        /// Replacement text, if one was set via `with_replacement`.
        replacement: Option<String>,
        /// When set, matches glued to adjacent word characters are dropped.
        whole_word: bool,
        /// When unset, the automaton is built ASCII-case-insensitive.
        case_sensitive: bool,
        include_ignored: bool,
        /// The original inputs the query was built from.
        inner: SearchInputs,
    },

    /// Regular-expression search.
    Regex {
        /// Compiled pattern (already wrapped in `\b` for whole-word queries).
        regex: Regex,
        /// Replacement template, if one was set via `with_replacement`.
        replacement: Option<String>,
        /// True when the pattern contains a newline or a `\n` escape; such
        /// queries are matched against the whole text instead of line by line.
        multiline: bool,
        whole_word: bool,
        /// When unset, the regex is compiled case-insensitive.
        case_sensitive: bool,
        include_ignored: bool,
        /// The original inputs the query was built from.
        inner: SearchInputs,
    },
}
 77
 78static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
 79    RegexBuilder::new(r"\B")
 80        .build()
 81        .expect("Failed to create WORD_MATCH_TEST")
 82});
 83
 84impl SearchQuery {
 85    pub fn text(
 86        query: impl ToString,
 87        whole_word: bool,
 88        case_sensitive: bool,
 89        include_ignored: bool,
 90        files_to_include: PathMatcher,
 91        files_to_exclude: PathMatcher,
 92        buffers: Option<Vec<Entity<Buffer>>>,
 93    ) -> Result<Self> {
 94        let query = query.to_string();
 95        let search = AhoCorasickBuilder::new()
 96            .ascii_case_insensitive(!case_sensitive)
 97            .build([&query])?;
 98        let inner = SearchInputs {
 99            query: query.into(),
100            files_to_exclude,
101            files_to_include,
102            buffers,
103        };
104        Ok(Self::Text {
105            search,
106            replacement: None,
107            whole_word,
108            case_sensitive,
109            include_ignored,
110            inner,
111        })
112    }
113
114    pub fn regex(
115        query: impl ToString,
116        whole_word: bool,
117        case_sensitive: bool,
118        include_ignored: bool,
119        files_to_include: PathMatcher,
120        files_to_exclude: PathMatcher,
121        buffers: Option<Vec<Entity<Buffer>>>,
122    ) -> Result<Self> {
123        let mut query = query.to_string();
124        let initial_query = Arc::from(query.as_str());
125        if whole_word {
126            let mut word_query = String::new();
127            if let Some(first) = query.get(0..1) {
128                if WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x) {
129                    word_query.push_str("\\b");
130                }
131            }
132            word_query.push_str(&query);
133            if let Some(last) = query.get(query.len() - 1..) {
134                if WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x) {
135                    word_query.push_str("\\b");
136                }
137            }
138            query = word_query
139        }
140
141        let multiline = query.contains('\n') || query.contains("\\n");
142        let regex = RegexBuilder::new(&query)
143            .case_insensitive(!case_sensitive)
144            .build()?;
145        let inner = SearchInputs {
146            query: initial_query,
147            files_to_exclude,
148            files_to_include,
149            buffers,
150        };
151        Ok(Self::Regex {
152            regex,
153            replacement: None,
154            multiline,
155            whole_word,
156            case_sensitive,
157            include_ignored,
158            inner,
159        })
160    }
161
162    pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
163        if message.regex {
164            Self::regex(
165                message.query,
166                message.whole_word,
167                message.case_sensitive,
168                message.include_ignored,
169                deserialize_path_matches(&message.files_to_include)?,
170                deserialize_path_matches(&message.files_to_exclude)?,
171                None, // search opened only don't need search remote
172            )
173        } else {
174            Self::text(
175                message.query,
176                message.whole_word,
177                message.case_sensitive,
178                message.include_ignored,
179                deserialize_path_matches(&message.files_to_include)?,
180                deserialize_path_matches(&message.files_to_exclude)?,
181                None, // search opened only don't need search remote
182            )
183        }
184    }
185
186    pub fn with_replacement(mut self, new_replacement: String) -> Self {
187        match self {
188            Self::Text {
189                ref mut replacement,
190                ..
191            }
192            | Self::Regex {
193                ref mut replacement,
194                ..
195            } => {
196                *replacement = Some(new_replacement);
197                self
198            }
199        }
200    }
201
202    pub fn to_proto(&self) -> proto::SearchQuery {
203        proto::SearchQuery {
204            query: self.as_str().to_string(),
205            regex: self.is_regex(),
206            whole_word: self.whole_word(),
207            case_sensitive: self.case_sensitive(),
208            include_ignored: self.include_ignored(),
209            files_to_include: self.files_to_include().sources().join(","),
210            files_to_exclude: self.files_to_exclude().sources().join(","),
211        }
212    }
213
214    pub(crate) fn detect(
215        &self,
216        mut reader: BufReader<Box<dyn Read + Send + Sync>>,
217    ) -> Result<bool> {
218        if self.as_str().is_empty() {
219            return Ok(false);
220        }
221
222        match self {
223            Self::Text { search, .. } => {
224                let mat = search.stream_find_iter(reader).next();
225                match mat {
226                    Some(Ok(_)) => Ok(true),
227                    Some(Err(err)) => Err(err.into()),
228                    None => Ok(false),
229                }
230            }
231            Self::Regex {
232                regex, multiline, ..
233            } => {
234                if *multiline {
235                    let mut text = String::new();
236                    if let Err(err) = reader.read_to_string(&mut text) {
237                        Err(err.into())
238                    } else {
239                        Ok(regex.find(&text)?.is_some())
240                    }
241                } else {
242                    for line in reader.lines() {
243                        let line = line?;
244                        if regex.find(&line)?.is_some() {
245                            return Ok(true);
246                        }
247                    }
248                    Ok(false)
249                }
250            }
251        }
252    }
253    /// Returns the replacement text for this `SearchQuery`.
254    pub fn replacement(&self) -> Option<&str> {
255        match self {
256            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
257                replacement.as_deref()
258            }
259        }
260    }
261    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
262    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
263        match self {
264            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
265            SearchQuery::Regex {
266                regex, replacement, ..
267            } => {
268                if let Some(replacement) = replacement {
269                    static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
270                        LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
271                    let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
272                        replacement,
273                        |c: &Captures| match c.get(0).unwrap().as_str() {
274                            r"\\" => "\\",
275                            r"\n" => "\n",
276                            r"\t" => "\t",
277                            x => unreachable!("Unexpected escape sequence: {}", x),
278                        },
279                    );
280                    Some(regex.replace(text, replacement))
281                } else {
282                    None
283                }
284            }
285        }
286    }
287
288    pub async fn search(
289        &self,
290        buffer: &BufferSnapshot,
291        subrange: Option<Range<usize>>,
292    ) -> Vec<Range<usize>> {
293        const YIELD_INTERVAL: usize = 20000;
294
295        if self.as_str().is_empty() {
296            return Default::default();
297        }
298
299        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
300        let rope = if let Some(range) = subrange {
301            buffer.as_rope().slice(range)
302        } else {
303            buffer.as_rope().clone()
304        };
305
306        let mut matches = Vec::new();
307        match self {
308            Self::Text {
309                search, whole_word, ..
310            } => {
311                for (ix, mat) in search
312                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
313                    .enumerate()
314                {
315                    if (ix + 1) % YIELD_INTERVAL == 0 {
316                        yield_now().await;
317                    }
318
319                    let mat = mat.unwrap();
320                    if *whole_word {
321                        let classifier = buffer.char_classifier_at(range_offset + mat.start());
322
323                        let prev_kind = rope
324                            .reversed_chars_at(mat.start())
325                            .next()
326                            .map(|c| classifier.kind(c));
327                        let start_kind =
328                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
329                        let end_kind =
330                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
331                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
332                        if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
333                            || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
334                        {
335                            continue;
336                        }
337                    }
338                    matches.push(mat.start()..mat.end())
339                }
340            }
341
342            Self::Regex {
343                regex, multiline, ..
344            } => {
345                if *multiline {
346                    let text = rope.to_string();
347                    for (ix, mat) in regex.find_iter(&text).enumerate() {
348                        if (ix + 1) % YIELD_INTERVAL == 0 {
349                            yield_now().await;
350                        }
351
352                        if let Ok(mat) = mat {
353                            matches.push(mat.start()..mat.end());
354                        }
355                    }
356                } else {
357                    let mut line = String::new();
358                    let mut line_offset = 0;
359                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
360                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
361                            yield_now().await;
362                        }
363
364                        for (newline_ix, text) in chunk.split('\n').enumerate() {
365                            if newline_ix > 0 {
366                                for mat in regex.find_iter(&line).flatten() {
367                                    let start = line_offset + mat.start();
368                                    let end = line_offset + mat.end();
369                                    matches.push(start..end);
370                                }
371
372                                line_offset += line.len() + 1;
373                                line.clear();
374                            }
375                            line.push_str(text);
376                        }
377                    }
378                }
379            }
380        }
381
382        matches
383    }
384
385    pub fn is_empty(&self) -> bool {
386        self.as_str().is_empty()
387    }
388
389    pub fn as_str(&self) -> &str {
390        self.as_inner().as_str()
391    }
392
393    pub fn whole_word(&self) -> bool {
394        match self {
395            Self::Text { whole_word, .. } => *whole_word,
396            Self::Regex { whole_word, .. } => *whole_word,
397        }
398    }
399
400    pub fn case_sensitive(&self) -> bool {
401        match self {
402            Self::Text { case_sensitive, .. } => *case_sensitive,
403            Self::Regex { case_sensitive, .. } => *case_sensitive,
404        }
405    }
406
407    pub fn include_ignored(&self) -> bool {
408        match self {
409            Self::Text {
410                include_ignored, ..
411            } => *include_ignored,
412            Self::Regex {
413                include_ignored, ..
414            } => *include_ignored,
415        }
416    }
417
418    pub fn is_regex(&self) -> bool {
419        matches!(self, Self::Regex { .. })
420    }
421
422    pub fn files_to_include(&self) -> &PathMatcher {
423        self.as_inner().files_to_include()
424    }
425
426    pub fn files_to_exclude(&self) -> &PathMatcher {
427        self.as_inner().files_to_exclude()
428    }
429
430    pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
431        self.as_inner().buffers.as_ref()
432    }
433
434    pub fn is_opened_only(&self) -> bool {
435        self.as_inner().buffers.is_some()
436    }
437
438    pub fn filters_path(&self) -> bool {
439        !(self.files_to_exclude().sources().is_empty()
440            && self.files_to_include().sources().is_empty())
441    }
442
443    pub fn file_matches(&self, file_path: &Path) -> bool {
444        let mut path = file_path.to_path_buf();
445        loop {
446            if self.files_to_exclude().is_match(&path) {
447                return false;
448            } else if self.files_to_include().sources().is_empty()
449                || self.files_to_include().is_match(&path)
450            {
451                return true;
452            } else if !path.pop() {
453                return false;
454            }
455        }
456    }
457    pub fn as_inner(&self) -> &SearchInputs {
458        match self {
459            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
460        }
461    }
462}
463
464pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
465    let globs = glob_set
466        .split(',')
467        .map(str::trim)
468        .filter(|&glob_str| (!glob_str.is_empty()))
469        .map(|glob_str| glob_str.to_owned())
470        .collect::<Vec<_>>();
471    Ok(PathMatcher::new(&globs)?)
472}
473
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted as matcher sources and match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            );
        }
    }

    /// Malformed globs must be rejected and well-formed ones accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            assert!(
                PathMatcher::new(&[invalid_glob.to_owned()]).is_err(),
                "Invalid glob {invalid_glob} should not be accepted"
            );
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}