search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use fancy_regex::{Captures, Regex, RegexBuilder};
  5use gpui::Entity;
  6use language::{Buffer, BufferSnapshot, CharKind};
  7use smol::future::yield_now;
  8use std::{
  9    borrow::Cow,
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::Path,
 13    sync::{Arc, LazyLock},
 14};
 15use text::Anchor;
 16use util::paths::PathMatcher;
 17
 18pub enum SearchResult {
 19    Buffer {
 20        buffer: Entity<Buffer>,
 21        ranges: Vec<Range<Anchor>>,
 22    },
 23    LimitReached,
 24}
 25
 26#[derive(Clone, Copy, PartialEq)]
 27pub enum SearchInputKind {
 28    Query,
 29    Include,
 30    Exclude,
 31}
 32
 33#[derive(Clone, Debug)]
 34pub struct SearchInputs {
 35    query: Arc<str>,
 36    files_to_include: PathMatcher,
 37    files_to_exclude: PathMatcher,
 38    buffers: Option<Vec<Entity<Buffer>>>,
 39}
 40
 41impl SearchInputs {
 42    pub fn as_str(&self) -> &str {
 43        self.query.as_ref()
 44    }
 45    pub fn files_to_include(&self) -> &PathMatcher {
 46        &self.files_to_include
 47    }
 48    pub fn files_to_exclude(&self) -> &PathMatcher {
 49        &self.files_to_exclude
 50    }
 51    pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
 52        &self.buffers
 53    }
 54}
 55#[derive(Clone, Debug)]
 56pub enum SearchQuery {
 57    Text {
 58        search: AhoCorasick,
 59        replacement: Option<String>,
 60        whole_word: bool,
 61        case_sensitive: bool,
 62        include_ignored: bool,
 63        inner: SearchInputs,
 64    },
 65
 66    Regex {
 67        regex: Regex,
 68        replacement: Option<String>,
 69        multiline: bool,
 70        whole_word: bool,
 71        case_sensitive: bool,
 72        include_ignored: bool,
 73        inner: SearchInputs,
 74    },
 75}
 76
 77static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
 78    RegexBuilder::new(r"\B")
 79        .build()
 80        .expect("Failed to create WORD_MATCH_TEST")
 81});
 82
 83impl SearchQuery {
 84    pub fn text(
 85        query: impl ToString,
 86        whole_word: bool,
 87        case_sensitive: bool,
 88        include_ignored: bool,
 89        files_to_include: PathMatcher,
 90        files_to_exclude: PathMatcher,
 91        buffers: Option<Vec<Entity<Buffer>>>,
 92    ) -> Result<Self> {
 93        let query = query.to_string();
 94        let search = AhoCorasickBuilder::new()
 95            .ascii_case_insensitive(!case_sensitive)
 96            .build([&query])?;
 97        let inner = SearchInputs {
 98            query: query.into(),
 99            files_to_exclude,
100            files_to_include,
101            buffers,
102        };
103        Ok(Self::Text {
104            search,
105            replacement: None,
106            whole_word,
107            case_sensitive,
108            include_ignored,
109            inner,
110        })
111    }
112
113    pub fn regex(
114        query: impl ToString,
115        whole_word: bool,
116        case_sensitive: bool,
117        include_ignored: bool,
118        files_to_include: PathMatcher,
119        files_to_exclude: PathMatcher,
120        buffers: Option<Vec<Entity<Buffer>>>,
121    ) -> Result<Self> {
122        let mut query = query.to_string();
123        let initial_query = Arc::from(query.as_str());
124        if whole_word {
125            let mut word_query = String::new();
126            if let Some(first) = query.get(0..1) {
127                if WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x) {
128                    word_query.push_str("\\b");
129                }
130            }
131            word_query.push_str(&query);
132            if let Some(last) = query.get(query.len() - 1..) {
133                if WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x) {
134                    word_query.push_str("\\b");
135                }
136            }
137            query = word_query
138        }
139
140        let multiline = query.contains('\n') || query.contains("\\n");
141        let regex = RegexBuilder::new(&query)
142            .case_insensitive(!case_sensitive)
143            .build()?;
144        let inner = SearchInputs {
145            query: initial_query,
146            files_to_exclude,
147            files_to_include,
148            buffers,
149        };
150        Ok(Self::Regex {
151            regex,
152            replacement: None,
153            multiline,
154            whole_word,
155            case_sensitive,
156            include_ignored,
157            inner,
158        })
159    }
160
161    pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
162        if message.regex {
163            Self::regex(
164                message.query,
165                message.whole_word,
166                message.case_sensitive,
167                message.include_ignored,
168                deserialize_path_matches(&message.files_to_include)?,
169                deserialize_path_matches(&message.files_to_exclude)?,
170                None, // search opened only don't need search remote
171            )
172        } else {
173            Self::text(
174                message.query,
175                message.whole_word,
176                message.case_sensitive,
177                message.include_ignored,
178                deserialize_path_matches(&message.files_to_include)?,
179                deserialize_path_matches(&message.files_to_exclude)?,
180                None, // search opened only don't need search remote
181            )
182        }
183    }
184
185    pub fn with_replacement(mut self, new_replacement: String) -> Self {
186        match self {
187            Self::Text {
188                ref mut replacement,
189                ..
190            }
191            | Self::Regex {
192                ref mut replacement,
193                ..
194            } => {
195                *replacement = Some(new_replacement);
196                self
197            }
198        }
199    }
200
201    pub fn to_proto(&self) -> proto::SearchQuery {
202        proto::SearchQuery {
203            query: self.as_str().to_string(),
204            regex: self.is_regex(),
205            whole_word: self.whole_word(),
206            case_sensitive: self.case_sensitive(),
207            include_ignored: self.include_ignored(),
208            files_to_include: self.files_to_include().sources().join(","),
209            files_to_exclude: self.files_to_exclude().sources().join(","),
210        }
211    }
212
213    pub(crate) fn detect(
214        &self,
215        mut reader: BufReader<Box<dyn Read + Send + Sync>>,
216    ) -> Result<bool> {
217        if self.as_str().is_empty() {
218            return Ok(false);
219        }
220
221        match self {
222            Self::Text { search, .. } => {
223                let mat = search.stream_find_iter(reader).next();
224                match mat {
225                    Some(Ok(_)) => Ok(true),
226                    Some(Err(err)) => Err(err.into()),
227                    None => Ok(false),
228                }
229            }
230            Self::Regex {
231                regex, multiline, ..
232            } => {
233                if *multiline {
234                    let mut text = String::new();
235                    if let Err(err) = reader.read_to_string(&mut text) {
236                        Err(err.into())
237                    } else {
238                        Ok(regex.find(&text)?.is_some())
239                    }
240                } else {
241                    for line in reader.lines() {
242                        let line = line?;
243                        if regex.find(&line)?.is_some() {
244                            return Ok(true);
245                        }
246                    }
247                    Ok(false)
248                }
249            }
250        }
251    }
252    /// Returns the replacement text for this `SearchQuery`.
253    pub fn replacement(&self) -> Option<&str> {
254        match self {
255            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
256                replacement.as_deref()
257            }
258        }
259    }
260    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
261    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
262        match self {
263            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
264            SearchQuery::Regex {
265                regex, replacement, ..
266            } => {
267                if let Some(replacement) = replacement {
268                    static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
269                        LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
270                    let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
271                        replacement,
272                        |c: &Captures| match c.get(0).unwrap().as_str() {
273                            r"\\" => "\\",
274                            r"\n" => "\n",
275                            r"\t" => "\t",
276                            x => unreachable!("Unexpected escape sequence: {}", x),
277                        },
278                    );
279                    Some(regex.replace(text, replacement))
280                } else {
281                    None
282                }
283            }
284        }
285    }
286
287    pub async fn search(
288        &self,
289        buffer: &BufferSnapshot,
290        subrange: Option<Range<usize>>,
291    ) -> Vec<Range<usize>> {
292        const YIELD_INTERVAL: usize = 20000;
293
294        if self.as_str().is_empty() {
295            return Default::default();
296        }
297
298        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
299        let rope = if let Some(range) = subrange {
300            buffer.as_rope().slice(range)
301        } else {
302            buffer.as_rope().clone()
303        };
304
305        let mut matches = Vec::new();
306        match self {
307            Self::Text {
308                search, whole_word, ..
309            } => {
310                for (ix, mat) in search
311                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
312                    .enumerate()
313                {
314                    if (ix + 1) % YIELD_INTERVAL == 0 {
315                        yield_now().await;
316                    }
317
318                    let mat = mat.unwrap();
319                    if *whole_word {
320                        let classifier = buffer.char_classifier_at(range_offset + mat.start());
321
322                        let prev_kind = rope
323                            .reversed_chars_at(mat.start())
324                            .next()
325                            .map(|c| classifier.kind(c));
326                        let start_kind =
327                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
328                        let end_kind =
329                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
330                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
331                        if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
332                            || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
333                        {
334                            continue;
335                        }
336                    }
337                    matches.push(mat.start()..mat.end())
338                }
339            }
340
341            Self::Regex {
342                regex, multiline, ..
343            } => {
344                if *multiline {
345                    let text = rope.to_string();
346                    for (ix, mat) in regex.find_iter(&text).enumerate() {
347                        if (ix + 1) % YIELD_INTERVAL == 0 {
348                            yield_now().await;
349                        }
350
351                        if let Ok(mat) = mat {
352                            matches.push(mat.start()..mat.end());
353                        }
354                    }
355                } else {
356                    let mut line = String::new();
357                    let mut line_offset = 0;
358                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
359                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
360                            yield_now().await;
361                        }
362
363                        for (newline_ix, text) in chunk.split('\n').enumerate() {
364                            if newline_ix > 0 {
365                                for mat in regex.find_iter(&line).flatten() {
366                                    let start = line_offset + mat.start();
367                                    let end = line_offset + mat.end();
368                                    matches.push(start..end);
369                                }
370
371                                line_offset += line.len() + 1;
372                                line.clear();
373                            }
374                            line.push_str(text);
375                        }
376                    }
377                }
378            }
379        }
380
381        matches
382    }
383
384    pub fn is_empty(&self) -> bool {
385        self.as_str().is_empty()
386    }
387
388    pub fn as_str(&self) -> &str {
389        self.as_inner().as_str()
390    }
391
392    pub fn whole_word(&self) -> bool {
393        match self {
394            Self::Text { whole_word, .. } => *whole_word,
395            Self::Regex { whole_word, .. } => *whole_word,
396        }
397    }
398
399    pub fn case_sensitive(&self) -> bool {
400        match self {
401            Self::Text { case_sensitive, .. } => *case_sensitive,
402            Self::Regex { case_sensitive, .. } => *case_sensitive,
403        }
404    }
405
406    pub fn include_ignored(&self) -> bool {
407        match self {
408            Self::Text {
409                include_ignored, ..
410            } => *include_ignored,
411            Self::Regex {
412                include_ignored, ..
413            } => *include_ignored,
414        }
415    }
416
417    pub fn is_regex(&self) -> bool {
418        matches!(self, Self::Regex { .. })
419    }
420
421    pub fn files_to_include(&self) -> &PathMatcher {
422        self.as_inner().files_to_include()
423    }
424
425    pub fn files_to_exclude(&self) -> &PathMatcher {
426        self.as_inner().files_to_exclude()
427    }
428
429    pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
430        self.as_inner().buffers.as_ref()
431    }
432
433    pub fn is_opened_only(&self) -> bool {
434        self.as_inner().buffers.is_some()
435    }
436
437    pub fn filters_path(&self) -> bool {
438        !(self.files_to_exclude().sources().is_empty()
439            && self.files_to_include().sources().is_empty())
440    }
441
442    pub fn file_matches(&self, file_path: &Path) -> bool {
443        let mut path = file_path.to_path_buf();
444        loop {
445            if self.files_to_exclude().is_match(&path) {
446                return false;
447            } else if self.files_to_include().sources().is_empty()
448                || self.files_to_include().is_match(&path)
449            {
450                return true;
451            } else if !path.pop() {
452                return false;
453            }
454        }
455    }
456    pub fn as_inner(&self) -> &SearchInputs {
457        match self {
458            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
459        }
460    }
461}
462
463pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
464    let globs = glob_set
465        .split(',')
466        .map(str::trim)
467        .filter(|&glob_str| (!glob_str.is_empty()))
468        .map(|glob_str| glob_str.to_owned())
469        .collect::<Vec<_>>();
470    Ok(PathMatcher::new(&globs)?)
471}
472
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths (no glob syntax, or glob syntax that also reads as a
    /// literal) must build a matcher, and that matcher must accept the path
    /// it was built from.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed ones accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}