search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use fancy_regex::{Captures, Regex, RegexBuilder};
  5use gpui::Model;
  6use language::{Buffer, BufferSnapshot, CharKind};
  7use smol::future::yield_now;
  8use std::{
  9    borrow::Cow,
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::Path,
 13    sync::{Arc, LazyLock},
 14};
 15use text::Anchor;
 16use util::paths::PathMatcher;
 17
 18pub enum SearchResult {
 19    Buffer {
 20        buffer: Model<Buffer>,
 21        ranges: Vec<Range<Anchor>>,
 22    },
 23    LimitReached,
 24}
 25
 26#[derive(Clone, Copy, PartialEq)]
 27pub enum SearchInputKind {
 28    Query,
 29    Include,
 30    Exclude,
 31}
 32
 33#[derive(Clone, Debug)]
 34pub struct SearchInputs {
 35    query: Arc<str>,
 36    files_to_include: PathMatcher,
 37    files_to_exclude: PathMatcher,
 38    buffers: Option<Vec<Model<Buffer>>>,
 39}
 40
 41impl SearchInputs {
 42    pub fn as_str(&self) -> &str {
 43        self.query.as_ref()
 44    }
 45    pub fn files_to_include(&self) -> &PathMatcher {
 46        &self.files_to_include
 47    }
 48    pub fn files_to_exclude(&self) -> &PathMatcher {
 49        &self.files_to_exclude
 50    }
 51    pub fn buffers(&self) -> &Option<Vec<Model<Buffer>>> {
 52        &self.buffers
 53    }
 54}
 55#[derive(Clone, Debug)]
 56pub enum SearchQuery {
 57    Text {
 58        search: Arc<AhoCorasick>,
 59        replacement: Option<String>,
 60        whole_word: bool,
 61        case_sensitive: bool,
 62        include_ignored: bool,
 63        inner: SearchInputs,
 64    },
 65
 66    Regex {
 67        regex: Regex,
 68        replacement: Option<String>,
 69        multiline: bool,
 70        whole_word: bool,
 71        case_sensitive: bool,
 72        include_ignored: bool,
 73        inner: SearchInputs,
 74    },
 75}
 76
 77static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
 78    RegexBuilder::new(r"\B")
 79        .build()
 80        .expect("Failed to create WORD_MATCH_TEST")
 81});
 82
 83impl SearchQuery {
 84    pub fn text(
 85        query: impl ToString,
 86        whole_word: bool,
 87        case_sensitive: bool,
 88        include_ignored: bool,
 89        files_to_include: PathMatcher,
 90        files_to_exclude: PathMatcher,
 91        buffers: Option<Vec<Model<Buffer>>>,
 92    ) -> Result<Self> {
 93        let query = query.to_string();
 94        let search = AhoCorasickBuilder::new()
 95            .ascii_case_insensitive(!case_sensitive)
 96            .build([&query])?;
 97        let inner = SearchInputs {
 98            query: query.into(),
 99            files_to_exclude,
100            files_to_include,
101            buffers,
102        };
103        Ok(Self::Text {
104            search: Arc::new(search),
105            replacement: None,
106            whole_word,
107            case_sensitive,
108            include_ignored,
109            inner,
110        })
111    }
112
113    pub fn regex(
114        query: impl ToString,
115        whole_word: bool,
116        case_sensitive: bool,
117        include_ignored: bool,
118        files_to_include: PathMatcher,
119        files_to_exclude: PathMatcher,
120        buffers: Option<Vec<Model<Buffer>>>,
121    ) -> Result<Self> {
122        let mut query = query.to_string();
123        let initial_query = Arc::from(query.as_str());
124        if whole_word {
125            let mut word_query = String::new();
126            if let Some(first) = query.get(0..1) {
127                if WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x) {
128                    word_query.push_str("\\b");
129                }
130            }
131            word_query.push_str(&query);
132            if let Some(last) = query.get(query.len() - 1..) {
133                if WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x) {
134                    word_query.push_str("\\b");
135                }
136            }
137            query = word_query
138        }
139
140        let multiline = query.contains('\n') || query.contains("\\n") || query.contains("\\s");
141        let regex = RegexBuilder::new(&query)
142            .case_insensitive(!case_sensitive)
143            .build()?;
144        let inner = SearchInputs {
145            query: initial_query,
146            files_to_exclude,
147            files_to_include,
148            buffers,
149        };
150        Ok(Self::Regex {
151            regex,
152            replacement: None,
153            multiline,
154            whole_word,
155            case_sensitive,
156            include_ignored,
157            inner,
158        })
159    }
160
161    pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
162        if message.regex {
163            Self::regex(
164                message.query,
165                message.whole_word,
166                message.case_sensitive,
167                message.include_ignored,
168                deserialize_path_matches(&message.files_to_include)?,
169                deserialize_path_matches(&message.files_to_exclude)?,
170                None, // search opened only don't need search remote
171            )
172        } else {
173            Self::text(
174                message.query,
175                message.whole_word,
176                message.case_sensitive,
177                message.include_ignored,
178                deserialize_path_matches(&message.files_to_include)?,
179                deserialize_path_matches(&message.files_to_exclude)?,
180                None, // search opened only don't need search remote
181            )
182        }
183    }
184
185    pub fn with_replacement(mut self, new_replacement: String) -> Self {
186        match self {
187            Self::Text {
188                ref mut replacement,
189                ..
190            }
191            | Self::Regex {
192                ref mut replacement,
193                ..
194            } => {
195                *replacement = Some(new_replacement);
196                self
197            }
198        }
199    }
200
201    pub fn to_proto(&self) -> proto::SearchQuery {
202        proto::SearchQuery {
203            query: self.as_str().to_string(),
204            regex: self.is_regex(),
205            whole_word: self.whole_word(),
206            case_sensitive: self.case_sensitive(),
207            include_ignored: self.include_ignored(),
208            files_to_include: self.files_to_include().sources().join(","),
209            files_to_exclude: self.files_to_exclude().sources().join(","),
210        }
211    }
212
213    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
214        if self.as_str().is_empty() {
215            return Ok(false);
216        }
217
218        match self {
219            Self::Text { search, .. } => {
220                let mat = search.stream_find_iter(stream).next();
221                match mat {
222                    Some(Ok(_)) => Ok(true),
223                    Some(Err(err)) => Err(err.into()),
224                    None => Ok(false),
225                }
226            }
227            Self::Regex {
228                regex, multiline, ..
229            } => {
230                let mut reader = BufReader::new(stream);
231                if *multiline {
232                    let mut text = String::new();
233                    if let Err(err) = reader.read_to_string(&mut text) {
234                        Err(err.into())
235                    } else {
236                        Ok(regex.find(&text)?.is_some())
237                    }
238                } else {
239                    for line in reader.lines() {
240                        let line = line?;
241                        if regex.find(&line)?.is_some() {
242                            return Ok(true);
243                        }
244                    }
245                    Ok(false)
246                }
247            }
248        }
249    }
250    /// Returns the replacement text for this `SearchQuery`.
251    pub fn replacement(&self) -> Option<&str> {
252        match self {
253            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
254                replacement.as_deref()
255            }
256        }
257    }
258    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
259    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
260        match self {
261            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
262            SearchQuery::Regex {
263                regex, replacement, ..
264            } => {
265                if let Some(replacement) = replacement {
266                    static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
267                        LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
268                    let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
269                        replacement,
270                        |c: &Captures| match c.get(0).unwrap().as_str() {
271                            r"\\" => "\\",
272                            r"\n" => "\n",
273                            r"\t" => "\t",
274                            x => unreachable!("Unexpected escape sequence: {}", x),
275                        },
276                    );
277                    Some(regex.replace(text, replacement))
278                } else {
279                    None
280                }
281            }
282        }
283    }
284
285    pub async fn search(
286        &self,
287        buffer: &BufferSnapshot,
288        subrange: Option<Range<usize>>,
289    ) -> Vec<Range<usize>> {
290        const YIELD_INTERVAL: usize = 20000;
291
292        if self.as_str().is_empty() {
293            return Default::default();
294        }
295
296        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
297        let rope = if let Some(range) = subrange {
298            buffer.as_rope().slice(range)
299        } else {
300            buffer.as_rope().clone()
301        };
302
303        let mut matches = Vec::new();
304        match self {
305            Self::Text {
306                search, whole_word, ..
307            } => {
308                for (ix, mat) in search
309                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
310                    .enumerate()
311                {
312                    if (ix + 1) % YIELD_INTERVAL == 0 {
313                        yield_now().await;
314                    }
315
316                    let mat = mat.unwrap();
317                    if *whole_word {
318                        let classifier = buffer.char_classifier_at(range_offset + mat.start());
319
320                        let prev_kind = rope
321                            .reversed_chars_at(mat.start())
322                            .next()
323                            .map(|c| classifier.kind(c));
324                        let start_kind =
325                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
326                        let end_kind =
327                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
328                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
329                        if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
330                            || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
331                        {
332                            continue;
333                        }
334                    }
335                    matches.push(mat.start()..mat.end())
336                }
337            }
338
339            Self::Regex {
340                regex, multiline, ..
341            } => {
342                if *multiline {
343                    let text = rope.to_string();
344                    for (ix, mat) in regex.find_iter(&text).enumerate() {
345                        if (ix + 1) % YIELD_INTERVAL == 0 {
346                            yield_now().await;
347                        }
348
349                        if let Ok(mat) = mat {
350                            matches.push(mat.start()..mat.end());
351                        }
352                    }
353                } else {
354                    let mut line = String::new();
355                    let mut line_offset = 0;
356                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
357                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
358                            yield_now().await;
359                        }
360
361                        for (newline_ix, text) in chunk.split('\n').enumerate() {
362                            if newline_ix > 0 {
363                                for mat in regex.find_iter(&line).flatten() {
364                                    let start = line_offset + mat.start();
365                                    let end = line_offset + mat.end();
366                                    matches.push(start..end);
367                                }
368
369                                line_offset += line.len() + 1;
370                                line.clear();
371                            }
372                            line.push_str(text);
373                        }
374                    }
375                }
376            }
377        }
378
379        matches
380    }
381
382    pub fn is_empty(&self) -> bool {
383        self.as_str().is_empty()
384    }
385
386    pub fn as_str(&self) -> &str {
387        self.as_inner().as_str()
388    }
389
390    pub fn whole_word(&self) -> bool {
391        match self {
392            Self::Text { whole_word, .. } => *whole_word,
393            Self::Regex { whole_word, .. } => *whole_word,
394        }
395    }
396
397    pub fn case_sensitive(&self) -> bool {
398        match self {
399            Self::Text { case_sensitive, .. } => *case_sensitive,
400            Self::Regex { case_sensitive, .. } => *case_sensitive,
401        }
402    }
403
404    pub fn include_ignored(&self) -> bool {
405        match self {
406            Self::Text {
407                include_ignored, ..
408            } => *include_ignored,
409            Self::Regex {
410                include_ignored, ..
411            } => *include_ignored,
412        }
413    }
414
415    pub fn is_regex(&self) -> bool {
416        matches!(self, Self::Regex { .. })
417    }
418
419    pub fn files_to_include(&self) -> &PathMatcher {
420        self.as_inner().files_to_include()
421    }
422
423    pub fn files_to_exclude(&self) -> &PathMatcher {
424        self.as_inner().files_to_exclude()
425    }
426
427    pub fn buffers(&self) -> Option<&Vec<Model<Buffer>>> {
428        self.as_inner().buffers.as_ref()
429    }
430
431    pub fn is_opened_only(&self) -> bool {
432        self.as_inner().buffers.is_some()
433    }
434
435    pub fn filters_path(&self) -> bool {
436        !(self.files_to_exclude().sources().is_empty()
437            && self.files_to_include().sources().is_empty())
438    }
439
440    pub fn file_matches(&self, file_path: &Path) -> bool {
441        let mut path = file_path.to_path_buf();
442        loop {
443            if self.files_to_exclude().is_match(&path) {
444                return false;
445            } else if self.files_to_include().sources().is_empty()
446                || self.files_to_include().is_match(&path)
447            {
448                return true;
449            } else if !path.pop() {
450                return false;
451            }
452        }
453    }
454    pub fn as_inner(&self) -> &SearchInputs {
455        match self {
456            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
457        }
458    }
459}
460
461pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
462    let globs = glob_set
463        .split(',')
464        .map(str::trim)
465        .filter(|&glob_str| (!glob_str.is_empty()))
466        .map(|glob_str| glob_str.to_owned())
467        .collect::<Vec<_>>();
468    Ok(PathMatcher::new(&globs)?)
469}
470
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted as matchers and must match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        const VALID_PATHS: [&str; 7] = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in VALID_PATHS {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected, well-formed ones accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}