search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use gpui::Model;
  5use language::{Buffer, BufferSnapshot};
  6use regex::{Captures, Regex, RegexBuilder};
  7use smol::future::yield_now;
  8use std::{
  9    borrow::Cow,
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::Path,
 13    sync::{Arc, OnceLock},
 14};
 15use text::Anchor;
 16use util::paths::PathMatcher;
 17
/// Lazily compiled pattern that finds the escape sequences `\\`, `\n` and `\t`
/// (written literally, as backslash + character) in a replacement string.
/// It is initialized on first use by `SearchQuery::replacement_for`.
static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: OnceLock<Regex> = OnceLock::new();
 19
/// A single result item reported for a search.
pub enum SearchResult {
    /// Matches associated with one buffer.
    Buffer {
        /// Handle to the buffer the ranges refer to.
        buffer: Model<Buffer>,
        /// Anchor ranges inside `buffer` (presumably one per match — confirm
        /// against the code that constructs this variant).
        ranges: Vec<Range<Anchor>>,
    },
    /// NOTE(review): presumably emitted when the search stops early because a
    /// result limit was hit — this file never constructs it, so confirm with
    /// the producer.
    LimitReached,
}
 27
 28#[derive(Clone, Copy, PartialEq)]
 29pub enum SearchInputKind {
 30    Query,
 31    Include,
 32    Exclude,
 33}
 34
/// The inputs shared by both `SearchQuery` variants: the query text, the path
/// filters and an optional explicit buffer list.
#[derive(Clone, Debug)]
pub struct SearchInputs {
    /// Query text as originally supplied. For regex queries this is the text
    /// before any whole-word `\b` wrapping is applied.
    query: Arc<str>,
    /// Globs a path must match to be searched; when there are no include
    /// sources every non-excluded path is accepted (see
    /// `SearchQuery::file_matches`).
    files_to_include: PathMatcher,
    /// Globs that remove a path from the search.
    files_to_exclude: PathMatcher,
    /// Optional explicit set of buffers; `SearchQuery::is_opened_only` reports
    /// whether this is `Some`.
    buffers: Option<Vec<Model<Buffer>>>,
}
 42
 43impl SearchInputs {
 44    pub fn as_str(&self) -> &str {
 45        self.query.as_ref()
 46    }
 47    pub fn files_to_include(&self) -> &PathMatcher {
 48        &self.files_to_include
 49    }
 50    pub fn files_to_exclude(&self) -> &PathMatcher {
 51        &self.files_to_exclude
 52    }
 53    pub fn buffers(&self) -> &Option<Vec<Model<Buffer>>> {
 54        &self.buffers
 55    }
 56}
/// A prepared search query: either a literal-text search or a regular
/// expression, together with its options and shared inputs.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search.
    Text {
        /// Aho-Corasick automaton built from the query text.
        search: Arc<AhoCorasick>,
        /// Replacement text; `None` after construction, set by
        /// `with_replacement`.
        replacement: Option<String>,
        /// When set, `search` drops matches that are not delimited by a change
        /// of character kind on both sides.
        whole_word: bool,
        /// When `false`, the automaton is built ASCII-case-insensitive.
        case_sensitive: bool,
        /// Stored and exposed via `include_ignored()`; not consulted in this
        /// file (presumably controls whether ignored files are searched —
        /// confirm with callers).
        include_ignored: bool,
        /// Original query text, path filters and optional buffer list.
        inner: SearchInputs,
    },

    /// Regular-expression search.
    Regex {
        /// Compiled pattern (includes the whole-word `\b` wrapping when
        /// `whole_word` was requested).
        regex: Regex,
        /// Replacement text; `None` after construction, set by
        /// `with_replacement`.
        replacement: Option<String>,
        /// Whether the pattern may span lines; when `false`, `detect` and
        /// `search` evaluate the regex one line at a time.
        multiline: bool,
        /// Whether the pattern was wrapped in word boundaries at construction.
        whole_word: bool,
        /// When `false`, the regex is compiled case-insensitive.
        case_sensitive: bool,
        /// Stored and exposed via `include_ignored()`; not consulted in this
        /// file (presumably controls whether ignored files are searched —
        /// confirm with callers).
        include_ignored: bool,
        /// Original query text, path filters and optional buffer list.
        inner: SearchInputs,
    },
}
 78
 79impl SearchQuery {
 80    pub fn text(
 81        query: impl ToString,
 82        whole_word: bool,
 83        case_sensitive: bool,
 84        include_ignored: bool,
 85        files_to_include: PathMatcher,
 86        files_to_exclude: PathMatcher,
 87        buffers: Option<Vec<Model<Buffer>>>,
 88    ) -> Result<Self> {
 89        let query = query.to_string();
 90        let search = AhoCorasickBuilder::new()
 91            .ascii_case_insensitive(!case_sensitive)
 92            .build([&query])?;
 93        let inner = SearchInputs {
 94            query: query.into(),
 95            files_to_exclude,
 96            files_to_include,
 97            buffers,
 98        };
 99        Ok(Self::Text {
100            search: Arc::new(search),
101            replacement: None,
102            whole_word,
103            case_sensitive,
104            include_ignored,
105            inner,
106        })
107    }
108
109    pub fn regex(
110        query: impl ToString,
111        whole_word: bool,
112        case_sensitive: bool,
113        include_ignored: bool,
114        files_to_include: PathMatcher,
115        files_to_exclude: PathMatcher,
116        buffers: Option<Vec<Model<Buffer>>>,
117    ) -> Result<Self> {
118        let mut query = query.to_string();
119        let initial_query = Arc::from(query.as_str());
120        if whole_word {
121            let mut word_query = String::new();
122            word_query.push_str("\\b");
123            word_query.push_str(&query);
124            word_query.push_str("\\b");
125            query = word_query
126        }
127
128        let multiline = query.contains('\n') || query.contains("\\n") || query.contains("\\s");
129        let regex = RegexBuilder::new(&query)
130            .case_insensitive(!case_sensitive)
131            .multi_line(multiline)
132            .build()?;
133        let inner = SearchInputs {
134            query: initial_query,
135            files_to_exclude,
136            files_to_include,
137            buffers,
138        };
139        Ok(Self::Regex {
140            regex,
141            replacement: None,
142            multiline,
143            whole_word,
144            case_sensitive,
145            include_ignored,
146            inner,
147        })
148    }
149
150    pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
151        if message.regex {
152            Self::regex(
153                message.query,
154                message.whole_word,
155                message.case_sensitive,
156                message.include_ignored,
157                deserialize_path_matches(&message.files_to_include)?,
158                deserialize_path_matches(&message.files_to_exclude)?,
159                None, // search opened only don't need search remote
160            )
161        } else {
162            Self::text(
163                message.query,
164                message.whole_word,
165                message.case_sensitive,
166                message.include_ignored,
167                deserialize_path_matches(&message.files_to_include)?,
168                deserialize_path_matches(&message.files_to_exclude)?,
169                None, // search opened only don't need search remote
170            )
171        }
172    }
173
174    pub fn with_replacement(mut self, new_replacement: String) -> Self {
175        match self {
176            Self::Text {
177                ref mut replacement,
178                ..
179            }
180            | Self::Regex {
181                ref mut replacement,
182                ..
183            } => {
184                *replacement = Some(new_replacement);
185                self
186            }
187        }
188    }
189
190    pub fn to_proto(&self) -> proto::SearchQuery {
191        proto::SearchQuery {
192            query: self.as_str().to_string(),
193            regex: self.is_regex(),
194            whole_word: self.whole_word(),
195            case_sensitive: self.case_sensitive(),
196            include_ignored: self.include_ignored(),
197            files_to_include: self.files_to_include().sources().join(","),
198            files_to_exclude: self.files_to_exclude().sources().join(","),
199        }
200    }
201
202    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
203        if self.as_str().is_empty() {
204            return Ok(false);
205        }
206
207        match self {
208            Self::Text { search, .. } => {
209                let mat = search.stream_find_iter(stream).next();
210                match mat {
211                    Some(Ok(_)) => Ok(true),
212                    Some(Err(err)) => Err(err.into()),
213                    None => Ok(false),
214                }
215            }
216            Self::Regex {
217                regex, multiline, ..
218            } => {
219                let mut reader = BufReader::new(stream);
220                if *multiline {
221                    let mut text = String::new();
222                    if let Err(err) = reader.read_to_string(&mut text) {
223                        Err(err.into())
224                    } else {
225                        Ok(regex.find(&text).is_some())
226                    }
227                } else {
228                    for line in reader.lines() {
229                        let line = line?;
230                        if regex.find(&line).is_some() {
231                            return Ok(true);
232                        }
233                    }
234                    Ok(false)
235                }
236            }
237        }
238    }
239    /// Returns the replacement text for this `SearchQuery`.
240    pub fn replacement(&self) -> Option<&str> {
241        match self {
242            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
243                replacement.as_deref()
244            }
245        }
246    }
247    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
248    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
249        match self {
250            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
251            SearchQuery::Regex {
252                regex, replacement, ..
253            } => {
254                if let Some(replacement) = replacement {
255                    let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX
256                        .get_or_init(|| Regex::new(r"\\\\|\\n|\\t").unwrap())
257                        .replace_all(replacement, |c: &Captures| {
258                            match c.get(0).unwrap().as_str() {
259                                r"\\" => "\\",
260                                r"\n" => "\n",
261                                r"\t" => "\t",
262                                x => unreachable!("Unexpected escape sequence: {}", x),
263                            }
264                        });
265                    Some(regex.replace(text, replacement))
266                } else {
267                    None
268                }
269            }
270        }
271    }
272
    /// Finds every match of this query in `buffer`, optionally restricted to
    /// `subrange`.
    ///
    /// Returns an empty vector for an empty query. The returned ranges are
    /// offsets into the searched rope: when `subrange` is given they are
    /// relative to the start of that slice, because `range_offset` is used
    /// only for the character-classifier lookup and is never added to the
    /// matches.
    ///
    /// The future yields to the executor every `YIELD_INTERVAL` iterations
    /// (matches, or chunks in the line-by-line regex path).
    ///
    /// # Panics
    /// Panics if the Aho-Corasick stream iterator reports an error while
    /// reading the rope's bytes.
    pub async fn search(
        &self,
        buffer: &BufferSnapshot,
        subrange: Option<Range<usize>>,
    ) -> Vec<Range<usize>> {
        const YIELD_INTERVAL: usize = 20000;

        if self.as_str().is_empty() {
            return Default::default();
        }

        // Offset of the searched slice within the buffer (0 without subrange).
        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
        let rope = if let Some(range) = subrange {
            buffer.as_rope().slice(range)
        } else {
            buffer.as_rope().clone()
        };

        let mut matches = Vec::new();
        match self {
            Self::Text {
                search, whole_word, ..
            } => {
                for (ix, mat) in search
                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
                    .enumerate()
                {
                    if (ix + 1) % YIELD_INTERVAL == 0 {
                        yield_now().await;
                    }

                    let mat = mat.unwrap();
                    if *whole_word {
                        // The classifier is looked up at the match's position
                        // in the full buffer, hence the added `range_offset`.
                        let classifier = buffer.char_classifier_at(range_offset + mat.start());

                        // Kinds of the characters just outside and just inside
                        // each end of the match. The outside ones are `None`
                        // at the edges of the searched rope.
                        let prev_kind = rope
                            .reversed_chars_at(mat.start())
                            .next()
                            .map(|c| classifier.kind(c));
                        let start_kind =
                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
                        let end_kind =
                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
                        // Reject the match when either end continues a run of
                        // the same character kind, i.e. it is not a whole word.
                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
                            continue;
                        }
                    }
                    matches.push(mat.start()..mat.end())
                }
            }

            Self::Regex {
                regex, multiline, ..
            } => {
                if *multiline {
                    // Multiline patterns need the whole text at once.
                    let text = rope.to_string();
                    for (ix, mat) in regex.find_iter(&text).enumerate() {
                        if (ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        matches.push(mat.start()..mat.end());
                    }
                } else {
                    // Reassemble lines from the rope's chunks and run the
                    // regex on each completed line. `line_offset` is the
                    // offset of the current line's first byte in the rope.
                    let mut line = String::new();
                    let mut line_offset = 0;
                    // The chained "\n" flushes the final line, which has no
                    // terminating newline of its own.
                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        for (newline_ix, text) in chunk.split('\n').enumerate() {
                            // Every piece after the first follows a newline,
                            // so the line accumulated so far is complete.
                            if newline_ix > 0 {
                                for mat in regex.find_iter(&line) {
                                    let start = line_offset + mat.start();
                                    let end = line_offset + mat.end();
                                    matches.push(start..end);
                                }

                                // +1 accounts for the newline that was split off.
                                line_offset += line.len() + 1;
                                line.clear();
                            }
                            line.push_str(text);
                        }
                    }
                }
            }
        }

        matches
    }
365
366    pub fn is_empty(&self) -> bool {
367        self.as_str().is_empty()
368    }
369
370    pub fn as_str(&self) -> &str {
371        self.as_inner().as_str()
372    }
373
374    pub fn whole_word(&self) -> bool {
375        match self {
376            Self::Text { whole_word, .. } => *whole_word,
377            Self::Regex { whole_word, .. } => *whole_word,
378        }
379    }
380
381    pub fn case_sensitive(&self) -> bool {
382        match self {
383            Self::Text { case_sensitive, .. } => *case_sensitive,
384            Self::Regex { case_sensitive, .. } => *case_sensitive,
385        }
386    }
387
388    pub fn include_ignored(&self) -> bool {
389        match self {
390            Self::Text {
391                include_ignored, ..
392            } => *include_ignored,
393            Self::Regex {
394                include_ignored, ..
395            } => *include_ignored,
396        }
397    }
398
399    pub fn is_regex(&self) -> bool {
400        matches!(self, Self::Regex { .. })
401    }
402
403    pub fn files_to_include(&self) -> &PathMatcher {
404        self.as_inner().files_to_include()
405    }
406
407    pub fn files_to_exclude(&self) -> &PathMatcher {
408        self.as_inner().files_to_exclude()
409    }
410
411    pub fn buffers(&self) -> Option<&Vec<Model<Buffer>>> {
412        self.as_inner().buffers.as_ref()
413    }
414
415    pub fn is_opened_only(&self) -> bool {
416        self.as_inner().buffers.is_some()
417    }
418
419    pub fn filters_path(&self) -> bool {
420        !(self.files_to_exclude().sources().is_empty()
421            && self.files_to_include().sources().is_empty())
422    }
423
424    pub fn file_matches(&self, file_path: &Path) -> bool {
425        let mut path = file_path.to_path_buf();
426        loop {
427            if self.files_to_exclude().is_match(&path) {
428                return false;
429            } else if self.files_to_include().sources().is_empty()
430                || self.files_to_include().is_match(&path)
431            {
432                return true;
433            } else if !path.pop() {
434                return false;
435            }
436        }
437    }
438    pub fn as_inner(&self) -> &SearchInputs {
439        match self {
440            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
441        }
442    }
443}
444
445pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
446    let globs = glob_set
447        .split(',')
448        .map(str::trim)
449        .filter(|&glob_str| (!glob_str.is_empty()))
450        .map(|glob_str| glob_str.to_owned())
451        .collect::<Vec<_>>();
452    Ok(PathMatcher::new(&globs)?)
453}
454
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted as globs and must match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed ones accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}