search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use fancy_regex::{Captures, Regex, RegexBuilder};
  5use gpui::Model;
  6use language::{Buffer, BufferSnapshot};
  7use smol::future::yield_now;
  8use std::{
  9    borrow::Cow,
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::Path,
 13    sync::{Arc, OnceLock},
 14};
 15use text::Anchor;
 16use util::paths::PathMatcher;
 17
/// Lazily compiled pattern that recognizes the escape sequences `\\`, `\n` and
/// `\t` inside a replacement string. It is initialized on first use by
/// `SearchQuery::replacement_for`.
static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: OnceLock<Regex> = OnceLock::new();
 19
/// One item produced by a search.
pub enum SearchResult {
    /// Matches located in a single buffer.
    Buffer {
        /// The buffer the matches belong to.
        buffer: Model<Buffer>,
        /// Anchor ranges of the matches inside `buffer`.
        ranges: Vec<Range<Anchor>>,
    },
    /// NOTE(review): presumably signals that the search stopped early because
    /// a result limit was hit; the producer is not visible in this file.
    LimitReached,
}
 27
/// Identifies one of the search inputs.
///
/// NOTE(review): the variants appear to mirror the query text and the
/// include/exclude path filters of `SearchInputs`; confirm against callers.
#[derive(Clone, Copy, PartialEq)]
pub enum SearchInputKind {
    /// The query text.
    Query,
    /// The "files to include" filter.
    Include,
    /// The "files to exclude" filter.
    Exclude,
}
 34
 35#[derive(Clone, Debug)]
 36pub struct SearchInputs {
 37    query: Arc<str>,
 38    files_to_include: PathMatcher,
 39    files_to_exclude: PathMatcher,
 40    buffers: Option<Vec<Model<Buffer>>>,
 41}
 42
 43impl SearchInputs {
 44    pub fn as_str(&self) -> &str {
 45        self.query.as_ref()
 46    }
 47    pub fn files_to_include(&self) -> &PathMatcher {
 48        &self.files_to_include
 49    }
 50    pub fn files_to_exclude(&self) -> &PathMatcher {
 51        &self.files_to_exclude
 52    }
 53    pub fn buffers(&self) -> &Option<Vec<Model<Buffer>>> {
 54        &self.buffers
 55    }
 56}
/// A compiled search query: either a literal-text search or a regular
/// expression, together with its options and the inputs it was built from.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search.
    Text {
        /// Aho-Corasick automaton built from the query text.
        search: Arc<AhoCorasick>,
        /// Replacement text, if one was set via `with_replacement`.
        replacement: Option<String>,
        /// Only accept matches whose edges fall on word boundaries.
        whole_word: bool,
        /// When `false`, the automaton matches ASCII case-insensitively.
        case_sensitive: bool,
        /// NOTE(review): presumably whether ignored files are searched too;
        /// the consumer of this flag is outside this file.
        include_ignored: bool,
        /// The original inputs this query was built from.
        inner: SearchInputs,
    },

    /// Regular-expression search.
    Regex {
        /// The compiled pattern (already wrapped in `\b` when `whole_word`).
        regex: Regex,
        /// Replacement text, if one was set via `with_replacement`.
        replacement: Option<String>,
        /// Whether the pattern is matched against the whole text at once
        /// instead of line by line.
        multiline: bool,
        /// Whether the pattern was wrapped in word boundaries.
        whole_word: bool,
        /// When `false`, the pattern is compiled case-insensitively.
        case_sensitive: bool,
        /// NOTE(review): presumably whether ignored files are searched too;
        /// the consumer of this flag is outside this file.
        include_ignored: bool,
        /// The original inputs this query was built from.
        inner: SearchInputs,
    },
}
 78
 79impl SearchQuery {
 80    pub fn text(
 81        query: impl ToString,
 82        whole_word: bool,
 83        case_sensitive: bool,
 84        include_ignored: bool,
 85        files_to_include: PathMatcher,
 86        files_to_exclude: PathMatcher,
 87        buffers: Option<Vec<Model<Buffer>>>,
 88    ) -> Result<Self> {
 89        let query = query.to_string();
 90        let search = AhoCorasickBuilder::new()
 91            .ascii_case_insensitive(!case_sensitive)
 92            .build([&query])?;
 93        let inner = SearchInputs {
 94            query: query.into(),
 95            files_to_exclude,
 96            files_to_include,
 97            buffers,
 98        };
 99        Ok(Self::Text {
100            search: Arc::new(search),
101            replacement: None,
102            whole_word,
103            case_sensitive,
104            include_ignored,
105            inner,
106        })
107    }
108
109    pub fn regex(
110        query: impl ToString,
111        whole_word: bool,
112        case_sensitive: bool,
113        include_ignored: bool,
114        files_to_include: PathMatcher,
115        files_to_exclude: PathMatcher,
116        buffers: Option<Vec<Model<Buffer>>>,
117    ) -> Result<Self> {
118        let mut query = query.to_string();
119        let initial_query = Arc::from(query.as_str());
120        if whole_word {
121            let mut word_query = String::new();
122            word_query.push_str("\\b");
123            word_query.push_str(&query);
124            word_query.push_str("\\b");
125            query = word_query
126        }
127
128        let multiline = query.contains('\n') || query.contains("\\n") || query.contains("\\s");
129        let regex = RegexBuilder::new(&query)
130            .case_insensitive(!case_sensitive)
131            .build()?;
132        let inner = SearchInputs {
133            query: initial_query,
134            files_to_exclude,
135            files_to_include,
136            buffers,
137        };
138        Ok(Self::Regex {
139            regex,
140            replacement: None,
141            multiline,
142            whole_word,
143            case_sensitive,
144            include_ignored,
145            inner,
146        })
147    }
148
149    pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
150        if message.regex {
151            Self::regex(
152                message.query,
153                message.whole_word,
154                message.case_sensitive,
155                message.include_ignored,
156                deserialize_path_matches(&message.files_to_include)?,
157                deserialize_path_matches(&message.files_to_exclude)?,
158                None, // search opened only don't need search remote
159            )
160        } else {
161            Self::text(
162                message.query,
163                message.whole_word,
164                message.case_sensitive,
165                message.include_ignored,
166                deserialize_path_matches(&message.files_to_include)?,
167                deserialize_path_matches(&message.files_to_exclude)?,
168                None, // search opened only don't need search remote
169            )
170        }
171    }
172
173    pub fn with_replacement(mut self, new_replacement: String) -> Self {
174        match self {
175            Self::Text {
176                ref mut replacement,
177                ..
178            }
179            | Self::Regex {
180                ref mut replacement,
181                ..
182            } => {
183                *replacement = Some(new_replacement);
184                self
185            }
186        }
187    }
188
189    pub fn to_proto(&self) -> proto::SearchQuery {
190        proto::SearchQuery {
191            query: self.as_str().to_string(),
192            regex: self.is_regex(),
193            whole_word: self.whole_word(),
194            case_sensitive: self.case_sensitive(),
195            include_ignored: self.include_ignored(),
196            files_to_include: self.files_to_include().sources().join(","),
197            files_to_exclude: self.files_to_exclude().sources().join(","),
198        }
199    }
200
201    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
202        if self.as_str().is_empty() {
203            return Ok(false);
204        }
205
206        match self {
207            Self::Text { search, .. } => {
208                let mat = search.stream_find_iter(stream).next();
209                match mat {
210                    Some(Ok(_)) => Ok(true),
211                    Some(Err(err)) => Err(err.into()),
212                    None => Ok(false),
213                }
214            }
215            Self::Regex {
216                regex, multiline, ..
217            } => {
218                let mut reader = BufReader::new(stream);
219                if *multiline {
220                    let mut text = String::new();
221                    if let Err(err) = reader.read_to_string(&mut text) {
222                        Err(err.into())
223                    } else {
224                        Ok(regex.find(&text)?.is_some())
225                    }
226                } else {
227                    for line in reader.lines() {
228                        let line = line?;
229                        if regex.find(&line)?.is_some() {
230                            return Ok(true);
231                        }
232                    }
233                    Ok(false)
234                }
235            }
236        }
237    }
238    /// Returns the replacement text for this `SearchQuery`.
239    pub fn replacement(&self) -> Option<&str> {
240        match self {
241            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
242                replacement.as_deref()
243            }
244        }
245    }
246    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
247    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
248        match self {
249            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
250            SearchQuery::Regex {
251                regex, replacement, ..
252            } => {
253                if let Some(replacement) = replacement {
254                    let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX
255                        .get_or_init(|| Regex::new(r"\\\\|\\n|\\t").unwrap())
256                        .replace_all(replacement, |c: &Captures| {
257                            match c.get(0).unwrap().as_str() {
258                                r"\\" => "\\",
259                                r"\n" => "\n",
260                                r"\t" => "\t",
261                                x => unreachable!("Unexpected escape sequence: {}", x),
262                            }
263                        });
264                    Some(regex.replace(text, replacement))
265                } else {
266                    None
267                }
268            }
269        }
270    }
271
272    pub async fn search(
273        &self,
274        buffer: &BufferSnapshot,
275        subrange: Option<Range<usize>>,
276    ) -> Vec<Range<usize>> {
277        const YIELD_INTERVAL: usize = 20000;
278
279        if self.as_str().is_empty() {
280            return Default::default();
281        }
282
283        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
284        let rope = if let Some(range) = subrange {
285            buffer.as_rope().slice(range)
286        } else {
287            buffer.as_rope().clone()
288        };
289
290        let mut matches = Vec::new();
291        match self {
292            Self::Text {
293                search, whole_word, ..
294            } => {
295                for (ix, mat) in search
296                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
297                    .enumerate()
298                {
299                    if (ix + 1) % YIELD_INTERVAL == 0 {
300                        yield_now().await;
301                    }
302
303                    let mat = mat.unwrap();
304                    if *whole_word {
305                        let classifier = buffer.char_classifier_at(range_offset + mat.start());
306
307                        let prev_kind = rope
308                            .reversed_chars_at(mat.start())
309                            .next()
310                            .map(|c| classifier.kind(c));
311                        let start_kind =
312                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
313                        let end_kind =
314                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
315                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
316                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
317                            continue;
318                        }
319                    }
320                    matches.push(mat.start()..mat.end())
321                }
322            }
323
324            Self::Regex {
325                regex, multiline, ..
326            } => {
327                if *multiline {
328                    let text = rope.to_string();
329                    for (ix, mat) in regex.find_iter(&text).enumerate() {
330                        if (ix + 1) % YIELD_INTERVAL == 0 {
331                            yield_now().await;
332                        }
333
334                        if let Ok(mat) = mat {
335                            matches.push(mat.start()..mat.end());
336                        }
337                    }
338                } else {
339                    let mut line = String::new();
340                    let mut line_offset = 0;
341                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
342                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
343                            yield_now().await;
344                        }
345
346                        for (newline_ix, text) in chunk.split('\n').enumerate() {
347                            if newline_ix > 0 {
348                                for mat in regex.find_iter(&line).flatten() {
349                                    let start = line_offset + mat.start();
350                                    let end = line_offset + mat.end();
351                                    matches.push(start..end);
352                                }
353
354                                line_offset += line.len() + 1;
355                                line.clear();
356                            }
357                            line.push_str(text);
358                        }
359                    }
360                }
361            }
362        }
363
364        matches
365    }
366
367    pub fn is_empty(&self) -> bool {
368        self.as_str().is_empty()
369    }
370
371    pub fn as_str(&self) -> &str {
372        self.as_inner().as_str()
373    }
374
375    pub fn whole_word(&self) -> bool {
376        match self {
377            Self::Text { whole_word, .. } => *whole_word,
378            Self::Regex { whole_word, .. } => *whole_word,
379        }
380    }
381
382    pub fn case_sensitive(&self) -> bool {
383        match self {
384            Self::Text { case_sensitive, .. } => *case_sensitive,
385            Self::Regex { case_sensitive, .. } => *case_sensitive,
386        }
387    }
388
389    pub fn include_ignored(&self) -> bool {
390        match self {
391            Self::Text {
392                include_ignored, ..
393            } => *include_ignored,
394            Self::Regex {
395                include_ignored, ..
396            } => *include_ignored,
397        }
398    }
399
400    pub fn is_regex(&self) -> bool {
401        matches!(self, Self::Regex { .. })
402    }
403
404    pub fn files_to_include(&self) -> &PathMatcher {
405        self.as_inner().files_to_include()
406    }
407
408    pub fn files_to_exclude(&self) -> &PathMatcher {
409        self.as_inner().files_to_exclude()
410    }
411
412    pub fn buffers(&self) -> Option<&Vec<Model<Buffer>>> {
413        self.as_inner().buffers.as_ref()
414    }
415
416    pub fn is_opened_only(&self) -> bool {
417        self.as_inner().buffers.is_some()
418    }
419
420    pub fn filters_path(&self) -> bool {
421        !(self.files_to_exclude().sources().is_empty()
422            && self.files_to_include().sources().is_empty())
423    }
424
425    pub fn file_matches(&self, file_path: &Path) -> bool {
426        let mut path = file_path.to_path_buf();
427        loop {
428            if self.files_to_exclude().is_match(&path) {
429                return false;
430            } else if self.files_to_include().sources().is_empty()
431                || self.files_to_include().is_match(&path)
432            {
433                return true;
434            } else if !path.pop() {
435                return false;
436            }
437        }
438    }
439    pub fn as_inner(&self) -> &SearchInputs {
440        match self {
441            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
442        }
443    }
444}
445
446pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
447    let globs = glob_set
448        .split(',')
449        .map(str::trim)
450        .filter(|&glob_str| (!glob_str.is_empty()))
451        .map(|glob_str| glob_str.to_owned())
452        .collect::<Vec<_>>();
453    Ok(PathMatcher::new(&globs)?)
454}
455
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths are accepted as patterns, and each one matches itself.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs are rejected, well-formed ones are accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}