search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use gpui::Model;
  5use language::{Buffer, BufferSnapshot};
  6use regex::{Captures, Regex, RegexBuilder};
  7use smol::future::yield_now;
  8use std::{
  9    borrow::Cow,
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::Path,
 13    sync::{Arc, OnceLock},
 14};
 15use text::Anchor;
 16use util::paths::PathMatcher;
 17
/// Lazily initialized pattern matching the escape sequences `\\`, `\n` and
/// `\t` inside a replacement string. It is compiled on first use by
/// `SearchQuery::replacement_for`.
static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: OnceLock<Regex> = OnceLock::new();
 19
/// An item produced while running a search.
pub enum SearchResult {
    /// Matches located in one buffer.
    Buffer {
        /// The buffer the ranges refer to.
        buffer: Model<Buffer>,
        /// Anchor ranges inside `buffer` (presumably one per match — confirm
        /// against the code that constructs this variant).
        ranges: Vec<Range<Anchor>>,
    },
    /// Marker variant without payload.
    /// NOTE(review): presumably signals that the search stopped because a
    /// result limit was hit; the producer is not in this file.
    LimitReached,
}
 27
/// Names one of the three kinds of search input.
///
/// NOTE(review): not referenced elsewhere in this file; presumably used by
/// callers to tell the query field apart from the include/exclude filters.
#[derive(Clone, Copy, PartialEq)]
pub enum SearchInputKind {
    /// The search query itself.
    Query,
    /// The "files to include" filter.
    Include,
    /// The "files to exclude" filter.
    Exclude,
}
 34
/// The raw inputs a `SearchQuery` was built from.
#[derive(Clone, Debug)]
pub struct SearchInputs {
    /// The query text as supplied by the caller (for regex queries this is the
    /// text before any whole-word wrapping is applied).
    query: Arc<str>,
    /// Matcher for paths that should be searched; an empty source list means
    /// "no include restriction" (see `SearchQuery::file_matches`).
    files_to_include: PathMatcher,
    /// Matcher for paths that must be skipped.
    files_to_exclude: PathMatcher,
    /// Optional explicit set of buffers; `Some` marks the query as
    /// "opened only" (see `SearchQuery::is_opened_only`).
    buffers: Option<Vec<Model<Buffer>>>,
}
 42
 43impl SearchInputs {
 44    pub fn as_str(&self) -> &str {
 45        self.query.as_ref()
 46    }
 47    pub fn files_to_include(&self) -> &PathMatcher {
 48        &self.files_to_include
 49    }
 50    pub fn files_to_exclude(&self) -> &PathMatcher {
 51        &self.files_to_exclude
 52    }
 53    pub fn buffers(&self) -> &Option<Vec<Model<Buffer>>> {
 54        &self.buffers
 55    }
 56}
/// A compiled search query: either a literal-text search or a regex search.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search backed by an Aho-Corasick automaton.
    Text {
        /// Automaton built from the query text; ASCII case-insensitive when
        /// `case_sensitive` is false.
        search: Arc<AhoCorasick>,
        /// Replacement text, if any (set through `with_replacement`).
        replacement: Option<String>,
        /// When true, `search` discards matches whose edge characters are of
        /// the same kind as the characters adjacent to the match.
        whole_word: bool,
        /// Whether matching distinguishes letter case.
        case_sensitive: bool,
        /// Stored and exposed via `include_ignored()`; not consulted by the
        /// matching code in this file (presumably tells the caller whether
        /// ignored files should be searched — confirm at the call site).
        include_ignored: bool,
        /// The raw inputs this query was built from.
        inner: SearchInputs,
    },

    /// Regular-expression search.
    Regex {
        /// Compiled expression; when `whole_word` is set the query was wrapped
        /// in `\b` word-boundary assertions before compilation.
        regex: Regex,
        /// Replacement template, if any (set through `with_replacement`).
        replacement: Option<String>,
        /// True when the pattern contains a newline or the two-character
        /// sequence `\n`; selects whole-text matching over line-by-line
        /// matching in `detect` and `search`.
        multiline: bool,
        /// Whether the query was wrapped in word-boundary assertions.
        whole_word: bool,
        /// Whether matching distinguishes letter case.
        case_sensitive: bool,
        /// Stored and exposed via `include_ignored()`; not consulted by the
        /// matching code in this file.
        include_ignored: bool,
        /// The raw inputs this query was built from.
        inner: SearchInputs,
    },
}
 78
 79impl SearchQuery {
 80    pub fn text(
 81        query: impl ToString,
 82        whole_word: bool,
 83        case_sensitive: bool,
 84        include_ignored: bool,
 85        files_to_include: PathMatcher,
 86        files_to_exclude: PathMatcher,
 87        buffers: Option<Vec<Model<Buffer>>>,
 88    ) -> Result<Self> {
 89        let query = query.to_string();
 90        let search = AhoCorasickBuilder::new()
 91            .ascii_case_insensitive(!case_sensitive)
 92            .build([&query])?;
 93        let inner = SearchInputs {
 94            query: query.into(),
 95            files_to_exclude,
 96            files_to_include,
 97            buffers,
 98        };
 99        Ok(Self::Text {
100            search: Arc::new(search),
101            replacement: None,
102            whole_word,
103            case_sensitive,
104            include_ignored,
105            inner,
106        })
107    }
108
109    pub fn regex(
110        query: impl ToString,
111        whole_word: bool,
112        case_sensitive: bool,
113        include_ignored: bool,
114        files_to_include: PathMatcher,
115        files_to_exclude: PathMatcher,
116        buffers: Option<Vec<Model<Buffer>>>,
117    ) -> Result<Self> {
118        let mut query = query.to_string();
119        let initial_query = Arc::from(query.as_str());
120        if whole_word {
121            let mut word_query = String::new();
122            word_query.push_str("\\b");
123            word_query.push_str(&query);
124            word_query.push_str("\\b");
125            query = word_query
126        }
127
128        let multiline = query.contains('\n') || query.contains("\\n");
129        let regex = RegexBuilder::new(&query)
130            .case_insensitive(!case_sensitive)
131            .multi_line(multiline)
132            .build()?;
133        let inner = SearchInputs {
134            query: initial_query,
135            files_to_exclude,
136            files_to_include,
137            buffers,
138        };
139        Ok(Self::Regex {
140            regex,
141            replacement: None,
142            multiline,
143            whole_word,
144            case_sensitive,
145            include_ignored,
146            inner,
147        })
148    }
149
150    pub fn from_proto_v1(message: proto::SearchProject) -> Result<Self> {
151        if message.regex {
152            Self::regex(
153                message.query,
154                message.whole_word,
155                message.case_sensitive,
156                message.include_ignored,
157                deserialize_path_matches(&message.files_to_include)?,
158                deserialize_path_matches(&message.files_to_exclude)?,
159                None,
160            )
161        } else {
162            Self::text(
163                message.query,
164                message.whole_word,
165                message.case_sensitive,
166                message.include_ignored,
167                deserialize_path_matches(&message.files_to_include)?,
168                deserialize_path_matches(&message.files_to_exclude)?,
169                None,
170            )
171        }
172    }
173
174    pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
175        if message.regex {
176            Self::regex(
177                message.query,
178                message.whole_word,
179                message.case_sensitive,
180                message.include_ignored,
181                deserialize_path_matches(&message.files_to_include)?,
182                deserialize_path_matches(&message.files_to_exclude)?,
183                None, // search opened only don't need search remote
184            )
185        } else {
186            Self::text(
187                message.query,
188                message.whole_word,
189                message.case_sensitive,
190                message.include_ignored,
191                deserialize_path_matches(&message.files_to_include)?,
192                deserialize_path_matches(&message.files_to_exclude)?,
193                None, // search opened only don't need search remote
194            )
195        }
196    }
197    pub fn with_replacement(mut self, new_replacement: String) -> Self {
198        match self {
199            Self::Text {
200                ref mut replacement,
201                ..
202            }
203            | Self::Regex {
204                ref mut replacement,
205                ..
206            } => {
207                *replacement = Some(new_replacement);
208                self
209            }
210        }
211    }
212    pub fn to_protov1(&self, project_id: u64) -> proto::SearchProject {
213        proto::SearchProject {
214            project_id,
215            query: self.as_str().to_string(),
216            regex: self.is_regex(),
217            whole_word: self.whole_word(),
218            case_sensitive: self.case_sensitive(),
219            include_ignored: self.include_ignored(),
220            files_to_include: self.files_to_include().sources().join(","),
221            files_to_exclude: self.files_to_exclude().sources().join(","),
222        }
223    }
224
225    pub fn to_proto(&self) -> proto::SearchQuery {
226        proto::SearchQuery {
227            query: self.as_str().to_string(),
228            regex: self.is_regex(),
229            whole_word: self.whole_word(),
230            case_sensitive: self.case_sensitive(),
231            include_ignored: self.include_ignored(),
232            files_to_include: self.files_to_include().sources().join(","),
233            files_to_exclude: self.files_to_exclude().sources().join(","),
234        }
235    }
236
237    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
238        if self.as_str().is_empty() {
239            return Ok(false);
240        }
241
242        match self {
243            Self::Text { search, .. } => {
244                let mat = search.stream_find_iter(stream).next();
245                match mat {
246                    Some(Ok(_)) => Ok(true),
247                    Some(Err(err)) => Err(err.into()),
248                    None => Ok(false),
249                }
250            }
251            Self::Regex {
252                regex, multiline, ..
253            } => {
254                let mut reader = BufReader::new(stream);
255                if *multiline {
256                    let mut text = String::new();
257                    if let Err(err) = reader.read_to_string(&mut text) {
258                        Err(err.into())
259                    } else {
260                        Ok(regex.find(&text).is_some())
261                    }
262                } else {
263                    for line in reader.lines() {
264                        let line = line?;
265                        if regex.find(&line).is_some() {
266                            return Ok(true);
267                        }
268                    }
269                    Ok(false)
270                }
271            }
272        }
273    }
274    /// Returns the replacement text for this `SearchQuery`.
275    pub fn replacement(&self) -> Option<&str> {
276        match self {
277            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
278                replacement.as_deref()
279            }
280        }
281    }
282    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
283    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
284        match self {
285            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
286            SearchQuery::Regex {
287                regex, replacement, ..
288            } => {
289                if let Some(replacement) = replacement {
290                    let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX
291                        .get_or_init(|| Regex::new(r"\\\\|\\n|\\t").unwrap())
292                        .replace_all(replacement, |c: &Captures| {
293                            match c.get(0).unwrap().as_str() {
294                                r"\\" => "\\",
295                                r"\n" => "\n",
296                                r"\t" => "\t",
297                                x => unreachable!("Unexpected escape sequence: {}", x),
298                            }
299                        });
300                    Some(regex.replace(text, replacement))
301                } else {
302                    None
303                }
304            }
305        }
306    }
307
308    pub async fn search(
309        &self,
310        buffer: &BufferSnapshot,
311        subrange: Option<Range<usize>>,
312    ) -> Vec<Range<usize>> {
313        const YIELD_INTERVAL: usize = 20000;
314
315        if self.as_str().is_empty() {
316            return Default::default();
317        }
318
319        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
320        let rope = if let Some(range) = subrange {
321            buffer.as_rope().slice(range)
322        } else {
323            buffer.as_rope().clone()
324        };
325
326        let mut matches = Vec::new();
327        match self {
328            Self::Text {
329                search, whole_word, ..
330            } => {
331                for (ix, mat) in search
332                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
333                    .enumerate()
334                {
335                    if (ix + 1) % YIELD_INTERVAL == 0 {
336                        yield_now().await;
337                    }
338
339                    let mat = mat.unwrap();
340                    if *whole_word {
341                        let classifier = buffer.char_classifier_at(range_offset + mat.start());
342
343                        let prev_kind = rope
344                            .reversed_chars_at(mat.start())
345                            .next()
346                            .map(|c| classifier.kind(c));
347                        let start_kind =
348                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
349                        let end_kind =
350                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
351                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
352                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
353                            continue;
354                        }
355                    }
356                    matches.push(mat.start()..mat.end())
357                }
358            }
359
360            Self::Regex {
361                regex, multiline, ..
362            } => {
363                if *multiline {
364                    let text = rope.to_string();
365                    for (ix, mat) in regex.find_iter(&text).enumerate() {
366                        if (ix + 1) % YIELD_INTERVAL == 0 {
367                            yield_now().await;
368                        }
369
370                        matches.push(mat.start()..mat.end());
371                    }
372                } else {
373                    let mut line = String::new();
374                    let mut line_offset = 0;
375                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
376                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
377                            yield_now().await;
378                        }
379
380                        for (newline_ix, text) in chunk.split('\n').enumerate() {
381                            if newline_ix > 0 {
382                                for mat in regex.find_iter(&line) {
383                                    let start = line_offset + mat.start();
384                                    let end = line_offset + mat.end();
385                                    matches.push(start..end);
386                                }
387
388                                line_offset += line.len() + 1;
389                                line.clear();
390                            }
391                            line.push_str(text);
392                        }
393                    }
394                }
395            }
396        }
397
398        matches
399    }
400
401    pub fn is_empty(&self) -> bool {
402        self.as_str().is_empty()
403    }
404
405    pub fn as_str(&self) -> &str {
406        self.as_inner().as_str()
407    }
408
409    pub fn whole_word(&self) -> bool {
410        match self {
411            Self::Text { whole_word, .. } => *whole_word,
412            Self::Regex { whole_word, .. } => *whole_word,
413        }
414    }
415
416    pub fn case_sensitive(&self) -> bool {
417        match self {
418            Self::Text { case_sensitive, .. } => *case_sensitive,
419            Self::Regex { case_sensitive, .. } => *case_sensitive,
420        }
421    }
422
423    pub fn include_ignored(&self) -> bool {
424        match self {
425            Self::Text {
426                include_ignored, ..
427            } => *include_ignored,
428            Self::Regex {
429                include_ignored, ..
430            } => *include_ignored,
431        }
432    }
433
434    pub fn is_regex(&self) -> bool {
435        matches!(self, Self::Regex { .. })
436    }
437
438    pub fn files_to_include(&self) -> &PathMatcher {
439        self.as_inner().files_to_include()
440    }
441
442    pub fn files_to_exclude(&self) -> &PathMatcher {
443        self.as_inner().files_to_exclude()
444    }
445
446    pub fn buffers(&self) -> Option<&Vec<Model<Buffer>>> {
447        self.as_inner().buffers.as_ref()
448    }
449
450    pub fn is_opened_only(&self) -> bool {
451        self.as_inner().buffers.is_some()
452    }
453
454    pub fn filters_path(&self) -> bool {
455        !(self.files_to_exclude().sources().is_empty()
456            && self.files_to_include().sources().is_empty())
457    }
458
459    pub fn file_matches(&self, file_path: &Path) -> bool {
460        let mut path = file_path.to_path_buf();
461        loop {
462            if self.files_to_exclude().is_match(&path) {
463                return false;
464            } else if self.files_to_include().sources().is_empty()
465                || self.files_to_include().is_match(&path)
466            {
467                return true;
468            } else if !path.pop() {
469                return false;
470            }
471        }
472    }
473    pub fn as_inner(&self) -> &SearchInputs {
474        match self {
475            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
476        }
477    }
478}
479
480pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
481    let globs = glob_set
482        .split(',')
483        .map(str::trim)
484        .filter(|&glob_str| (!glob_str.is_empty()))
485        .map(|glob_str| glob_str.to_owned())
486        .collect::<Vec<_>>();
487    Ok(PathMatcher::new(&globs)?)
488}
489
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths (no glob metacharacters beyond a valid class) must be
    /// accepted by `PathMatcher::new`, and the resulting matcher must match
    /// the very path it was built from.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed globs accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}