search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use gpui::Model;
  5use language::{Buffer, BufferSnapshot};
  6use regex::{Captures, Regex, RegexBuilder};
  7use smol::future::yield_now;
  8use std::{
  9    borrow::Cow,
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::Path,
 13    sync::{Arc, OnceLock},
 14};
 15use text::Anchor;
 16use util::paths::PathMatcher;
 17
 18static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: OnceLock<Regex> = OnceLock::new();
 19
 20pub enum SearchResult {
 21    Buffer {
 22        buffer: Model<Buffer>,
 23        ranges: Vec<Range<Anchor>>,
 24    },
 25    LimitReached,
 26}
 27
 28#[derive(Clone, Debug)]
 29pub struct SearchInputs {
 30    query: Arc<str>,
 31    files_to_include: PathMatcher,
 32    files_to_exclude: PathMatcher,
 33    buffers: Option<Vec<Model<Buffer>>>,
 34}
 35
 36impl SearchInputs {
 37    pub fn as_str(&self) -> &str {
 38        self.query.as_ref()
 39    }
 40    pub fn files_to_include(&self) -> &PathMatcher {
 41        &self.files_to_include
 42    }
 43    pub fn files_to_exclude(&self) -> &PathMatcher {
 44        &self.files_to_exclude
 45    }
 46    pub fn buffers(&self) -> &Option<Vec<Model<Buffer>>> {
 47        &self.buffers
 48    }
 49}
 50#[derive(Clone, Debug)]
 51pub enum SearchQuery {
 52    Text {
 53        search: Arc<AhoCorasick>,
 54        replacement: Option<String>,
 55        whole_word: bool,
 56        case_sensitive: bool,
 57        include_ignored: bool,
 58        inner: SearchInputs,
 59    },
 60
 61    Regex {
 62        regex: Regex,
 63        replacement: Option<String>,
 64        multiline: bool,
 65        whole_word: bool,
 66        case_sensitive: bool,
 67        include_ignored: bool,
 68        inner: SearchInputs,
 69    },
 70}
 71
 72impl SearchQuery {
 73    pub fn text(
 74        query: impl ToString,
 75        whole_word: bool,
 76        case_sensitive: bool,
 77        include_ignored: bool,
 78        files_to_include: PathMatcher,
 79        files_to_exclude: PathMatcher,
 80        buffers: Option<Vec<Model<Buffer>>>,
 81    ) -> Result<Self> {
 82        let query = query.to_string();
 83        let search = AhoCorasickBuilder::new()
 84            .ascii_case_insensitive(!case_sensitive)
 85            .build(&[&query])?;
 86        let inner = SearchInputs {
 87            query: query.into(),
 88            files_to_exclude,
 89            files_to_include,
 90            buffers,
 91        };
 92        Ok(Self::Text {
 93            search: Arc::new(search),
 94            replacement: None,
 95            whole_word,
 96            case_sensitive,
 97            include_ignored,
 98            inner,
 99        })
100    }
101
102    pub fn regex(
103        query: impl ToString,
104        whole_word: bool,
105        case_sensitive: bool,
106        include_ignored: bool,
107        files_to_include: PathMatcher,
108        files_to_exclude: PathMatcher,
109        buffers: Option<Vec<Model<Buffer>>>,
110    ) -> Result<Self> {
111        let mut query = query.to_string();
112        let initial_query = Arc::from(query.as_str());
113        if whole_word {
114            let mut word_query = String::new();
115            word_query.push_str("\\b");
116            word_query.push_str(&query);
117            word_query.push_str("\\b");
118            query = word_query
119        }
120
121        let multiline = query.contains('\n') || query.contains("\\n");
122        let regex = RegexBuilder::new(&query)
123            .case_insensitive(!case_sensitive)
124            .multi_line(multiline)
125            .build()?;
126        let inner = SearchInputs {
127            query: initial_query,
128            files_to_exclude,
129            files_to_include,
130            buffers,
131        };
132        Ok(Self::Regex {
133            regex,
134            replacement: None,
135            multiline,
136            whole_word,
137            case_sensitive,
138            include_ignored,
139            inner,
140        })
141    }
142
143    pub fn from_proto_v1(message: proto::SearchProject) -> Result<Self> {
144        if message.regex {
145            Self::regex(
146                message.query,
147                message.whole_word,
148                message.case_sensitive,
149                message.include_ignored,
150                deserialize_path_matches(&message.files_to_include)?,
151                deserialize_path_matches(&message.files_to_exclude)?,
152                None,
153            )
154        } else {
155            Self::text(
156                message.query,
157                message.whole_word,
158                message.case_sensitive,
159                message.include_ignored,
160                deserialize_path_matches(&message.files_to_include)?,
161                deserialize_path_matches(&message.files_to_exclude)?,
162                None,
163            )
164        }
165    }
166
167    pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
168        if message.regex {
169            Self::regex(
170                message.query,
171                message.whole_word,
172                message.case_sensitive,
173                message.include_ignored,
174                deserialize_path_matches(&message.files_to_include)?,
175                deserialize_path_matches(&message.files_to_exclude)?,
176                None, // search opened only don't need search remote
177            )
178        } else {
179            Self::text(
180                message.query,
181                message.whole_word,
182                message.case_sensitive,
183                message.include_ignored,
184                deserialize_path_matches(&message.files_to_include)?,
185                deserialize_path_matches(&message.files_to_exclude)?,
186                None, // search opened only don't need search remote
187            )
188        }
189    }
190    pub fn with_replacement(mut self, new_replacement: String) -> Self {
191        match self {
192            Self::Text {
193                ref mut replacement,
194                ..
195            }
196            | Self::Regex {
197                ref mut replacement,
198                ..
199            } => {
200                *replacement = Some(new_replacement);
201                self
202            }
203        }
204    }
205    pub fn to_protov1(&self, project_id: u64) -> proto::SearchProject {
206        proto::SearchProject {
207            project_id,
208            query: self.as_str().to_string(),
209            regex: self.is_regex(),
210            whole_word: self.whole_word(),
211            case_sensitive: self.case_sensitive(),
212            include_ignored: self.include_ignored(),
213            files_to_include: self.files_to_include().sources().join(","),
214            files_to_exclude: self.files_to_exclude().sources().join(","),
215        }
216    }
217
218    pub fn to_proto(&self) -> proto::SearchQuery {
219        proto::SearchQuery {
220            query: self.as_str().to_string(),
221            regex: self.is_regex(),
222            whole_word: self.whole_word(),
223            case_sensitive: self.case_sensitive(),
224            include_ignored: self.include_ignored(),
225            files_to_include: self.files_to_include().sources().join(","),
226            files_to_exclude: self.files_to_exclude().sources().join(","),
227        }
228    }
229
230    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
231        if self.as_str().is_empty() {
232            return Ok(false);
233        }
234
235        match self {
236            Self::Text { search, .. } => {
237                let mat = search.stream_find_iter(stream).next();
238                match mat {
239                    Some(Ok(_)) => Ok(true),
240                    Some(Err(err)) => Err(err.into()),
241                    None => Ok(false),
242                }
243            }
244            Self::Regex {
245                regex, multiline, ..
246            } => {
247                let mut reader = BufReader::new(stream);
248                if *multiline {
249                    let mut text = String::new();
250                    if let Err(err) = reader.read_to_string(&mut text) {
251                        Err(err.into())
252                    } else {
253                        Ok(regex.find(&text).is_some())
254                    }
255                } else {
256                    for line in reader.lines() {
257                        let line = line?;
258                        if regex.find(&line).is_some() {
259                            return Ok(true);
260                        }
261                    }
262                    Ok(false)
263                }
264            }
265        }
266    }
267    /// Returns the replacement text for this `SearchQuery`.
268    pub fn replacement(&self) -> Option<&str> {
269        match self {
270            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
271                replacement.as_deref()
272            }
273        }
274    }
275    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
276    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
277        match self {
278            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
279            SearchQuery::Regex {
280                regex, replacement, ..
281            } => {
282                if let Some(replacement) = replacement {
283                    let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX
284                        .get_or_init(|| Regex::new(r"\\\\|\\n|\\t").unwrap())
285                        .replace_all(replacement, |c: &Captures| {
286                            match c.get(0).unwrap().as_str() {
287                                r"\\" => "\\",
288                                r"\n" => "\n",
289                                r"\t" => "\t",
290                                x => unreachable!("Unexpected escape sequence: {}", x),
291                            }
292                        });
293                    Some(regex.replace(text, replacement))
294                } else {
295                    None
296                }
297            }
298        }
299    }
300
301    pub async fn search(
302        &self,
303        buffer: &BufferSnapshot,
304        subrange: Option<Range<usize>>,
305    ) -> Vec<Range<usize>> {
306        const YIELD_INTERVAL: usize = 20000;
307
308        if self.as_str().is_empty() {
309            return Default::default();
310        }
311
312        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
313        let rope = if let Some(range) = subrange {
314            buffer.as_rope().slice(range)
315        } else {
316            buffer.as_rope().clone()
317        };
318
319        let mut matches = Vec::new();
320        match self {
321            Self::Text {
322                search, whole_word, ..
323            } => {
324                for (ix, mat) in search
325                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
326                    .enumerate()
327                {
328                    if (ix + 1) % YIELD_INTERVAL == 0 {
329                        yield_now().await;
330                    }
331
332                    let mat = mat.unwrap();
333                    if *whole_word {
334                        let classifier = buffer.char_classifier_at(range_offset + mat.start());
335
336                        let prev_kind = rope
337                            .reversed_chars_at(mat.start())
338                            .next()
339                            .map(|c| classifier.kind(c));
340                        let start_kind =
341                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
342                        let end_kind =
343                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
344                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
345                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
346                            continue;
347                        }
348                    }
349                    matches.push(mat.start()..mat.end())
350                }
351            }
352
353            Self::Regex {
354                regex, multiline, ..
355            } => {
356                if *multiline {
357                    let text = rope.to_string();
358                    for (ix, mat) in regex.find_iter(&text).enumerate() {
359                        if (ix + 1) % YIELD_INTERVAL == 0 {
360                            yield_now().await;
361                        }
362
363                        matches.push(mat.start()..mat.end());
364                    }
365                } else {
366                    let mut line = String::new();
367                    let mut line_offset = 0;
368                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
369                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
370                            yield_now().await;
371                        }
372
373                        for (newline_ix, text) in chunk.split('\n').enumerate() {
374                            if newline_ix > 0 {
375                                for mat in regex.find_iter(&line) {
376                                    let start = line_offset + mat.start();
377                                    let end = line_offset + mat.end();
378                                    matches.push(start..end);
379                                }
380
381                                line_offset += line.len() + 1;
382                                line.clear();
383                            }
384                            line.push_str(text);
385                        }
386                    }
387                }
388            }
389        }
390
391        matches
392    }
393
394    pub fn is_empty(&self) -> bool {
395        self.as_str().is_empty()
396    }
397
398    pub fn as_str(&self) -> &str {
399        self.as_inner().as_str()
400    }
401
402    pub fn whole_word(&self) -> bool {
403        match self {
404            Self::Text { whole_word, .. } => *whole_word,
405            Self::Regex { whole_word, .. } => *whole_word,
406        }
407    }
408
409    pub fn case_sensitive(&self) -> bool {
410        match self {
411            Self::Text { case_sensitive, .. } => *case_sensitive,
412            Self::Regex { case_sensitive, .. } => *case_sensitive,
413        }
414    }
415
416    pub fn include_ignored(&self) -> bool {
417        match self {
418            Self::Text {
419                include_ignored, ..
420            } => *include_ignored,
421            Self::Regex {
422                include_ignored, ..
423            } => *include_ignored,
424        }
425    }
426
427    pub fn is_regex(&self) -> bool {
428        matches!(self, Self::Regex { .. })
429    }
430
431    pub fn files_to_include(&self) -> &PathMatcher {
432        self.as_inner().files_to_include()
433    }
434
435    pub fn files_to_exclude(&self) -> &PathMatcher {
436        self.as_inner().files_to_exclude()
437    }
438
439    pub fn buffers(&self) -> Option<&Vec<Model<Buffer>>> {
440        self.as_inner().buffers.as_ref()
441    }
442
443    pub fn is_opened_only(&self) -> bool {
444        self.as_inner().buffers.is_some()
445    }
446
447    pub fn filters_path(&self) -> bool {
448        !(self.files_to_exclude().sources().is_empty()
449            && self.files_to_include().sources().is_empty())
450    }
451
452    pub fn file_matches(&self, file_path: &Path) -> bool {
453        let mut path = file_path.to_path_buf();
454        loop {
455            if self.files_to_exclude().is_match(&path) {
456                return false;
457            } else if self.files_to_include().sources().is_empty()
458                || self.files_to_include().is_match(&path)
459            {
460                return true;
461            } else if !path.pop() {
462                return false;
463            }
464        }
465    }
466    pub fn as_inner(&self) -> &SearchInputs {
467        match self {
468            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
469        }
470    }
471}
472
473pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
474    let globs = glob_set
475        .split(',')
476        .map(str::trim)
477        .filter_map(|glob_str| (!glob_str.is_empty()).then(|| glob_str.to_owned()))
478        .collect::<Vec<_>>();
479    Ok(PathMatcher::new(&globs)?)
480}
481
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths (no glob metacharacters, or a simple character class) must
    /// be accepted and must match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected; well-formed globs must be accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}