search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use fancy_regex::{Captures, Regex, RegexBuilder};
  5use gpui::Model;
  6use language::{Buffer, BufferSnapshot, CharKind};
  7use smol::future::yield_now;
  8use std::{
  9    borrow::Cow,
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::Path,
 13    sync::{Arc, LazyLock, OnceLock},
 14};
 15use text::Anchor;
 16use util::paths::PathMatcher;
 17
/// Lazily-initialized regex used by `SearchQuery::replacement_for` to locate the
/// escape sequences (`\\`, `\n`, `\t`) in a user-supplied regex replacement string.
static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: OnceLock<Regex> = OnceLock::new();
 19
/// A single item produced while running a search.
pub enum SearchResult {
    /// Matches found in one buffer.
    Buffer {
        /// The buffer the matches belong to.
        buffer: Model<Buffer>,
        /// Anchor ranges of the matches inside `buffer`.
        ranges: Vec<Range<Anchor>>,
    },
    /// Marker variant; by its name it signals that a result limit was hit.
    // NOTE(review): the producer of this variant is not visible in this file — confirm semantics.
    LimitReached,
}
 27
/// Identifies one of the textual inputs of a search: the query itself or one of
/// the include/exclude path filters.
#[derive(Clone, Copy, PartialEq)]
pub enum SearchInputKind {
    /// The search query text.
    Query,
    /// The "files to include" filter.
    Include,
    /// The "files to exclude" filter.
    Exclude,
}
 34
/// The raw inputs shared by every kind of [`SearchQuery`].
#[derive(Clone, Debug)]
pub struct SearchInputs {
    // The query text exactly as supplied by the caller (before any rewriting).
    query: Arc<str>,
    // Matcher for paths that should be searched.
    files_to_include: PathMatcher,
    // Matcher for paths that should be skipped.
    files_to_exclude: PathMatcher,
    // When `Some`, an explicit set of buffers associated with the query
    // (see `SearchQuery::is_opened_only`).
    buffers: Option<Vec<Model<Buffer>>>,
}
 42
 43impl SearchInputs {
 44    pub fn as_str(&self) -> &str {
 45        self.query.as_ref()
 46    }
 47    pub fn files_to_include(&self) -> &PathMatcher {
 48        &self.files_to_include
 49    }
 50    pub fn files_to_exclude(&self) -> &PathMatcher {
 51        &self.files_to_exclude
 52    }
 53    pub fn buffers(&self) -> &Option<Vec<Model<Buffer>>> {
 54        &self.buffers
 55    }
 56}
/// A compiled search query: either a literal text search or a regular expression.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search backed by an Aho-Corasick automaton.
    Text {
        /// Automaton built from the query text.
        search: Arc<AhoCorasick>,
        /// Replacement text, if one was set via `with_replacement`.
        replacement: Option<String>,
        /// Whether matches must not be adjacent to other word characters.
        whole_word: bool,
        /// Whether matching is case sensitive.
        case_sensitive: bool,
        /// Flag carried with the query and round-tripped through `to_proto`/`from_proto`.
        // NOTE(review): presumably controls searching ignored files — not used in this file.
        include_ignored: bool,
        /// The raw inputs the query was built from.
        inner: SearchInputs,
    },

    /// Regular-expression search.
    Regex {
        /// Compiled pattern (possibly wrapped in `\b` when `whole_word` is set).
        regex: Regex,
        /// Replacement text, if one was set via `with_replacement`.
        replacement: Option<String>,
        /// Whether the pattern may match across line breaks; when false the
        /// input is searched line by line.
        multiline: bool,
        /// Whether the query was built with whole-word matching requested.
        whole_word: bool,
        /// Whether matching is case sensitive.
        case_sensitive: bool,
        /// Flag carried with the query and round-tripped through `to_proto`/`from_proto`.
        // NOTE(review): presumably controls searching ignored files — not used in this file.
        include_ignored: bool,
        /// The raw inputs the query was built from.
        inner: SearchInputs,
    },
}
 78
 79static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
 80    RegexBuilder::new(r"\B")
 81        .build()
 82        .expect("Failed to create WORD_MATCH_TEST")
 83});
 84
 85impl SearchQuery {
 86    pub fn text(
 87        query: impl ToString,
 88        whole_word: bool,
 89        case_sensitive: bool,
 90        include_ignored: bool,
 91        files_to_include: PathMatcher,
 92        files_to_exclude: PathMatcher,
 93        buffers: Option<Vec<Model<Buffer>>>,
 94    ) -> Result<Self> {
 95        let query = query.to_string();
 96        let search = AhoCorasickBuilder::new()
 97            .ascii_case_insensitive(!case_sensitive)
 98            .build([&query])?;
 99        let inner = SearchInputs {
100            query: query.into(),
101            files_to_exclude,
102            files_to_include,
103            buffers,
104        };
105        Ok(Self::Text {
106            search: Arc::new(search),
107            replacement: None,
108            whole_word,
109            case_sensitive,
110            include_ignored,
111            inner,
112        })
113    }
114
115    pub fn regex(
116        query: impl ToString,
117        whole_word: bool,
118        case_sensitive: bool,
119        include_ignored: bool,
120        files_to_include: PathMatcher,
121        files_to_exclude: PathMatcher,
122        buffers: Option<Vec<Model<Buffer>>>,
123    ) -> Result<Self> {
124        let mut query = query.to_string();
125        let initial_query = Arc::from(query.as_str());
126        if whole_word {
127            let mut word_query = String::new();
128            if let Some(first) = query.get(0..1) {
129                if WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x) {
130                    word_query.push_str("\\b");
131                }
132            }
133            word_query.push_str(&query);
134            if let Some(last) = query.get(query.len() - 1..) {
135                if WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x) {
136                    word_query.push_str("\\b");
137                }
138            }
139            query = word_query
140        }
141
142        let multiline = query.contains('\n') || query.contains("\\n") || query.contains("\\s");
143        let regex = RegexBuilder::new(&query)
144            .case_insensitive(!case_sensitive)
145            .build()?;
146        let inner = SearchInputs {
147            query: initial_query,
148            files_to_exclude,
149            files_to_include,
150            buffers,
151        };
152        Ok(Self::Regex {
153            regex,
154            replacement: None,
155            multiline,
156            whole_word,
157            case_sensitive,
158            include_ignored,
159            inner,
160        })
161    }
162
163    pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
164        if message.regex {
165            Self::regex(
166                message.query,
167                message.whole_word,
168                message.case_sensitive,
169                message.include_ignored,
170                deserialize_path_matches(&message.files_to_include)?,
171                deserialize_path_matches(&message.files_to_exclude)?,
172                None, // search opened only don't need search remote
173            )
174        } else {
175            Self::text(
176                message.query,
177                message.whole_word,
178                message.case_sensitive,
179                message.include_ignored,
180                deserialize_path_matches(&message.files_to_include)?,
181                deserialize_path_matches(&message.files_to_exclude)?,
182                None, // search opened only don't need search remote
183            )
184        }
185    }
186
187    pub fn with_replacement(mut self, new_replacement: String) -> Self {
188        match self {
189            Self::Text {
190                ref mut replacement,
191                ..
192            }
193            | Self::Regex {
194                ref mut replacement,
195                ..
196            } => {
197                *replacement = Some(new_replacement);
198                self
199            }
200        }
201    }
202
203    pub fn to_proto(&self) -> proto::SearchQuery {
204        proto::SearchQuery {
205            query: self.as_str().to_string(),
206            regex: self.is_regex(),
207            whole_word: self.whole_word(),
208            case_sensitive: self.case_sensitive(),
209            include_ignored: self.include_ignored(),
210            files_to_include: self.files_to_include().sources().join(","),
211            files_to_exclude: self.files_to_exclude().sources().join(","),
212        }
213    }
214
215    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
216        if self.as_str().is_empty() {
217            return Ok(false);
218        }
219
220        match self {
221            Self::Text { search, .. } => {
222                let mat = search.stream_find_iter(stream).next();
223                match mat {
224                    Some(Ok(_)) => Ok(true),
225                    Some(Err(err)) => Err(err.into()),
226                    None => Ok(false),
227                }
228            }
229            Self::Regex {
230                regex, multiline, ..
231            } => {
232                let mut reader = BufReader::new(stream);
233                if *multiline {
234                    let mut text = String::new();
235                    if let Err(err) = reader.read_to_string(&mut text) {
236                        Err(err.into())
237                    } else {
238                        Ok(regex.find(&text)?.is_some())
239                    }
240                } else {
241                    for line in reader.lines() {
242                        let line = line?;
243                        if regex.find(&line)?.is_some() {
244                            return Ok(true);
245                        }
246                    }
247                    Ok(false)
248                }
249            }
250        }
251    }
252    /// Returns the replacement text for this `SearchQuery`.
253    pub fn replacement(&self) -> Option<&str> {
254        match self {
255            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
256                replacement.as_deref()
257            }
258        }
259    }
260    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
261    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
262        match self {
263            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
264            SearchQuery::Regex {
265                regex, replacement, ..
266            } => {
267                if let Some(replacement) = replacement {
268                    let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX
269                        .get_or_init(|| Regex::new(r"\\\\|\\n|\\t").unwrap())
270                        .replace_all(replacement, |c: &Captures| {
271                            match c.get(0).unwrap().as_str() {
272                                r"\\" => "\\",
273                                r"\n" => "\n",
274                                r"\t" => "\t",
275                                x => unreachable!("Unexpected escape sequence: {}", x),
276                            }
277                        });
278                    Some(regex.replace(text, replacement))
279                } else {
280                    None
281                }
282            }
283        }
284    }
285
    /// Finds all matches of this query in `buffer`, optionally restricted to `subrange`.
    ///
    /// Returns byte ranges as reported against the searched rope: when
    /// `subrange` is given the rope is sliced first and the subrange start is
    /// NOT added back to the returned ranges (it is only used to look up the
    /// character classifier). An empty query yields no matches.
    ///
    /// The future periodically yields (every `YIELD_INTERVAL` matches, or
    /// chunks in line-by-line regex mode) so long searches do not monopolize
    /// the executor.
    ///
    /// # Panics
    /// Panics if the text automaton's stream iterator reports an error.
    pub async fn search(
        &self,
        buffer: &BufferSnapshot,
        subrange: Option<Range<usize>>,
    ) -> Vec<Range<usize>> {
        // Number of loop iterations between cooperative yields.
        const YIELD_INTERVAL: usize = 20000;

        if self.as_str().is_empty() {
            return Default::default();
        }

        // Start of the searched region within the buffer (0 when searching everything).
        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
        let rope = if let Some(range) = subrange {
            buffer.as_rope().slice(range)
        } else {
            buffer.as_rope().clone()
        };

        let mut matches = Vec::new();
        match self {
            Self::Text {
                search, whole_word, ..
            } => {
                for (ix, mat) in search
                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
                    .enumerate()
                {
                    if (ix + 1) % YIELD_INTERVAL == 0 {
                        yield_now().await;
                    }

                    let mat = mat.unwrap();
                    if *whole_word {
                        // The classifier is looked up at the match's position in the
                        // full buffer, hence the added `range_offset`.
                        let classifier = buffer.char_classifier_at(range_offset + mat.start());

                        // Kinds of the characters just outside and just inside
                        // each end of the match.
                        let prev_kind = rope
                            .reversed_chars_at(mat.start())
                            .next()
                            .map(|c| classifier.kind(c));
                        let start_kind =
                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
                        let end_kind =
                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
                        // Reject the match when a word character inside it is directly
                        // adjacent to a word character outside it, on either side.
                        if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
                            || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
                        {
                            continue;
                        }
                    }
                    matches.push(mat.start()..mat.end())
                }
            }

            Self::Regex {
                regex, multiline, ..
            } => {
                if *multiline {
                    // Multiline patterns are run over the whole text at once.
                    let text = rope.to_string();
                    for (ix, mat) in regex.find_iter(&text).enumerate() {
                        if (ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        // Matches for which the regex engine reports an error are skipped.
                        if let Ok(mat) = mat {
                            matches.push(mat.start()..mat.end());
                        }
                    }
                } else {
                    // Line-by-line mode: `line` accumulates the current line across
                    // chunks and `line_offset` is the byte offset of its start.
                    let mut line = String::new();
                    let mut line_offset = 0;
                    // A trailing "\n" chunk is chained on so the last line is searched too.
                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        for (newline_ix, text) in chunk.split('\n').enumerate() {
                            // Every piece after the first follows a newline, so the
                            // accumulated line is complete and can be searched.
                            if newline_ix > 0 {
                                for mat in regex.find_iter(&line).flatten() {
                                    let start = line_offset + mat.start();
                                    let end = line_offset + mat.end();
                                    matches.push(start..end);
                                }

                                // `+ 1` accounts for the newline that ended the line.
                                line_offset += line.len() + 1;
                                line.clear();
                            }
                            line.push_str(text);
                        }
                    }
                }
            }
        }

        matches
    }
382
383    pub fn is_empty(&self) -> bool {
384        self.as_str().is_empty()
385    }
386
387    pub fn as_str(&self) -> &str {
388        self.as_inner().as_str()
389    }
390
391    pub fn whole_word(&self) -> bool {
392        match self {
393            Self::Text { whole_word, .. } => *whole_word,
394            Self::Regex { whole_word, .. } => *whole_word,
395        }
396    }
397
398    pub fn case_sensitive(&self) -> bool {
399        match self {
400            Self::Text { case_sensitive, .. } => *case_sensitive,
401            Self::Regex { case_sensitive, .. } => *case_sensitive,
402        }
403    }
404
405    pub fn include_ignored(&self) -> bool {
406        match self {
407            Self::Text {
408                include_ignored, ..
409            } => *include_ignored,
410            Self::Regex {
411                include_ignored, ..
412            } => *include_ignored,
413        }
414    }
415
416    pub fn is_regex(&self) -> bool {
417        matches!(self, Self::Regex { .. })
418    }
419
420    pub fn files_to_include(&self) -> &PathMatcher {
421        self.as_inner().files_to_include()
422    }
423
424    pub fn files_to_exclude(&self) -> &PathMatcher {
425        self.as_inner().files_to_exclude()
426    }
427
428    pub fn buffers(&self) -> Option<&Vec<Model<Buffer>>> {
429        self.as_inner().buffers.as_ref()
430    }
431
432    pub fn is_opened_only(&self) -> bool {
433        self.as_inner().buffers.is_some()
434    }
435
436    pub fn filters_path(&self) -> bool {
437        !(self.files_to_exclude().sources().is_empty()
438            && self.files_to_include().sources().is_empty())
439    }
440
441    pub fn file_matches(&self, file_path: &Path) -> bool {
442        let mut path = file_path.to_path_buf();
443        loop {
444            if self.files_to_exclude().is_match(&path) {
445                return false;
446            } else if self.files_to_include().sources().is_empty()
447                || self.files_to_include().is_match(&path)
448            {
449                return true;
450            } else if !path.pop() {
451                return false;
452            }
453        }
454    }
455    pub fn as_inner(&self) -> &SearchInputs {
456        match self {
457            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
458        }
459    }
460}
461
462pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
463    let globs = glob_set
464        .split(',')
465        .map(str::trim)
466        .filter(|&glob_str| (!glob_str.is_empty()))
467        .map(|glob_str| glob_str.to_owned())
468        .collect::<Vec<_>>();
469    Ok(PathMatcher::new(&globs)?)
470}
471
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted and the resulting matcher must match the path itself.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed globs accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}
519}