search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use fancy_regex::{Captures, Regex, RegexBuilder};
  5use gpui::Entity;
  6use language::{Buffer, BufferSnapshot, CharKind};
  7use smol::future::yield_now;
  8use std::{
  9    borrow::Cow,
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::Path,
 13    sync::{Arc, LazyLock},
 14};
 15use text::Anchor;
 16use util::paths::PathMatcher;
 17
 18#[derive(Debug)]
 19pub enum SearchResult {
 20    Buffer {
 21        buffer: Entity<Buffer>,
 22        ranges: Vec<Range<Anchor>>,
 23    },
 24    LimitReached,
 25}
 26
 27#[derive(Clone, Copy, PartialEq)]
 28pub enum SearchInputKind {
 29    Query,
 30    Include,
 31    Exclude,
 32}
 33
 34#[derive(Clone, Debug)]
 35pub struct SearchInputs {
 36    query: Arc<str>,
 37    files_to_include: PathMatcher,
 38    files_to_exclude: PathMatcher,
 39    buffers: Option<Vec<Entity<Buffer>>>,
 40}
 41
 42impl SearchInputs {
 43    pub fn as_str(&self) -> &str {
 44        self.query.as_ref()
 45    }
 46    pub fn files_to_include(&self) -> &PathMatcher {
 47        &self.files_to_include
 48    }
 49    pub fn files_to_exclude(&self) -> &PathMatcher {
 50        &self.files_to_exclude
 51    }
 52    pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
 53        &self.buffers
 54    }
 55}
 56#[derive(Clone, Debug)]
 57pub enum SearchQuery {
 58    Text {
 59        search: AhoCorasick,
 60        replacement: Option<String>,
 61        whole_word: bool,
 62        case_sensitive: bool,
 63        include_ignored: bool,
 64        inner: SearchInputs,
 65    },
 66
 67    Regex {
 68        regex: Regex,
 69        replacement: Option<String>,
 70        multiline: bool,
 71        whole_word: bool,
 72        case_sensitive: bool,
 73        include_ignored: bool,
 74        one_match_per_line: bool,
 75        inner: SearchInputs,
 76    },
 77}
 78
 79static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
 80    RegexBuilder::new(r"\B")
 81        .build()
 82        .expect("Failed to create WORD_MATCH_TEST")
 83});
 84
 85impl SearchQuery {
 86    pub fn text(
 87        query: impl ToString,
 88        whole_word: bool,
 89        case_sensitive: bool,
 90        include_ignored: bool,
 91        files_to_include: PathMatcher,
 92        files_to_exclude: PathMatcher,
 93        buffers: Option<Vec<Entity<Buffer>>>,
 94    ) -> Result<Self> {
 95        let query = query.to_string();
 96        let search = AhoCorasickBuilder::new()
 97            .ascii_case_insensitive(!case_sensitive)
 98            .build([&query])?;
 99        let inner = SearchInputs {
100            query: query.into(),
101            files_to_exclude,
102            files_to_include,
103            buffers,
104        };
105        Ok(Self::Text {
106            search,
107            replacement: None,
108            whole_word,
109            case_sensitive,
110            include_ignored,
111            inner,
112        })
113    }
114
115    pub fn regex(
116        query: impl ToString,
117        whole_word: bool,
118        case_sensitive: bool,
119        include_ignored: bool,
120        one_match_per_line: bool,
121        files_to_include: PathMatcher,
122        files_to_exclude: PathMatcher,
123        buffers: Option<Vec<Entity<Buffer>>>,
124    ) -> Result<Self> {
125        let mut query = query.to_string();
126        let initial_query = Arc::from(query.as_str());
127        if whole_word {
128            let mut word_query = String::new();
129            if let Some(first) = query.get(0..1) {
130                if WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x) {
131                    word_query.push_str("\\b");
132                }
133            }
134            word_query.push_str(&query);
135            if let Some(last) = query.get(query.len() - 1..) {
136                if WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x) {
137                    word_query.push_str("\\b");
138                }
139            }
140            query = word_query
141        }
142
143        let multiline = query.contains('\n') || query.contains("\\n");
144        let regex = RegexBuilder::new(&query)
145            .case_insensitive(!case_sensitive)
146            .build()?;
147        let inner = SearchInputs {
148            query: initial_query,
149            files_to_exclude,
150            files_to_include,
151            buffers,
152        };
153        Ok(Self::Regex {
154            regex,
155            replacement: None,
156            multiline,
157            whole_word,
158            case_sensitive,
159            include_ignored,
160            inner,
161            one_match_per_line,
162        })
163    }
164
165    pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
166        if message.regex {
167            Self::regex(
168                message.query,
169                message.whole_word,
170                message.case_sensitive,
171                message.include_ignored,
172                false,
173                deserialize_path_matches(&message.files_to_include)?,
174                deserialize_path_matches(&message.files_to_exclude)?,
175                None, // search opened only don't need search remote
176            )
177        } else {
178            Self::text(
179                message.query,
180                message.whole_word,
181                message.case_sensitive,
182                message.include_ignored,
183                deserialize_path_matches(&message.files_to_include)?,
184                deserialize_path_matches(&message.files_to_exclude)?,
185                None, // search opened only don't need search remote
186            )
187        }
188    }
189
190    pub fn with_replacement(mut self, new_replacement: String) -> Self {
191        match self {
192            Self::Text {
193                ref mut replacement,
194                ..
195            }
196            | Self::Regex {
197                ref mut replacement,
198                ..
199            } => {
200                *replacement = Some(new_replacement);
201                self
202            }
203        }
204    }
205
206    pub fn to_proto(&self) -> proto::SearchQuery {
207        proto::SearchQuery {
208            query: self.as_str().to_string(),
209            regex: self.is_regex(),
210            whole_word: self.whole_word(),
211            case_sensitive: self.case_sensitive(),
212            include_ignored: self.include_ignored(),
213            files_to_include: self.files_to_include().sources().join(","),
214            files_to_exclude: self.files_to_exclude().sources().join(","),
215        }
216    }
217
218    pub(crate) fn detect(
219        &self,
220        mut reader: BufReader<Box<dyn Read + Send + Sync>>,
221    ) -> Result<bool> {
222        if self.as_str().is_empty() {
223            return Ok(false);
224        }
225
226        match self {
227            Self::Text { search, .. } => {
228                let mat = search.stream_find_iter(reader).next();
229                match mat {
230                    Some(Ok(_)) => Ok(true),
231                    Some(Err(err)) => Err(err.into()),
232                    None => Ok(false),
233                }
234            }
235            Self::Regex {
236                regex, multiline, ..
237            } => {
238                if *multiline {
239                    let mut text = String::new();
240                    if let Err(err) = reader.read_to_string(&mut text) {
241                        Err(err.into())
242                    } else {
243                        Ok(regex.find(&text)?.is_some())
244                    }
245                } else {
246                    for line in reader.lines() {
247                        let line = line?;
248                        if regex.find(&line)?.is_some() {
249                            return Ok(true);
250                        }
251                    }
252                    Ok(false)
253                }
254            }
255        }
256    }
257    /// Returns the replacement text for this `SearchQuery`.
258    pub fn replacement(&self) -> Option<&str> {
259        match self {
260            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
261                replacement.as_deref()
262            }
263        }
264    }
265    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
266    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
267        match self {
268            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
269            SearchQuery::Regex {
270                regex, replacement, ..
271            } => {
272                if let Some(replacement) = replacement {
273                    static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
274                        LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
275                    let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
276                        replacement,
277                        |c: &Captures| match c.get(0).unwrap().as_str() {
278                            r"\\" => "\\",
279                            r"\n" => "\n",
280                            r"\t" => "\t",
281                            x => unreachable!("Unexpected escape sequence: {}", x),
282                        },
283                    );
284                    Some(regex.replace(text, replacement))
285                } else {
286                    None
287                }
288            }
289        }
290    }
291
292    pub async fn search(
293        &self,
294        buffer: &BufferSnapshot,
295        subrange: Option<Range<usize>>,
296    ) -> Vec<Range<usize>> {
297        const YIELD_INTERVAL: usize = 20000;
298
299        if self.as_str().is_empty() {
300            return Default::default();
301        }
302
303        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
304        let rope = if let Some(range) = subrange {
305            buffer.as_rope().slice(range)
306        } else {
307            buffer.as_rope().clone()
308        };
309
310        let mut matches = Vec::new();
311        match self {
312            Self::Text {
313                search, whole_word, ..
314            } => {
315                for (ix, mat) in search
316                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
317                    .enumerate()
318                {
319                    if (ix + 1) % YIELD_INTERVAL == 0 {
320                        yield_now().await;
321                    }
322
323                    let mat = mat.unwrap();
324                    if *whole_word {
325                        let classifier = buffer.char_classifier_at(range_offset + mat.start());
326
327                        let prev_kind = rope
328                            .reversed_chars_at(mat.start())
329                            .next()
330                            .map(|c| classifier.kind(c));
331                        let start_kind =
332                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
333                        let end_kind =
334                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
335                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
336                        if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
337                            || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
338                        {
339                            continue;
340                        }
341                    }
342                    matches.push(mat.start()..mat.end())
343                }
344            }
345
346            Self::Regex {
347                regex, multiline, ..
348            } => {
349                if *multiline {
350                    let text = rope.to_string();
351                    for (ix, mat) in regex.find_iter(&text).enumerate() {
352                        if (ix + 1) % YIELD_INTERVAL == 0 {
353                            yield_now().await;
354                        }
355
356                        if let Ok(mat) = mat {
357                            matches.push(mat.start()..mat.end());
358                        }
359                    }
360                } else {
361                    let mut line = String::new();
362                    let mut line_offset = 0;
363                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
364                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
365                            yield_now().await;
366                        }
367
368                        for (newline_ix, text) in chunk.split('\n').enumerate() {
369                            if newline_ix > 0 {
370                                for mat in regex.find_iter(&line).flatten() {
371                                    let start = line_offset + mat.start();
372                                    let end = line_offset + mat.end();
373                                    matches.push(start..end);
374                                }
375
376                                line_offset += line.len() + 1;
377                                line.clear();
378                            }
379                            line.push_str(text);
380                        }
381                    }
382                }
383            }
384        }
385
386        matches
387    }
388
389    pub fn is_empty(&self) -> bool {
390        self.as_str().is_empty()
391    }
392
393    pub fn as_str(&self) -> &str {
394        self.as_inner().as_str()
395    }
396
397    pub fn whole_word(&self) -> bool {
398        match self {
399            Self::Text { whole_word, .. } => *whole_word,
400            Self::Regex { whole_word, .. } => *whole_word,
401        }
402    }
403
404    pub fn case_sensitive(&self) -> bool {
405        match self {
406            Self::Text { case_sensitive, .. } => *case_sensitive,
407            Self::Regex { case_sensitive, .. } => *case_sensitive,
408        }
409    }
410
411    pub fn include_ignored(&self) -> bool {
412        match self {
413            Self::Text {
414                include_ignored, ..
415            } => *include_ignored,
416            Self::Regex {
417                include_ignored, ..
418            } => *include_ignored,
419        }
420    }
421
422    pub fn is_regex(&self) -> bool {
423        matches!(self, Self::Regex { .. })
424    }
425
426    pub fn files_to_include(&self) -> &PathMatcher {
427        self.as_inner().files_to_include()
428    }
429
430    pub fn files_to_exclude(&self) -> &PathMatcher {
431        self.as_inner().files_to_exclude()
432    }
433
434    pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
435        self.as_inner().buffers.as_ref()
436    }
437
438    pub fn is_opened_only(&self) -> bool {
439        self.as_inner().buffers.is_some()
440    }
441
442    pub fn filters_path(&self) -> bool {
443        !(self.files_to_exclude().sources().is_empty()
444            && self.files_to_include().sources().is_empty())
445    }
446
447    pub fn file_matches(&self, file_path: &Path) -> bool {
448        let mut path = file_path.to_path_buf();
449        loop {
450            if self.files_to_exclude().is_match(&path) {
451                return false;
452            } else if self.files_to_include().sources().is_empty()
453                || self.files_to_include().is_match(&path)
454            {
455                return true;
456            } else if !path.pop() {
457                return false;
458            }
459        }
460    }
461    pub fn as_inner(&self) -> &SearchInputs {
462        match self {
463            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
464        }
465    }
466
467    /// Whether this search should replace only one match per line, instead of
468    /// all matches.
469    /// Returns `None` for text searches, as only regex searches support this
470    /// option.
471    pub fn one_match_per_line(&self) -> Option<bool> {
472        match self {
473            Self::Regex {
474                one_match_per_line, ..
475            } => Some(*one_match_per_line),
476            Self::Text { .. } => None,
477        }
478    }
479}
480
481pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
482    let globs = glob_set
483        .split(',')
484        .map(str::trim)
485        .filter(|&glob_str| (!glob_str.is_empty()))
486        .map(|glob_str| glob_str.to_owned())
487        .collect::<Vec<_>>();
488    Ok(PathMatcher::new(&globs)?)
489}
490
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths (no glob metacharacters, apart from one character class)
    /// must be accepted and must match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed ones accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}