search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use fancy_regex::{Captures, Regex, RegexBuilder};
  5use gpui::Entity;
  6use itertools::Itertools as _;
  7use language::{Buffer, BufferSnapshot, CharKind};
  8use smol::future::yield_now;
  9use std::{
 10    borrow::Cow,
 11    io::{BufRead, BufReader, Read},
 12    ops::Range,
 13    sync::{Arc, LazyLock},
 14};
 15use text::Anchor;
 16use util::{
 17    paths::{PathMatcher, PathStyle},
 18    rel_path::RelPath,
 19};
 20
/// A single item produced by a search.
#[derive(Debug)]
pub enum SearchResult {
    /// A buffer together with a list of anchor ranges inside it
    /// (presumably the locations of the matches — confirm against the producer).
    Buffer {
        buffer: Entity<Buffer>,
        ranges: Vec<Range<Anchor>>,
    },
    /// NOTE(review): looks like a marker that the search stopped because a
    /// result limit was hit — the limit itself is not defined in this file.
    LimitReached,
}
 29
 30#[derive(Clone, Copy, PartialEq)]
 31pub enum SearchInputKind {
 32    Query,
 33    Include,
 34    Exclude,
 35}
 36
 37#[derive(Clone, Debug)]
 38pub struct SearchInputs {
 39    query: Arc<str>,
 40    files_to_include: PathMatcher,
 41    files_to_exclude: PathMatcher,
 42    match_full_paths: bool,
 43    buffers: Option<Vec<Entity<Buffer>>>,
 44}
 45
 46impl SearchInputs {
 47    pub fn as_str(&self) -> &str {
 48        self.query.as_ref()
 49    }
 50    pub fn files_to_include(&self) -> &PathMatcher {
 51        &self.files_to_include
 52    }
 53    pub fn files_to_exclude(&self) -> &PathMatcher {
 54        &self.files_to_exclude
 55    }
 56    pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
 57        &self.buffers
 58    }
 59}
/// A compiled search: either a literal-text search or a regular-expression
/// search, together with the options it was created with.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search backed by an Aho-Corasick automaton.
    Text {
        // Automaton built from the query string in `SearchQuery::text`.
        search: AhoCorasick,
        // Replacement text; `None` until `with_replacement` is called.
        replacement: Option<String>,
        // When true, `search` drops matches that touch an adjacent word character.
        whole_word: bool,
        // When false, the automaton is built ASCII-case-insensitive.
        case_sensitive: bool,
        // NOTE(review): only stored and exposed here; presumably tells the
        // caller whether ignored files should be scanned — confirm at call sites.
        include_ignored: bool,
        // Query string, path filters and buffer list shared by both variants.
        inner: SearchInputs,
    },
    /// Regular-expression search backed by `fancy_regex`.
    Regex {
        // Compiled pattern (after `\c`/`\C` stripping and optional `\b` wrapping).
        regex: Regex,
        // Replacement template; `None` until `with_replacement` is called.
        replacement: Option<String>,
        // True when the pattern contains a newline or a `\n` escape; such
        // patterns are matched against the whole text instead of line by line.
        multiline: bool,
        // Whether `\b` anchors were requested around the pattern.
        whole_word: bool,
        // Effective case sensitivity (may be overridden by `\c` / `\C`).
        case_sensitive: bool,
        // NOTE(review): only stored and exposed here — see the note on `Text`.
        include_ignored: bool,
        // In line-by-line mode, record only the first match of each line.
        one_match_per_line: bool,
        // Query string, path filters and buffer list shared by both variants.
        inner: SearchInputs,
    },
}
 81
 82static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
 83    RegexBuilder::new(r"\B")
 84        .build()
 85        .expect("Failed to create WORD_MATCH_TEST")
 86});
 87
 88impl SearchQuery {
 89    /// Create a text query
 90    ///
 91    /// If `match_full_paths` is true, include/exclude patterns will always be matched against fully qualified project paths beginning with a project root.
 92    /// If `match_full_paths` is false, patterns will be matched against worktree-relative paths.
 93    pub fn text(
 94        query: impl ToString,
 95        whole_word: bool,
 96        case_sensitive: bool,
 97        include_ignored: bool,
 98        files_to_include: PathMatcher,
 99        files_to_exclude: PathMatcher,
100        match_full_paths: bool,
101        buffers: Option<Vec<Entity<Buffer>>>,
102    ) -> Result<Self> {
103        let query = query.to_string();
104        if !case_sensitive && !query.is_ascii() {
105            // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
106            // Fallback to regex search as recommended by
107            // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
108            return Self::regex(
109                regex::escape(&query),
110                whole_word,
111                case_sensitive,
112                include_ignored,
113                false,
114                files_to_include,
115                files_to_exclude,
116                false,
117                buffers,
118            );
119        }
120        let search = AhoCorasickBuilder::new()
121            .ascii_case_insensitive(!case_sensitive)
122            .build([&query])?;
123        let inner = SearchInputs {
124            query: query.into(),
125            files_to_exclude,
126            files_to_include,
127            match_full_paths,
128            buffers,
129        };
130        Ok(Self::Text {
131            search,
132            replacement: None,
133            whole_word,
134            case_sensitive,
135            include_ignored,
136            inner,
137        })
138    }
139
140    /// Create a regex query
141    ///
142    /// If `match_full_paths` is true, include/exclude patterns will be matched against fully qualified project paths
143    /// beginning with a project root name. If false, they will be matched against project-relative paths (which don't start
144    /// with their respective project root).
145    pub fn regex(
146        query: impl ToString,
147        whole_word: bool,
148        mut case_sensitive: bool,
149        include_ignored: bool,
150        one_match_per_line: bool,
151        files_to_include: PathMatcher,
152        files_to_exclude: PathMatcher,
153        match_full_paths: bool,
154        buffers: Option<Vec<Entity<Buffer>>>,
155    ) -> Result<Self> {
156        let mut query = query.to_string();
157        let initial_query = Arc::from(query.as_str());
158
159        if let Some((case_sensitive_from_pattern, new_query)) =
160            Self::case_sensitive_from_pattern(&query)
161        {
162            case_sensitive = case_sensitive_from_pattern;
163            query = new_query
164        }
165
166        if whole_word {
167            let mut word_query = String::new();
168            if let Some(first) = query.get(0..1)
169                && WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x)
170            {
171                word_query.push_str("\\b");
172            }
173            word_query.push_str(&query);
174            if let Some(last) = query.get(query.len() - 1..)
175                && WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x)
176            {
177                word_query.push_str("\\b");
178            }
179            query = word_query
180        }
181
182        let multiline = query.contains('\n') || query.contains("\\n");
183        if multiline {
184            query.insert_str(0, "(?m)");
185        }
186
187        let regex = RegexBuilder::new(&query)
188            .case_insensitive(!case_sensitive)
189            .build()?;
190        let inner = SearchInputs {
191            query: initial_query,
192            files_to_exclude,
193            files_to_include,
194            match_full_paths,
195            buffers,
196        };
197        Ok(Self::Regex {
198            regex,
199            replacement: None,
200            multiline,
201            whole_word,
202            case_sensitive,
203            include_ignored,
204            inner,
205            one_match_per_line,
206        })
207    }
208
209    /// Extracts case sensitivity settings from pattern items in the provided
210    /// query and returns the same query, with the pattern items removed.
211    ///
212    /// The following pattern modifiers are supported:
213    ///
214    /// - `\c` (case_sensitive: false)
215    /// - `\C` (case_sensitive: true)
216    ///
217    /// If no pattern item were found, `None` will be returned.
218    fn case_sensitive_from_pattern(query: &str) -> Option<(bool, String)> {
219        if !(query.contains("\\c") || query.contains("\\C")) {
220            return None;
221        }
222
223        let mut was_escaped = false;
224        let mut new_query = String::new();
225        let mut is_case_sensitive = None;
226
227        for c in query.chars() {
228            if was_escaped {
229                if c == 'c' {
230                    is_case_sensitive = Some(false);
231                } else if c == 'C' {
232                    is_case_sensitive = Some(true);
233                } else {
234                    new_query.push('\\');
235                    new_query.push(c);
236                }
237                was_escaped = false
238            } else if c == '\\' {
239                was_escaped = true
240            } else {
241                new_query.push(c);
242            }
243        }
244
245        is_case_sensitive.map(|c| (c, new_query))
246    }
247
248    pub fn from_proto(message: proto::SearchQuery, path_style: PathStyle) -> Result<Self> {
249        let files_to_include = if message.files_to_include.is_empty() {
250            message
251                .files_to_include_legacy
252                .split(',')
253                .map(str::trim)
254                .filter(|&glob_str| !glob_str.is_empty())
255                .map(|s| s.to_string())
256                .collect()
257        } else {
258            message.files_to_include
259        };
260
261        let files_to_exclude = if message.files_to_exclude.is_empty() {
262            message
263                .files_to_exclude_legacy
264                .split(',')
265                .map(str::trim)
266                .filter(|&glob_str| !glob_str.is_empty())
267                .map(|s| s.to_string())
268                .collect()
269        } else {
270            message.files_to_exclude
271        };
272
273        if message.regex {
274            Self::regex(
275                message.query,
276                message.whole_word,
277                message.case_sensitive,
278                message.include_ignored,
279                false,
280                PathMatcher::new(files_to_include, path_style)?,
281                PathMatcher::new(files_to_exclude, path_style)?,
282                message.match_full_paths,
283                None, // search opened only don't need search remote
284            )
285        } else {
286            Self::text(
287                message.query,
288                message.whole_word,
289                message.case_sensitive,
290                message.include_ignored,
291                PathMatcher::new(files_to_include, path_style)?,
292                PathMatcher::new(files_to_exclude, path_style)?,
293                message.match_full_paths,
294                None, // search opened only don't need search remote
295            )
296        }
297    }
298
299    pub fn with_replacement(mut self, new_replacement: String) -> Self {
300        match self {
301            Self::Text {
302                ref mut replacement,
303                ..
304            }
305            | Self::Regex {
306                ref mut replacement,
307                ..
308            } => {
309                *replacement = Some(new_replacement);
310                self
311            }
312        }
313    }
314
315    pub fn to_proto(&self) -> proto::SearchQuery {
316        let mut files_to_include = self.files_to_include().sources();
317        let mut files_to_exclude = self.files_to_exclude().sources();
318        proto::SearchQuery {
319            query: self.as_str().to_string(),
320            regex: self.is_regex(),
321            whole_word: self.whole_word(),
322            case_sensitive: self.case_sensitive(),
323            include_ignored: self.include_ignored(),
324            files_to_include: files_to_include.clone().map(ToOwned::to_owned).collect(),
325            files_to_exclude: files_to_exclude.clone().map(ToOwned::to_owned).collect(),
326            match_full_paths: self.match_full_paths(),
327            // Populate legacy fields for backwards compatibility
328            files_to_include_legacy: files_to_include.join(","),
329            files_to_exclude_legacy: files_to_exclude.join(","),
330        }
331    }
332
333    pub(crate) async fn detect(
334        &self,
335        mut reader: BufReader<Box<dyn Read + Send + Sync>>,
336    ) -> Result<bool> {
337        let query_str = self.as_str();
338        if query_str.is_empty() {
339            return Ok(false);
340        }
341
342        // Yield from this function every 20KB scanned.
343        const YIELD_THRESHOLD: usize = 20 * 1024;
344
345        match self {
346            Self::Text { search, .. } => {
347                let mut text = String::new();
348                if query_str.contains('\n') {
349                    reader.read_to_string(&mut text)?;
350                    Ok(search.is_match(&text))
351                } else {
352                    let mut bytes_read = 0;
353                    while reader.read_line(&mut text)? > 0 {
354                        if search.is_match(&text) {
355                            return Ok(true);
356                        }
357                        bytes_read += text.len();
358                        if bytes_read >= YIELD_THRESHOLD {
359                            bytes_read = 0;
360                            smol::future::yield_now().await;
361                        }
362                        text.clear();
363                    }
364                    Ok(false)
365                }
366            }
367            Self::Regex {
368                regex, multiline, ..
369            } => {
370                let mut text = String::new();
371                if *multiline {
372                    reader.read_to_string(&mut text)?;
373                    Ok(regex.is_match(&text)?)
374                } else {
375                    let mut bytes_read = 0;
376                    while reader.read_line(&mut text)? > 0 {
377                        if regex.is_match(&text)? {
378                            return Ok(true);
379                        }
380                        bytes_read += text.len();
381                        if bytes_read >= YIELD_THRESHOLD {
382                            bytes_read = 0;
383                            smol::future::yield_now().await;
384                        }
385                        text.clear();
386                    }
387                    Ok(false)
388                }
389            }
390        }
391    }
392    /// Returns the replacement text for this `SearchQuery`.
393    pub fn replacement(&self) -> Option<&str> {
394        match self {
395            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
396                replacement.as_deref()
397            }
398        }
399    }
400    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
401    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
402        match self {
403            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
404            SearchQuery::Regex {
405                regex, replacement, ..
406            } => {
407                if let Some(replacement) = replacement {
408                    static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
409                        LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
410                    let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
411                        replacement,
412                        |c: &Captures| match c.get(0).unwrap().as_str() {
413                            r"\\" => "\\",
414                            r"\n" => "\n",
415                            r"\t" => "\t",
416                            x => unreachable!("Unexpected escape sequence: {}", x),
417                        },
418                    );
419                    Some(regex.replace(text, replacement))
420                } else {
421                    None
422                }
423            }
424        }
425    }
426
427    pub async fn search(
428        &self,
429        buffer: &BufferSnapshot,
430        subrange: Option<Range<usize>>,
431    ) -> Vec<Range<usize>> {
432        const YIELD_INTERVAL: usize = 20000;
433
434        if self.as_str().is_empty() {
435            return Default::default();
436        }
437
438        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
439        let rope = if let Some(range) = subrange {
440            buffer.as_rope().slice(range)
441        } else {
442            buffer.as_rope().clone()
443        };
444
445        let mut matches = Vec::new();
446        match self {
447            Self::Text {
448                search, whole_word, ..
449            } => {
450                for (ix, mat) in search
451                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
452                    .enumerate()
453                {
454                    if (ix + 1) % YIELD_INTERVAL == 0 {
455                        yield_now().await;
456                    }
457
458                    let mat = mat.unwrap();
459                    if *whole_word {
460                        let classifier = buffer.char_classifier_at(range_offset + mat.start());
461
462                        let prev_kind = rope
463                            .reversed_chars_at(mat.start())
464                            .next()
465                            .map(|c| classifier.kind(c));
466                        let start_kind =
467                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
468                        let end_kind =
469                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
470                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
471                        if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
472                            || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
473                        {
474                            continue;
475                        }
476                    }
477                    matches.push(mat.start()..mat.end())
478                }
479            }
480
481            Self::Regex {
482                regex, multiline, ..
483            } => {
484                if *multiline {
485                    let text = rope.to_string();
486                    for (ix, mat) in regex.find_iter(&text).enumerate() {
487                        if (ix + 1) % YIELD_INTERVAL == 0 {
488                            yield_now().await;
489                        }
490
491                        if let Ok(mat) = mat {
492                            matches.push(mat.start()..mat.end());
493                        }
494                    }
495                } else {
496                    let mut line = String::new();
497                    let mut line_offset = 0;
498                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
499                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
500                            yield_now().await;
501                        }
502
503                        for (newline_ix, text) in chunk.split('\n').enumerate() {
504                            if newline_ix > 0 {
505                                for mat in regex.find_iter(&line).flatten() {
506                                    let start = line_offset + mat.start();
507                                    let end = line_offset + mat.end();
508                                    matches.push(start..end);
509                                    if self.one_match_per_line() == Some(true) {
510                                        break;
511                                    }
512                                }
513
514                                line_offset += line.len() + 1;
515                                line.clear();
516                            }
517                            line.push_str(text);
518                        }
519                    }
520                }
521            }
522        }
523
524        matches
525    }
526
527    pub fn is_empty(&self) -> bool {
528        self.as_str().is_empty()
529    }
530
531    pub fn as_str(&self) -> &str {
532        self.as_inner().as_str()
533    }
534
535    pub fn whole_word(&self) -> bool {
536        match self {
537            Self::Text { whole_word, .. } => *whole_word,
538            Self::Regex { whole_word, .. } => *whole_word,
539        }
540    }
541
542    pub fn case_sensitive(&self) -> bool {
543        match self {
544            Self::Text { case_sensitive, .. } => *case_sensitive,
545            Self::Regex { case_sensitive, .. } => *case_sensitive,
546        }
547    }
548
549    pub fn include_ignored(&self) -> bool {
550        match self {
551            Self::Text {
552                include_ignored, ..
553            } => *include_ignored,
554            Self::Regex {
555                include_ignored, ..
556            } => *include_ignored,
557        }
558    }
559
560    pub fn is_regex(&self) -> bool {
561        matches!(self, Self::Regex { .. })
562    }
563
564    pub fn files_to_include(&self) -> &PathMatcher {
565        self.as_inner().files_to_include()
566    }
567
568    pub fn files_to_exclude(&self) -> &PathMatcher {
569        self.as_inner().files_to_exclude()
570    }
571
572    pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
573        self.as_inner().buffers.as_ref()
574    }
575
576    pub fn is_opened_only(&self) -> bool {
577        self.as_inner().buffers.is_some()
578    }
579
580    pub fn filters_path(&self) -> bool {
581        !(self.files_to_exclude().sources().next().is_none()
582            && self.files_to_include().sources().next().is_none())
583    }
584
585    pub fn match_full_paths(&self) -> bool {
586        self.as_inner().match_full_paths
587    }
588
589    /// Check match full paths to determine whether you're required to pass a fully qualified
590    /// project path (starts with a project root).
591    pub fn match_path(&self, file_path: &RelPath) -> bool {
592        let mut path = file_path.to_rel_path_buf();
593        loop {
594            if self.files_to_exclude().is_match(&path) {
595                return false;
596            } else if self.files_to_include().sources().next().is_none()
597                || self.files_to_include().is_match(&path)
598            {
599                return true;
600            } else if !path.pop() {
601                return false;
602            }
603        }
604    }
605    pub fn as_inner(&self) -> &SearchInputs {
606        match self {
607            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
608        }
609    }
610
611    /// Whether this search should replace only one match per line, instead of
612    /// all matches.
613    /// Returns `None` for text searches, as only regex searches support this
614    /// option.
615    pub fn one_match_per_line(&self) -> Option<bool> {
616        match self {
617            Self::Regex {
618                one_match_per_line, ..
619            } => Some(*one_match_per_line),
620            Self::Text { .. } => None,
621        }
622    }
623}