search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use gpui::Model;
  5use language::{char_kind, Buffer, BufferSnapshot};
  6use regex::{Captures, Regex, RegexBuilder};
  7use smol::future::yield_now;
  8use std::{
  9    borrow::Cow,
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::Path,
 13    sync::{Arc, OnceLock},
 14};
 15use text::Anchor;
 16use util::paths::PathMatcher;
 17
/// Lazily initialized regex used by `SearchQuery::replacement_for` to locate the
/// escape sequences `\\`, `\n` and `\t` inside replacement text. It is compiled
/// on first use and then shared for the lifetime of the process.
static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: OnceLock<Regex> = OnceLock::new();
 19
/// One item produced while running a search.
pub enum SearchResult {
    /// Matches found inside a single buffer.
    Buffer {
        /// Handle to the buffer the ranges refer to.
        buffer: Model<Buffer>,
        /// Anchor ranges of the matches within `buffer`.
        ranges: Vec<Range<Anchor>>,
    },
    /// NOTE(review): presumably signals that the search stopped because a
    /// result cap was hit — confirm against the code that emits it.
    LimitReached,
}
 27
 28#[derive(Clone, Debug)]
 29pub struct SearchInputs {
 30    query: Arc<str>,
 31    files_to_include: PathMatcher,
 32    files_to_exclude: PathMatcher,
 33}
 34
 35impl SearchInputs {
 36    pub fn as_str(&self) -> &str {
 37        self.query.as_ref()
 38    }
 39    pub fn files_to_include(&self) -> &PathMatcher {
 40        &self.files_to_include
 41    }
 42    pub fn files_to_exclude(&self) -> &PathMatcher {
 43        &self.files_to_exclude
 44    }
 45}
/// A compiled search query, either literal text or a regular expression.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal-text search backed by an Aho-Corasick automaton.
    Text {
        /// Automaton built from the query string as its only pattern.
        search: Arc<AhoCorasick>,
        /// Replacement text, if one was attached via `with_replacement`.
        replacement: Option<String>,
        /// Whether matches must be delimited by a change in character kind
        /// on both sides (checked in `search`).
        whole_word: bool,
        /// When `false`, the automaton matches ASCII case-insensitively.
        case_sensitive: bool,
        /// Stored and round-tripped through the proto messages; not consulted
        /// by the matching code in this file.
        include_ignored: bool,
        /// The original query text and path filters.
        inner: SearchInputs,
    },

    /// Regular-expression search.
    Regex {
        /// Compiled pattern (already wrapped with word boundaries when
        /// `whole_word` is set).
        regex: Regex,
        /// Replacement text, if one was attached via `with_replacement`.
        replacement: Option<String>,
        /// `true` when the pattern contains a newline or the two-character
        /// sequence `\n`; selects whole-text matching instead of line-by-line.
        multiline: bool,
        /// Whether the pattern was wrapped in `\b` word boundaries.
        whole_word: bool,
        /// When `false`, the regex is compiled case-insensitively.
        case_sensitive: bool,
        /// Stored and round-tripped through the proto messages; not consulted
        /// by the matching code in this file.
        include_ignored: bool,
        /// The original (unwrapped) query text and path filters.
        inner: SearchInputs,
    },
}
 67
 68impl SearchQuery {
 69    pub fn text(
 70        query: impl ToString,
 71        whole_word: bool,
 72        case_sensitive: bool,
 73        include_ignored: bool,
 74        files_to_include: PathMatcher,
 75        files_to_exclude: PathMatcher,
 76    ) -> Result<Self> {
 77        let query = query.to_string();
 78        let search = AhoCorasickBuilder::new()
 79            .ascii_case_insensitive(!case_sensitive)
 80            .build(&[&query])?;
 81        let inner = SearchInputs {
 82            query: query.into(),
 83            files_to_exclude,
 84            files_to_include,
 85        };
 86        Ok(Self::Text {
 87            search: Arc::new(search),
 88            replacement: None,
 89            whole_word,
 90            case_sensitive,
 91            include_ignored,
 92            inner,
 93        })
 94    }
 95
 96    pub fn regex(
 97        query: impl ToString,
 98        whole_word: bool,
 99        case_sensitive: bool,
100        include_ignored: bool,
101        files_to_include: PathMatcher,
102        files_to_exclude: PathMatcher,
103    ) -> Result<Self> {
104        let mut query = query.to_string();
105        let initial_query = Arc::from(query.as_str());
106        if whole_word {
107            let mut word_query = String::new();
108            word_query.push_str("\\b");
109            word_query.push_str(&query);
110            word_query.push_str("\\b");
111            query = word_query
112        }
113
114        let multiline = query.contains('\n') || query.contains("\\n");
115        let regex = RegexBuilder::new(&query)
116            .case_insensitive(!case_sensitive)
117            .multi_line(multiline)
118            .build()?;
119        let inner = SearchInputs {
120            query: initial_query,
121            files_to_exclude,
122            files_to_include,
123        };
124        Ok(Self::Regex {
125            regex,
126            replacement: None,
127            multiline,
128            whole_word,
129            case_sensitive,
130            include_ignored,
131            inner,
132        })
133    }
134
135    pub fn from_proto_v1(message: proto::SearchProject) -> Result<Self> {
136        if message.regex {
137            Self::regex(
138                message.query,
139                message.whole_word,
140                message.case_sensitive,
141                message.include_ignored,
142                deserialize_path_matches(&message.files_to_include)?,
143                deserialize_path_matches(&message.files_to_exclude)?,
144            )
145        } else {
146            Self::text(
147                message.query,
148                message.whole_word,
149                message.case_sensitive,
150                message.include_ignored,
151                deserialize_path_matches(&message.files_to_include)?,
152                deserialize_path_matches(&message.files_to_exclude)?,
153            )
154        }
155    }
156
157    pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
158        if message.regex {
159            Self::regex(
160                message.query,
161                message.whole_word,
162                message.case_sensitive,
163                message.include_ignored,
164                deserialize_path_matches(&message.files_to_include)?,
165                deserialize_path_matches(&message.files_to_exclude)?,
166            )
167        } else {
168            Self::text(
169                message.query,
170                message.whole_word,
171                message.case_sensitive,
172                message.include_ignored,
173                deserialize_path_matches(&message.files_to_include)?,
174                deserialize_path_matches(&message.files_to_exclude)?,
175            )
176        }
177    }
178    pub fn with_replacement(mut self, new_replacement: String) -> Self {
179        match self {
180            Self::Text {
181                ref mut replacement,
182                ..
183            }
184            | Self::Regex {
185                ref mut replacement,
186                ..
187            } => {
188                *replacement = Some(new_replacement);
189                self
190            }
191        }
192    }
193    pub fn to_protov1(&self, project_id: u64) -> proto::SearchProject {
194        proto::SearchProject {
195            project_id,
196            query: self.as_str().to_string(),
197            regex: self.is_regex(),
198            whole_word: self.whole_word(),
199            case_sensitive: self.case_sensitive(),
200            include_ignored: self.include_ignored(),
201            files_to_include: self.files_to_include().sources().join(","),
202            files_to_exclude: self.files_to_exclude().sources().join(","),
203        }
204    }
205
206    pub fn to_proto(&self) -> proto::SearchQuery {
207        proto::SearchQuery {
208            query: self.as_str().to_string(),
209            regex: self.is_regex(),
210            whole_word: self.whole_word(),
211            case_sensitive: self.case_sensitive(),
212            include_ignored: self.include_ignored(),
213            files_to_include: self.files_to_include().sources().join(","),
214            files_to_exclude: self.files_to_exclude().sources().join(","),
215        }
216    }
217
218    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
219        if self.as_str().is_empty() {
220            return Ok(false);
221        }
222
223        match self {
224            Self::Text { search, .. } => {
225                let mat = search.stream_find_iter(stream).next();
226                match mat {
227                    Some(Ok(_)) => Ok(true),
228                    Some(Err(err)) => Err(err.into()),
229                    None => Ok(false),
230                }
231            }
232            Self::Regex {
233                regex, multiline, ..
234            } => {
235                let mut reader = BufReader::new(stream);
236                if *multiline {
237                    let mut text = String::new();
238                    if let Err(err) = reader.read_to_string(&mut text) {
239                        Err(err.into())
240                    } else {
241                        Ok(regex.find(&text).is_some())
242                    }
243                } else {
244                    for line in reader.lines() {
245                        let line = line?;
246                        if regex.find(&line).is_some() {
247                            return Ok(true);
248                        }
249                    }
250                    Ok(false)
251                }
252            }
253        }
254    }
255    /// Returns the replacement text for this `SearchQuery`.
256    pub fn replacement(&self) -> Option<&str> {
257        match self {
258            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
259                replacement.as_deref()
260            }
261        }
262    }
263    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
264    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
265        match self {
266            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
267            SearchQuery::Regex {
268                regex, replacement, ..
269            } => {
270                if let Some(replacement) = replacement {
271                    let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX
272                        .get_or_init(|| Regex::new(r"\\\\|\\n|\\t").unwrap())
273                        .replace_all(replacement, |c: &Captures| {
274                            match c.get(0).unwrap().as_str() {
275                                r"\\" => "\\",
276                                r"\n" => "\n",
277                                r"\t" => "\t",
278                                x => unreachable!("Unexpected escape sequence: {}", x),
279                            }
280                        });
281                    Some(regex.replace(text, replacement))
282                } else {
283                    None
284                }
285            }
286        }
287    }
288
289    pub async fn search(
290        &self,
291        buffer: &BufferSnapshot,
292        subrange: Option<Range<usize>>,
293    ) -> Vec<Range<usize>> {
294        const YIELD_INTERVAL: usize = 20000;
295
296        if self.as_str().is_empty() {
297            return Default::default();
298        }
299
300        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
301        let rope = if let Some(range) = subrange {
302            buffer.as_rope().slice(range)
303        } else {
304            buffer.as_rope().clone()
305        };
306
307        let mut matches = Vec::new();
308        match self {
309            Self::Text {
310                search, whole_word, ..
311            } => {
312                for (ix, mat) in search
313                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
314                    .enumerate()
315                {
316                    if (ix + 1) % YIELD_INTERVAL == 0 {
317                        yield_now().await;
318                    }
319
320                    let mat = mat.unwrap();
321                    if *whole_word {
322                        let scope = buffer.language_scope_at(range_offset + mat.start());
323                        let kind = |c| char_kind(&scope, c);
324
325                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
326                        let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
327                        let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
328                        let next_kind = rope.chars_at(mat.end()).next().map(kind);
329                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
330                            continue;
331                        }
332                    }
333                    matches.push(mat.start()..mat.end())
334                }
335            }
336
337            Self::Regex {
338                regex, multiline, ..
339            } => {
340                if *multiline {
341                    let text = rope.to_string();
342                    for (ix, mat) in regex.find_iter(&text).enumerate() {
343                        if (ix + 1) % YIELD_INTERVAL == 0 {
344                            yield_now().await;
345                        }
346
347                        matches.push(mat.start()..mat.end());
348                    }
349                } else {
350                    let mut line = String::new();
351                    let mut line_offset = 0;
352                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
353                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
354                            yield_now().await;
355                        }
356
357                        for (newline_ix, text) in chunk.split('\n').enumerate() {
358                            if newline_ix > 0 {
359                                for mat in regex.find_iter(&line) {
360                                    let start = line_offset + mat.start();
361                                    let end = line_offset + mat.end();
362                                    matches.push(start..end);
363                                }
364
365                                line_offset += line.len() + 1;
366                                line.clear();
367                            }
368                            line.push_str(text);
369                        }
370                    }
371                }
372            }
373        }
374
375        matches
376    }
377
378    pub fn is_empty(&self) -> bool {
379        self.as_str().is_empty()
380    }
381
382    pub fn as_str(&self) -> &str {
383        self.as_inner().as_str()
384    }
385
386    pub fn whole_word(&self) -> bool {
387        match self {
388            Self::Text { whole_word, .. } => *whole_word,
389            Self::Regex { whole_word, .. } => *whole_word,
390        }
391    }
392
393    pub fn case_sensitive(&self) -> bool {
394        match self {
395            Self::Text { case_sensitive, .. } => *case_sensitive,
396            Self::Regex { case_sensitive, .. } => *case_sensitive,
397        }
398    }
399
400    pub fn include_ignored(&self) -> bool {
401        match self {
402            Self::Text {
403                include_ignored, ..
404            } => *include_ignored,
405            Self::Regex {
406                include_ignored, ..
407            } => *include_ignored,
408        }
409    }
410
411    pub fn is_regex(&self) -> bool {
412        matches!(self, Self::Regex { .. })
413    }
414
415    pub fn files_to_include(&self) -> &PathMatcher {
416        self.as_inner().files_to_include()
417    }
418
419    pub fn files_to_exclude(&self) -> &PathMatcher {
420        self.as_inner().files_to_exclude()
421    }
422
423    pub fn filters_path(&self) -> bool {
424        !(self.files_to_exclude().sources().is_empty()
425            && self.files_to_include().sources().is_empty())
426    }
427
428    pub fn file_matches(&self, file_path: &Path) -> bool {
429        let mut path = file_path.to_path_buf();
430        loop {
431            if self.files_to_exclude().is_match(&path) {
432                return false;
433            } else if self.files_to_include().sources().is_empty()
434                || self.files_to_include().is_match(&path)
435            {
436                return true;
437            } else if !path.pop() {
438                return false;
439            }
440        }
441    }
442    pub fn as_inner(&self) -> &SearchInputs {
443        match self {
444            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
445        }
446    }
447}
448
449pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
450    let globs = glob_set
451        .split(',')
452        .map(str::trim)
453        .filter_map(|glob_str| (!glob_str.is_empty()).then(|| glob_str.to_owned()))
454        .collect::<Vec<_>>();
455    Ok(PathMatcher::new(&globs)?)
456}
457
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths are valid globs, and each one must match itself.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected; well-formed ones must be accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            assert!(
                PathMatcher::new(&[invalid_glob.to_owned()]).is_err(),
                "Invalid glob {invalid_glob} should not be accepted"
            );
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}