search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::{Context, Result};
  3use client::proto;
  4use globset::{Glob, GlobMatcher};
  5use itertools::Itertools;
  6use language::{char_kind, BufferSnapshot};
  7use regex::{Regex, RegexBuilder};
  8use smol::future::yield_now;
  9use std::{
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::{Path, PathBuf},
 13    sync::Arc,
 14};
 15
 16#[derive(Clone, Debug)]
 17pub struct SearchInputs {
 18    query: Arc<str>,
 19    files_to_include: Vec<PathMatcher>,
 20    files_to_exclude: Vec<PathMatcher>,
 21}
 22
 23impl SearchInputs {
 24    pub fn as_str(&self) -> &str {
 25        self.query.as_ref()
 26    }
 27    pub fn files_to_include(&self) -> &[PathMatcher] {
 28        &self.files_to_include
 29    }
 30    pub fn files_to_exclude(&self) -> &[PathMatcher] {
 31        &self.files_to_exclude
 32    }
 33}
 34#[derive(Clone, Debug)]
 35pub enum SearchQuery {
 36    Text {
 37        search: Arc<AhoCorasick<usize>>,
 38        whole_word: bool,
 39        case_sensitive: bool,
 40        inner: SearchInputs,
 41    },
 42
 43    Regex {
 44        regex: Regex,
 45
 46        multiline: bool,
 47        whole_word: bool,
 48        case_sensitive: bool,
 49        inner: SearchInputs,
 50    },
 51}
 52
 53#[derive(Clone, Debug)]
 54pub struct PathMatcher {
 55    maybe_path: PathBuf,
 56    glob: GlobMatcher,
 57}
 58
 59impl std::fmt::Display for PathMatcher {
 60    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 61        self.maybe_path.to_string_lossy().fmt(f)
 62    }
 63}
 64
 65impl PathMatcher {
 66    pub fn new(maybe_glob: &str) -> Result<Self, globset::Error> {
 67        Ok(PathMatcher {
 68            glob: Glob::new(&maybe_glob)?.compile_matcher(),
 69            maybe_path: PathBuf::from(maybe_glob),
 70        })
 71    }
 72
 73    pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
 74        other.as_ref().starts_with(&self.maybe_path) || self.glob.is_match(other)
 75    }
 76}
 77
 78impl SearchQuery {
 79    pub fn text(
 80        query: impl ToString,
 81        whole_word: bool,
 82        case_sensitive: bool,
 83        files_to_include: Vec<PathMatcher>,
 84        files_to_exclude: Vec<PathMatcher>,
 85    ) -> Self {
 86        let query = query.to_string();
 87        let search = AhoCorasickBuilder::new()
 88            .auto_configure(&[&query])
 89            .ascii_case_insensitive(!case_sensitive)
 90            .build(&[&query]);
 91        let inner = SearchInputs {
 92            query: query.into(),
 93            files_to_exclude,
 94            files_to_include,
 95        };
 96        Self::Text {
 97            search: Arc::new(search),
 98            whole_word,
 99            case_sensitive,
100            inner,
101        }
102    }
103
104    pub fn regex(
105        query: impl ToString,
106        whole_word: bool,
107        case_sensitive: bool,
108        files_to_include: Vec<PathMatcher>,
109        files_to_exclude: Vec<PathMatcher>,
110    ) -> Result<Self> {
111        let mut query = query.to_string();
112        let initial_query = Arc::from(query.as_str());
113        if whole_word {
114            let mut word_query = String::new();
115            word_query.push_str("\\b");
116            word_query.push_str(&query);
117            word_query.push_str("\\b");
118            query = word_query
119        }
120
121        let multiline = query.contains('\n') || query.contains("\\n");
122        let regex = RegexBuilder::new(&query)
123            .case_insensitive(!case_sensitive)
124            .multi_line(multiline)
125            .build()?;
126        let inner = SearchInputs {
127            query: initial_query,
128            files_to_exclude,
129            files_to_include,
130        };
131        Ok(Self::Regex {
132            regex,
133            multiline,
134            whole_word,
135            case_sensitive,
136            inner,
137        })
138    }
139
140    pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
141        if message.regex {
142            Self::regex(
143                message.query,
144                message.whole_word,
145                message.case_sensitive,
146                deserialize_path_matches(&message.files_to_include)?,
147                deserialize_path_matches(&message.files_to_exclude)?,
148            )
149        } else {
150            Ok(Self::text(
151                message.query,
152                message.whole_word,
153                message.case_sensitive,
154                deserialize_path_matches(&message.files_to_include)?,
155                deserialize_path_matches(&message.files_to_exclude)?,
156            ))
157        }
158    }
159
160    pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
161        proto::SearchProject {
162            project_id,
163            query: self.as_str().to_string(),
164            regex: self.is_regex(),
165            whole_word: self.whole_word(),
166            case_sensitive: self.case_sensitive(),
167            files_to_include: self
168                .files_to_include()
169                .iter()
170                .map(|matcher| matcher.to_string())
171                .join(","),
172            files_to_exclude: self
173                .files_to_exclude()
174                .iter()
175                .map(|matcher| matcher.to_string())
176                .join(","),
177        }
178    }
179
180    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
181        if self.as_str().is_empty() {
182            return Ok(false);
183        }
184
185        match self {
186            Self::Text { search, .. } => {
187                let mat = search.stream_find_iter(stream).next();
188                match mat {
189                    Some(Ok(_)) => Ok(true),
190                    Some(Err(err)) => Err(err.into()),
191                    None => Ok(false),
192                }
193            }
194            Self::Regex {
195                regex, multiline, ..
196            } => {
197                let mut reader = BufReader::new(stream);
198                if *multiline {
199                    let mut text = String::new();
200                    if let Err(err) = reader.read_to_string(&mut text) {
201                        Err(err.into())
202                    } else {
203                        Ok(regex.find(&text).is_some())
204                    }
205                } else {
206                    for line in reader.lines() {
207                        let line = line?;
208                        if regex.find(&line).is_some() {
209                            return Ok(true);
210                        }
211                    }
212                    Ok(false)
213                }
214            }
215        }
216    }
217
218    pub async fn search(
219        &self,
220        buffer: &BufferSnapshot,
221        subrange: Option<Range<usize>>,
222    ) -> Vec<Range<usize>> {
223        const YIELD_INTERVAL: usize = 20000;
224
225        if self.as_str().is_empty() {
226            return Default::default();
227        }
228
229        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
230        let rope = if let Some(range) = subrange {
231            buffer.as_rope().slice(range)
232        } else {
233            buffer.as_rope().clone()
234        };
235
236        let mut matches = Vec::new();
237        match self {
238            Self::Text {
239                search, whole_word, ..
240            } => {
241                for (ix, mat) in search
242                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
243                    .enumerate()
244                {
245                    if (ix + 1) % YIELD_INTERVAL == 0 {
246                        yield_now().await;
247                    }
248
249                    let mat = mat.unwrap();
250                    if *whole_word {
251                        let scope = buffer.language_scope_at(range_offset + mat.start());
252                        let kind = |c| char_kind(&scope, c);
253
254                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
255                        let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
256                        let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
257                        let next_kind = rope.chars_at(mat.end()).next().map(kind);
258                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
259                            continue;
260                        }
261                    }
262                    matches.push(mat.start()..mat.end())
263                }
264            }
265
266            Self::Regex {
267                regex, multiline, ..
268            } => {
269                if *multiline {
270                    let text = rope.to_string();
271                    for (ix, mat) in regex.find_iter(&text).enumerate() {
272                        if (ix + 1) % YIELD_INTERVAL == 0 {
273                            yield_now().await;
274                        }
275
276                        matches.push(mat.start()..mat.end());
277                    }
278                } else {
279                    let mut line = String::new();
280                    let mut line_offset = 0;
281                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
282                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
283                            yield_now().await;
284                        }
285
286                        for (newline_ix, text) in chunk.split('\n').enumerate() {
287                            if newline_ix > 0 {
288                                for mat in regex.find_iter(&line) {
289                                    let start = line_offset + mat.start();
290                                    let end = line_offset + mat.end();
291                                    matches.push(start..end);
292                                }
293
294                                line_offset += line.len() + 1;
295                                line.clear();
296                            }
297                            line.push_str(text);
298                        }
299                    }
300                }
301            }
302        }
303
304        matches
305    }
306
307    pub fn as_str(&self) -> &str {
308        self.as_inner().as_str()
309    }
310
311    pub fn whole_word(&self) -> bool {
312        match self {
313            Self::Text { whole_word, .. } => *whole_word,
314            Self::Regex { whole_word, .. } => *whole_word,
315        }
316    }
317
318    pub fn case_sensitive(&self) -> bool {
319        match self {
320            Self::Text { case_sensitive, .. } => *case_sensitive,
321            Self::Regex { case_sensitive, .. } => *case_sensitive,
322        }
323    }
324
325    pub fn is_regex(&self) -> bool {
326        matches!(self, Self::Regex { .. })
327    }
328
329    pub fn files_to_include(&self) -> &[PathMatcher] {
330        self.as_inner().files_to_include()
331    }
332
333    pub fn files_to_exclude(&self) -> &[PathMatcher] {
334        self.as_inner().files_to_exclude()
335    }
336
337    pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
338        match file_path {
339            Some(file_path) => {
340                !self
341                    .files_to_exclude()
342                    .iter()
343                    .any(|exclude_glob| exclude_glob.is_match(file_path))
344                    && (self.files_to_include().is_empty()
345                        || self
346                            .files_to_include()
347                            .iter()
348                            .any(|include_glob| include_glob.is_match(file_path)))
349            }
350            None => self.files_to_include().is_empty(),
351        }
352    }
353    pub fn as_inner(&self) -> &SearchInputs {
354        match self {
355            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
356        }
357    }
358}
359
360fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
361    glob_set
362        .split(',')
363        .map(str::trim)
364        .filter(|glob_str| !glob_str.is_empty())
365        .map(|glob_str| {
366            PathMatcher::new(glob_str)
367                .with_context(|| format!("deserializing path match glob {glob_str}"))
368        })
369        .collect()
370}
371
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted as matchers and must match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];
        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(valid_path) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            );
        }
    }

    /// Malformed globs must be rejected; well-formed ones must be accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            assert!(
                PathMatcher::new(invalid_glob).is_err(),
                "Invalid glob {invalid_glob} should not be accepted"
            );
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(valid_glob) {
                panic!("Valid glob {valid_glob} should be accepted, but got: {e}");
            }
        }
    }
}