search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::{Context, Result};
  3use client::proto;
  4use globset::{Glob, GlobMatcher};
  5use itertools::Itertools;
  6use language::{char_kind, Rope};
  7use regex::{Regex, RegexBuilder};
  8use smol::future::yield_now;
  9use std::{
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::{Path, PathBuf},
 13    sync::Arc,
 14};
 15
 16#[derive(Clone, Debug)]
 17pub struct SearchInputs {
 18    query: Arc<str>,
 19    files_to_include: Vec<PathMatcher>,
 20    files_to_exclude: Vec<PathMatcher>,
 21}
 22
 23impl SearchInputs {
 24    pub fn as_str(&self) -> &str {
 25        self.query.as_ref()
 26    }
 27    pub fn files_to_include(&self) -> &[PathMatcher] {
 28        &self.files_to_include
 29    }
 30    pub fn files_to_exclude(&self) -> &[PathMatcher] {
 31        &self.files_to_exclude
 32    }
 33}
 34#[derive(Clone, Debug)]
 35pub enum SearchQuery {
 36    Text {
 37        search: Arc<AhoCorasick<usize>>,
 38        whole_word: bool,
 39        case_sensitive: bool,
 40        inner: SearchInputs,
 41    },
 42    Regex {
 43        regex: Regex,
 44
 45        multiline: bool,
 46        whole_word: bool,
 47        case_sensitive: bool,
 48        inner: SearchInputs,
 49    },
 50}
 51
 52#[derive(Clone, Debug)]
 53pub struct PathMatcher {
 54    maybe_path: PathBuf,
 55    glob: GlobMatcher,
 56}
 57
 58impl std::fmt::Display for PathMatcher {
 59    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 60        self.maybe_path.to_string_lossy().fmt(f)
 61    }
 62}
 63
 64impl PathMatcher {
 65    pub fn new(maybe_glob: &str) -> Result<Self, globset::Error> {
 66        Ok(PathMatcher {
 67            glob: Glob::new(&maybe_glob)?.compile_matcher(),
 68            maybe_path: PathBuf::from(maybe_glob),
 69        })
 70    }
 71
 72    pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
 73        other.as_ref().starts_with(&self.maybe_path) || self.glob.is_match(other)
 74    }
 75}
 76
 77impl SearchQuery {
 78    pub fn text(
 79        query: impl ToString,
 80        whole_word: bool,
 81        case_sensitive: bool,
 82        files_to_include: Vec<PathMatcher>,
 83        files_to_exclude: Vec<PathMatcher>,
 84    ) -> Self {
 85        let query = query.to_string();
 86        let search = AhoCorasickBuilder::new()
 87            .auto_configure(&[&query])
 88            .ascii_case_insensitive(!case_sensitive)
 89            .build(&[&query]);
 90        let inner = SearchInputs {
 91            query: query.into(),
 92            files_to_exclude,
 93            files_to_include,
 94        };
 95        Self::Text {
 96            search: Arc::new(search),
 97            whole_word,
 98            case_sensitive,
 99            inner,
100        }
101    }
102
103    pub fn regex(
104        query: impl ToString,
105        whole_word: bool,
106        case_sensitive: bool,
107        files_to_include: Vec<PathMatcher>,
108        files_to_exclude: Vec<PathMatcher>,
109    ) -> Result<Self> {
110        let mut query = query.to_string();
111        let initial_query = Arc::from(query.as_str());
112        if whole_word {
113            let mut word_query = String::new();
114            word_query.push_str("\\b");
115            word_query.push_str(&query);
116            word_query.push_str("\\b");
117            query = word_query
118        }
119
120        let multiline = query.contains('\n') || query.contains("\\n");
121        let regex = RegexBuilder::new(&query)
122            .case_insensitive(!case_sensitive)
123            .multi_line(multiline)
124            .build()?;
125        let inner = SearchInputs {
126            query: initial_query,
127            files_to_exclude,
128            files_to_include,
129        };
130        Ok(Self::Regex {
131            regex,
132            multiline,
133            whole_word,
134            case_sensitive,
135            inner,
136        })
137    }
138
139    pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
140        if message.regex {
141            Self::regex(
142                message.query,
143                message.whole_word,
144                message.case_sensitive,
145                deserialize_path_matches(&message.files_to_include)?,
146                deserialize_path_matches(&message.files_to_exclude)?,
147            )
148        } else {
149            Ok(Self::text(
150                message.query,
151                message.whole_word,
152                message.case_sensitive,
153                deserialize_path_matches(&message.files_to_include)?,
154                deserialize_path_matches(&message.files_to_exclude)?,
155            ))
156        }
157    }
158
159    pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
160        proto::SearchProject {
161            project_id,
162            query: self.as_str().to_string(),
163            regex: self.is_regex(),
164            whole_word: self.whole_word(),
165            case_sensitive: self.case_sensitive(),
166            files_to_include: self
167                .files_to_include()
168                .iter()
169                .map(|matcher| matcher.to_string())
170                .join(","),
171            files_to_exclude: self
172                .files_to_exclude()
173                .iter()
174                .map(|matcher| matcher.to_string())
175                .join(","),
176        }
177    }
178
179    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
180        if self.as_str().is_empty() {
181            return Ok(false);
182        }
183
184        match self {
185            Self::Text { search, .. } => {
186                let mat = search.stream_find_iter(stream).next();
187                match mat {
188                    Some(Ok(_)) => Ok(true),
189                    Some(Err(err)) => Err(err.into()),
190                    None => Ok(false),
191                }
192            }
193            Self::Regex {
194                regex, multiline, ..
195            } => {
196                let mut reader = BufReader::new(stream);
197                if *multiline {
198                    let mut text = String::new();
199                    if let Err(err) = reader.read_to_string(&mut text) {
200                        Err(err.into())
201                    } else {
202                        Ok(regex.find(&text).is_some())
203                    }
204                } else {
205                    for line in reader.lines() {
206                        let line = line?;
207                        if regex.find(&line).is_some() {
208                            return Ok(true);
209                        }
210                    }
211                    Ok(false)
212                }
213            }
214        }
215    }
216
217    pub async fn search(&self, rope: &Rope) -> Vec<Range<usize>> {
218        const YIELD_INTERVAL: usize = 20000;
219
220        if self.as_str().is_empty() {
221            return Default::default();
222        }
223
224        let mut matches = Vec::new();
225        match self {
226            Self::Text {
227                search, whole_word, ..
228            } => {
229                for (ix, mat) in search
230                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
231                    .enumerate()
232                {
233                    if (ix + 1) % YIELD_INTERVAL == 0 {
234                        yield_now().await;
235                    }
236
237                    let mat = mat.unwrap();
238                    if *whole_word {
239                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(char_kind);
240                        let start_kind = char_kind(rope.chars_at(mat.start()).next().unwrap());
241                        let end_kind = char_kind(rope.reversed_chars_at(mat.end()).next().unwrap());
242                        let next_kind = rope.chars_at(mat.end()).next().map(char_kind);
243                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
244                            continue;
245                        }
246                    }
247                    matches.push(mat.start()..mat.end())
248                }
249            }
250            Self::Regex {
251                regex, multiline, ..
252            } => {
253                if *multiline {
254                    let text = rope.to_string();
255                    for (ix, mat) in regex.find_iter(&text).enumerate() {
256                        if (ix + 1) % YIELD_INTERVAL == 0 {
257                            yield_now().await;
258                        }
259
260                        matches.push(mat.start()..mat.end());
261                    }
262                } else {
263                    let mut line = String::new();
264                    let mut line_offset = 0;
265                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
266                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
267                            yield_now().await;
268                        }
269
270                        for (newline_ix, text) in chunk.split('\n').enumerate() {
271                            if newline_ix > 0 {
272                                for mat in regex.find_iter(&line) {
273                                    let start = line_offset + mat.start();
274                                    let end = line_offset + mat.end();
275                                    matches.push(start..end);
276                                }
277
278                                line_offset += line.len() + 1;
279                                line.clear();
280                            }
281                            line.push_str(text);
282                        }
283                    }
284                }
285            }
286        }
287        matches
288    }
289
290    pub fn as_str(&self) -> &str {
291        self.as_inner().as_str()
292    }
293
294    pub fn whole_word(&self) -> bool {
295        match self {
296            Self::Text { whole_word, .. } => *whole_word,
297            Self::Regex { whole_word, .. } => *whole_word,
298        }
299    }
300
301    pub fn case_sensitive(&self) -> bool {
302        match self {
303            Self::Text { case_sensitive, .. } => *case_sensitive,
304            Self::Regex { case_sensitive, .. } => *case_sensitive,
305        }
306    }
307
308    pub fn is_regex(&self) -> bool {
309        matches!(self, Self::Regex { .. })
310    }
311
312    pub fn files_to_include(&self) -> &[PathMatcher] {
313        self.as_inner().files_to_include()
314    }
315
316    pub fn files_to_exclude(&self) -> &[PathMatcher] {
317        self.as_inner().files_to_exclude()
318    }
319
320    pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
321        match file_path {
322            Some(file_path) => {
323                !self
324                    .files_to_exclude()
325                    .iter()
326                    .any(|exclude_glob| exclude_glob.is_match(file_path))
327                    && (self.files_to_include().is_empty()
328                        || self
329                            .files_to_include()
330                            .iter()
331                            .any(|include_glob| include_glob.is_match(file_path)))
332            }
333            None => self.files_to_include().is_empty(),
334        }
335    }
336    pub fn as_inner(&self) -> &SearchInputs {
337        match self {
338            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
339        }
340    }
341}
342
343fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
344    glob_set
345        .split(',')
346        .map(str::trim)
347        .filter(|glob_str| !glob_str.is_empty())
348        .map(|glob_str| {
349            PathMatcher::new(glob_str)
350                .with_context(|| format!("deserializing path match glob {glob_str}"))
351        })
352        .collect()
353}
354
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted as matchers and must match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(valid_path) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            );
        }
    }

    /// Malformed globs must be rejected and well-formed globs must be accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(invalid_glob).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(valid_glob) {
                panic!("Valid glob {valid_glob} should be accepted, but got: {e}");
            }
        }
    }
}