search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::{Context, Result};
  3use client::proto;
  4use globset::{Glob, GlobMatcher};
  5use itertools::Itertools;
  6use language::{char_kind, BufferSnapshot};
  7use regex::{Regex, RegexBuilder};
  8use smol::future::yield_now;
  9use std::{
 10    borrow::Cow,
 11    io::{BufRead, BufReader, Read},
 12    ops::Range,
 13    path::{Path, PathBuf},
 14    sync::Arc,
 15};
 16
 17#[derive(Clone, Debug)]
 18pub struct SearchInputs {
 19    query: Arc<str>,
 20    files_to_include: Vec<PathMatcher>,
 21    files_to_exclude: Vec<PathMatcher>,
 22}
 23
 24impl SearchInputs {
 25    pub fn as_str(&self) -> &str {
 26        self.query.as_ref()
 27    }
 28    pub fn files_to_include(&self) -> &[PathMatcher] {
 29        &self.files_to_include
 30    }
 31    pub fn files_to_exclude(&self) -> &[PathMatcher] {
 32        &self.files_to_exclude
 33    }
 34}
 35#[derive(Clone, Debug)]
 36pub enum SearchQuery {
 37    Text {
 38        search: Arc<AhoCorasick>,
 39        replacement: Option<String>,
 40        whole_word: bool,
 41        case_sensitive: bool,
 42        inner: SearchInputs,
 43    },
 44
 45    Regex {
 46        regex: Regex,
 47        replacement: Option<String>,
 48        multiline: bool,
 49        whole_word: bool,
 50        case_sensitive: bool,
 51        inner: SearchInputs,
 52    },
 53}
 54
 55#[derive(Clone, Debug)]
 56pub struct PathMatcher {
 57    maybe_path: PathBuf,
 58    glob: GlobMatcher,
 59}
 60
 61impl std::fmt::Display for PathMatcher {
 62    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 63        self.maybe_path.to_string_lossy().fmt(f)
 64    }
 65}
 66
 67impl PathMatcher {
 68    pub fn new(maybe_glob: &str) -> Result<Self, globset::Error> {
 69        Ok(PathMatcher {
 70            glob: Glob::new(&maybe_glob)?.compile_matcher(),
 71            maybe_path: PathBuf::from(maybe_glob),
 72        })
 73    }
 74
 75    pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
 76        other.as_ref().starts_with(&self.maybe_path) || self.glob.is_match(other)
 77    }
 78}
 79
 80impl SearchQuery {
 81    pub fn text(
 82        query: impl ToString,
 83        whole_word: bool,
 84        case_sensitive: bool,
 85        files_to_include: Vec<PathMatcher>,
 86        files_to_exclude: Vec<PathMatcher>,
 87    ) -> Result<Self> {
 88        let query = query.to_string();
 89        let search = AhoCorasickBuilder::new()
 90            .ascii_case_insensitive(!case_sensitive)
 91            .build(&[&query])?;
 92        let inner = SearchInputs {
 93            query: query.into(),
 94            files_to_exclude,
 95            files_to_include,
 96        };
 97        Ok(Self::Text {
 98            search: Arc::new(search),
 99            replacement: None,
100            whole_word,
101            case_sensitive,
102            inner,
103        })
104    }
105
106    pub fn regex(
107        query: impl ToString,
108        whole_word: bool,
109        case_sensitive: bool,
110        files_to_include: Vec<PathMatcher>,
111        files_to_exclude: Vec<PathMatcher>,
112    ) -> Result<Self> {
113        let mut query = query.to_string();
114        let initial_query = Arc::from(query.as_str());
115        if whole_word {
116            let mut word_query = String::new();
117            word_query.push_str("\\b");
118            word_query.push_str(&query);
119            word_query.push_str("\\b");
120            query = word_query
121        }
122
123        let multiline = query.contains('\n') || query.contains("\\n");
124        let regex = RegexBuilder::new(&query)
125            .case_insensitive(!case_sensitive)
126            .multi_line(multiline)
127            .build()?;
128        let inner = SearchInputs {
129            query: initial_query,
130            files_to_exclude,
131            files_to_include,
132        };
133        Ok(Self::Regex {
134            regex,
135            replacement: None,
136            multiline,
137            whole_word,
138            case_sensitive,
139            inner,
140        })
141    }
142
143    pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
144        if message.regex {
145            Self::regex(
146                message.query,
147                message.whole_word,
148                message.case_sensitive,
149                deserialize_path_matches(&message.files_to_include)?,
150                deserialize_path_matches(&message.files_to_exclude)?,
151            )
152        } else {
153            Self::text(
154                message.query,
155                message.whole_word,
156                message.case_sensitive,
157                deserialize_path_matches(&message.files_to_include)?,
158                deserialize_path_matches(&message.files_to_exclude)?,
159            )
160        }
161    }
162    pub fn with_replacement(mut self, new_replacement: Option<String>) -> Self {
163        match self {
164            Self::Text {
165                ref mut replacement,
166                ..
167            }
168            | Self::Regex {
169                ref mut replacement,
170                ..
171            } => {
172                *replacement = new_replacement;
173                self
174            }
175        }
176    }
177    pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
178        proto::SearchProject {
179            project_id,
180            query: self.as_str().to_string(),
181            regex: self.is_regex(),
182            whole_word: self.whole_word(),
183            case_sensitive: self.case_sensitive(),
184            files_to_include: self
185                .files_to_include()
186                .iter()
187                .map(|matcher| matcher.to_string())
188                .join(","),
189            files_to_exclude: self
190                .files_to_exclude()
191                .iter()
192                .map(|matcher| matcher.to_string())
193                .join(","),
194        }
195    }
196
197    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
198        if self.as_str().is_empty() {
199            return Ok(false);
200        }
201
202        match self {
203            Self::Text { search, .. } => {
204                let mat = search.stream_find_iter(stream).next();
205                match mat {
206                    Some(Ok(_)) => Ok(true),
207                    Some(Err(err)) => Err(err.into()),
208                    None => Ok(false),
209                }
210            }
211            Self::Regex {
212                regex, multiline, ..
213            } => {
214                let mut reader = BufReader::new(stream);
215                if *multiline {
216                    let mut text = String::new();
217                    if let Err(err) = reader.read_to_string(&mut text) {
218                        Err(err.into())
219                    } else {
220                        Ok(regex.find(&text).is_some())
221                    }
222                } else {
223                    for line in reader.lines() {
224                        let line = line?;
225                        if regex.find(&line).is_some() {
226                            return Ok(true);
227                        }
228                    }
229                    Ok(false)
230                }
231            }
232        }
233    }
234    pub fn replacement<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
235        match self {
236            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
237            SearchQuery::Regex {
238                regex, replacement, ..
239            } => {
240                if let Some(replacement) = replacement {
241                    Some(regex.replace(text, replacement))
242                } else {
243                    None
244                }
245            }
246        }
247    }
248    pub async fn search(
249        &self,
250        buffer: &BufferSnapshot,
251        subrange: Option<Range<usize>>,
252    ) -> Vec<Range<usize>> {
253        const YIELD_INTERVAL: usize = 20000;
254
255        if self.as_str().is_empty() {
256            return Default::default();
257        }
258
259        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
260        let rope = if let Some(range) = subrange {
261            buffer.as_rope().slice(range)
262        } else {
263            buffer.as_rope().clone()
264        };
265
266        let mut matches = Vec::new();
267        match self {
268            Self::Text {
269                search, whole_word, ..
270            } => {
271                for (ix, mat) in search
272                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
273                    .enumerate()
274                {
275                    if (ix + 1) % YIELD_INTERVAL == 0 {
276                        yield_now().await;
277                    }
278
279                    let mat = mat.unwrap();
280                    if *whole_word {
281                        let scope = buffer.language_scope_at(range_offset + mat.start());
282                        let kind = |c| char_kind(&scope, c);
283
284                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
285                        let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
286                        let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
287                        let next_kind = rope.chars_at(mat.end()).next().map(kind);
288                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
289                            continue;
290                        }
291                    }
292                    matches.push(mat.start()..mat.end())
293                }
294            }
295
296            Self::Regex {
297                regex, multiline, ..
298            } => {
299                if *multiline {
300                    let text = rope.to_string();
301                    for (ix, mat) in regex.find_iter(&text).enumerate() {
302                        if (ix + 1) % YIELD_INTERVAL == 0 {
303                            yield_now().await;
304                        }
305
306                        matches.push(mat.start()..mat.end());
307                    }
308                } else {
309                    let mut line = String::new();
310                    let mut line_offset = 0;
311                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
312                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
313                            yield_now().await;
314                        }
315
316                        for (newline_ix, text) in chunk.split('\n').enumerate() {
317                            if newline_ix > 0 {
318                                for mat in regex.find_iter(&line) {
319                                    let start = line_offset + mat.start();
320                                    let end = line_offset + mat.end();
321                                    matches.push(start..end);
322                                }
323
324                                line_offset += line.len() + 1;
325                                line.clear();
326                            }
327                            line.push_str(text);
328                        }
329                    }
330                }
331            }
332        }
333
334        matches
335    }
336
337    pub fn as_str(&self) -> &str {
338        self.as_inner().as_str()
339    }
340
341    pub fn whole_word(&self) -> bool {
342        match self {
343            Self::Text { whole_word, .. } => *whole_word,
344            Self::Regex { whole_word, .. } => *whole_word,
345        }
346    }
347
348    pub fn case_sensitive(&self) -> bool {
349        match self {
350            Self::Text { case_sensitive, .. } => *case_sensitive,
351            Self::Regex { case_sensitive, .. } => *case_sensitive,
352        }
353    }
354
355    pub fn is_regex(&self) -> bool {
356        matches!(self, Self::Regex { .. })
357    }
358
359    pub fn files_to_include(&self) -> &[PathMatcher] {
360        self.as_inner().files_to_include()
361    }
362
363    pub fn files_to_exclude(&self) -> &[PathMatcher] {
364        self.as_inner().files_to_exclude()
365    }
366
367    pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
368        match file_path {
369            Some(file_path) => {
370                !self
371                    .files_to_exclude()
372                    .iter()
373                    .any(|exclude_glob| exclude_glob.is_match(file_path))
374                    && (self.files_to_include().is_empty()
375                        || self
376                            .files_to_include()
377                            .iter()
378                            .any(|include_glob| include_glob.is_match(file_path)))
379            }
380            None => self.files_to_include().is_empty(),
381        }
382    }
383    pub fn as_inner(&self) -> &SearchInputs {
384        match self {
385            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
386        }
387    }
388}
389
390fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
391    glob_set
392        .split(',')
393        .map(str::trim)
394        .filter(|glob_str| !glob_str.is_empty())
395        .map(|glob_str| {
396            PathMatcher::new(glob_str)
397                .with_context(|| format!("deserializing path match glob {glob_str}"))
398        })
399        .collect()
400}
401
402#[cfg(test)]
403mod tests {
404    use super::*;
405
406    #[test]
407    fn path_matcher_creation_for_valid_paths() {
408        for valid_path in [
409            "file",
410            "Cargo.toml",
411            ".DS_Store",
412            "~/dir/another_dir/",
413            "./dir/file",
414            "dir/[a-z].txt",
415            "../dir/filé",
416        ] {
417            let path_matcher = PathMatcher::new(valid_path).unwrap_or_else(|e| {
418                panic!("Valid path {valid_path} should be accepted, but got: {e}")
419            });
420            assert!(
421                path_matcher.is_match(valid_path),
422                "Path matcher for valid path {valid_path} should match itself"
423            )
424        }
425    }
426
427    #[test]
428    fn path_matcher_creation_for_globs() {
429        for invalid_glob in ["dir/[].txt", "dir/[a-z.txt", "dir/{file"] {
430            match PathMatcher::new(invalid_glob) {
431                Ok(_) => panic!("Invalid glob {invalid_glob} should not be accepted"),
432                Err(_expected) => {}
433            }
434        }
435
436        for valid_glob in [
437            "dir/?ile",
438            "dir/*.txt",
439            "dir/**/file",
440            "dir/[a-z].txt",
441            "{dir,file}",
442        ] {
443            match PathMatcher::new(valid_glob) {
444                Ok(_expected) => {}
445                Err(e) => panic!("Valid glob {valid_glob} should be accepted, but got: {e}"),
446            }
447        }
448    }
449}