search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::{Context, Result};
  3use client::proto;
  4use globset::{Glob, GlobMatcher};
  5use itertools::Itertools;
  6use language::{char_kind, BufferSnapshot};
  7use regex::{Regex, RegexBuilder};
  8use smol::future::yield_now;
  9use std::{
 10    borrow::Cow,
 11    io::{BufRead, BufReader, Read},
 12    ops::Range,
 13    path::{Path, PathBuf},
 14    sync::Arc,
 15};
 16
 17#[derive(Clone, Debug)]
 18pub struct SearchInputs {
 19    query: Arc<str>,
 20    files_to_include: Vec<PathMatcher>,
 21    files_to_exclude: Vec<PathMatcher>,
 22}
 23
 24impl SearchInputs {
 25    pub fn as_str(&self) -> &str {
 26        self.query.as_ref()
 27    }
 28    pub fn files_to_include(&self) -> &[PathMatcher] {
 29        &self.files_to_include
 30    }
 31    pub fn files_to_exclude(&self) -> &[PathMatcher] {
 32        &self.files_to_exclude
 33    }
 34}
 35#[derive(Clone, Debug)]
 36pub enum SearchQuery {
 37    Text {
 38        search: Arc<AhoCorasick>,
 39        replacement: Option<String>,
 40        whole_word: bool,
 41        case_sensitive: bool,
 42        inner: SearchInputs,
 43    },
 44
 45    Regex {
 46        regex: Regex,
 47        replacement: Option<String>,
 48        multiline: bool,
 49        whole_word: bool,
 50        case_sensitive: bool,
 51        inner: SearchInputs,
 52    },
 53}
 54
 55#[derive(Clone, Debug)]
 56pub struct PathMatcher {
 57    maybe_path: PathBuf,
 58    glob: GlobMatcher,
 59}
 60
 61impl std::fmt::Display for PathMatcher {
 62    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 63        self.maybe_path.to_string_lossy().fmt(f)
 64    }
 65}
 66
 67impl PathMatcher {
 68    pub fn new(maybe_glob: &str) -> Result<Self, globset::Error> {
 69        Ok(PathMatcher {
 70            glob: Glob::new(&maybe_glob)?.compile_matcher(),
 71            maybe_path: PathBuf::from(maybe_glob),
 72        })
 73    }
 74
 75    pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
 76        other.as_ref().starts_with(&self.maybe_path) || self.glob.is_match(other)
 77    }
 78}
 79
 80impl SearchQuery {
 81    pub fn text(
 82        query: impl ToString,
 83        whole_word: bool,
 84        case_sensitive: bool,
 85        files_to_include: Vec<PathMatcher>,
 86        files_to_exclude: Vec<PathMatcher>,
 87    ) -> Result<Self> {
 88        let query = query.to_string();
 89        let search = AhoCorasickBuilder::new()
 90            .ascii_case_insensitive(!case_sensitive)
 91            .build(&[&query])?;
 92        let inner = SearchInputs {
 93            query: query.into(),
 94            files_to_exclude,
 95            files_to_include,
 96        };
 97        Ok(Self::Text {
 98            search: Arc::new(search),
 99            replacement: None,
100            whole_word,
101            case_sensitive,
102            inner,
103        })
104    }
105
106    pub fn regex(
107        query: impl ToString,
108        whole_word: bool,
109        case_sensitive: bool,
110        files_to_include: Vec<PathMatcher>,
111        files_to_exclude: Vec<PathMatcher>,
112    ) -> Result<Self> {
113        let mut query = query.to_string();
114        let initial_query = Arc::from(query.as_str());
115        if whole_word {
116            let mut word_query = String::new();
117            word_query.push_str("\\b");
118            word_query.push_str(&query);
119            word_query.push_str("\\b");
120            query = word_query
121        }
122
123        let multiline = query.contains('\n') || query.contains("\\n");
124        let regex = RegexBuilder::new(&query)
125            .case_insensitive(!case_sensitive)
126            .multi_line(multiline)
127            .build()?;
128        let inner = SearchInputs {
129            query: initial_query,
130            files_to_exclude,
131            files_to_include,
132        };
133        Ok(Self::Regex {
134            regex,
135            replacement: None,
136            multiline,
137            whole_word,
138            case_sensitive,
139            inner,
140        })
141    }
142
143    pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
144        if message.regex {
145            Self::regex(
146                message.query,
147                message.whole_word,
148                message.case_sensitive,
149                deserialize_path_matches(&message.files_to_include)?,
150                deserialize_path_matches(&message.files_to_exclude)?,
151            )
152        } else {
153            Self::text(
154                message.query,
155                message.whole_word,
156                message.case_sensitive,
157                deserialize_path_matches(&message.files_to_include)?,
158                deserialize_path_matches(&message.files_to_exclude)?,
159            )
160        }
161    }
162    pub fn with_replacement(mut self, new_replacement: String) -> Self {
163        match self {
164            Self::Text {
165                ref mut replacement,
166                ..
167            }
168            | Self::Regex {
169                ref mut replacement,
170                ..
171            } => {
172                *replacement = Some(new_replacement);
173                self
174            }
175        }
176    }
177    pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
178        proto::SearchProject {
179            project_id,
180            query: self.as_str().to_string(),
181            regex: self.is_regex(),
182            whole_word: self.whole_word(),
183            case_sensitive: self.case_sensitive(),
184            files_to_include: self
185                .files_to_include()
186                .iter()
187                .map(|matcher| matcher.to_string())
188                .join(","),
189            files_to_exclude: self
190                .files_to_exclude()
191                .iter()
192                .map(|matcher| matcher.to_string())
193                .join(","),
194        }
195    }
196
197    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
198        if self.as_str().is_empty() {
199            return Ok(false);
200        }
201
202        match self {
203            Self::Text { search, .. } => {
204                let mat = search.stream_find_iter(stream).next();
205                match mat {
206                    Some(Ok(_)) => Ok(true),
207                    Some(Err(err)) => Err(err.into()),
208                    None => Ok(false),
209                }
210            }
211            Self::Regex {
212                regex, multiline, ..
213            } => {
214                let mut reader = BufReader::new(stream);
215                if *multiline {
216                    let mut text = String::new();
217                    if let Err(err) = reader.read_to_string(&mut text) {
218                        Err(err.into())
219                    } else {
220                        Ok(regex.find(&text).is_some())
221                    }
222                } else {
223                    for line in reader.lines() {
224                        let line = line?;
225                        if regex.find(&line).is_some() {
226                            return Ok(true);
227                        }
228                    }
229                    Ok(false)
230                }
231            }
232        }
233    }
234    /// Returns the replacement text for this `SearchQuery`.
235    pub fn replacement(&self) -> Option<&str> {
236        match self {
237            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
238                replacement.as_deref()
239            }
240        }
241    }
242    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
243    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
244        match self {
245            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
246            SearchQuery::Regex {
247                regex, replacement, ..
248            } => {
249                if let Some(replacement) = replacement {
250                    Some(regex.replace(text, replacement))
251                } else {
252                    None
253                }
254            }
255        }
256    }
257    pub async fn search(
258        &self,
259        buffer: &BufferSnapshot,
260        subrange: Option<Range<usize>>,
261    ) -> Vec<Range<usize>> {
262        const YIELD_INTERVAL: usize = 20000;
263
264        if self.as_str().is_empty() {
265            return Default::default();
266        }
267
268        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
269        let rope = if let Some(range) = subrange {
270            buffer.as_rope().slice(range)
271        } else {
272            buffer.as_rope().clone()
273        };
274
275        let mut matches = Vec::new();
276        match self {
277            Self::Text {
278                search, whole_word, ..
279            } => {
280                for (ix, mat) in search
281                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
282                    .enumerate()
283                {
284                    if (ix + 1) % YIELD_INTERVAL == 0 {
285                        yield_now().await;
286                    }
287
288                    let mat = mat.unwrap();
289                    if *whole_word {
290                        let scope = buffer.language_scope_at(range_offset + mat.start());
291                        let kind = |c| char_kind(&scope, c);
292
293                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
294                        let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
295                        let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
296                        let next_kind = rope.chars_at(mat.end()).next().map(kind);
297                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
298                            continue;
299                        }
300                    }
301                    matches.push(mat.start()..mat.end())
302                }
303            }
304
305            Self::Regex {
306                regex, multiline, ..
307            } => {
308                if *multiline {
309                    let text = rope.to_string();
310                    for (ix, mat) in regex.find_iter(&text).enumerate() {
311                        if (ix + 1) % YIELD_INTERVAL == 0 {
312                            yield_now().await;
313                        }
314
315                        matches.push(mat.start()..mat.end());
316                    }
317                } else {
318                    let mut line = String::new();
319                    let mut line_offset = 0;
320                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
321                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
322                            yield_now().await;
323                        }
324
325                        for (newline_ix, text) in chunk.split('\n').enumerate() {
326                            if newline_ix > 0 {
327                                for mat in regex.find_iter(&line) {
328                                    let start = line_offset + mat.start();
329                                    let end = line_offset + mat.end();
330                                    matches.push(start..end);
331                                }
332
333                                line_offset += line.len() + 1;
334                                line.clear();
335                            }
336                            line.push_str(text);
337                        }
338                    }
339                }
340            }
341        }
342
343        matches
344    }
345
346    pub fn as_str(&self) -> &str {
347        self.as_inner().as_str()
348    }
349
350    pub fn whole_word(&self) -> bool {
351        match self {
352            Self::Text { whole_word, .. } => *whole_word,
353            Self::Regex { whole_word, .. } => *whole_word,
354        }
355    }
356
357    pub fn case_sensitive(&self) -> bool {
358        match self {
359            Self::Text { case_sensitive, .. } => *case_sensitive,
360            Self::Regex { case_sensitive, .. } => *case_sensitive,
361        }
362    }
363
364    pub fn is_regex(&self) -> bool {
365        matches!(self, Self::Regex { .. })
366    }
367
368    pub fn files_to_include(&self) -> &[PathMatcher] {
369        self.as_inner().files_to_include()
370    }
371
372    pub fn files_to_exclude(&self) -> &[PathMatcher] {
373        self.as_inner().files_to_exclude()
374    }
375
376    pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
377        match file_path {
378            Some(file_path) => {
379                !self
380                    .files_to_exclude()
381                    .iter()
382                    .any(|exclude_glob| exclude_glob.is_match(file_path))
383                    && (self.files_to_include().is_empty()
384                        || self
385                            .files_to_include()
386                            .iter()
387                            .any(|include_glob| include_glob.is_match(file_path)))
388            }
389            None => self.files_to_include().is_empty(),
390        }
391    }
392    pub fn as_inner(&self) -> &SearchInputs {
393        match self {
394            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
395        }
396    }
397}
398
399fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
400    glob_set
401        .split(',')
402        .map(str::trim)
403        .filter(|glob_str| !glob_str.is_empty())
404        .map(|glob_str| {
405            PathMatcher::new(glob_str)
406                .with_context(|| format!("deserializing path match glob {glob_str}"))
407        })
408        .collect()
409}
410
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths (no glob syntax intended) must be accepted and must match
    /// themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(valid_path) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed globs accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(invalid_glob).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted")
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(valid_glob) {
                panic!("Valid glob {valid_glob} should be accepted, but got: {e}")
            }
        }
    }
}
458}