search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::{Context, Result};
  3use client::proto;
  4use globset::{Glob, GlobMatcher};
  5use itertools::Itertools;
  6use language::{char_kind, BufferSnapshot};
  7use regex::{Regex, RegexBuilder};
  8use smol::future::yield_now;
  9use std::{
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::{Path, PathBuf},
 13    sync::Arc,
 14};
 15
 16#[derive(Clone, Debug)]
 17pub enum SearchQuery {
 18    Text {
 19        search: Arc<AhoCorasick<usize>>,
 20        query: Arc<str>,
 21        whole_word: bool,
 22        case_sensitive: bool,
 23        files_to_include: Vec<PathMatcher>,
 24        files_to_exclude: Vec<PathMatcher>,
 25    },
 26
 27    Regex {
 28        regex: Regex,
 29        query: Arc<str>,
 30        multiline: bool,
 31        whole_word: bool,
 32        case_sensitive: bool,
 33        files_to_include: Vec<PathMatcher>,
 34        files_to_exclude: Vec<PathMatcher>,
 35    },
 36}
 37
 38#[derive(Clone, Debug)]
 39pub struct PathMatcher {
 40    maybe_path: PathBuf,
 41    glob: GlobMatcher,
 42}
 43
 44impl std::fmt::Display for PathMatcher {
 45    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 46        self.maybe_path.to_string_lossy().fmt(f)
 47    }
 48}
 49
 50impl PathMatcher {
 51    pub fn new(maybe_glob: &str) -> Result<Self, globset::Error> {
 52        Ok(PathMatcher {
 53            glob: Glob::new(&maybe_glob)?.compile_matcher(),
 54            maybe_path: PathBuf::from(maybe_glob),
 55        })
 56    }
 57
 58    pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
 59        other.as_ref().starts_with(&self.maybe_path) || self.glob.is_match(other)
 60    }
 61}
 62
 63impl SearchQuery {
 64    pub fn text(
 65        query: impl ToString,
 66        whole_word: bool,
 67        case_sensitive: bool,
 68        files_to_include: Vec<PathMatcher>,
 69        files_to_exclude: Vec<PathMatcher>,
 70    ) -> Self {
 71        let query = query.to_string();
 72        let search = AhoCorasickBuilder::new()
 73            .auto_configure(&[&query])
 74            .ascii_case_insensitive(!case_sensitive)
 75            .build(&[&query]);
 76        Self::Text {
 77            search: Arc::new(search),
 78            query: Arc::from(query),
 79            whole_word,
 80            case_sensitive,
 81            files_to_include,
 82            files_to_exclude,
 83        }
 84    }
 85
 86    pub fn regex(
 87        query: impl ToString,
 88        whole_word: bool,
 89        case_sensitive: bool,
 90        files_to_include: Vec<PathMatcher>,
 91        files_to_exclude: Vec<PathMatcher>,
 92    ) -> Result<Self> {
 93        let mut query = query.to_string();
 94        let initial_query = Arc::from(query.as_str());
 95        if whole_word {
 96            let mut word_query = String::new();
 97            word_query.push_str("\\b");
 98            word_query.push_str(&query);
 99            word_query.push_str("\\b");
100            query = word_query
101        }
102
103        let multiline = query.contains('\n') || query.contains("\\n");
104        let regex = RegexBuilder::new(&query)
105            .case_insensitive(!case_sensitive)
106            .multi_line(multiline)
107            .build()?;
108        Ok(Self::Regex {
109            regex,
110            query: initial_query,
111            multiline,
112            whole_word,
113            case_sensitive,
114            files_to_include,
115            files_to_exclude,
116        })
117    }
118
119    pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
120        if message.regex {
121            Self::regex(
122                message.query,
123                message.whole_word,
124                message.case_sensitive,
125                deserialize_path_matches(&message.files_to_include)?,
126                deserialize_path_matches(&message.files_to_exclude)?,
127            )
128        } else {
129            Ok(Self::text(
130                message.query,
131                message.whole_word,
132                message.case_sensitive,
133                deserialize_path_matches(&message.files_to_include)?,
134                deserialize_path_matches(&message.files_to_exclude)?,
135            ))
136        }
137    }
138
139    pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
140        proto::SearchProject {
141            project_id,
142            query: self.as_str().to_string(),
143            regex: self.is_regex(),
144            whole_word: self.whole_word(),
145            case_sensitive: self.case_sensitive(),
146            files_to_include: self
147                .files_to_include()
148                .iter()
149                .map(|matcher| matcher.to_string())
150                .join(","),
151            files_to_exclude: self
152                .files_to_exclude()
153                .iter()
154                .map(|matcher| matcher.to_string())
155                .join(","),
156        }
157    }
158
159    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
160        if self.as_str().is_empty() {
161            return Ok(false);
162        }
163
164        match self {
165            Self::Text { search, .. } => {
166                let mat = search.stream_find_iter(stream).next();
167                match mat {
168                    Some(Ok(_)) => Ok(true),
169                    Some(Err(err)) => Err(err.into()),
170                    None => Ok(false),
171                }
172            }
173            Self::Regex {
174                regex, multiline, ..
175            } => {
176                let mut reader = BufReader::new(stream);
177                if *multiline {
178                    let mut text = String::new();
179                    if let Err(err) = reader.read_to_string(&mut text) {
180                        Err(err.into())
181                    } else {
182                        Ok(regex.find(&text).is_some())
183                    }
184                } else {
185                    for line in reader.lines() {
186                        let line = line?;
187                        if regex.find(&line).is_some() {
188                            return Ok(true);
189                        }
190                    }
191                    Ok(false)
192                }
193            }
194        }
195    }
196
197    pub async fn search(
198        &self,
199        buffer: &BufferSnapshot,
200        subrange: Option<Range<usize>>,
201    ) -> Vec<Range<usize>> {
202        const YIELD_INTERVAL: usize = 20000;
203
204        if self.as_str().is_empty() {
205            return Default::default();
206        }
207
208        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
209        let rope = if let Some(range) = subrange {
210            buffer.as_rope().slice(range)
211        } else {
212            buffer.as_rope().clone()
213        };
214
215        let mut matches = Vec::new();
216        match self {
217            Self::Text {
218                search, whole_word, ..
219            } => {
220                for (ix, mat) in search
221                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
222                    .enumerate()
223                {
224                    if (ix + 1) % YIELD_INTERVAL == 0 {
225                        yield_now().await;
226                    }
227
228                    let mat = mat.unwrap();
229                    if *whole_word {
230                        let scope = buffer.language_scope_at(range_offset + mat.start());
231                        let kind = |c| char_kind(&scope, c);
232
233                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
234                        let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
235                        let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
236                        let next_kind = rope.chars_at(mat.end()).next().map(kind);
237                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
238                            continue;
239                        }
240                    }
241                    matches.push(mat.start()..mat.end())
242                }
243            }
244
245            Self::Regex {
246                regex, multiline, ..
247            } => {
248                if *multiline {
249                    let text = rope.to_string();
250                    for (ix, mat) in regex.find_iter(&text).enumerate() {
251                        if (ix + 1) % YIELD_INTERVAL == 0 {
252                            yield_now().await;
253                        }
254
255                        matches.push(mat.start()..mat.end());
256                    }
257                } else {
258                    let mut line = String::new();
259                    let mut line_offset = 0;
260                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
261                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
262                            yield_now().await;
263                        }
264
265                        for (newline_ix, text) in chunk.split('\n').enumerate() {
266                            if newline_ix > 0 {
267                                for mat in regex.find_iter(&line) {
268                                    let start = line_offset + mat.start();
269                                    let end = line_offset + mat.end();
270                                    matches.push(start..end);
271                                }
272
273                                line_offset += line.len() + 1;
274                                line.clear();
275                            }
276                            line.push_str(text);
277                        }
278                    }
279                }
280            }
281        }
282
283        matches
284    }
285
286    pub fn as_str(&self) -> &str {
287        match self {
288            Self::Text { query, .. } => query.as_ref(),
289            Self::Regex { query, .. } => query.as_ref(),
290        }
291    }
292
293    pub fn whole_word(&self) -> bool {
294        match self {
295            Self::Text { whole_word, .. } => *whole_word,
296            Self::Regex { whole_word, .. } => *whole_word,
297        }
298    }
299
300    pub fn case_sensitive(&self) -> bool {
301        match self {
302            Self::Text { case_sensitive, .. } => *case_sensitive,
303            Self::Regex { case_sensitive, .. } => *case_sensitive,
304        }
305    }
306
307    pub fn is_regex(&self) -> bool {
308        matches!(self, Self::Regex { .. })
309    }
310
311    pub fn files_to_include(&self) -> &[PathMatcher] {
312        match self {
313            Self::Text {
314                files_to_include, ..
315            } => files_to_include,
316            Self::Regex {
317                files_to_include, ..
318            } => files_to_include,
319        }
320    }
321
322    pub fn files_to_exclude(&self) -> &[PathMatcher] {
323        match self {
324            Self::Text {
325                files_to_exclude, ..
326            } => files_to_exclude,
327            Self::Regex {
328                files_to_exclude, ..
329            } => files_to_exclude,
330        }
331    }
332
333    pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
334        match file_path {
335            Some(file_path) => {
336                !self
337                    .files_to_exclude()
338                    .iter()
339                    .any(|exclude_glob| exclude_glob.is_match(file_path))
340                    && (self.files_to_include().is_empty()
341                        || self
342                            .files_to_include()
343                            .iter()
344                            .any(|include_glob| include_glob.is_match(file_path)))
345            }
346            None => self.files_to_include().is_empty(),
347        }
348    }
349}
350
351fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
352    glob_set
353        .split(',')
354        .map(str::trim)
355        .filter(|glob_str| !glob_str.is_empty())
356        .map(|glob_str| {
357            PathMatcher::new(glob_str)
358                .with_context(|| format!("deserializing path match glob {glob_str}"))
359        })
360        .collect()
361}
362
#[cfg(test)]
mod tests {
    use super::*;

    /// Every plain path must be accepted by the constructor and must match itself.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(valid_path) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected; well-formed ones must be accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            assert!(
                PathMatcher::new(invalid_glob).is_err(),
                "Invalid glob {invalid_glob} should not be accepted"
            );
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(valid_glob) {
                panic!("Valid glob {valid_glob} should be accepted, but got: {e}");
            }
        }
    }
}