search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::{Context, Result};
  3use client::proto;
  4use itertools::Itertools;
  5use language::{char_kind, BufferSnapshot};
  6use regex::{Regex, RegexBuilder};
  7use smol::future::yield_now;
  8use std::{
  9    borrow::Cow,
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::Path,
 13    sync::Arc,
 14};
 15use util::paths::PathMatcher;
 16
 17#[derive(Clone, Debug)]
 18pub struct SearchInputs {
 19    query: Arc<str>,
 20    files_to_include: Vec<PathMatcher>,
 21    files_to_exclude: Vec<PathMatcher>,
 22}
 23
 24impl SearchInputs {
 25    pub fn as_str(&self) -> &str {
 26        self.query.as_ref()
 27    }
 28    pub fn files_to_include(&self) -> &[PathMatcher] {
 29        &self.files_to_include
 30    }
 31    pub fn files_to_exclude(&self) -> &[PathMatcher] {
 32        &self.files_to_exclude
 33    }
 34}
/// A prepared search query, either a literal-text search or a regular-expression search.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search driven by an Aho-Corasick automaton.
    Text {
        /// Automaton built from the query string.
        search: Arc<AhoCorasick>,
        /// Replacement text, if one has been attached via `with_replacement`.
        replacement: Option<String>,
        /// When set, `search` discards matches that are not delimited by a change of character kind.
        whole_word: bool,
        /// When unset, the automaton is built with ASCII case-insensitivity.
        case_sensitive: bool,
        /// Stored and round-tripped through the proto message; not interpreted in this file.
        /// NOTE(review): presumably controls whether ignored files are searched — confirm with callers.
        include_ignored: bool,
        /// The original query text and path matchers.
        inner: SearchInputs,
    },

    /// Regular-expression search.
    Regex {
        /// Compiled pattern (already wrapped in `\b` when `whole_word` is set).
        regex: Regex,
        /// Replacement text, if one has been attached via `with_replacement`.
        replacement: Option<String>,
        /// True when the pattern contains a newline or the `\n` escape; selects
        /// whole-text matching instead of line-by-line matching.
        multiline: bool,
        /// Whether the pattern was wrapped in word boundaries at construction.
        whole_word: bool,
        /// When unset, the regex is compiled case-insensitively.
        case_sensitive: bool,
        /// Stored and round-tripped through the proto message; not interpreted in this file.
        /// NOTE(review): presumably controls whether ignored files are searched — confirm with callers.
        include_ignored: bool,
        /// The original (unwrapped) query text and path matchers.
        inner: SearchInputs,
    },
}
 56
 57impl SearchQuery {
 58    pub fn text(
 59        query: impl ToString,
 60        whole_word: bool,
 61        case_sensitive: bool,
 62        include_ignored: bool,
 63        files_to_include: Vec<PathMatcher>,
 64        files_to_exclude: Vec<PathMatcher>,
 65    ) -> Result<Self> {
 66        let query = query.to_string();
 67        let search = AhoCorasickBuilder::new()
 68            .ascii_case_insensitive(!case_sensitive)
 69            .build(&[&query])?;
 70        let inner = SearchInputs {
 71            query: query.into(),
 72            files_to_exclude,
 73            files_to_include,
 74        };
 75        Ok(Self::Text {
 76            search: Arc::new(search),
 77            replacement: None,
 78            whole_word,
 79            case_sensitive,
 80            include_ignored,
 81            inner,
 82        })
 83    }
 84
 85    pub fn regex(
 86        query: impl ToString,
 87        whole_word: bool,
 88        case_sensitive: bool,
 89        include_ignored: bool,
 90        files_to_include: Vec<PathMatcher>,
 91        files_to_exclude: Vec<PathMatcher>,
 92    ) -> Result<Self> {
 93        let mut query = query.to_string();
 94        let initial_query = Arc::from(query.as_str());
 95        if whole_word {
 96            let mut word_query = String::new();
 97            word_query.push_str("\\b");
 98            word_query.push_str(&query);
 99            word_query.push_str("\\b");
100            query = word_query
101        }
102
103        let multiline = query.contains('\n') || query.contains("\\n");
104        let regex = RegexBuilder::new(&query)
105            .case_insensitive(!case_sensitive)
106            .multi_line(multiline)
107            .build()?;
108        let inner = SearchInputs {
109            query: initial_query,
110            files_to_exclude,
111            files_to_include,
112        };
113        Ok(Self::Regex {
114            regex,
115            replacement: None,
116            multiline,
117            whole_word,
118            case_sensitive,
119            include_ignored,
120            inner,
121        })
122    }
123
124    pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
125        if message.regex {
126            Self::regex(
127                message.query,
128                message.whole_word,
129                message.case_sensitive,
130                message.include_ignored,
131                deserialize_path_matches(&message.files_to_include)?,
132                deserialize_path_matches(&message.files_to_exclude)?,
133            )
134        } else {
135            Self::text(
136                message.query,
137                message.whole_word,
138                message.case_sensitive,
139                message.include_ignored,
140                deserialize_path_matches(&message.files_to_include)?,
141                deserialize_path_matches(&message.files_to_exclude)?,
142            )
143        }
144    }
145    pub fn with_replacement(mut self, new_replacement: String) -> Self {
146        match self {
147            Self::Text {
148                ref mut replacement,
149                ..
150            }
151            | Self::Regex {
152                ref mut replacement,
153                ..
154            } => {
155                *replacement = Some(new_replacement);
156                self
157            }
158        }
159    }
160    pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
161        proto::SearchProject {
162            project_id,
163            query: self.as_str().to_string(),
164            regex: self.is_regex(),
165            whole_word: self.whole_word(),
166            case_sensitive: self.case_sensitive(),
167            include_ignored: self.include_ignored(),
168            files_to_include: self
169                .files_to_include()
170                .iter()
171                .map(|matcher| matcher.to_string())
172                .join(","),
173            files_to_exclude: self
174                .files_to_exclude()
175                .iter()
176                .map(|matcher| matcher.to_string())
177                .join(","),
178        }
179    }
180
181    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
182        if self.as_str().is_empty() {
183            return Ok(false);
184        }
185
186        match self {
187            Self::Text { search, .. } => {
188                let mat = search.stream_find_iter(stream).next();
189                match mat {
190                    Some(Ok(_)) => Ok(true),
191                    Some(Err(err)) => Err(err.into()),
192                    None => Ok(false),
193                }
194            }
195            Self::Regex {
196                regex, multiline, ..
197            } => {
198                let mut reader = BufReader::new(stream);
199                if *multiline {
200                    let mut text = String::new();
201                    if let Err(err) = reader.read_to_string(&mut text) {
202                        Err(err.into())
203                    } else {
204                        Ok(regex.find(&text).is_some())
205                    }
206                } else {
207                    for line in reader.lines() {
208                        let line = line?;
209                        if regex.find(&line).is_some() {
210                            return Ok(true);
211                        }
212                    }
213                    Ok(false)
214                }
215            }
216        }
217    }
218    /// Returns the replacement text for this `SearchQuery`.
219    pub fn replacement(&self) -> Option<&str> {
220        match self {
221            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
222                replacement.as_deref()
223            }
224        }
225    }
226    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
227    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
228        match self {
229            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
230            SearchQuery::Regex {
231                regex, replacement, ..
232            } => {
233                if let Some(replacement) = replacement {
234                    Some(regex.replace(text, replacement))
235                } else {
236                    None
237                }
238            }
239        }
240    }
241    pub async fn search(
242        &self,
243        buffer: &BufferSnapshot,
244        subrange: Option<Range<usize>>,
245    ) -> Vec<Range<usize>> {
246        const YIELD_INTERVAL: usize = 20000;
247
248        if self.as_str().is_empty() {
249            return Default::default();
250        }
251
252        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
253        let rope = if let Some(range) = subrange {
254            buffer.as_rope().slice(range)
255        } else {
256            buffer.as_rope().clone()
257        };
258
259        let mut matches = Vec::new();
260        match self {
261            Self::Text {
262                search, whole_word, ..
263            } => {
264                for (ix, mat) in search
265                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
266                    .enumerate()
267                {
268                    if (ix + 1) % YIELD_INTERVAL == 0 {
269                        yield_now().await;
270                    }
271
272                    let mat = mat.unwrap();
273                    if *whole_word {
274                        let scope = buffer.language_scope_at(range_offset + mat.start());
275                        let kind = |c| char_kind(&scope, c);
276
277                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
278                        let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
279                        let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
280                        let next_kind = rope.chars_at(mat.end()).next().map(kind);
281                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
282                            continue;
283                        }
284                    }
285                    matches.push(mat.start()..mat.end())
286                }
287            }
288
289            Self::Regex {
290                regex, multiline, ..
291            } => {
292                if *multiline {
293                    let text = rope.to_string();
294                    for (ix, mat) in regex.find_iter(&text).enumerate() {
295                        if (ix + 1) % YIELD_INTERVAL == 0 {
296                            yield_now().await;
297                        }
298
299                        matches.push(mat.start()..mat.end());
300                    }
301                } else {
302                    let mut line = String::new();
303                    let mut line_offset = 0;
304                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
305                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
306                            yield_now().await;
307                        }
308
309                        for (newline_ix, text) in chunk.split('\n').enumerate() {
310                            if newline_ix > 0 {
311                                for mat in regex.find_iter(&line) {
312                                    let start = line_offset + mat.start();
313                                    let end = line_offset + mat.end();
314                                    matches.push(start..end);
315                                }
316
317                                line_offset += line.len() + 1;
318                                line.clear();
319                            }
320                            line.push_str(text);
321                        }
322                    }
323                }
324            }
325        }
326
327        matches
328    }
329
330    pub fn is_empty(&self) -> bool {
331        self.as_str().is_empty()
332    }
333
334    pub fn as_str(&self) -> &str {
335        self.as_inner().as_str()
336    }
337
338    pub fn whole_word(&self) -> bool {
339        match self {
340            Self::Text { whole_word, .. } => *whole_word,
341            Self::Regex { whole_word, .. } => *whole_word,
342        }
343    }
344
345    pub fn case_sensitive(&self) -> bool {
346        match self {
347            Self::Text { case_sensitive, .. } => *case_sensitive,
348            Self::Regex { case_sensitive, .. } => *case_sensitive,
349        }
350    }
351
352    pub fn include_ignored(&self) -> bool {
353        match self {
354            Self::Text {
355                include_ignored, ..
356            } => *include_ignored,
357            Self::Regex {
358                include_ignored, ..
359            } => *include_ignored,
360        }
361    }
362
363    pub fn is_regex(&self) -> bool {
364        matches!(self, Self::Regex { .. })
365    }
366
367    pub fn files_to_include(&self) -> &[PathMatcher] {
368        self.as_inner().files_to_include()
369    }
370
371    pub fn files_to_exclude(&self) -> &[PathMatcher] {
372        self.as_inner().files_to_exclude()
373    }
374
375    pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
376        match file_path {
377            Some(file_path) => {
378                let mut path = file_path.to_path_buf();
379                loop {
380                    if self
381                        .files_to_exclude()
382                        .iter()
383                        .any(|exclude_glob| exclude_glob.is_match(&path))
384                    {
385                        return false;
386                    } else if self.files_to_include().is_empty()
387                        || self
388                            .files_to_include()
389                            .iter()
390                            .any(|include_glob| include_glob.is_match(&path))
391                    {
392                        return true;
393                    } else if !path.pop() {
394                        return false;
395                    }
396                }
397            }
398            None => self.files_to_include().is_empty(),
399        }
400    }
401    pub fn as_inner(&self) -> &SearchInputs {
402        match self {
403            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
404        }
405    }
406}
407
408fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
409    glob_set
410        .split(',')
411        .map(str::trim)
412        .filter(|glob_str| !glob_str.is_empty())
413        .map(|glob_str| {
414            PathMatcher::new(glob_str)
415                .with_context(|| format!("deserializing path match glob {glob_str}"))
416        })
417        .collect()
418}
419
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted as matchers, and each must match itself.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(valid_path) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected; well-formed globs must be accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(invalid_glob).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(valid_glob) {
                panic!("Valid glob {valid_glob} should be accepted, but got: {e}");
            }
        }
    }
}