search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::{Context, Result};
  3use client2::proto;
  4use itertools::Itertools;
  5use language2::{char_kind, BufferSnapshot};
  6use regex::{Regex, RegexBuilder};
  7use smol::future::yield_now;
  8use std::{
  9    borrow::Cow,
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::Path,
 13    sync::Arc,
 14};
 15use util::paths::PathMatcher;
 16
 17#[derive(Clone, Debug)]
 18pub struct SearchInputs {
 19    query: Arc<str>,
 20    files_to_include: Vec<PathMatcher>,
 21    files_to_exclude: Vec<PathMatcher>,
 22}
 23
 24impl SearchInputs {
 25    pub fn as_str(&self) -> &str {
 26        self.query.as_ref()
 27    }
 28    pub fn files_to_include(&self) -> &[PathMatcher] {
 29        &self.files_to_include
 30    }
 31    pub fn files_to_exclude(&self) -> &[PathMatcher] {
 32        &self.files_to_exclude
 33    }
 34}
/// A compiled search query: either a literal text search or a regular
/// expression search, together with the options it was built with.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search backed by an Aho-Corasick automaton.
    Text {
        /// Automaton built from the query string.
        search: Arc<AhoCorasick>,
        /// Replacement text, if one was set via `with_replacement`.
        replacement: Option<String>,
        /// Whether matches must be delimited by characters of a different kind.
        whole_word: bool,
        /// Whether the search distinguishes letter case.
        case_sensitive: bool,
        /// The query text and path filters this search was created from.
        inner: SearchInputs,
    },

    /// Regular expression search.
    Regex {
        /// Compiled pattern (includes the `\b` wrapping when `whole_word` is set).
        regex: Regex,
        /// Replacement template, if one was set via `with_replacement`.
        replacement: Option<String>,
        /// True when the pattern contains a newline or the two-character
        /// sequence `\n`; such patterns are matched against the whole text
        /// instead of line by line.
        multiline: bool,
        /// Whether the pattern was wrapped in word boundaries.
        whole_word: bool,
        /// Whether the search distinguishes letter case.
        case_sensitive: bool,
        /// The query text (before any whole-word wrapping) and path filters.
        inner: SearchInputs,
    },
}
 54
 55impl SearchQuery {
 56    pub fn text(
 57        query: impl ToString,
 58        whole_word: bool,
 59        case_sensitive: bool,
 60        files_to_include: Vec<PathMatcher>,
 61        files_to_exclude: Vec<PathMatcher>,
 62    ) -> Result<Self> {
 63        let query = query.to_string();
 64        let search = AhoCorasickBuilder::new()
 65            .ascii_case_insensitive(!case_sensitive)
 66            .build(&[&query])?;
 67        let inner = SearchInputs {
 68            query: query.into(),
 69            files_to_exclude,
 70            files_to_include,
 71        };
 72        Ok(Self::Text {
 73            search: Arc::new(search),
 74            replacement: None,
 75            whole_word,
 76            case_sensitive,
 77            inner,
 78        })
 79    }
 80
 81    pub fn regex(
 82        query: impl ToString,
 83        whole_word: bool,
 84        case_sensitive: bool,
 85        files_to_include: Vec<PathMatcher>,
 86        files_to_exclude: Vec<PathMatcher>,
 87    ) -> Result<Self> {
 88        let mut query = query.to_string();
 89        let initial_query = Arc::from(query.as_str());
 90        if whole_word {
 91            let mut word_query = String::new();
 92            word_query.push_str("\\b");
 93            word_query.push_str(&query);
 94            word_query.push_str("\\b");
 95            query = word_query
 96        }
 97
 98        let multiline = query.contains('\n') || query.contains("\\n");
 99        let regex = RegexBuilder::new(&query)
100            .case_insensitive(!case_sensitive)
101            .multi_line(multiline)
102            .build()?;
103        let inner = SearchInputs {
104            query: initial_query,
105            files_to_exclude,
106            files_to_include,
107        };
108        Ok(Self::Regex {
109            regex,
110            replacement: None,
111            multiline,
112            whole_word,
113            case_sensitive,
114            inner,
115        })
116    }
117
118    pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
119        if message.regex {
120            Self::regex(
121                message.query,
122                message.whole_word,
123                message.case_sensitive,
124                deserialize_path_matches(&message.files_to_include)?,
125                deserialize_path_matches(&message.files_to_exclude)?,
126            )
127        } else {
128            Self::text(
129                message.query,
130                message.whole_word,
131                message.case_sensitive,
132                deserialize_path_matches(&message.files_to_include)?,
133                deserialize_path_matches(&message.files_to_exclude)?,
134            )
135        }
136    }
137    pub fn with_replacement(mut self, new_replacement: String) -> Self {
138        match self {
139            Self::Text {
140                ref mut replacement,
141                ..
142            }
143            | Self::Regex {
144                ref mut replacement,
145                ..
146            } => {
147                *replacement = Some(new_replacement);
148                self
149            }
150        }
151    }
152    pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
153        proto::SearchProject {
154            project_id,
155            query: self.as_str().to_string(),
156            regex: self.is_regex(),
157            whole_word: self.whole_word(),
158            case_sensitive: self.case_sensitive(),
159            files_to_include: self
160                .files_to_include()
161                .iter()
162                .map(|matcher| matcher.to_string())
163                .join(","),
164            files_to_exclude: self
165                .files_to_exclude()
166                .iter()
167                .map(|matcher| matcher.to_string())
168                .join(","),
169        }
170    }
171
172    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
173        if self.as_str().is_empty() {
174            return Ok(false);
175        }
176
177        match self {
178            Self::Text { search, .. } => {
179                let mat = search.stream_find_iter(stream).next();
180                match mat {
181                    Some(Ok(_)) => Ok(true),
182                    Some(Err(err)) => Err(err.into()),
183                    None => Ok(false),
184                }
185            }
186            Self::Regex {
187                regex, multiline, ..
188            } => {
189                let mut reader = BufReader::new(stream);
190                if *multiline {
191                    let mut text = String::new();
192                    if let Err(err) = reader.read_to_string(&mut text) {
193                        Err(err.into())
194                    } else {
195                        Ok(regex.find(&text).is_some())
196                    }
197                } else {
198                    for line in reader.lines() {
199                        let line = line?;
200                        if regex.find(&line).is_some() {
201                            return Ok(true);
202                        }
203                    }
204                    Ok(false)
205                }
206            }
207        }
208    }
209    /// Returns the replacement text for this `SearchQuery`.
210    pub fn replacement(&self) -> Option<&str> {
211        match self {
212            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
213                replacement.as_deref()
214            }
215        }
216    }
217    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
218    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
219        match self {
220            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
221            SearchQuery::Regex {
222                regex, replacement, ..
223            } => {
224                if let Some(replacement) = replacement {
225                    Some(regex.replace(text, replacement))
226                } else {
227                    None
228                }
229            }
230        }
231    }
232    pub async fn search(
233        &self,
234        buffer: &BufferSnapshot,
235        subrange: Option<Range<usize>>,
236    ) -> Vec<Range<usize>> {
237        const YIELD_INTERVAL: usize = 20000;
238
239        if self.as_str().is_empty() {
240            return Default::default();
241        }
242
243        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
244        let rope = if let Some(range) = subrange {
245            buffer.as_rope().slice(range)
246        } else {
247            buffer.as_rope().clone()
248        };
249
250        let mut matches = Vec::new();
251        match self {
252            Self::Text {
253                search, whole_word, ..
254            } => {
255                for (ix, mat) in search
256                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
257                    .enumerate()
258                {
259                    if (ix + 1) % YIELD_INTERVAL == 0 {
260                        yield_now().await;
261                    }
262
263                    let mat = mat.unwrap();
264                    if *whole_word {
265                        let scope = buffer.language_scope_at(range_offset + mat.start());
266                        let kind = |c| char_kind(&scope, c);
267
268                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
269                        let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
270                        let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
271                        let next_kind = rope.chars_at(mat.end()).next().map(kind);
272                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
273                            continue;
274                        }
275                    }
276                    matches.push(mat.start()..mat.end())
277                }
278            }
279
280            Self::Regex {
281                regex, multiline, ..
282            } => {
283                if *multiline {
284                    let text = rope.to_string();
285                    for (ix, mat) in regex.find_iter(&text).enumerate() {
286                        if (ix + 1) % YIELD_INTERVAL == 0 {
287                            yield_now().await;
288                        }
289
290                        matches.push(mat.start()..mat.end());
291                    }
292                } else {
293                    let mut line = String::new();
294                    let mut line_offset = 0;
295                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
296                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
297                            yield_now().await;
298                        }
299
300                        for (newline_ix, text) in chunk.split('\n').enumerate() {
301                            if newline_ix > 0 {
302                                for mat in regex.find_iter(&line) {
303                                    let start = line_offset + mat.start();
304                                    let end = line_offset + mat.end();
305                                    matches.push(start..end);
306                                }
307
308                                line_offset += line.len() + 1;
309                                line.clear();
310                            }
311                            line.push_str(text);
312                        }
313                    }
314                }
315            }
316        }
317
318        matches
319    }
320
321    pub fn as_str(&self) -> &str {
322        self.as_inner().as_str()
323    }
324
325    pub fn whole_word(&self) -> bool {
326        match self {
327            Self::Text { whole_word, .. } => *whole_word,
328            Self::Regex { whole_word, .. } => *whole_word,
329        }
330    }
331
332    pub fn case_sensitive(&self) -> bool {
333        match self {
334            Self::Text { case_sensitive, .. } => *case_sensitive,
335            Self::Regex { case_sensitive, .. } => *case_sensitive,
336        }
337    }
338
339    pub fn is_regex(&self) -> bool {
340        matches!(self, Self::Regex { .. })
341    }
342
343    pub fn files_to_include(&self) -> &[PathMatcher] {
344        self.as_inner().files_to_include()
345    }
346
347    pub fn files_to_exclude(&self) -> &[PathMatcher] {
348        self.as_inner().files_to_exclude()
349    }
350
351    pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
352        match file_path {
353            Some(file_path) => {
354                !self
355                    .files_to_exclude()
356                    .iter()
357                    .any(|exclude_glob| exclude_glob.is_match(file_path))
358                    && (self.files_to_include().is_empty()
359                        || self
360                            .files_to_include()
361                            .iter()
362                            .any(|include_glob| include_glob.is_match(file_path)))
363            }
364            None => self.files_to_include().is_empty(),
365        }
366    }
367    pub fn as_inner(&self) -> &SearchInputs {
368        match self {
369            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
370        }
371    }
372}
373
374fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
375    glob_set
376        .split(',')
377        .map(str::trim)
378        .filter(|glob_str| !glob_str.is_empty())
379        .map(|glob_str| {
380            PathMatcher::new(glob_str)
381                .with_context(|| format!("deserializing path match glob {glob_str}"))
382        })
383        .collect()
384}
385
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted as matchers and must match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(valid_path) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected; well-formed globs must be accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        for invalid_glob in ["dir/[].txt", "dir/[a-z.txt", "dir/{file"] {
            if PathMatcher::new(invalid_glob).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(valid_glob) {
                panic!("Valid glob {valid_glob} should be accepted, but got: {e}");
            }
        }
    }
}