search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::{Context, Result};
  3use client::proto;
  4use globset::{Glob, GlobMatcher};
  5use itertools::Itertools;
  6use language::{char_kind, BufferSnapshot};
  7use regex::{Regex, RegexBuilder};
  8use smol::future::yield_now;
  9use std::{
 10    borrow::Cow,
 11    io::{BufRead, BufReader, Read},
 12    ops::Range,
 13    path::{Path, PathBuf},
 14    sync::Arc,
 15};
 16use util::paths::PathMatcher;
 17
 18#[derive(Clone, Debug)]
 19pub struct SearchInputs {
 20    query: Arc<str>,
 21    files_to_include: Vec<PathMatcher>,
 22    files_to_exclude: Vec<PathMatcher>,
 23}
 24
 25impl SearchInputs {
 26    pub fn as_str(&self) -> &str {
 27        self.query.as_ref()
 28    }
 29    pub fn files_to_include(&self) -> &[PathMatcher] {
 30        &self.files_to_include
 31    }
 32    pub fn files_to_exclude(&self) -> &[PathMatcher] {
 33        &self.files_to_exclude
 34    }
 35}
 36#[derive(Clone, Debug)]
 37pub enum SearchQuery {
 38    Text {
 39        search: Arc<AhoCorasick>,
 40        replacement: Option<String>,
 41        whole_word: bool,
 42        case_sensitive: bool,
 43        inner: SearchInputs,
 44    },
 45
 46    Regex {
 47        regex: Regex,
 48        replacement: Option<String>,
 49        multiline: bool,
 50        whole_word: bool,
 51        case_sensitive: bool,
 52        inner: SearchInputs,
 53    },
 54}
 55
 56impl SearchQuery {
 57    pub fn text(
 58        query: impl ToString,
 59        whole_word: bool,
 60        case_sensitive: bool,
 61        files_to_include: Vec<PathMatcher>,
 62        files_to_exclude: Vec<PathMatcher>,
 63    ) -> Result<Self> {
 64        let query = query.to_string();
 65        let search = AhoCorasickBuilder::new()
 66            .ascii_case_insensitive(!case_sensitive)
 67            .build(&[&query])?;
 68        let inner = SearchInputs {
 69            query: query.into(),
 70            files_to_exclude,
 71            files_to_include,
 72        };
 73        Ok(Self::Text {
 74            search: Arc::new(search),
 75            replacement: None,
 76            whole_word,
 77            case_sensitive,
 78            inner,
 79        })
 80    }
 81
 82    pub fn regex(
 83        query: impl ToString,
 84        whole_word: bool,
 85        case_sensitive: bool,
 86        files_to_include: Vec<PathMatcher>,
 87        files_to_exclude: Vec<PathMatcher>,
 88    ) -> Result<Self> {
 89        let mut query = query.to_string();
 90        let initial_query = Arc::from(query.as_str());
 91        if whole_word {
 92            let mut word_query = String::new();
 93            word_query.push_str("\\b");
 94            word_query.push_str(&query);
 95            word_query.push_str("\\b");
 96            query = word_query
 97        }
 98
 99        let multiline = query.contains('\n') || query.contains("\\n");
100        let regex = RegexBuilder::new(&query)
101            .case_insensitive(!case_sensitive)
102            .multi_line(multiline)
103            .build()?;
104        let inner = SearchInputs {
105            query: initial_query,
106            files_to_exclude,
107            files_to_include,
108        };
109        Ok(Self::Regex {
110            regex,
111            replacement: None,
112            multiline,
113            whole_word,
114            case_sensitive,
115            inner,
116        })
117    }
118
119    pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
120        if message.regex {
121            Self::regex(
122                message.query,
123                message.whole_word,
124                message.case_sensitive,
125                deserialize_path_matches(&message.files_to_include)?,
126                deserialize_path_matches(&message.files_to_exclude)?,
127            )
128        } else {
129            Self::text(
130                message.query,
131                message.whole_word,
132                message.case_sensitive,
133                deserialize_path_matches(&message.files_to_include)?,
134                deserialize_path_matches(&message.files_to_exclude)?,
135            )
136        }
137    }
138    pub fn with_replacement(mut self, new_replacement: String) -> Self {
139        match self {
140            Self::Text {
141                ref mut replacement,
142                ..
143            }
144            | Self::Regex {
145                ref mut replacement,
146                ..
147            } => {
148                *replacement = Some(new_replacement);
149                self
150            }
151        }
152    }
153    pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
154        proto::SearchProject {
155            project_id,
156            query: self.as_str().to_string(),
157            regex: self.is_regex(),
158            whole_word: self.whole_word(),
159            case_sensitive: self.case_sensitive(),
160            files_to_include: self
161                .files_to_include()
162                .iter()
163                .map(|matcher| matcher.to_string())
164                .join(","),
165            files_to_exclude: self
166                .files_to_exclude()
167                .iter()
168                .map(|matcher| matcher.to_string())
169                .join(","),
170        }
171    }
172
173    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
174        if self.as_str().is_empty() {
175            return Ok(false);
176        }
177
178        match self {
179            Self::Text { search, .. } => {
180                let mat = search.stream_find_iter(stream).next();
181                match mat {
182                    Some(Ok(_)) => Ok(true),
183                    Some(Err(err)) => Err(err.into()),
184                    None => Ok(false),
185                }
186            }
187            Self::Regex {
188                regex, multiline, ..
189            } => {
190                let mut reader = BufReader::new(stream);
191                if *multiline {
192                    let mut text = String::new();
193                    if let Err(err) = reader.read_to_string(&mut text) {
194                        Err(err.into())
195                    } else {
196                        Ok(regex.find(&text).is_some())
197                    }
198                } else {
199                    for line in reader.lines() {
200                        let line = line?;
201                        if regex.find(&line).is_some() {
202                            return Ok(true);
203                        }
204                    }
205                    Ok(false)
206                }
207            }
208        }
209    }
210    /// Returns the replacement text for this `SearchQuery`.
211    pub fn replacement(&self) -> Option<&str> {
212        match self {
213            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
214                replacement.as_deref()
215            }
216        }
217    }
218    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
219    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
220        match self {
221            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
222            SearchQuery::Regex {
223                regex, replacement, ..
224            } => {
225                if let Some(replacement) = replacement {
226                    Some(regex.replace(text, replacement))
227                } else {
228                    None
229                }
230            }
231        }
232    }
233    pub async fn search(
234        &self,
235        buffer: &BufferSnapshot,
236        subrange: Option<Range<usize>>,
237    ) -> Vec<Range<usize>> {
238        const YIELD_INTERVAL: usize = 20000;
239
240        if self.as_str().is_empty() {
241            return Default::default();
242        }
243
244        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
245        let rope = if let Some(range) = subrange {
246            buffer.as_rope().slice(range)
247        } else {
248            buffer.as_rope().clone()
249        };
250
251        let mut matches = Vec::new();
252        match self {
253            Self::Text {
254                search, whole_word, ..
255            } => {
256                for (ix, mat) in search
257                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
258                    .enumerate()
259                {
260                    if (ix + 1) % YIELD_INTERVAL == 0 {
261                        yield_now().await;
262                    }
263
264                    let mat = mat.unwrap();
265                    if *whole_word {
266                        let scope = buffer.language_scope_at(range_offset + mat.start());
267                        let kind = |c| char_kind(&scope, c);
268
269                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
270                        let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
271                        let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
272                        let next_kind = rope.chars_at(mat.end()).next().map(kind);
273                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
274                            continue;
275                        }
276                    }
277                    matches.push(mat.start()..mat.end())
278                }
279            }
280
281            Self::Regex {
282                regex, multiline, ..
283            } => {
284                if *multiline {
285                    let text = rope.to_string();
286                    for (ix, mat) in regex.find_iter(&text).enumerate() {
287                        if (ix + 1) % YIELD_INTERVAL == 0 {
288                            yield_now().await;
289                        }
290
291                        matches.push(mat.start()..mat.end());
292                    }
293                } else {
294                    let mut line = String::new();
295                    let mut line_offset = 0;
296                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
297                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
298                            yield_now().await;
299                        }
300
301                        for (newline_ix, text) in chunk.split('\n').enumerate() {
302                            if newline_ix > 0 {
303                                for mat in regex.find_iter(&line) {
304                                    let start = line_offset + mat.start();
305                                    let end = line_offset + mat.end();
306                                    matches.push(start..end);
307                                }
308
309                                line_offset += line.len() + 1;
310                                line.clear();
311                            }
312                            line.push_str(text);
313                        }
314                    }
315                }
316            }
317        }
318
319        matches
320    }
321
322    pub fn as_str(&self) -> &str {
323        self.as_inner().as_str()
324    }
325
326    pub fn whole_word(&self) -> bool {
327        match self {
328            Self::Text { whole_word, .. } => *whole_word,
329            Self::Regex { whole_word, .. } => *whole_word,
330        }
331    }
332
333    pub fn case_sensitive(&self) -> bool {
334        match self {
335            Self::Text { case_sensitive, .. } => *case_sensitive,
336            Self::Regex { case_sensitive, .. } => *case_sensitive,
337        }
338    }
339
340    pub fn is_regex(&self) -> bool {
341        matches!(self, Self::Regex { .. })
342    }
343
344    pub fn files_to_include(&self) -> &[PathMatcher] {
345        self.as_inner().files_to_include()
346    }
347
348    pub fn files_to_exclude(&self) -> &[PathMatcher] {
349        self.as_inner().files_to_exclude()
350    }
351
352    pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
353        match file_path {
354            Some(file_path) => {
355                !self
356                    .files_to_exclude()
357                    .iter()
358                    .any(|exclude_glob| exclude_glob.is_match(file_path))
359                    && (self.files_to_include().is_empty()
360                        || self
361                            .files_to_include()
362                            .iter()
363                            .any(|include_glob| include_glob.is_match(file_path)))
364            }
365            None => self.files_to_include().is_empty(),
366        }
367    }
368    pub fn as_inner(&self) -> &SearchInputs {
369        match self {
370            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
371        }
372    }
373}
374
375fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
376    glob_set
377        .split(',')
378        .map(str::trim)
379        .filter(|glob_str| !glob_str.is_empty())
380        .map(|glob_str| {
381            PathMatcher::new(glob_str)
382                .with_context(|| format!("deserializing path match glob {glob_str}"))
383        })
384        .collect()
385}
386
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted by `PathMatcher::new` and must match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(valid_path) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed ones accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(invalid_glob).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(valid_glob) {
                panic!("Valid glob {valid_glob} should be accepted, but got: {e}");
            }
        }
    }
}