search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::{Context, Result};
  3use client::proto;
  4use itertools::Itertools;
  5use language::{char_kind, BufferSnapshot};
  6use regex::{Captures, Regex, RegexBuilder};
  7use smol::future::yield_now;
  8use std::{
  9    borrow::Cow,
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::Path,
 13    sync::{Arc, OnceLock},
 14};
 15use util::paths::PathMatcher;
 16
/// Lazily-initialized regex used by `SearchQuery::replacement_for` to find the
/// escape sequences `\\`, `\n` and `\t` inside a regex replacement string.
static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: OnceLock<Regex> = OnceLock::new();
 18
/// The inputs a `SearchQuery` was built from: the query text and the path
/// matchers that restrict which files are searched.
#[derive(Clone, Debug)]
pub struct SearchInputs {
    /// The query string as passed to the constructor (for regex queries this is
    /// the text before any whole-word wrapping is applied).
    query: Arc<str>,
    /// Matchers for paths the search is limited to; an empty list places no
    /// restriction (see `SearchQuery::file_matches`).
    files_to_include: Vec<PathMatcher>,
    /// Matchers for paths that are rejected by `SearchQuery::file_matches`.
    files_to_exclude: Vec<PathMatcher>,
}
 25
 26impl SearchInputs {
 27    pub fn as_str(&self) -> &str {
 28        self.query.as_ref()
 29    }
 30    pub fn files_to_include(&self) -> &[PathMatcher] {
 31        &self.files_to_include
 32    }
 33    pub fn files_to_exclude(&self) -> &[PathMatcher] {
 34        &self.files_to_exclude
 35    }
 36}
/// A compiled search query: either a literal-text search or a regular
/// expression, together with its options and the inputs it was built from.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal-text search backed by an Aho-Corasick automaton.
    Text {
        /// Automaton built from the query string.
        search: Arc<AhoCorasick>,
        /// Replacement text, if one was set via `with_replacement`.
        replacement: Option<String>,
        /// When set, `search` discards matches that are not delimited by a
        /// change of character kind on both sides.
        whole_word: bool,
        /// When `false`, the automaton is built ASCII-case-insensitive.
        case_sensitive: bool,
        // NOTE(review): not consulted in this file; presumably tells callers
        // whether ignored files should be searched — confirm against callers.
        include_ignored: bool,
        /// The original query text and path matchers.
        inner: SearchInputs,
    },

    /// Regular-expression search.
    Regex {
        /// The compiled pattern (including any whole-word wrapping).
        regex: Regex,
        /// Replacement template, if one was set via `with_replacement`.
        replacement: Option<String>,
        /// Whether the pattern contains a newline (literal or `\n` escape); if
        /// so the text is searched as a whole instead of line by line.
        multiline: bool,
        /// Whether the pattern was wrapped in `\b` word boundaries.
        whole_word: bool,
        /// When `false`, the regex is built case-insensitive.
        case_sensitive: bool,
        // NOTE(review): not consulted in this file; presumably tells callers
        // whether ignored files should be searched — confirm against callers.
        include_ignored: bool,
        /// The original query text and path matchers.
        inner: SearchInputs,
    },
}
 58
 59impl SearchQuery {
 60    pub fn text(
 61        query: impl ToString,
 62        whole_word: bool,
 63        case_sensitive: bool,
 64        include_ignored: bool,
 65        files_to_include: Vec<PathMatcher>,
 66        files_to_exclude: Vec<PathMatcher>,
 67    ) -> Result<Self> {
 68        let query = query.to_string();
 69        let search = AhoCorasickBuilder::new()
 70            .ascii_case_insensitive(!case_sensitive)
 71            .build(&[&query])?;
 72        let inner = SearchInputs {
 73            query: query.into(),
 74            files_to_exclude,
 75            files_to_include,
 76        };
 77        Ok(Self::Text {
 78            search: Arc::new(search),
 79            replacement: None,
 80            whole_word,
 81            case_sensitive,
 82            include_ignored,
 83            inner,
 84        })
 85    }
 86
 87    pub fn regex(
 88        query: impl ToString,
 89        whole_word: bool,
 90        case_sensitive: bool,
 91        include_ignored: bool,
 92        files_to_include: Vec<PathMatcher>,
 93        files_to_exclude: Vec<PathMatcher>,
 94    ) -> Result<Self> {
 95        let mut query = query.to_string();
 96        let initial_query = Arc::from(query.as_str());
 97        if whole_word {
 98            let mut word_query = String::new();
 99            word_query.push_str("\\b");
100            word_query.push_str(&query);
101            word_query.push_str("\\b");
102            query = word_query
103        }
104
105        let multiline = query.contains('\n') || query.contains("\\n");
106        let regex = RegexBuilder::new(&query)
107            .case_insensitive(!case_sensitive)
108            .multi_line(multiline)
109            .build()?;
110        let inner = SearchInputs {
111            query: initial_query,
112            files_to_exclude,
113            files_to_include,
114        };
115        Ok(Self::Regex {
116            regex,
117            replacement: None,
118            multiline,
119            whole_word,
120            case_sensitive,
121            include_ignored,
122            inner,
123        })
124    }
125
126    pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
127        if message.regex {
128            Self::regex(
129                message.query,
130                message.whole_word,
131                message.case_sensitive,
132                message.include_ignored,
133                deserialize_path_matches(&message.files_to_include)?,
134                deserialize_path_matches(&message.files_to_exclude)?,
135            )
136        } else {
137            Self::text(
138                message.query,
139                message.whole_word,
140                message.case_sensitive,
141                message.include_ignored,
142                deserialize_path_matches(&message.files_to_include)?,
143                deserialize_path_matches(&message.files_to_exclude)?,
144            )
145        }
146    }
147    pub fn with_replacement(mut self, new_replacement: String) -> Self {
148        match self {
149            Self::Text {
150                ref mut replacement,
151                ..
152            }
153            | Self::Regex {
154                ref mut replacement,
155                ..
156            } => {
157                *replacement = Some(new_replacement);
158                self
159            }
160        }
161    }
162    pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
163        proto::SearchProject {
164            project_id,
165            query: self.as_str().to_string(),
166            regex: self.is_regex(),
167            whole_word: self.whole_word(),
168            case_sensitive: self.case_sensitive(),
169            include_ignored: self.include_ignored(),
170            files_to_include: self
171                .files_to_include()
172                .iter()
173                .map(|matcher| matcher.to_string())
174                .join(","),
175            files_to_exclude: self
176                .files_to_exclude()
177                .iter()
178                .map(|matcher| matcher.to_string())
179                .join(","),
180        }
181    }
182
183    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
184        if self.as_str().is_empty() {
185            return Ok(false);
186        }
187
188        match self {
189            Self::Text { search, .. } => {
190                let mat = search.stream_find_iter(stream).next();
191                match mat {
192                    Some(Ok(_)) => Ok(true),
193                    Some(Err(err)) => Err(err.into()),
194                    None => Ok(false),
195                }
196            }
197            Self::Regex {
198                regex, multiline, ..
199            } => {
200                let mut reader = BufReader::new(stream);
201                if *multiline {
202                    let mut text = String::new();
203                    if let Err(err) = reader.read_to_string(&mut text) {
204                        Err(err.into())
205                    } else {
206                        Ok(regex.find(&text).is_some())
207                    }
208                } else {
209                    for line in reader.lines() {
210                        let line = line?;
211                        if regex.find(&line).is_some() {
212                            return Ok(true);
213                        }
214                    }
215                    Ok(false)
216                }
217            }
218        }
219    }
220    /// Returns the replacement text for this `SearchQuery`.
221    pub fn replacement(&self) -> Option<&str> {
222        match self {
223            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
224                replacement.as_deref()
225            }
226        }
227    }
228    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
229    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
230        match self {
231            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
232            SearchQuery::Regex {
233                regex, replacement, ..
234            } => {
235                if let Some(replacement) = replacement {
236                    let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX
237                        .get_or_init(|| Regex::new(r"\\\\|\\n|\\t").unwrap())
238                        .replace_all(replacement, |c: &Captures| {
239                            match c.get(0).unwrap().as_str() {
240                                r"\\" => "\\",
241                                r"\n" => "\n",
242                                r"\t" => "\t",
243                                x => unreachable!("Unexpected escape sequence: {}", x),
244                            }
245                        });
246                    Some(regex.replace(text, replacement))
247                } else {
248                    None
249                }
250            }
251        }
252    }
253    pub async fn search(
254        &self,
255        buffer: &BufferSnapshot,
256        subrange: Option<Range<usize>>,
257    ) -> Vec<Range<usize>> {
258        const YIELD_INTERVAL: usize = 20000;
259
260        if self.as_str().is_empty() {
261            return Default::default();
262        }
263
264        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
265        let rope = if let Some(range) = subrange {
266            buffer.as_rope().slice(range)
267        } else {
268            buffer.as_rope().clone()
269        };
270
271        let mut matches = Vec::new();
272        match self {
273            Self::Text {
274                search, whole_word, ..
275            } => {
276                for (ix, mat) in search
277                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
278                    .enumerate()
279                {
280                    if (ix + 1) % YIELD_INTERVAL == 0 {
281                        yield_now().await;
282                    }
283
284                    let mat = mat.unwrap();
285                    if *whole_word {
286                        let scope = buffer.language_scope_at(range_offset + mat.start());
287                        let kind = |c| char_kind(&scope, c);
288
289                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
290                        let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
291                        let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
292                        let next_kind = rope.chars_at(mat.end()).next().map(kind);
293                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
294                            continue;
295                        }
296                    }
297                    matches.push(mat.start()..mat.end())
298                }
299            }
300
301            Self::Regex {
302                regex, multiline, ..
303            } => {
304                if *multiline {
305                    let text = rope.to_string();
306                    for (ix, mat) in regex.find_iter(&text).enumerate() {
307                        if (ix + 1) % YIELD_INTERVAL == 0 {
308                            yield_now().await;
309                        }
310
311                        matches.push(mat.start()..mat.end());
312                    }
313                } else {
314                    let mut line = String::new();
315                    let mut line_offset = 0;
316                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
317                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
318                            yield_now().await;
319                        }
320
321                        for (newline_ix, text) in chunk.split('\n').enumerate() {
322                            if newline_ix > 0 {
323                                for mat in regex.find_iter(&line) {
324                                    let start = line_offset + mat.start();
325                                    let end = line_offset + mat.end();
326                                    matches.push(start..end);
327                                }
328
329                                line_offset += line.len() + 1;
330                                line.clear();
331                            }
332                            line.push_str(text);
333                        }
334                    }
335                }
336            }
337        }
338
339        matches
340    }
341
342    pub fn is_empty(&self) -> bool {
343        self.as_str().is_empty()
344    }
345
346    pub fn as_str(&self) -> &str {
347        self.as_inner().as_str()
348    }
349
350    pub fn whole_word(&self) -> bool {
351        match self {
352            Self::Text { whole_word, .. } => *whole_word,
353            Self::Regex { whole_word, .. } => *whole_word,
354        }
355    }
356
357    pub fn case_sensitive(&self) -> bool {
358        match self {
359            Self::Text { case_sensitive, .. } => *case_sensitive,
360            Self::Regex { case_sensitive, .. } => *case_sensitive,
361        }
362    }
363
364    pub fn include_ignored(&self) -> bool {
365        match self {
366            Self::Text {
367                include_ignored, ..
368            } => *include_ignored,
369            Self::Regex {
370                include_ignored, ..
371            } => *include_ignored,
372        }
373    }
374
375    pub fn is_regex(&self) -> bool {
376        matches!(self, Self::Regex { .. })
377    }
378
379    pub fn files_to_include(&self) -> &[PathMatcher] {
380        self.as_inner().files_to_include()
381    }
382
383    pub fn files_to_exclude(&self) -> &[PathMatcher] {
384        self.as_inner().files_to_exclude()
385    }
386
387    pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
388        match file_path {
389            Some(file_path) => {
390                let mut path = file_path.to_path_buf();
391                loop {
392                    if self
393                        .files_to_exclude()
394                        .iter()
395                        .any(|exclude_glob| exclude_glob.is_match(&path))
396                    {
397                        return false;
398                    } else if self.files_to_include().is_empty()
399                        || self
400                            .files_to_include()
401                            .iter()
402                            .any(|include_glob| include_glob.is_match(&path))
403                    {
404                        return true;
405                    } else if !path.pop() {
406                        return false;
407                    }
408                }
409            }
410            None => self.files_to_include().is_empty(),
411        }
412    }
413    pub fn as_inner(&self) -> &SearchInputs {
414        match self {
415            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
416        }
417    }
418}
419
420fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
421    glob_set
422        .split(',')
423        .map(str::trim)
424        .filter(|glob_str| !glob_str.is_empty())
425        .map(|glob_str| {
426            PathMatcher::new(glob_str)
427                .with_context(|| format!("deserializing path match glob {glob_str}"))
428        })
429        .collect()
430}
431
#[cfg(test)]
mod tests {
    use super::*;

    /// Every plain path must compile into a matcher, and that matcher must
    /// match the path it was built from.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        const VALID_PATHS: [&str; 7] = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in VALID_PATHS {
            let path_matcher = match PathMatcher::new(valid_path) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed ones accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        const INVALID_GLOBS: [&str; 3] = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        const VALID_GLOBS: [&str; 5] = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];

        for invalid_glob in INVALID_GLOBS {
            assert!(
                PathMatcher::new(invalid_glob).is_err(),
                "Invalid glob {invalid_glob} should not be accepted"
            );
        }

        for valid_glob in VALID_GLOBS {
            if let Err(e) = PathMatcher::new(valid_glob) {
                panic!("Valid glob {valid_glob} should be accepted, but got: {e}");
            }
        }
    }
}