search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use language::{char_kind, BufferSnapshot};
  5use regex::{Captures, Regex, RegexBuilder};
  6use smol::future::yield_now;
  7use std::{
  8    borrow::Cow,
  9    io::{BufRead, BufReader, Read},
 10    ops::Range,
 11    path::Path,
 12    sync::{Arc, OnceLock},
 13};
 14use util::paths::PathMatcher;
 15
 16static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: OnceLock<Regex> = OnceLock::new();
 17
 18#[derive(Clone, Debug)]
 19pub struct SearchInputs {
 20    query: Arc<str>,
 21    files_to_include: PathMatcher,
 22    files_to_exclude: PathMatcher,
 23}
 24
 25impl SearchInputs {
 26    pub fn as_str(&self) -> &str {
 27        self.query.as_ref()
 28    }
 29    pub fn files_to_include(&self) -> &PathMatcher {
 30        &self.files_to_include
 31    }
 32    pub fn files_to_exclude(&self) -> &PathMatcher {
 33        &self.files_to_exclude
 34    }
 35}
 36#[derive(Clone, Debug)]
 37pub enum SearchQuery {
 38    Text {
 39        search: Arc<AhoCorasick>,
 40        replacement: Option<String>,
 41        whole_word: bool,
 42        case_sensitive: bool,
 43        include_ignored: bool,
 44        inner: SearchInputs,
 45    },
 46
 47    Regex {
 48        regex: Regex,
 49        replacement: Option<String>,
 50        multiline: bool,
 51        whole_word: bool,
 52        case_sensitive: bool,
 53        include_ignored: bool,
 54        inner: SearchInputs,
 55    },
 56}
 57
 58impl SearchQuery {
 59    pub fn text(
 60        query: impl ToString,
 61        whole_word: bool,
 62        case_sensitive: bool,
 63        include_ignored: bool,
 64        files_to_include: PathMatcher,
 65        files_to_exclude: PathMatcher,
 66    ) -> Result<Self> {
 67        let query = query.to_string();
 68        let search = AhoCorasickBuilder::new()
 69            .ascii_case_insensitive(!case_sensitive)
 70            .build(&[&query])?;
 71        let inner = SearchInputs {
 72            query: query.into(),
 73            files_to_exclude,
 74            files_to_include,
 75        };
 76        Ok(Self::Text {
 77            search: Arc::new(search),
 78            replacement: None,
 79            whole_word,
 80            case_sensitive,
 81            include_ignored,
 82            inner,
 83        })
 84    }
 85
 86    pub fn regex(
 87        query: impl ToString,
 88        whole_word: bool,
 89        case_sensitive: bool,
 90        include_ignored: bool,
 91        files_to_include: PathMatcher,
 92        files_to_exclude: PathMatcher,
 93    ) -> Result<Self> {
 94        let mut query = query.to_string();
 95        let initial_query = Arc::from(query.as_str());
 96        if whole_word {
 97            let mut word_query = String::new();
 98            word_query.push_str("\\b");
 99            word_query.push_str(&query);
100            word_query.push_str("\\b");
101            query = word_query
102        }
103
104        let multiline = query.contains('\n') || query.contains("\\n");
105        let regex = RegexBuilder::new(&query)
106            .case_insensitive(!case_sensitive)
107            .multi_line(multiline)
108            .build()?;
109        let inner = SearchInputs {
110            query: initial_query,
111            files_to_exclude,
112            files_to_include,
113        };
114        Ok(Self::Regex {
115            regex,
116            replacement: None,
117            multiline,
118            whole_word,
119            case_sensitive,
120            include_ignored,
121            inner,
122        })
123    }
124
125    pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
126        if message.regex {
127            Self::regex(
128                message.query,
129                message.whole_word,
130                message.case_sensitive,
131                message.include_ignored,
132                deserialize_path_matches(&message.files_to_include)?,
133                deserialize_path_matches(&message.files_to_exclude)?,
134            )
135        } else {
136            Self::text(
137                message.query,
138                message.whole_word,
139                message.case_sensitive,
140                message.include_ignored,
141                deserialize_path_matches(&message.files_to_include)?,
142                deserialize_path_matches(&message.files_to_exclude)?,
143            )
144        }
145    }
146    pub fn with_replacement(mut self, new_replacement: String) -> Self {
147        match self {
148            Self::Text {
149                ref mut replacement,
150                ..
151            }
152            | Self::Regex {
153                ref mut replacement,
154                ..
155            } => {
156                *replacement = Some(new_replacement);
157                self
158            }
159        }
160    }
161    pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
162        proto::SearchProject {
163            project_id,
164            query: self.as_str().to_string(),
165            regex: self.is_regex(),
166            whole_word: self.whole_word(),
167            case_sensitive: self.case_sensitive(),
168            include_ignored: self.include_ignored(),
169            files_to_include: self.files_to_include().sources().join(","),
170            files_to_exclude: self.files_to_exclude().sources().join(","),
171        }
172    }
173
174    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
175        if self.as_str().is_empty() {
176            return Ok(false);
177        }
178
179        match self {
180            Self::Text { search, .. } => {
181                let mat = search.stream_find_iter(stream).next();
182                match mat {
183                    Some(Ok(_)) => Ok(true),
184                    Some(Err(err)) => Err(err.into()),
185                    None => Ok(false),
186                }
187            }
188            Self::Regex {
189                regex, multiline, ..
190            } => {
191                let mut reader = BufReader::new(stream);
192                if *multiline {
193                    let mut text = String::new();
194                    if let Err(err) = reader.read_to_string(&mut text) {
195                        Err(err.into())
196                    } else {
197                        Ok(regex.find(&text).is_some())
198                    }
199                } else {
200                    for line in reader.lines() {
201                        let line = line?;
202                        if regex.find(&line).is_some() {
203                            return Ok(true);
204                        }
205                    }
206                    Ok(false)
207                }
208            }
209        }
210    }
211    /// Returns the replacement text for this `SearchQuery`.
212    pub fn replacement(&self) -> Option<&str> {
213        match self {
214            SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
215                replacement.as_deref()
216            }
217        }
218    }
219    /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
220    pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
221        match self {
222            SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
223            SearchQuery::Regex {
224                regex, replacement, ..
225            } => {
226                if let Some(replacement) = replacement {
227                    let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX
228                        .get_or_init(|| Regex::new(r"\\\\|\\n|\\t").unwrap())
229                        .replace_all(replacement, |c: &Captures| {
230                            match c.get(0).unwrap().as_str() {
231                                r"\\" => "\\",
232                                r"\n" => "\n",
233                                r"\t" => "\t",
234                                x => unreachable!("Unexpected escape sequence: {}", x),
235                            }
236                        });
237                    Some(regex.replace(text, replacement))
238                } else {
239                    None
240                }
241            }
242        }
243    }
244
245    pub async fn search(
246        &self,
247        buffer: &BufferSnapshot,
248        subrange: Option<Range<usize>>,
249    ) -> Vec<Range<usize>> {
250        const YIELD_INTERVAL: usize = 20000;
251
252        if self.as_str().is_empty() {
253            return Default::default();
254        }
255
256        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
257        let rope = if let Some(range) = subrange {
258            buffer.as_rope().slice(range)
259        } else {
260            buffer.as_rope().clone()
261        };
262
263        let mut matches = Vec::new();
264        match self {
265            Self::Text {
266                search, whole_word, ..
267            } => {
268                for (ix, mat) in search
269                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
270                    .enumerate()
271                {
272                    if (ix + 1) % YIELD_INTERVAL == 0 {
273                        yield_now().await;
274                    }
275
276                    let mat = mat.unwrap();
277                    if *whole_word {
278                        let scope = buffer.language_scope_at(range_offset + mat.start());
279                        let kind = |c| char_kind(&scope, c);
280
281                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
282                        let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
283                        let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
284                        let next_kind = rope.chars_at(mat.end()).next().map(kind);
285                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
286                            continue;
287                        }
288                    }
289                    matches.push(mat.start()..mat.end())
290                }
291            }
292
293            Self::Regex {
294                regex, multiline, ..
295            } => {
296                if *multiline {
297                    let text = rope.to_string();
298                    for (ix, mat) in regex.find_iter(&text).enumerate() {
299                        if (ix + 1) % YIELD_INTERVAL == 0 {
300                            yield_now().await;
301                        }
302
303                        matches.push(mat.start()..mat.end());
304                    }
305                } else {
306                    let mut line = String::new();
307                    let mut line_offset = 0;
308                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
309                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
310                            yield_now().await;
311                        }
312
313                        for (newline_ix, text) in chunk.split('\n').enumerate() {
314                            if newline_ix > 0 {
315                                for mat in regex.find_iter(&line) {
316                                    let start = line_offset + mat.start();
317                                    let end = line_offset + mat.end();
318                                    matches.push(start..end);
319                                }
320
321                                line_offset += line.len() + 1;
322                                line.clear();
323                            }
324                            line.push_str(text);
325                        }
326                    }
327                }
328            }
329        }
330
331        matches
332    }
333
334    pub fn is_empty(&self) -> bool {
335        self.as_str().is_empty()
336    }
337
338    pub fn as_str(&self) -> &str {
339        self.as_inner().as_str()
340    }
341
342    pub fn whole_word(&self) -> bool {
343        match self {
344            Self::Text { whole_word, .. } => *whole_word,
345            Self::Regex { whole_word, .. } => *whole_word,
346        }
347    }
348
349    pub fn case_sensitive(&self) -> bool {
350        match self {
351            Self::Text { case_sensitive, .. } => *case_sensitive,
352            Self::Regex { case_sensitive, .. } => *case_sensitive,
353        }
354    }
355
356    pub fn include_ignored(&self) -> bool {
357        match self {
358            Self::Text {
359                include_ignored, ..
360            } => *include_ignored,
361            Self::Regex {
362                include_ignored, ..
363            } => *include_ignored,
364        }
365    }
366
367    pub fn is_regex(&self) -> bool {
368        matches!(self, Self::Regex { .. })
369    }
370
371    pub fn files_to_include(&self) -> &PathMatcher {
372        self.as_inner().files_to_include()
373    }
374
375    pub fn files_to_exclude(&self) -> &PathMatcher {
376        self.as_inner().files_to_exclude()
377    }
378
379    pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
380        match file_path {
381            Some(file_path) => {
382                let mut path = file_path.to_path_buf();
383                loop {
384                    if self.files_to_exclude().is_match(&path) {
385                        return false;
386                    } else if self.files_to_include().sources().is_empty()
387                        || self.files_to_include().is_match(&path)
388                    {
389                        return true;
390                    } else if !path.pop() {
391                        return false;
392                    }
393                }
394            }
395            None => self.files_to_include().sources().is_empty(),
396        }
397    }
398    pub fn as_inner(&self) -> &SearchInputs {
399        match self {
400            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
401        }
402    }
403}
404
405fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
406    let globs = glob_set
407        .split(',')
408        .map(str::trim)
409        .filter_map(|glob_str| (!glob_str.is_empty()).then(|| glob_str.to_owned()))
410        .collect::<Vec<_>>();
411    Ok(PathMatcher::new(&globs)?)
412}
413
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted as matcher sources and match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        const VALID_PATHS: [&str; 7] = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in VALID_PATHS {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed ones accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        for invalid_glob in ["dir/[].txt", "dir/[a-z.txt", "dir/{file"] {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        for valid_glob in [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ] {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}