search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use language::{char_kind, Rope};
  4use regex::{Regex, RegexBuilder};
  5use smol::future::yield_now;
  6use std::{
  7    io::{BufRead, BufReader, Read},
  8    ops::Range,
  9    sync::Arc,
 10};
 11
 12#[derive(Clone)]
 13pub enum SearchQuery {
 14    Text {
 15        search: Arc<AhoCorasick<usize>>,
 16        query: String,
 17        whole_word: bool,
 18    },
 19    Regex {
 20        multiline: bool,
 21        regex: Regex,
 22    },
 23}
 24
 25impl SearchQuery {
 26    pub fn text(query: impl ToString, whole_word: bool, case_sensitive: bool) -> Self {
 27        let query = query.to_string();
 28        let search = AhoCorasickBuilder::new()
 29            .auto_configure(&[&query])
 30            .ascii_case_insensitive(!case_sensitive)
 31            .build(&[&query]);
 32        Self::Text {
 33            search: Arc::new(search),
 34            query,
 35            whole_word,
 36        }
 37    }
 38
 39    pub fn regex(query: impl ToString, whole_word: bool, case_sensitive: bool) -> Result<Self> {
 40        let mut query = query.to_string();
 41        if whole_word {
 42            let mut word_query = String::new();
 43            word_query.push_str("\\b");
 44            word_query.push_str(&query);
 45            word_query.push_str("\\b");
 46            query = word_query
 47        }
 48
 49        let multiline = query.contains("\n") || query.contains("\\n");
 50        let regex = RegexBuilder::new(&query)
 51            .case_insensitive(!case_sensitive)
 52            .multi_line(multiline)
 53            .build()?;
 54        Ok(Self::Regex { multiline, regex })
 55    }
 56
 57    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
 58        if self.as_str().is_empty() {
 59            return Ok(false);
 60        }
 61
 62        match self {
 63            SearchQuery::Text { search, .. } => {
 64                let mat = search.stream_find_iter(stream).next();
 65                match mat {
 66                    Some(Ok(_)) => Ok(true),
 67                    Some(Err(err)) => Err(err.into()),
 68                    None => Ok(false),
 69                }
 70            }
 71            SearchQuery::Regex { multiline, regex } => {
 72                let mut reader = BufReader::new(stream);
 73                if *multiline {
 74                    let mut text = String::new();
 75                    if let Err(err) = reader.read_to_string(&mut text) {
 76                        Err(err.into())
 77                    } else {
 78                        Ok(regex.find(&text).is_some())
 79                    }
 80                } else {
 81                    for line in reader.lines() {
 82                        let line = line?;
 83                        if regex.find(&line).is_some() {
 84                            return Ok(true);
 85                        }
 86                    }
 87                    Ok(false)
 88                }
 89            }
 90        }
 91    }
 92
 93    pub async fn search(&self, rope: &Rope) -> Vec<Range<usize>> {
 94        const YIELD_INTERVAL: usize = 20000;
 95
 96        if self.as_str().is_empty() {
 97            return Default::default();
 98        }
 99
100        let mut matches = Vec::new();
101        match self {
102            SearchQuery::Text {
103                search, whole_word, ..
104            } => {
105                for (ix, mat) in search
106                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
107                    .enumerate()
108                {
109                    if (ix + 1) % YIELD_INTERVAL == 0 {
110                        yield_now().await;
111                    }
112
113                    let mat = mat.unwrap();
114                    if *whole_word {
115                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(char_kind);
116                        let start_kind = char_kind(rope.chars_at(mat.start()).next().unwrap());
117                        let end_kind = char_kind(rope.reversed_chars_at(mat.end()).next().unwrap());
118                        let next_kind = rope.chars_at(mat.end()).next().map(char_kind);
119                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
120                            continue;
121                        }
122                    }
123                    matches.push(mat.start()..mat.end())
124                }
125            }
126            SearchQuery::Regex { multiline, regex } => {
127                if *multiline {
128                    let text = rope.to_string();
129                    for (ix, mat) in regex.find_iter(&text).enumerate() {
130                        if (ix + 1) % YIELD_INTERVAL == 0 {
131                            yield_now().await;
132                        }
133
134                        matches.push(mat.start()..mat.end());
135                    }
136                } else {
137                    let mut line = String::new();
138                    let mut line_offset = 0;
139                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
140                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
141                            yield_now().await;
142                        }
143
144                        for (newline_ix, text) in chunk.split('\n').enumerate() {
145                            if newline_ix > 0 {
146                                for mat in regex.find_iter(&line) {
147                                    let start = line_offset + mat.start();
148                                    let end = line_offset + mat.end();
149                                    matches.push(start..end);
150                                }
151
152                                line_offset += line.len() + 1;
153                                line.clear();
154                            }
155                            line.push_str(text);
156                        }
157                    }
158                }
159            }
160        }
161        matches
162    }
163
164    fn as_str(&self) -> &str {
165        match self {
166            SearchQuery::Text { query, .. } => query.as_str(),
167            SearchQuery::Regex { regex, .. } => regex.as_str(),
168        }
169    }
170}