search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use globset::{Glob, GlobMatcher};
  5use itertools::Itertools;
  6use language::{char_kind, Rope};
  7use regex::{Regex, RegexBuilder};
  8use smol::future::yield_now;
  9use std::{
 10    io::{BufRead, BufReader, Read},
 11    ops::Range,
 12    path::Path,
 13    sync::Arc,
 14};
 15
/// A search request: either a literal text search or a regular-expression search,
/// together with its matching options and path filters.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search.
    Text {
        /// Aho-Corasick automaton built from `query`.
        search: Arc<AhoCorasick<usize>>,
        /// The search text as passed to the constructor.
        query: Arc<str>,
        /// When set, `search` drops matches that are not bounded by a change in character kind.
        whole_word: bool,
        /// When false, the automaton is built ASCII-case-insensitive.
        case_sensitive: bool,
        /// If non-empty, a path must match at least one of these globs to be accepted.
        files_to_include: Vec<GlobMatcher>,
        /// A path matching any of these globs is rejected.
        files_to_exclude: Vec<GlobMatcher>,
    },
    /// Regular-expression search.
    Regex {
        /// The compiled expression, including any word-boundary anchors added for
        /// whole-word searches.
        regex: Regex,
        /// The pattern as passed to the constructor, before any anchors were added.
        query: Arc<str>,
        /// True when the pattern contains a newline or a `\n` escape; such patterns are
        /// matched against the whole text instead of line by line.
        multiline: bool,
        /// Whether the pattern was anchored with `\b` on both sides.
        whole_word: bool,
        /// When false, the expression is compiled case-insensitive.
        case_sensitive: bool,
        /// If non-empty, a path must match at least one of these globs to be accepted.
        files_to_include: Vec<GlobMatcher>,
        /// A path matching any of these globs is rejected.
        files_to_exclude: Vec<GlobMatcher>,
    },
}
 36
 37impl SearchQuery {
 38    pub fn text(
 39        query: impl ToString,
 40        whole_word: bool,
 41        case_sensitive: bool,
 42        files_to_include: Vec<GlobMatcher>,
 43        files_to_exclude: Vec<GlobMatcher>,
 44    ) -> Self {
 45        let query = query.to_string();
 46        let search = AhoCorasickBuilder::new()
 47            .auto_configure(&[&query])
 48            .ascii_case_insensitive(!case_sensitive)
 49            .build(&[&query]);
 50        Self::Text {
 51            search: Arc::new(search),
 52            query: Arc::from(query),
 53            whole_word,
 54            case_sensitive,
 55            files_to_include,
 56            files_to_exclude,
 57        }
 58    }
 59
 60    pub fn regex(
 61        query: impl ToString,
 62        whole_word: bool,
 63        case_sensitive: bool,
 64        files_to_include: Vec<GlobMatcher>,
 65        files_to_exclude: Vec<GlobMatcher>,
 66    ) -> Result<Self> {
 67        let mut query = query.to_string();
 68        let initial_query = Arc::from(query.as_str());
 69        if whole_word {
 70            let mut word_query = String::new();
 71            word_query.push_str("\\b");
 72            word_query.push_str(&query);
 73            word_query.push_str("\\b");
 74            query = word_query
 75        }
 76
 77        let multiline = query.contains('\n') || query.contains("\\n");
 78        let regex = RegexBuilder::new(&query)
 79            .case_insensitive(!case_sensitive)
 80            .multi_line(multiline)
 81            .build()?;
 82        Ok(Self::Regex {
 83            regex,
 84            query: initial_query,
 85            multiline,
 86            whole_word,
 87            case_sensitive,
 88            files_to_include,
 89            files_to_exclude,
 90        })
 91    }
 92
 93    pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
 94        if message.regex {
 95            Self::regex(
 96                message.query,
 97                message.whole_word,
 98                message.case_sensitive,
 99                deserialize_globs(&message.files_to_include)?,
100                deserialize_globs(&message.files_to_exclude)?,
101            )
102        } else {
103            Ok(Self::text(
104                message.query,
105                message.whole_word,
106                message.case_sensitive,
107                deserialize_globs(&message.files_to_include)?,
108                deserialize_globs(&message.files_to_exclude)?,
109            ))
110        }
111    }
112
113    pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
114        proto::SearchProject {
115            project_id,
116            query: self.as_str().to_string(),
117            regex: self.is_regex(),
118            whole_word: self.whole_word(),
119            case_sensitive: self.case_sensitive(),
120            files_to_include: self
121                .files_to_include()
122                .iter()
123                .map(|g| g.glob().to_string())
124                .join(","),
125            files_to_exclude: self
126                .files_to_exclude()
127                .iter()
128                .map(|g| g.glob().to_string())
129                .join(","),
130        }
131    }
132
133    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
134        if self.as_str().is_empty() {
135            return Ok(false);
136        }
137
138        match self {
139            Self::Text { search, .. } => {
140                let mat = search.stream_find_iter(stream).next();
141                match mat {
142                    Some(Ok(_)) => Ok(true),
143                    Some(Err(err)) => Err(err.into()),
144                    None => Ok(false),
145                }
146            }
147            Self::Regex {
148                regex, multiline, ..
149            } => {
150                let mut reader = BufReader::new(stream);
151                if *multiline {
152                    let mut text = String::new();
153                    if let Err(err) = reader.read_to_string(&mut text) {
154                        Err(err.into())
155                    } else {
156                        Ok(regex.find(&text).is_some())
157                    }
158                } else {
159                    for line in reader.lines() {
160                        let line = line?;
161                        if regex.find(&line).is_some() {
162                            return Ok(true);
163                        }
164                    }
165                    Ok(false)
166                }
167            }
168        }
169    }
170
171    pub async fn search(&self, rope: &Rope) -> Vec<Range<usize>> {
172        const YIELD_INTERVAL: usize = 20000;
173
174        if self.as_str().is_empty() {
175            return Default::default();
176        }
177
178        let mut matches = Vec::new();
179        match self {
180            Self::Text {
181                search, whole_word, ..
182            } => {
183                for (ix, mat) in search
184                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
185                    .enumerate()
186                {
187                    if (ix + 1) % YIELD_INTERVAL == 0 {
188                        yield_now().await;
189                    }
190
191                    let mat = mat.unwrap();
192                    if *whole_word {
193                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(char_kind);
194                        let start_kind = char_kind(rope.chars_at(mat.start()).next().unwrap());
195                        let end_kind = char_kind(rope.reversed_chars_at(mat.end()).next().unwrap());
196                        let next_kind = rope.chars_at(mat.end()).next().map(char_kind);
197                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
198                            continue;
199                        }
200                    }
201                    matches.push(mat.start()..mat.end())
202                }
203            }
204            Self::Regex {
205                regex, multiline, ..
206            } => {
207                if *multiline {
208                    let text = rope.to_string();
209                    for (ix, mat) in regex.find_iter(&text).enumerate() {
210                        if (ix + 1) % YIELD_INTERVAL == 0 {
211                            yield_now().await;
212                        }
213
214                        matches.push(mat.start()..mat.end());
215                    }
216                } else {
217                    let mut line = String::new();
218                    let mut line_offset = 0;
219                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
220                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
221                            yield_now().await;
222                        }
223
224                        for (newline_ix, text) in chunk.split('\n').enumerate() {
225                            if newline_ix > 0 {
226                                for mat in regex.find_iter(&line) {
227                                    let start = line_offset + mat.start();
228                                    let end = line_offset + mat.end();
229                                    matches.push(start..end);
230                                }
231
232                                line_offset += line.len() + 1;
233                                line.clear();
234                            }
235                            line.push_str(text);
236                        }
237                    }
238                }
239            }
240        }
241        matches
242    }
243
244    pub fn as_str(&self) -> &str {
245        match self {
246            Self::Text { query, .. } => query.as_ref(),
247            Self::Regex { query, .. } => query.as_ref(),
248        }
249    }
250
251    pub fn whole_word(&self) -> bool {
252        match self {
253            Self::Text { whole_word, .. } => *whole_word,
254            Self::Regex { whole_word, .. } => *whole_word,
255        }
256    }
257
258    pub fn case_sensitive(&self) -> bool {
259        match self {
260            Self::Text { case_sensitive, .. } => *case_sensitive,
261            Self::Regex { case_sensitive, .. } => *case_sensitive,
262        }
263    }
264
265    pub fn is_regex(&self) -> bool {
266        matches!(self, Self::Regex { .. })
267    }
268
269    pub fn files_to_include(&self) -> &[GlobMatcher] {
270        match self {
271            Self::Text {
272                files_to_include, ..
273            } => files_to_include,
274            Self::Regex {
275                files_to_include, ..
276            } => files_to_include,
277        }
278    }
279
280    pub fn files_to_exclude(&self) -> &[GlobMatcher] {
281        match self {
282            Self::Text {
283                files_to_exclude, ..
284            } => files_to_exclude,
285            Self::Regex {
286                files_to_exclude, ..
287            } => files_to_exclude,
288        }
289    }
290
291    pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
292        match file_path {
293            Some(file_path) => {
294                !self
295                    .files_to_exclude()
296                    .iter()
297                    .any(|exclude_glob| exclude_glob.is_match(file_path))
298                    && (self.files_to_include().is_empty()
299                        || self
300                            .files_to_include()
301                            .iter()
302                            .any(|include_glob| include_glob.is_match(file_path)))
303            }
304            None => self.files_to_include().is_empty(),
305        }
306    }
307}
308
309fn deserialize_globs(glob_set: &str) -> Result<Vec<GlobMatcher>> {
310    glob_set
311        .split(',')
312        .map(str::trim)
313        .filter(|glob_str| !glob_str.is_empty())
314        .map(|glob_str| Ok(Glob::new(glob_str)?.compile_matcher()))
315        .collect()
316}