search.rs

  1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
  2use anyhow::Result;
  3use client::proto;
  4use itertools::Itertools;
  5use language::{char_kind, Rope};
  6use regex::{Regex, RegexBuilder};
  7use smol::future::yield_now;
  8use std::{
  9    io::{BufRead, BufReader, Read},
 10    ops::Range,
 11    path::Path,
 12    sync::Arc,
 13};
 14
 15#[derive(Clone, Debug)]
 16pub enum SearchQuery {
 17    Text {
 18        search: Arc<AhoCorasick<usize>>,
 19        query: Arc<str>,
 20        whole_word: bool,
 21        case_sensitive: bool,
 22        files_to_include: Vec<glob::Pattern>,
 23        files_to_exclude: Vec<glob::Pattern>,
 24    },
 25    Regex {
 26        regex: Regex,
 27        query: Arc<str>,
 28        multiline: bool,
 29        whole_word: bool,
 30        case_sensitive: bool,
 31        files_to_include: Vec<glob::Pattern>,
 32        files_to_exclude: Vec<glob::Pattern>,
 33    },
 34}
 35
 36impl SearchQuery {
 37    pub fn text(
 38        query: impl ToString,
 39        whole_word: bool,
 40        case_sensitive: bool,
 41        files_to_include: Vec<glob::Pattern>,
 42        files_to_exclude: Vec<glob::Pattern>,
 43    ) -> Self {
 44        let query = query.to_string();
 45        let search = AhoCorasickBuilder::new()
 46            .auto_configure(&[&query])
 47            .ascii_case_insensitive(!case_sensitive)
 48            .build(&[&query]);
 49        Self::Text {
 50            search: Arc::new(search),
 51            query: Arc::from(query),
 52            whole_word,
 53            case_sensitive,
 54            files_to_include,
 55            files_to_exclude,
 56        }
 57    }
 58
 59    pub fn regex(
 60        query: impl ToString,
 61        whole_word: bool,
 62        case_sensitive: bool,
 63        files_to_include: Vec<glob::Pattern>,
 64        files_to_exclude: Vec<glob::Pattern>,
 65    ) -> Result<Self> {
 66        let mut query = query.to_string();
 67        let initial_query = Arc::from(query.as_str());
 68        if whole_word {
 69            let mut word_query = String::new();
 70            word_query.push_str("\\b");
 71            word_query.push_str(&query);
 72            word_query.push_str("\\b");
 73            query = word_query
 74        }
 75
 76        let multiline = query.contains('\n') || query.contains("\\n");
 77        let regex = RegexBuilder::new(&query)
 78            .case_insensitive(!case_sensitive)
 79            .multi_line(multiline)
 80            .build()?;
 81        Ok(Self::Regex {
 82            regex,
 83            query: initial_query,
 84            multiline,
 85            whole_word,
 86            case_sensitive,
 87            files_to_include,
 88            files_to_exclude,
 89        })
 90    }
 91
 92    pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
 93        if message.regex {
 94            Self::regex(
 95                message.query,
 96                message.whole_word,
 97                message.case_sensitive,
 98                message
 99                    .files_to_include
100                    .split(',')
101                    .map(str::trim)
102                    .filter(|glob_str| !glob_str.is_empty())
103                    .map(|glob_str| glob::Pattern::new(glob_str))
104                    .collect::<Result<_, _>>()?,
105                message
106                    .files_to_exclude
107                    .split(',')
108                    .map(str::trim)
109                    .filter(|glob_str| !glob_str.is_empty())
110                    .map(|glob_str| glob::Pattern::new(glob_str))
111                    .collect::<Result<_, _>>()?,
112            )
113        } else {
114            Ok(Self::text(
115                message.query,
116                message.whole_word,
117                message.case_sensitive,
118                message
119                    .files_to_include
120                    .split(',')
121                    .map(str::trim)
122                    .filter(|glob_str| !glob_str.is_empty())
123                    .map(|glob_str| glob::Pattern::new(glob_str))
124                    .collect::<Result<_, _>>()?,
125                message
126                    .files_to_exclude
127                    .split(',')
128                    .map(str::trim)
129                    .filter(|glob_str| !glob_str.is_empty())
130                    .map(|glob_str| glob::Pattern::new(glob_str))
131                    .collect::<Result<_, _>>()?,
132            ))
133        }
134    }
135
136    pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
137        proto::SearchProject {
138            project_id,
139            query: self.as_str().to_string(),
140            regex: self.is_regex(),
141            whole_word: self.whole_word(),
142            case_sensitive: self.case_sensitive(),
143            files_to_include: self
144                .files_to_include()
145                .iter()
146                .map(ToString::to_string)
147                .join(","),
148            files_to_exclude: self
149                .files_to_exclude()
150                .iter()
151                .map(ToString::to_string)
152                .join(","),
153        }
154    }
155
156    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
157        if self.as_str().is_empty() {
158            return Ok(false);
159        }
160
161        match self {
162            Self::Text { search, .. } => {
163                let mat = search.stream_find_iter(stream).next();
164                match mat {
165                    Some(Ok(_)) => Ok(true),
166                    Some(Err(err)) => Err(err.into()),
167                    None => Ok(false),
168                }
169            }
170            Self::Regex {
171                regex, multiline, ..
172            } => {
173                let mut reader = BufReader::new(stream);
174                if *multiline {
175                    let mut text = String::new();
176                    if let Err(err) = reader.read_to_string(&mut text) {
177                        Err(err.into())
178                    } else {
179                        Ok(regex.find(&text).is_some())
180                    }
181                } else {
182                    for line in reader.lines() {
183                        let line = line?;
184                        if regex.find(&line).is_some() {
185                            return Ok(true);
186                        }
187                    }
188                    Ok(false)
189                }
190            }
191        }
192    }
193
194    pub async fn search(&self, rope: &Rope) -> Vec<Range<usize>> {
195        const YIELD_INTERVAL: usize = 20000;
196
197        if self.as_str().is_empty() {
198            return Default::default();
199        }
200
201        let mut matches = Vec::new();
202        match self {
203            Self::Text {
204                search, whole_word, ..
205            } => {
206                for (ix, mat) in search
207                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
208                    .enumerate()
209                {
210                    if (ix + 1) % YIELD_INTERVAL == 0 {
211                        yield_now().await;
212                    }
213
214                    let mat = mat.unwrap();
215                    if *whole_word {
216                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(char_kind);
217                        let start_kind = char_kind(rope.chars_at(mat.start()).next().unwrap());
218                        let end_kind = char_kind(rope.reversed_chars_at(mat.end()).next().unwrap());
219                        let next_kind = rope.chars_at(mat.end()).next().map(char_kind);
220                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
221                            continue;
222                        }
223                    }
224                    matches.push(mat.start()..mat.end())
225                }
226            }
227            Self::Regex {
228                regex, multiline, ..
229            } => {
230                if *multiline {
231                    let text = rope.to_string();
232                    for (ix, mat) in regex.find_iter(&text).enumerate() {
233                        if (ix + 1) % YIELD_INTERVAL == 0 {
234                            yield_now().await;
235                        }
236
237                        matches.push(mat.start()..mat.end());
238                    }
239                } else {
240                    let mut line = String::new();
241                    let mut line_offset = 0;
242                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
243                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
244                            yield_now().await;
245                        }
246
247                        for (newline_ix, text) in chunk.split('\n').enumerate() {
248                            if newline_ix > 0 {
249                                for mat in regex.find_iter(&line) {
250                                    let start = line_offset + mat.start();
251                                    let end = line_offset + mat.end();
252                                    matches.push(start..end);
253                                }
254
255                                line_offset += line.len() + 1;
256                                line.clear();
257                            }
258                            line.push_str(text);
259                        }
260                    }
261                }
262            }
263        }
264        matches
265    }
266
267    pub fn as_str(&self) -> &str {
268        match self {
269            Self::Text { query, .. } => query.as_ref(),
270            Self::Regex { query, .. } => query.as_ref(),
271        }
272    }
273
274    pub fn whole_word(&self) -> bool {
275        match self {
276            Self::Text { whole_word, .. } => *whole_word,
277            Self::Regex { whole_word, .. } => *whole_word,
278        }
279    }
280
281    pub fn case_sensitive(&self) -> bool {
282        match self {
283            Self::Text { case_sensitive, .. } => *case_sensitive,
284            Self::Regex { case_sensitive, .. } => *case_sensitive,
285        }
286    }
287
288    pub fn is_regex(&self) -> bool {
289        matches!(self, Self::Regex { .. })
290    }
291
292    pub fn files_to_include(&self) -> &[glob::Pattern] {
293        match self {
294            Self::Text {
295                files_to_include, ..
296            } => files_to_include,
297            Self::Regex {
298                files_to_include, ..
299            } => files_to_include,
300        }
301    }
302
303    pub fn files_to_exclude(&self) -> &[glob::Pattern] {
304        match self {
305            Self::Text {
306                files_to_exclude, ..
307            } => files_to_exclude,
308            Self::Regex {
309                files_to_exclude, ..
310            } => files_to_exclude,
311        }
312    }
313
314    pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
315        match file_path {
316            Some(file_path) => {
317                !self
318                    .files_to_exclude()
319                    .iter()
320                    .any(|exclude_glob| exclude_glob.matches_path(file_path))
321                    && (self.files_to_include().is_empty()
322                        || self
323                            .files_to_include()
324                            .iter()
325                            .any(|include_glob| include_glob.matches_path(file_path)))
326            }
327            None => self.files_to_include().is_empty(),
328        }
329    }
330}