1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::{Context, Result};
3use client::proto;
4use globset::{Glob, GlobMatcher};
5use itertools::Itertools;
6use language::{char_kind, BufferSnapshot};
7use regex::{Regex, RegexBuilder};
8use smol::future::yield_now;
9use std::{
10 io::{BufRead, BufReader, Read},
11 ops::Range,
12 path::{Path, PathBuf},
13 sync::Arc,
14};
15
/// A buffer/project search: either a literal text needle (backed by an
/// Aho-Corasick automaton) or a compiled regular expression, plus the
/// path filters restricting which files are searched.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    Text {
        /// Aho-Corasick automaton compiled from `query`; `Arc` keeps clones cheap.
        search: Arc<AhoCorasick<usize>>,
        /// The query exactly as the user entered it.
        query: Arc<str>,
        /// Require matches to be bounded by non-word characters.
        whole_word: bool,
        /// Distinguish letter case when matching.
        case_sensitive: bool,
        /// When non-empty, only files matching one of these are searched.
        files_to_include: Vec<PathMatcher>,
        /// Files matching any of these are never searched.
        files_to_exclude: Vec<PathMatcher>,
    },

    Regex {
        /// Compiled pattern (may include `\b` wrapping not present in `query`).
        regex: Regex,
        /// The pattern exactly as the user entered it.
        query: Arc<str>,
        /// True when the pattern spans lines; switches matching strategy.
        multiline: bool,
        /// Require matches to be bounded by non-word characters.
        whole_word: bool,
        /// Distinguish letter case when matching.
        case_sensitive: bool,
        /// When non-empty, only files matching one of these are searched.
        files_to_include: Vec<PathMatcher>,
        /// Files matching any of these are never searched.
        files_to_exclude: Vec<PathMatcher>,
    },
}
37
/// Matches file paths either literally (prefix match against the raw input
/// interpreted as a path) or via the same input compiled as a glob.
#[derive(Clone, Debug)]
pub struct PathMatcher {
    /// The raw input string as a path; used for prefix matching and `Display`.
    maybe_path: PathBuf,
    /// The raw input compiled as a glob matcher.
    glob: GlobMatcher,
}
43
44impl std::fmt::Display for PathMatcher {
45 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46 self.maybe_path.to_string_lossy().fmt(f)
47 }
48}
49
50impl PathMatcher {
51 pub fn new(maybe_glob: &str) -> Result<Self, globset::Error> {
52 Ok(PathMatcher {
53 glob: Glob::new(&maybe_glob)?.compile_matcher(),
54 maybe_path: PathBuf::from(maybe_glob),
55 })
56 }
57
58 pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
59 other.as_ref().starts_with(&self.maybe_path) || self.glob.is_match(other)
60 }
61}
62
impl SearchQuery {
    /// Builds a literal-text query. The needle is compiled into an
    /// Aho-Corasick automaton up front so the same query can cheaply scan
    /// byte streams and rope chunks later.
    pub fn text(
        query: impl ToString,
        whole_word: bool,
        case_sensitive: bool,
        files_to_include: Vec<PathMatcher>,
        files_to_exclude: Vec<PathMatcher>,
    ) -> Self {
        let query = query.to_string();
        let search = AhoCorasickBuilder::new()
            .auto_configure(&[&query])
            // Case folding happens inside the automaton (ASCII only), not by
            // lowercasing the haystack.
            .ascii_case_insensitive(!case_sensitive)
            .build(&[&query]);
        Self::Text {
            search: Arc::new(search),
            query: Arc::from(query),
            whole_word,
            case_sensitive,
            files_to_include,
            files_to_exclude,
        }
    }

    /// Builds a regex query.
    ///
    /// # Errors
    /// Fails when the pattern (after optional word-boundary wrapping) does
    /// not compile.
    pub fn regex(
        query: impl ToString,
        whole_word: bool,
        case_sensitive: bool,
        files_to_include: Vec<PathMatcher>,
        files_to_exclude: Vec<PathMatcher>,
    ) -> Result<Self> {
        let mut query = query.to_string();
        // Keep the pattern as the user typed it for display/serialization;
        // the compiled regex below may get wrapped with `\b`.
        let initial_query = Arc::from(query.as_str());
        if whole_word {
            let mut word_query = String::new();
            word_query.push_str("\\b");
            word_query.push_str(&query);
            word_query.push_str("\\b");
            query = word_query
        }

        // A pattern containing a literal newline or the `\n` escape is
        // treated as multiline: it is matched against whole texts instead of
        // line by line (see `detect` and `search`).
        let multiline = query.contains('\n') || query.contains("\\n");
        let regex = RegexBuilder::new(&query)
            .case_insensitive(!case_sensitive)
            .multi_line(multiline)
            .build()?;
        Ok(Self::Regex {
            regex,
            query: initial_query,
            multiline,
            whole_word,
            case_sensitive,
            files_to_include,
            files_to_exclude,
        })
    }

    /// Deserializes a query from its RPC message; the include/exclude lists
    /// arrive as comma-separated glob strings.
    pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
        if message.regex {
            Self::regex(
                message.query,
                message.whole_word,
                message.case_sensitive,
                deserialize_path_matches(&message.files_to_include)?,
                deserialize_path_matches(&message.files_to_exclude)?,
            )
        } else {
            Ok(Self::text(
                message.query,
                message.whole_word,
                message.case_sensitive,
                deserialize_path_matches(&message.files_to_include)?,
                deserialize_path_matches(&message.files_to_exclude)?,
            ))
        }
    }

    /// Serializes the query into its RPC message; inverse of
    /// [`Self::from_proto`] (path filters become comma-joined strings).
    pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
        proto::SearchProject {
            project_id,
            query: self.as_str().to_string(),
            regex: self.is_regex(),
            whole_word: self.whole_word(),
            case_sensitive: self.case_sensitive(),
            files_to_include: self
                .files_to_include()
                .iter()
                .map(|matcher| matcher.to_string())
                .join(","),
            files_to_exclude: self
                .files_to_exclude()
                .iter()
                .map(|matcher| matcher.to_string())
                .join(","),
        }
    }

    /// Returns whether the query matches anywhere in `stream`, stopping at
    /// the first hit. An empty query is defined to match nothing.
    ///
    /// # Errors
    /// Propagates I/O errors from reading `stream`.
    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
        if self.as_str().is_empty() {
            return Ok(false);
        }

        match self {
            Self::Text { search, .. } => {
                let mat = search.stream_find_iter(stream).next();
                match mat {
                    Some(Ok(_)) => Ok(true),
                    Some(Err(err)) => Err(err.into()),
                    None => Ok(false),
                }
            }
            Self::Regex {
                regex, multiline, ..
            } => {
                let mut reader = BufReader::new(stream);
                if *multiline {
                    // Multiline patterns need the entire input in memory.
                    let mut text = String::new();
                    if let Err(err) = reader.read_to_string(&mut text) {
                        Err(err.into())
                    } else {
                        Ok(regex.find(&text).is_some())
                    }
                } else {
                    // Single-line patterns can stream one line at a time and
                    // bail out early on the first match.
                    for line in reader.lines() {
                        let line = line?;
                        if regex.find(&line).is_some() {
                            return Ok(true);
                        }
                    }
                    Ok(false)
                }
            }
        }
    }

    /// Finds every match range in `buffer`, optionally restricted to
    /// `subrange`. Returned offsets are relative to the start of the searched
    /// slice (i.e. to `subrange.start` when one is given). Yields to the
    /// executor periodically so long scans stay cooperative.
    pub async fn search(
        &self,
        buffer: &BufferSnapshot,
        subrange: Option<Range<usize>>,
    ) -> Vec<Range<usize>> {
        // How many iterations (matches or chunks) between cooperative yields.
        const YIELD_INTERVAL: usize = 20000;

        if self.as_str().is_empty() {
            return Default::default();
        }

        // Offset of the searched slice within the full buffer; only needed to
        // resolve the language scope at match positions below.
        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
        let rope = if let Some(range) = subrange {
            buffer.as_rope().slice(range)
        } else {
            buffer.as_rope().clone()
        };

        let mut matches = Vec::new();
        match self {
            Self::Text {
                search, whole_word, ..
            } => {
                for (ix, mat) in search
                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
                    .enumerate()
                {
                    if (ix + 1) % YIELD_INTERVAL == 0 {
                        yield_now().await;
                    }

                    // unwrap: reading from the in-memory rope shouldn't
                    // produce an I/O error — TODO confirm against Rope's
                    // reader implementation.
                    let mat = mat.unwrap();
                    if *whole_word {
                        // Discard matches whose edges continue a word: compare
                        // the char kind just outside each end of the match
                        // with the kind just inside it.
                        let scope = buffer.language_scope_at(range_offset + mat.start());
                        let kind = |c| char_kind(&scope, c);

                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
                        // The query is non-empty (checked above), so every
                        // match contains at least one char.
                        let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
                        let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
                        let next_kind = rope.chars_at(mat.end()).next().map(kind);
                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
                            continue;
                        }
                    }
                    matches.push(mat.start()..mat.end())
                }
            }

            Self::Regex {
                regex, multiline, ..
            } => {
                if *multiline {
                    // Multiline patterns run over the full text in one pass.
                    let text = rope.to_string();
                    for (ix, mat) in regex.find_iter(&text).enumerate() {
                        if (ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        matches.push(mat.start()..mat.end());
                    }
                } else {
                    // Single-line patterns: reassemble lines from rope chunks
                    // and run the regex once per completed line. The trailing
                    // "\n" chunk flushes the final, possibly unterminated,
                    // line.
                    let mut line = String::new();
                    let mut line_offset = 0;
                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        for (newline_ix, text) in chunk.split('\n').enumerate() {
                            // Every split element after the first means a '\n'
                            // was crossed, i.e. `line` is now complete.
                            if newline_ix > 0 {
                                for mat in regex.find_iter(&line) {
                                    let start = line_offset + mat.start();
                                    let end = line_offset + mat.end();
                                    matches.push(start..end);
                                }

                                // +1 accounts for the newline byte itself.
                                line_offset += line.len() + 1;
                                line.clear();
                            }
                            line.push_str(text);
                        }
                    }
                }
            }
        }

        matches
    }

    /// The query exactly as the user entered it (without any `\b` wrapping
    /// added for whole-word regex matching).
    pub fn as_str(&self) -> &str {
        match self {
            Self::Text { query, .. } => query.as_ref(),
            Self::Regex { query, .. } => query.as_ref(),
        }
    }

    /// Whether matches must be bounded by non-word characters.
    pub fn whole_word(&self) -> bool {
        match self {
            Self::Text { whole_word, .. } => *whole_word,
            Self::Regex { whole_word, .. } => *whole_word,
        }
    }

    /// Whether matching distinguishes letter case.
    pub fn case_sensitive(&self) -> bool {
        match self {
            Self::Text { case_sensitive, .. } => *case_sensitive,
            Self::Regex { case_sensitive, .. } => *case_sensitive,
        }
    }

    /// Whether this is the `Regex` variant.
    pub fn is_regex(&self) -> bool {
        matches!(self, Self::Regex { .. })
    }

    /// Path filters a file must satisfy (when non-empty) to be searched.
    pub fn files_to_include(&self) -> &[PathMatcher] {
        match self {
            Self::Text {
                files_to_include, ..
            } => files_to_include,
            Self::Regex {
                files_to_include, ..
            } => files_to_include,
        }
    }

    /// Path filters that exclude a file from the search.
    pub fn files_to_exclude(&self) -> &[PathMatcher] {
        match self {
            Self::Text {
                files_to_exclude, ..
            } => files_to_exclude,
            Self::Regex {
                files_to_exclude, ..
            } => files_to_exclude,
        }
    }

    /// Whether a file at `file_path` should be searched: it must not match
    /// any exclude filter, and must match at least one include filter if any
    /// are set. `None` (a file-less buffer) passes only when there are no
    /// include filters.
    pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
        match file_path {
            Some(file_path) => {
                !self
                    .files_to_exclude()
                    .iter()
                    .any(|exclude_glob| exclude_glob.is_match(file_path))
                    && (self.files_to_include().is_empty()
                        || self
                            .files_to_include()
                            .iter()
                            .any(|include_glob| include_glob.is_match(file_path)))
            }
            None => self.files_to_include().is_empty(),
        }
    }
}
350
351fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
352 glob_set
353 .split(',')
354 .map(str::trim)
355 .filter(|glob_str| !glob_str.is_empty())
356 .map(|glob_str| {
357 PathMatcher::new(glob_str)
358 .with_context(|| format!("deserializing path match glob {glob_str}"))
359 })
360 .collect()
361}
362
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths (no glob metacharacters, or valid ones) must compile and
    /// must match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];
        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(valid_path) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            );
        }
    }

    /// Malformed globs must be rejected; well-formed globs must be accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(invalid_glob).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(valid_glob) {
                panic!("Valid glob {valid_glob} should be accepted, but got: {e}");
            }
        }
    }
}