1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::{Context, Result};
3use client::proto;
4use globset::{Glob, GlobMatcher};
5use itertools::Itertools;
6use language::{char_kind, BufferSnapshot};
7use regex::{Regex, RegexBuilder};
8use smol::future::yield_now;
9use std::{
10 io::{BufRead, BufReader, Read},
11 ops::Range,
12 path::{Path, PathBuf},
13 sync::Arc,
14};
15
/// The caller-supplied parts of a search: the query text plus the path
/// filters that decide which files are eligible.
#[derive(Clone, Debug)]
pub struct SearchInputs {
    /// Query text as it was handed to the constructor. For regex queries this
    /// is the text from before any word-boundary anchors are added.
    query: Arc<str>,
    /// When non-empty, a path must match at least one of these to be searched.
    files_to_include: Vec<PathMatcher>,
    /// A path matching any of these is never searched.
    files_to_exclude: Vec<PathMatcher>,
}
22
23impl SearchInputs {
24 pub fn as_str(&self) -> &str {
25 self.query.as_ref()
26 }
27 pub fn files_to_include(&self) -> &[PathMatcher] {
28 &self.files_to_include
29 }
30 pub fn files_to_exclude(&self) -> &[PathMatcher] {
31 &self.files_to_exclude
32 }
33}
/// A compiled search, either a literal text search or a regular expression.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search.
    Text {
        /// Automaton built from the single query string.
        search: Arc<AhoCorasick<usize>>,
        /// Whether matches must not be adjacent to characters of the same kind.
        whole_word: bool,
        /// When false, the automaton is built ASCII-case-insensitive.
        case_sensitive: bool,
        /// Original query text and path filters.
        inner: SearchInputs,
    },

    /// Regular-expression search.
    Regex {
        /// Compiled pattern (already anchored with `\b` when `whole_word` is set).
        regex: Regex,

        /// True when the pattern contains a newline or the two-character
        /// sequence `\n`; such patterns are run over the whole text rather
        /// than line by line.
        multiline: bool,
        /// Whether the pattern was wrapped in word-boundary anchors.
        whole_word: bool,
        /// When false, the pattern is compiled case-insensitively.
        case_sensitive: bool,
        /// Original query text and path filters.
        inner: SearchInputs,
    },
}
52
/// Matches file paths against a user-supplied string that is treated both as
/// a literal path prefix and as a glob pattern.
#[derive(Clone, Debug)]
pub struct PathMatcher {
    /// The raw input interpreted as a plain path; also used for display.
    maybe_path: PathBuf,
    /// The raw input compiled as a glob.
    glob: GlobMatcher,
}
58
59impl std::fmt::Display for PathMatcher {
60 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61 self.maybe_path.to_string_lossy().fmt(f)
62 }
63}
64
65impl PathMatcher {
66 pub fn new(maybe_glob: &str) -> Result<Self, globset::Error> {
67 Ok(PathMatcher {
68 glob: Glob::new(&maybe_glob)?.compile_matcher(),
69 maybe_path: PathBuf::from(maybe_glob),
70 })
71 }
72
73 pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
74 other.as_ref().starts_with(&self.maybe_path) || self.glob.is_match(other)
75 }
76}
77
78impl SearchQuery {
79 pub fn text(
80 query: impl ToString,
81 whole_word: bool,
82 case_sensitive: bool,
83 files_to_include: Vec<PathMatcher>,
84 files_to_exclude: Vec<PathMatcher>,
85 ) -> Self {
86 let query = query.to_string();
87 let search = AhoCorasickBuilder::new()
88 .auto_configure(&[&query])
89 .ascii_case_insensitive(!case_sensitive)
90 .build(&[&query]);
91 let inner = SearchInputs {
92 query: query.into(),
93 files_to_exclude,
94 files_to_include,
95 };
96 Self::Text {
97 search: Arc::new(search),
98 whole_word,
99 case_sensitive,
100 inner,
101 }
102 }
103
104 pub fn regex(
105 query: impl ToString,
106 whole_word: bool,
107 case_sensitive: bool,
108 files_to_include: Vec<PathMatcher>,
109 files_to_exclude: Vec<PathMatcher>,
110 ) -> Result<Self> {
111 let mut query = query.to_string();
112 let initial_query = Arc::from(query.as_str());
113 if whole_word {
114 let mut word_query = String::new();
115 word_query.push_str("\\b");
116 word_query.push_str(&query);
117 word_query.push_str("\\b");
118 query = word_query
119 }
120
121 let multiline = query.contains('\n') || query.contains("\\n");
122 let regex = RegexBuilder::new(&query)
123 .case_insensitive(!case_sensitive)
124 .multi_line(multiline)
125 .build()?;
126 let inner = SearchInputs {
127 query: initial_query,
128 files_to_exclude,
129 files_to_include,
130 };
131 Ok(Self::Regex {
132 regex,
133 multiline,
134 whole_word,
135 case_sensitive,
136 inner,
137 })
138 }
139
140 pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
141 if message.regex {
142 Self::regex(
143 message.query,
144 message.whole_word,
145 message.case_sensitive,
146 deserialize_path_matches(&message.files_to_include)?,
147 deserialize_path_matches(&message.files_to_exclude)?,
148 )
149 } else {
150 Ok(Self::text(
151 message.query,
152 message.whole_word,
153 message.case_sensitive,
154 deserialize_path_matches(&message.files_to_include)?,
155 deserialize_path_matches(&message.files_to_exclude)?,
156 ))
157 }
158 }
159
160 pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
161 proto::SearchProject {
162 project_id,
163 query: self.as_str().to_string(),
164 regex: self.is_regex(),
165 whole_word: self.whole_word(),
166 case_sensitive: self.case_sensitive(),
167 files_to_include: self
168 .files_to_include()
169 .iter()
170 .map(|matcher| matcher.to_string())
171 .join(","),
172 files_to_exclude: self
173 .files_to_exclude()
174 .iter()
175 .map(|matcher| matcher.to_string())
176 .join(","),
177 }
178 }
179
180 pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
181 if self.as_str().is_empty() {
182 return Ok(false);
183 }
184
185 match self {
186 Self::Text { search, .. } => {
187 let mat = search.stream_find_iter(stream).next();
188 match mat {
189 Some(Ok(_)) => Ok(true),
190 Some(Err(err)) => Err(err.into()),
191 None => Ok(false),
192 }
193 }
194 Self::Regex {
195 regex, multiline, ..
196 } => {
197 let mut reader = BufReader::new(stream);
198 if *multiline {
199 let mut text = String::new();
200 if let Err(err) = reader.read_to_string(&mut text) {
201 Err(err.into())
202 } else {
203 Ok(regex.find(&text).is_some())
204 }
205 } else {
206 for line in reader.lines() {
207 let line = line?;
208 if regex.find(&line).is_some() {
209 return Ok(true);
210 }
211 }
212 Ok(false)
213 }
214 }
215 }
216 }
217
218 pub async fn search(
219 &self,
220 buffer: &BufferSnapshot,
221 subrange: Option<Range<usize>>,
222 ) -> Vec<Range<usize>> {
223 const YIELD_INTERVAL: usize = 20000;
224
225 if self.as_str().is_empty() {
226 return Default::default();
227 }
228
229 let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
230 let rope = if let Some(range) = subrange {
231 buffer.as_rope().slice(range)
232 } else {
233 buffer.as_rope().clone()
234 };
235
236 let mut matches = Vec::new();
237 match self {
238 Self::Text {
239 search, whole_word, ..
240 } => {
241 for (ix, mat) in search
242 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
243 .enumerate()
244 {
245 if (ix + 1) % YIELD_INTERVAL == 0 {
246 yield_now().await;
247 }
248
249 let mat = mat.unwrap();
250 if *whole_word {
251 let scope = buffer.language_scope_at(range_offset + mat.start());
252 let kind = |c| char_kind(&scope, c);
253
254 let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
255 let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
256 let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
257 let next_kind = rope.chars_at(mat.end()).next().map(kind);
258 if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
259 continue;
260 }
261 }
262 matches.push(mat.start()..mat.end())
263 }
264 }
265
266 Self::Regex {
267 regex, multiline, ..
268 } => {
269 if *multiline {
270 let text = rope.to_string();
271 for (ix, mat) in regex.find_iter(&text).enumerate() {
272 if (ix + 1) % YIELD_INTERVAL == 0 {
273 yield_now().await;
274 }
275
276 matches.push(mat.start()..mat.end());
277 }
278 } else {
279 let mut line = String::new();
280 let mut line_offset = 0;
281 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
282 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
283 yield_now().await;
284 }
285
286 for (newline_ix, text) in chunk.split('\n').enumerate() {
287 if newline_ix > 0 {
288 for mat in regex.find_iter(&line) {
289 let start = line_offset + mat.start();
290 let end = line_offset + mat.end();
291 matches.push(start..end);
292 }
293
294 line_offset += line.len() + 1;
295 line.clear();
296 }
297 line.push_str(text);
298 }
299 }
300 }
301 }
302 }
303
304 matches
305 }
306
307 pub fn as_str(&self) -> &str {
308 self.as_inner().as_str()
309 }
310
311 pub fn whole_word(&self) -> bool {
312 match self {
313 Self::Text { whole_word, .. } => *whole_word,
314 Self::Regex { whole_word, .. } => *whole_word,
315 }
316 }
317
318 pub fn case_sensitive(&self) -> bool {
319 match self {
320 Self::Text { case_sensitive, .. } => *case_sensitive,
321 Self::Regex { case_sensitive, .. } => *case_sensitive,
322 }
323 }
324
325 pub fn is_regex(&self) -> bool {
326 matches!(self, Self::Regex { .. })
327 }
328
329 pub fn files_to_include(&self) -> &[PathMatcher] {
330 self.as_inner().files_to_include()
331 }
332
333 pub fn files_to_exclude(&self) -> &[PathMatcher] {
334 self.as_inner().files_to_exclude()
335 }
336
337 pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
338 match file_path {
339 Some(file_path) => {
340 !self
341 .files_to_exclude()
342 .iter()
343 .any(|exclude_glob| exclude_glob.is_match(file_path))
344 && (self.files_to_include().is_empty()
345 || self
346 .files_to_include()
347 .iter()
348 .any(|include_glob| include_glob.is_match(file_path)))
349 }
350 None => self.files_to_include().is_empty(),
351 }
352 }
353 pub fn as_inner(&self) -> &SearchInputs {
354 match self {
355 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
356 }
357 }
358}
359
360fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
361 glob_set
362 .split(',')
363 .map(str::trim)
364 .filter(|glob_str| !glob_str.is_empty())
365 .map(|glob_str| {
366 PathMatcher::new(glob_str)
367 .with_context(|| format!("deserializing path match glob {glob_str}"))
368 })
369 .collect()
370}
371
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted as matchers and must match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(valid_path) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            );
        }
    }

    /// Malformed globs must be rejected; well-formed ones must be accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(invalid_glob).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(valid_glob) {
                panic!("Valid glob {valid_glob} should be accepted, but got: {e}");
            }
        }
    }
}