use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
use anyhow::{Context, Result};
use client::proto;
use globset::{Glob, GlobMatcher};
use itertools::Itertools;
use language::{char_kind, BufferSnapshot};
use regex::{Regex, RegexBuilder};
use smol::future::yield_now;
use std::{
    io::{BufRead, BufReader, Read},
    ops::Range,
    path::{Path, PathBuf},
    sync::Arc,
};

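/// The inputs shared by every search query: the raw query string plus the
/// include/exclude path filters that scope which files are searched.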
#[derive(Clone, Debug)]
pub struct SearchInputs {
    query: Arc<str>,
    files_to_include: Vec<PathMatcher>,
    files_to_exclude: Vec<PathMatcher>,
}

impl SearchInputs {
    pub fn as_str(&self) -> &str {
        self.query.as_ref()
    }
    pub fn files_to_include(&self) -> &[PathMatcher] {
        &self.files_to_include
    }
    pub fn files_to_exclude(&self) -> &[PathMatcher] {
        &self.files_to_exclude
    }
}

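/// A project-wide search query: either a plain-text (Aho-Corasick) search or a
/// regex search, together with its whole-word and case-sensitivity options.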
#[derive(Clone, Debug)]
pub enum SearchQuery {
    Text {
        search: Arc<AhoCorasick<usize>>,
        whole_word: bool,
        case_sensitive: bool,
        inner: SearchInputs,
    },

    Regex {
        regex: Regex,
        multiline: bool,
        whole_word: bool,
        case_sensitive: bool,
        inner: SearchInputs,
    },
}

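/// Matches file paths against a user-provided pattern, treating it both as a
/// literal path prefix and as a compiled glob.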
#[derive(Clone, Debug)]
pub struct PathMatcher {
    maybe_path: PathBuf,
    glob: GlobMatcher,
}

impl std::fmt::Display for PathMatcher {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.maybe_path.to_string_lossy().fmt(f)
    }
}

impl PathMatcher {
    pub fn new(maybe_glob: &str) -> Result<Self, globset::Error> {
        Ok(PathMatcher {
            glob: Glob::new(maybe_glob)?.compile_matcher(),
            maybe_path: PathBuf::from(maybe_glob),
        })
    }

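    /// Returns true if `other` either starts with the literal path this
    /// matcher was built from or matches its glob pattern.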
    pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
        other.as_ref().starts_with(&self.maybe_path) || self.glob.is_match(other)
    }
}

impl SearchQuery {
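    /// Builds a plain-text query. ASCII case-insensitive matching is handled
    /// by the Aho-Corasick automaton itself via `ascii_case_insensitive`.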
    pub fn text(
        query: impl ToString,
        whole_word: bool,
        case_sensitive: bool,
        files_to_include: Vec<PathMatcher>,
        files_to_exclude: Vec<PathMatcher>,
    ) -> Self {
        let query = query.to_string();
        let search = AhoCorasickBuilder::new()
            .auto_configure(&[&query])
            .ascii_case_insensitive(!case_sensitive)
            .build(&[&query]);
        let inner = SearchInputs {
            query: query.into(),
            files_to_exclude,
            files_to_include,
        };
        Self::Text {
            search: Arc::new(search),
            whole_word,
            case_sensitive,
            inner,
        }
    }

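    /// Builds a regex query. Whole-word matching is implemented by wrapping
    /// the pattern in `\b` anchors, and multiline mode is enabled whenever the
    /// pattern contains a literal or escaped newline.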
    pub fn regex(
        query: impl ToString,
        whole_word: bool,
        case_sensitive: bool,
        files_to_include: Vec<PathMatcher>,
        files_to_exclude: Vec<PathMatcher>,
    ) -> Result<Self> {
        let mut query = query.to_string();
        let initial_query = Arc::from(query.as_str());
        if whole_word {
            let mut word_query = String::new();
            word_query.push_str("\\b");
            word_query.push_str(&query);
            word_query.push_str("\\b");
            query = word_query;
        }

        let multiline = query.contains('\n') || query.contains("\\n");
        let regex = RegexBuilder::new(&query)
            .case_insensitive(!case_sensitive)
            .multi_line(multiline)
            .build()?;
        let inner = SearchInputs {
            query: initial_query,
            files_to_exclude,
            files_to_include,
        };
        Ok(Self::Regex {
            regex,
            multiline,
            whole_word,
            case_sensitive,
            inner,
        })
    }

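    /// Reconstructs a query from its wire representation, parsing the
    /// comma-separated include/exclude globs back into `PathMatcher`s.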
    pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
        if message.regex {
            Self::regex(
                message.query,
                message.whole_word,
                message.case_sensitive,
                deserialize_path_matches(&message.files_to_include)?,
                deserialize_path_matches(&message.files_to_exclude)?,
            )
        } else {
            Ok(Self::text(
                message.query,
                message.whole_word,
                message.case_sensitive,
                deserialize_path_matches(&message.files_to_include)?,
                deserialize_path_matches(&message.files_to_exclude)?,
            ))
        }
    }

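    /// Serializes the query for the wire, joining the include/exclude matchers
    /// into comma-separated strings.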
    pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
        proto::SearchProject {
            project_id,
            query: self.as_str().to_string(),
            regex: self.is_regex(),
            whole_word: self.whole_word(),
            case_sensitive: self.case_sensitive(),
            files_to_include: self
                .files_to_include()
                .iter()
                .map(|matcher| matcher.to_string())
                .join(","),
            files_to_exclude: self
                .files_to_exclude()
                .iter()
                .map(|matcher| matcher.to_string())
                .join(","),
        }
    }

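    /// Returns whether the query matches anywhere in `stream`, without
    /// collecting match positions. Text queries scan the stream incrementally;
    /// multiline regexes read the whole stream into memory, while single-line
    /// regexes are checked line by line.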
    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
        if self.as_str().is_empty() {
            return Ok(false);
        }

        match self {
            Self::Text { search, .. } => {
                let mat = search.stream_find_iter(stream).next();
                match mat {
                    Some(Ok(_)) => Ok(true),
                    Some(Err(err)) => Err(err.into()),
                    None => Ok(false),
                }
            }
            Self::Regex {
                regex, multiline, ..
            } => {
                let mut reader = BufReader::new(stream);
                if *multiline {
                    let mut text = String::new();
                    if let Err(err) = reader.read_to_string(&mut text) {
                        Err(err.into())
                    } else {
                        Ok(regex.find(&text).is_some())
                    }
                } else {
                    for line in reader.lines() {
                        let line = line?;
                        if regex.find(&line).is_some() {
                            return Ok(true);
                        }
                    }
                    Ok(false)
                }
            }
        }
    }

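    /// Finds every match in `buffer` (or just `subrange`, when given) and
    /// returns the matched byte ranges. The scan periodically yields to the
    /// executor so that long searches don't starve other tasks.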
    pub async fn search(
        &self,
        buffer: &BufferSnapshot,
        subrange: Option<Range<usize>>,
    ) -> Vec<Range<usize>> {
        const YIELD_INTERVAL: usize = 20000;

        if self.as_str().is_empty() {
            return Default::default();
        }
        let language = buffer.language_at(0);
        let rope = if let Some(range) = subrange {
            buffer.as_rope().slice(range)
        } else {
            buffer.as_rope().clone()
        };

        let kind = |c| char_kind(language, c);

        let mut matches = Vec::new();
        match self {
            Self::Text {
                search, whole_word, ..
            } => {
                for (ix, mat) in search
                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
                    .enumerate()
                {
                    if (ix + 1) % YIELD_INTERVAL == 0 {
                        yield_now().await;
                    }

                    let mat = mat.unwrap();
                    if *whole_word {
                        // Reject matches whose boundaries fall inside a word:
                        // the characters adjacent to the match must be of a
                        // different kind than the characters at its edges.
                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
                        let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
                        let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
                        let next_kind = rope.chars_at(mat.end()).next().map(kind);
                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
                            continue;
                        }
                    }
                    matches.push(mat.start()..mat.end());
                }
            }

            Self::Regex {
                regex, multiline, ..
            } => {
                if *multiline {
                    let text = rope.to_string();
                    for (ix, mat) in regex.find_iter(&text).enumerate() {
                        if (ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        matches.push(mat.start()..mat.end());
                    }
                } else {
                    // Run the regex one line at a time, reassembling lines that
                    // are split across rope chunks and tracking the running
                    // byte offset so matches are reported in buffer coordinates.
                    let mut line = String::new();
                    let mut line_offset = 0;
                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        for (newline_ix, text) in chunk.split('\n').enumerate() {
                            if newline_ix > 0 {
                                for mat in regex.find_iter(&line) {
                                    let start = line_offset + mat.start();
                                    let end = line_offset + mat.end();
                                    matches.push(start..end);
                                }

                                line_offset += line.len() + 1;
                                line.clear();
                            }
                            line.push_str(text);
                        }
                    }
                }
            }
        }

        matches
    }

    pub fn as_str(&self) -> &str {
        self.as_inner().as_str()
    }

    pub fn whole_word(&self) -> bool {
        match self {
            Self::Text { whole_word, .. } => *whole_word,
            Self::Regex { whole_word, .. } => *whole_word,
        }
    }

    pub fn case_sensitive(&self) -> bool {
        match self {
            Self::Text { case_sensitive, .. } => *case_sensitive,
            Self::Regex { case_sensitive, .. } => *case_sensitive,
        }
    }

    pub fn is_regex(&self) -> bool {
        matches!(self, Self::Regex { .. })
    }

    pub fn files_to_include(&self) -> &[PathMatcher] {
        self.as_inner().files_to_include()
    }

    pub fn files_to_exclude(&self) -> &[PathMatcher] {
        self.as_inner().files_to_exclude()
    }

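    /// Decides whether a file should be searched: it must not match any
    /// exclude pattern, and must match at least one include pattern if any are
    /// set. Pathless files pass only when there are no include patterns.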
    pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
        match file_path {
            Some(file_path) => {
                !self
                    .files_to_exclude()
                    .iter()
                    .any(|exclude_glob| exclude_glob.is_match(file_path))
                    && (self.files_to_include().is_empty()
                        || self
                            .files_to_include()
                            .iter()
                            .any(|include_glob| include_glob.is_match(file_path)))
            }
            None => self.files_to_include().is_empty(),
        }
    }

    pub fn as_inner(&self) -> &SearchInputs {
        match self {
            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
        }
    }
}

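/// Parses a comma-separated list of globs (as sent over the wire) into
/// `PathMatcher`s, skipping empty entries.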
fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
    glob_set
        .split(',')
        .map(str::trim)
        .filter(|glob_str| !glob_str.is_empty())
        .map(|glob_str| {
            PathMatcher::new(glob_str)
                .with_context(|| format!("deserializing path match glob {glob_str}"))
        })
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn path_matcher_creation_for_valid_paths() {
        for valid_path in [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ] {
            let path_matcher = PathMatcher::new(valid_path).unwrap_or_else(|e| {
                panic!("Valid path {valid_path} should be accepted, but got: {e}")
            });
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    #[test]
    fn path_matcher_creation_for_globs() {
        for invalid_glob in ["dir/[].txt", "dir/[a-z.txt", "dir/{file"] {
            match PathMatcher::new(invalid_glob) {
                Ok(_) => panic!("Invalid glob {invalid_glob} should not be accepted"),
                Err(_expected) => {}
            }
        }

        for valid_glob in [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ] {
            match PathMatcher::new(valid_glob) {
                Ok(_expected) => {}
                Err(e) => panic!("Valid glob {valid_glob} should be accepted, but got: {e}"),
            }
        }
    }
}