1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::{Context, Result};
3use client::proto;
4use globset::{Glob, GlobMatcher};
5use itertools::Itertools;
6use language::{char_kind, BufferSnapshot};
7use regex::{Regex, RegexBuilder};
8use smol::future::yield_now;
9use std::{
10 borrow::Cow,
11 io::{BufRead, BufReader, Read},
12 ops::Range,
13 path::{Path, PathBuf},
14 sync::Arc,
15};
16
17#[derive(Clone, Debug)]
18pub struct SearchInputs {
19 query: Arc<str>,
20 files_to_include: Vec<PathMatcher>,
21 files_to_exclude: Vec<PathMatcher>,
22}
23
24impl SearchInputs {
25 pub fn as_str(&self) -> &str {
26 self.query.as_ref()
27 }
28 pub fn files_to_include(&self) -> &[PathMatcher] {
29 &self.files_to_include
30 }
31 pub fn files_to_exclude(&self) -> &[PathMatcher] {
32 &self.files_to_exclude
33 }
34}
35#[derive(Clone, Debug)]
36pub enum SearchQuery {
37 Text {
38 search: Arc<AhoCorasick<usize>>,
39 replacement: Option<String>,
40 whole_word: bool,
41 case_sensitive: bool,
42 inner: SearchInputs,
43 },
44
45 Regex {
46 regex: Regex,
47 replacement: Option<String>,
48 multiline: bool,
49 whole_word: bool,
50 case_sensitive: bool,
51 inner: SearchInputs,
52 },
53}
54
55#[derive(Clone, Debug)]
56pub struct PathMatcher {
57 maybe_path: PathBuf,
58 glob: GlobMatcher,
59}
60
61impl std::fmt::Display for PathMatcher {
62 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63 self.maybe_path.to_string_lossy().fmt(f)
64 }
65}
66
67impl PathMatcher {
68 pub fn new(maybe_glob: &str) -> Result<Self, globset::Error> {
69 Ok(PathMatcher {
70 glob: Glob::new(&maybe_glob)?.compile_matcher(),
71 maybe_path: PathBuf::from(maybe_glob),
72 })
73 }
74
75 pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
76 other.as_ref().starts_with(&self.maybe_path) || self.glob.is_match(other)
77 }
78}
79
80impl SearchQuery {
81 pub fn text(
82 query: impl ToString,
83 whole_word: bool,
84 case_sensitive: bool,
85 files_to_include: Vec<PathMatcher>,
86 files_to_exclude: Vec<PathMatcher>,
87 ) -> Self {
88 let query = query.to_string();
89 let search = AhoCorasickBuilder::new()
90 .auto_configure(&[&query])
91 .ascii_case_insensitive(!case_sensitive)
92 .build(&[&query]);
93 let inner = SearchInputs {
94 query: query.into(),
95 files_to_exclude,
96 files_to_include,
97 };
98 Self::Text {
99 search: Arc::new(search),
100 replacement: None,
101 whole_word,
102 case_sensitive,
103 inner,
104 }
105 }
106
107 pub fn regex(
108 query: impl ToString,
109 whole_word: bool,
110 case_sensitive: bool,
111 files_to_include: Vec<PathMatcher>,
112 files_to_exclude: Vec<PathMatcher>,
113 ) -> Result<Self> {
114 let mut query = query.to_string();
115 let initial_query = Arc::from(query.as_str());
116 if whole_word {
117 let mut word_query = String::new();
118 word_query.push_str("\\b");
119 word_query.push_str(&query);
120 word_query.push_str("\\b");
121 query = word_query
122 }
123
124 let multiline = query.contains('\n') || query.contains("\\n");
125 let regex = RegexBuilder::new(&query)
126 .case_insensitive(!case_sensitive)
127 .multi_line(multiline)
128 .build()?;
129 let inner = SearchInputs {
130 query: initial_query,
131 files_to_exclude,
132 files_to_include,
133 };
134 Ok(Self::Regex {
135 regex,
136 replacement: None,
137 multiline,
138 whole_word,
139 case_sensitive,
140 inner,
141 })
142 }
143
144 pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
145 if message.regex {
146 Self::regex(
147 message.query,
148 message.whole_word,
149 message.case_sensitive,
150 deserialize_path_matches(&message.files_to_include)?,
151 deserialize_path_matches(&message.files_to_exclude)?,
152 )
153 } else {
154 Ok(Self::text(
155 message.query,
156 message.whole_word,
157 message.case_sensitive,
158 deserialize_path_matches(&message.files_to_include)?,
159 deserialize_path_matches(&message.files_to_exclude)?,
160 ))
161 }
162 }
163 pub fn with_replacement(mut self, new_replacement: Option<String>) -> Self {
164 match self {
165 Self::Text {
166 ref mut replacement,
167 ..
168 }
169 | Self::Regex {
170 ref mut replacement,
171 ..
172 } => {
173 *replacement = new_replacement;
174 self
175 }
176 }
177 }
178 pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
179 proto::SearchProject {
180 project_id,
181 query: self.as_str().to_string(),
182 regex: self.is_regex(),
183 whole_word: self.whole_word(),
184 case_sensitive: self.case_sensitive(),
185 files_to_include: self
186 .files_to_include()
187 .iter()
188 .map(|matcher| matcher.to_string())
189 .join(","),
190 files_to_exclude: self
191 .files_to_exclude()
192 .iter()
193 .map(|matcher| matcher.to_string())
194 .join(","),
195 }
196 }
197
198 pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
199 if self.as_str().is_empty() {
200 return Ok(false);
201 }
202
203 match self {
204 Self::Text { search, .. } => {
205 let mat = search.stream_find_iter(stream).next();
206 match mat {
207 Some(Ok(_)) => Ok(true),
208 Some(Err(err)) => Err(err.into()),
209 None => Ok(false),
210 }
211 }
212 Self::Regex {
213 regex, multiline, ..
214 } => {
215 let mut reader = BufReader::new(stream);
216 if *multiline {
217 let mut text = String::new();
218 if let Err(err) = reader.read_to_string(&mut text) {
219 Err(err.into())
220 } else {
221 Ok(regex.find(&text).is_some())
222 }
223 } else {
224 for line in reader.lines() {
225 let line = line?;
226 if regex.find(&line).is_some() {
227 return Ok(true);
228 }
229 }
230 Ok(false)
231 }
232 }
233 }
234 }
235 pub fn replacement<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
236 match self {
237 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
238 SearchQuery::Regex {
239 regex, replacement, ..
240 } => {
241 if let Some(replacement) = replacement {
242 Some(regex.replace(text, replacement))
243 } else {
244 None
245 }
246 }
247 }
248 }
249 pub async fn search(
250 &self,
251 buffer: &BufferSnapshot,
252 subrange: Option<Range<usize>>,
253 ) -> Vec<Range<usize>> {
254 const YIELD_INTERVAL: usize = 20000;
255
256 if self.as_str().is_empty() {
257 return Default::default();
258 }
259
260 let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
261 let rope = if let Some(range) = subrange {
262 buffer.as_rope().slice(range)
263 } else {
264 buffer.as_rope().clone()
265 };
266
267 let mut matches = Vec::new();
268 match self {
269 Self::Text {
270 search, whole_word, ..
271 } => {
272 for (ix, mat) in search
273 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
274 .enumerate()
275 {
276 if (ix + 1) % YIELD_INTERVAL == 0 {
277 yield_now().await;
278 }
279
280 let mat = mat.unwrap();
281 if *whole_word {
282 let scope = buffer.language_scope_at(range_offset + mat.start());
283 let kind = |c| char_kind(&scope, c);
284
285 let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
286 let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
287 let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
288 let next_kind = rope.chars_at(mat.end()).next().map(kind);
289 if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
290 continue;
291 }
292 }
293 matches.push(mat.start()..mat.end())
294 }
295 }
296
297 Self::Regex {
298 regex, multiline, ..
299 } => {
300 if *multiline {
301 let text = rope.to_string();
302 for (ix, mat) in regex.find_iter(&text).enumerate() {
303 if (ix + 1) % YIELD_INTERVAL == 0 {
304 yield_now().await;
305 }
306
307 matches.push(mat.start()..mat.end());
308 }
309 } else {
310 let mut line = String::new();
311 let mut line_offset = 0;
312 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
313 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
314 yield_now().await;
315 }
316
317 for (newline_ix, text) in chunk.split('\n').enumerate() {
318 if newline_ix > 0 {
319 for mat in regex.find_iter(&line) {
320 let start = line_offset + mat.start();
321 let end = line_offset + mat.end();
322 matches.push(start..end);
323 }
324
325 line_offset += line.len() + 1;
326 line.clear();
327 }
328 line.push_str(text);
329 }
330 }
331 }
332 }
333 }
334
335 matches
336 }
337
338 pub fn as_str(&self) -> &str {
339 self.as_inner().as_str()
340 }
341
342 pub fn whole_word(&self) -> bool {
343 match self {
344 Self::Text { whole_word, .. } => *whole_word,
345 Self::Regex { whole_word, .. } => *whole_word,
346 }
347 }
348
349 pub fn case_sensitive(&self) -> bool {
350 match self {
351 Self::Text { case_sensitive, .. } => *case_sensitive,
352 Self::Regex { case_sensitive, .. } => *case_sensitive,
353 }
354 }
355
356 pub fn is_regex(&self) -> bool {
357 matches!(self, Self::Regex { .. })
358 }
359
360 pub fn files_to_include(&self) -> &[PathMatcher] {
361 self.as_inner().files_to_include()
362 }
363
364 pub fn files_to_exclude(&self) -> &[PathMatcher] {
365 self.as_inner().files_to_exclude()
366 }
367
368 pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
369 match file_path {
370 Some(file_path) => {
371 !self
372 .files_to_exclude()
373 .iter()
374 .any(|exclude_glob| exclude_glob.is_match(file_path))
375 && (self.files_to_include().is_empty()
376 || self
377 .files_to_include()
378 .iter()
379 .any(|include_glob| include_glob.is_match(file_path)))
380 }
381 None => self.files_to_include().is_empty(),
382 }
383 }
384 pub fn as_inner(&self) -> &SearchInputs {
385 match self {
386 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
387 }
388 }
389}
390
391fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
392 glob_set
393 .split(',')
394 .map(str::trim)
395 .filter(|glob_str| !glob_str.is_empty())
396 .map(|glob_str| {
397 PathMatcher::new(glob_str)
398 .with_context(|| format!("deserializing path match glob {glob_str}"))
399 })
400 .collect()
401}
402
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted and must match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(valid_path) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            );
        }
    }

    /// Malformed globs must be rejected; well-formed globs must be accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            assert!(
                PathMatcher::new(invalid_glob).is_err(),
                "Invalid glob {invalid_glob} should not be accepted"
            );
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(valid_glob) {
                panic!("Valid glob {valid_glob} should be accepted, but got: {e}");
            }
        }
    }
}