1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::{Context, Result};
3use client::proto;
4use globset::{Glob, GlobMatcher};
5use itertools::Itertools;
6use language::{char_kind, BufferSnapshot};
7use regex::{Regex, RegexBuilder};
8use smol::future::yield_now;
9use std::{
10 borrow::Cow,
11 io::{BufRead, BufReader, Read},
12 ops::Range,
13 path::{Path, PathBuf},
14 sync::Arc,
15};
16
17#[derive(Clone, Debug)]
18pub struct SearchInputs {
19 query: Arc<str>,
20 files_to_include: Vec<PathMatcher>,
21 files_to_exclude: Vec<PathMatcher>,
22}
23
24impl SearchInputs {
25 pub fn as_str(&self) -> &str {
26 self.query.as_ref()
27 }
28 pub fn files_to_include(&self) -> &[PathMatcher] {
29 &self.files_to_include
30 }
31 pub fn files_to_exclude(&self) -> &[PathMatcher] {
32 &self.files_to_exclude
33 }
34}
35#[derive(Clone, Debug)]
36pub enum SearchQuery {
37 Text {
38 search: Arc<AhoCorasick>,
39 replacement: Option<String>,
40 whole_word: bool,
41 case_sensitive: bool,
42 inner: SearchInputs,
43 },
44
45 Regex {
46 regex: Regex,
47 replacement: Option<String>,
48 multiline: bool,
49 whole_word: bool,
50 case_sensitive: bool,
51 inner: SearchInputs,
52 },
53}
54
55#[derive(Clone, Debug)]
56pub struct PathMatcher {
57 maybe_path: PathBuf,
58 glob: GlobMatcher,
59}
60
61impl std::fmt::Display for PathMatcher {
62 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63 self.maybe_path.to_string_lossy().fmt(f)
64 }
65}
66
67impl PathMatcher {
68 pub fn new(maybe_glob: &str) -> Result<Self, globset::Error> {
69 Ok(PathMatcher {
70 glob: Glob::new(&maybe_glob)?.compile_matcher(),
71 maybe_path: PathBuf::from(maybe_glob),
72 })
73 }
74
75 pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
76 other.as_ref().starts_with(&self.maybe_path) || self.glob.is_match(other)
77 }
78}
79
80impl SearchQuery {
81 pub fn text(
82 query: impl ToString,
83 whole_word: bool,
84 case_sensitive: bool,
85 files_to_include: Vec<PathMatcher>,
86 files_to_exclude: Vec<PathMatcher>,
87 ) -> Result<Self> {
88 let query = query.to_string();
89 let search = AhoCorasickBuilder::new()
90 .ascii_case_insensitive(!case_sensitive)
91 .build(&[&query])?;
92 let inner = SearchInputs {
93 query: query.into(),
94 files_to_exclude,
95 files_to_include,
96 };
97 Ok(Self::Text {
98 search: Arc::new(search),
99 replacement: None,
100 whole_word,
101 case_sensitive,
102 inner,
103 })
104 }
105
106 pub fn regex(
107 query: impl ToString,
108 whole_word: bool,
109 case_sensitive: bool,
110 files_to_include: Vec<PathMatcher>,
111 files_to_exclude: Vec<PathMatcher>,
112 ) -> Result<Self> {
113 let mut query = query.to_string();
114 let initial_query = Arc::from(query.as_str());
115 if whole_word {
116 let mut word_query = String::new();
117 word_query.push_str("\\b");
118 word_query.push_str(&query);
119 word_query.push_str("\\b");
120 query = word_query
121 }
122
123 let multiline = query.contains('\n') || query.contains("\\n");
124 let regex = RegexBuilder::new(&query)
125 .case_insensitive(!case_sensitive)
126 .multi_line(multiline)
127 .build()?;
128 let inner = SearchInputs {
129 query: initial_query,
130 files_to_exclude,
131 files_to_include,
132 };
133 Ok(Self::Regex {
134 regex,
135 replacement: None,
136 multiline,
137 whole_word,
138 case_sensitive,
139 inner,
140 })
141 }
142
143 pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
144 if message.regex {
145 Self::regex(
146 message.query,
147 message.whole_word,
148 message.case_sensitive,
149 deserialize_path_matches(&message.files_to_include)?,
150 deserialize_path_matches(&message.files_to_exclude)?,
151 )
152 } else {
153 Self::text(
154 message.query,
155 message.whole_word,
156 message.case_sensitive,
157 deserialize_path_matches(&message.files_to_include)?,
158 deserialize_path_matches(&message.files_to_exclude)?,
159 )
160 }
161 }
162 pub fn with_replacement(mut self, new_replacement: Option<String>) -> Self {
163 match self {
164 Self::Text {
165 ref mut replacement,
166 ..
167 }
168 | Self::Regex {
169 ref mut replacement,
170 ..
171 } => {
172 *replacement = new_replacement;
173 self
174 }
175 }
176 }
177 pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
178 proto::SearchProject {
179 project_id,
180 query: self.as_str().to_string(),
181 regex: self.is_regex(),
182 whole_word: self.whole_word(),
183 case_sensitive: self.case_sensitive(),
184 files_to_include: self
185 .files_to_include()
186 .iter()
187 .map(|matcher| matcher.to_string())
188 .join(","),
189 files_to_exclude: self
190 .files_to_exclude()
191 .iter()
192 .map(|matcher| matcher.to_string())
193 .join(","),
194 }
195 }
196
197 pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
198 if self.as_str().is_empty() {
199 return Ok(false);
200 }
201
202 match self {
203 Self::Text { search, .. } => {
204 let mat = search.stream_find_iter(stream).next();
205 match mat {
206 Some(Ok(_)) => Ok(true),
207 Some(Err(err)) => Err(err.into()),
208 None => Ok(false),
209 }
210 }
211 Self::Regex {
212 regex, multiline, ..
213 } => {
214 let mut reader = BufReader::new(stream);
215 if *multiline {
216 let mut text = String::new();
217 if let Err(err) = reader.read_to_string(&mut text) {
218 Err(err.into())
219 } else {
220 Ok(regex.find(&text).is_some())
221 }
222 } else {
223 for line in reader.lines() {
224 let line = line?;
225 if regex.find(&line).is_some() {
226 return Ok(true);
227 }
228 }
229 Ok(false)
230 }
231 }
232 }
233 }
234 pub fn replacement<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
235 match self {
236 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
237 SearchQuery::Regex {
238 regex, replacement, ..
239 } => {
240 if let Some(replacement) = replacement {
241 Some(regex.replace(text, replacement))
242 } else {
243 None
244 }
245 }
246 }
247 }
248 pub async fn search(
249 &self,
250 buffer: &BufferSnapshot,
251 subrange: Option<Range<usize>>,
252 ) -> Vec<Range<usize>> {
253 const YIELD_INTERVAL: usize = 20000;
254
255 if self.as_str().is_empty() {
256 return Default::default();
257 }
258
259 let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
260 let rope = if let Some(range) = subrange {
261 buffer.as_rope().slice(range)
262 } else {
263 buffer.as_rope().clone()
264 };
265
266 let mut matches = Vec::new();
267 match self {
268 Self::Text {
269 search, whole_word, ..
270 } => {
271 for (ix, mat) in search
272 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
273 .enumerate()
274 {
275 if (ix + 1) % YIELD_INTERVAL == 0 {
276 yield_now().await;
277 }
278
279 let mat = mat.unwrap();
280 if *whole_word {
281 let scope = buffer.language_scope_at(range_offset + mat.start());
282 let kind = |c| char_kind(&scope, c);
283
284 let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
285 let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
286 let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
287 let next_kind = rope.chars_at(mat.end()).next().map(kind);
288 if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
289 continue;
290 }
291 }
292 matches.push(mat.start()..mat.end())
293 }
294 }
295
296 Self::Regex {
297 regex, multiline, ..
298 } => {
299 if *multiline {
300 let text = rope.to_string();
301 for (ix, mat) in regex.find_iter(&text).enumerate() {
302 if (ix + 1) % YIELD_INTERVAL == 0 {
303 yield_now().await;
304 }
305
306 matches.push(mat.start()..mat.end());
307 }
308 } else {
309 let mut line = String::new();
310 let mut line_offset = 0;
311 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
312 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
313 yield_now().await;
314 }
315
316 for (newline_ix, text) in chunk.split('\n').enumerate() {
317 if newline_ix > 0 {
318 for mat in regex.find_iter(&line) {
319 let start = line_offset + mat.start();
320 let end = line_offset + mat.end();
321 matches.push(start..end);
322 }
323
324 line_offset += line.len() + 1;
325 line.clear();
326 }
327 line.push_str(text);
328 }
329 }
330 }
331 }
332 }
333
334 matches
335 }
336
337 pub fn as_str(&self) -> &str {
338 self.as_inner().as_str()
339 }
340
341 pub fn whole_word(&self) -> bool {
342 match self {
343 Self::Text { whole_word, .. } => *whole_word,
344 Self::Regex { whole_word, .. } => *whole_word,
345 }
346 }
347
348 pub fn case_sensitive(&self) -> bool {
349 match self {
350 Self::Text { case_sensitive, .. } => *case_sensitive,
351 Self::Regex { case_sensitive, .. } => *case_sensitive,
352 }
353 }
354
355 pub fn is_regex(&self) -> bool {
356 matches!(self, Self::Regex { .. })
357 }
358
359 pub fn files_to_include(&self) -> &[PathMatcher] {
360 self.as_inner().files_to_include()
361 }
362
363 pub fn files_to_exclude(&self) -> &[PathMatcher] {
364 self.as_inner().files_to_exclude()
365 }
366
367 pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
368 match file_path {
369 Some(file_path) => {
370 !self
371 .files_to_exclude()
372 .iter()
373 .any(|exclude_glob| exclude_glob.is_match(file_path))
374 && (self.files_to_include().is_empty()
375 || self
376 .files_to_include()
377 .iter()
378 .any(|include_glob| include_glob.is_match(file_path)))
379 }
380 None => self.files_to_include().is_empty(),
381 }
382 }
383 pub fn as_inner(&self) -> &SearchInputs {
384 match self {
385 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
386 }
387 }
388}
389
390fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
391 glob_set
392 .split(',')
393 .map(str::trim)
394 .filter(|glob_str| !glob_str.is_empty())
395 .map(|glob_str| {
396 PathMatcher::new(glob_str)
397 .with_context(|| format!("deserializing path match glob {glob_str}"))
398 })
399 .collect()
400}
401
#[cfg(test)]
mod tests {
    use super::*;

    /// Every plain path must be accepted as a matcher source, and the resulting
    /// matcher must match the path it was built from.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];
        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(valid_path) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed globs must be accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(invalid_glob).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(valid_glob) {
                panic!("Valid glob {valid_glob} should be accepted, but got: {e}");
            }
        }
    }
}