1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::{Context, Result};
3use client::proto;
4use globset::{Glob, GlobMatcher};
5use itertools::Itertools;
6use language::{char_kind, BufferSnapshot};
7use regex::{Regex, RegexBuilder};
8use smol::future::yield_now;
9use std::{
10 borrow::Cow,
11 io::{BufRead, BufReader, Read},
12 ops::Range,
13 path::{Path, PathBuf},
14 sync::Arc,
15};
16
/// The raw inputs a [`SearchQuery`] was constructed from: the query text and
/// the path filters that restrict which files are searched.
#[derive(Clone, Debug)]
pub struct SearchInputs {
    /// The query text as it was handed to the constructor. For regex queries
    /// this is the pattern before any whole-word `\b` wrapping is applied.
    query: Arc<str>,
    /// Matchers selecting the files to search. `SearchQuery::file_matches`
    /// treats an empty list as "no include restriction".
    files_to_include: Vec<PathMatcher>,
    /// Matchers selecting files that must be skipped. In
    /// `SearchQuery::file_matches` an exclude hit rejects the path regardless
    /// of the include list.
    files_to_exclude: Vec<PathMatcher>,
}
23
24impl SearchInputs {
25 pub fn as_str(&self) -> &str {
26 self.query.as_ref()
27 }
28 pub fn files_to_include(&self) -> &[PathMatcher] {
29 &self.files_to_include
30 }
31 pub fn files_to_exclude(&self) -> &[PathMatcher] {
32 &self.files_to_exclude
33 }
34}
/// A compiled search query, either a literal text search or a regular
/// expression, together with its options and path filters.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search backed by an Aho-Corasick automaton.
    Text {
        /// Automaton built from the single query string.
        search: Arc<AhoCorasick>,
        /// Text substituted for each match, if a replacement has been set.
        replacement: Option<String>,
        /// When set, `search` drops hits whose neighbouring character is of
        /// the same kind as the character at the corresponding edge of the hit.
        whole_word: bool,
        /// When `false`, the automaton is built with ASCII case insensitivity.
        case_sensitive: bool,
        /// The original query text and path filters.
        inner: SearchInputs,
    },

    /// Regular-expression search.
    Regex {
        /// The compiled pattern (already wrapped in `\b` when `whole_word` is set).
        regex: Regex,
        /// Replacement template applied to each match, if one has been set.
        replacement: Option<String>,
        /// Whether the pattern contains a newline or a `\n` escape. Selects
        /// whole-text matching instead of line-by-line matching.
        multiline: bool,
        /// Whether the pattern was wrapped in word boundaries at construction.
        whole_word: bool,
        /// When `false`, the pattern is compiled case-insensitively.
        case_sensitive: bool,
        /// The original query text and path filters.
        inner: SearchInputs,
    },
}
54
/// Matches file paths against a user-supplied string that is interpreted both
/// as a literal path prefix and as a glob pattern.
#[derive(Clone, Debug)]
pub struct PathMatcher {
    /// The input string taken verbatim as a path; used for prefix matching and
    /// for `Display`.
    maybe_path: PathBuf,
    /// The same input string compiled as a glob.
    glob: GlobMatcher,
}
60
61impl std::fmt::Display for PathMatcher {
62 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63 self.maybe_path.to_string_lossy().fmt(f)
64 }
65}
66
67impl PathMatcher {
68 pub fn new(maybe_glob: &str) -> Result<Self, globset::Error> {
69 Ok(PathMatcher {
70 glob: Glob::new(&maybe_glob)?.compile_matcher(),
71 maybe_path: PathBuf::from(maybe_glob),
72 })
73 }
74
75 pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
76 other.as_ref().starts_with(&self.maybe_path) || self.glob.is_match(other)
77 }
78}
79
80impl SearchQuery {
81 pub fn text(
82 query: impl ToString,
83 whole_word: bool,
84 case_sensitive: bool,
85 files_to_include: Vec<PathMatcher>,
86 files_to_exclude: Vec<PathMatcher>,
87 ) -> Result<Self> {
88 let query = query.to_string();
89 let search = AhoCorasickBuilder::new()
90 .ascii_case_insensitive(!case_sensitive)
91 .build(&[&query])?;
92 let inner = SearchInputs {
93 query: query.into(),
94 files_to_exclude,
95 files_to_include,
96 };
97 Ok(Self::Text {
98 search: Arc::new(search),
99 replacement: None,
100 whole_word,
101 case_sensitive,
102 inner,
103 })
104 }
105
106 pub fn regex(
107 query: impl ToString,
108 whole_word: bool,
109 case_sensitive: bool,
110 files_to_include: Vec<PathMatcher>,
111 files_to_exclude: Vec<PathMatcher>,
112 ) -> Result<Self> {
113 let mut query = query.to_string();
114 let initial_query = Arc::from(query.as_str());
115 if whole_word {
116 let mut word_query = String::new();
117 word_query.push_str("\\b");
118 word_query.push_str(&query);
119 word_query.push_str("\\b");
120 query = word_query
121 }
122
123 let multiline = query.contains('\n') || query.contains("\\n");
124 let regex = RegexBuilder::new(&query)
125 .case_insensitive(!case_sensitive)
126 .multi_line(multiline)
127 .build()?;
128 let inner = SearchInputs {
129 query: initial_query,
130 files_to_exclude,
131 files_to_include,
132 };
133 Ok(Self::Regex {
134 regex,
135 replacement: None,
136 multiline,
137 whole_word,
138 case_sensitive,
139 inner,
140 })
141 }
142
143 pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
144 if message.regex {
145 Self::regex(
146 message.query,
147 message.whole_word,
148 message.case_sensitive,
149 deserialize_path_matches(&message.files_to_include)?,
150 deserialize_path_matches(&message.files_to_exclude)?,
151 )
152 } else {
153 Self::text(
154 message.query,
155 message.whole_word,
156 message.case_sensitive,
157 deserialize_path_matches(&message.files_to_include)?,
158 deserialize_path_matches(&message.files_to_exclude)?,
159 )
160 }
161 }
162 pub fn with_replacement(mut self, new_replacement: String) -> Self {
163 match self {
164 Self::Text {
165 ref mut replacement,
166 ..
167 }
168 | Self::Regex {
169 ref mut replacement,
170 ..
171 } => {
172 *replacement = Some(new_replacement);
173 self
174 }
175 }
176 }
177 pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
178 proto::SearchProject {
179 project_id,
180 query: self.as_str().to_string(),
181 regex: self.is_regex(),
182 whole_word: self.whole_word(),
183 case_sensitive: self.case_sensitive(),
184 files_to_include: self
185 .files_to_include()
186 .iter()
187 .map(|matcher| matcher.to_string())
188 .join(","),
189 files_to_exclude: self
190 .files_to_exclude()
191 .iter()
192 .map(|matcher| matcher.to_string())
193 .join(","),
194 }
195 }
196
197 pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
198 if self.as_str().is_empty() {
199 return Ok(false);
200 }
201
202 match self {
203 Self::Text { search, .. } => {
204 let mat = search.stream_find_iter(stream).next();
205 match mat {
206 Some(Ok(_)) => Ok(true),
207 Some(Err(err)) => Err(err.into()),
208 None => Ok(false),
209 }
210 }
211 Self::Regex {
212 regex, multiline, ..
213 } => {
214 let mut reader = BufReader::new(stream);
215 if *multiline {
216 let mut text = String::new();
217 if let Err(err) = reader.read_to_string(&mut text) {
218 Err(err.into())
219 } else {
220 Ok(regex.find(&text).is_some())
221 }
222 } else {
223 for line in reader.lines() {
224 let line = line?;
225 if regex.find(&line).is_some() {
226 return Ok(true);
227 }
228 }
229 Ok(false)
230 }
231 }
232 }
233 }
234 /// Returns the replacement text for this `SearchQuery`.
235 pub fn replacement(&self) -> Option<&str> {
236 match self {
237 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
238 replacement.as_deref()
239 }
240 }
241 }
242 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
243 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
244 match self {
245 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
246 SearchQuery::Regex {
247 regex, replacement, ..
248 } => {
249 if let Some(replacement) = replacement {
250 Some(regex.replace(text, replacement))
251 } else {
252 None
253 }
254 }
255 }
256 }
257 pub async fn search(
258 &self,
259 buffer: &BufferSnapshot,
260 subrange: Option<Range<usize>>,
261 ) -> Vec<Range<usize>> {
262 const YIELD_INTERVAL: usize = 20000;
263
264 if self.as_str().is_empty() {
265 return Default::default();
266 }
267
268 let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
269 let rope = if let Some(range) = subrange {
270 buffer.as_rope().slice(range)
271 } else {
272 buffer.as_rope().clone()
273 };
274
275 let mut matches = Vec::new();
276 match self {
277 Self::Text {
278 search, whole_word, ..
279 } => {
280 for (ix, mat) in search
281 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
282 .enumerate()
283 {
284 if (ix + 1) % YIELD_INTERVAL == 0 {
285 yield_now().await;
286 }
287
288 let mat = mat.unwrap();
289 if *whole_word {
290 let scope = buffer.language_scope_at(range_offset + mat.start());
291 let kind = |c| char_kind(&scope, c);
292
293 let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
294 let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
295 let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
296 let next_kind = rope.chars_at(mat.end()).next().map(kind);
297 if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
298 continue;
299 }
300 }
301 matches.push(mat.start()..mat.end())
302 }
303 }
304
305 Self::Regex {
306 regex, multiline, ..
307 } => {
308 if *multiline {
309 let text = rope.to_string();
310 for (ix, mat) in regex.find_iter(&text).enumerate() {
311 if (ix + 1) % YIELD_INTERVAL == 0 {
312 yield_now().await;
313 }
314
315 matches.push(mat.start()..mat.end());
316 }
317 } else {
318 let mut line = String::new();
319 let mut line_offset = 0;
320 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
321 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
322 yield_now().await;
323 }
324
325 for (newline_ix, text) in chunk.split('\n').enumerate() {
326 if newline_ix > 0 {
327 for mat in regex.find_iter(&line) {
328 let start = line_offset + mat.start();
329 let end = line_offset + mat.end();
330 matches.push(start..end);
331 }
332
333 line_offset += line.len() + 1;
334 line.clear();
335 }
336 line.push_str(text);
337 }
338 }
339 }
340 }
341 }
342
343 matches
344 }
345
346 pub fn as_str(&self) -> &str {
347 self.as_inner().as_str()
348 }
349
350 pub fn whole_word(&self) -> bool {
351 match self {
352 Self::Text { whole_word, .. } => *whole_word,
353 Self::Regex { whole_word, .. } => *whole_word,
354 }
355 }
356
357 pub fn case_sensitive(&self) -> bool {
358 match self {
359 Self::Text { case_sensitive, .. } => *case_sensitive,
360 Self::Regex { case_sensitive, .. } => *case_sensitive,
361 }
362 }
363
364 pub fn is_regex(&self) -> bool {
365 matches!(self, Self::Regex { .. })
366 }
367
368 pub fn files_to_include(&self) -> &[PathMatcher] {
369 self.as_inner().files_to_include()
370 }
371
372 pub fn files_to_exclude(&self) -> &[PathMatcher] {
373 self.as_inner().files_to_exclude()
374 }
375
376 pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
377 match file_path {
378 Some(file_path) => {
379 !self
380 .files_to_exclude()
381 .iter()
382 .any(|exclude_glob| exclude_glob.is_match(file_path))
383 && (self.files_to_include().is_empty()
384 || self
385 .files_to_include()
386 .iter()
387 .any(|include_glob| include_glob.is_match(file_path)))
388 }
389 None => self.files_to_include().is_empty(),
390 }
391 }
392 pub fn as_inner(&self) -> &SearchInputs {
393 match self {
394 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
395 }
396 }
397}
398
399fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
400 glob_set
401 .split(',')
402 .map(str::trim)
403 .filter(|glob_str| !glob_str.is_empty())
404 .map(|glob_str| {
405 PathMatcher::new(glob_str)
406 .with_context(|| format!("deserializing path match glob {glob_str}"))
407 })
408 .collect()
409}
410
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted by `PathMatcher::new`, and the resulting
    /// matcher must match the very path it was created from.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];
        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(valid_path) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed globs accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(invalid_glob).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(valid_glob) {
                panic!("Valid glob {valid_glob} should be accepted, but got: {e}");
            }
        }
    }
}