1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::{Context, Result};
3use client::proto;
4use itertools::Itertools;
5use language::{char_kind, BufferSnapshot};
6use regex::{Regex, RegexBuilder};
7use smol::future::yield_now;
8use std::{
9 borrow::Cow,
10 io::{BufRead, BufReader, Read},
11 ops::Range,
12 path::Path,
13 sync::Arc,
14};
15use util::paths::PathMatcher;
16
/// The user-supplied parts of a search that both query kinds share: the query
/// text and the path filters.
#[derive(Clone, Debug)]
pub struct SearchInputs {
    /// The query text as it was passed to the constructor (for regex queries this
    /// is the text before any whole-word wrapping is applied).
    query: Arc<str>,
    /// Matchers a path must satisfy to be searched; an empty list means no
    /// include restriction (see `SearchQuery::file_matches`).
    files_to_include: Vec<PathMatcher>,
    /// Matchers that remove a path from the search.
    files_to_exclude: Vec<PathMatcher>,
}
23
24impl SearchInputs {
25 pub fn as_str(&self) -> &str {
26 self.query.as_ref()
27 }
28 pub fn files_to_include(&self) -> &[PathMatcher] {
29 &self.files_to_include
30 }
31 pub fn files_to_exclude(&self) -> &[PathMatcher] {
32 &self.files_to_exclude
33 }
34}
/// A compiled search, either a literal-text search or a regular-expression search.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search backed by an Aho-Corasick automaton.
    Text {
        /// Automaton built from the single query string.
        search: Arc<AhoCorasick>,
        /// Replacement text, if one was attached via `with_replacement`.
        replacement: Option<String>,
        /// When set, `search` drops matches that are not delimited by a change
        /// of character kind on both sides.
        whole_word: bool,
        /// When false, the automaton is built ASCII-case-insensitive.
        case_sensitive: bool,
        /// Carried through `to_proto`/`from_proto`; not consulted in this file.
        /// NOTE(review): presumably tells the caller to search ignored files — confirm.
        include_ignored: bool,
        /// Original query text and path filters.
        inner: SearchInputs,
    },

    /// Regular-expression search.
    Regex {
        /// Compiled pattern (already wrapped in word boundaries when `whole_word`).
        regex: Regex,
        /// Replacement text, if one was attached via `with_replacement`.
        replacement: Option<String>,
        /// True when the pattern contains a newline or a `\n` escape; selects
        /// whole-text matching instead of line-by-line matching.
        multiline: bool,
        /// Whether the pattern was wrapped in word boundaries at construction.
        whole_word: bool,
        /// When false, the regex is compiled case-insensitive.
        case_sensitive: bool,
        /// Carried through `to_proto`/`from_proto`; not consulted in this file.
        /// NOTE(review): presumably tells the caller to search ignored files — confirm.
        include_ignored: bool,
        /// Original query text and path filters.
        inner: SearchInputs,
    },
}
56
57impl SearchQuery {
58 pub fn text(
59 query: impl ToString,
60 whole_word: bool,
61 case_sensitive: bool,
62 include_ignored: bool,
63 files_to_include: Vec<PathMatcher>,
64 files_to_exclude: Vec<PathMatcher>,
65 ) -> Result<Self> {
66 let query = query.to_string();
67 let search = AhoCorasickBuilder::new()
68 .ascii_case_insensitive(!case_sensitive)
69 .build(&[&query])?;
70 let inner = SearchInputs {
71 query: query.into(),
72 files_to_exclude,
73 files_to_include,
74 };
75 Ok(Self::Text {
76 search: Arc::new(search),
77 replacement: None,
78 whole_word,
79 case_sensitive,
80 include_ignored,
81 inner,
82 })
83 }
84
85 pub fn regex(
86 query: impl ToString,
87 whole_word: bool,
88 case_sensitive: bool,
89 include_ignored: bool,
90 files_to_include: Vec<PathMatcher>,
91 files_to_exclude: Vec<PathMatcher>,
92 ) -> Result<Self> {
93 let mut query = query.to_string();
94 let initial_query = Arc::from(query.as_str());
95 if whole_word {
96 let mut word_query = String::new();
97 word_query.push_str("\\b");
98 word_query.push_str(&query);
99 word_query.push_str("\\b");
100 query = word_query
101 }
102
103 let multiline = query.contains('\n') || query.contains("\\n");
104 let regex = RegexBuilder::new(&query)
105 .case_insensitive(!case_sensitive)
106 .multi_line(multiline)
107 .build()?;
108 let inner = SearchInputs {
109 query: initial_query,
110 files_to_exclude,
111 files_to_include,
112 };
113 Ok(Self::Regex {
114 regex,
115 replacement: None,
116 multiline,
117 whole_word,
118 case_sensitive,
119 include_ignored,
120 inner,
121 })
122 }
123
124 pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
125 if message.regex {
126 Self::regex(
127 message.query,
128 message.whole_word,
129 message.case_sensitive,
130 message.include_ignored,
131 deserialize_path_matches(&message.files_to_include)?,
132 deserialize_path_matches(&message.files_to_exclude)?,
133 )
134 } else {
135 Self::text(
136 message.query,
137 message.whole_word,
138 message.case_sensitive,
139 message.include_ignored,
140 deserialize_path_matches(&message.files_to_include)?,
141 deserialize_path_matches(&message.files_to_exclude)?,
142 )
143 }
144 }
145 pub fn with_replacement(mut self, new_replacement: String) -> Self {
146 match self {
147 Self::Text {
148 ref mut replacement,
149 ..
150 }
151 | Self::Regex {
152 ref mut replacement,
153 ..
154 } => {
155 *replacement = Some(new_replacement);
156 self
157 }
158 }
159 }
160 pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
161 proto::SearchProject {
162 project_id,
163 query: self.as_str().to_string(),
164 regex: self.is_regex(),
165 whole_word: self.whole_word(),
166 case_sensitive: self.case_sensitive(),
167 include_ignored: self.include_ignored(),
168 files_to_include: self
169 .files_to_include()
170 .iter()
171 .map(|matcher| matcher.to_string())
172 .join(","),
173 files_to_exclude: self
174 .files_to_exclude()
175 .iter()
176 .map(|matcher| matcher.to_string())
177 .join(","),
178 }
179 }
180
181 pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
182 if self.as_str().is_empty() {
183 return Ok(false);
184 }
185
186 match self {
187 Self::Text { search, .. } => {
188 let mat = search.stream_find_iter(stream).next();
189 match mat {
190 Some(Ok(_)) => Ok(true),
191 Some(Err(err)) => Err(err.into()),
192 None => Ok(false),
193 }
194 }
195 Self::Regex {
196 regex, multiline, ..
197 } => {
198 let mut reader = BufReader::new(stream);
199 if *multiline {
200 let mut text = String::new();
201 if let Err(err) = reader.read_to_string(&mut text) {
202 Err(err.into())
203 } else {
204 Ok(regex.find(&text).is_some())
205 }
206 } else {
207 for line in reader.lines() {
208 let line = line?;
209 if regex.find(&line).is_some() {
210 return Ok(true);
211 }
212 }
213 Ok(false)
214 }
215 }
216 }
217 }
218 /// Returns the replacement text for this `SearchQuery`.
219 pub fn replacement(&self) -> Option<&str> {
220 match self {
221 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
222 replacement.as_deref()
223 }
224 }
225 }
226 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
227 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
228 match self {
229 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
230 SearchQuery::Regex {
231 regex, replacement, ..
232 } => {
233 if let Some(replacement) = replacement {
234 Some(regex.replace(text, replacement))
235 } else {
236 None
237 }
238 }
239 }
240 }
    /// Finds every match of this query in `buffer`, or only inside `subrange`
    /// when one is given.
    ///
    /// The returned ranges are byte offsets into the searched rope: when
    /// `subrange` is `Some`, they are relative to `subrange.start`, not to the
    /// start of the buffer. An empty query yields no matches.
    ///
    /// The future periodically yields to the executor: every `YIELD_INTERVAL`
    /// matches for text and multiline-regex queries, and every `YIELD_INTERVAL`
    /// rope chunks for line-by-line regex queries.
    ///
    /// # Panics
    /// Panics if the Aho-Corasick stream iterator reports an error.
    pub async fn search(
        &self,
        buffer: &BufferSnapshot,
        subrange: Option<Range<usize>>,
    ) -> Vec<Range<usize>> {
        const YIELD_INTERVAL: usize = 20000;

        if self.as_str().is_empty() {
            return Default::default();
        }

        // Offset of the searched slice within the buffer; only used to look up the
        // language scope, never added to the returned ranges.
        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
        let rope = if let Some(range) = subrange {
            buffer.as_rope().slice(range)
        } else {
            buffer.as_rope().clone()
        };

        let mut matches = Vec::new();
        match self {
            Self::Text {
                search, whole_word, ..
            } => {
                for (ix, mat) in search
                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
                    .enumerate()
                {
                    if (ix + 1) % YIELD_INTERVAL == 0 {
                        yield_now().await;
                    }

                    let mat = mat.unwrap();
                    if *whole_word {
                        let scope = buffer.language_scope_at(range_offset + mat.start());
                        let kind = |c| char_kind(&scope, c);

                        // Compare the character kinds just outside and just inside each
                        // edge of the match; the same kind on both sides of an edge means
                        // the match starts or ends in the middle of a word.
                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
                        let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
                        let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
                        let next_kind = rope.chars_at(mat.end()).next().map(kind);
                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
                            continue;
                        }
                    }
                    matches.push(mat.start()..mat.end())
                }
            }

            Self::Regex {
                regex, multiline, ..
            } => {
                if *multiline {
                    // The pattern may span lines, so match against the whole text at once.
                    let text = rope.to_string();
                    for (ix, mat) in regex.find_iter(&text).enumerate() {
                        if (ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        matches.push(mat.start()..mat.end());
                    }
                } else {
                    // Reassemble one line at a time from the rope's chunks. `line`
                    // accumulates the current line (which may span chunks) and
                    // `line_offset` is the byte offset of its first character.
                    let mut line = String::new();
                    let mut line_offset = 0;
                    // The extra trailing "\n" chunk forces the final line to be flushed.
                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        for (newline_ix, text) in chunk.split('\n').enumerate() {
                            // Every piece after the first follows a newline, so the line
                            // accumulated so far is complete and can be searched.
                            if newline_ix > 0 {
                                for mat in regex.find_iter(&line) {
                                    let start = line_offset + mat.start();
                                    let end = line_offset + mat.end();
                                    matches.push(start..end);
                                }

                                // +1 accounts for the newline that ended the line.
                                line_offset += line.len() + 1;
                                line.clear();
                            }
                            line.push_str(text);
                        }
                    }
                }
            }
        }

        matches
    }
329
330 pub fn is_empty(&self) -> bool {
331 self.as_str().is_empty()
332 }
333
334 pub fn as_str(&self) -> &str {
335 self.as_inner().as_str()
336 }
337
338 pub fn whole_word(&self) -> bool {
339 match self {
340 Self::Text { whole_word, .. } => *whole_word,
341 Self::Regex { whole_word, .. } => *whole_word,
342 }
343 }
344
345 pub fn case_sensitive(&self) -> bool {
346 match self {
347 Self::Text { case_sensitive, .. } => *case_sensitive,
348 Self::Regex { case_sensitive, .. } => *case_sensitive,
349 }
350 }
351
352 pub fn include_ignored(&self) -> bool {
353 match self {
354 Self::Text {
355 include_ignored, ..
356 } => *include_ignored,
357 Self::Regex {
358 include_ignored, ..
359 } => *include_ignored,
360 }
361 }
362
363 pub fn is_regex(&self) -> bool {
364 matches!(self, Self::Regex { .. })
365 }
366
367 pub fn files_to_include(&self) -> &[PathMatcher] {
368 self.as_inner().files_to_include()
369 }
370
371 pub fn files_to_exclude(&self) -> &[PathMatcher] {
372 self.as_inner().files_to_exclude()
373 }
374
375 pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
376 match file_path {
377 Some(file_path) => {
378 let mut path = file_path.to_path_buf();
379 loop {
380 if self
381 .files_to_exclude()
382 .iter()
383 .any(|exclude_glob| exclude_glob.is_match(&path))
384 {
385 return false;
386 } else if self.files_to_include().is_empty()
387 || self
388 .files_to_include()
389 .iter()
390 .any(|include_glob| include_glob.is_match(&path))
391 {
392 return true;
393 } else if !path.pop() {
394 return false;
395 }
396 }
397 }
398 None => self.files_to_include().is_empty(),
399 }
400 }
401 pub fn as_inner(&self) -> &SearchInputs {
402 match self {
403 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
404 }
405 }
406}
407
408fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
409 glob_set
410 .split(',')
411 .map(str::trim)
412 .filter(|glob_str| !glob_str.is_empty())
413 .map(|glob_str| {
414 PathMatcher::new(glob_str)
415 .with_context(|| format!("deserializing path match glob {glob_str}"))
416 })
417 .collect()
418}
419
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted as matchers and must match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(valid_path) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            );
        }
    }

    /// Malformed globs must be rejected and well-formed globs accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            assert!(
                PathMatcher::new(invalid_glob).is_err(),
                "Invalid glob {invalid_glob} should not be accepted"
            );
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(valid_glob) {
                panic!("Valid glob {valid_glob} should be accepted, but got: {e}");
            }
        }
    }
}