1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::{Context, Result};
3use client::proto;
4use itertools::Itertools;
5use language::{char_kind, BufferSnapshot};
6use regex::{Captures, Regex, RegexBuilder};
7use smol::future::yield_now;
8use std::{
9 borrow::Cow,
10 io::{BufRead, BufReader, Read},
11 ops::Range,
12 path::Path,
13 sync::{Arc, OnceLock},
14};
15use util::paths::PathMatcher;
16
/// Lazily initialised regex used by `SearchQuery::replacement_for` to find the
/// escape sequences (`\\`, `\n`, `\t`) written literally in a regex replacement
/// string. It is compiled on first use and shared afterwards.
static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: OnceLock<Regex> = OnceLock::new();
18
/// The raw, user-supplied inputs a `SearchQuery` was built from.
#[derive(Clone, Debug)]
pub struct SearchInputs {
    /// The query text as originally given, before any rewriting done while
    /// compiling it (for example the whole-word wrapping of regex queries).
    query: Arc<str>,
    /// Matchers for paths that should be searched; an empty list is treated
    /// as "everything" by `SearchQuery::file_matches`.
    files_to_include: Vec<PathMatcher>,
    /// Matchers for paths that must not be searched.
    files_to_exclude: Vec<PathMatcher>,
}
25
26impl SearchInputs {
27 pub fn as_str(&self) -> &str {
28 self.query.as_ref()
29 }
30 pub fn files_to_include(&self) -> &[PathMatcher] {
31 &self.files_to_include
32 }
33 pub fn files_to_exclude(&self) -> &[PathMatcher] {
34 &self.files_to_exclude
35 }
36}
/// A compiled search query, either a literal text search or a regular
/// expression search, together with its options and original inputs.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search backed by an Aho-Corasick automaton.
    Text {
        /// Automaton built from the query string in `SearchQuery::text`.
        search: Arc<AhoCorasick>,
        /// Replacement text, if one was set via `with_replacement`.
        replacement: Option<String>,
        /// When set, `search` drops matches that are not delimited by a
        /// change in character kind on both sides.
        whole_word: bool,
        /// When `false`, the automaton is built ASCII-case-insensitive.
        case_sensitive: bool,
        /// Stored and exposed through `include_ignored()`; not interpreted in
        /// this file. Presumably controls whether ignored files are searched
        /// (confirm against callers).
        include_ignored: bool,
        /// The original, unmodified inputs.
        inner: SearchInputs,
    },

    /// Regular-expression search.
    Regex {
        /// Compiled pattern; for whole-word queries it is the user's pattern
        /// surrounded by `\b` word-boundary assertions.
        regex: Regex,
        /// Replacement template, if one was set via `with_replacement`.
        replacement: Option<String>,
        /// True when the pattern text contains a newline or the two-character
        /// sequence `\n`; selects whole-text matching instead of line-by-line.
        multiline: bool,
        /// Whether the pattern was wrapped in word-boundary assertions.
        whole_word: bool,
        /// When `false`, the regex is compiled case-insensitively.
        case_sensitive: bool,
        /// Stored and exposed through `include_ignored()`; not interpreted in
        /// this file. Presumably controls whether ignored files are searched
        /// (confirm against callers).
        include_ignored: bool,
        /// The original, unmodified inputs.
        inner: SearchInputs,
    },
}
58
59impl SearchQuery {
60 pub fn text(
61 query: impl ToString,
62 whole_word: bool,
63 case_sensitive: bool,
64 include_ignored: bool,
65 files_to_include: Vec<PathMatcher>,
66 files_to_exclude: Vec<PathMatcher>,
67 ) -> Result<Self> {
68 let query = query.to_string();
69 let search = AhoCorasickBuilder::new()
70 .ascii_case_insensitive(!case_sensitive)
71 .build(&[&query])?;
72 let inner = SearchInputs {
73 query: query.into(),
74 files_to_exclude,
75 files_to_include,
76 };
77 Ok(Self::Text {
78 search: Arc::new(search),
79 replacement: None,
80 whole_word,
81 case_sensitive,
82 include_ignored,
83 inner,
84 })
85 }
86
87 pub fn regex(
88 query: impl ToString,
89 whole_word: bool,
90 case_sensitive: bool,
91 include_ignored: bool,
92 files_to_include: Vec<PathMatcher>,
93 files_to_exclude: Vec<PathMatcher>,
94 ) -> Result<Self> {
95 let mut query = query.to_string();
96 let initial_query = Arc::from(query.as_str());
97 if whole_word {
98 let mut word_query = String::new();
99 word_query.push_str("\\b");
100 word_query.push_str(&query);
101 word_query.push_str("\\b");
102 query = word_query
103 }
104
105 let multiline = query.contains('\n') || query.contains("\\n");
106 let regex = RegexBuilder::new(&query)
107 .case_insensitive(!case_sensitive)
108 .multi_line(multiline)
109 .build()?;
110 let inner = SearchInputs {
111 query: initial_query,
112 files_to_exclude,
113 files_to_include,
114 };
115 Ok(Self::Regex {
116 regex,
117 replacement: None,
118 multiline,
119 whole_word,
120 case_sensitive,
121 include_ignored,
122 inner,
123 })
124 }
125
126 pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
127 if message.regex {
128 Self::regex(
129 message.query,
130 message.whole_word,
131 message.case_sensitive,
132 message.include_ignored,
133 deserialize_path_matches(&message.files_to_include)?,
134 deserialize_path_matches(&message.files_to_exclude)?,
135 )
136 } else {
137 Self::text(
138 message.query,
139 message.whole_word,
140 message.case_sensitive,
141 message.include_ignored,
142 deserialize_path_matches(&message.files_to_include)?,
143 deserialize_path_matches(&message.files_to_exclude)?,
144 )
145 }
146 }
147 pub fn with_replacement(mut self, new_replacement: String) -> Self {
148 match self {
149 Self::Text {
150 ref mut replacement,
151 ..
152 }
153 | Self::Regex {
154 ref mut replacement,
155 ..
156 } => {
157 *replacement = Some(new_replacement);
158 self
159 }
160 }
161 }
162 pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
163 proto::SearchProject {
164 project_id,
165 query: self.as_str().to_string(),
166 regex: self.is_regex(),
167 whole_word: self.whole_word(),
168 case_sensitive: self.case_sensitive(),
169 include_ignored: self.include_ignored(),
170 files_to_include: self
171 .files_to_include()
172 .iter()
173 .map(|matcher| matcher.to_string())
174 .join(","),
175 files_to_exclude: self
176 .files_to_exclude()
177 .iter()
178 .map(|matcher| matcher.to_string())
179 .join(","),
180 }
181 }
182
183 pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
184 if self.as_str().is_empty() {
185 return Ok(false);
186 }
187
188 match self {
189 Self::Text { search, .. } => {
190 let mat = search.stream_find_iter(stream).next();
191 match mat {
192 Some(Ok(_)) => Ok(true),
193 Some(Err(err)) => Err(err.into()),
194 None => Ok(false),
195 }
196 }
197 Self::Regex {
198 regex, multiline, ..
199 } => {
200 let mut reader = BufReader::new(stream);
201 if *multiline {
202 let mut text = String::new();
203 if let Err(err) = reader.read_to_string(&mut text) {
204 Err(err.into())
205 } else {
206 Ok(regex.find(&text).is_some())
207 }
208 } else {
209 for line in reader.lines() {
210 let line = line?;
211 if regex.find(&line).is_some() {
212 return Ok(true);
213 }
214 }
215 Ok(false)
216 }
217 }
218 }
219 }
220 /// Returns the replacement text for this `SearchQuery`.
221 pub fn replacement(&self) -> Option<&str> {
222 match self {
223 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
224 replacement.as_deref()
225 }
226 }
227 }
228 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
229 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
230 match self {
231 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
232 SearchQuery::Regex {
233 regex, replacement, ..
234 } => {
235 if let Some(replacement) = replacement {
236 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX
237 .get_or_init(|| Regex::new(r"\\\\|\\n|\\t").unwrap())
238 .replace_all(replacement, |c: &Captures| {
239 match c.get(0).unwrap().as_str() {
240 r"\\" => "\\",
241 r"\n" => "\n",
242 r"\t" => "\t",
243 x => unreachable!("Unexpected escape sequence: {}", x),
244 }
245 });
246 Some(regex.replace(text, replacement))
247 } else {
248 None
249 }
250 }
251 }
252 }
253 pub async fn search(
254 &self,
255 buffer: &BufferSnapshot,
256 subrange: Option<Range<usize>>,
257 ) -> Vec<Range<usize>> {
258 const YIELD_INTERVAL: usize = 20000;
259
260 if self.as_str().is_empty() {
261 return Default::default();
262 }
263
264 let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
265 let rope = if let Some(range) = subrange {
266 buffer.as_rope().slice(range)
267 } else {
268 buffer.as_rope().clone()
269 };
270
271 let mut matches = Vec::new();
272 match self {
273 Self::Text {
274 search, whole_word, ..
275 } => {
276 for (ix, mat) in search
277 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
278 .enumerate()
279 {
280 if (ix + 1) % YIELD_INTERVAL == 0 {
281 yield_now().await;
282 }
283
284 let mat = mat.unwrap();
285 if *whole_word {
286 let scope = buffer.language_scope_at(range_offset + mat.start());
287 let kind = |c| char_kind(&scope, c);
288
289 let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
290 let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
291 let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
292 let next_kind = rope.chars_at(mat.end()).next().map(kind);
293 if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
294 continue;
295 }
296 }
297 matches.push(mat.start()..mat.end())
298 }
299 }
300
301 Self::Regex {
302 regex, multiline, ..
303 } => {
304 if *multiline {
305 let text = rope.to_string();
306 for (ix, mat) in regex.find_iter(&text).enumerate() {
307 if (ix + 1) % YIELD_INTERVAL == 0 {
308 yield_now().await;
309 }
310
311 matches.push(mat.start()..mat.end());
312 }
313 } else {
314 let mut line = String::new();
315 let mut line_offset = 0;
316 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
317 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
318 yield_now().await;
319 }
320
321 for (newline_ix, text) in chunk.split('\n').enumerate() {
322 if newline_ix > 0 {
323 for mat in regex.find_iter(&line) {
324 let start = line_offset + mat.start();
325 let end = line_offset + mat.end();
326 matches.push(start..end);
327 }
328
329 line_offset += line.len() + 1;
330 line.clear();
331 }
332 line.push_str(text);
333 }
334 }
335 }
336 }
337 }
338
339 matches
340 }
341
342 pub fn is_empty(&self) -> bool {
343 self.as_str().is_empty()
344 }
345
346 pub fn as_str(&self) -> &str {
347 self.as_inner().as_str()
348 }
349
350 pub fn whole_word(&self) -> bool {
351 match self {
352 Self::Text { whole_word, .. } => *whole_word,
353 Self::Regex { whole_word, .. } => *whole_word,
354 }
355 }
356
357 pub fn case_sensitive(&self) -> bool {
358 match self {
359 Self::Text { case_sensitive, .. } => *case_sensitive,
360 Self::Regex { case_sensitive, .. } => *case_sensitive,
361 }
362 }
363
364 pub fn include_ignored(&self) -> bool {
365 match self {
366 Self::Text {
367 include_ignored, ..
368 } => *include_ignored,
369 Self::Regex {
370 include_ignored, ..
371 } => *include_ignored,
372 }
373 }
374
375 pub fn is_regex(&self) -> bool {
376 matches!(self, Self::Regex { .. })
377 }
378
379 pub fn files_to_include(&self) -> &[PathMatcher] {
380 self.as_inner().files_to_include()
381 }
382
383 pub fn files_to_exclude(&self) -> &[PathMatcher] {
384 self.as_inner().files_to_exclude()
385 }
386
387 pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
388 match file_path {
389 Some(file_path) => {
390 let mut path = file_path.to_path_buf();
391 loop {
392 if self
393 .files_to_exclude()
394 .iter()
395 .any(|exclude_glob| exclude_glob.is_match(&path))
396 {
397 return false;
398 } else if self.files_to_include().is_empty()
399 || self
400 .files_to_include()
401 .iter()
402 .any(|include_glob| include_glob.is_match(&path))
403 {
404 return true;
405 } else if !path.pop() {
406 return false;
407 }
408 }
409 }
410 None => self.files_to_include().is_empty(),
411 }
412 }
413 pub fn as_inner(&self) -> &SearchInputs {
414 match self {
415 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
416 }
417 }
418}
419
420fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
421 glob_set
422 .split(',')
423 .map(str::trim)
424 .filter(|glob_str| !glob_str.is_empty())
425 .map(|glob_str| {
426 PathMatcher::new(glob_str)
427 .with_context(|| format!("deserializing path match glob {glob_str}"))
428 })
429 .collect()
430}
431
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted as matchers, and each must match itself.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];
        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(valid_path) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed globs accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            assert!(
                PathMatcher::new(invalid_glob).is_err(),
                "Invalid glob {invalid_glob} should not be accepted"
            );
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(valid_glob) {
                panic!("Valid glob {valid_glob} should be accepted, but got: {e}");
            }
        }
    }
}