1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use fancy_regex::{Captures, Regex, RegexBuilder};
5use gpui::Entity;
6use language::{Buffer, BufferSnapshot, CharKind};
7use smol::future::yield_now;
8use std::{
9 borrow::Cow,
10 io::{BufRead, BufReader, Read},
11 ops::Range,
12 path::Path,
13 sync::{Arc, LazyLock},
14};
15use text::Anchor;
16use util::paths::PathMatcher;
17
/// One item produced by a project search.
#[derive(Debug)]
pub enum SearchResult {
    /// A buffer together with the anchor ranges reported for it.
    Buffer {
        buffer: Entity<Buffer>,
        ranges: Vec<Range<Anchor>>,
    },
    /// NOTE(review): presumably emitted when the search stopped because a
    /// result cap was hit — confirm against the producer of this enum.
    LimitReached,
}
26
/// Names the three kinds of search input.
///
/// NOTE(review): presumably these map to the query text and the
/// include/exclude path filters held by `SearchInputs` — confirm at call sites.
#[derive(Clone, Copy, PartialEq)]
pub enum SearchInputKind {
    Query,
    Include,
    Exclude,
}
33
/// The inputs shared by both `SearchQuery` variants: the query string plus the
/// path filters and buffer restriction that go with it.
#[derive(Clone, Debug)]
pub struct SearchInputs {
    // The query string stored for this search (returned by `as_str`).
    query: Arc<str>,
    // Path patterns a file must match to be searched (an empty set accepts everything
    // in `SearchQuery::match_path`).
    files_to_include: PathMatcher,
    // Path patterns that exclude a file from the search.
    files_to_exclude: PathMatcher,
    // Whether include/exclude patterns are meant to be matched against full project paths.
    match_full_paths: bool,
    // When `Some`, `SearchQuery::is_opened_only` reports `true`.
    buffers: Option<Vec<Entity<Buffer>>>,
}
42
43impl SearchInputs {
44 pub fn as_str(&self) -> &str {
45 self.query.as_ref()
46 }
47 pub fn files_to_include(&self) -> &PathMatcher {
48 &self.files_to_include
49 }
50 pub fn files_to_exclude(&self) -> &PathMatcher {
51 &self.files_to_exclude
52 }
53 pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
54 &self.buffers
55 }
56}
/// A compiled search query, either plain text or a regular expression.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// A literal-text query backed by an Aho-Corasick automaton.
    Text {
        // Automaton built from the query string in `SearchQuery::text`.
        search: AhoCorasick,
        // Replacement text, set through `with_replacement`.
        replacement: Option<String>,
        whole_word: bool,
        case_sensitive: bool,
        include_ignored: bool,
        inner: SearchInputs,
    },

    /// A regular-expression query.
    Regex {
        // Compiled pattern (after any `\c`/`\C` stripping and `\b` wrapping done in
        // `SearchQuery::regex`).
        regex: Regex,
        // Replacement template, set through `with_replacement`.
        replacement: Option<String>,
        // True when the pattern contains a newline or the two-character sequence `\n`.
        multiline: bool,
        whole_word: bool,
        case_sensitive: bool,
        include_ignored: bool,
        // When true, the line-by-line search keeps only the first match of each line.
        one_match_per_line: bool,
        inner: SearchInputs,
    },
}
79
80static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
81 RegexBuilder::new(r"\B")
82 .build()
83 .expect("Failed to create WORD_MATCH_TEST")
84});
85
86impl SearchQuery {
87 /// Create a text query
88 ///
89 /// If `match_full_paths` is true, include/exclude patterns will always be matched against fully qualified project paths beginning with a project root.
90 /// If `match_full_paths` is false, patterns will be matched against full paths only when the project has multiple roots.
91 pub fn text(
92 query: impl ToString,
93 whole_word: bool,
94 case_sensitive: bool,
95 include_ignored: bool,
96 files_to_include: PathMatcher,
97 files_to_exclude: PathMatcher,
98 match_full_paths: bool,
99 buffers: Option<Vec<Entity<Buffer>>>,
100 ) -> Result<Self> {
101 let query = query.to_string();
102 if !case_sensitive && !query.is_ascii() {
103 // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
104 // Fallback to regex search as recommended by
105 // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
106 return Self::regex(
107 regex::escape(&query),
108 whole_word,
109 case_sensitive,
110 include_ignored,
111 false,
112 files_to_include,
113 files_to_exclude,
114 false,
115 buffers,
116 );
117 }
118 let search = AhoCorasickBuilder::new()
119 .ascii_case_insensitive(!case_sensitive)
120 .build([&query])?;
121 let inner = SearchInputs {
122 query: query.into(),
123 files_to_exclude,
124 files_to_include,
125 match_full_paths,
126 buffers,
127 };
128 Ok(Self::Text {
129 search,
130 replacement: None,
131 whole_word,
132 case_sensitive,
133 include_ignored,
134 inner,
135 })
136 }
137
138 /// Create a regex query
139 ///
140 /// If `match_full_paths` is true, include/exclude patterns will be matched against fully qualified project paths
141 /// beginning with a project root name. If false, they will be matched against project-relative paths (which don't start
142 /// with their respective project root).
143 pub fn regex(
144 query: impl ToString,
145 whole_word: bool,
146 mut case_sensitive: bool,
147 include_ignored: bool,
148 one_match_per_line: bool,
149 files_to_include: PathMatcher,
150 files_to_exclude: PathMatcher,
151 match_full_paths: bool,
152 buffers: Option<Vec<Entity<Buffer>>>,
153 ) -> Result<Self> {
154 let mut query = query.to_string();
155 let initial_query = Arc::from(query.as_str());
156
157 if let Some((case_sensitive_from_pattern, new_query)) =
158 Self::case_sensitive_from_pattern(&query)
159 {
160 case_sensitive = case_sensitive_from_pattern;
161 query = new_query
162 }
163
164 if whole_word {
165 let mut word_query = String::new();
166 if let Some(first) = query.get(0..1)
167 && WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x)
168 {
169 word_query.push_str("\\b");
170 }
171 word_query.push_str(&query);
172 if let Some(last) = query.get(query.len() - 1..)
173 && WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x)
174 {
175 word_query.push_str("\\b");
176 }
177 query = word_query
178 }
179
180 let multiline = query.contains('\n') || query.contains("\\n");
181 let regex = RegexBuilder::new(&query)
182 .case_insensitive(!case_sensitive)
183 .build()?;
184 let inner = SearchInputs {
185 query: initial_query,
186 files_to_exclude,
187 files_to_include,
188 match_full_paths,
189 buffers,
190 };
191 Ok(Self::Regex {
192 regex,
193 replacement: None,
194 multiline,
195 whole_word,
196 case_sensitive,
197 include_ignored,
198 inner,
199 one_match_per_line,
200 })
201 }
202
/// Extracts case sensitivity settings from pattern items in the provided
/// query and returns the same query, with the pattern items removed.
///
/// The following pattern modifiers are supported:
///
/// - `\c` (case_sensitive: false)
/// - `\C` (case_sensitive: true)
///
/// When several items are present, the last one wins. An escaped backslash
/// followed by `c`/`C` (`\\c`) is not a pattern item and is kept as is.
///
/// If no pattern item was found, `None` is returned.
fn case_sensitive_from_pattern(query: &str) -> Option<(bool, String)> {
    // Cheap pre-check: without either two-character sequence there is nothing to strip.
    if !(query.contains("\\c") || query.contains("\\C")) {
        return None;
    }

    let mut was_escaped = false;
    let mut new_query = String::with_capacity(query.len());
    let mut is_case_sensitive = None;

    for c in query.chars() {
        if was_escaped {
            match c {
                'c' => is_case_sensitive = Some(false),
                'C' => is_case_sensitive = Some(true),
                // Any other escape is passed through untouched.
                _ => {
                    new_query.push('\\');
                    new_query.push(c);
                }
            }
            was_escaped = false;
        } else if c == '\\' {
            was_escaped = true;
        } else {
            new_query.push(c);
        }
    }

    // A trailing lone backslash is not part of a pattern item; keep it so the
    // rest of the query is returned unchanged.
    if was_escaped {
        new_query.push('\\');
    }

    is_case_sensitive.map(|case_sensitive| (case_sensitive, new_query))
}
241
242 pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
243 let files_to_include = if message.files_to_include.is_empty() {
244 message
245 .files_to_include_legacy
246 .split(',')
247 .map(str::trim)
248 .filter(|&glob_str| !glob_str.is_empty())
249 .map(|s| s.to_string())
250 .collect()
251 } else {
252 message.files_to_include
253 };
254
255 let files_to_exclude = if message.files_to_exclude.is_empty() {
256 message
257 .files_to_exclude_legacy
258 .split(',')
259 .map(str::trim)
260 .filter(|&glob_str| !glob_str.is_empty())
261 .map(|s| s.to_string())
262 .collect()
263 } else {
264 message.files_to_exclude
265 };
266
267 if message.regex {
268 Self::regex(
269 message.query,
270 message.whole_word,
271 message.case_sensitive,
272 message.include_ignored,
273 false,
274 PathMatcher::new(files_to_include)?,
275 PathMatcher::new(files_to_exclude)?,
276 message.match_full_paths,
277 None, // search opened only don't need search remote
278 )
279 } else {
280 Self::text(
281 message.query,
282 message.whole_word,
283 message.case_sensitive,
284 message.include_ignored,
285 PathMatcher::new(files_to_include)?,
286 PathMatcher::new(files_to_exclude)?,
287 false,
288 None, // search opened only don't need search remote
289 )
290 }
291 }
292
293 pub fn with_replacement(mut self, new_replacement: String) -> Self {
294 match self {
295 Self::Text {
296 ref mut replacement,
297 ..
298 }
299 | Self::Regex {
300 ref mut replacement,
301 ..
302 } => {
303 *replacement = Some(new_replacement);
304 self
305 }
306 }
307 }
308
309 pub fn to_proto(&self) -> proto::SearchQuery {
310 let files_to_include = self.files_to_include().sources().to_vec();
311 let files_to_exclude = self.files_to_exclude().sources().to_vec();
312 proto::SearchQuery {
313 query: self.as_str().to_string(),
314 regex: self.is_regex(),
315 whole_word: self.whole_word(),
316 case_sensitive: self.case_sensitive(),
317 include_ignored: self.include_ignored(),
318 files_to_include: files_to_include.clone(),
319 files_to_exclude: files_to_exclude.clone(),
320 match_full_paths: self.match_full_paths(),
321 // Populate legacy fields for backwards compatibility
322 files_to_include_legacy: files_to_include.join(","),
323 files_to_exclude_legacy: files_to_exclude.join(","),
324 }
325 }
326
327 pub(crate) fn detect(
328 &self,
329 mut reader: BufReader<Box<dyn Read + Send + Sync>>,
330 ) -> Result<bool> {
331 if self.as_str().is_empty() {
332 return Ok(false);
333 }
334
335 match self {
336 Self::Text { search, .. } => {
337 let mat = search.stream_find_iter(reader).next();
338 match mat {
339 Some(Ok(_)) => Ok(true),
340 Some(Err(err)) => Err(err.into()),
341 None => Ok(false),
342 }
343 }
344 Self::Regex {
345 regex, multiline, ..
346 } => {
347 if *multiline {
348 let mut text = String::new();
349 if let Err(err) = reader.read_to_string(&mut text) {
350 Err(err.into())
351 } else {
352 Ok(regex.find(&text)?.is_some())
353 }
354 } else {
355 for line in reader.lines() {
356 let line = line?;
357 if regex.find(&line)?.is_some() {
358 return Ok(true);
359 }
360 }
361 Ok(false)
362 }
363 }
364 }
365 }
366 /// Returns the replacement text for this `SearchQuery`.
367 pub fn replacement(&self) -> Option<&str> {
368 match self {
369 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
370 replacement.as_deref()
371 }
372 }
373 }
374 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
375 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
376 match self {
377 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
378 SearchQuery::Regex {
379 regex, replacement, ..
380 } => {
381 if let Some(replacement) = replacement {
382 static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
383 LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
384 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
385 replacement,
386 |c: &Captures| match c.get(0).unwrap().as_str() {
387 r"\\" => "\\",
388 r"\n" => "\n",
389 r"\t" => "\t",
390 x => unreachable!("Unexpected escape sequence: {}", x),
391 },
392 );
393 Some(regex.replace(text, replacement))
394 } else {
395 None
396 }
397 }
398 }
399 }
400
/// Finds every match of this query in `buffer`, optionally restricted to `subrange`.
///
/// Returns byte ranges measured from the start of the searched text: when
/// `subrange` is given they are relative to `subrange.start`, not to the start
/// of the buffer. An empty query yields no matches.
///
/// The future yields to the executor periodically (every `YIELD_INTERVAL`
/// matches, or chunks in the line-by-line regex path).
pub async fn search(
    &self,
    buffer: &BufferSnapshot,
    subrange: Option<Range<usize>>,
) -> Vec<Range<usize>> {
    const YIELD_INTERVAL: usize = 20000;

    if self.as_str().is_empty() {
        return Default::default();
    }

    // `range_offset` converts offsets in `rope` back to buffer offsets; it is only
    // needed for the classifier lookup below.
    let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
    let rope = if let Some(range) = subrange {
        buffer.as_rope().slice(range)
    } else {
        buffer.as_rope().clone()
    };

    let mut matches = Vec::new();
    match self {
        Self::Text {
            search, whole_word, ..
        } => {
            for (ix, mat) in search
                .stream_find_iter(rope.bytes_in_range(0..rope.len()))
                .enumerate()
            {
                if (ix + 1) % YIELD_INTERVAL == 0 {
                    yield_now().await;
                }

                let mat = mat.unwrap();
                if *whole_word {
                    let classifier = buffer.char_classifier_at(range_offset + mat.start());

                    // Kinds of the characters just outside and just inside each end of
                    // the match. The outside ones are `None` at the edges of `rope`.
                    let prev_kind = rope
                        .reversed_chars_at(mat.start())
                        .next()
                        .map(|c| classifier.kind(c));
                    let start_kind =
                        classifier.kind(rope.chars_at(mat.start()).next().unwrap());
                    let end_kind =
                        classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
                    let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
                    // Reject the match when a word character continues across either
                    // end, i.e. the match is only part of a longer word.
                    if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
                        || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
                    {
                        continue;
                    }
                }
                matches.push(mat.start()..mat.end())
            }
        }

        Self::Regex {
            regex, multiline, ..
        } => {
            if *multiline {
                // Matches may span lines, so the regex runs over the whole text at once.
                let text = rope.to_string();
                for (ix, mat) in regex.find_iter(&text).enumerate() {
                    if (ix + 1) % YIELD_INTERVAL == 0 {
                        yield_now().await;
                    }

                    // Results for which the regex engine reported an error are skipped.
                    if let Ok(mat) = mat {
                        matches.push(mat.start()..mat.end());
                    }
                }
            } else {
                // Line-by-line path: `line` accumulates text across chunks until a
                // newline is seen; `line_offset` is the offset of `line`'s first byte
                // within `rope`. The chained "\n" flushes the final line.
                let mut line = String::new();
                let mut line_offset = 0;
                for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
                    if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
                        yield_now().await;
                    }

                    for (newline_ix, text) in chunk.split('\n').enumerate() {
                        // Every piece after the first follows a newline, so the line
                        // accumulated so far is complete and can be searched.
                        if newline_ix > 0 {
                            for mat in regex.find_iter(&line).flatten() {
                                let start = line_offset + mat.start();
                                let end = line_offset + mat.end();
                                matches.push(start..end);
                                if self.one_match_per_line() == Some(true) {
                                    break;
                                }
                            }

                            // `+ 1` accounts for the newline that ended the line.
                            line_offset += line.len() + 1;
                            line.clear();
                        }
                        line.push_str(text);
                    }
                }
            }
        }
    }

    matches
}
500
501 pub fn is_empty(&self) -> bool {
502 self.as_str().is_empty()
503 }
504
505 pub fn as_str(&self) -> &str {
506 self.as_inner().as_str()
507 }
508
509 pub fn whole_word(&self) -> bool {
510 match self {
511 Self::Text { whole_word, .. } => *whole_word,
512 Self::Regex { whole_word, .. } => *whole_word,
513 }
514 }
515
516 pub fn case_sensitive(&self) -> bool {
517 match self {
518 Self::Text { case_sensitive, .. } => *case_sensitive,
519 Self::Regex { case_sensitive, .. } => *case_sensitive,
520 }
521 }
522
523 pub fn include_ignored(&self) -> bool {
524 match self {
525 Self::Text {
526 include_ignored, ..
527 } => *include_ignored,
528 Self::Regex {
529 include_ignored, ..
530 } => *include_ignored,
531 }
532 }
533
534 pub fn is_regex(&self) -> bool {
535 matches!(self, Self::Regex { .. })
536 }
537
538 pub fn files_to_include(&self) -> &PathMatcher {
539 self.as_inner().files_to_include()
540 }
541
542 pub fn files_to_exclude(&self) -> &PathMatcher {
543 self.as_inner().files_to_exclude()
544 }
545
546 pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
547 self.as_inner().buffers.as_ref()
548 }
549
550 pub fn is_opened_only(&self) -> bool {
551 self.as_inner().buffers.is_some()
552 }
553
554 pub fn filters_path(&self) -> bool {
555 !(self.files_to_exclude().sources().is_empty()
556 && self.files_to_include().sources().is_empty())
557 }
558
559 pub fn match_full_paths(&self) -> bool {
560 self.as_inner().match_full_paths
561 }
562
563 /// Check match full paths to determine whether you're required to pass a fully qualified
564 /// project path (starts with a project root).
565 pub fn match_path(&self, file_path: &Path) -> bool {
566 let mut path = file_path.to_path_buf();
567 loop {
568 if self.files_to_exclude().is_match(&path) {
569 return false;
570 } else if self.files_to_include().sources().is_empty()
571 || self.files_to_include().is_match(&path)
572 {
573 return true;
574 } else if !path.pop() {
575 return false;
576 }
577 }
578 }
579 pub fn as_inner(&self) -> &SearchInputs {
580 match self {
581 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
582 }
583 }
584
585 /// Whether this search should replace only one match per line, instead of
586 /// all matches.
587 /// Returns `None` for text searches, as only regex searches support this
588 /// option.
589 pub fn one_match_per_line(&self) -> Option<bool> {
590 match self {
591 Self::Regex {
592 one_match_per_line, ..
593 } => Some(*one_match_per_line),
594 Self::Text { .. } => None,
595 }
596 }
597}
598
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a regex query from `pattern` with the given flags (everything
    /// else left at its default) and returns its effective case sensitivity.
    fn regex_case_sensitivity(pattern: &str, whole_word: bool, case_sensitive: bool) -> bool {
        SearchQuery::regex(
            pattern,
            whole_word,
            case_sensitive,
            false,
            false,
            Default::default(),
            Default::default(),
            false,
            None,
        )
        .expect("Should be able to create a regex SearchQuery")
        .case_sensitive()
    }

    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    #[test]
    fn path_matcher_creation_for_globs() {
        // Malformed character classes and unterminated groups must be rejected.
        for invalid_glob in ["dir/[].txt", "dir/[a-z.txt", "dir/{file"] {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }

    #[test]
    fn test_case_sensitive_pattern_items() {
        // `\C` switches case sensitivity on.
        assert_eq!(
            regex_case_sensitivity("test\\C", false, false),
            true,
            "Case sensitivity should be enabled when \\C pattern item is present in the query."
        );

        // `\c` switches it off, overriding the flag passed in.
        assert_eq!(
            regex_case_sensitivity("test\\c", true, true),
            false,
            "Case sensitivity should be disabled when \\c pattern item is present, even if initially set to true."
        );

        // The last pattern item wins.
        assert_eq!(
            regex_case_sensitivity("test\\c\\C", false, false),
            true,
            "Case sensitivity should be enabled when \\C is the last pattern item, even after a \\c."
        );

        // An escaped backslash followed by `C` is not a pattern item.
        assert_eq!(
            regex_case_sensitivity("tests\\\\C", false, false),
            false,
            "Case sensitivity should not be enabled when \\C pattern item is preceded by a backslash."
        );
    }
}