1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use fancy_regex::{Captures, Regex, RegexBuilder};
5use gpui::Entity;
6use itertools::Itertools as _;
7use language::{Buffer, BufferSnapshot, CharKind};
8use smol::future::yield_now;
9use std::{
10 borrow::Cow,
11 io::{BufRead, BufReader, Read},
12 ops::Range,
13 sync::{Arc, LazyLock},
14};
15use text::Anchor;
16use util::{
17 paths::{PathMatcher, PathStyle},
18 rel_path::RelPath,
19};
20
21#[derive(Debug)]
22pub enum SearchResult {
23 Buffer {
24 buffer: Entity<Buffer>,
25 ranges: Vec<Range<Anchor>>,
26 },
27 LimitReached,
28}
29
/// Identifies one of the three inputs of a search.
///
/// NOTE(review): the variant names suggest the query field and the
/// include/exclude path filters; the consumers are not in this file.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SearchInputKind {
    /// The search query itself.
    Query,
    /// The "files to include" filter.
    Include,
    /// The "files to exclude" filter.
    Exclude,
}
36
37#[derive(Clone, Debug)]
38pub struct SearchInputs {
39 query: Arc<str>,
40 files_to_include: PathMatcher,
41 files_to_exclude: PathMatcher,
42 match_full_paths: bool,
43 buffers: Option<Vec<Entity<Buffer>>>,
44}
45
46impl SearchInputs {
47 pub fn as_str(&self) -> &str {
48 self.query.as_ref()
49 }
50 pub fn files_to_include(&self) -> &PathMatcher {
51 &self.files_to_include
52 }
53 pub fn files_to_exclude(&self) -> &PathMatcher {
54 &self.files_to_exclude
55 }
56 pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
57 &self.buffers
58 }
59}
60#[derive(Clone, Debug)]
61pub enum SearchQuery {
62 Text {
63 search: AhoCorasick,
64 replacement: Option<String>,
65 whole_word: bool,
66 case_sensitive: bool,
67 include_ignored: bool,
68 inner: SearchInputs,
69 },
70 Regex {
71 regex: Regex,
72 replacement: Option<String>,
73 multiline: bool,
74 whole_word: bool,
75 case_sensitive: bool,
76 include_ignored: bool,
77 one_match_per_line: bool,
78 inner: SearchInputs,
79 },
80}
81
82static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
83 RegexBuilder::new(r"\B")
84 .build()
85 .expect("Failed to create WORD_MATCH_TEST")
86});
87
88impl SearchQuery {
89 /// Create a text query
90 ///
91 /// If `match_full_paths` is true, include/exclude patterns will always be matched against fully qualified project paths beginning with a project root.
92 /// If `match_full_paths` is false, patterns will be matched against full paths only when the project has multiple roots.
93 pub fn text(
94 query: impl ToString,
95 whole_word: bool,
96 case_sensitive: bool,
97 include_ignored: bool,
98 files_to_include: PathMatcher,
99 files_to_exclude: PathMatcher,
100 match_full_paths: bool,
101 buffers: Option<Vec<Entity<Buffer>>>,
102 ) -> Result<Self> {
103 let query = query.to_string();
104 if !case_sensitive && !query.is_ascii() {
105 // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
106 // Fallback to regex search as recommended by
107 // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
108 return Self::regex(
109 regex::escape(&query),
110 whole_word,
111 case_sensitive,
112 include_ignored,
113 false,
114 files_to_include,
115 files_to_exclude,
116 false,
117 buffers,
118 );
119 }
120 let search = AhoCorasickBuilder::new()
121 .ascii_case_insensitive(!case_sensitive)
122 .build([&query])?;
123 let inner = SearchInputs {
124 query: query.into(),
125 files_to_exclude,
126 files_to_include,
127 match_full_paths,
128 buffers,
129 };
130 Ok(Self::Text {
131 search,
132 replacement: None,
133 whole_word,
134 case_sensitive,
135 include_ignored,
136 inner,
137 })
138 }
139
140 /// Create a regex query
141 ///
142 /// If `match_full_paths` is true, include/exclude patterns will be matched against fully qualified project paths
143 /// beginning with a project root name. If false, they will be matched against project-relative paths (which don't start
144 /// with their respective project root).
145 pub fn regex(
146 query: impl ToString,
147 whole_word: bool,
148 mut case_sensitive: bool,
149 include_ignored: bool,
150 one_match_per_line: bool,
151 files_to_include: PathMatcher,
152 files_to_exclude: PathMatcher,
153 match_full_paths: bool,
154 buffers: Option<Vec<Entity<Buffer>>>,
155 ) -> Result<Self> {
156 let mut query = query.to_string();
157 let initial_query = Arc::from(query.as_str());
158
159 if let Some((case_sensitive_from_pattern, new_query)) =
160 Self::case_sensitive_from_pattern(&query)
161 {
162 case_sensitive = case_sensitive_from_pattern;
163 query = new_query
164 }
165
166 if whole_word {
167 let mut word_query = String::new();
168 if let Some(first) = query.get(0..1)
169 && WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x)
170 {
171 word_query.push_str("\\b");
172 }
173 word_query.push_str(&query);
174 if let Some(last) = query.get(query.len() - 1..)
175 && WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x)
176 {
177 word_query.push_str("\\b");
178 }
179 query = word_query
180 }
181
182 let multiline = query.contains('\n') || query.contains("\\n");
183 if multiline {
184 query.insert_str(0, "(?m)");
185 }
186
187 let regex = RegexBuilder::new(&query)
188 .case_insensitive(!case_sensitive)
189 .build()?;
190 let inner = SearchInputs {
191 query: initial_query,
192 files_to_exclude,
193 files_to_include,
194 match_full_paths,
195 buffers,
196 };
197 Ok(Self::Regex {
198 regex,
199 replacement: None,
200 multiline,
201 whole_word,
202 case_sensitive,
203 include_ignored,
204 inner,
205 one_match_per_line,
206 })
207 }
208
/// Extracts case sensitivity settings from pattern items in the provided
/// query and returns the same query, with the pattern items removed.
///
/// The following pattern modifiers are supported:
///
/// - `\c` (case_sensitive: false)
/// - `\C` (case_sensitive: true)
///
/// When several items are present, the last one wins. An escaped backslash
/// followed by `c`/`C` (`\\c`) is not a pattern item and is left untouched.
///
/// If no pattern item was found, `None` is returned.
fn case_sensitive_from_pattern(query: &str) -> Option<(bool, String)> {
    // Cheap pre-check so the common case does not allocate.
    if !(query.contains("\\c") || query.contains("\\C")) {
        return None;
    }

    let mut was_escaped = false;
    let mut new_query = String::with_capacity(query.len());
    let mut is_case_sensitive = None;

    for c in query.chars() {
        if was_escaped {
            match c {
                'c' => is_case_sensitive = Some(false),
                'C' => is_case_sensitive = Some(true),
                _ => {
                    // Any other escape is copied through unchanged.
                    new_query.push('\\');
                    new_query.push(c);
                }
            }
            was_escaped = false;
        } else if c == '\\' {
            was_escaped = true;
        } else {
            new_query.push(c);
        }
    }

    // Keep a trailing unpaired backslash instead of silently dropping it, so
    // the rewritten query differs from the input only by the removed items.
    if was_escaped {
        new_query.push('\\');
    }

    is_case_sensitive.map(|c| (c, new_query))
}
247
248 pub fn from_proto(message: proto::SearchQuery, path_style: PathStyle) -> Result<Self> {
249 let files_to_include = if message.files_to_include.is_empty() {
250 message
251 .files_to_include_legacy
252 .split(',')
253 .map(str::trim)
254 .filter(|&glob_str| !glob_str.is_empty())
255 .map(|s| s.to_string())
256 .collect()
257 } else {
258 message.files_to_include
259 };
260
261 let files_to_exclude = if message.files_to_exclude.is_empty() {
262 message
263 .files_to_exclude_legacy
264 .split(',')
265 .map(str::trim)
266 .filter(|&glob_str| !glob_str.is_empty())
267 .map(|s| s.to_string())
268 .collect()
269 } else {
270 message.files_to_exclude
271 };
272
273 if message.regex {
274 Self::regex(
275 message.query,
276 message.whole_word,
277 message.case_sensitive,
278 message.include_ignored,
279 false,
280 PathMatcher::new(files_to_include, path_style)?,
281 PathMatcher::new(files_to_exclude, path_style)?,
282 message.match_full_paths,
283 None, // search opened only don't need search remote
284 )
285 } else {
286 Self::text(
287 message.query,
288 message.whole_word,
289 message.case_sensitive,
290 message.include_ignored,
291 PathMatcher::new(files_to_include, path_style)?,
292 PathMatcher::new(files_to_exclude, path_style)?,
293 false,
294 None, // search opened only don't need search remote
295 )
296 }
297 }
298
299 pub fn with_replacement(mut self, new_replacement: String) -> Self {
300 match self {
301 Self::Text {
302 ref mut replacement,
303 ..
304 }
305 | Self::Regex {
306 ref mut replacement,
307 ..
308 } => {
309 *replacement = Some(new_replacement);
310 self
311 }
312 }
313 }
314
315 pub fn to_proto(&self) -> proto::SearchQuery {
316 let mut files_to_include = self.files_to_include().sources();
317 let mut files_to_exclude = self.files_to_exclude().sources();
318 proto::SearchQuery {
319 query: self.as_str().to_string(),
320 regex: self.is_regex(),
321 whole_word: self.whole_word(),
322 case_sensitive: self.case_sensitive(),
323 include_ignored: self.include_ignored(),
324 files_to_include: files_to_include.clone().map(ToOwned::to_owned).collect(),
325 files_to_exclude: files_to_exclude.clone().map(ToOwned::to_owned).collect(),
326 match_full_paths: self.match_full_paths(),
327 // Populate legacy fields for backwards compatibility
328 files_to_include_legacy: files_to_include.join(","),
329 files_to_exclude_legacy: files_to_exclude.join(","),
330 }
331 }
332
333 pub(crate) async fn detect(
334 &self,
335 mut reader: BufReader<Box<dyn Read + Send + Sync>>,
336 ) -> Result<bool> {
337 let query_str = self.as_str();
338 let needle_len = query_str.len();
339 if needle_len == 0 {
340 return Ok(false);
341 }
342 if self.as_str().is_empty() {
343 return Ok(false);
344 }
345
346 let mut text = String::new();
347 let mut bytes_read = 0;
348 // Yield from this function every 128 bytes scanned.
349 const YIELD_THRESHOLD: usize = 128;
350 match self {
351 Self::Text { search, .. } => {
352 if query_str.contains('\n') {
353 reader.read_to_string(&mut text)?;
354 Ok(search.is_match(&text))
355 } else {
356 // Yield from this function every 128 bytes scanned.
357 const YIELD_THRESHOLD: usize = 128;
358 while reader.read_line(&mut text)? > 0 {
359 if search.is_match(&text) {
360 return Ok(true);
361 }
362 bytes_read += text.len();
363 if bytes_read >= YIELD_THRESHOLD {
364 bytes_read = 0;
365 smol::future::yield_now().await;
366 }
367 text.clear();
368 }
369 Ok(false)
370 }
371 }
372 Self::Regex {
373 regex, multiline, ..
374 } => {
375 if *multiline {
376 if let Err(err) = reader.read_to_string(&mut text) {
377 Err(err.into())
378 } else {
379 Ok(regex.is_match(&text)?)
380 }
381 } else {
382 while reader.read_line(&mut text)? > 0 {
383 if regex.is_match(&text)? {
384 return Ok(true);
385 }
386 bytes_read += text.len();
387 if bytes_read >= YIELD_THRESHOLD {
388 bytes_read = 0;
389 smol::future::yield_now().await;
390 }
391 text.clear();
392 }
393 Ok(false)
394 }
395 }
396 }
397 }
398 /// Returns the replacement text for this `SearchQuery`.
399 pub fn replacement(&self) -> Option<&str> {
400 match self {
401 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
402 replacement.as_deref()
403 }
404 }
405 }
406 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
407 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
408 match self {
409 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
410 SearchQuery::Regex {
411 regex, replacement, ..
412 } => {
413 if let Some(replacement) = replacement {
414 static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
415 LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
416 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
417 replacement,
418 |c: &Captures| match c.get(0).unwrap().as_str() {
419 r"\\" => "\\",
420 r"\n" => "\n",
421 r"\t" => "\t",
422 x => unreachable!("Unexpected escape sequence: {}", x),
423 },
424 );
425 Some(regex.replace(text, replacement))
426 } else {
427 None
428 }
429 }
430 }
431 }
432
433 pub async fn search(
434 &self,
435 buffer: &BufferSnapshot,
436 subrange: Option<Range<usize>>,
437 ) -> Vec<Range<usize>> {
438 const YIELD_INTERVAL: usize = 20000;
439
440 if self.as_str().is_empty() {
441 return Default::default();
442 }
443
444 let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
445 let rope = if let Some(range) = subrange {
446 buffer.as_rope().slice(range)
447 } else {
448 buffer.as_rope().clone()
449 };
450
451 let mut matches = Vec::new();
452 match self {
453 Self::Text {
454 search, whole_word, ..
455 } => {
456 for (ix, mat) in search
457 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
458 .enumerate()
459 {
460 if (ix + 1) % YIELD_INTERVAL == 0 {
461 yield_now().await;
462 }
463
464 let mat = mat.unwrap();
465 if *whole_word {
466 let classifier = buffer.char_classifier_at(range_offset + mat.start());
467
468 let prev_kind = rope
469 .reversed_chars_at(mat.start())
470 .next()
471 .map(|c| classifier.kind(c));
472 let start_kind =
473 classifier.kind(rope.chars_at(mat.start()).next().unwrap());
474 let end_kind =
475 classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
476 let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
477 if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
478 || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
479 {
480 continue;
481 }
482 }
483 matches.push(mat.start()..mat.end())
484 }
485 }
486
487 Self::Regex {
488 regex, multiline, ..
489 } => {
490 if *multiline {
491 let text = rope.to_string();
492 for (ix, mat) in regex.find_iter(&text).enumerate() {
493 if (ix + 1) % YIELD_INTERVAL == 0 {
494 yield_now().await;
495 }
496
497 if let Ok(mat) = mat {
498 matches.push(mat.start()..mat.end());
499 }
500 }
501 } else {
502 let mut line = String::new();
503 let mut line_offset = 0;
504 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
505 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
506 yield_now().await;
507 }
508
509 for (newline_ix, text) in chunk.split('\n').enumerate() {
510 if newline_ix > 0 {
511 for mat in regex.find_iter(&line).flatten() {
512 let start = line_offset + mat.start();
513 let end = line_offset + mat.end();
514 matches.push(start..end);
515 if self.one_match_per_line() == Some(true) {
516 break;
517 }
518 }
519
520 line_offset += line.len() + 1;
521 line.clear();
522 }
523 line.push_str(text);
524 }
525 }
526 }
527 }
528 }
529
530 matches
531 }
532
533 pub fn is_empty(&self) -> bool {
534 self.as_str().is_empty()
535 }
536
537 pub fn as_str(&self) -> &str {
538 self.as_inner().as_str()
539 }
540
541 pub fn whole_word(&self) -> bool {
542 match self {
543 Self::Text { whole_word, .. } => *whole_word,
544 Self::Regex { whole_word, .. } => *whole_word,
545 }
546 }
547
548 pub fn case_sensitive(&self) -> bool {
549 match self {
550 Self::Text { case_sensitive, .. } => *case_sensitive,
551 Self::Regex { case_sensitive, .. } => *case_sensitive,
552 }
553 }
554
555 pub fn include_ignored(&self) -> bool {
556 match self {
557 Self::Text {
558 include_ignored, ..
559 } => *include_ignored,
560 Self::Regex {
561 include_ignored, ..
562 } => *include_ignored,
563 }
564 }
565
566 pub fn is_regex(&self) -> bool {
567 matches!(self, Self::Regex { .. })
568 }
569
570 pub fn files_to_include(&self) -> &PathMatcher {
571 self.as_inner().files_to_include()
572 }
573
574 pub fn files_to_exclude(&self) -> &PathMatcher {
575 self.as_inner().files_to_exclude()
576 }
577
578 pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
579 self.as_inner().buffers.as_ref()
580 }
581
582 pub fn is_opened_only(&self) -> bool {
583 self.as_inner().buffers.is_some()
584 }
585
586 pub fn filters_path(&self) -> bool {
587 !(self.files_to_exclude().sources().next().is_none()
588 && self.files_to_include().sources().next().is_none())
589 }
590
591 pub fn match_full_paths(&self) -> bool {
592 self.as_inner().match_full_paths
593 }
594
595 /// Check match full paths to determine whether you're required to pass a fully qualified
596 /// project path (starts with a project root).
597 pub fn match_path(&self, file_path: &RelPath) -> bool {
598 let mut path = file_path.to_rel_path_buf();
599 loop {
600 if self.files_to_exclude().is_match(&path) {
601 return false;
602 } else if self.files_to_include().sources().next().is_none()
603 || self.files_to_include().is_match(&path)
604 {
605 return true;
606 } else if !path.pop() {
607 return false;
608 }
609 }
610 }
611 pub fn as_inner(&self) -> &SearchInputs {
612 match self {
613 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
614 }
615 }
616
617 /// Whether this search should replace only one match per line, instead of
618 /// all matches.
619 /// Returns `None` for text searches, as only regex searches support this
620 /// option.
621 pub fn one_match_per_line(&self) -> Option<bool> {
622 match self {
623 Self::Regex {
624 one_match_per_line, ..
625 } => Some(*one_match_per_line),
626 Self::Text { .. } => None,
627 }
628 }
629}
630
631#[cfg(test)]
632mod tests {
633 use super::*;
634
/// Every plain path must build a matcher, and that matcher must match the
/// path it was built from.
#[test]
fn path_matcher_creation_for_valid_paths() {
    let valid_paths = [
        "file",
        "Cargo.toml",
        ".DS_Store",
        "~/dir/another_dir/",
        "./dir/file",
        "dir/[a-z].txt",
    ];

    for valid_path in valid_paths {
        let path_matcher = match PathMatcher::new(&[valid_path.to_owned()], PathStyle::local()) {
            Ok(matcher) => matcher,
            Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
        };
        let rel_path = RelPath::new(valid_path.as_ref(), PathStyle::local()).unwrap();
        assert!(
            path_matcher.is_match(&rel_path),
            "Path matcher for valid path {valid_path} should match itself"
        )
    }
}
656
/// Malformed globs must be rejected and well-formed globs accepted.
#[test]
fn path_matcher_creation_for_globs() {
    let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
    for invalid_glob in invalid_globs {
        let built = PathMatcher::new(&[invalid_glob.to_owned()], PathStyle::local());
        if built.is_ok() {
            panic!("Invalid glob {invalid_glob} should not be accepted");
        }
    }

    let valid_globs = [
        "dir/?ile",
        "dir/*.txt",
        "dir/**/file",
        "dir/[a-z].txt",
        "{dir,file}",
    ];
    for valid_glob in valid_globs {
        if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()], PathStyle::local()) {
            panic!("Valid glob should be accepted, but got: {e}");
        }
    }
}
679
/// Inline `\c` / `\C` items must override the `case_sensitive` argument; the
/// last item wins and an escaped backslash disables the item.
#[test]
fn test_case_sensitive_pattern_items() {
    // (pattern, whole_word, initial case_sensitive, expected, failure message)
    let cases = [
        (
            "test\\C",
            false,
            false,
            true,
            "Case sensitivity should be enabled when \\C pattern item is present in the query.",
        ),
        (
            "test\\c",
            true,
            true,
            false,
            "Case sensitivity should be disabled when \\c pattern item is present, even if initially set to true.",
        ),
        (
            "test\\c\\C",
            false,
            false,
            true,
            "Case sensitivity should be enabled when \\C is the last pattern item, even after a \\c.",
        ),
        (
            "tests\\\\C",
            false,
            false,
            false,
            "Case sensitivity should not be enabled when \\C pattern item is preceded by a backslash.",
        ),
    ];

    for (pattern, whole_word, case_sensitive, expected, message) in cases {
        let search_query = SearchQuery::regex(
            pattern,
            whole_word,
            case_sensitive,
            false,
            false,
            Default::default(),
            Default::default(),
            false,
            None,
        )
        .expect("Should be able to create a regex SearchQuery");

        assert_eq!(search_query.case_sensitive(), expected, "{}", message);
    }
}
762
763 #[gpui::test]
764 async fn test_multiline_regex(cx: &mut gpui::TestAppContext) {
765 let search_query = SearchQuery::regex(
766 "^hello$\n",
767 false,
768 false,
769 false,
770 false,
771 Default::default(),
772 Default::default(),
773 false,
774 None,
775 )
776 .expect("Should be able to create a regex SearchQuery");
777
778 use language::Buffer;
779 let text = crate::Rope::from("hello\nworld\nhello\nworld");
780 let snapshot = cx
781 .update(|app| Buffer::build_snapshot(text, None, None, app))
782 .await;
783
784 let results = search_query.search(&snapshot, None).await;
785 assert_eq!(results, vec![0..6, 12..18]);
786 }
787}