1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use fancy_regex::{Captures, Regex, RegexBuilder};
5use gpui::Entity;
6use itertools::Itertools as _;
7use language::{Buffer, BufferSnapshot, CharKind};
8use smol::future::yield_now;
9use std::{
10 borrow::Cow,
11 io::{BufRead, BufReader, Read},
12 ops::Range,
13 sync::{Arc, LazyLock},
14};
15use text::Anchor;
16use util::{
17 paths::{PathMatcher, PathStyle},
18 rel_path::RelPath,
19};
20
/// A single item produced by a project search.
#[derive(Debug)]
pub enum SearchResult {
    /// Matches found inside one buffer.
    Buffer {
        /// The buffer that contains the matches.
        buffer: Entity<Buffer>,
        /// Anchor ranges of the individual matches within `buffer`.
        ranges: Vec<Range<Anchor>>,
    },
    /// NOTE(review): presumably emitted when the search stopped early because
    /// a result limit was hit — confirm against the producer of this enum.
    LimitReached,
}
29
/// Identifies one of the three text inputs of a search: the query itself,
/// the include filter, or the exclude filter.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SearchInputKind {
    /// The search query input.
    Query,
    /// The "files to include" filter input.
    Include,
    /// The "files to exclude" filter input.
    Exclude,
}
36
/// The user-provided inputs shared by every kind of [`SearchQuery`].
#[derive(Clone, Debug)]
pub struct SearchInputs {
    /// The query string as it was handed to the constructor (for regex
    /// queries this is the text before any `\c`/`\C` stripping or
    /// word-boundary wrapping is applied).
    query: Arc<str>,
    /// Glob matcher for paths that should be searched.
    files_to_include: PathMatcher,
    /// Glob matcher for paths that must be skipped.
    files_to_exclude: PathMatcher,
    /// Whether include/exclude patterns are matched against full project paths.
    match_full_paths: bool,
    /// Optional explicit set of buffers.
    /// NOTE(review): presumably restricts the search to these open buffers — confirm with callers.
    buffers: Option<Vec<Entity<Buffer>>>,
}
45
46impl SearchInputs {
47 pub fn as_str(&self) -> &str {
48 self.query.as_ref()
49 }
50 pub fn files_to_include(&self) -> &PathMatcher {
51 &self.files_to_include
52 }
53 pub fn files_to_exclude(&self) -> &PathMatcher {
54 &self.files_to_exclude
55 }
56 pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
57 &self.buffers
58 }
59}
/// A compiled search query: either a literal text search or a regex search.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search backed by an Aho-Corasick automaton.
    Text {
        /// The compiled automaton for the query text.
        search: AhoCorasick,
        /// Replacement text, if one has been set via `with_replacement`.
        replacement: Option<String>,
        /// Whether matches must be delimited by word boundaries.
        whole_word: bool,
        /// Whether matching is case sensitive.
        case_sensitive: bool,
        /// NOTE(review): presumably whether ignored files are searched too — confirm with callers.
        include_ignored: bool,
        /// The raw inputs this query was built from.
        inner: SearchInputs,
    },
    /// Regular-expression search.
    Regex {
        /// The compiled pattern (after modifier stripping / `\b` wrapping).
        regex: Regex,
        /// Replacement template, if one has been set via `with_replacement`.
        replacement: Option<String>,
        /// True when the pattern contains a newline or a `\n` escape, in which
        /// case matching is done on whole text rather than line by line.
        multiline: bool,
        /// Whether the pattern was wrapped with word-boundary assertions.
        whole_word: bool,
        /// Whether matching is case sensitive (may be overridden by `\c`/`\C`
        /// modifiers in the pattern).
        case_sensitive: bool,
        /// NOTE(review): presumably whether ignored files are searched too — confirm with callers.
        include_ignored: bool,
        /// When true, line-by-line search keeps only the first match of each line.
        one_match_per_line: bool,
        /// The raw inputs this query was built from.
        inner: SearchInputs,
    },
}
81
82static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
83 RegexBuilder::new(r"\B")
84 .build()
85 .expect("Failed to create WORD_MATCH_TEST")
86});
87
88impl SearchQuery {
89 /// Create a text query
90 ///
91 /// If `match_full_paths` is true, include/exclude patterns will always be matched against fully qualified project paths beginning with a project root.
92 /// If `match_full_paths` is false, patterns will be matched against full paths only when the project has multiple roots.
93 pub fn text(
94 query: impl ToString,
95 whole_word: bool,
96 case_sensitive: bool,
97 include_ignored: bool,
98 files_to_include: PathMatcher,
99 files_to_exclude: PathMatcher,
100 match_full_paths: bool,
101 buffers: Option<Vec<Entity<Buffer>>>,
102 ) -> Result<Self> {
103 let query = query.to_string();
104 if !case_sensitive && !query.is_ascii() {
105 // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
106 // Fallback to regex search as recommended by
107 // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
108 return Self::regex(
109 regex::escape(&query),
110 whole_word,
111 case_sensitive,
112 include_ignored,
113 false,
114 files_to_include,
115 files_to_exclude,
116 false,
117 buffers,
118 );
119 }
120 let search = AhoCorasickBuilder::new()
121 .ascii_case_insensitive(!case_sensitive)
122 .build([&query])?;
123 let inner = SearchInputs {
124 query: query.into(),
125 files_to_exclude,
126 files_to_include,
127 match_full_paths,
128 buffers,
129 };
130 Ok(Self::Text {
131 search,
132 replacement: None,
133 whole_word,
134 case_sensitive,
135 include_ignored,
136 inner,
137 })
138 }
139
140 /// Create a regex query
141 ///
142 /// If `match_full_paths` is true, include/exclude patterns will be matched against fully qualified project paths
143 /// beginning with a project root name. If false, they will be matched against project-relative paths (which don't start
144 /// with their respective project root).
145 pub fn regex(
146 query: impl ToString,
147 whole_word: bool,
148 mut case_sensitive: bool,
149 include_ignored: bool,
150 one_match_per_line: bool,
151 files_to_include: PathMatcher,
152 files_to_exclude: PathMatcher,
153 match_full_paths: bool,
154 buffers: Option<Vec<Entity<Buffer>>>,
155 ) -> Result<Self> {
156 let mut query = query.to_string();
157 let initial_query = Arc::from(query.as_str());
158
159 if let Some((case_sensitive_from_pattern, new_query)) =
160 Self::case_sensitive_from_pattern(&query)
161 {
162 case_sensitive = case_sensitive_from_pattern;
163 query = new_query
164 }
165
166 if whole_word {
167 let mut word_query = String::new();
168 if let Some(first) = query.get(0..1)
169 && WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x)
170 {
171 word_query.push_str("\\b");
172 }
173 word_query.push_str(&query);
174 if let Some(last) = query.get(query.len() - 1..)
175 && WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x)
176 {
177 word_query.push_str("\\b");
178 }
179 query = word_query
180 }
181
182 let multiline = query.contains('\n') || query.contains("\\n");
183 let regex = RegexBuilder::new(&query)
184 .case_insensitive(!case_sensitive)
185 .build()?;
186 let inner = SearchInputs {
187 query: initial_query,
188 files_to_exclude,
189 files_to_include,
190 match_full_paths,
191 buffers,
192 };
193 Ok(Self::Regex {
194 regex,
195 replacement: None,
196 multiline,
197 whole_word,
198 case_sensitive,
199 include_ignored,
200 inner,
201 one_match_per_line,
202 })
203 }
204
/// Extracts case sensitivity settings from pattern items in the provided
/// query and returns the same query, with the pattern items removed.
///
/// The following pattern modifiers are supported:
///
/// - `\c` (case_sensitive: false)
/// - `\C` (case_sensitive: true)
///
/// When several modifiers are present, the last one wins. A backslash that
/// is itself escaped (`\\c`) does not introduce a modifier. Any other escape
/// sequence, including a dangling backslash at the very end of the query, is
/// copied through unchanged.
///
/// If no pattern item were found, `None` will be returned.
fn case_sensitive_from_pattern(query: &str) -> Option<(bool, String)> {
    // Cheap pre-check: without either two-character sequence there is nothing to do.
    if !(query.contains("\\c") || query.contains("\\C")) {
        return None;
    }

    let mut was_escaped = false;
    let mut new_query = String::with_capacity(query.len());
    let mut is_case_sensitive = None;

    for c in query.chars() {
        if was_escaped {
            match c {
                'c' => is_case_sensitive = Some(false),
                'C' => is_case_sensitive = Some(true),
                _ => {
                    // Not a modifier: re-emit the escape sequence verbatim.
                    new_query.push('\\');
                    new_query.push(c);
                }
            }
            was_escaped = false;
        } else if c == '\\' {
            was_escaped = true;
        } else {
            new_query.push(c);
        }
    }

    // A trailing lone backslash was consumed by the state machine above;
    // put it back so the pattern is not silently altered.
    if was_escaped {
        new_query.push('\\');
    }

    is_case_sensitive.map(|c| (c, new_query))
}
243
244 pub fn from_proto(message: proto::SearchQuery, path_style: PathStyle) -> Result<Self> {
245 let files_to_include = if message.files_to_include.is_empty() {
246 message
247 .files_to_include_legacy
248 .split(',')
249 .map(str::trim)
250 .filter(|&glob_str| !glob_str.is_empty())
251 .map(|s| s.to_string())
252 .collect()
253 } else {
254 message.files_to_include
255 };
256
257 let files_to_exclude = if message.files_to_exclude.is_empty() {
258 message
259 .files_to_exclude_legacy
260 .split(',')
261 .map(str::trim)
262 .filter(|&glob_str| !glob_str.is_empty())
263 .map(|s| s.to_string())
264 .collect()
265 } else {
266 message.files_to_exclude
267 };
268
269 if message.regex {
270 Self::regex(
271 message.query,
272 message.whole_word,
273 message.case_sensitive,
274 message.include_ignored,
275 false,
276 PathMatcher::new(files_to_include, path_style)?,
277 PathMatcher::new(files_to_exclude, path_style)?,
278 message.match_full_paths,
279 None, // search opened only don't need search remote
280 )
281 } else {
282 Self::text(
283 message.query,
284 message.whole_word,
285 message.case_sensitive,
286 message.include_ignored,
287 PathMatcher::new(files_to_include, path_style)?,
288 PathMatcher::new(files_to_exclude, path_style)?,
289 false,
290 None, // search opened only don't need search remote
291 )
292 }
293 }
294
295 pub fn with_replacement(mut self, new_replacement: String) -> Self {
296 match self {
297 Self::Text {
298 ref mut replacement,
299 ..
300 }
301 | Self::Regex {
302 ref mut replacement,
303 ..
304 } => {
305 *replacement = Some(new_replacement);
306 self
307 }
308 }
309 }
310
311 pub fn to_proto(&self) -> proto::SearchQuery {
312 let mut files_to_include = self.files_to_include().sources();
313 let mut files_to_exclude = self.files_to_exclude().sources();
314 proto::SearchQuery {
315 query: self.as_str().to_string(),
316 regex: self.is_regex(),
317 whole_word: self.whole_word(),
318 case_sensitive: self.case_sensitive(),
319 include_ignored: self.include_ignored(),
320 files_to_include: files_to_include.clone().map(ToOwned::to_owned).collect(),
321 files_to_exclude: files_to_exclude.clone().map(ToOwned::to_owned).collect(),
322 match_full_paths: self.match_full_paths(),
323 // Populate legacy fields for backwards compatibility
324 files_to_include_legacy: files_to_include.join(","),
325 files_to_exclude_legacy: files_to_exclude.join(","),
326 }
327 }
328
329 pub(crate) async fn detect(
330 &self,
331 mut reader: BufReader<Box<dyn Read + Send + Sync>>,
332 ) -> Result<bool> {
333 let query_str = self.as_str();
334 let needle_len = query_str.len();
335 if needle_len == 0 {
336 return Ok(false);
337 }
338 if self.as_str().is_empty() {
339 return Ok(false);
340 }
341
342 let mut text = String::new();
343 let mut bytes_read = 0;
344 // Yield from this function every 128 bytes scanned.
345 const YIELD_THRESHOLD: usize = 128;
346 match self {
347 Self::Text { search, .. } => {
348 if query_str.contains('\n') {
349 reader.read_to_string(&mut text)?;
350 Ok(search.is_match(&text))
351 } else {
352 // Yield from this function every 128 bytes scanned.
353 const YIELD_THRESHOLD: usize = 128;
354 while reader.read_line(&mut text)? > 0 {
355 if search.is_match(&text) {
356 return Ok(true);
357 }
358 bytes_read += text.len();
359 if bytes_read >= YIELD_THRESHOLD {
360 bytes_read = 0;
361 smol::future::yield_now().await;
362 }
363 text.clear();
364 }
365 Ok(false)
366 }
367 }
368 Self::Regex {
369 regex, multiline, ..
370 } => {
371 if *multiline {
372 if let Err(err) = reader.read_to_string(&mut text) {
373 Err(err.into())
374 } else {
375 Ok(regex.is_match(&text)?)
376 }
377 } else {
378 while reader.read_line(&mut text)? > 0 {
379 if regex.is_match(&text)? {
380 return Ok(true);
381 }
382 bytes_read += text.len();
383 if bytes_read >= YIELD_THRESHOLD {
384 bytes_read = 0;
385 smol::future::yield_now().await;
386 }
387 text.clear();
388 }
389 Ok(false)
390 }
391 }
392 }
393 }
394 /// Returns the replacement text for this `SearchQuery`.
395 pub fn replacement(&self) -> Option<&str> {
396 match self {
397 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
398 replacement.as_deref()
399 }
400 }
401 }
402 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
403 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
404 match self {
405 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
406 SearchQuery::Regex {
407 regex, replacement, ..
408 } => {
409 if let Some(replacement) = replacement {
410 static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
411 LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
412 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
413 replacement,
414 |c: &Captures| match c.get(0).unwrap().as_str() {
415 r"\\" => "\\",
416 r"\n" => "\n",
417 r"\t" => "\t",
418 x => unreachable!("Unexpected escape sequence: {}", x),
419 },
420 );
421 Some(regex.replace(text, replacement))
422 } else {
423 None
424 }
425 }
426 }
427 }
428
/// Finds all matches of this query in `buffer`, optionally restricted to `subrange`.
///
/// Returns byte ranges of the matches. When `subrange` is given, the search runs on
/// that slice of the buffer and the returned ranges are relative to the start of the
/// slice (they are not shifted back by `subrange.start`). An empty query yields no
/// matches. The function periodically yields to the executor while scanning.
pub async fn search(
    &self,
    buffer: &BufferSnapshot,
    subrange: Option<Range<usize>>,
) -> Vec<Range<usize>> {
    // Yield to the executor once every this many matches / chunks.
    const YIELD_INTERVAL: usize = 20000;

    if self.as_str().is_empty() {
        return Default::default();
    }

    // Offset of the searched slice within the buffer; only needed to look up
    // the character classifier at the right buffer position.
    let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
    let rope = if let Some(range) = subrange {
        buffer.as_rope().slice(range)
    } else {
        buffer.as_rope().clone()
    };

    let mut matches = Vec::new();
    match self {
        Self::Text {
            search, whole_word, ..
        } => {
            for (ix, mat) in search
                .stream_find_iter(rope.bytes_in_range(0..rope.len()))
                .enumerate()
            {
                if (ix + 1) % YIELD_INTERVAL == 0 {
                    yield_now().await;
                }

                let mat = mat.unwrap();
                if *whole_word {
                    let classifier = buffer.char_classifier_at(range_offset + mat.start());

                    // Classify the characters just outside and just inside each
                    // edge of the match.
                    let prev_kind = rope
                        .reversed_chars_at(mat.start())
                        .next()
                        .map(|c| classifier.kind(c));
                    let start_kind =
                        classifier.kind(rope.chars_at(mat.start()).next().unwrap());
                    let end_kind =
                        classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
                    let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
                    // Reject the match if a word character continues across
                    // either edge, i.e. the match is part of a longer word.
                    if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
                        || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
                    {
                        continue;
                    }
                }
                matches.push(mat.start()..mat.end())
            }
        }

        Self::Regex {
            regex, multiline, ..
        } => {
            if *multiline {
                // Matches may span lines, so run the regex over the whole text.
                let text = rope.to_string();
                for (ix, mat) in regex.find_iter(&text).enumerate() {
                    if (ix + 1) % YIELD_INTERVAL == 0 {
                        yield_now().await;
                    }

                    // Matching errors are skipped rather than propagated.
                    if let Ok(mat) = mat {
                        matches.push(mat.start()..mat.end());
                    }
                }
            } else {
                // `line` accumulates the current line, which may be spread over
                // several rope chunks; `line_offset` is its byte offset in `rope`.
                let mut line = String::new();
                let mut line_offset = 0;
                // The trailing "\n" chunk forces the final line to be flushed.
                for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
                    if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
                        yield_now().await;
                    }

                    for (newline_ix, text) in chunk.split('\n').enumerate() {
                        // Every piece after the first follows a newline, so the
                        // accumulated line is complete and can be searched.
                        if newline_ix > 0 {
                            for mat in regex.find_iter(&line).flatten() {
                                let start = line_offset + mat.start();
                                let end = line_offset + mat.end();
                                matches.push(start..end);
                                if self.one_match_per_line() == Some(true) {
                                    break;
                                }
                            }

                            // +1 accounts for the newline that ended the line.
                            line_offset += line.len() + 1;
                            line.clear();
                        }
                        line.push_str(text);
                    }
                }
            }
        }
    }

    matches
}
528
529 pub fn is_empty(&self) -> bool {
530 self.as_str().is_empty()
531 }
532
533 pub fn as_str(&self) -> &str {
534 self.as_inner().as_str()
535 }
536
537 pub fn whole_word(&self) -> bool {
538 match self {
539 Self::Text { whole_word, .. } => *whole_word,
540 Self::Regex { whole_word, .. } => *whole_word,
541 }
542 }
543
544 pub fn case_sensitive(&self) -> bool {
545 match self {
546 Self::Text { case_sensitive, .. } => *case_sensitive,
547 Self::Regex { case_sensitive, .. } => *case_sensitive,
548 }
549 }
550
551 pub fn include_ignored(&self) -> bool {
552 match self {
553 Self::Text {
554 include_ignored, ..
555 } => *include_ignored,
556 Self::Regex {
557 include_ignored, ..
558 } => *include_ignored,
559 }
560 }
561
562 pub fn is_regex(&self) -> bool {
563 matches!(self, Self::Regex { .. })
564 }
565
566 pub fn files_to_include(&self) -> &PathMatcher {
567 self.as_inner().files_to_include()
568 }
569
570 pub fn files_to_exclude(&self) -> &PathMatcher {
571 self.as_inner().files_to_exclude()
572 }
573
574 pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
575 self.as_inner().buffers.as_ref()
576 }
577
578 pub fn is_opened_only(&self) -> bool {
579 self.as_inner().buffers.is_some()
580 }
581
582 pub fn filters_path(&self) -> bool {
583 !(self.files_to_exclude().sources().next().is_none()
584 && self.files_to_include().sources().next().is_none())
585 }
586
587 pub fn match_full_paths(&self) -> bool {
588 self.as_inner().match_full_paths
589 }
590
591 /// Check match full paths to determine whether you're required to pass a fully qualified
592 /// project path (starts with a project root).
593 pub fn match_path(&self, file_path: &RelPath) -> bool {
594 let mut path = file_path.to_rel_path_buf();
595 loop {
596 if self.files_to_exclude().is_match(&path) {
597 return false;
598 } else if self.files_to_include().sources().next().is_none()
599 || self.files_to_include().is_match(&path)
600 {
601 return true;
602 } else if !path.pop() {
603 return false;
604 }
605 }
606 }
607 pub fn as_inner(&self) -> &SearchInputs {
608 match self {
609 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
610 }
611 }
612
613 /// Whether this search should replace only one match per line, instead of
614 /// all matches.
615 /// Returns `None` for text searches, as only regex searches support this
616 /// option.
617 pub fn one_match_per_line(&self) -> Option<bool> {
618 match self {
619 Self::Regex {
620 one_match_per_line, ..
621 } => Some(*one_match_per_line),
622 Self::Text { .. } => None,
623 }
624 }
625}
626
627#[cfg(test)]
628mod tests {
629 use super::*;
630
/// Plain (glob-free or trivially globbed) paths must build a matcher that
/// matches the very path it was built from.
#[test]
fn path_matcher_creation_for_valid_paths() {
    let valid_paths = [
        "file",
        "Cargo.toml",
        ".DS_Store",
        "~/dir/another_dir/",
        "./dir/file",
        "dir/[a-z].txt",
    ];
    for valid_path in valid_paths {
        let path_matcher = match PathMatcher::new(&[valid_path.to_owned()], PathStyle::local()) {
            Ok(matcher) => matcher,
            Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
        };
        let rel_path = RelPath::new(valid_path.as_ref(), PathStyle::local()).unwrap();
        assert!(
            path_matcher.is_match(&rel_path),
            "Path matcher for valid path {valid_path} should match itself"
        )
    }
}
652
/// Malformed globs must be rejected and well-formed globs accepted.
#[test]
fn path_matcher_creation_for_globs() {
    let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
    for invalid_glob in invalid_globs {
        if PathMatcher::new(&[invalid_glob.to_owned()], PathStyle::local()).is_ok() {
            panic!("Invalid glob {invalid_glob} should not be accepted");
        }
    }

    let valid_globs = [
        "dir/?ile",
        "dir/*.txt",
        "dir/**/file",
        "dir/[a-z].txt",
        "{dir,file}",
    ];
    for valid_glob in valid_globs {
        if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()], PathStyle::local()) {
            panic!("Valid glob should be accepted, but got: {e}");
        }
    }
}
675
/// `\c` / `\C` modifiers in a regex query override the `case_sensitive`
/// argument; an escaped backslash before `C` is not a modifier.
#[test]
fn test_case_sensitive_pattern_items() {
    // (pattern, whole_word, initial case_sensitive, expected case_sensitive, failure message)
    let cases = [
        (
            "test\\C",
            false,
            false,
            true,
            "Case sensitivity should be enabled when \\C pattern item is present in the query.",
        ),
        (
            "test\\c",
            true,
            true,
            false,
            "Case sensitivity should be disabled when \\c pattern item is present, even if initially set to true.",
        ),
        (
            "test\\c\\C",
            false,
            false,
            true,
            "Case sensitivity should be enabled when \\C is the last pattern item, even after a \\c.",
        ),
        (
            "tests\\\\C",
            false,
            false,
            false,
            "Case sensitivity should not be enabled when \\C pattern item is preceded by a backslash.",
        ),
    ];

    for (pattern, whole_word, case_sensitive, expected, message) in cases {
        let search_query = SearchQuery::regex(
            pattern,
            whole_word,
            case_sensitive,
            false,
            false,
            Default::default(),
            Default::default(),
            false,
            None,
        )
        .expect("Should be able to create a regex SearchQuery");

        assert_eq!(search_query.case_sensitive(), expected, "{message}");
    }
}
758}