1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use fancy_regex::{Captures, Regex, RegexBuilder};
5use futures::{AsyncBufRead, AsyncBufReadExt, AsyncReadExt};
6use gpui::Entity;
7use itertools::Itertools as _;
8use language::{Buffer, BufferSnapshot, CharKind};
9use smol::future::yield_now;
10use std::{
11 borrow::Cow,
12 ops::Range,
13 sync::{Arc, LazyLock},
14};
15use text::Anchor;
16use util::{
17 paths::{PathMatcher, PathStyle},
18 rel_path::RelPath,
19};
20
/// A single item produced by a search.
///
/// NOTE(review): the producer of these values is not visible in this file;
/// the descriptions below are derived from the variant and field names only.
#[derive(Debug)]
pub enum SearchResult {
    /// Matches found inside one buffer.
    Buffer {
        /// The buffer the matches belong to.
        buffer: Entity<Buffer>,
        /// Anchored ranges of the matches within `buffer`.
        ranges: Vec<Range<Anchor>>,
    },
    /// Presumably signals that the search stopped early because a result
    /// limit was hit — confirm against the code that emits it.
    LimitReached,
}
29
/// Identifies one of the textual inputs a search is built from.
///
/// NOTE(review): this enum is not used inside this file; the variant
/// descriptions are inferred from their names and from the corresponding
/// `SearchInputs` fields — confirm against callers.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SearchInputKind {
    /// The search query itself.
    Query,
    /// Presumably the "files to include" filter input.
    Include,
    /// Presumably the "files to exclude" filter input.
    Exclude,
}
36
/// The user-provided inputs shared by both kinds of [`SearchQuery`].
#[derive(Clone, Debug)]
pub struct SearchInputs {
    // The query as handed to the constructor. For regex queries this is the
    // pattern *before* `\c`/`\C` items are stripped and before `\b` anchors
    // are added for whole-word matching.
    query: Arc<str>,
    // Matcher for paths that should be searched; an empty matcher means
    // "no include filter" (see `SearchQuery::match_path`).
    files_to_include: PathMatcher,
    // Matcher for paths that must be skipped.
    files_to_exclude: PathMatcher,
    // Whether include/exclude patterns are meant to be matched against fully
    // qualified project paths (see the constructor docs on `SearchQuery`).
    match_full_paths: bool,
    // When `Some`, the search is restricted to these buffers
    // (see `SearchQuery::is_opened_only`).
    buffers: Option<Vec<Entity<Buffer>>>,
}
45
46impl SearchInputs {
47 pub fn as_str(&self) -> &str {
48 self.query.as_ref()
49 }
50 pub fn files_to_include(&self) -> &PathMatcher {
51 &self.files_to_include
52 }
53 pub fn files_to_exclude(&self) -> &PathMatcher {
54 &self.files_to_exclude
55 }
56 pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
57 &self.buffers
58 }
59}
/// A compiled search query, either a literal text search or a regex search.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search backed by an Aho-Corasick automaton.
    Text {
        /// Automaton built from the query string.
        search: AhoCorasick,
        /// Replacement text, if one was set via `with_replacement`.
        replacement: Option<String>,
        /// Only report matches that are not directly adjacent to other word
        /// characters (checked in `search`).
        whole_word: bool,
        /// Whether matching is case sensitive.
        case_sensitive: bool,
        /// Presumably whether ignored files take part in the search; this
        /// flag is only stored and exposed here — confirm against callers.
        include_ignored: bool,
        /// The raw inputs this query was built from.
        inner: SearchInputs,
    },
    /// Regular-expression search backed by `fancy_regex`.
    Regex {
        /// The compiled pattern (after `\c`/`\C` stripping and optional `\b`
        /// wrapping for whole-word matching).
        regex: Regex,
        /// Replacement template, if one was set via `with_replacement`.
        replacement: Option<String>,
        /// True when the pattern contains a literal newline or the two
        /// characters `\n`; such queries are matched against the whole text
        /// rather than line by line.
        multiline: bool,
        /// Whether `\b` anchors were requested around the pattern.
        whole_word: bool,
        /// Whether matching is case sensitive (possibly overridden by a
        /// `\c`/`\C` pattern item).
        case_sensitive: bool,
        /// Presumably whether ignored files take part in the search; this
        /// flag is only stored and exposed here — confirm against callers.
        include_ignored: bool,
        /// When true, `search` reports at most one match per line for
        /// non-multiline patterns.
        one_match_per_line: bool,
        /// The raw inputs this query was built from.
        inner: SearchInputs,
    },
}
81
/// Regex consisting solely of `\B` (the "not a word boundary" assertion).
///
/// `SearchQuery::regex` runs it against the first and last character of a
/// whole-word query: when it does *not* match, that character is treated as a
/// word character and a `\b` anchor is added on that side of the pattern.
static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
    RegexBuilder::new(r"\B")
        .build()
        .expect("Failed to create WORD_MATCH_TEST")
});
87
88impl SearchQuery {
89 /// Create a text query
90 ///
91 /// If `match_full_paths` is true, include/exclude patterns will always be matched against fully qualified project paths beginning with a project root.
92 /// If `match_full_paths` is false, patterns will be matched against full paths only when the project has multiple roots.
93 pub fn text(
94 query: impl ToString,
95 whole_word: bool,
96 case_sensitive: bool,
97 include_ignored: bool,
98 files_to_include: PathMatcher,
99 files_to_exclude: PathMatcher,
100 match_full_paths: bool,
101 buffers: Option<Vec<Entity<Buffer>>>,
102 ) -> Result<Self> {
103 let query = query.to_string();
104 if !case_sensitive && !query.is_ascii() {
105 // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
106 // Fallback to regex search as recommended by
107 // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
108 return Self::regex(
109 regex::escape(&query),
110 whole_word,
111 case_sensitive,
112 include_ignored,
113 false,
114 files_to_include,
115 files_to_exclude,
116 false,
117 buffers,
118 );
119 }
120 let search = AhoCorasickBuilder::new()
121 .ascii_case_insensitive(!case_sensitive)
122 .build([&query])?;
123 let inner = SearchInputs {
124 query: query.into(),
125 files_to_exclude,
126 files_to_include,
127 match_full_paths,
128 buffers,
129 };
130 Ok(Self::Text {
131 search,
132 replacement: None,
133 whole_word,
134 case_sensitive,
135 include_ignored,
136 inner,
137 })
138 }
139
140 /// Create a regex query
141 ///
142 /// If `match_full_paths` is true, include/exclude patterns will be matched against fully qualified project paths
143 /// beginning with a project root name. If false, they will be matched against project-relative paths (which don't start
144 /// with their respective project root).
145 pub fn regex(
146 query: impl ToString,
147 whole_word: bool,
148 mut case_sensitive: bool,
149 include_ignored: bool,
150 one_match_per_line: bool,
151 files_to_include: PathMatcher,
152 files_to_exclude: PathMatcher,
153 match_full_paths: bool,
154 buffers: Option<Vec<Entity<Buffer>>>,
155 ) -> Result<Self> {
156 let mut query = query.to_string();
157 let initial_query = Arc::from(query.as_str());
158
159 if let Some((case_sensitive_from_pattern, new_query)) =
160 Self::case_sensitive_from_pattern(&query)
161 {
162 case_sensitive = case_sensitive_from_pattern;
163 query = new_query
164 }
165
166 if whole_word {
167 let mut word_query = String::new();
168 if let Some(first) = query.get(0..1)
169 && WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x)
170 {
171 word_query.push_str("\\b");
172 }
173 word_query.push_str(&query);
174 if let Some(last) = query.get(query.len() - 1..)
175 && WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x)
176 {
177 word_query.push_str("\\b");
178 }
179 query = word_query
180 }
181
182 let multiline = query.contains('\n') || query.contains("\\n");
183 let regex = RegexBuilder::new(&query)
184 .case_insensitive(!case_sensitive)
185 .build()?;
186 let inner = SearchInputs {
187 query: initial_query,
188 files_to_exclude,
189 files_to_include,
190 match_full_paths,
191 buffers,
192 };
193 Ok(Self::Regex {
194 regex,
195 replacement: None,
196 multiline,
197 whole_word,
198 case_sensitive,
199 include_ignored,
200 inner,
201 one_match_per_line,
202 })
203 }
204
205 /// Extracts case sensitivity settings from pattern items in the provided
206 /// query and returns the same query, with the pattern items removed.
207 ///
208 /// The following pattern modifiers are supported:
209 ///
210 /// - `\c` (case_sensitive: false)
211 /// - `\C` (case_sensitive: true)
212 ///
213 /// If no pattern item were found, `None` will be returned.
214 fn case_sensitive_from_pattern(query: &str) -> Option<(bool, String)> {
215 if !(query.contains("\\c") || query.contains("\\C")) {
216 return None;
217 }
218
219 let mut was_escaped = false;
220 let mut new_query = String::new();
221 let mut is_case_sensitive = None;
222
223 for c in query.chars() {
224 if was_escaped {
225 if c == 'c' {
226 is_case_sensitive = Some(false);
227 } else if c == 'C' {
228 is_case_sensitive = Some(true);
229 } else {
230 new_query.push('\\');
231 new_query.push(c);
232 }
233 was_escaped = false
234 } else if c == '\\' {
235 was_escaped = true
236 } else {
237 new_query.push(c);
238 }
239 }
240
241 is_case_sensitive.map(|c| (c, new_query))
242 }
243
244 pub fn from_proto(message: proto::SearchQuery, path_style: PathStyle) -> Result<Self> {
245 let files_to_include = if message.files_to_include.is_empty() {
246 message
247 .files_to_include_legacy
248 .split(',')
249 .map(str::trim)
250 .filter(|&glob_str| !glob_str.is_empty())
251 .map(|s| s.to_string())
252 .collect()
253 } else {
254 message.files_to_include
255 };
256
257 let files_to_exclude = if message.files_to_exclude.is_empty() {
258 message
259 .files_to_exclude_legacy
260 .split(',')
261 .map(str::trim)
262 .filter(|&glob_str| !glob_str.is_empty())
263 .map(|s| s.to_string())
264 .collect()
265 } else {
266 message.files_to_exclude
267 };
268
269 if message.regex {
270 Self::regex(
271 message.query,
272 message.whole_word,
273 message.case_sensitive,
274 message.include_ignored,
275 false,
276 PathMatcher::new(files_to_include, path_style)?,
277 PathMatcher::new(files_to_exclude, path_style)?,
278 message.match_full_paths,
279 None, // search opened only don't need search remote
280 )
281 } else {
282 Self::text(
283 message.query,
284 message.whole_word,
285 message.case_sensitive,
286 message.include_ignored,
287 PathMatcher::new(files_to_include, path_style)?,
288 PathMatcher::new(files_to_exclude, path_style)?,
289 false,
290 None, // search opened only don't need search remote
291 )
292 }
293 }
294
295 pub fn with_replacement(mut self, new_replacement: String) -> Self {
296 match self {
297 Self::Text {
298 ref mut replacement,
299 ..
300 }
301 | Self::Regex {
302 ref mut replacement,
303 ..
304 } => {
305 *replacement = Some(new_replacement);
306 self
307 }
308 }
309 }
310
311 pub fn to_proto(&self) -> proto::SearchQuery {
312 let mut files_to_include = self.files_to_include().sources();
313 let mut files_to_exclude = self.files_to_exclude().sources();
314 proto::SearchQuery {
315 query: self.as_str().to_string(),
316 regex: self.is_regex(),
317 whole_word: self.whole_word(),
318 case_sensitive: self.case_sensitive(),
319 include_ignored: self.include_ignored(),
320 files_to_include: files_to_include.clone().map(ToOwned::to_owned).collect(),
321 files_to_exclude: files_to_exclude.clone().map(ToOwned::to_owned).collect(),
322 match_full_paths: self.match_full_paths(),
323 // Populate legacy fields for backwards compatibility
324 files_to_include_legacy: files_to_include.join(","),
325 files_to_exclude_legacy: files_to_exclude.join(","),
326 }
327 }
328
329 pub(crate) async fn detect(&self, mut reader: impl AsyncBufRead + Unpin) -> Result<bool> {
330 let query_str = self.as_str();
331 let needle_len = query_str.len();
332 if needle_len == 0 {
333 return Ok(false);
334 }
335
336 let mut text = String::new();
337 match self {
338 Self::Text { search, .. } => {
339 if query_str.contains('\n') {
340 reader.read_to_string(&mut text).await?;
341 Ok(search.find(&text).is_some())
342 } else {
343 while reader.read_line(&mut text).await? > 0 {
344 if search.find(&text).is_some() {
345 return Ok(true);
346 }
347 text.clear();
348 }
349 Ok(false)
350 }
351 }
352 Self::Regex {
353 regex, multiline, ..
354 } => {
355 if *multiline {
356 reader.read_to_string(&mut text).await?;
357 Ok(regex.find(&text)?.is_some())
358 } else {
359 while reader.read_line(&mut text).await? > 0 {
360 if regex.find(&text)?.is_some() {
361 return Ok(true);
362 }
363 }
364 Ok(false)
365 }
366 }
367 }
368 }
369 /// Returns the replacement text for this `SearchQuery`.
370 pub fn replacement(&self) -> Option<&str> {
371 match self {
372 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
373 replacement.as_deref()
374 }
375 }
376 }
377 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
378 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
379 match self {
380 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
381 SearchQuery::Regex {
382 regex, replacement, ..
383 } => {
384 if let Some(replacement) = replacement {
385 static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
386 LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
387 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
388 replacement,
389 |c: &Captures| match c.get(0).unwrap().as_str() {
390 r"\\" => "\\",
391 r"\n" => "\n",
392 r"\t" => "\t",
393 x => unreachable!("Unexpected escape sequence: {}", x),
394 },
395 );
396 Some(regex.replace(text, replacement))
397 } else {
398 None
399 }
400 }
401 }
402 }
403
    /// Finds all matches of this query in `buffer`, optionally restricted to
    /// `subrange`.
    ///
    /// The returned ranges are offsets into the searched text: when
    /// `subrange` is given they are relative to `subrange.start`, not to the
    /// start of the buffer. An empty query yields no matches.
    ///
    /// The future yields back to the executor periodically (every
    /// `YIELD_INTERVAL` matches or chunks) so that long searches do not
    /// monopolize it.
    pub async fn search(
        &self,
        buffer: &BufferSnapshot,
        subrange: Option<Range<usize>>,
    ) -> Vec<Range<usize>> {
        const YIELD_INTERVAL: usize = 20000;

        if self.as_str().is_empty() {
            return Default::default();
        }

        // `range_offset` is only needed to look up the char classifier at the
        // right buffer position; match offsets stay relative to `rope`.
        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
        let rope = if let Some(range) = subrange {
            buffer.as_rope().slice(range)
        } else {
            buffer.as_rope().clone()
        };

        let mut matches = Vec::new();
        match self {
            Self::Text {
                search, whole_word, ..
            } => {
                for (ix, mat) in search
                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
                    .enumerate()
                {
                    if (ix + 1) % YIELD_INTERVAL == 0 {
                        yield_now().await;
                    }

                    let mat = mat.unwrap();
                    if *whole_word {
                        let classifier = buffer.char_classifier_at(range_offset + mat.start());

                        // Kinds of the characters just outside and just inside
                        // each end of the match. The inner lookups can unwrap
                        // because the query (and thus the match) is non-empty.
                        let prev_kind = rope
                            .reversed_chars_at(mat.start())
                            .next()
                            .map(|c| classifier.kind(c));
                        let start_kind =
                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
                        let end_kind =
                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
                        // Reject the match if a word character at either edge
                        // continues into another word character outside of it.
                        if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
                            || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
                        {
                            continue;
                        }
                    }
                    matches.push(mat.start()..mat.end())
                }
            }

            Self::Regex {
                regex, multiline, ..
            } => {
                if *multiline {
                    // The pattern may span lines, so match against the whole
                    // text at once. Matches the engine fails on are skipped.
                    let text = rope.to_string();
                    for (ix, mat) in regex.find_iter(&text).enumerate() {
                        if (ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        if let Ok(mat) = mat {
                            matches.push(mat.start()..mat.end());
                        }
                    }
                } else {
                    // Reassemble lines from the rope's chunks and match each
                    // complete line separately. `line_offset` is the offset of
                    // the current line's first byte within `rope`.
                    let mut line = String::new();
                    let mut line_offset = 0;
                    // The trailing "\n" chunk flushes the last line, which is
                    // otherwise only searched once a newline is seen.
                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        for (newline_ix, text) in chunk.split('\n').enumerate() {
                            // Every piece after the first follows a newline,
                            // i.e. the line accumulated so far is complete.
                            if newline_ix > 0 {
                                for mat in regex.find_iter(&line).flatten() {
                                    let start = line_offset + mat.start();
                                    let end = line_offset + mat.end();
                                    matches.push(start..end);
                                    if self.one_match_per_line() == Some(true) {
                                        break;
                                    }
                                }

                                // `+ 1` accounts for the newline itself.
                                line_offset += line.len() + 1;
                                line.clear();
                            }
                            line.push_str(text);
                        }
                    }
                }
            }
        }

        matches
    }
503
504 pub fn is_empty(&self) -> bool {
505 self.as_str().is_empty()
506 }
507
508 pub fn as_str(&self) -> &str {
509 self.as_inner().as_str()
510 }
511
512 pub fn whole_word(&self) -> bool {
513 match self {
514 Self::Text { whole_word, .. } => *whole_word,
515 Self::Regex { whole_word, .. } => *whole_word,
516 }
517 }
518
519 pub fn case_sensitive(&self) -> bool {
520 match self {
521 Self::Text { case_sensitive, .. } => *case_sensitive,
522 Self::Regex { case_sensitive, .. } => *case_sensitive,
523 }
524 }
525
526 pub fn include_ignored(&self) -> bool {
527 match self {
528 Self::Text {
529 include_ignored, ..
530 } => *include_ignored,
531 Self::Regex {
532 include_ignored, ..
533 } => *include_ignored,
534 }
535 }
536
537 pub fn is_regex(&self) -> bool {
538 matches!(self, Self::Regex { .. })
539 }
540
541 pub fn files_to_include(&self) -> &PathMatcher {
542 self.as_inner().files_to_include()
543 }
544
545 pub fn files_to_exclude(&self) -> &PathMatcher {
546 self.as_inner().files_to_exclude()
547 }
548
549 pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
550 self.as_inner().buffers.as_ref()
551 }
552
553 pub fn is_opened_only(&self) -> bool {
554 self.as_inner().buffers.is_some()
555 }
556
557 pub fn filters_path(&self) -> bool {
558 !(self.files_to_exclude().sources().next().is_none()
559 && self.files_to_include().sources().next().is_none())
560 }
561
562 pub fn match_full_paths(&self) -> bool {
563 self.as_inner().match_full_paths
564 }
565
566 /// Check match full paths to determine whether you're required to pass a fully qualified
567 /// project path (starts with a project root).
568 pub fn match_path(&self, file_path: &RelPath) -> bool {
569 let mut path = file_path.to_rel_path_buf();
570 loop {
571 if self.files_to_exclude().is_match(&path) {
572 return false;
573 } else if self.files_to_include().sources().next().is_none()
574 || self.files_to_include().is_match(&path)
575 {
576 return true;
577 } else if !path.pop() {
578 return false;
579 }
580 }
581 }
582 pub fn as_inner(&self) -> &SearchInputs {
583 match self {
584 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
585 }
586 }
587
588 /// Whether this search should replace only one match per line, instead of
589 /// all matches.
590 /// Returns `None` for text searches, as only regex searches support this
591 /// option.
592 pub fn one_match_per_line(&self) -> Option<bool> {
593 match self {
594 Self::Regex {
595 one_match_per_line, ..
596 } => Some(*one_match_per_line),
597 Self::Text { .. } => None,
598 }
599 }
600}
601
#[cfg(test)]
mod tests {
    use super::*;

    /// Every listed path must be accepted as a pattern and match itself.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()], PathStyle::local())
            {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            let rel_path = RelPath::new(valid_path.as_ref(), PathStyle::local()).unwrap();
            assert!(
                path_matcher.is_match(&rel_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected, well-formed globs must be accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()], PathStyle::local()).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()], PathStyle::local()) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }

    /// `\c` / `\C` pattern items override the `case_sensitive` argument.
    #[test]
    fn test_case_sensitive_pattern_items() {
        // (pattern, whole_word, case_sensitive argument, expected result, failure message)
        let cases = [
            (
                "test\\C",
                false,
                false,
                true,
                "Case sensitivity should be enabled when \\C pattern item is present in the query.",
            ),
            (
                "test\\c",
                true,
                true,
                false,
                "Case sensitivity should be disabled when \\c pattern item is present, even if initially set to true.",
            ),
            (
                "test\\c\\C",
                false,
                false,
                true,
                "Case sensitivity should be enabled when \\C is the last pattern item, even after a \\c.",
            ),
            (
                "tests\\\\C",
                false,
                false,
                false,
                "Case sensitivity should not be enabled when \\C pattern item is preceded by a backslash.",
            ),
        ];

        for (pattern, whole_word, case_sensitive, expected, message) in cases {
            let search_query = SearchQuery::regex(
                pattern,
                whole_word,
                case_sensitive,
                false,
                false,
                Default::default(),
                Default::default(),
                false,
                None,
            )
            .expect("Should be able to create a regex SearchQuery");

            assert_eq!(search_query.case_sensitive(), expected, "{message}");
        }
    }
}