1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use fancy_regex::{Captures, Regex, RegexBuilder};
5use gpui::Entity;
6use itertools::Itertools as _;
7use language::{Buffer, BufferSnapshot, CharKind};
8use smol::future::yield_now;
9use std::{
10 borrow::Cow,
11 io::{BufRead, BufReader, Read},
12 ops::Range,
13 sync::{Arc, LazyLock},
14};
15use text::Anchor;
16use util::{
17 paths::{PathMatcher, PathStyle},
18 rel_path::RelPath,
19};
20
/// A single item produced while running a search.
#[derive(Debug)]
pub enum SearchResult {
    /// Ranges associated with one buffer.
    // NOTE(review): presumably `ranges` are the match locations inside `buffer`;
    // the producer is not visible in this file — confirm against the caller.
    Buffer {
        buffer: Entity<Buffer>,
        ranges: Vec<Range<Anchor>>,
    },
    /// Marker variant carrying no data.
    // NOTE(review): by its name this signals that a result limit was hit — confirm.
    LimitReached,
}
29
/// Names one of the three user-supplied search inputs.
// NOTE(review): the variants appear to mirror `SearchInputs::query`,
// `files_to_include` and `files_to_exclude`; the enum is not used in this file.
#[derive(Clone, Copy, PartialEq)]
pub enum SearchInputKind {
    Query,
    Include,
    Exclude,
}
36
/// The inputs shared by both `SearchQuery` variants.
#[derive(Clone, Debug)]
pub struct SearchInputs {
    /// The query text as handed to the `SearchQuery` constructor, before any
    /// rewriting (e.g. `\c`/`\C` removal or `\b` insertion for regex queries).
    query: Arc<str>,
    /// Patterns a path must match to be searched; when it has no sources,
    /// `SearchQuery::match_path` accepts every non-excluded path.
    files_to_include: PathMatcher,
    /// Patterns that remove a path from the search.
    files_to_exclude: PathMatcher,
    /// Stored as given and reported by `SearchQuery::match_full_paths`; it is not
    /// interpreted in this file.
    match_full_paths: bool,
    /// Optional explicit buffer set; `SearchQuery::is_opened_only` is true when present.
    buffers: Option<Vec<Entity<Buffer>>>,
}
45
46impl SearchInputs {
47 pub fn as_str(&self) -> &str {
48 self.query.as_ref()
49 }
50 pub fn files_to_include(&self) -> &PathMatcher {
51 &self.files_to_include
52 }
53 pub fn files_to_exclude(&self) -> &PathMatcher {
54 &self.files_to_exclude
55 }
56 pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
57 &self.buffers
58 }
59}
/// A compiled search, either a literal text search or a regular expression search.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal search backed by an Aho-Corasick automaton.
    Text {
        /// Automaton built from the query text.
        search: AhoCorasick,
        /// Replacement text, set through `with_replacement`.
        replacement: Option<String>,
        /// When set, `search` drops matches that begin or end inside a word.
        whole_word: bool,
        case_sensitive: bool,
        // NOTE(review): stored and reported only; presumably controls whether
        // ignored files are searched — the consumer is not in this file.
        include_ignored: bool,
        /// Original query text and path filters.
        inner: SearchInputs,
    },
    /// Regular expression search.
    Regex {
        /// Compiled pattern (after `\c`/`\C` removal and optional `\b` wrapping).
        regex: Regex,
        /// Replacement template, set through `with_replacement`.
        replacement: Option<String>,
        /// True when the pattern contains a newline or the two characters `\n`;
        /// such queries are matched against the whole text rather than line by line.
        multiline: bool,
        whole_word: bool,
        /// Effective case sensitivity; a `\c`/`\C` item in the pattern overrides
        /// the value passed to the constructor.
        case_sensitive: bool,
        // NOTE(review): stored and reported only; see the note on `Text::include_ignored`.
        include_ignored: bool,
        /// When true, line-by-line searching keeps only the first match of each line.
        one_match_per_line: bool,
        /// Original query text and path filters.
        inner: SearchInputs,
    },
}
81
82static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
83 RegexBuilder::new(r"\B")
84 .build()
85 .expect("Failed to create WORD_MATCH_TEST")
86});
87
88impl SearchQuery {
89 /// Create a text query
90 ///
91 /// If `match_full_paths` is true, include/exclude patterns will always be matched against fully qualified project paths beginning with a project root.
92 /// If `match_full_paths` is false, patterns will be matched against full paths only when the project has multiple roots.
93 pub fn text(
94 query: impl ToString,
95 whole_word: bool,
96 case_sensitive: bool,
97 include_ignored: bool,
98 files_to_include: PathMatcher,
99 files_to_exclude: PathMatcher,
100 match_full_paths: bool,
101 buffers: Option<Vec<Entity<Buffer>>>,
102 ) -> Result<Self> {
103 let query = query.to_string();
104 if !case_sensitive && !query.is_ascii() {
105 // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
106 // Fallback to regex search as recommended by
107 // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
108 return Self::regex(
109 regex::escape(&query),
110 whole_word,
111 case_sensitive,
112 include_ignored,
113 false,
114 files_to_include,
115 files_to_exclude,
116 false,
117 buffers,
118 );
119 }
120 let search = AhoCorasickBuilder::new()
121 .ascii_case_insensitive(!case_sensitive)
122 .build([&query])?;
123 let inner = SearchInputs {
124 query: query.into(),
125 files_to_exclude,
126 files_to_include,
127 match_full_paths,
128 buffers,
129 };
130 Ok(Self::Text {
131 search,
132 replacement: None,
133 whole_word,
134 case_sensitive,
135 include_ignored,
136 inner,
137 })
138 }
139
140 /// Create a regex query
141 ///
142 /// If `match_full_paths` is true, include/exclude patterns will be matched against fully qualified project paths
143 /// beginning with a project root name. If false, they will be matched against project-relative paths (which don't start
144 /// with their respective project root).
145 pub fn regex(
146 query: impl ToString,
147 whole_word: bool,
148 mut case_sensitive: bool,
149 include_ignored: bool,
150 one_match_per_line: bool,
151 files_to_include: PathMatcher,
152 files_to_exclude: PathMatcher,
153 match_full_paths: bool,
154 buffers: Option<Vec<Entity<Buffer>>>,
155 ) -> Result<Self> {
156 let mut query = query.to_string();
157 let initial_query = Arc::from(query.as_str());
158
159 if let Some((case_sensitive_from_pattern, new_query)) =
160 Self::case_sensitive_from_pattern(&query)
161 {
162 case_sensitive = case_sensitive_from_pattern;
163 query = new_query
164 }
165
166 if whole_word {
167 let mut word_query = String::new();
168 if let Some(first) = query.get(0..1)
169 && WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x)
170 {
171 word_query.push_str("\\b");
172 }
173 word_query.push_str(&query);
174 if let Some(last) = query.get(query.len() - 1..)
175 && WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x)
176 {
177 word_query.push_str("\\b");
178 }
179 query = word_query
180 }
181
182 let multiline = query.contains('\n') || query.contains("\\n");
183 let regex = RegexBuilder::new(&query)
184 .case_insensitive(!case_sensitive)
185 .build()?;
186 let inner = SearchInputs {
187 query: initial_query,
188 files_to_exclude,
189 files_to_include,
190 match_full_paths,
191 buffers,
192 };
193 Ok(Self::Regex {
194 regex,
195 replacement: None,
196 multiline,
197 whole_word,
198 case_sensitive,
199 include_ignored,
200 inner,
201 one_match_per_line,
202 })
203 }
204
205 /// Extracts case sensitivity settings from pattern items in the provided
206 /// query and returns the same query, with the pattern items removed.
207 ///
208 /// The following pattern modifiers are supported:
209 ///
210 /// - `\c` (case_sensitive: false)
211 /// - `\C` (case_sensitive: true)
212 ///
213 /// If no pattern item were found, `None` will be returned.
214 fn case_sensitive_from_pattern(query: &str) -> Option<(bool, String)> {
215 if !(query.contains("\\c") || query.contains("\\C")) {
216 return None;
217 }
218
219 let mut was_escaped = false;
220 let mut new_query = String::new();
221 let mut is_case_sensitive = None;
222
223 for c in query.chars() {
224 if was_escaped {
225 if c == 'c' {
226 is_case_sensitive = Some(false);
227 } else if c == 'C' {
228 is_case_sensitive = Some(true);
229 } else {
230 new_query.push('\\');
231 new_query.push(c);
232 }
233 was_escaped = false
234 } else if c == '\\' {
235 was_escaped = true
236 } else {
237 new_query.push(c);
238 }
239 }
240
241 is_case_sensitive.map(|c| (c, new_query))
242 }
243
244 pub fn from_proto(message: proto::SearchQuery, path_style: PathStyle) -> Result<Self> {
245 let files_to_include = if message.files_to_include.is_empty() {
246 message
247 .files_to_include_legacy
248 .split(',')
249 .map(str::trim)
250 .filter(|&glob_str| !glob_str.is_empty())
251 .map(|s| s.to_string())
252 .collect()
253 } else {
254 message.files_to_include
255 };
256
257 let files_to_exclude = if message.files_to_exclude.is_empty() {
258 message
259 .files_to_exclude_legacy
260 .split(',')
261 .map(str::trim)
262 .filter(|&glob_str| !glob_str.is_empty())
263 .map(|s| s.to_string())
264 .collect()
265 } else {
266 message.files_to_exclude
267 };
268
269 if message.regex {
270 Self::regex(
271 message.query,
272 message.whole_word,
273 message.case_sensitive,
274 message.include_ignored,
275 false,
276 PathMatcher::new(files_to_include, path_style)?,
277 PathMatcher::new(files_to_exclude, path_style)?,
278 message.match_full_paths,
279 None, // search opened only don't need search remote
280 )
281 } else {
282 Self::text(
283 message.query,
284 message.whole_word,
285 message.case_sensitive,
286 message.include_ignored,
287 PathMatcher::new(files_to_include, path_style)?,
288 PathMatcher::new(files_to_exclude, path_style)?,
289 false,
290 None, // search opened only don't need search remote
291 )
292 }
293 }
294
295 pub fn with_replacement(mut self, new_replacement: String) -> Self {
296 match self {
297 Self::Text {
298 ref mut replacement,
299 ..
300 }
301 | Self::Regex {
302 ref mut replacement,
303 ..
304 } => {
305 *replacement = Some(new_replacement);
306 self
307 }
308 }
309 }
310
311 pub fn to_proto(&self) -> proto::SearchQuery {
312 let mut files_to_include = self.files_to_include().sources();
313 let mut files_to_exclude = self.files_to_exclude().sources();
314 proto::SearchQuery {
315 query: self.as_str().to_string(),
316 regex: self.is_regex(),
317 whole_word: self.whole_word(),
318 case_sensitive: self.case_sensitive(),
319 include_ignored: self.include_ignored(),
320 files_to_include: files_to_include.clone().map(ToOwned::to_owned).collect(),
321 files_to_exclude: files_to_exclude.clone().map(ToOwned::to_owned).collect(),
322 match_full_paths: self.match_full_paths(),
323 // Populate legacy fields for backwards compatibility
324 files_to_include_legacy: files_to_include.join(","),
325 files_to_exclude_legacy: files_to_exclude.join(","),
326 }
327 }
328
329 pub(crate) fn detect(
330 &self,
331 mut reader: BufReader<Box<dyn Read + Send + Sync>>,
332 ) -> Result<bool> {
333 if self.as_str().is_empty() {
334 return Ok(false);
335 }
336
337 match self {
338 Self::Text { search, .. } => {
339 let mat = search.stream_find_iter(reader).next();
340 match mat {
341 Some(Ok(_)) => Ok(true),
342 Some(Err(err)) => Err(err.into()),
343 None => Ok(false),
344 }
345 }
346 Self::Regex {
347 regex, multiline, ..
348 } => {
349 if *multiline {
350 let mut text = String::new();
351 if let Err(err) = reader.read_to_string(&mut text) {
352 Err(err.into())
353 } else {
354 Ok(regex.find(&text)?.is_some())
355 }
356 } else {
357 for line in reader.lines() {
358 let line = line?;
359 if regex.find(&line)?.is_some() {
360 return Ok(true);
361 }
362 }
363 Ok(false)
364 }
365 }
366 }
367 }
368 /// Returns the replacement text for this `SearchQuery`.
369 pub fn replacement(&self) -> Option<&str> {
370 match self {
371 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
372 replacement.as_deref()
373 }
374 }
375 }
376 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
377 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
378 match self {
379 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
380 SearchQuery::Regex {
381 regex, replacement, ..
382 } => {
383 if let Some(replacement) = replacement {
384 static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
385 LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
386 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
387 replacement,
388 |c: &Captures| match c.get(0).unwrap().as_str() {
389 r"\\" => "\\",
390 r"\n" => "\n",
391 r"\t" => "\t",
392 x => unreachable!("Unexpected escape sequence: {}", x),
393 },
394 );
395 Some(regex.replace(text, replacement))
396 } else {
397 None
398 }
399 }
400 }
401 }
402
    /// Finds every match of this query in `buffer`, optionally restricted to `subrange`.
    ///
    /// An empty query yields no matches. The returned ranges are offsets into the
    /// text that was actually searched: the start of `subrange` is only used to
    /// look up the character classifier and is not added to the results.
    // NOTE(review): this assumes offsets in the sliced rope restart at 0 — confirm
    // against `Rope::slice`.
    ///
    /// The future yields to the executor periodically so that long searches do not
    /// monopolize it.
    pub async fn search(
        &self,
        buffer: &BufferSnapshot,
        subrange: Option<Range<usize>>,
    ) -> Vec<Range<usize>> {
        // Number of loop iterations between two cooperative yields.
        const YIELD_INTERVAL: usize = 20000;

        if self.as_str().is_empty() {
            return Default::default();
        }

        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
        let rope = if let Some(range) = subrange {
            buffer.as_rope().slice(range)
        } else {
            buffer.as_rope().clone()
        };

        let mut matches = Vec::new();
        match self {
            Self::Text {
                search, whole_word, ..
            } => {
                for (ix, mat) in search
                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
                    .enumerate()
                {
                    if (ix + 1) % YIELD_INTERVAL == 0 {
                        yield_now().await;
                    }

                    let mat = mat.unwrap();
                    if *whole_word {
                        let classifier = buffer.char_classifier_at(range_offset + mat.start());

                        // Kinds of the character just before the match, the first and
                        // last characters of the match, and the character just after it.
                        let prev_kind = rope
                            .reversed_chars_at(mat.start())
                            .next()
                            .map(|c| classifier.kind(c));
                        let start_kind =
                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
                        let end_kind =
                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
                        // Skip the match when a word continues across either of its edges.
                        if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
                            || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
                        {
                            continue;
                        }
                    }
                    matches.push(mat.start()..mat.end())
                }
            }

            Self::Regex {
                regex, multiline, ..
            } => {
                if *multiline {
                    // The pattern may span lines, so match against the whole text at once.
                    let text = rope.to_string();
                    for (ix, mat) in regex.find_iter(&text).enumerate() {
                        if (ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        // Matches that failed with a regex error are skipped.
                        if let Ok(mat) = mat {
                            matches.push(mat.start()..mat.end());
                        }
                    }
                } else {
                    // Rebuild one line at a time from the rope's chunks; `line_offset`
                    // is the offset of the current line's first byte.
                    let mut line = String::new();
                    let mut line_offset = 0;
                    // The extra "\n" chunk forces the final line to be searched too.
                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        for (newline_ix, text) in chunk.split('\n').enumerate() {
                            // Every piece after the first follows a newline, which means
                            // the line accumulated so far is complete.
                            if newline_ix > 0 {
                                for mat in regex.find_iter(&line).flatten() {
                                    let start = line_offset + mat.start();
                                    let end = line_offset + mat.end();
                                    matches.push(start..end);
                                    if self.one_match_per_line() == Some(true) {
                                        break;
                                    }
                                }

                                // `+ 1` accounts for the newline that ended the line.
                                line_offset += line.len() + 1;
                                line.clear();
                            }
                            line.push_str(text);
                        }
                    }
                }
            }
        }

        matches
    }
502
503 pub fn is_empty(&self) -> bool {
504 self.as_str().is_empty()
505 }
506
507 pub fn as_str(&self) -> &str {
508 self.as_inner().as_str()
509 }
510
511 pub fn whole_word(&self) -> bool {
512 match self {
513 Self::Text { whole_word, .. } => *whole_word,
514 Self::Regex { whole_word, .. } => *whole_word,
515 }
516 }
517
518 pub fn case_sensitive(&self) -> bool {
519 match self {
520 Self::Text { case_sensitive, .. } => *case_sensitive,
521 Self::Regex { case_sensitive, .. } => *case_sensitive,
522 }
523 }
524
525 pub fn include_ignored(&self) -> bool {
526 match self {
527 Self::Text {
528 include_ignored, ..
529 } => *include_ignored,
530 Self::Regex {
531 include_ignored, ..
532 } => *include_ignored,
533 }
534 }
535
536 pub fn is_regex(&self) -> bool {
537 matches!(self, Self::Regex { .. })
538 }
539
540 pub fn files_to_include(&self) -> &PathMatcher {
541 self.as_inner().files_to_include()
542 }
543
544 pub fn files_to_exclude(&self) -> &PathMatcher {
545 self.as_inner().files_to_exclude()
546 }
547
548 pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
549 self.as_inner().buffers.as_ref()
550 }
551
552 pub fn is_opened_only(&self) -> bool {
553 self.as_inner().buffers.is_some()
554 }
555
556 pub fn filters_path(&self) -> bool {
557 !(self.files_to_exclude().sources().next().is_none()
558 && self.files_to_include().sources().next().is_none())
559 }
560
561 pub fn match_full_paths(&self) -> bool {
562 self.as_inner().match_full_paths
563 }
564
565 /// Check match full paths to determine whether you're required to pass a fully qualified
566 /// project path (starts with a project root).
567 pub fn match_path(&self, file_path: &RelPath) -> bool {
568 let mut path = file_path.to_rel_path_buf();
569 loop {
570 if self.files_to_exclude().is_match(&path) {
571 return false;
572 } else if self.files_to_include().sources().next().is_none()
573 || self.files_to_include().is_match(&path)
574 {
575 return true;
576 } else if !path.pop() {
577 return false;
578 }
579 }
580 }
581 pub fn as_inner(&self) -> &SearchInputs {
582 match self {
583 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
584 }
585 }
586
587 /// Whether this search should replace only one match per line, instead of
588 /// all matches.
589 /// Returns `None` for text searches, as only regex searches support this
590 /// option.
591 pub fn one_match_per_line(&self) -> Option<bool> {
592 match self {
593 Self::Regex {
594 one_match_per_line, ..
595 } => Some(*one_match_per_line),
596 Self::Text { .. } => None,
597 }
598 }
599}
600
601#[cfg(test)]
602mod tests {
603 use super::*;
604
605 #[test]
606 fn path_matcher_creation_for_valid_paths() {
607 for valid_path in [
608 "file",
609 "Cargo.toml",
610 ".DS_Store",
611 "~/dir/another_dir/",
612 "./dir/file",
613 "dir/[a-z].txt",
614 ] {
615 let path_matcher = PathMatcher::new(&[valid_path.to_owned()], PathStyle::local())
616 .unwrap_or_else(|e| {
617 panic!("Valid path {valid_path} should be accepted, but got: {e}")
618 });
619 assert!(
620 path_matcher
621 .is_match(&RelPath::new(valid_path.as_ref(), PathStyle::local()).unwrap()),
622 "Path matcher for valid path {valid_path} should match itself"
623 )
624 }
625 }
626
627 #[test]
628 fn path_matcher_creation_for_globs() {
629 for invalid_glob in ["dir/[].txt", "dir/[a-z.txt", "dir/{file"] {
630 match PathMatcher::new(&[invalid_glob.to_owned()], PathStyle::local()) {
631 Ok(_) => panic!("Invalid glob {invalid_glob} should not be accepted"),
632 Err(_expected) => {}
633 }
634 }
635
636 for valid_glob in [
637 "dir/?ile",
638 "dir/*.txt",
639 "dir/**/file",
640 "dir/[a-z].txt",
641 "{dir,file}",
642 ] {
643 match PathMatcher::new(&[valid_glob.to_owned()], PathStyle::local()) {
644 Ok(_expected) => {}
645 Err(e) => panic!("Valid glob should be accepted, but got: {e}"),
646 }
647 }
648 }
649
650 #[test]
651 fn test_case_sensitive_pattern_items() {
652 let case_sensitive = false;
653 let search_query = SearchQuery::regex(
654 "test\\C",
655 false,
656 case_sensitive,
657 false,
658 false,
659 Default::default(),
660 Default::default(),
661 false,
662 None,
663 )
664 .expect("Should be able to create a regex SearchQuery");
665
666 assert_eq!(
667 search_query.case_sensitive(),
668 true,
669 "Case sensitivity should be enabled when \\C pattern item is present in the query."
670 );
671
672 let case_sensitive = true;
673 let search_query = SearchQuery::regex(
674 "test\\c",
675 true,
676 case_sensitive,
677 false,
678 false,
679 Default::default(),
680 Default::default(),
681 false,
682 None,
683 )
684 .expect("Should be able to create a regex SearchQuery");
685
686 assert_eq!(
687 search_query.case_sensitive(),
688 false,
689 "Case sensitivity should be disabled when \\c pattern item is present, even if initially set to true."
690 );
691
692 let case_sensitive = false;
693 let search_query = SearchQuery::regex(
694 "test\\c\\C",
695 false,
696 case_sensitive,
697 false,
698 false,
699 Default::default(),
700 Default::default(),
701 false,
702 None,
703 )
704 .expect("Should be able to create a regex SearchQuery");
705
706 assert_eq!(
707 search_query.case_sensitive(),
708 true,
709 "Case sensitivity should be enabled when \\C is the last pattern item, even after a \\c."
710 );
711
712 let case_sensitive = false;
713 let search_query = SearchQuery::regex(
714 "tests\\\\C",
715 false,
716 case_sensitive,
717 false,
718 false,
719 Default::default(),
720 Default::default(),
721 false,
722 None,
723 )
724 .expect("Should be able to create a regex SearchQuery");
725
726 assert_eq!(
727 search_query.case_sensitive(),
728 false,
729 "Case sensitivity should not be enabled when \\C pattern item is preceded by a backslash."
730 );
731 }
732}