1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use fancy_regex::{Captures, Regex, RegexBuilder};
5use gpui::Entity;
6use language::{Buffer, BufferSnapshot, CharKind};
7use smol::future::yield_now;
8use std::{
9 borrow::Cow,
10 io::{BufRead, BufReader, Read},
11 ops::Range,
12 path::Path,
13 sync::{Arc, LazyLock},
14};
15use text::Anchor;
16use util::paths::{PathMatcher, PathStyle};
17
/// One item of search output.
#[derive(Debug)]
pub enum SearchResult {
    /// A buffer together with the anchor ranges reported for it.
    Buffer {
        /// Handle to the buffer the ranges refer to.
        buffer: Entity<Buffer>,
        /// Anchor ranges inside `buffer` (presumably one per match; confirm with the producer).
        ranges: Vec<Range<Anchor>>,
    },
    /// Payload-free marker; presumably emitted once a result cap has been hit.
    /// NOTE(review): the producer is not in this file; verify there.
    LimitReached,
}
26
/// Names one of the three inputs a search is built from: the query text, the include
/// filter, or the exclude filter.
///
/// NOTE(review): not referenced elsewhere in this file; presumably used by UI code to tell
/// the inputs apart. Confirm against callers.
#[derive(Clone, Copy, PartialEq)]
pub enum SearchInputKind {
    /// The search query itself.
    Query,
    /// The "files to include" filter.
    Include,
    /// The "files to exclude" filter.
    Exclude,
}
33
/// The inputs shared by both kinds of [`SearchQuery`]: the query text plus path and buffer
/// filters.
#[derive(Clone, Debug)]
pub struct SearchInputs {
    /// Query text as handed to the `SearchQuery` constructor.
    query: Arc<str>,
    /// Matcher for paths that should be searched.
    files_to_include: PathMatcher,
    /// Matcher for paths that should be skipped.
    files_to_exclude: PathMatcher,
    /// Whether include/exclude patterns are matched against fully qualified project paths.
    match_full_paths: bool,
    /// Optional explicit set of buffers; when `Some`, `SearchQuery::is_opened_only` is true.
    buffers: Option<Vec<Entity<Buffer>>>,
}
42
43impl SearchInputs {
44 pub fn as_str(&self) -> &str {
45 self.query.as_ref()
46 }
47 pub fn files_to_include(&self) -> &PathMatcher {
48 &self.files_to_include
49 }
50 pub fn files_to_exclude(&self) -> &PathMatcher {
51 &self.files_to_exclude
52 }
53 pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
54 &self.buffers
55 }
56}
/// A compiled search query, either plain text or a regular expression.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search backed by an Aho-Corasick automaton.
    Text {
        /// Automaton built from the query; ASCII case-insensitive when `case_sensitive` is false.
        search: AhoCorasick,
        /// Replacement text, if one was attached with `with_replacement`.
        replacement: Option<String>,
        /// When true, `search` drops matches that start or end in the middle of a word.
        whole_word: bool,
        /// Whether matching distinguishes letter case.
        case_sensitive: bool,
        /// Stored as given; presumably controls whether ignored files are searched.
        /// NOTE(review): only read back through `include_ignored()` in this file.
        include_ignored: bool,
        /// Query text and path/buffer filters.
        inner: SearchInputs,
    },
    /// Regular-expression search.
    Regex {
        /// Compiled pattern, after `\c`/`\C` modifiers were stripped and, for whole-word
        /// queries, `\b` anchors were added.
        regex: Regex,
        /// Replacement text, if one was attached with `with_replacement`.
        replacement: Option<String>,
        /// True when the compiled pattern contains a newline or the two characters `\n`;
        /// selects whole-text matching instead of line-by-line matching.
        multiline: bool,
        /// Whether word-boundary anchors were requested at construction time.
        whole_word: bool,
        /// Effective case sensitivity (a `\c`/`\C` modifier in the query overrides the argument).
        case_sensitive: bool,
        /// Stored as given; presumably controls whether ignored files are searched.
        /// NOTE(review): only read back through `include_ignored()` in this file.
        include_ignored: bool,
        /// When true, line-by-line `search` records only the first match on each line.
        one_match_per_line: bool,
        /// Filters plus the query text exactly as passed to `SearchQuery::regex`.
        inner: SearchInputs,
    },
}
78
79static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
80 RegexBuilder::new(r"\B")
81 .build()
82 .expect("Failed to create WORD_MATCH_TEST")
83});
84
85impl SearchQuery {
86 /// Create a text query
87 ///
88 /// If `match_full_paths` is true, include/exclude patterns will always be matched against fully qualified project paths beginning with a project root.
89 /// If `match_full_paths` is false, patterns will be matched against full paths only when the project has multiple roots.
90 pub fn text(
91 query: impl ToString,
92 whole_word: bool,
93 case_sensitive: bool,
94 include_ignored: bool,
95 files_to_include: PathMatcher,
96 files_to_exclude: PathMatcher,
97 match_full_paths: bool,
98 buffers: Option<Vec<Entity<Buffer>>>,
99 ) -> Result<Self> {
100 let query = query.to_string();
101 if !case_sensitive && !query.is_ascii() {
102 // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
103 // Fallback to regex search as recommended by
104 // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
105 return Self::regex(
106 regex::escape(&query),
107 whole_word,
108 case_sensitive,
109 include_ignored,
110 false,
111 files_to_include,
112 files_to_exclude,
113 false,
114 buffers,
115 );
116 }
117 let search = AhoCorasickBuilder::new()
118 .ascii_case_insensitive(!case_sensitive)
119 .build([&query])?;
120 let inner = SearchInputs {
121 query: query.into(),
122 files_to_exclude,
123 files_to_include,
124 match_full_paths,
125 buffers,
126 };
127 Ok(Self::Text {
128 search,
129 replacement: None,
130 whole_word,
131 case_sensitive,
132 include_ignored,
133 inner,
134 })
135 }
136
137 /// Create a regex query
138 ///
139 /// If `match_full_paths` is true, include/exclude patterns will be matched against fully qualified project paths
140 /// beginning with a project root name. If false, they will be matched against project-relative paths (which don't start
141 /// with their respective project root).
142 pub fn regex(
143 query: impl ToString,
144 whole_word: bool,
145 mut case_sensitive: bool,
146 include_ignored: bool,
147 one_match_per_line: bool,
148 files_to_include: PathMatcher,
149 files_to_exclude: PathMatcher,
150 match_full_paths: bool,
151 buffers: Option<Vec<Entity<Buffer>>>,
152 ) -> Result<Self> {
153 let mut query = query.to_string();
154 let initial_query = Arc::from(query.as_str());
155
156 if let Some((case_sensitive_from_pattern, new_query)) =
157 Self::case_sensitive_from_pattern(&query)
158 {
159 case_sensitive = case_sensitive_from_pattern;
160 query = new_query
161 }
162
163 if whole_word {
164 let mut word_query = String::new();
165 if let Some(first) = query.get(0..1)
166 && WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x)
167 {
168 word_query.push_str("\\b");
169 }
170 word_query.push_str(&query);
171 if let Some(last) = query.get(query.len() - 1..)
172 && WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x)
173 {
174 word_query.push_str("\\b");
175 }
176 query = word_query
177 }
178
179 let multiline = query.contains('\n') || query.contains("\\n");
180 let regex = RegexBuilder::new(&query)
181 .case_insensitive(!case_sensitive)
182 .build()?;
183 let inner = SearchInputs {
184 query: initial_query,
185 files_to_exclude,
186 files_to_include,
187 match_full_paths,
188 buffers,
189 };
190 Ok(Self::Regex {
191 regex,
192 replacement: None,
193 multiline,
194 whole_word,
195 case_sensitive,
196 include_ignored,
197 inner,
198 one_match_per_line,
199 })
200 }
201
202 /// Extracts case sensitivity settings from pattern items in the provided
203 /// query and returns the same query, with the pattern items removed.
204 ///
205 /// The following pattern modifiers are supported:
206 ///
207 /// - `\c` (case_sensitive: false)
208 /// - `\C` (case_sensitive: true)
209 ///
210 /// If no pattern item were found, `None` will be returned.
211 fn case_sensitive_from_pattern(query: &str) -> Option<(bool, String)> {
212 if !(query.contains("\\c") || query.contains("\\C")) {
213 return None;
214 }
215
216 let mut was_escaped = false;
217 let mut new_query = String::new();
218 let mut is_case_sensitive = None;
219
220 for c in query.chars() {
221 if was_escaped {
222 if c == 'c' {
223 is_case_sensitive = Some(false);
224 } else if c == 'C' {
225 is_case_sensitive = Some(true);
226 } else {
227 new_query.push('\\');
228 new_query.push(c);
229 }
230 was_escaped = false
231 } else if c == '\\' {
232 was_escaped = true
233 } else {
234 new_query.push(c);
235 }
236 }
237
238 is_case_sensitive.map(|c| (c, new_query))
239 }
240
241 pub fn from_proto(message: proto::SearchQuery, path_style: PathStyle) -> Result<Self> {
242 let files_to_include = if message.files_to_include.is_empty() {
243 message
244 .files_to_include_legacy
245 .split(',')
246 .map(str::trim)
247 .filter(|&glob_str| !glob_str.is_empty())
248 .map(|s| s.to_string())
249 .collect()
250 } else {
251 message.files_to_include
252 };
253
254 let files_to_exclude = if message.files_to_exclude.is_empty() {
255 message
256 .files_to_exclude_legacy
257 .split(',')
258 .map(str::trim)
259 .filter(|&glob_str| !glob_str.is_empty())
260 .map(|s| s.to_string())
261 .collect()
262 } else {
263 message.files_to_exclude
264 };
265
266 if message.regex {
267 Self::regex(
268 message.query,
269 message.whole_word,
270 message.case_sensitive,
271 message.include_ignored,
272 false,
273 PathMatcher::new(files_to_include, path_style)?,
274 PathMatcher::new(files_to_exclude, path_style)?,
275 message.match_full_paths,
276 None, // search opened only don't need search remote
277 )
278 } else {
279 Self::text(
280 message.query,
281 message.whole_word,
282 message.case_sensitive,
283 message.include_ignored,
284 PathMatcher::new(files_to_include, path_style)?,
285 PathMatcher::new(files_to_exclude, path_style)?,
286 false,
287 None, // search opened only don't need search remote
288 )
289 }
290 }
291
292 pub fn with_replacement(mut self, new_replacement: String) -> Self {
293 match self {
294 Self::Text {
295 ref mut replacement,
296 ..
297 }
298 | Self::Regex {
299 ref mut replacement,
300 ..
301 } => {
302 *replacement = Some(new_replacement);
303 self
304 }
305 }
306 }
307
308 pub fn to_proto(&self) -> proto::SearchQuery {
309 let files_to_include = self.files_to_include().sources().to_vec();
310 let files_to_exclude = self.files_to_exclude().sources().to_vec();
311 proto::SearchQuery {
312 query: self.as_str().to_string(),
313 regex: self.is_regex(),
314 whole_word: self.whole_word(),
315 case_sensitive: self.case_sensitive(),
316 include_ignored: self.include_ignored(),
317 files_to_include: files_to_include.clone(),
318 files_to_exclude: files_to_exclude.clone(),
319 match_full_paths: self.match_full_paths(),
320 // Populate legacy fields for backwards compatibility
321 files_to_include_legacy: files_to_include.join(","),
322 files_to_exclude_legacy: files_to_exclude.join(","),
323 }
324 }
325
326 pub(crate) fn detect(
327 &self,
328 mut reader: BufReader<Box<dyn Read + Send + Sync>>,
329 ) -> Result<bool> {
330 if self.as_str().is_empty() {
331 return Ok(false);
332 }
333
334 match self {
335 Self::Text { search, .. } => {
336 let mat = search.stream_find_iter(reader).next();
337 match mat {
338 Some(Ok(_)) => Ok(true),
339 Some(Err(err)) => Err(err.into()),
340 None => Ok(false),
341 }
342 }
343 Self::Regex {
344 regex, multiline, ..
345 } => {
346 if *multiline {
347 let mut text = String::new();
348 if let Err(err) = reader.read_to_string(&mut text) {
349 Err(err.into())
350 } else {
351 Ok(regex.find(&text)?.is_some())
352 }
353 } else {
354 for line in reader.lines() {
355 let line = line?;
356 if regex.find(&line)?.is_some() {
357 return Ok(true);
358 }
359 }
360 Ok(false)
361 }
362 }
363 }
364 }
365 /// Returns the replacement text for this `SearchQuery`.
366 pub fn replacement(&self) -> Option<&str> {
367 match self {
368 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
369 replacement.as_deref()
370 }
371 }
372 }
373 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
374 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
375 match self {
376 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
377 SearchQuery::Regex {
378 regex, replacement, ..
379 } => {
380 if let Some(replacement) = replacement {
381 static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
382 LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
383 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
384 replacement,
385 |c: &Captures| match c.get(0).unwrap().as_str() {
386 r"\\" => "\\",
387 r"\n" => "\n",
388 r"\t" => "\t",
389 x => unreachable!("Unexpected escape sequence: {}", x),
390 },
391 );
392 Some(regex.replace(text, replacement))
393 } else {
394 None
395 }
396 }
397 }
398 }
399
    /// Finds the matches of this query in `buffer`, optionally restricted to `subrange`.
    ///
    /// Returns byte ranges relative to the start of the searched text: when `subrange` is
    /// given, offsets are measured from the start of that slice, not from the start of the
    /// buffer (`range_offset` is only used to pick the character classifier). An empty query
    /// yields no matches.
    ///
    /// The future periodically yields (see `YIELD_INTERVAL`) while iterating.
    ///
    /// # Panics
    ///
    /// Panics if the streaming text search reports an error for a match (`mat.unwrap()`).
    pub async fn search(
        &self,
        buffer: &BufferSnapshot,
        subrange: Option<Range<usize>>,
    ) -> Vec<Range<usize>> {
        // Number of matches (or rope chunks, in line-by-line mode) processed between yields.
        const YIELD_INTERVAL: usize = 20000;

        if self.as_str().is_empty() {
            return Default::default();
        }

        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
        let rope = if let Some(range) = subrange {
            buffer.as_rope().slice(range)
        } else {
            buffer.as_rope().clone()
        };

        let mut matches = Vec::new();
        match self {
            Self::Text {
                search, whole_word, ..
            } => {
                for (ix, mat) in search
                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
                    .enumerate()
                {
                    if (ix + 1) % YIELD_INTERVAL == 0 {
                        yield_now().await;
                    }

                    let mat = mat.unwrap();
                    if *whole_word {
                        // The classifier is looked up at the match's position in the full
                        // buffer, hence the added `range_offset`.
                        let classifier = buffer.char_classifier_at(range_offset + mat.start());

                        // Kinds of: the character before the match, the first and last
                        // characters of the match, and the character after the match.
                        let prev_kind = rope
                            .reversed_chars_at(mat.start())
                            .next()
                            .map(|c| classifier.kind(c));
                        let start_kind =
                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
                        let end_kind =
                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
                        // Skip the match if a word continues across either of its edges.
                        if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
                            || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
                        {
                            continue;
                        }
                    }
                    matches.push(mat.start()..mat.end())
                }
            }

            Self::Regex {
                regex, multiline, ..
            } => {
                if *multiline {
                    // Multiline patterns need the whole text at once.
                    let text = rope.to_string();
                    for (ix, mat) in regex.find_iter(&text).enumerate() {
                        if (ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        // Matches that failed with a regex error are skipped.
                        if let Ok(mat) = mat {
                            matches.push(mat.start()..mat.end());
                        }
                    }
                } else {
                    // Chunks can split a line anywhere, so text is accumulated in `line`
                    // until a newline is seen. The extra "\n" chained after the last chunk
                    // flushes the final line.
                    let mut line = String::new();
                    // Byte offset of the start of `line` within the searched text.
                    let mut line_offset = 0;
                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        for (newline_ix, text) in chunk.split('\n').enumerate() {
                            // Every piece after the first follows a newline, i.e. `line` is
                            // now complete and can be searched.
                            if newline_ix > 0 {
                                for mat in regex.find_iter(&line).flatten() {
                                    let start = line_offset + mat.start();
                                    let end = line_offset + mat.end();
                                    matches.push(start..end);
                                    if self.one_match_per_line() == Some(true) {
                                        break;
                                    }
                                }

                                // `+ 1` accounts for the newline removed by `split`.
                                line_offset += line.len() + 1;
                                line.clear();
                            }
                            line.push_str(text);
                        }
                    }
                }
            }
        }

        matches
    }
499
500 pub fn is_empty(&self) -> bool {
501 self.as_str().is_empty()
502 }
503
504 pub fn as_str(&self) -> &str {
505 self.as_inner().as_str()
506 }
507
508 pub fn whole_word(&self) -> bool {
509 match self {
510 Self::Text { whole_word, .. } => *whole_word,
511 Self::Regex { whole_word, .. } => *whole_word,
512 }
513 }
514
515 pub fn case_sensitive(&self) -> bool {
516 match self {
517 Self::Text { case_sensitive, .. } => *case_sensitive,
518 Self::Regex { case_sensitive, .. } => *case_sensitive,
519 }
520 }
521
522 pub fn include_ignored(&self) -> bool {
523 match self {
524 Self::Text {
525 include_ignored, ..
526 } => *include_ignored,
527 Self::Regex {
528 include_ignored, ..
529 } => *include_ignored,
530 }
531 }
532
533 pub fn is_regex(&self) -> bool {
534 matches!(self, Self::Regex { .. })
535 }
536
537 pub fn files_to_include(&self) -> &PathMatcher {
538 self.as_inner().files_to_include()
539 }
540
541 pub fn files_to_exclude(&self) -> &PathMatcher {
542 self.as_inner().files_to_exclude()
543 }
544
545 pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
546 self.as_inner().buffers.as_ref()
547 }
548
549 pub fn is_opened_only(&self) -> bool {
550 self.as_inner().buffers.is_some()
551 }
552
553 pub fn filters_path(&self) -> bool {
554 !(self.files_to_exclude().sources().is_empty()
555 && self.files_to_include().sources().is_empty())
556 }
557
558 pub fn match_full_paths(&self) -> bool {
559 self.as_inner().match_full_paths
560 }
561
562 /// Check match full paths to determine whether you're required to pass a fully qualified
563 /// project path (starts with a project root).
564 pub fn match_path(&self, file_path: &Path) -> bool {
565 let mut path = file_path.to_path_buf();
566 loop {
567 if self.files_to_exclude().is_match(&path) {
568 return false;
569 } else if self.files_to_include().sources().is_empty()
570 || self.files_to_include().is_match(&path)
571 {
572 return true;
573 } else if !path.pop() {
574 return false;
575 }
576 }
577 }
578 pub fn as_inner(&self) -> &SearchInputs {
579 match self {
580 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
581 }
582 }
583
584 /// Whether this search should replace only one match per line, instead of
585 /// all matches.
586 /// Returns `None` for text searches, as only regex searches support this
587 /// option.
588 pub fn one_match_per_line(&self) -> Option<bool> {
589 match self {
590 Self::Regex {
591 one_match_per_line, ..
592 } => Some(*one_match_per_line),
593 Self::Text { .. } => None,
594 }
595 }
596}
597
#[cfg(test)]
mod tests {
    use super::*;

    /// Every plain path must yield a matcher that matches the path itself.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher =
                match PathMatcher::new(&[valid_path.to_owned()], PathStyle::local()) {
                    Ok(matcher) => matcher,
                    Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
                };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs are rejected, well-formed ones are accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        for invalid_glob in ["dir/[].txt", "dir/[a-z.txt", "dir/{file"] {
            if PathMatcher::new(&[invalid_glob.to_owned()], PathStyle::local()).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()], PathStyle::local()) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }

    /// `\c` / `\C` inside a regex query override the `case_sensitive` argument.
    #[test]
    fn test_case_sensitive_pattern_items() {
        // (query, whole_word, case_sensitive argument, expected result, failure message)
        let cases = [
            (
                "test\\C",
                false,
                false,
                true,
                "Case sensitivity should be enabled when \\C pattern item is present in the query.",
            ),
            (
                "test\\c",
                true,
                true,
                false,
                "Case sensitivity should be disabled when \\c pattern item is present, even if initially set to true.",
            ),
            (
                "test\\c\\C",
                false,
                false,
                true,
                "Case sensitivity should be enabled when \\C is the last pattern item, even after a \\c.",
            ),
            (
                "tests\\\\C",
                false,
                false,
                false,
                "Case sensitivity should not be enabled when \\C pattern item is preceded by a backslash.",
            ),
        ];

        for (query, whole_word, case_sensitive, expected, message) in cases {
            let search_query = SearchQuery::regex(
                query,
                whole_word,
                case_sensitive,
                false,
                false,
                Default::default(),
                Default::default(),
                false,
                None,
            )
            .expect("Should be able to create a regex SearchQuery");

            assert_eq!(search_query.case_sensitive(), expected, "{message}");
        }
    }
}