1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use fancy_regex::{Captures, Regex, RegexBuilder};
5use gpui::Entity;
6use itertools::Itertools as _;
7use language::{Buffer, BufferSnapshot, CharKind};
8use smol::future::yield_now;
9use std::{
10 borrow::Cow,
11 io::{BufRead, BufReader, Read},
12 ops::Range,
13 sync::{Arc, LazyLock},
14};
15use text::Anchor;
16use util::{
17 paths::{PathMatcher, PathStyle},
18 rel_path::RelPath,
19};
20
/// One item reported by a search.
#[derive(Debug)]
pub enum SearchResult {
    /// A buffer together with the anchored ranges that were found in it.
    Buffer {
        buffer: Entity<Buffer>,
        ranges: Vec<Range<Anchor>>,
    },
    // NOTE(review): presumably emitted when the result limit was hit; confirm against the producer.
    LimitReached,
    // NOTE(review): presumably emitted while a worktree scan is still pending; confirm against the producer.
    WaitingForScan,
}
30
/// Identifies one of the three textual inputs of a search: the query itself,
/// the include filter, or the exclude filter.
#[derive(Clone, Copy, PartialEq)]
pub enum SearchInputKind {
    Query,
    Include,
    Exclude,
}
37
/// The user-provided inputs shared by every kind of [`SearchQuery`].
#[derive(Clone, Debug)]
pub struct SearchInputs {
    /// The query text as supplied by the caller (before any escaping or rewriting).
    query: Arc<str>,
    /// Patterns a path must match to be searched.
    files_to_include: PathMatcher,
    /// Patterns that remove a path from the search.
    files_to_exclude: PathMatcher,
    /// Whether include/exclude patterns are matched against fully qualified project paths.
    match_full_paths: bool,
    /// When `Some`, the query reports itself as "opened only" (see `SearchQuery::is_opened_only`).
    buffers: Option<Vec<Entity<Buffer>>>,
}
46
47impl SearchInputs {
48 pub fn as_str(&self) -> &str {
49 self.query.as_ref()
50 }
51 pub fn files_to_include(&self) -> &PathMatcher {
52 &self.files_to_include
53 }
54 pub fn files_to_exclude(&self) -> &PathMatcher {
55 &self.files_to_exclude
56 }
57 pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
58 &self.buffers
59 }
60}
/// A compiled search query: either a literal text search or a regex search.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal search backed by an Aho-Corasick automaton.
    Text {
        /// Automaton built from the query text.
        search: AhoCorasick,
        /// Replacement text, set through `with_replacement`.
        replacement: Option<String>,
        whole_word: bool,
        case_sensitive: bool,
        include_ignored: bool,
        /// The original user inputs (query text, path filters, buffers).
        inner: SearchInputs,
    },
    /// Regular-expression search.
    Regex {
        /// The compiled pattern (after whole-word / multiline rewriting).
        regex: Regex,
        /// Replacement template, set through `with_replacement`.
        replacement: Option<String>,
        /// True when the pattern contains a newline or a `\n` escape; such
        /// queries are matched against whole texts instead of line by line.
        multiline: bool,
        whole_word: bool,
        case_sensitive: bool,
        include_ignored: bool,
        /// When true, line-by-line searching stops after the first match on each line.
        one_match_per_line: bool,
        /// The original user inputs (query text, path filters, buffers).
        inner: SearchInputs,
    },
}
82
/// Regex consisting only of `\B` (a position that is *not* a word boundary).
///
/// Used when building whole-word regex queries: it is run against the first
/// and last piece of the pattern, and `\b` is added on that side only when
/// this regex does NOT match.
static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
    RegexBuilder::new(r"\B")
        .build()
        .expect("Failed to create WORD_MATCH_TEST")
});
88
89impl SearchQuery {
90 /// Create a text query
91 ///
92 /// If `match_full_paths` is true, include/exclude patterns will always be matched against fully qualified project paths beginning with a project root.
93 /// If `match_full_paths` is false, patterns will be matched against worktree-relative paths.
94 pub fn text(
95 query: impl ToString,
96 whole_word: bool,
97 case_sensitive: bool,
98 include_ignored: bool,
99 files_to_include: PathMatcher,
100 files_to_exclude: PathMatcher,
101 match_full_paths: bool,
102 buffers: Option<Vec<Entity<Buffer>>>,
103 ) -> Result<Self> {
104 let query = query.to_string();
105 if !case_sensitive && !query.is_ascii() {
106 // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
107 // Fallback to regex search as recommended by
108 // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
109 return Self::escaped_regex(
110 query,
111 whole_word,
112 case_sensitive,
113 include_ignored,
114 files_to_include,
115 files_to_exclude,
116 false,
117 buffers,
118 );
119 }
120 let search = AhoCorasickBuilder::new()
121 .ascii_case_insensitive(!case_sensitive)
122 .build([&query])?;
123 let inner = SearchInputs {
124 query: query.into(),
125 files_to_exclude,
126 files_to_include,
127 match_full_paths,
128 buffers,
129 };
130 Ok(Self::Text {
131 search,
132 replacement: None,
133 whole_word,
134 case_sensitive,
135 include_ignored,
136 inner,
137 })
138 }
139
140 /// Create a regex query
141 ///
142 /// If `match_full_paths` is true, include/exclude patterns will be matched against fully qualified project paths
143 /// beginning with a project root name. If false, they will be matched against project-relative paths (which don't start
144 /// with their respective project root).
145 pub fn regex(
146 query: impl ToString,
147 whole_word: bool,
148 case_sensitive: bool,
149 include_ignored: bool,
150 one_match_per_line: bool,
151 files_to_include: PathMatcher,
152 files_to_exclude: PathMatcher,
153 match_full_paths: bool,
154 buffers: Option<Vec<Entity<Buffer>>>,
155 ) -> Result<Self> {
156 let query = query.to_string();
157 let inner = SearchInputs {
158 query: Arc::from(query.as_str()),
159 files_to_include,
160 files_to_exclude,
161 match_full_paths,
162 buffers,
163 };
164 Self::build_regex(
165 query,
166 whole_word,
167 case_sensitive,
168 include_ignored,
169 one_match_per_line,
170 inner,
171 )
172 }
173
174 /// Create a regex query from a literal string, escaping any regex
175 /// metacharacters so that the resulting query matches the literal text.
176 ///
177 /// Unlike `regex`, the query stored on the resulting `SearchQuery` is the
178 /// original unescaped text, so `as_str` returns what the user typed.
179 pub fn escaped_regex(
180 query: impl ToString,
181 whole_word: bool,
182 case_sensitive: bool,
183 include_ignored: bool,
184 files_to_include: PathMatcher,
185 files_to_exclude: PathMatcher,
186 match_full_paths: bool,
187 buffers: Option<Vec<Entity<Buffer>>>,
188 ) -> Result<Self> {
189 let query = query.to_string();
190 let inner = SearchInputs {
191 query: Arc::from(query.as_str()),
192 files_to_include,
193 files_to_exclude,
194 match_full_paths,
195 buffers,
196 };
197 Self::build_regex(
198 regex::escape(&query),
199 whole_word,
200 case_sensitive,
201 include_ignored,
202 false,
203 inner,
204 )
205 }
206
207 fn build_regex(
208 mut pattern: String,
209 whole_word: bool,
210 mut case_sensitive: bool,
211 include_ignored: bool,
212 one_match_per_line: bool,
213 inner: SearchInputs,
214 ) -> Result<Self> {
215 if let Some((case_sensitive_from_pattern, new_pattern)) =
216 Self::case_sensitive_from_pattern(&pattern)
217 {
218 case_sensitive = case_sensitive_from_pattern;
219 pattern = new_pattern
220 }
221
222 if whole_word {
223 let mut word_pattern = String::new();
224 if let Some(first) = pattern.get(0..1)
225 && WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x)
226 {
227 word_pattern.push_str("\\b");
228 }
229 word_pattern.push_str(&pattern);
230 if let Some(last) = pattern.get(pattern.len() - 1..)
231 && WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x)
232 {
233 word_pattern.push_str("\\b");
234 }
235 pattern = word_pattern
236 }
237
238 let multiline = pattern.contains('\n') || pattern.contains("\\n");
239 if multiline {
240 pattern.insert_str(0, "(?m)");
241 }
242
243 let regex = RegexBuilder::new(&pattern)
244 .case_insensitive(!case_sensitive)
245 .build()?;
246 Ok(Self::Regex {
247 regex,
248 replacement: None,
249 multiline,
250 whole_word,
251 case_sensitive,
252 include_ignored,
253 inner,
254 one_match_per_line,
255 })
256 }
257
258 /// Extracts case sensitivity settings from pattern items in the provided
259 /// query and returns the same query, with the pattern items removed.
260 ///
261 /// The following pattern modifiers are supported:
262 ///
263 /// - `\c` (case_sensitive: false)
264 /// - `\C` (case_sensitive: true)
265 ///
266 /// If no pattern item were found, `None` will be returned.
267 fn case_sensitive_from_pattern(query: &str) -> Option<(bool, String)> {
268 if !(query.contains("\\c") || query.contains("\\C")) {
269 return None;
270 }
271
272 let mut was_escaped = false;
273 let mut new_query = String::new();
274 let mut is_case_sensitive = None;
275
276 for c in query.chars() {
277 if was_escaped {
278 if c == 'c' {
279 is_case_sensitive = Some(false);
280 } else if c == 'C' {
281 is_case_sensitive = Some(true);
282 } else {
283 new_query.push('\\');
284 new_query.push(c);
285 }
286 was_escaped = false
287 } else if c == '\\' {
288 was_escaped = true
289 } else {
290 new_query.push(c);
291 }
292 }
293
294 is_case_sensitive.map(|c| (c, new_query))
295 }
296
297 pub fn from_proto(message: proto::SearchQuery, path_style: PathStyle) -> Result<Self> {
298 let files_to_include = if message.files_to_include.is_empty() {
299 message
300 .files_to_include_legacy
301 .split(',')
302 .map(str::trim)
303 .filter(|&glob_str| !glob_str.is_empty())
304 .map(|s| s.to_string())
305 .collect()
306 } else {
307 message.files_to_include
308 };
309
310 let files_to_exclude = if message.files_to_exclude.is_empty() {
311 message
312 .files_to_exclude_legacy
313 .split(',')
314 .map(str::trim)
315 .filter(|&glob_str| !glob_str.is_empty())
316 .map(|s| s.to_string())
317 .collect()
318 } else {
319 message.files_to_exclude
320 };
321
322 if message.regex {
323 Self::regex(
324 message.query,
325 message.whole_word,
326 message.case_sensitive,
327 message.include_ignored,
328 false,
329 PathMatcher::new(files_to_include, path_style)?,
330 PathMatcher::new(files_to_exclude, path_style)?,
331 message.match_full_paths,
332 None, // search opened only don't need search remote
333 )
334 } else {
335 Self::text(
336 message.query,
337 message.whole_word,
338 message.case_sensitive,
339 message.include_ignored,
340 PathMatcher::new(files_to_include, path_style)?,
341 PathMatcher::new(files_to_exclude, path_style)?,
342 message.match_full_paths,
343 None, // search opened only don't need search remote
344 )
345 }
346 }
347
348 pub fn with_replacement(mut self, new_replacement: String) -> Self {
349 match self {
350 Self::Text {
351 ref mut replacement,
352 ..
353 }
354 | Self::Regex {
355 ref mut replacement,
356 ..
357 } => {
358 *replacement = Some(new_replacement);
359 self
360 }
361 }
362 }
363
364 pub fn to_proto(&self) -> proto::SearchQuery {
365 let mut files_to_include = self.files_to_include().sources();
366 let mut files_to_exclude = self.files_to_exclude().sources();
367 proto::SearchQuery {
368 query: self.as_str().to_string(),
369 regex: self.is_regex(),
370 whole_word: self.whole_word(),
371 case_sensitive: self.case_sensitive(),
372 include_ignored: self.include_ignored(),
373 files_to_include: files_to_include.clone().map(ToOwned::to_owned).collect(),
374 files_to_exclude: files_to_exclude.clone().map(ToOwned::to_owned).collect(),
375 match_full_paths: self.match_full_paths(),
376 // Populate legacy fields for backwards compatibility
377 files_to_include_legacy: files_to_include.join(","),
378 files_to_exclude_legacy: files_to_exclude.join(","),
379 }
380 }
381
382 pub(crate) async fn detect(
383 &self,
384 mut reader: BufReader<Box<dyn Read + Send + Sync>>,
385 ) -> Result<bool> {
386 let query_str = self.as_str();
387 if query_str.is_empty() {
388 return Ok(false);
389 }
390
391 // Yield from this function every 20KB scanned.
392 const YIELD_THRESHOLD: usize = 20 * 1024;
393
394 match self {
395 Self::Text { search, .. } => {
396 let mut text = String::new();
397 if query_str.contains('\n') {
398 reader.read_to_string(&mut text)?;
399 Ok(search.is_match(&text))
400 } else {
401 let mut bytes_read = 0;
402 while reader.read_line(&mut text)? > 0 {
403 if search.is_match(&text) {
404 return Ok(true);
405 }
406 bytes_read += text.len();
407 if bytes_read >= YIELD_THRESHOLD {
408 bytes_read = 0;
409 smol::future::yield_now().await;
410 }
411 text.clear();
412 }
413 Ok(false)
414 }
415 }
416 Self::Regex {
417 regex, multiline, ..
418 } => {
419 let mut text = String::new();
420 if *multiline {
421 reader.read_to_string(&mut text)?;
422 Ok(regex.is_match(&text)?)
423 } else {
424 let mut bytes_read = 0;
425 while reader.read_line(&mut text)? > 0 {
426 if regex.is_match(&text)? {
427 return Ok(true);
428 }
429 bytes_read += text.len();
430 if bytes_read >= YIELD_THRESHOLD {
431 bytes_read = 0;
432 smol::future::yield_now().await;
433 }
434 text.clear();
435 }
436 Ok(false)
437 }
438 }
439 }
440 }
441 /// Returns the replacement text for this `SearchQuery`.
442 pub fn replacement(&self) -> Option<&str> {
443 match self {
444 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
445 replacement.as_deref()
446 }
447 }
448 }
449 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
450 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
451 match self {
452 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
453 SearchQuery::Regex {
454 regex, replacement, ..
455 } => {
456 if let Some(replacement) = replacement {
457 static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
458 LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
459 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
460 replacement,
461 |c: &Captures| match c.get(0).unwrap().as_str() {
462 r"\\" => "\\",
463 r"\n" => "\n",
464 r"\t" => "\t",
465 x => unreachable!("Unexpected escape sequence: {}", x),
466 },
467 );
468 Some(regex.replace(text, replacement))
469 } else {
470 None
471 }
472 }
473 }
474 }
475
476 pub async fn search(
477 &self,
478 buffer: &BufferSnapshot,
479 subrange: Option<Range<usize>>,
480 ) -> Vec<Range<usize>> {
481 const YIELD_INTERVAL: usize = 20000;
482
483 if self.as_str().is_empty() {
484 return Default::default();
485 }
486
487 let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
488 let rope = if let Some(range) = subrange {
489 buffer.as_rope().slice(range)
490 } else {
491 buffer.as_rope().clone()
492 };
493
494 let mut matches = Vec::new();
495 match self {
496 Self::Text {
497 search, whole_word, ..
498 } => {
499 for (ix, mat) in search
500 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
501 .enumerate()
502 {
503 if (ix + 1) % YIELD_INTERVAL == 0 {
504 yield_now().await;
505 }
506
507 let mat = mat.unwrap();
508 if *whole_word {
509 let classifier = buffer.char_classifier_at(range_offset + mat.start());
510
511 let prev_kind = rope
512 .reversed_chars_at(mat.start())
513 .next()
514 .map(|c| classifier.kind(c));
515 let start_kind =
516 classifier.kind(rope.chars_at(mat.start()).next().unwrap());
517 let end_kind =
518 classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
519 let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
520 if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
521 || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
522 {
523 continue;
524 }
525 }
526 matches.push(mat.start()..mat.end())
527 }
528 }
529
530 Self::Regex {
531 regex, multiline, ..
532 } => {
533 if *multiline {
534 let text = rope.to_string();
535 for (ix, mat) in regex.find_iter(&text).enumerate() {
536 if (ix + 1) % YIELD_INTERVAL == 0 {
537 yield_now().await;
538 }
539
540 if let Ok(mat) = mat {
541 matches.push(mat.start()..mat.end());
542 }
543 }
544 } else {
545 let mut line = String::new();
546 let mut line_offset = 0;
547 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
548 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
549 yield_now().await;
550 }
551
552 for (newline_ix, text) in chunk.split('\n').enumerate() {
553 if newline_ix > 0 {
554 for mat in regex.find_iter(&line).flatten() {
555 let start = line_offset + mat.start();
556 let end = line_offset + mat.end();
557 matches.push(start..end);
558 if self.one_match_per_line() == Some(true) {
559 break;
560 }
561 }
562
563 line_offset += line.len() + 1;
564 line.clear();
565 }
566 line.push_str(text);
567 }
568 }
569 }
570 }
571 }
572
573 matches
574 }
575
576 pub fn is_empty(&self) -> bool {
577 self.as_str().is_empty()
578 }
579
580 pub fn as_str(&self) -> &str {
581 self.as_inner().as_str()
582 }
583
584 pub fn whole_word(&self) -> bool {
585 match self {
586 Self::Text { whole_word, .. } => *whole_word,
587 Self::Regex { whole_word, .. } => *whole_word,
588 }
589 }
590
591 pub fn case_sensitive(&self) -> bool {
592 match self {
593 Self::Text { case_sensitive, .. } => *case_sensitive,
594 Self::Regex { case_sensitive, .. } => *case_sensitive,
595 }
596 }
597
598 pub fn include_ignored(&self) -> bool {
599 match self {
600 Self::Text {
601 include_ignored, ..
602 } => *include_ignored,
603 Self::Regex {
604 include_ignored, ..
605 } => *include_ignored,
606 }
607 }
608
609 pub fn is_regex(&self) -> bool {
610 matches!(self, Self::Regex { .. })
611 }
612
613 pub fn files_to_include(&self) -> &PathMatcher {
614 self.as_inner().files_to_include()
615 }
616
617 pub fn files_to_exclude(&self) -> &PathMatcher {
618 self.as_inner().files_to_exclude()
619 }
620
621 pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
622 self.as_inner().buffers.as_ref()
623 }
624
625 pub fn is_opened_only(&self) -> bool {
626 self.as_inner().buffers.is_some()
627 }
628
629 pub fn filters_path(&self) -> bool {
630 !(self.files_to_exclude().sources().next().is_none()
631 && self.files_to_include().sources().next().is_none())
632 }
633
634 pub fn match_full_paths(&self) -> bool {
635 self.as_inner().match_full_paths
636 }
637
638 /// Check match full paths to determine whether you're required to pass a fully qualified
639 /// project path (starts with a project root).
640 pub fn match_path(&self, file_path: &RelPath) -> bool {
641 let mut path = file_path.to_rel_path_buf();
642 loop {
643 if self.files_to_exclude().is_match(&path) {
644 return false;
645 } else if self.files_to_include().sources().next().is_none()
646 || self.files_to_include().is_match(&path)
647 {
648 return true;
649 } else if !path.pop() {
650 return false;
651 }
652 }
653 }
654 pub fn as_inner(&self) -> &SearchInputs {
655 match self {
656 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
657 }
658 }
659
660 /// Whether this search should replace only one match per line, instead of
661 /// all matches.
662 /// Returns `None` for text searches, as only regex searches support this
663 /// option.
664 pub fn one_match_per_line(&self) -> Option<bool> {
665 match self {
666 Self::Regex {
667 one_match_per_line, ..
668 } => Some(*one_match_per_line),
669 Self::Text { .. } => None,
670 }
671 }
672
673 pub fn search_str(&self, text: &str) -> Vec<Range<usize>> {
674 if self.as_str().is_empty() {
675 return Vec::new();
676 }
677
678 let is_word_char = |c: char| c.is_alphanumeric() || c == '_';
679
680 let mut matches = Vec::new();
681 match self {
682 Self::Text {
683 search, whole_word, ..
684 } => {
685 for mat in search.find_iter(text.as_bytes()) {
686 if *whole_word {
687 let prev_char = text[..mat.start()].chars().last();
688 let next_char = text[mat.end()..].chars().next();
689 if prev_char.is_some_and(&is_word_char)
690 || next_char.is_some_and(&is_word_char)
691 {
692 continue;
693 }
694 }
695 matches.push(mat.start()..mat.end());
696 }
697 }
698 Self::Regex {
699 regex,
700 multiline,
701 one_match_per_line,
702 ..
703 } => {
704 if *multiline {
705 for mat in regex.find_iter(text).flatten() {
706 matches.push(mat.start()..mat.end());
707 }
708 } else {
709 let mut line_offset = 0;
710 for line in text.split('\n') {
711 for mat in regex.find_iter(line).flatten() {
712 matches.push((line_offset + mat.start())..(line_offset + mat.end()));
713 if *one_match_per_line {
714 break;
715 }
716 }
717 line_offset += line.len() + 1;
718 }
719 }
720 }
721 }
722 matches
723 }
724}