1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use fancy_regex::{Captures, Regex, RegexBuilder};
5use gpui::Entity;
6use itertools::Itertools as _;
7use language::{Buffer, BufferSnapshot, CharKind};
8use smol::future::yield_now;
9use std::{
10 borrow::Cow,
11 io::{BufRead, BufReader, Read},
12 ops::Range,
13 sync::{Arc, LazyLock},
14};
15use text::Anchor;
16use util::{
17 paths::{PathMatcher, PathStyle},
18 rel_path::RelPath,
19};
20
21#[derive(Debug)]
22pub enum SearchResult {
23 Buffer {
24 buffer: Entity<Buffer>,
25 ranges: Vec<Range<Anchor>>,
26 },
27 LimitReached,
28}
29
/// Identifies one of the three textual inputs of a search: the query itself,
/// the include patterns, or the exclude patterns.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SearchInputKind {
    /// The search query text.
    Query,
    /// The patterns selecting files to include.
    Include,
    /// The patterns selecting files to exclude.
    Exclude,
}
36
37#[derive(Clone, Debug)]
38pub struct SearchInputs {
39 query: Arc<str>,
40 files_to_include: PathMatcher,
41 files_to_exclude: PathMatcher,
42 match_full_paths: bool,
43 buffers: Option<Vec<Entity<Buffer>>>,
44}
45
46impl SearchInputs {
47 pub fn as_str(&self) -> &str {
48 self.query.as_ref()
49 }
50 pub fn files_to_include(&self) -> &PathMatcher {
51 &self.files_to_include
52 }
53 pub fn files_to_exclude(&self) -> &PathMatcher {
54 &self.files_to_exclude
55 }
56 pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
57 &self.buffers
58 }
59}
60#[derive(Clone, Debug)]
61pub enum SearchQuery {
62 Text {
63 search: AhoCorasick,
64 replacement: Option<String>,
65 whole_word: bool,
66 case_sensitive: bool,
67 include_ignored: bool,
68 inner: SearchInputs,
69 },
70 Regex {
71 regex: Regex,
72 replacement: Option<String>,
73 multiline: bool,
74 whole_word: bool,
75 case_sensitive: bool,
76 include_ignored: bool,
77 one_match_per_line: bool,
78 inner: SearchInputs,
79 },
80}
81
82static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
83 RegexBuilder::new(r"\B")
84 .build()
85 .expect("Failed to create WORD_MATCH_TEST")
86});
87
88impl SearchQuery {
89 /// Create a text query
90 ///
91 /// If `match_full_paths` is true, include/exclude patterns will always be matched against fully qualified project paths beginning with a project root.
92 /// If `match_full_paths` is false, patterns will be matched against worktree-relative paths.
93 pub fn text(
94 query: impl ToString,
95 whole_word: bool,
96 case_sensitive: bool,
97 include_ignored: bool,
98 files_to_include: PathMatcher,
99 files_to_exclude: PathMatcher,
100 match_full_paths: bool,
101 buffers: Option<Vec<Entity<Buffer>>>,
102 ) -> Result<Self> {
103 let query = query.to_string();
104 if !case_sensitive && !query.is_ascii() {
105 // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
106 // Fallback to regex search as recommended by
107 // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
108 return Self::regex(
109 regex::escape(&query),
110 whole_word,
111 case_sensitive,
112 include_ignored,
113 false,
114 files_to_include,
115 files_to_exclude,
116 false,
117 buffers,
118 );
119 }
120 let search = AhoCorasickBuilder::new()
121 .ascii_case_insensitive(!case_sensitive)
122 .build([&query])?;
123 let inner = SearchInputs {
124 query: query.into(),
125 files_to_exclude,
126 files_to_include,
127 match_full_paths,
128 buffers,
129 };
130 Ok(Self::Text {
131 search,
132 replacement: None,
133 whole_word,
134 case_sensitive,
135 include_ignored,
136 inner,
137 })
138 }
139
140 /// Create a regex query
141 ///
142 /// If `match_full_paths` is true, include/exclude patterns will be matched against fully qualified project paths
143 /// beginning with a project root name. If false, they will be matched against project-relative paths (which don't start
144 /// with their respective project root).
145 pub fn regex(
146 query: impl ToString,
147 whole_word: bool,
148 mut case_sensitive: bool,
149 include_ignored: bool,
150 one_match_per_line: bool,
151 files_to_include: PathMatcher,
152 files_to_exclude: PathMatcher,
153 match_full_paths: bool,
154 buffers: Option<Vec<Entity<Buffer>>>,
155 ) -> Result<Self> {
156 let mut query = query.to_string();
157 let initial_query = Arc::from(query.as_str());
158
159 if let Some((case_sensitive_from_pattern, new_query)) =
160 Self::case_sensitive_from_pattern(&query)
161 {
162 case_sensitive = case_sensitive_from_pattern;
163 query = new_query
164 }
165
166 if whole_word {
167 let mut word_query = String::new();
168 if let Some(first) = query.get(0..1)
169 && WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x)
170 {
171 word_query.push_str("\\b");
172 }
173 word_query.push_str(&query);
174 if let Some(last) = query.get(query.len() - 1..)
175 && WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x)
176 {
177 word_query.push_str("\\b");
178 }
179 query = word_query
180 }
181
182 let multiline = query.contains('\n') || query.contains("\\n");
183 if multiline {
184 query.insert_str(0, "(?m)");
185 }
186
187 let regex = RegexBuilder::new(&query)
188 .case_insensitive(!case_sensitive)
189 .build()?;
190 let inner = SearchInputs {
191 query: initial_query,
192 files_to_exclude,
193 files_to_include,
194 match_full_paths,
195 buffers,
196 };
197 Ok(Self::Regex {
198 regex,
199 replacement: None,
200 multiline,
201 whole_word,
202 case_sensitive,
203 include_ignored,
204 inner,
205 one_match_per_line,
206 })
207 }
208
/// Extracts case sensitivity settings from pattern items in the provided
/// query and returns the same query, with the pattern items removed.
///
/// The following pattern modifiers are supported:
///
/// - `\c` (case_sensitive: false)
/// - `\C` (case_sensitive: true)
///
/// When several modifiers are present, the last one wins. An escaped
/// backslash followed by `c`/`C` (`\\c`) is not a modifier and is kept.
///
/// If no pattern item were found, `None` will be returned.
fn case_sensitive_from_pattern(query: &str) -> Option<(bool, String)> {
    // Cheap pre-check; this can still be a false positive for `\\c`.
    if !(query.contains("\\c") || query.contains("\\C")) {
        return None;
    }

    let mut was_escaped = false;
    let mut new_query = String::with_capacity(query.len());
    let mut is_case_sensitive = None;

    for c in query.chars() {
        if was_escaped {
            match c {
                'c' => is_case_sensitive = Some(false),
                'C' => is_case_sensitive = Some(true),
                // Any other escape is passed through untouched.
                _ => {
                    new_query.push('\\');
                    new_query.push(c);
                }
            }
            was_escaped = false;
        } else if c == '\\' {
            was_escaped = true;
        } else {
            new_query.push(c);
        }
    }

    // A trailing lone backslash is part of the user's pattern; keep it so
    // the rewritten query differs from the input only by the modifiers.
    if was_escaped {
        new_query.push('\\');
    }

    is_case_sensitive.map(|c| (c, new_query))
}
247
248 pub fn from_proto(message: proto::SearchQuery, path_style: PathStyle) -> Result<Self> {
249 let files_to_include = if message.files_to_include.is_empty() {
250 message
251 .files_to_include_legacy
252 .split(',')
253 .map(str::trim)
254 .filter(|&glob_str| !glob_str.is_empty())
255 .map(|s| s.to_string())
256 .collect()
257 } else {
258 message.files_to_include
259 };
260
261 let files_to_exclude = if message.files_to_exclude.is_empty() {
262 message
263 .files_to_exclude_legacy
264 .split(',')
265 .map(str::trim)
266 .filter(|&glob_str| !glob_str.is_empty())
267 .map(|s| s.to_string())
268 .collect()
269 } else {
270 message.files_to_exclude
271 };
272
273 if message.regex {
274 Self::regex(
275 message.query,
276 message.whole_word,
277 message.case_sensitive,
278 message.include_ignored,
279 false,
280 PathMatcher::new(files_to_include, path_style)?,
281 PathMatcher::new(files_to_exclude, path_style)?,
282 message.match_full_paths,
283 None, // search opened only don't need search remote
284 )
285 } else {
286 Self::text(
287 message.query,
288 message.whole_word,
289 message.case_sensitive,
290 message.include_ignored,
291 PathMatcher::new(files_to_include, path_style)?,
292 PathMatcher::new(files_to_exclude, path_style)?,
293 message.match_full_paths,
294 None, // search opened only don't need search remote
295 )
296 }
297 }
298
299 pub fn with_replacement(mut self, new_replacement: String) -> Self {
300 match self {
301 Self::Text {
302 ref mut replacement,
303 ..
304 }
305 | Self::Regex {
306 ref mut replacement,
307 ..
308 } => {
309 *replacement = Some(new_replacement);
310 self
311 }
312 }
313 }
314
315 pub fn to_proto(&self) -> proto::SearchQuery {
316 let mut files_to_include = self.files_to_include().sources();
317 let mut files_to_exclude = self.files_to_exclude().sources();
318 proto::SearchQuery {
319 query: self.as_str().to_string(),
320 regex: self.is_regex(),
321 whole_word: self.whole_word(),
322 case_sensitive: self.case_sensitive(),
323 include_ignored: self.include_ignored(),
324 files_to_include: files_to_include.clone().map(ToOwned::to_owned).collect(),
325 files_to_exclude: files_to_exclude.clone().map(ToOwned::to_owned).collect(),
326 match_full_paths: self.match_full_paths(),
327 // Populate legacy fields for backwards compatibility
328 files_to_include_legacy: files_to_include.join(","),
329 files_to_exclude_legacy: files_to_exclude.join(","),
330 }
331 }
332
333 pub(crate) async fn detect(
334 &self,
335 mut reader: BufReader<Box<dyn Read + Send + Sync>>,
336 ) -> Result<bool> {
337 let query_str = self.as_str();
338 if query_str.is_empty() {
339 return Ok(false);
340 }
341
342 // Yield from this function every 20KB scanned.
343 const YIELD_THRESHOLD: usize = 20 * 1024;
344
345 match self {
346 Self::Text { search, .. } => {
347 let mut text = String::new();
348 if query_str.contains('\n') {
349 reader.read_to_string(&mut text)?;
350 Ok(search.is_match(&text))
351 } else {
352 let mut bytes_read = 0;
353 while reader.read_line(&mut text)? > 0 {
354 if search.is_match(&text) {
355 return Ok(true);
356 }
357 bytes_read += text.len();
358 if bytes_read >= YIELD_THRESHOLD {
359 bytes_read = 0;
360 smol::future::yield_now().await;
361 }
362 text.clear();
363 }
364 Ok(false)
365 }
366 }
367 Self::Regex {
368 regex, multiline, ..
369 } => {
370 let mut text = String::new();
371 if *multiline {
372 reader.read_to_string(&mut text)?;
373 Ok(regex.is_match(&text)?)
374 } else {
375 let mut bytes_read = 0;
376 while reader.read_line(&mut text)? > 0 {
377 if regex.is_match(&text)? {
378 return Ok(true);
379 }
380 bytes_read += text.len();
381 if bytes_read >= YIELD_THRESHOLD {
382 bytes_read = 0;
383 smol::future::yield_now().await;
384 }
385 text.clear();
386 }
387 Ok(false)
388 }
389 }
390 }
391 }
392 /// Returns the replacement text for this `SearchQuery`.
393 pub fn replacement(&self) -> Option<&str> {
394 match self {
395 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
396 replacement.as_deref()
397 }
398 }
399 }
400 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
401 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
402 match self {
403 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
404 SearchQuery::Regex {
405 regex, replacement, ..
406 } => {
407 if let Some(replacement) = replacement {
408 static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
409 LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
410 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
411 replacement,
412 |c: &Captures| match c.get(0).unwrap().as_str() {
413 r"\\" => "\\",
414 r"\n" => "\n",
415 r"\t" => "\t",
416 x => unreachable!("Unexpected escape sequence: {}", x),
417 },
418 );
419 Some(regex.replace(text, replacement))
420 } else {
421 None
422 }
423 }
424 }
425 }
426
427 pub async fn search(
428 &self,
429 buffer: &BufferSnapshot,
430 subrange: Option<Range<usize>>,
431 ) -> Vec<Range<usize>> {
432 const YIELD_INTERVAL: usize = 20000;
433
434 if self.as_str().is_empty() {
435 return Default::default();
436 }
437
438 let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
439 let rope = if let Some(range) = subrange {
440 buffer.as_rope().slice(range)
441 } else {
442 buffer.as_rope().clone()
443 };
444
445 let mut matches = Vec::new();
446 match self {
447 Self::Text {
448 search, whole_word, ..
449 } => {
450 for (ix, mat) in search
451 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
452 .enumerate()
453 {
454 if (ix + 1) % YIELD_INTERVAL == 0 {
455 yield_now().await;
456 }
457
458 let mat = mat.unwrap();
459 if *whole_word {
460 let classifier = buffer.char_classifier_at(range_offset + mat.start());
461
462 let prev_kind = rope
463 .reversed_chars_at(mat.start())
464 .next()
465 .map(|c| classifier.kind(c));
466 let start_kind =
467 classifier.kind(rope.chars_at(mat.start()).next().unwrap());
468 let end_kind =
469 classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
470 let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
471 if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
472 || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
473 {
474 continue;
475 }
476 }
477 matches.push(mat.start()..mat.end())
478 }
479 }
480
481 Self::Regex {
482 regex, multiline, ..
483 } => {
484 if *multiline {
485 let text = rope.to_string();
486 for (ix, mat) in regex.find_iter(&text).enumerate() {
487 if (ix + 1) % YIELD_INTERVAL == 0 {
488 yield_now().await;
489 }
490
491 if let Ok(mat) = mat {
492 matches.push(mat.start()..mat.end());
493 }
494 }
495 } else {
496 let mut line = String::new();
497 let mut line_offset = 0;
498 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
499 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
500 yield_now().await;
501 }
502
503 for (newline_ix, text) in chunk.split('\n').enumerate() {
504 if newline_ix > 0 {
505 for mat in regex.find_iter(&line).flatten() {
506 let start = line_offset + mat.start();
507 let end = line_offset + mat.end();
508 matches.push(start..end);
509 if self.one_match_per_line() == Some(true) {
510 break;
511 }
512 }
513
514 line_offset += line.len() + 1;
515 line.clear();
516 }
517 line.push_str(text);
518 }
519 }
520 }
521 }
522 }
523
524 matches
525 }
526
527 pub fn is_empty(&self) -> bool {
528 self.as_str().is_empty()
529 }
530
531 pub fn as_str(&self) -> &str {
532 self.as_inner().as_str()
533 }
534
535 pub fn whole_word(&self) -> bool {
536 match self {
537 Self::Text { whole_word, .. } => *whole_word,
538 Self::Regex { whole_word, .. } => *whole_word,
539 }
540 }
541
542 pub fn case_sensitive(&self) -> bool {
543 match self {
544 Self::Text { case_sensitive, .. } => *case_sensitive,
545 Self::Regex { case_sensitive, .. } => *case_sensitive,
546 }
547 }
548
549 pub fn include_ignored(&self) -> bool {
550 match self {
551 Self::Text {
552 include_ignored, ..
553 } => *include_ignored,
554 Self::Regex {
555 include_ignored, ..
556 } => *include_ignored,
557 }
558 }
559
560 pub fn is_regex(&self) -> bool {
561 matches!(self, Self::Regex { .. })
562 }
563
564 pub fn files_to_include(&self) -> &PathMatcher {
565 self.as_inner().files_to_include()
566 }
567
568 pub fn files_to_exclude(&self) -> &PathMatcher {
569 self.as_inner().files_to_exclude()
570 }
571
572 pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
573 self.as_inner().buffers.as_ref()
574 }
575
576 pub fn is_opened_only(&self) -> bool {
577 self.as_inner().buffers.is_some()
578 }
579
580 pub fn filters_path(&self) -> bool {
581 !(self.files_to_exclude().sources().next().is_none()
582 && self.files_to_include().sources().next().is_none())
583 }
584
585 pub fn match_full_paths(&self) -> bool {
586 self.as_inner().match_full_paths
587 }
588
589 /// Check match full paths to determine whether you're required to pass a fully qualified
590 /// project path (starts with a project root).
591 pub fn match_path(&self, file_path: &RelPath) -> bool {
592 let mut path = file_path.to_rel_path_buf();
593 loop {
594 if self.files_to_exclude().is_match(&path) {
595 return false;
596 } else if self.files_to_include().sources().next().is_none()
597 || self.files_to_include().is_match(&path)
598 {
599 return true;
600 } else if !path.pop() {
601 return false;
602 }
603 }
604 }
605 pub fn as_inner(&self) -> &SearchInputs {
606 match self {
607 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
608 }
609 }
610
611 /// Whether this search should replace only one match per line, instead of
612 /// all matches.
613 /// Returns `None` for text searches, as only regex searches support this
614 /// option.
615 pub fn one_match_per_line(&self) -> Option<bool> {
616 match self {
617 Self::Regex {
618 one_match_per_line, ..
619 } => Some(*one_match_per_line),
620 Self::Text { .. } => None,
621 }
622 }
623}