1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use fancy_regex::{Captures, Regex, RegexBuilder};
5use gpui::Entity;
6use language::{Buffer, BufferSnapshot, CharKind};
7use smol::future::yield_now;
8use std::{
9 borrow::Cow,
10 io::{BufRead, BufReader, Read},
11 ops::Range,
12 path::Path,
13 sync::{Arc, LazyLock},
14};
15use text::Anchor;
16use util::paths::PathMatcher;
17
18#[derive(Debug)]
19pub enum SearchResult {
20 Buffer {
21 buffer: Entity<Buffer>,
22 ranges: Vec<Range<Anchor>>,
23 },
24 LimitReached,
25}
26
/// Identifies one of the three textual inputs a search is built from.
#[derive(Clone, Copy, PartialEq)]
pub enum SearchInputKind {
    /// The query text itself.
    Query,
    /// The "files to include" input.
    Include,
    /// The "files to exclude" input.
    Exclude,
}
33
34#[derive(Clone, Debug)]
35pub struct SearchInputs {
36 query: Arc<str>,
37 files_to_include: PathMatcher,
38 files_to_exclude: PathMatcher,
39 match_full_paths: bool,
40 buffers: Option<Vec<Entity<Buffer>>>,
41}
42
43impl SearchInputs {
44 pub fn as_str(&self) -> &str {
45 self.query.as_ref()
46 }
47 pub fn files_to_include(&self) -> &PathMatcher {
48 &self.files_to_include
49 }
50 pub fn files_to_exclude(&self) -> &PathMatcher {
51 &self.files_to_exclude
52 }
53 pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
54 &self.buffers
55 }
56}
57#[derive(Clone, Debug)]
58pub enum SearchQuery {
59 Text {
60 search: AhoCorasick,
61 replacement: Option<String>,
62 whole_word: bool,
63 case_sensitive: bool,
64 include_ignored: bool,
65 inner: SearchInputs,
66 },
67
68 Regex {
69 regex: Regex,
70 replacement: Option<String>,
71 multiline: bool,
72 whole_word: bool,
73 case_sensitive: bool,
74 include_ignored: bool,
75 one_match_per_line: bool,
76 inner: SearchInputs,
77 },
78}
79
80static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
81 RegexBuilder::new(r"\B")
82 .build()
83 .expect("Failed to create WORD_MATCH_TEST")
84});
85
86impl SearchQuery {
87 /// Create a text query
88 ///
89 /// If `match_full_paths` is true, include/exclude patterns will always be matched against fully qualified project paths beginning with a project root.
90 /// If `match_full_paths` is false, patterns will be matched against full paths only when the project has multiple roots.
91 pub fn text(
92 query: impl ToString,
93 whole_word: bool,
94 case_sensitive: bool,
95 include_ignored: bool,
96 files_to_include: PathMatcher,
97 files_to_exclude: PathMatcher,
98 match_full_paths: bool,
99 buffers: Option<Vec<Entity<Buffer>>>,
100 ) -> Result<Self> {
101 let query = query.to_string();
102 if !case_sensitive && !query.is_ascii() {
103 // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
104 // Fallback to regex search as recommended by
105 // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
106 return Self::regex(
107 regex::escape(&query),
108 whole_word,
109 case_sensitive,
110 include_ignored,
111 false,
112 files_to_include,
113 files_to_exclude,
114 false,
115 buffers,
116 );
117 }
118 let search = AhoCorasickBuilder::new()
119 .ascii_case_insensitive(!case_sensitive)
120 .build([&query])?;
121 let inner = SearchInputs {
122 query: query.into(),
123 files_to_exclude,
124 files_to_include,
125 match_full_paths,
126 buffers,
127 };
128 Ok(Self::Text {
129 search,
130 replacement: None,
131 whole_word,
132 case_sensitive,
133 include_ignored,
134 inner,
135 })
136 }
137
138 /// Create a regex query
139 ///
140 /// If `match_full_paths` is true, include/exclude patterns will be matched against fully qualified project paths
141 /// beginning with a project root name. If false, they will be matched against project-relative paths (which don't start
142 /// with their respective project root).
143 pub fn regex(
144 query: impl ToString,
145 whole_word: bool,
146 case_sensitive: bool,
147 include_ignored: bool,
148 one_match_per_line: bool,
149 files_to_include: PathMatcher,
150 files_to_exclude: PathMatcher,
151 match_full_paths: bool,
152 buffers: Option<Vec<Entity<Buffer>>>,
153 ) -> Result<Self> {
154 let mut query = query.to_string();
155 let initial_query = Arc::from(query.as_str());
156 if whole_word {
157 let mut word_query = String::new();
158 if let Some(first) = query.get(0..1)
159 && WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x)
160 {
161 word_query.push_str("\\b");
162 }
163 word_query.push_str(&query);
164 if let Some(last) = query.get(query.len() - 1..)
165 && WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x)
166 {
167 word_query.push_str("\\b");
168 }
169 query = word_query
170 }
171
172 let multiline = query.contains('\n') || query.contains("\\n");
173 let regex = RegexBuilder::new(&query)
174 .case_insensitive(!case_sensitive)
175 .build()?;
176 let inner = SearchInputs {
177 query: initial_query,
178 files_to_exclude,
179 files_to_include,
180 match_full_paths,
181 buffers,
182 };
183 Ok(Self::Regex {
184 regex,
185 replacement: None,
186 multiline,
187 whole_word,
188 case_sensitive,
189 include_ignored,
190 inner,
191 one_match_per_line,
192 })
193 }
194
195 pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
196 let files_to_include = if message.files_to_include.is_empty() {
197 message
198 .files_to_include_legacy
199 .split(',')
200 .map(str::trim)
201 .filter(|&glob_str| !glob_str.is_empty())
202 .map(|s| s.to_string())
203 .collect()
204 } else {
205 message.files_to_include
206 };
207
208 let files_to_exclude = if message.files_to_exclude.is_empty() {
209 message
210 .files_to_exclude_legacy
211 .split(',')
212 .map(str::trim)
213 .filter(|&glob_str| !glob_str.is_empty())
214 .map(|s| s.to_string())
215 .collect()
216 } else {
217 message.files_to_exclude
218 };
219
220 if message.regex {
221 Self::regex(
222 message.query,
223 message.whole_word,
224 message.case_sensitive,
225 message.include_ignored,
226 false,
227 PathMatcher::new(files_to_include)?,
228 PathMatcher::new(files_to_exclude)?,
229 message.match_full_paths,
230 None, // search opened only don't need search remote
231 )
232 } else {
233 Self::text(
234 message.query,
235 message.whole_word,
236 message.case_sensitive,
237 message.include_ignored,
238 PathMatcher::new(files_to_include)?,
239 PathMatcher::new(files_to_exclude)?,
240 false,
241 None, // search opened only don't need search remote
242 )
243 }
244 }
245
246 pub fn with_replacement(mut self, new_replacement: String) -> Self {
247 match self {
248 Self::Text {
249 ref mut replacement,
250 ..
251 }
252 | Self::Regex {
253 ref mut replacement,
254 ..
255 } => {
256 *replacement = Some(new_replacement);
257 self
258 }
259 }
260 }
261
262 pub fn to_proto(&self) -> proto::SearchQuery {
263 let files_to_include = self.files_to_include().sources().to_vec();
264 let files_to_exclude = self.files_to_exclude().sources().to_vec();
265 proto::SearchQuery {
266 query: self.as_str().to_string(),
267 regex: self.is_regex(),
268 whole_word: self.whole_word(),
269 case_sensitive: self.case_sensitive(),
270 include_ignored: self.include_ignored(),
271 files_to_include: files_to_include.clone(),
272 files_to_exclude: files_to_exclude.clone(),
273 match_full_paths: self.match_full_paths(),
274 // Populate legacy fields for backwards compatibility
275 files_to_include_legacy: files_to_include.join(","),
276 files_to_exclude_legacy: files_to_exclude.join(","),
277 }
278 }
279
280 pub(crate) fn detect(
281 &self,
282 mut reader: BufReader<Box<dyn Read + Send + Sync>>,
283 ) -> Result<bool> {
284 if self.as_str().is_empty() {
285 return Ok(false);
286 }
287
288 match self {
289 Self::Text { search, .. } => {
290 let mat = search.stream_find_iter(reader).next();
291 match mat {
292 Some(Ok(_)) => Ok(true),
293 Some(Err(err)) => Err(err.into()),
294 None => Ok(false),
295 }
296 }
297 Self::Regex {
298 regex, multiline, ..
299 } => {
300 if *multiline {
301 let mut text = String::new();
302 if let Err(err) = reader.read_to_string(&mut text) {
303 Err(err.into())
304 } else {
305 Ok(regex.find(&text)?.is_some())
306 }
307 } else {
308 for line in reader.lines() {
309 let line = line?;
310 if regex.find(&line)?.is_some() {
311 return Ok(true);
312 }
313 }
314 Ok(false)
315 }
316 }
317 }
318 }
319 /// Returns the replacement text for this `SearchQuery`.
320 pub fn replacement(&self) -> Option<&str> {
321 match self {
322 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
323 replacement.as_deref()
324 }
325 }
326 }
327 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
328 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
329 match self {
330 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
331 SearchQuery::Regex {
332 regex, replacement, ..
333 } => {
334 if let Some(replacement) = replacement {
335 static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
336 LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
337 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
338 replacement,
339 |c: &Captures| match c.get(0).unwrap().as_str() {
340 r"\\" => "\\",
341 r"\n" => "\n",
342 r"\t" => "\t",
343 x => unreachable!("Unexpected escape sequence: {}", x),
344 },
345 );
346 Some(regex.replace(text, replacement))
347 } else {
348 None
349 }
350 }
351 }
352 }
353
354 pub async fn search(
355 &self,
356 buffer: &BufferSnapshot,
357 subrange: Option<Range<usize>>,
358 ) -> Vec<Range<usize>> {
359 const YIELD_INTERVAL: usize = 20000;
360
361 if self.as_str().is_empty() {
362 return Default::default();
363 }
364
365 let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
366 let rope = if let Some(range) = subrange {
367 buffer.as_rope().slice(range)
368 } else {
369 buffer.as_rope().clone()
370 };
371
372 let mut matches = Vec::new();
373 match self {
374 Self::Text {
375 search, whole_word, ..
376 } => {
377 for (ix, mat) in search
378 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
379 .enumerate()
380 {
381 if (ix + 1) % YIELD_INTERVAL == 0 {
382 yield_now().await;
383 }
384
385 let mat = mat.unwrap();
386 if *whole_word {
387 let classifier = buffer.char_classifier_at(range_offset + mat.start());
388
389 let prev_kind = rope
390 .reversed_chars_at(mat.start())
391 .next()
392 .map(|c| classifier.kind(c));
393 let start_kind =
394 classifier.kind(rope.chars_at(mat.start()).next().unwrap());
395 let end_kind =
396 classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
397 let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
398 if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
399 || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
400 {
401 continue;
402 }
403 }
404 matches.push(mat.start()..mat.end())
405 }
406 }
407
408 Self::Regex {
409 regex, multiline, ..
410 } => {
411 if *multiline {
412 let text = rope.to_string();
413 for (ix, mat) in regex.find_iter(&text).enumerate() {
414 if (ix + 1) % YIELD_INTERVAL == 0 {
415 yield_now().await;
416 }
417
418 if let Ok(mat) = mat {
419 matches.push(mat.start()..mat.end());
420 }
421 }
422 } else {
423 let mut line = String::new();
424 let mut line_offset = 0;
425 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
426 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
427 yield_now().await;
428 }
429
430 for (newline_ix, text) in chunk.split('\n').enumerate() {
431 if newline_ix > 0 {
432 for mat in regex.find_iter(&line).flatten() {
433 let start = line_offset + mat.start();
434 let end = line_offset + mat.end();
435 matches.push(start..end);
436 if self.one_match_per_line() == Some(true) {
437 break;
438 }
439 }
440
441 line_offset += line.len() + 1;
442 line.clear();
443 }
444 line.push_str(text);
445 }
446 }
447 }
448 }
449 }
450
451 matches
452 }
453
454 pub fn is_empty(&self) -> bool {
455 self.as_str().is_empty()
456 }
457
458 pub fn as_str(&self) -> &str {
459 self.as_inner().as_str()
460 }
461
462 pub fn whole_word(&self) -> bool {
463 match self {
464 Self::Text { whole_word, .. } => *whole_word,
465 Self::Regex { whole_word, .. } => *whole_word,
466 }
467 }
468
469 pub fn case_sensitive(&self) -> bool {
470 match self {
471 Self::Text { case_sensitive, .. } => *case_sensitive,
472 Self::Regex { case_sensitive, .. } => *case_sensitive,
473 }
474 }
475
476 pub fn include_ignored(&self) -> bool {
477 match self {
478 Self::Text {
479 include_ignored, ..
480 } => *include_ignored,
481 Self::Regex {
482 include_ignored, ..
483 } => *include_ignored,
484 }
485 }
486
487 pub fn is_regex(&self) -> bool {
488 matches!(self, Self::Regex { .. })
489 }
490
491 pub fn files_to_include(&self) -> &PathMatcher {
492 self.as_inner().files_to_include()
493 }
494
495 pub fn files_to_exclude(&self) -> &PathMatcher {
496 self.as_inner().files_to_exclude()
497 }
498
499 pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
500 self.as_inner().buffers.as_ref()
501 }
502
503 pub fn is_opened_only(&self) -> bool {
504 self.as_inner().buffers.is_some()
505 }
506
507 pub fn filters_path(&self) -> bool {
508 !(self.files_to_exclude().sources().is_empty()
509 && self.files_to_include().sources().is_empty())
510 }
511
512 pub fn match_full_paths(&self) -> bool {
513 self.as_inner().match_full_paths
514 }
515
516 /// Check match full paths to determine whether you're required to pass a fully qualified
517 /// project path (starts with a project root).
518 pub fn match_path(&self, file_path: &Path) -> bool {
519 let mut path = file_path.to_path_buf();
520 loop {
521 if self.files_to_exclude().is_match(&path) {
522 return false;
523 } else if self.files_to_include().sources().is_empty()
524 || self.files_to_include().is_match(&path)
525 {
526 return true;
527 } else if !path.pop() {
528 return false;
529 }
530 }
531 }
532 pub fn as_inner(&self) -> &SearchInputs {
533 match self {
534 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
535 }
536 }
537
538 /// Whether this search should replace only one match per line, instead of
539 /// all matches.
540 /// Returns `None` for text searches, as only regex searches support this
541 /// option.
542 pub fn one_match_per_line(&self) -> Option<bool> {
543 match self {
544 Self::Regex {
545 one_match_per_line, ..
546 } => Some(*one_match_per_line),
547 Self::Text { .. } => None,
548 }
549 }
550}
551
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted as patterns and must match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed ones accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}