1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use fancy_regex::{Captures, Regex, RegexBuilder};
5use gpui::Entity;
6use language::{Buffer, BufferSnapshot, CharKind};
7use smol::future::yield_now;
8use std::{
9 borrow::Cow,
10 io::{BufRead, BufReader, Read},
11 ops::Range,
12 path::Path,
13 sync::{Arc, LazyLock},
14};
15use text::Anchor;
16use util::paths::PathMatcher;
17
/// One item produced by a search: either the matches found in a buffer, or a
/// marker that a limit was reached.
#[derive(Debug)]
pub enum SearchResult {
    /// A buffer together with the anchored ranges reported for it.
    Buffer {
        buffer: Entity<Buffer>,
        ranges: Vec<Range<Anchor>>,
    },
    /// NOTE(review): presumably signals that the producer stopped early because
    /// a result cap was hit — confirm against the code that emits it.
    LimitReached,
}
26
/// Identifies one of the three text inputs of a search: the query itself, the
/// include filter, or the exclude filter.
///
/// `Debug` and `Eq` are derived so values can be logged and used in
/// `assert_eq!`-style comparisons.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SearchInputKind {
    /// The search query text.
    Query,
    /// The "files to include" filter.
    Include,
    /// The "files to exclude" filter.
    Exclude,
}
33
/// The inputs a [`SearchQuery`] was constructed from: the query text plus the
/// path and buffer filters.
#[derive(Clone, Debug)]
pub struct SearchInputs {
    // Query text as received by the constructor (for regex queries this is the
    // text before any `\b` wrapping is applied).
    query: Arc<str>,
    // Matcher for paths that should be searched.
    files_to_include: PathMatcher,
    // Matcher for paths that should be skipped.
    files_to_exclude: PathMatcher,
    // Whether include/exclude patterns are matched against full project paths.
    match_full_paths: bool,
    // When `Some`, the search is restricted to these buffers.
    buffers: Option<Vec<Entity<Buffer>>>,
}
42
43impl SearchInputs {
44 pub fn as_str(&self) -> &str {
45 self.query.as_ref()
46 }
47 pub fn files_to_include(&self) -> &PathMatcher {
48 &self.files_to_include
49 }
50 pub fn files_to_exclude(&self) -> &PathMatcher {
51 &self.files_to_exclude
52 }
53 pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
54 &self.buffers
55 }
56}
/// A compiled search query: either a literal text search or a regex search,
/// together with its options and the inputs it was built from.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal search driven by an Aho-Corasick automaton.
    Text {
        search: AhoCorasick,
        // Replacement text; `None` until `with_replacement` is called.
        replacement: Option<String>,
        whole_word: bool,
        case_sensitive: bool,
        include_ignored: bool,
        inner: SearchInputs,
    },

    /// Regular-expression search.
    Regex {
        regex: Regex,
        // Replacement text; `None` until `with_replacement` is called.
        replacement: Option<String>,
        // True when the pattern text contains a newline or the two-character
        // sequence `\n`; such queries are run against the whole text instead of
        // line by line.
        multiline: bool,
        whole_word: bool,
        case_sensitive: bool,
        include_ignored: bool,
        // When true, line-by-line searching stops after the first match on each line.
        one_match_per_line: bool,
        inner: SearchInputs,
    },
}
79
/// Regex `\B`, which matches at any position that is *not* a word boundary.
///
/// `SearchQuery::regex` runs it against a single character of the query: when
/// there is no match, that character is treated as a word character and the
/// query gets a `\b` on that side.
static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
    RegexBuilder::new(r"\B")
        .build()
        .expect("Failed to create WORD_MATCH_TEST")
});
85
86impl SearchQuery {
87 /// Create a text query
88 ///
89 /// If `match_full_paths` is true, include/exclude patterns will always be matched against fully qualified project paths beginning with a project root.
90 /// If `match_full_paths` is false, patterns will be matched against full paths only when the project has multiple roots.
91 pub fn text(
92 query: impl ToString,
93 whole_word: bool,
94 case_sensitive: bool,
95 include_ignored: bool,
96 files_to_include: PathMatcher,
97 files_to_exclude: PathMatcher,
98 match_full_paths: bool,
99 buffers: Option<Vec<Entity<Buffer>>>,
100 ) -> Result<Self> {
101 let query = query.to_string();
102 if !case_sensitive && !query.is_ascii() {
103 // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
104 // Fallback to regex search as recommended by
105 // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
106 return Self::regex(
107 regex::escape(&query),
108 whole_word,
109 case_sensitive,
110 include_ignored,
111 false,
112 files_to_include,
113 files_to_exclude,
114 false,
115 buffers,
116 );
117 }
118 let search = AhoCorasickBuilder::new()
119 .ascii_case_insensitive(!case_sensitive)
120 .build([&query])?;
121 let inner = SearchInputs {
122 query: query.into(),
123 files_to_exclude,
124 files_to_include,
125 match_full_paths,
126 buffers,
127 };
128 Ok(Self::Text {
129 search,
130 replacement: None,
131 whole_word,
132 case_sensitive,
133 include_ignored,
134 inner,
135 })
136 }
137
138 /// Create a regex query
139 ///
140 /// If `match_full_paths` is true, include/exclude patterns will be matched against fully qualified project paths
141 /// beginning with a project root name. If false, they will be matched against project-relative paths (which don't start
142 /// with their respective project root).
143 pub fn regex(
144 query: impl ToString,
145 whole_word: bool,
146 case_sensitive: bool,
147 include_ignored: bool,
148 one_match_per_line: bool,
149 files_to_include: PathMatcher,
150 files_to_exclude: PathMatcher,
151 match_full_paths: bool,
152 buffers: Option<Vec<Entity<Buffer>>>,
153 ) -> Result<Self> {
154 let mut query = query.to_string();
155 let initial_query = Arc::from(query.as_str());
156 if whole_word {
157 let mut word_query = String::new();
158 if let Some(first) = query.get(0..1)
159 && WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x) {
160 word_query.push_str("\\b");
161 }
162 word_query.push_str(&query);
163 if let Some(last) = query.get(query.len() - 1..)
164 && WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x) {
165 word_query.push_str("\\b");
166 }
167 query = word_query
168 }
169
170 let multiline = query.contains('\n') || query.contains("\\n");
171 let regex = RegexBuilder::new(&query)
172 .case_insensitive(!case_sensitive)
173 .build()?;
174 let inner = SearchInputs {
175 query: initial_query,
176 files_to_exclude,
177 files_to_include,
178 match_full_paths,
179 buffers,
180 };
181 Ok(Self::Regex {
182 regex,
183 replacement: None,
184 multiline,
185 whole_word,
186 case_sensitive,
187 include_ignored,
188 inner,
189 one_match_per_line,
190 })
191 }
192
193 pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
194 let files_to_include = if message.files_to_include.is_empty() {
195 message
196 .files_to_include_legacy
197 .split(',')
198 .map(str::trim)
199 .filter(|&glob_str| !glob_str.is_empty())
200 .map(|s| s.to_string())
201 .collect()
202 } else {
203 message.files_to_include
204 };
205
206 let files_to_exclude = if message.files_to_exclude.is_empty() {
207 message
208 .files_to_exclude_legacy
209 .split(',')
210 .map(str::trim)
211 .filter(|&glob_str| !glob_str.is_empty())
212 .map(|s| s.to_string())
213 .collect()
214 } else {
215 message.files_to_exclude
216 };
217
218 if message.regex {
219 Self::regex(
220 message.query,
221 message.whole_word,
222 message.case_sensitive,
223 message.include_ignored,
224 false,
225 PathMatcher::new(files_to_include)?,
226 PathMatcher::new(files_to_exclude)?,
227 message.match_full_paths,
228 None, // search opened only don't need search remote
229 )
230 } else {
231 Self::text(
232 message.query,
233 message.whole_word,
234 message.case_sensitive,
235 message.include_ignored,
236 PathMatcher::new(files_to_include)?,
237 PathMatcher::new(files_to_exclude)?,
238 false,
239 None, // search opened only don't need search remote
240 )
241 }
242 }
243
244 pub fn with_replacement(mut self, new_replacement: String) -> Self {
245 match self {
246 Self::Text {
247 ref mut replacement,
248 ..
249 }
250 | Self::Regex {
251 ref mut replacement,
252 ..
253 } => {
254 *replacement = Some(new_replacement);
255 self
256 }
257 }
258 }
259
260 pub fn to_proto(&self) -> proto::SearchQuery {
261 let files_to_include = self.files_to_include().sources().to_vec();
262 let files_to_exclude = self.files_to_exclude().sources().to_vec();
263 proto::SearchQuery {
264 query: self.as_str().to_string(),
265 regex: self.is_regex(),
266 whole_word: self.whole_word(),
267 case_sensitive: self.case_sensitive(),
268 include_ignored: self.include_ignored(),
269 files_to_include: files_to_include.clone(),
270 files_to_exclude: files_to_exclude.clone(),
271 match_full_paths: self.match_full_paths(),
272 // Populate legacy fields for backwards compatibility
273 files_to_include_legacy: files_to_include.join(","),
274 files_to_exclude_legacy: files_to_exclude.join(","),
275 }
276 }
277
278 pub(crate) fn detect(
279 &self,
280 mut reader: BufReader<Box<dyn Read + Send + Sync>>,
281 ) -> Result<bool> {
282 if self.as_str().is_empty() {
283 return Ok(false);
284 }
285
286 match self {
287 Self::Text { search, .. } => {
288 let mat = search.stream_find_iter(reader).next();
289 match mat {
290 Some(Ok(_)) => Ok(true),
291 Some(Err(err)) => Err(err.into()),
292 None => Ok(false),
293 }
294 }
295 Self::Regex {
296 regex, multiline, ..
297 } => {
298 if *multiline {
299 let mut text = String::new();
300 if let Err(err) = reader.read_to_string(&mut text) {
301 Err(err.into())
302 } else {
303 Ok(regex.find(&text)?.is_some())
304 }
305 } else {
306 for line in reader.lines() {
307 let line = line?;
308 if regex.find(&line)?.is_some() {
309 return Ok(true);
310 }
311 }
312 Ok(false)
313 }
314 }
315 }
316 }
317 /// Returns the replacement text for this `SearchQuery`.
318 pub fn replacement(&self) -> Option<&str> {
319 match self {
320 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
321 replacement.as_deref()
322 }
323 }
324 }
325 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
326 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
327 match self {
328 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
329 SearchQuery::Regex {
330 regex, replacement, ..
331 } => {
332 if let Some(replacement) = replacement {
333 static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
334 LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
335 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
336 replacement,
337 |c: &Captures| match c.get(0).unwrap().as_str() {
338 r"\\" => "\\",
339 r"\n" => "\n",
340 r"\t" => "\t",
341 x => unreachable!("Unexpected escape sequence: {}", x),
342 },
343 );
344 Some(regex.replace(text, replacement))
345 } else {
346 None
347 }
348 }
349 }
350 }
351
352 pub async fn search(
353 &self,
354 buffer: &BufferSnapshot,
355 subrange: Option<Range<usize>>,
356 ) -> Vec<Range<usize>> {
357 const YIELD_INTERVAL: usize = 20000;
358
359 if self.as_str().is_empty() {
360 return Default::default();
361 }
362
363 let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
364 let rope = if let Some(range) = subrange {
365 buffer.as_rope().slice(range)
366 } else {
367 buffer.as_rope().clone()
368 };
369
370 let mut matches = Vec::new();
371 match self {
372 Self::Text {
373 search, whole_word, ..
374 } => {
375 for (ix, mat) in search
376 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
377 .enumerate()
378 {
379 if (ix + 1) % YIELD_INTERVAL == 0 {
380 yield_now().await;
381 }
382
383 let mat = mat.unwrap();
384 if *whole_word {
385 let classifier = buffer.char_classifier_at(range_offset + mat.start());
386
387 let prev_kind = rope
388 .reversed_chars_at(mat.start())
389 .next()
390 .map(|c| classifier.kind(c));
391 let start_kind =
392 classifier.kind(rope.chars_at(mat.start()).next().unwrap());
393 let end_kind =
394 classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
395 let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
396 if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
397 || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
398 {
399 continue;
400 }
401 }
402 matches.push(mat.start()..mat.end())
403 }
404 }
405
406 Self::Regex {
407 regex, multiline, ..
408 } => {
409 if *multiline {
410 let text = rope.to_string();
411 for (ix, mat) in regex.find_iter(&text).enumerate() {
412 if (ix + 1) % YIELD_INTERVAL == 0 {
413 yield_now().await;
414 }
415
416 if let Ok(mat) = mat {
417 matches.push(mat.start()..mat.end());
418 }
419 }
420 } else {
421 let mut line = String::new();
422 let mut line_offset = 0;
423 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
424 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
425 yield_now().await;
426 }
427
428 for (newline_ix, text) in chunk.split('\n').enumerate() {
429 if newline_ix > 0 {
430 for mat in regex.find_iter(&line).flatten() {
431 let start = line_offset + mat.start();
432 let end = line_offset + mat.end();
433 matches.push(start..end);
434 if self.one_match_per_line() == Some(true) {
435 break;
436 }
437 }
438
439 line_offset += line.len() + 1;
440 line.clear();
441 }
442 line.push_str(text);
443 }
444 }
445 }
446 }
447 }
448
449 matches
450 }
451
452 pub fn is_empty(&self) -> bool {
453 self.as_str().is_empty()
454 }
455
456 pub fn as_str(&self) -> &str {
457 self.as_inner().as_str()
458 }
459
460 pub fn whole_word(&self) -> bool {
461 match self {
462 Self::Text { whole_word, .. } => *whole_word,
463 Self::Regex { whole_word, .. } => *whole_word,
464 }
465 }
466
467 pub fn case_sensitive(&self) -> bool {
468 match self {
469 Self::Text { case_sensitive, .. } => *case_sensitive,
470 Self::Regex { case_sensitive, .. } => *case_sensitive,
471 }
472 }
473
474 pub fn include_ignored(&self) -> bool {
475 match self {
476 Self::Text {
477 include_ignored, ..
478 } => *include_ignored,
479 Self::Regex {
480 include_ignored, ..
481 } => *include_ignored,
482 }
483 }
484
485 pub fn is_regex(&self) -> bool {
486 matches!(self, Self::Regex { .. })
487 }
488
489 pub fn files_to_include(&self) -> &PathMatcher {
490 self.as_inner().files_to_include()
491 }
492
493 pub fn files_to_exclude(&self) -> &PathMatcher {
494 self.as_inner().files_to_exclude()
495 }
496
497 pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
498 self.as_inner().buffers.as_ref()
499 }
500
501 pub fn is_opened_only(&self) -> bool {
502 self.as_inner().buffers.is_some()
503 }
504
505 pub fn filters_path(&self) -> bool {
506 !(self.files_to_exclude().sources().is_empty()
507 && self.files_to_include().sources().is_empty())
508 }
509
510 pub fn match_full_paths(&self) -> bool {
511 self.as_inner().match_full_paths
512 }
513
514 /// Check match full paths to determine whether you're required to pass a fully qualified
515 /// project path (starts with a project root).
516 pub fn match_path(&self, file_path: &Path) -> bool {
517 let mut path = file_path.to_path_buf();
518 loop {
519 if self.files_to_exclude().is_match(&path) {
520 return false;
521 } else if self.files_to_include().sources().is_empty()
522 || self.files_to_include().is_match(&path)
523 {
524 return true;
525 } else if !path.pop() {
526 return false;
527 }
528 }
529 }
530 pub fn as_inner(&self) -> &SearchInputs {
531 match self {
532 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
533 }
534 }
535
536 /// Whether this search should replace only one match per line, instead of
537 /// all matches.
538 /// Returns `None` for text searches, as only regex searches support this
539 /// option.
540 pub fn one_match_per_line(&self) -> Option<bool> {
541 match self {
542 Self::Regex {
543 one_match_per_line, ..
544 } => Some(*one_match_per_line),
545 Self::Text { .. } => None,
546 }
547 }
548}
549
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must build a matcher, and that matcher must match the path itself.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];
        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed globs accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}