1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use fancy_regex::{Captures, Regex, RegexBuilder};
5use gpui::Entity;
6use language::{Buffer, BufferSnapshot, CharKind};
7use smol::future::yield_now;
8use std::{
9 borrow::Cow,
10 io::{BufRead, BufReader, Read},
11 ops::Range,
12 path::Path,
13 sync::{Arc, LazyLock},
14};
15use text::Anchor;
16use util::paths::PathMatcher;
17
18#[derive(Debug)]
19pub enum SearchResult {
20 Buffer {
21 buffer: Entity<Buffer>,
22 ranges: Vec<Range<Anchor>>,
23 },
24 LimitReached,
25}
26
/// Identifies one of the three textual inputs of a search.
///
/// NOTE(review): the variant meanings below are inferred from their names and
/// from the fields of `SearchInputs`; the consumers live outside this file.
#[derive(Clone, Copy, PartialEq)]
pub enum SearchInputKind {
    /// The search query itself.
    Query,
    /// The "files to include" filter.
    Include,
    /// The "files to exclude" filter.
    Exclude,
}
33
34#[derive(Clone, Debug)]
35pub struct SearchInputs {
36 query: Arc<str>,
37 files_to_include: PathMatcher,
38 files_to_exclude: PathMatcher,
39 match_full_paths: bool,
40 buffers: Option<Vec<Entity<Buffer>>>,
41}
42
43impl SearchInputs {
44 pub fn as_str(&self) -> &str {
45 self.query.as_ref()
46 }
47 pub fn files_to_include(&self) -> &PathMatcher {
48 &self.files_to_include
49 }
50 pub fn files_to_exclude(&self) -> &PathMatcher {
51 &self.files_to_exclude
52 }
53 pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
54 &self.buffers
55 }
56}
57#[derive(Clone, Debug)]
58pub enum SearchQuery {
59 Text {
60 search: AhoCorasick,
61 replacement: Option<String>,
62 whole_word: bool,
63 case_sensitive: bool,
64 include_ignored: bool,
65 inner: SearchInputs,
66 },
67
68 Regex {
69 regex: Regex,
70 replacement: Option<String>,
71 multiline: bool,
72 whole_word: bool,
73 case_sensitive: bool,
74 include_ignored: bool,
75 one_match_per_line: bool,
76 inner: SearchInputs,
77 },
78}
79
80static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
81 RegexBuilder::new(r"\B")
82 .build()
83 .expect("Failed to create WORD_MATCH_TEST")
84});
85
86impl SearchQuery {
87 /// Create a text query
88 ///
89 /// If `match_full_paths` is true, include/exclude patterns will always be matched against fully qualified project paths beginning with a project root.
90 /// If `match_full_paths` is false, patterns will be matched against full paths only when the project has multiple roots.
91 pub fn text(
92 query: impl ToString,
93 whole_word: bool,
94 case_sensitive: bool,
95 include_ignored: bool,
96 files_to_include: PathMatcher,
97 files_to_exclude: PathMatcher,
98 match_full_paths: bool,
99 buffers: Option<Vec<Entity<Buffer>>>,
100 ) -> Result<Self> {
101 let query = query.to_string();
102 if !case_sensitive && !query.is_ascii() {
103 // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
104 // Fallback to regex search as recommended by
105 // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
106 return Self::regex(
107 regex::escape(&query),
108 whole_word,
109 case_sensitive,
110 include_ignored,
111 false,
112 files_to_include,
113 files_to_exclude,
114 false,
115 buffers,
116 );
117 }
118 let search = AhoCorasickBuilder::new()
119 .ascii_case_insensitive(!case_sensitive)
120 .build([&query])?;
121 let inner = SearchInputs {
122 query: query.into(),
123 files_to_exclude,
124 files_to_include,
125 match_full_paths,
126 buffers,
127 };
128 Ok(Self::Text {
129 search,
130 replacement: None,
131 whole_word,
132 case_sensitive,
133 include_ignored,
134 inner,
135 })
136 }
137
138 /// Create a regex query
139 ///
140 /// If `match_full_paths` is true, include/exclude patterns will be matched against fully qualified project paths
141 /// beginning with a project root name. If false, they will be matched against project-relative paths (which don't start
142 /// with their respective project root).
143 pub fn regex(
144 query: impl ToString,
145 whole_word: bool,
146 case_sensitive: bool,
147 include_ignored: bool,
148 one_match_per_line: bool,
149 files_to_include: PathMatcher,
150 files_to_exclude: PathMatcher,
151 match_full_paths: bool,
152 buffers: Option<Vec<Entity<Buffer>>>,
153 ) -> Result<Self> {
154 let mut query = query.to_string();
155 let initial_query = Arc::from(query.as_str());
156 if whole_word {
157 let mut word_query = String::new();
158 if let Some(first) = query.get(0..1) {
159 if WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x) {
160 word_query.push_str("\\b");
161 }
162 }
163 word_query.push_str(&query);
164 if let Some(last) = query.get(query.len() - 1..) {
165 if WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x) {
166 word_query.push_str("\\b");
167 }
168 }
169 query = word_query
170 }
171
172 let multiline = query.contains('\n') || query.contains("\\n");
173 let regex = RegexBuilder::new(&query)
174 .case_insensitive(!case_sensitive)
175 .build()?;
176 let inner = SearchInputs {
177 query: initial_query,
178 files_to_exclude,
179 files_to_include,
180 match_full_paths,
181 buffers,
182 };
183 Ok(Self::Regex {
184 regex,
185 replacement: None,
186 multiline,
187 whole_word,
188 case_sensitive,
189 include_ignored,
190 inner,
191 one_match_per_line,
192 })
193 }
194
195 pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
196 if message.regex {
197 Self::regex(
198 message.query,
199 message.whole_word,
200 message.case_sensitive,
201 message.include_ignored,
202 false,
203 deserialize_path_matches(&message.files_to_include)?,
204 deserialize_path_matches(&message.files_to_exclude)?,
205 message.match_full_paths,
206 None, // search opened only don't need search remote
207 )
208 } else {
209 Self::text(
210 message.query,
211 message.whole_word,
212 message.case_sensitive,
213 message.include_ignored,
214 deserialize_path_matches(&message.files_to_include)?,
215 deserialize_path_matches(&message.files_to_exclude)?,
216 false,
217 None, // search opened only don't need search remote
218 )
219 }
220 }
221
222 pub fn with_replacement(mut self, new_replacement: String) -> Self {
223 match self {
224 Self::Text {
225 ref mut replacement,
226 ..
227 }
228 | Self::Regex {
229 ref mut replacement,
230 ..
231 } => {
232 *replacement = Some(new_replacement);
233 self
234 }
235 }
236 }
237
238 pub fn to_proto(&self) -> proto::SearchQuery {
239 proto::SearchQuery {
240 query: self.as_str().to_string(),
241 regex: self.is_regex(),
242 whole_word: self.whole_word(),
243 case_sensitive: self.case_sensitive(),
244 include_ignored: self.include_ignored(),
245 files_to_include: self.files_to_include().sources().join(","),
246 files_to_exclude: self.files_to_exclude().sources().join(","),
247 match_full_paths: self.match_full_paths(),
248 }
249 }
250
251 pub(crate) fn detect(
252 &self,
253 mut reader: BufReader<Box<dyn Read + Send + Sync>>,
254 ) -> Result<bool> {
255 if self.as_str().is_empty() {
256 return Ok(false);
257 }
258
259 match self {
260 Self::Text { search, .. } => {
261 let mat = search.stream_find_iter(reader).next();
262 match mat {
263 Some(Ok(_)) => Ok(true),
264 Some(Err(err)) => Err(err.into()),
265 None => Ok(false),
266 }
267 }
268 Self::Regex {
269 regex, multiline, ..
270 } => {
271 if *multiline {
272 let mut text = String::new();
273 if let Err(err) = reader.read_to_string(&mut text) {
274 Err(err.into())
275 } else {
276 Ok(regex.find(&text)?.is_some())
277 }
278 } else {
279 for line in reader.lines() {
280 let line = line?;
281 if regex.find(&line)?.is_some() {
282 return Ok(true);
283 }
284 }
285 Ok(false)
286 }
287 }
288 }
289 }
290 /// Returns the replacement text for this `SearchQuery`.
291 pub fn replacement(&self) -> Option<&str> {
292 match self {
293 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
294 replacement.as_deref()
295 }
296 }
297 }
298 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
299 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
300 match self {
301 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
302 SearchQuery::Regex {
303 regex, replacement, ..
304 } => {
305 if let Some(replacement) = replacement {
306 static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
307 LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
308 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
309 replacement,
310 |c: &Captures| match c.get(0).unwrap().as_str() {
311 r"\\" => "\\",
312 r"\n" => "\n",
313 r"\t" => "\t",
314 x => unreachable!("Unexpected escape sequence: {}", x),
315 },
316 );
317 Some(regex.replace(text, replacement))
318 } else {
319 None
320 }
321 }
322 }
323 }
324
325 pub async fn search(
326 &self,
327 buffer: &BufferSnapshot,
328 subrange: Option<Range<usize>>,
329 ) -> Vec<Range<usize>> {
330 const YIELD_INTERVAL: usize = 20000;
331
332 if self.as_str().is_empty() {
333 return Default::default();
334 }
335
336 let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
337 let rope = if let Some(range) = subrange {
338 buffer.as_rope().slice(range)
339 } else {
340 buffer.as_rope().clone()
341 };
342
343 let mut matches = Vec::new();
344 match self {
345 Self::Text {
346 search, whole_word, ..
347 } => {
348 for (ix, mat) in search
349 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
350 .enumerate()
351 {
352 if (ix + 1) % YIELD_INTERVAL == 0 {
353 yield_now().await;
354 }
355
356 let mat = mat.unwrap();
357 if *whole_word {
358 let classifier = buffer.char_classifier_at(range_offset + mat.start());
359
360 let prev_kind = rope
361 .reversed_chars_at(mat.start())
362 .next()
363 .map(|c| classifier.kind(c));
364 let start_kind =
365 classifier.kind(rope.chars_at(mat.start()).next().unwrap());
366 let end_kind =
367 classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
368 let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
369 if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
370 || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
371 {
372 continue;
373 }
374 }
375 matches.push(mat.start()..mat.end())
376 }
377 }
378
379 Self::Regex {
380 regex, multiline, ..
381 } => {
382 if *multiline {
383 let text = rope.to_string();
384 for (ix, mat) in regex.find_iter(&text).enumerate() {
385 if (ix + 1) % YIELD_INTERVAL == 0 {
386 yield_now().await;
387 }
388
389 if let Ok(mat) = mat {
390 matches.push(mat.start()..mat.end());
391 }
392 }
393 } else {
394 let mut line = String::new();
395 let mut line_offset = 0;
396 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
397 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
398 yield_now().await;
399 }
400
401 for (newline_ix, text) in chunk.split('\n').enumerate() {
402 if newline_ix > 0 {
403 for mat in regex.find_iter(&line).flatten() {
404 let start = line_offset + mat.start();
405 let end = line_offset + mat.end();
406 matches.push(start..end);
407 if self.one_match_per_line() == Some(true) {
408 break;
409 }
410 }
411
412 line_offset += line.len() + 1;
413 line.clear();
414 }
415 line.push_str(text);
416 }
417 }
418 }
419 }
420 }
421
422 matches
423 }
424
425 pub fn is_empty(&self) -> bool {
426 self.as_str().is_empty()
427 }
428
429 pub fn as_str(&self) -> &str {
430 self.as_inner().as_str()
431 }
432
433 pub fn whole_word(&self) -> bool {
434 match self {
435 Self::Text { whole_word, .. } => *whole_word,
436 Self::Regex { whole_word, .. } => *whole_word,
437 }
438 }
439
440 pub fn case_sensitive(&self) -> bool {
441 match self {
442 Self::Text { case_sensitive, .. } => *case_sensitive,
443 Self::Regex { case_sensitive, .. } => *case_sensitive,
444 }
445 }
446
447 pub fn include_ignored(&self) -> bool {
448 match self {
449 Self::Text {
450 include_ignored, ..
451 } => *include_ignored,
452 Self::Regex {
453 include_ignored, ..
454 } => *include_ignored,
455 }
456 }
457
458 pub fn is_regex(&self) -> bool {
459 matches!(self, Self::Regex { .. })
460 }
461
462 pub fn files_to_include(&self) -> &PathMatcher {
463 self.as_inner().files_to_include()
464 }
465
466 pub fn files_to_exclude(&self) -> &PathMatcher {
467 self.as_inner().files_to_exclude()
468 }
469
470 pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
471 self.as_inner().buffers.as_ref()
472 }
473
474 pub fn is_opened_only(&self) -> bool {
475 self.as_inner().buffers.is_some()
476 }
477
478 pub fn filters_path(&self) -> bool {
479 !(self.files_to_exclude().sources().is_empty()
480 && self.files_to_include().sources().is_empty())
481 }
482
483 pub fn match_full_paths(&self) -> bool {
484 self.as_inner().match_full_paths
485 }
486
487 /// Check match full paths to determine whether you're required to pass a fully qualified
488 /// project path (starts with a project root).
489 pub fn match_path(&self, file_path: &Path) -> bool {
490 let mut path = file_path.to_path_buf();
491 loop {
492 if self.files_to_exclude().is_match(&path) {
493 return false;
494 } else if self.files_to_include().sources().is_empty()
495 || self.files_to_include().is_match(&path)
496 {
497 return true;
498 } else if !path.pop() {
499 return false;
500 }
501 }
502 }
503 pub fn as_inner(&self) -> &SearchInputs {
504 match self {
505 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
506 }
507 }
508
509 /// Whether this search should replace only one match per line, instead of
510 /// all matches.
511 /// Returns `None` for text searches, as only regex searches support this
512 /// option.
513 pub fn one_match_per_line(&self) -> Option<bool> {
514 match self {
515 Self::Regex {
516 one_match_per_line, ..
517 } => Some(*one_match_per_line),
518 Self::Text { .. } => None,
519 }
520 }
521}
522
523pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
524 let globs = glob_set
525 .split(',')
526 .map(str::trim)
527 .filter(|&glob_str| !glob_str.is_empty());
528 Ok(PathMatcher::new(globs)?)
529}
530
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths are valid patterns, and each resulting matcher must match its own source.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed ones accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}