1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use fancy_regex::{Captures, Regex, RegexBuilder};
5use gpui::Entity;
6use language::{Buffer, BufferSnapshot, CharKind};
7use smol::future::yield_now;
8use std::{
9 borrow::Cow,
10 io::{BufRead, BufReader, Read},
11 ops::Range,
12 path::Path,
13 sync::{Arc, LazyLock},
14};
15use text::Anchor;
16use util::paths::PathMatcher;
17
18#[derive(Debug)]
19pub enum SearchResult {
20 Buffer {
21 buffer: Entity<Buffer>,
22 ranges: Vec<Range<Anchor>>,
23 },
24 LimitReached,
25}
26
/// Identifies one of the three inputs a search is configured with.
// NOTE(review): presumably maps to the query text, the include-filter and the
// exclude-filter inputs respectively — confirm against the UI code that uses it.
#[derive(Clone, Copy, PartialEq)]
pub enum SearchInputKind {
    /// The search query input.
    Query,
    /// The "files to include" input.
    Include,
    /// The "files to exclude" input.
    Exclude,
}
33
34#[derive(Clone, Debug)]
35pub struct SearchInputs {
36 query: Arc<str>,
37 files_to_include: PathMatcher,
38 files_to_exclude: PathMatcher,
39 buffers: Option<Vec<Entity<Buffer>>>,
40}
41
42impl SearchInputs {
43 pub fn as_str(&self) -> &str {
44 self.query.as_ref()
45 }
46 pub fn files_to_include(&self) -> &PathMatcher {
47 &self.files_to_include
48 }
49 pub fn files_to_exclude(&self) -> &PathMatcher {
50 &self.files_to_exclude
51 }
52 pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
53 &self.buffers
54 }
55}
56#[derive(Clone, Debug)]
57pub enum SearchQuery {
58 Text {
59 search: AhoCorasick,
60 replacement: Option<String>,
61 whole_word: bool,
62 case_sensitive: bool,
63 include_ignored: bool,
64 inner: SearchInputs,
65 },
66
67 Regex {
68 regex: Regex,
69 replacement: Option<String>,
70 multiline: bool,
71 whole_word: bool,
72 case_sensitive: bool,
73 include_ignored: bool,
74 one_match_per_line: bool,
75 inner: SearchInputs,
76 },
77}
78
79static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
80 RegexBuilder::new(r"\B")
81 .build()
82 .expect("Failed to create WORD_MATCH_TEST")
83});
84
85impl SearchQuery {
86 pub fn text(
87 query: impl ToString,
88 whole_word: bool,
89 case_sensitive: bool,
90 include_ignored: bool,
91 files_to_include: PathMatcher,
92 files_to_exclude: PathMatcher,
93 buffers: Option<Vec<Entity<Buffer>>>,
94 ) -> Result<Self> {
95 let query = query.to_string();
96 if !case_sensitive && !query.is_ascii() {
97 // AhoCorasickBuilder doesn't support case-insensitive search with unicode characters
98 // Fallback to regex search as recommended by
99 // https://docs.rs/aho-corasick/1.1/aho_corasick/struct.AhoCorasickBuilder.html#method.ascii_case_insensitive
100 return Self::regex(
101 regex::escape(&query),
102 whole_word,
103 case_sensitive,
104 include_ignored,
105 false,
106 files_to_include,
107 files_to_exclude,
108 buffers,
109 );
110 }
111 let search = AhoCorasickBuilder::new()
112 .ascii_case_insensitive(!case_sensitive)
113 .build([&query])?;
114 let inner = SearchInputs {
115 query: query.into(),
116 files_to_exclude,
117 files_to_include,
118 buffers,
119 };
120 Ok(Self::Text {
121 search,
122 replacement: None,
123 whole_word,
124 case_sensitive,
125 include_ignored,
126 inner,
127 })
128 }
129
130 pub fn regex(
131 query: impl ToString,
132 whole_word: bool,
133 case_sensitive: bool,
134 include_ignored: bool,
135 one_match_per_line: bool,
136 files_to_include: PathMatcher,
137 files_to_exclude: PathMatcher,
138 buffers: Option<Vec<Entity<Buffer>>>,
139 ) -> Result<Self> {
140 let mut query = query.to_string();
141 let initial_query = Arc::from(query.as_str());
142 if whole_word {
143 let mut word_query = String::new();
144 if let Some(first) = query.get(0..1) {
145 if WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x) {
146 word_query.push_str("\\b");
147 }
148 }
149 word_query.push_str(&query);
150 if let Some(last) = query.get(query.len() - 1..) {
151 if WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x) {
152 word_query.push_str("\\b");
153 }
154 }
155 query = word_query
156 }
157
158 let multiline = query.contains('\n') || query.contains("\\n");
159 let regex = RegexBuilder::new(&query)
160 .case_insensitive(!case_sensitive)
161 .build()?;
162 let inner = SearchInputs {
163 query: initial_query,
164 files_to_exclude,
165 files_to_include,
166 buffers,
167 };
168 Ok(Self::Regex {
169 regex,
170 replacement: None,
171 multiline,
172 whole_word,
173 case_sensitive,
174 include_ignored,
175 inner,
176 one_match_per_line,
177 })
178 }
179
180 pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
181 if message.regex {
182 Self::regex(
183 message.query,
184 message.whole_word,
185 message.case_sensitive,
186 message.include_ignored,
187 false,
188 deserialize_path_matches(&message.files_to_include)?,
189 deserialize_path_matches(&message.files_to_exclude)?,
190 None, // search opened only don't need search remote
191 )
192 } else {
193 Self::text(
194 message.query,
195 message.whole_word,
196 message.case_sensitive,
197 message.include_ignored,
198 deserialize_path_matches(&message.files_to_include)?,
199 deserialize_path_matches(&message.files_to_exclude)?,
200 None, // search opened only don't need search remote
201 )
202 }
203 }
204
205 pub fn with_replacement(mut self, new_replacement: String) -> Self {
206 match self {
207 Self::Text {
208 ref mut replacement,
209 ..
210 }
211 | Self::Regex {
212 ref mut replacement,
213 ..
214 } => {
215 *replacement = Some(new_replacement);
216 self
217 }
218 }
219 }
220
221 pub fn to_proto(&self) -> proto::SearchQuery {
222 proto::SearchQuery {
223 query: self.as_str().to_string(),
224 regex: self.is_regex(),
225 whole_word: self.whole_word(),
226 case_sensitive: self.case_sensitive(),
227 include_ignored: self.include_ignored(),
228 files_to_include: self.files_to_include().sources().join(","),
229 files_to_exclude: self.files_to_exclude().sources().join(","),
230 }
231 }
232
233 pub(crate) fn detect(
234 &self,
235 mut reader: BufReader<Box<dyn Read + Send + Sync>>,
236 ) -> Result<bool> {
237 if self.as_str().is_empty() {
238 return Ok(false);
239 }
240
241 match self {
242 Self::Text { search, .. } => {
243 let mat = search.stream_find_iter(reader).next();
244 match mat {
245 Some(Ok(_)) => Ok(true),
246 Some(Err(err)) => Err(err.into()),
247 None => Ok(false),
248 }
249 }
250 Self::Regex {
251 regex, multiline, ..
252 } => {
253 if *multiline {
254 let mut text = String::new();
255 if let Err(err) = reader.read_to_string(&mut text) {
256 Err(err.into())
257 } else {
258 Ok(regex.find(&text)?.is_some())
259 }
260 } else {
261 for line in reader.lines() {
262 let line = line?;
263 if regex.find(&line)?.is_some() {
264 return Ok(true);
265 }
266 }
267 Ok(false)
268 }
269 }
270 }
271 }
272 /// Returns the replacement text for this `SearchQuery`.
273 pub fn replacement(&self) -> Option<&str> {
274 match self {
275 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
276 replacement.as_deref()
277 }
278 }
279 }
280 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
281 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
282 match self {
283 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
284 SearchQuery::Regex {
285 regex, replacement, ..
286 } => {
287 if let Some(replacement) = replacement {
288 static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
289 LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
290 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
291 replacement,
292 |c: &Captures| match c.get(0).unwrap().as_str() {
293 r"\\" => "\\",
294 r"\n" => "\n",
295 r"\t" => "\t",
296 x => unreachable!("Unexpected escape sequence: {}", x),
297 },
298 );
299 Some(regex.replace(text, replacement))
300 } else {
301 None
302 }
303 }
304 }
305 }
306
307 pub async fn search(
308 &self,
309 buffer: &BufferSnapshot,
310 subrange: Option<Range<usize>>,
311 ) -> Vec<Range<usize>> {
312 const YIELD_INTERVAL: usize = 20000;
313
314 if self.as_str().is_empty() {
315 return Default::default();
316 }
317
318 let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
319 let rope = if let Some(range) = subrange {
320 buffer.as_rope().slice(range)
321 } else {
322 buffer.as_rope().clone()
323 };
324
325 let mut matches = Vec::new();
326 match self {
327 Self::Text {
328 search, whole_word, ..
329 } => {
330 for (ix, mat) in search
331 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
332 .enumerate()
333 {
334 if (ix + 1) % YIELD_INTERVAL == 0 {
335 yield_now().await;
336 }
337
338 let mat = mat.unwrap();
339 if *whole_word {
340 let classifier = buffer.char_classifier_at(range_offset + mat.start());
341
342 let prev_kind = rope
343 .reversed_chars_at(mat.start())
344 .next()
345 .map(|c| classifier.kind(c));
346 let start_kind =
347 classifier.kind(rope.chars_at(mat.start()).next().unwrap());
348 let end_kind =
349 classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
350 let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
351 if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
352 || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
353 {
354 continue;
355 }
356 }
357 matches.push(mat.start()..mat.end())
358 }
359 }
360
361 Self::Regex {
362 regex, multiline, ..
363 } => {
364 if *multiline {
365 let text = rope.to_string();
366 for (ix, mat) in regex.find_iter(&text).enumerate() {
367 if (ix + 1) % YIELD_INTERVAL == 0 {
368 yield_now().await;
369 }
370
371 if let Ok(mat) = mat {
372 matches.push(mat.start()..mat.end());
373 }
374 }
375 } else {
376 let mut line = String::new();
377 let mut line_offset = 0;
378 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
379 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
380 yield_now().await;
381 }
382
383 for (newline_ix, text) in chunk.split('\n').enumerate() {
384 if newline_ix > 0 {
385 for mat in regex.find_iter(&line).flatten() {
386 let start = line_offset + mat.start();
387 let end = line_offset + mat.end();
388 matches.push(start..end);
389 }
390
391 line_offset += line.len() + 1;
392 line.clear();
393 }
394 line.push_str(text);
395 }
396 }
397 }
398 }
399 }
400
401 matches
402 }
403
404 pub fn is_empty(&self) -> bool {
405 self.as_str().is_empty()
406 }
407
408 pub fn as_str(&self) -> &str {
409 self.as_inner().as_str()
410 }
411
412 pub fn whole_word(&self) -> bool {
413 match self {
414 Self::Text { whole_word, .. } => *whole_word,
415 Self::Regex { whole_word, .. } => *whole_word,
416 }
417 }
418
419 pub fn case_sensitive(&self) -> bool {
420 match self {
421 Self::Text { case_sensitive, .. } => *case_sensitive,
422 Self::Regex { case_sensitive, .. } => *case_sensitive,
423 }
424 }
425
426 pub fn include_ignored(&self) -> bool {
427 match self {
428 Self::Text {
429 include_ignored, ..
430 } => *include_ignored,
431 Self::Regex {
432 include_ignored, ..
433 } => *include_ignored,
434 }
435 }
436
437 pub fn is_regex(&self) -> bool {
438 matches!(self, Self::Regex { .. })
439 }
440
441 pub fn files_to_include(&self) -> &PathMatcher {
442 self.as_inner().files_to_include()
443 }
444
445 pub fn files_to_exclude(&self) -> &PathMatcher {
446 self.as_inner().files_to_exclude()
447 }
448
449 pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
450 self.as_inner().buffers.as_ref()
451 }
452
453 pub fn is_opened_only(&self) -> bool {
454 self.as_inner().buffers.is_some()
455 }
456
457 pub fn filters_path(&self) -> bool {
458 !(self.files_to_exclude().sources().is_empty()
459 && self.files_to_include().sources().is_empty())
460 }
461
462 pub fn file_matches(&self, file_path: &Path) -> bool {
463 let mut path = file_path.to_path_buf();
464 loop {
465 if self.files_to_exclude().is_match(&path) {
466 return false;
467 } else if self.files_to_include().sources().is_empty()
468 || self.files_to_include().is_match(&path)
469 {
470 return true;
471 } else if !path.pop() {
472 return false;
473 }
474 }
475 }
476 pub fn as_inner(&self) -> &SearchInputs {
477 match self {
478 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
479 }
480 }
481
482 /// Whether this search should replace only one match per line, instead of
483 /// all matches.
484 /// Returns `None` for text searches, as only regex searches support this
485 /// option.
486 pub fn one_match_per_line(&self) -> Option<bool> {
487 match self {
488 Self::Regex {
489 one_match_per_line, ..
490 } => Some(*one_match_per_line),
491 Self::Text { .. } => None,
492 }
493 }
494}
495
496pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
497 let globs = glob_set
498 .split(',')
499 .map(str::trim)
500 .filter(|&glob_str| (!glob_str.is_empty()))
501 .map(|glob_str| glob_str.to_owned())
502 .collect::<Vec<_>>();
503 Ok(PathMatcher::new(&globs)?)
504}
505
#[cfg(test)]
mod tests {
    use super::*;

    /// Each plain path must build a matcher, and that matcher must accept the
    /// very path it was built from.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];
        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            );
        }
    }

    /// Malformed globs must be rejected and well-formed ones accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}