1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use fancy_regex::{Captures, Regex, RegexBuilder};
5use gpui::Entity;
6use language::{Buffer, BufferSnapshot, CharKind};
7use smol::future::yield_now;
8use std::{
9 borrow::Cow,
10 io::{BufRead, BufReader, Read},
11 ops::Range,
12 path::Path,
13 sync::{Arc, LazyLock},
14};
15use text::Anchor;
16use util::paths::PathMatcher;
17
18#[derive(Debug)]
19pub enum SearchResult {
20 Buffer {
21 buffer: Entity<Buffer>,
22 ranges: Vec<Range<Anchor>>,
23 },
24 LimitReached,
25}
26
/// Names one of the textual inputs of a search.
///
/// Derives `Debug` and `Eq` in addition to `Clone`, `Copy` and `PartialEq` so values can be
/// logged, used with `assert_eq!`, and compared as a total equivalence.
// NOTE(review): the per-variant meaning is inferred from the names (query text, include
// filter, exclude filter); this file never constructs or matches on the enum.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SearchInputKind {
    Query,
    Include,
    Exclude,
}
33
34#[derive(Clone, Debug)]
35pub struct SearchInputs {
36 query: Arc<str>,
37 files_to_include: PathMatcher,
38 files_to_exclude: PathMatcher,
39 buffers: Option<Vec<Entity<Buffer>>>,
40}
41
42impl SearchInputs {
43 pub fn as_str(&self) -> &str {
44 self.query.as_ref()
45 }
46 pub fn files_to_include(&self) -> &PathMatcher {
47 &self.files_to_include
48 }
49 pub fn files_to_exclude(&self) -> &PathMatcher {
50 &self.files_to_exclude
51 }
52 pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
53 &self.buffers
54 }
55}
56#[derive(Clone, Debug)]
57pub enum SearchQuery {
58 Text {
59 search: AhoCorasick,
60 replacement: Option<String>,
61 whole_word: bool,
62 case_sensitive: bool,
63 include_ignored: bool,
64 inner: SearchInputs,
65 },
66
67 Regex {
68 regex: Regex,
69 replacement: Option<String>,
70 multiline: bool,
71 whole_word: bool,
72 case_sensitive: bool,
73 include_ignored: bool,
74 one_match_per_line: bool,
75 inner: SearchInputs,
76 },
77}
78
79static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
80 RegexBuilder::new(r"\B")
81 .build()
82 .expect("Failed to create WORD_MATCH_TEST")
83});
84
85impl SearchQuery {
86 pub fn text(
87 query: impl ToString,
88 whole_word: bool,
89 case_sensitive: bool,
90 include_ignored: bool,
91 files_to_include: PathMatcher,
92 files_to_exclude: PathMatcher,
93 buffers: Option<Vec<Entity<Buffer>>>,
94 ) -> Result<Self> {
95 let query = query.to_string();
96 let search = AhoCorasickBuilder::new()
97 .ascii_case_insensitive(!case_sensitive)
98 .build([&query])?;
99 let inner = SearchInputs {
100 query: query.into(),
101 files_to_exclude,
102 files_to_include,
103 buffers,
104 };
105 Ok(Self::Text {
106 search,
107 replacement: None,
108 whole_word,
109 case_sensitive,
110 include_ignored,
111 inner,
112 })
113 }
114
115 pub fn regex(
116 query: impl ToString,
117 whole_word: bool,
118 case_sensitive: bool,
119 include_ignored: bool,
120 one_match_per_line: bool,
121 files_to_include: PathMatcher,
122 files_to_exclude: PathMatcher,
123 buffers: Option<Vec<Entity<Buffer>>>,
124 ) -> Result<Self> {
125 let mut query = query.to_string();
126 let initial_query = Arc::from(query.as_str());
127 if whole_word {
128 let mut word_query = String::new();
129 if let Some(first) = query.get(0..1) {
130 if WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x) {
131 word_query.push_str("\\b");
132 }
133 }
134 word_query.push_str(&query);
135 if let Some(last) = query.get(query.len() - 1..) {
136 if WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x) {
137 word_query.push_str("\\b");
138 }
139 }
140 query = word_query
141 }
142
143 let multiline = query.contains('\n') || query.contains("\\n");
144 let regex = RegexBuilder::new(&query)
145 .case_insensitive(!case_sensitive)
146 .build()?;
147 let inner = SearchInputs {
148 query: initial_query,
149 files_to_exclude,
150 files_to_include,
151 buffers,
152 };
153 Ok(Self::Regex {
154 regex,
155 replacement: None,
156 multiline,
157 whole_word,
158 case_sensitive,
159 include_ignored,
160 inner,
161 one_match_per_line,
162 })
163 }
164
165 pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
166 if message.regex {
167 Self::regex(
168 message.query,
169 message.whole_word,
170 message.case_sensitive,
171 message.include_ignored,
172 false,
173 deserialize_path_matches(&message.files_to_include)?,
174 deserialize_path_matches(&message.files_to_exclude)?,
175 None, // search opened only don't need search remote
176 )
177 } else {
178 Self::text(
179 message.query,
180 message.whole_word,
181 message.case_sensitive,
182 message.include_ignored,
183 deserialize_path_matches(&message.files_to_include)?,
184 deserialize_path_matches(&message.files_to_exclude)?,
185 None, // search opened only don't need search remote
186 )
187 }
188 }
189
190 pub fn with_replacement(mut self, new_replacement: String) -> Self {
191 match self {
192 Self::Text {
193 ref mut replacement,
194 ..
195 }
196 | Self::Regex {
197 ref mut replacement,
198 ..
199 } => {
200 *replacement = Some(new_replacement);
201 self
202 }
203 }
204 }
205
206 pub fn to_proto(&self) -> proto::SearchQuery {
207 proto::SearchQuery {
208 query: self.as_str().to_string(),
209 regex: self.is_regex(),
210 whole_word: self.whole_word(),
211 case_sensitive: self.case_sensitive(),
212 include_ignored: self.include_ignored(),
213 files_to_include: self.files_to_include().sources().join(","),
214 files_to_exclude: self.files_to_exclude().sources().join(","),
215 }
216 }
217
218 pub(crate) fn detect(
219 &self,
220 mut reader: BufReader<Box<dyn Read + Send + Sync>>,
221 ) -> Result<bool> {
222 if self.as_str().is_empty() {
223 return Ok(false);
224 }
225
226 match self {
227 Self::Text { search, .. } => {
228 let mat = search.stream_find_iter(reader).next();
229 match mat {
230 Some(Ok(_)) => Ok(true),
231 Some(Err(err)) => Err(err.into()),
232 None => Ok(false),
233 }
234 }
235 Self::Regex {
236 regex, multiline, ..
237 } => {
238 if *multiline {
239 let mut text = String::new();
240 if let Err(err) = reader.read_to_string(&mut text) {
241 Err(err.into())
242 } else {
243 Ok(regex.find(&text)?.is_some())
244 }
245 } else {
246 for line in reader.lines() {
247 let line = line?;
248 if regex.find(&line)?.is_some() {
249 return Ok(true);
250 }
251 }
252 Ok(false)
253 }
254 }
255 }
256 }
257 /// Returns the replacement text for this `SearchQuery`.
258 pub fn replacement(&self) -> Option<&str> {
259 match self {
260 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
261 replacement.as_deref()
262 }
263 }
264 }
265 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
266 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
267 match self {
268 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
269 SearchQuery::Regex {
270 regex, replacement, ..
271 } => {
272 if let Some(replacement) = replacement {
273 static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
274 LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
275 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
276 replacement,
277 |c: &Captures| match c.get(0).unwrap().as_str() {
278 r"\\" => "\\",
279 r"\n" => "\n",
280 r"\t" => "\t",
281 x => unreachable!("Unexpected escape sequence: {}", x),
282 },
283 );
284 Some(regex.replace(text, replacement))
285 } else {
286 None
287 }
288 }
289 }
290 }
291
292 pub async fn search(
293 &self,
294 buffer: &BufferSnapshot,
295 subrange: Option<Range<usize>>,
296 ) -> Vec<Range<usize>> {
297 const YIELD_INTERVAL: usize = 20000;
298
299 if self.as_str().is_empty() {
300 return Default::default();
301 }
302
303 let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
304 let rope = if let Some(range) = subrange {
305 buffer.as_rope().slice(range)
306 } else {
307 buffer.as_rope().clone()
308 };
309
310 let mut matches = Vec::new();
311 match self {
312 Self::Text {
313 search, whole_word, ..
314 } => {
315 for (ix, mat) in search
316 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
317 .enumerate()
318 {
319 if (ix + 1) % YIELD_INTERVAL == 0 {
320 yield_now().await;
321 }
322
323 let mat = mat.unwrap();
324 if *whole_word {
325 let classifier = buffer.char_classifier_at(range_offset + mat.start());
326
327 let prev_kind = rope
328 .reversed_chars_at(mat.start())
329 .next()
330 .map(|c| classifier.kind(c));
331 let start_kind =
332 classifier.kind(rope.chars_at(mat.start()).next().unwrap());
333 let end_kind =
334 classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
335 let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
336 if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
337 || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
338 {
339 continue;
340 }
341 }
342 matches.push(mat.start()..mat.end())
343 }
344 }
345
346 Self::Regex {
347 regex, multiline, ..
348 } => {
349 if *multiline {
350 let text = rope.to_string();
351 for (ix, mat) in regex.find_iter(&text).enumerate() {
352 if (ix + 1) % YIELD_INTERVAL == 0 {
353 yield_now().await;
354 }
355
356 if let Ok(mat) = mat {
357 matches.push(mat.start()..mat.end());
358 }
359 }
360 } else {
361 let mut line = String::new();
362 let mut line_offset = 0;
363 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
364 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
365 yield_now().await;
366 }
367
368 for (newline_ix, text) in chunk.split('\n').enumerate() {
369 if newline_ix > 0 {
370 for mat in regex.find_iter(&line).flatten() {
371 let start = line_offset + mat.start();
372 let end = line_offset + mat.end();
373 matches.push(start..end);
374 }
375
376 line_offset += line.len() + 1;
377 line.clear();
378 }
379 line.push_str(text);
380 }
381 }
382 }
383 }
384 }
385
386 matches
387 }
388
389 pub fn is_empty(&self) -> bool {
390 self.as_str().is_empty()
391 }
392
393 pub fn as_str(&self) -> &str {
394 self.as_inner().as_str()
395 }
396
397 pub fn whole_word(&self) -> bool {
398 match self {
399 Self::Text { whole_word, .. } => *whole_word,
400 Self::Regex { whole_word, .. } => *whole_word,
401 }
402 }
403
404 pub fn case_sensitive(&self) -> bool {
405 match self {
406 Self::Text { case_sensitive, .. } => *case_sensitive,
407 Self::Regex { case_sensitive, .. } => *case_sensitive,
408 }
409 }
410
411 pub fn include_ignored(&self) -> bool {
412 match self {
413 Self::Text {
414 include_ignored, ..
415 } => *include_ignored,
416 Self::Regex {
417 include_ignored, ..
418 } => *include_ignored,
419 }
420 }
421
422 pub fn is_regex(&self) -> bool {
423 matches!(self, Self::Regex { .. })
424 }
425
426 pub fn files_to_include(&self) -> &PathMatcher {
427 self.as_inner().files_to_include()
428 }
429
430 pub fn files_to_exclude(&self) -> &PathMatcher {
431 self.as_inner().files_to_exclude()
432 }
433
434 pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
435 self.as_inner().buffers.as_ref()
436 }
437
438 pub fn is_opened_only(&self) -> bool {
439 self.as_inner().buffers.is_some()
440 }
441
442 pub fn filters_path(&self) -> bool {
443 !(self.files_to_exclude().sources().is_empty()
444 && self.files_to_include().sources().is_empty())
445 }
446
447 pub fn file_matches(&self, file_path: &Path) -> bool {
448 let mut path = file_path.to_path_buf();
449 loop {
450 if self.files_to_exclude().is_match(&path) {
451 return false;
452 } else if self.files_to_include().sources().is_empty()
453 || self.files_to_include().is_match(&path)
454 {
455 return true;
456 } else if !path.pop() {
457 return false;
458 }
459 }
460 }
461 pub fn as_inner(&self) -> &SearchInputs {
462 match self {
463 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
464 }
465 }
466
467 /// Whether this search should replace only one match per line, instead of
468 /// all matches.
469 /// Returns `None` for text searches, as only regex searches support this
470 /// option.
471 pub fn one_match_per_line(&self) -> Option<bool> {
472 match self {
473 Self::Regex {
474 one_match_per_line, ..
475 } => Some(*one_match_per_line),
476 Self::Text { .. } => None,
477 }
478 }
479}
480
481pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
482 let globs = glob_set
483 .split(',')
484 .map(str::trim)
485 .filter(|&glob_str| (!glob_str.is_empty()))
486 .map(|glob_str| glob_str.to_owned())
487 .collect::<Vec<_>>();
488 Ok(PathMatcher::new(&globs)?)
489}
490
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted as globs and must match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed ones accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}