1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use fancy_regex::{Captures, Regex, RegexBuilder};
5use gpui::Entity;
6use language::{Buffer, BufferSnapshot, CharKind};
7use smol::future::yield_now;
8use std::{
9 borrow::Cow,
10 io::{BufRead, BufReader, Read},
11 ops::Range,
12 path::Path,
13 sync::{Arc, LazyLock},
14};
15use text::Anchor;
16use util::paths::PathMatcher;
17
/// One item of output emitted while a search is running.
#[derive(Debug)]
pub enum SearchResult {
    /// Matches located inside a single buffer.
    Buffer {
        /// Handle to the buffer the ranges refer to.
        buffer: Entity<Buffer>,
        /// Anchor ranges inside `buffer`.
        /// NOTE(review): presumably one range per match — the producer is not in this file, confirm.
        ranges: Vec<Range<Anchor>>,
    },
    /// NOTE(review): judging by the name, signals that a cap on the number of
    /// results was hit; the producer is not in this file — confirm.
    LimitReached,
}
26
/// Identifies one of the three text inputs a search is assembled from.
///
/// `Debug` and `Eq` are derived in addition to the original traits so the
/// value can be logged, used in `assert_eq!`, and compared as a total
/// equivalence; this is purely additive for existing callers.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SearchInputKind {
    /// The search query itself.
    Query,
    /// The "files to include" filter.
    Include,
    /// The "files to exclude" filter.
    Exclude,
}
33
34#[derive(Clone, Debug)]
35pub struct SearchInputs {
36 query: Arc<str>,
37 files_to_include: PathMatcher,
38 files_to_exclude: PathMatcher,
39 buffers: Option<Vec<Entity<Buffer>>>,
40}
41
42impl SearchInputs {
43 pub fn as_str(&self) -> &str {
44 self.query.as_ref()
45 }
46 pub fn files_to_include(&self) -> &PathMatcher {
47 &self.files_to_include
48 }
49 pub fn files_to_exclude(&self) -> &PathMatcher {
50 &self.files_to_exclude
51 }
52 pub fn buffers(&self) -> &Option<Vec<Entity<Buffer>>> {
53 &self.buffers
54 }
55}
/// A compiled search query, either a literal text search or a regex search.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search backed by an Aho-Corasick automaton.
    Text {
        /// Automaton built from the query text.
        search: AhoCorasick,
        /// Replacement text, set through `with_replacement`.
        replacement: Option<String>,
        /// Whether matches glued to adjacent word characters are discarded by `search`.
        whole_word: bool,
        /// Whether the automaton was built case-sensitively.
        case_sensitive: bool,
        /// Flag stored as given and exposed through `include_ignored()`.
        /// NOTE(review): presumably controls whether ignored files are searched — the consumer is not in this file.
        include_ignored: bool,
        /// The inputs this query was built from.
        inner: SearchInputs,
    },

    /// Regular-expression search.
    Regex {
        /// Compiled pattern (already wrapped in `\b` anchors when `whole_word` applied).
        regex: Regex,
        /// Replacement template, set through `with_replacement`.
        replacement: Option<String>,
        /// True when the pattern contains a newline or the `\n` escape, in which case
        /// the text is matched as a whole instead of line by line.
        multiline: bool,
        /// Whether whole-word matching was requested when the pattern was built.
        whole_word: bool,
        /// Whether the pattern was compiled case-sensitively.
        case_sensitive: bool,
        /// Flag stored as given and exposed through `include_ignored()`.
        /// NOTE(review): presumably controls whether ignored files are searched — the consumer is not in this file.
        include_ignored: bool,
        /// The inputs this query was built from.
        inner: SearchInputs,
    },
}
77
78static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
79 RegexBuilder::new(r"\B")
80 .build()
81 .expect("Failed to create WORD_MATCH_TEST")
82});
83
84impl SearchQuery {
85 pub fn text(
86 query: impl ToString,
87 whole_word: bool,
88 case_sensitive: bool,
89 include_ignored: bool,
90 files_to_include: PathMatcher,
91 files_to_exclude: PathMatcher,
92 buffers: Option<Vec<Entity<Buffer>>>,
93 ) -> Result<Self> {
94 let query = query.to_string();
95 let search = AhoCorasickBuilder::new()
96 .ascii_case_insensitive(!case_sensitive)
97 .build([&query])?;
98 let inner = SearchInputs {
99 query: query.into(),
100 files_to_exclude,
101 files_to_include,
102 buffers,
103 };
104 Ok(Self::Text {
105 search,
106 replacement: None,
107 whole_word,
108 case_sensitive,
109 include_ignored,
110 inner,
111 })
112 }
113
114 pub fn regex(
115 query: impl ToString,
116 whole_word: bool,
117 case_sensitive: bool,
118 include_ignored: bool,
119 files_to_include: PathMatcher,
120 files_to_exclude: PathMatcher,
121 buffers: Option<Vec<Entity<Buffer>>>,
122 ) -> Result<Self> {
123 let mut query = query.to_string();
124 let initial_query = Arc::from(query.as_str());
125 if whole_word {
126 let mut word_query = String::new();
127 if let Some(first) = query.get(0..1) {
128 if WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x) {
129 word_query.push_str("\\b");
130 }
131 }
132 word_query.push_str(&query);
133 if let Some(last) = query.get(query.len() - 1..) {
134 if WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x) {
135 word_query.push_str("\\b");
136 }
137 }
138 query = word_query
139 }
140
141 let multiline = query.contains('\n') || query.contains("\\n");
142 let regex = RegexBuilder::new(&query)
143 .case_insensitive(!case_sensitive)
144 .build()?;
145 let inner = SearchInputs {
146 query: initial_query,
147 files_to_exclude,
148 files_to_include,
149 buffers,
150 };
151 Ok(Self::Regex {
152 regex,
153 replacement: None,
154 multiline,
155 whole_word,
156 case_sensitive,
157 include_ignored,
158 inner,
159 })
160 }
161
162 pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
163 if message.regex {
164 Self::regex(
165 message.query,
166 message.whole_word,
167 message.case_sensitive,
168 message.include_ignored,
169 deserialize_path_matches(&message.files_to_include)?,
170 deserialize_path_matches(&message.files_to_exclude)?,
171 None, // search opened only don't need search remote
172 )
173 } else {
174 Self::text(
175 message.query,
176 message.whole_word,
177 message.case_sensitive,
178 message.include_ignored,
179 deserialize_path_matches(&message.files_to_include)?,
180 deserialize_path_matches(&message.files_to_exclude)?,
181 None, // search opened only don't need search remote
182 )
183 }
184 }
185
186 pub fn with_replacement(mut self, new_replacement: String) -> Self {
187 match self {
188 Self::Text {
189 ref mut replacement,
190 ..
191 }
192 | Self::Regex {
193 ref mut replacement,
194 ..
195 } => {
196 *replacement = Some(new_replacement);
197 self
198 }
199 }
200 }
201
202 pub fn to_proto(&self) -> proto::SearchQuery {
203 proto::SearchQuery {
204 query: self.as_str().to_string(),
205 regex: self.is_regex(),
206 whole_word: self.whole_word(),
207 case_sensitive: self.case_sensitive(),
208 include_ignored: self.include_ignored(),
209 files_to_include: self.files_to_include().sources().join(","),
210 files_to_exclude: self.files_to_exclude().sources().join(","),
211 }
212 }
213
214 pub(crate) fn detect(
215 &self,
216 mut reader: BufReader<Box<dyn Read + Send + Sync>>,
217 ) -> Result<bool> {
218 if self.as_str().is_empty() {
219 return Ok(false);
220 }
221
222 match self {
223 Self::Text { search, .. } => {
224 let mat = search.stream_find_iter(reader).next();
225 match mat {
226 Some(Ok(_)) => Ok(true),
227 Some(Err(err)) => Err(err.into()),
228 None => Ok(false),
229 }
230 }
231 Self::Regex {
232 regex, multiline, ..
233 } => {
234 if *multiline {
235 let mut text = String::new();
236 if let Err(err) = reader.read_to_string(&mut text) {
237 Err(err.into())
238 } else {
239 Ok(regex.find(&text)?.is_some())
240 }
241 } else {
242 for line in reader.lines() {
243 let line = line?;
244 if regex.find(&line)?.is_some() {
245 return Ok(true);
246 }
247 }
248 Ok(false)
249 }
250 }
251 }
252 }
253 /// Returns the replacement text for this `SearchQuery`.
254 pub fn replacement(&self) -> Option<&str> {
255 match self {
256 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
257 replacement.as_deref()
258 }
259 }
260 }
261 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
262 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
263 match self {
264 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
265 SearchQuery::Regex {
266 regex, replacement, ..
267 } => {
268 if let Some(replacement) = replacement {
269 static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
270 LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
271 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
272 replacement,
273 |c: &Captures| match c.get(0).unwrap().as_str() {
274 r"\\" => "\\",
275 r"\n" => "\n",
276 r"\t" => "\t",
277 x => unreachable!("Unexpected escape sequence: {}", x),
278 },
279 );
280 Some(regex.replace(text, replacement))
281 } else {
282 None
283 }
284 }
285 }
286 }
287
288 pub async fn search(
289 &self,
290 buffer: &BufferSnapshot,
291 subrange: Option<Range<usize>>,
292 ) -> Vec<Range<usize>> {
293 const YIELD_INTERVAL: usize = 20000;
294
295 if self.as_str().is_empty() {
296 return Default::default();
297 }
298
299 let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
300 let rope = if let Some(range) = subrange {
301 buffer.as_rope().slice(range)
302 } else {
303 buffer.as_rope().clone()
304 };
305
306 let mut matches = Vec::new();
307 match self {
308 Self::Text {
309 search, whole_word, ..
310 } => {
311 for (ix, mat) in search
312 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
313 .enumerate()
314 {
315 if (ix + 1) % YIELD_INTERVAL == 0 {
316 yield_now().await;
317 }
318
319 let mat = mat.unwrap();
320 if *whole_word {
321 let classifier = buffer.char_classifier_at(range_offset + mat.start());
322
323 let prev_kind = rope
324 .reversed_chars_at(mat.start())
325 .next()
326 .map(|c| classifier.kind(c));
327 let start_kind =
328 classifier.kind(rope.chars_at(mat.start()).next().unwrap());
329 let end_kind =
330 classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
331 let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
332 if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
333 || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
334 {
335 continue;
336 }
337 }
338 matches.push(mat.start()..mat.end())
339 }
340 }
341
342 Self::Regex {
343 regex, multiline, ..
344 } => {
345 if *multiline {
346 let text = rope.to_string();
347 for (ix, mat) in regex.find_iter(&text).enumerate() {
348 if (ix + 1) % YIELD_INTERVAL == 0 {
349 yield_now().await;
350 }
351
352 if let Ok(mat) = mat {
353 matches.push(mat.start()..mat.end());
354 }
355 }
356 } else {
357 let mut line = String::new();
358 let mut line_offset = 0;
359 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
360 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
361 yield_now().await;
362 }
363
364 for (newline_ix, text) in chunk.split('\n').enumerate() {
365 if newline_ix > 0 {
366 for mat in regex.find_iter(&line).flatten() {
367 let start = line_offset + mat.start();
368 let end = line_offset + mat.end();
369 matches.push(start..end);
370 }
371
372 line_offset += line.len() + 1;
373 line.clear();
374 }
375 line.push_str(text);
376 }
377 }
378 }
379 }
380 }
381
382 matches
383 }
384
385 pub fn is_empty(&self) -> bool {
386 self.as_str().is_empty()
387 }
388
389 pub fn as_str(&self) -> &str {
390 self.as_inner().as_str()
391 }
392
393 pub fn whole_word(&self) -> bool {
394 match self {
395 Self::Text { whole_word, .. } => *whole_word,
396 Self::Regex { whole_word, .. } => *whole_word,
397 }
398 }
399
400 pub fn case_sensitive(&self) -> bool {
401 match self {
402 Self::Text { case_sensitive, .. } => *case_sensitive,
403 Self::Regex { case_sensitive, .. } => *case_sensitive,
404 }
405 }
406
407 pub fn include_ignored(&self) -> bool {
408 match self {
409 Self::Text {
410 include_ignored, ..
411 } => *include_ignored,
412 Self::Regex {
413 include_ignored, ..
414 } => *include_ignored,
415 }
416 }
417
418 pub fn is_regex(&self) -> bool {
419 matches!(self, Self::Regex { .. })
420 }
421
422 pub fn files_to_include(&self) -> &PathMatcher {
423 self.as_inner().files_to_include()
424 }
425
426 pub fn files_to_exclude(&self) -> &PathMatcher {
427 self.as_inner().files_to_exclude()
428 }
429
430 pub fn buffers(&self) -> Option<&Vec<Entity<Buffer>>> {
431 self.as_inner().buffers.as_ref()
432 }
433
434 pub fn is_opened_only(&self) -> bool {
435 self.as_inner().buffers.is_some()
436 }
437
438 pub fn filters_path(&self) -> bool {
439 !(self.files_to_exclude().sources().is_empty()
440 && self.files_to_include().sources().is_empty())
441 }
442
443 pub fn file_matches(&self, file_path: &Path) -> bool {
444 let mut path = file_path.to_path_buf();
445 loop {
446 if self.files_to_exclude().is_match(&path) {
447 return false;
448 } else if self.files_to_include().sources().is_empty()
449 || self.files_to_include().is_match(&path)
450 {
451 return true;
452 } else if !path.pop() {
453 return false;
454 }
455 }
456 }
457 pub fn as_inner(&self) -> &SearchInputs {
458 match self {
459 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
460 }
461 }
462}
463
464pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
465 let globs = glob_set
466 .split(',')
467 .map(str::trim)
468 .filter(|&glob_str| (!glob_str.is_empty()))
469 .map(|glob_str| glob_str.to_owned())
470 .collect::<Vec<_>>();
471 Ok(PathMatcher::new(&globs)?)
472}
473
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted as patterns and must match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            );
        }
    }

    /// Malformed globs must be rejected and well-formed ones accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}