1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use fancy_regex::{Captures, Regex, RegexBuilder};
5use gpui::Model;
6use language::{Buffer, BufferSnapshot, CharKind};
7use smol::future::yield_now;
8use std::{
9 borrow::Cow,
10 io::{BufRead, BufReader, Read},
11 ops::Range,
12 path::Path,
13 sync::{Arc, LazyLock},
14};
15use text::Anchor;
16use util::paths::PathMatcher;
17
/// One item produced by a search.
pub enum SearchResult {
    /// Matches located in a single buffer.
    Buffer {
        /// The buffer the ranges belong to.
        buffer: Model<Buffer>,
        /// Anchor ranges inside `buffer`.
        // NOTE(review): presumably one range per match; the producer is not visible here.
        ranges: Vec<Range<Anchor>>,
    },
    /// Marker variant without payload.
    // NOTE(review): presumably emitted when a result cap is hit; confirm against the producer.
    LimitReached,
}
25
/// Identifies one of the three inputs of a search.
// NOTE(review): presumably maps to the query text and the include / exclude path filters; usage is
// not visible in this file.
#[derive(Clone, Copy, PartialEq)]
pub enum SearchInputKind {
    /// The query input.
    Query,
    /// The "include" input.
    Include,
    /// The "exclude" input.
    Exclude,
}
32
/// The raw inputs a [`SearchQuery`] was built from.
#[derive(Clone, Debug)]
pub struct SearchInputs {
    /// The query text as passed to the constructor (for regex queries, before any whole-word
    /// wrapping is applied).
    query: Arc<str>,
    /// Matcher for paths to include; see `SearchQuery::file_matches` for how it is applied.
    files_to_include: PathMatcher,
    /// Matcher for paths to exclude; see `SearchQuery::file_matches` for how it is applied.
    files_to_exclude: PathMatcher,
    /// Optional explicit list of buffers; `Some` makes `SearchQuery::is_opened_only` return true.
    buffers: Option<Vec<Model<Buffer>>>,
}
40
41impl SearchInputs {
42 pub fn as_str(&self) -> &str {
43 self.query.as_ref()
44 }
45 pub fn files_to_include(&self) -> &PathMatcher {
46 &self.files_to_include
47 }
48 pub fn files_to_exclude(&self) -> &PathMatcher {
49 &self.files_to_exclude
50 }
51 pub fn buffers(&self) -> &Option<Vec<Model<Buffer>>> {
52 &self.buffers
53 }
54}
/// A compiled search query: either a literal-text search or a regular-expression search.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search backed by an Aho-Corasick automaton.
    Text {
        /// Automaton built from the query text (ASCII case-insensitive when `case_sensitive` is
        /// false).
        search: Arc<AhoCorasick>,
        /// Replacement text, if one was set via `with_replacement`.
        replacement: Option<String>,
        /// When true, `search` discards matches that start or end in the middle of a word.
        whole_word: bool,
        /// Whether matching is case-sensitive.
        case_sensitive: bool,
        /// Stored flag exposed through `include_ignored()`.
        // NOTE(review): presumably controls whether ignored files are searched; not used here.
        include_ignored: bool,
        /// The raw inputs this query was built from.
        inner: SearchInputs,
    },

    /// Regular-expression search.
    Regex {
        /// The compiled pattern (already wrapped in `\b` when `whole_word` applied).
        regex: Regex,
        /// Replacement text, if one was set via `with_replacement`.
        replacement: Option<String>,
        /// True when the pattern contains a newline, `\n` or `\s`; the text is then matched as a
        /// whole instead of line by line.
        multiline: bool,
        /// Whether whole-word wrapping was requested at construction time.
        whole_word: bool,
        /// Whether matching is case-sensitive.
        case_sensitive: bool,
        /// Stored flag exposed through `include_ignored()`.
        // NOTE(review): presumably controls whether ignored files are searched; not used here.
        include_ignored: bool,
        /// The raw inputs this query was built from.
        inner: SearchInputs,
    },
}
76
77static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
78 RegexBuilder::new(r"\B")
79 .build()
80 .expect("Failed to create WORD_MATCH_TEST")
81});
82
83impl SearchQuery {
84 pub fn text(
85 query: impl ToString,
86 whole_word: bool,
87 case_sensitive: bool,
88 include_ignored: bool,
89 files_to_include: PathMatcher,
90 files_to_exclude: PathMatcher,
91 buffers: Option<Vec<Model<Buffer>>>,
92 ) -> Result<Self> {
93 let query = query.to_string();
94 let search = AhoCorasickBuilder::new()
95 .ascii_case_insensitive(!case_sensitive)
96 .build([&query])?;
97 let inner = SearchInputs {
98 query: query.into(),
99 files_to_exclude,
100 files_to_include,
101 buffers,
102 };
103 Ok(Self::Text {
104 search: Arc::new(search),
105 replacement: None,
106 whole_word,
107 case_sensitive,
108 include_ignored,
109 inner,
110 })
111 }
112
113 pub fn regex(
114 query: impl ToString,
115 whole_word: bool,
116 case_sensitive: bool,
117 include_ignored: bool,
118 files_to_include: PathMatcher,
119 files_to_exclude: PathMatcher,
120 buffers: Option<Vec<Model<Buffer>>>,
121 ) -> Result<Self> {
122 let mut query = query.to_string();
123 let initial_query = Arc::from(query.as_str());
124 if whole_word {
125 let mut word_query = String::new();
126 if let Some(first) = query.get(0..1) {
127 if WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x) {
128 word_query.push_str("\\b");
129 }
130 }
131 word_query.push_str(&query);
132 if let Some(last) = query.get(query.len() - 1..) {
133 if WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x) {
134 word_query.push_str("\\b");
135 }
136 }
137 query = word_query
138 }
139
140 let multiline = query.contains('\n') || query.contains("\\n") || query.contains("\\s");
141 let regex = RegexBuilder::new(&query)
142 .case_insensitive(!case_sensitive)
143 .build()?;
144 let inner = SearchInputs {
145 query: initial_query,
146 files_to_exclude,
147 files_to_include,
148 buffers,
149 };
150 Ok(Self::Regex {
151 regex,
152 replacement: None,
153 multiline,
154 whole_word,
155 case_sensitive,
156 include_ignored,
157 inner,
158 })
159 }
160
161 pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
162 if message.regex {
163 Self::regex(
164 message.query,
165 message.whole_word,
166 message.case_sensitive,
167 message.include_ignored,
168 deserialize_path_matches(&message.files_to_include)?,
169 deserialize_path_matches(&message.files_to_exclude)?,
170 None, // search opened only don't need search remote
171 )
172 } else {
173 Self::text(
174 message.query,
175 message.whole_word,
176 message.case_sensitive,
177 message.include_ignored,
178 deserialize_path_matches(&message.files_to_include)?,
179 deserialize_path_matches(&message.files_to_exclude)?,
180 None, // search opened only don't need search remote
181 )
182 }
183 }
184
185 pub fn with_replacement(mut self, new_replacement: String) -> Self {
186 match self {
187 Self::Text {
188 ref mut replacement,
189 ..
190 }
191 | Self::Regex {
192 ref mut replacement,
193 ..
194 } => {
195 *replacement = Some(new_replacement);
196 self
197 }
198 }
199 }
200
201 pub fn to_proto(&self) -> proto::SearchQuery {
202 proto::SearchQuery {
203 query: self.as_str().to_string(),
204 regex: self.is_regex(),
205 whole_word: self.whole_word(),
206 case_sensitive: self.case_sensitive(),
207 include_ignored: self.include_ignored(),
208 files_to_include: self.files_to_include().sources().join(","),
209 files_to_exclude: self.files_to_exclude().sources().join(","),
210 }
211 }
212
213 pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
214 if self.as_str().is_empty() {
215 return Ok(false);
216 }
217
218 match self {
219 Self::Text { search, .. } => {
220 let mat = search.stream_find_iter(stream).next();
221 match mat {
222 Some(Ok(_)) => Ok(true),
223 Some(Err(err)) => Err(err.into()),
224 None => Ok(false),
225 }
226 }
227 Self::Regex {
228 regex, multiline, ..
229 } => {
230 let mut reader = BufReader::new(stream);
231 if *multiline {
232 let mut text = String::new();
233 if let Err(err) = reader.read_to_string(&mut text) {
234 Err(err.into())
235 } else {
236 Ok(regex.find(&text)?.is_some())
237 }
238 } else {
239 for line in reader.lines() {
240 let line = line?;
241 if regex.find(&line)?.is_some() {
242 return Ok(true);
243 }
244 }
245 Ok(false)
246 }
247 }
248 }
249 }
250 /// Returns the replacement text for this `SearchQuery`.
251 pub fn replacement(&self) -> Option<&str> {
252 match self {
253 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
254 replacement.as_deref()
255 }
256 }
257 }
258 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
259 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
260 match self {
261 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
262 SearchQuery::Regex {
263 regex, replacement, ..
264 } => {
265 if let Some(replacement) = replacement {
266 static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: LazyLock<Regex> =
267 LazyLock::new(|| Regex::new(r"\\\\|\\n|\\t").unwrap());
268 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX.replace_all(
269 replacement,
270 |c: &Captures| match c.get(0).unwrap().as_str() {
271 r"\\" => "\\",
272 r"\n" => "\n",
273 r"\t" => "\t",
274 x => unreachable!("Unexpected escape sequence: {}", x),
275 },
276 );
277 Some(regex.replace(text, replacement))
278 } else {
279 None
280 }
281 }
282 }
283 }
284
/// Finds every match of this query in `buffer`, optionally restricted to `subrange`.
///
/// Returns the match ranges in the order they were found. The offsets are relative to the
/// searched text: when `subrange` is given they are relative to `subrange.start`, not to the
/// start of the buffer. An empty query yields no matches.
///
/// The future periodically yields to the executor (see `YIELD_INTERVAL`).
pub async fn search(
    &self,
    buffer: &BufferSnapshot,
    subrange: Option<Range<usize>>,
) -> Vec<Range<usize>> {
    // Number of loop iterations (matches, or chunks in line-by-line mode) between yields.
    const YIELD_INTERVAL: usize = 20000;

    if self.as_str().is_empty() {
        return Default::default();
    }

    // Offset of the searched text within the buffer; only used to pick the char classifier.
    let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
    let rope = if let Some(range) = subrange {
        buffer.as_rope().slice(range)
    } else {
        buffer.as_rope().clone()
    };

    let mut matches = Vec::new();
    match self {
        Self::Text {
            search, whole_word, ..
        } => {
            for (ix, mat) in search
                .stream_find_iter(rope.bytes_in_range(0..rope.len()))
                .enumerate()
            {
                if (ix + 1) % YIELD_INTERVAL == 0 {
                    yield_now().await;
                }

                let mat = mat.unwrap();
                if *whole_word {
                    let classifier = buffer.char_classifier_at(range_offset + mat.start());

                    // Kinds of: the char before the match, the first and last chars of the
                    // match, and the char after the match (`None` at the text boundaries).
                    let prev_kind = rope
                        .reversed_chars_at(mat.start())
                        .next()
                        .map(|c| classifier.kind(c));
                    let start_kind =
                        classifier.kind(rope.chars_at(mat.start()).next().unwrap());
                    let end_kind =
                        classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
                    let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
                    // Skip the match if a word continues across either of its edges.
                    if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
                        || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
                    {
                        continue;
                    }
                }
                matches.push(mat.start()..mat.end())
            }
        }

        Self::Regex {
            regex, multiline, ..
        } => {
            if *multiline {
                // Multiline patterns need the whole text at once.
                let text = rope.to_string();
                for (ix, mat) in regex.find_iter(&text).enumerate() {
                    if (ix + 1) % YIELD_INTERVAL == 0 {
                        yield_now().await;
                    }

                    // Matches for which the regex engine reports an error are skipped.
                    if let Ok(mat) = mat {
                        matches.push(mat.start()..mat.end());
                    }
                }
            } else {
                // Line-by-line mode: reassemble each line from the rope's chunks and match it
                // on its own. `line_offset` is the offset of the current line's first byte.
                let mut line = String::new();
                let mut line_offset = 0;
                // The chained "\n" flushes the final line even without a trailing newline.
                for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
                    if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
                        yield_now().await;
                    }

                    for (newline_ix, text) in chunk.split('\n').enumerate() {
                        // Every piece after the first follows a newline, so `line` is complete.
                        if newline_ix > 0 {
                            for mat in regex.find_iter(&line).flatten() {
                                let start = line_offset + mat.start();
                                let end = line_offset + mat.end();
                                matches.push(start..end);
                            }

                            // + 1 accounts for the newline that `split` removed.
                            line_offset += line.len() + 1;
                            line.clear();
                        }
                        line.push_str(text);
                    }
                }
            }
        }
    }

    matches
}
381
382 pub fn is_empty(&self) -> bool {
383 self.as_str().is_empty()
384 }
385
386 pub fn as_str(&self) -> &str {
387 self.as_inner().as_str()
388 }
389
390 pub fn whole_word(&self) -> bool {
391 match self {
392 Self::Text { whole_word, .. } => *whole_word,
393 Self::Regex { whole_word, .. } => *whole_word,
394 }
395 }
396
397 pub fn case_sensitive(&self) -> bool {
398 match self {
399 Self::Text { case_sensitive, .. } => *case_sensitive,
400 Self::Regex { case_sensitive, .. } => *case_sensitive,
401 }
402 }
403
404 pub fn include_ignored(&self) -> bool {
405 match self {
406 Self::Text {
407 include_ignored, ..
408 } => *include_ignored,
409 Self::Regex {
410 include_ignored, ..
411 } => *include_ignored,
412 }
413 }
414
415 pub fn is_regex(&self) -> bool {
416 matches!(self, Self::Regex { .. })
417 }
418
419 pub fn files_to_include(&self) -> &PathMatcher {
420 self.as_inner().files_to_include()
421 }
422
423 pub fn files_to_exclude(&self) -> &PathMatcher {
424 self.as_inner().files_to_exclude()
425 }
426
427 pub fn buffers(&self) -> Option<&Vec<Model<Buffer>>> {
428 self.as_inner().buffers.as_ref()
429 }
430
431 pub fn is_opened_only(&self) -> bool {
432 self.as_inner().buffers.is_some()
433 }
434
435 pub fn filters_path(&self) -> bool {
436 !(self.files_to_exclude().sources().is_empty()
437 && self.files_to_include().sources().is_empty())
438 }
439
440 pub fn file_matches(&self, file_path: &Path) -> bool {
441 let mut path = file_path.to_path_buf();
442 loop {
443 if self.files_to_exclude().is_match(&path) {
444 return false;
445 } else if self.files_to_include().sources().is_empty()
446 || self.files_to_include().is_match(&path)
447 {
448 return true;
449 } else if !path.pop() {
450 return false;
451 }
452 }
453 }
454 pub fn as_inner(&self) -> &SearchInputs {
455 match self {
456 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
457 }
458 }
459}
460
461pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
462 let globs = glob_set
463 .split(',')
464 .map(str::trim)
465 .filter(|&glob_str| (!glob_str.is_empty()))
466 .map(|glob_str| glob_str.to_owned())
467 .collect::<Vec<_>>();
468 Ok(PathMatcher::new(&globs)?)
469}
470
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted as patterns and must match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            );
        }
    }

    /// Malformed globs must be rejected and well-formed globs accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}