1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use fancy_regex::{Captures, Regex, RegexBuilder};
5use gpui::Model;
6use language::{Buffer, BufferSnapshot, CharKind};
7use smol::future::yield_now;
8use std::{
9 borrow::Cow,
10 io::{BufRead, BufReader, Read},
11 ops::Range,
12 path::Path,
13 sync::{Arc, LazyLock, OnceLock},
14};
15use text::Anchor;
16use util::paths::PathMatcher;
17
/// Lazily-initialized regex used by `SearchQuery::replacement_for` to find the
/// escape sequences `\\`, `\n` and `\t` inside a user-supplied replacement
/// string. It is compiled on first use via `get_or_init`.
static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: OnceLock<Regex> = OnceLock::new();
19
/// One item produced by a search.
///
/// NOTE(review): this type is not constructed in this file; the descriptions
/// below are derived from the variant and field names only — confirm against
/// the producers.
pub enum SearchResult {
    /// A buffer together with the anchor ranges associated with it
    /// (presumably the match locations inside that buffer).
    Buffer {
        buffer: Model<Buffer>,
        ranges: Vec<Range<Anchor>>,
    },
    /// Presumably signals that the search stopped because a result limit was
    /// hit — TODO confirm.
    LimitReached,
}
27
/// Identifies one of the three textual inputs of a search.
///
/// NOTE(review): not used in this file; presumably the variants correspond to
/// the query text, the include filter and the exclude filter — confirm at the
/// call sites.
#[derive(Clone, Copy, PartialEq)]
pub enum SearchInputKind {
    Query,
    Include,
    Exclude,
}
34
/// The inputs shared by both `SearchQuery` variants: the query string and the
/// filters that decide where to search.
#[derive(Clone, Debug)]
pub struct SearchInputs {
    // Query text as stored by the `SearchQuery` constructors (for regex
    // queries this is the text before any whole-word `\b` wrapping).
    query: Arc<str>,
    // Path matcher for files to include; `SearchQuery::file_matches` treats an
    // empty source list as "include everything".
    files_to_include: PathMatcher,
    // Path matcher for files to exclude; checked before the include matcher.
    files_to_exclude: PathMatcher,
    // Optional explicit buffer list; `SearchQuery::is_opened_only` reports
    // `true` when this is `Some`.
    buffers: Option<Vec<Model<Buffer>>>,
}
42
43impl SearchInputs {
44 pub fn as_str(&self) -> &str {
45 self.query.as_ref()
46 }
47 pub fn files_to_include(&self) -> &PathMatcher {
48 &self.files_to_include
49 }
50 pub fn files_to_exclude(&self) -> &PathMatcher {
51 &self.files_to_exclude
52 }
53 pub fn buffers(&self) -> &Option<Vec<Model<Buffer>>> {
54 &self.buffers
55 }
56}
/// A compiled search query, either a plain-text search or a regular
/// expression, together with its options and filters.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search backed by an Aho-Corasick automaton built from the
    /// single query string.
    Text {
        // Automaton built in `SearchQuery::text`; ASCII case-insensitive when
        // `case_sensitive` is false.
        search: Arc<AhoCorasick>,
        // Replacement text, set through `with_replacement`.
        replacement: Option<String>,
        // When true, `search` drops matches that are not on word boundaries.
        whole_word: bool,
        case_sensitive: bool,
        // Stored and round-tripped through the proto message; not consulted
        // in this file (presumably controls searching ignored files).
        include_ignored: bool,
        inner: SearchInputs,
    },

    /// Regular-expression search backed by a `fancy_regex::Regex`.
    Regex {
        // Compiled pattern; already wrapped in `\b` anchors where applicable
        // when `whole_word` was requested.
        regex: Regex,
        // Replacement template, set through `with_replacement`.
        replacement: Option<String>,
        // True when the pattern text contains a newline, `\n` or `\s`; selects
        // whole-text matching instead of line-by-line matching.
        multiline: bool,
        whole_word: bool,
        case_sensitive: bool,
        // Stored and round-tripped through the proto message; not consulted
        // in this file (presumably controls searching ignored files).
        include_ignored: bool,
        inner: SearchInputs,
    },
}
78
/// Regex matching a non-word-boundary position (`\B`).
///
/// `SearchQuery::regex` runs it against a single character of the query: when
/// it does NOT match, a `\b` anchor is added on that side of the pattern.
static WORD_MATCH_TEST: LazyLock<Regex> = LazyLock::new(|| {
    RegexBuilder::new(r"\B")
        .build()
        .expect("Failed to create WORD_MATCH_TEST")
});
84
85impl SearchQuery {
86 pub fn text(
87 query: impl ToString,
88 whole_word: bool,
89 case_sensitive: bool,
90 include_ignored: bool,
91 files_to_include: PathMatcher,
92 files_to_exclude: PathMatcher,
93 buffers: Option<Vec<Model<Buffer>>>,
94 ) -> Result<Self> {
95 let query = query.to_string();
96 let search = AhoCorasickBuilder::new()
97 .ascii_case_insensitive(!case_sensitive)
98 .build([&query])?;
99 let inner = SearchInputs {
100 query: query.into(),
101 files_to_exclude,
102 files_to_include,
103 buffers,
104 };
105 Ok(Self::Text {
106 search: Arc::new(search),
107 replacement: None,
108 whole_word,
109 case_sensitive,
110 include_ignored,
111 inner,
112 })
113 }
114
115 pub fn regex(
116 query: impl ToString,
117 whole_word: bool,
118 case_sensitive: bool,
119 include_ignored: bool,
120 files_to_include: PathMatcher,
121 files_to_exclude: PathMatcher,
122 buffers: Option<Vec<Model<Buffer>>>,
123 ) -> Result<Self> {
124 let mut query = query.to_string();
125 let initial_query = Arc::from(query.as_str());
126 if whole_word {
127 let mut word_query = String::new();
128 if let Some(first) = query.get(0..1) {
129 if WORD_MATCH_TEST.is_match(first).is_ok_and(|x| !x) {
130 word_query.push_str("\\b");
131 }
132 }
133 word_query.push_str(&query);
134 if let Some(last) = query.get(query.len() - 1..) {
135 if WORD_MATCH_TEST.is_match(last).is_ok_and(|x| !x) {
136 word_query.push_str("\\b");
137 }
138 }
139 query = word_query
140 }
141
142 let multiline = query.contains('\n') || query.contains("\\n") || query.contains("\\s");
143 let regex = RegexBuilder::new(&query)
144 .case_insensitive(!case_sensitive)
145 .build()?;
146 let inner = SearchInputs {
147 query: initial_query,
148 files_to_exclude,
149 files_to_include,
150 buffers,
151 };
152 Ok(Self::Regex {
153 regex,
154 replacement: None,
155 multiline,
156 whole_word,
157 case_sensitive,
158 include_ignored,
159 inner,
160 })
161 }
162
163 pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
164 if message.regex {
165 Self::regex(
166 message.query,
167 message.whole_word,
168 message.case_sensitive,
169 message.include_ignored,
170 deserialize_path_matches(&message.files_to_include)?,
171 deserialize_path_matches(&message.files_to_exclude)?,
172 None, // search opened only don't need search remote
173 )
174 } else {
175 Self::text(
176 message.query,
177 message.whole_word,
178 message.case_sensitive,
179 message.include_ignored,
180 deserialize_path_matches(&message.files_to_include)?,
181 deserialize_path_matches(&message.files_to_exclude)?,
182 None, // search opened only don't need search remote
183 )
184 }
185 }
186
187 pub fn with_replacement(mut self, new_replacement: String) -> Self {
188 match self {
189 Self::Text {
190 ref mut replacement,
191 ..
192 }
193 | Self::Regex {
194 ref mut replacement,
195 ..
196 } => {
197 *replacement = Some(new_replacement);
198 self
199 }
200 }
201 }
202
203 pub fn to_proto(&self) -> proto::SearchQuery {
204 proto::SearchQuery {
205 query: self.as_str().to_string(),
206 regex: self.is_regex(),
207 whole_word: self.whole_word(),
208 case_sensitive: self.case_sensitive(),
209 include_ignored: self.include_ignored(),
210 files_to_include: self.files_to_include().sources().join(","),
211 files_to_exclude: self.files_to_exclude().sources().join(","),
212 }
213 }
214
215 pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
216 if self.as_str().is_empty() {
217 return Ok(false);
218 }
219
220 match self {
221 Self::Text { search, .. } => {
222 let mat = search.stream_find_iter(stream).next();
223 match mat {
224 Some(Ok(_)) => Ok(true),
225 Some(Err(err)) => Err(err.into()),
226 None => Ok(false),
227 }
228 }
229 Self::Regex {
230 regex, multiline, ..
231 } => {
232 let mut reader = BufReader::new(stream);
233 if *multiline {
234 let mut text = String::new();
235 if let Err(err) = reader.read_to_string(&mut text) {
236 Err(err.into())
237 } else {
238 Ok(regex.find(&text)?.is_some())
239 }
240 } else {
241 for line in reader.lines() {
242 let line = line?;
243 if regex.find(&line)?.is_some() {
244 return Ok(true);
245 }
246 }
247 Ok(false)
248 }
249 }
250 }
251 }
252 /// Returns the replacement text for this `SearchQuery`.
253 pub fn replacement(&self) -> Option<&str> {
254 match self {
255 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
256 replacement.as_deref()
257 }
258 }
259 }
260 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
261 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
262 match self {
263 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
264 SearchQuery::Regex {
265 regex, replacement, ..
266 } => {
267 if let Some(replacement) = replacement {
268 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX
269 .get_or_init(|| Regex::new(r"\\\\|\\n|\\t").unwrap())
270 .replace_all(replacement, |c: &Captures| {
271 match c.get(0).unwrap().as_str() {
272 r"\\" => "\\",
273 r"\n" => "\n",
274 r"\t" => "\t",
275 x => unreachable!("Unexpected escape sequence: {}", x),
276 }
277 });
278 Some(regex.replace(text, replacement))
279 } else {
280 None
281 }
282 }
283 }
284 }
285
/// Finds all matches of this query in `buffer`, optionally restricted to
/// `subrange`.
///
/// The returned ranges are the offsets reported by the matcher on the
/// searched rope. When `subrange` is given the rope is the slice for that
/// range, and `range_offset` is NOT added to the results, so the ranges are
/// relative to the start of `subrange`.
///
/// An empty query yields no matches. The future yields to the executor
/// periodically (every `YIELD_INTERVAL` matches or chunks) so long searches
/// do not monopolize it.
///
/// # Panics
///
/// In the text branch, panics if reading from the rope's byte stream fails
/// or if a match is reported with no character at its start / before its
/// end (the `unwrap` calls below).
pub async fn search(
    &self,
    buffer: &BufferSnapshot,
    subrange: Option<Range<usize>>,
) -> Vec<Range<usize>> {
    const YIELD_INTERVAL: usize = 20000;

    if self.as_str().is_empty() {
        return Default::default();
    }

    // Only used to translate rope-relative offsets back to buffer offsets
    // for the character classifier lookup.
    let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
    let rope = if let Some(range) = subrange {
        buffer.as_rope().slice(range)
    } else {
        buffer.as_rope().clone()
    };

    let mut matches = Vec::new();
    match self {
        Self::Text {
            search, whole_word, ..
        } => {
            for (ix, mat) in search
                .stream_find_iter(rope.bytes_in_range(0..rope.len()))
                .enumerate()
            {
                if (ix + 1) % YIELD_INTERVAL == 0 {
                    yield_now().await;
                }

                let mat = mat.unwrap();
                if *whole_word {
                    let classifier = buffer.char_classifier_at(range_offset + mat.start());

                    // Kinds of: the char before the match, the first and last
                    // chars of the match, and the char after the match.
                    // `prev_kind` / `next_kind` are `None` at the edges of
                    // the searched rope.
                    let prev_kind = rope
                        .reversed_chars_at(mat.start())
                        .next()
                        .map(|c| classifier.kind(c));
                    let start_kind =
                        classifier.kind(rope.chars_at(mat.start()).next().unwrap());
                    let end_kind =
                        classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
                    let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
                    // Skip the match when a word continues across either of
                    // its edges, i.e. it is only part of a longer word.
                    if (Some(start_kind) == prev_kind && start_kind == CharKind::Word)
                        || (Some(end_kind) == next_kind && end_kind == CharKind::Word)
                    {
                        continue;
                    }
                }
                matches.push(mat.start()..mat.end())
            }
        }

        Self::Regex {
            regex, multiline, ..
        } => {
            if *multiline {
                // The pattern may span lines: match against the whole text.
                let text = rope.to_string();
                for (ix, mat) in regex.find_iter(&text).enumerate() {
                    if (ix + 1) % YIELD_INTERVAL == 0 {
                        yield_now().await;
                    }

                    // Matches for which the regex engine reports an error
                    // are skipped.
                    if let Ok(mat) = mat {
                        matches.push(mat.start()..mat.end());
                    }
                }
            } else {
                // Match line by line. `line` accumulates the current line
                // across chunk boundaries; `line_offset` is the offset of its
                // first byte within the rope.
                let mut line = String::new();
                let mut line_offset = 0;
                // A line is only searched once a newline is seen, so a
                // trailing "\n" chunk is chained on to flush the final line.
                for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
                    if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
                        yield_now().await;
                    }

                    for (newline_ix, text) in chunk.split('\n').enumerate() {
                        // Every piece after the first follows a newline, so
                        // the accumulated line is complete at this point.
                        if newline_ix > 0 {
                            // `flatten` drops matches that produced an error.
                            for mat in regex.find_iter(&line).flatten() {
                                let start = line_offset + mat.start();
                                let end = line_offset + mat.end();
                                matches.push(start..end);
                            }

                            // `+ 1` accounts for the newline itself.
                            line_offset += line.len() + 1;
                            line.clear();
                        }
                        line.push_str(text);
                    }
                }
            }
        }
    }

    matches
}
382
383 pub fn is_empty(&self) -> bool {
384 self.as_str().is_empty()
385 }
386
387 pub fn as_str(&self) -> &str {
388 self.as_inner().as_str()
389 }
390
391 pub fn whole_word(&self) -> bool {
392 match self {
393 Self::Text { whole_word, .. } => *whole_word,
394 Self::Regex { whole_word, .. } => *whole_word,
395 }
396 }
397
398 pub fn case_sensitive(&self) -> bool {
399 match self {
400 Self::Text { case_sensitive, .. } => *case_sensitive,
401 Self::Regex { case_sensitive, .. } => *case_sensitive,
402 }
403 }
404
405 pub fn include_ignored(&self) -> bool {
406 match self {
407 Self::Text {
408 include_ignored, ..
409 } => *include_ignored,
410 Self::Regex {
411 include_ignored, ..
412 } => *include_ignored,
413 }
414 }
415
416 pub fn is_regex(&self) -> bool {
417 matches!(self, Self::Regex { .. })
418 }
419
420 pub fn files_to_include(&self) -> &PathMatcher {
421 self.as_inner().files_to_include()
422 }
423
424 pub fn files_to_exclude(&self) -> &PathMatcher {
425 self.as_inner().files_to_exclude()
426 }
427
428 pub fn buffers(&self) -> Option<&Vec<Model<Buffer>>> {
429 self.as_inner().buffers.as_ref()
430 }
431
432 pub fn is_opened_only(&self) -> bool {
433 self.as_inner().buffers.is_some()
434 }
435
436 pub fn filters_path(&self) -> bool {
437 !(self.files_to_exclude().sources().is_empty()
438 && self.files_to_include().sources().is_empty())
439 }
440
441 pub fn file_matches(&self, file_path: &Path) -> bool {
442 let mut path = file_path.to_path_buf();
443 loop {
444 if self.files_to_exclude().is_match(&path) {
445 return false;
446 } else if self.files_to_include().sources().is_empty()
447 || self.files_to_include().is_match(&path)
448 {
449 return true;
450 } else if !path.pop() {
451 return false;
452 }
453 }
454 }
455 pub fn as_inner(&self) -> &SearchInputs {
456 match self {
457 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
458 }
459 }
460}
461
462pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
463 let globs = glob_set
464 .split(',')
465 .map(str::trim)
466 .filter(|&glob_str| (!glob_str.is_empty()))
467 .map(|glob_str| glob_str.to_owned())
468 .collect::<Vec<_>>();
469 Ok(PathMatcher::new(&globs)?)
470}
471
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted as matcher sources and match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed globs accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}