1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use gpui::Model;
5use language::{Buffer, BufferSnapshot};
6use regex::{Captures, Regex, RegexBuilder};
7use smol::future::yield_now;
8use std::{
9 borrow::Cow,
10 io::{BufRead, BufReader, Read},
11 ops::Range,
12 path::Path,
13 sync::{Arc, OnceLock},
14};
15use text::Anchor;
16use util::paths::PathMatcher;
17
18static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: OnceLock<Regex> = OnceLock::new();
19
20pub enum SearchResult {
21 Buffer {
22 buffer: Model<Buffer>,
23 ranges: Vec<Range<Anchor>>,
24 },
25 LimitReached,
26}
27
/// Names one of the three inputs of a search: the query text or one of the two
/// path filters.
///
/// Derives `Debug` and `Eq` in addition to the original traits so the type can be
/// logged, used in `assert_eq!`, and compared with full equality semantics.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SearchInputKind {
    /// The query text itself.
    Query,
    /// The "files to include" filter.
    Include,
    /// The "files to exclude" filter.
    Exclude,
}
34
35#[derive(Clone, Debug)]
36pub struct SearchInputs {
37 query: Arc<str>,
38 files_to_include: PathMatcher,
39 files_to_exclude: PathMatcher,
40 buffers: Option<Vec<Model<Buffer>>>,
41}
42
43impl SearchInputs {
44 pub fn as_str(&self) -> &str {
45 self.query.as_ref()
46 }
47 pub fn files_to_include(&self) -> &PathMatcher {
48 &self.files_to_include
49 }
50 pub fn files_to_exclude(&self) -> &PathMatcher {
51 &self.files_to_exclude
52 }
53 pub fn buffers(&self) -> &Option<Vec<Model<Buffer>>> {
54 &self.buffers
55 }
56}
57#[derive(Clone, Debug)]
58pub enum SearchQuery {
59 Text {
60 search: Arc<AhoCorasick>,
61 replacement: Option<String>,
62 whole_word: bool,
63 case_sensitive: bool,
64 include_ignored: bool,
65 inner: SearchInputs,
66 },
67
68 Regex {
69 regex: Regex,
70 replacement: Option<String>,
71 multiline: bool,
72 whole_word: bool,
73 case_sensitive: bool,
74 include_ignored: bool,
75 inner: SearchInputs,
76 },
77}
78
79impl SearchQuery {
80 pub fn text(
81 query: impl ToString,
82 whole_word: bool,
83 case_sensitive: bool,
84 include_ignored: bool,
85 files_to_include: PathMatcher,
86 files_to_exclude: PathMatcher,
87 buffers: Option<Vec<Model<Buffer>>>,
88 ) -> Result<Self> {
89 let query = query.to_string();
90 let search = AhoCorasickBuilder::new()
91 .ascii_case_insensitive(!case_sensitive)
92 .build([&query])?;
93 let inner = SearchInputs {
94 query: query.into(),
95 files_to_exclude,
96 files_to_include,
97 buffers,
98 };
99 Ok(Self::Text {
100 search: Arc::new(search),
101 replacement: None,
102 whole_word,
103 case_sensitive,
104 include_ignored,
105 inner,
106 })
107 }
108
109 pub fn regex(
110 query: impl ToString,
111 whole_word: bool,
112 case_sensitive: bool,
113 include_ignored: bool,
114 files_to_include: PathMatcher,
115 files_to_exclude: PathMatcher,
116 buffers: Option<Vec<Model<Buffer>>>,
117 ) -> Result<Self> {
118 let mut query = query.to_string();
119 let initial_query = Arc::from(query.as_str());
120 if whole_word {
121 let mut word_query = String::new();
122 word_query.push_str("\\b");
123 word_query.push_str(&query);
124 word_query.push_str("\\b");
125 query = word_query
126 }
127
128 let multiline = query.contains('\n') || query.contains("\\n") || query.contains("\\s");
129 let regex = RegexBuilder::new(&query)
130 .case_insensitive(!case_sensitive)
131 .multi_line(multiline)
132 .build()?;
133 let inner = SearchInputs {
134 query: initial_query,
135 files_to_exclude,
136 files_to_include,
137 buffers,
138 };
139 Ok(Self::Regex {
140 regex,
141 replacement: None,
142 multiline,
143 whole_word,
144 case_sensitive,
145 include_ignored,
146 inner,
147 })
148 }
149
150 pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
151 if message.regex {
152 Self::regex(
153 message.query,
154 message.whole_word,
155 message.case_sensitive,
156 message.include_ignored,
157 deserialize_path_matches(&message.files_to_include)?,
158 deserialize_path_matches(&message.files_to_exclude)?,
159 None, // search opened only don't need search remote
160 )
161 } else {
162 Self::text(
163 message.query,
164 message.whole_word,
165 message.case_sensitive,
166 message.include_ignored,
167 deserialize_path_matches(&message.files_to_include)?,
168 deserialize_path_matches(&message.files_to_exclude)?,
169 None, // search opened only don't need search remote
170 )
171 }
172 }
173
174 pub fn with_replacement(mut self, new_replacement: String) -> Self {
175 match self {
176 Self::Text {
177 ref mut replacement,
178 ..
179 }
180 | Self::Regex {
181 ref mut replacement,
182 ..
183 } => {
184 *replacement = Some(new_replacement);
185 self
186 }
187 }
188 }
189
190 pub fn to_proto(&self) -> proto::SearchQuery {
191 proto::SearchQuery {
192 query: self.as_str().to_string(),
193 regex: self.is_regex(),
194 whole_word: self.whole_word(),
195 case_sensitive: self.case_sensitive(),
196 include_ignored: self.include_ignored(),
197 files_to_include: self.files_to_include().sources().join(","),
198 files_to_exclude: self.files_to_exclude().sources().join(","),
199 }
200 }
201
202 pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
203 if self.as_str().is_empty() {
204 return Ok(false);
205 }
206
207 match self {
208 Self::Text { search, .. } => {
209 let mat = search.stream_find_iter(stream).next();
210 match mat {
211 Some(Ok(_)) => Ok(true),
212 Some(Err(err)) => Err(err.into()),
213 None => Ok(false),
214 }
215 }
216 Self::Regex {
217 regex, multiline, ..
218 } => {
219 let mut reader = BufReader::new(stream);
220 if *multiline {
221 let mut text = String::new();
222 if let Err(err) = reader.read_to_string(&mut text) {
223 Err(err.into())
224 } else {
225 Ok(regex.find(&text).is_some())
226 }
227 } else {
228 for line in reader.lines() {
229 let line = line?;
230 if regex.find(&line).is_some() {
231 return Ok(true);
232 }
233 }
234 Ok(false)
235 }
236 }
237 }
238 }
239 /// Returns the replacement text for this `SearchQuery`.
240 pub fn replacement(&self) -> Option<&str> {
241 match self {
242 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
243 replacement.as_deref()
244 }
245 }
246 }
247 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
248 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
249 match self {
250 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
251 SearchQuery::Regex {
252 regex, replacement, ..
253 } => {
254 if let Some(replacement) = replacement {
255 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX
256 .get_or_init(|| Regex::new(r"\\\\|\\n|\\t").unwrap())
257 .replace_all(replacement, |c: &Captures| {
258 match c.get(0).unwrap().as_str() {
259 r"\\" => "\\",
260 r"\n" => "\n",
261 r"\t" => "\t",
262 x => unreachable!("Unexpected escape sequence: {}", x),
263 }
264 });
265 Some(regex.replace(text, replacement))
266 } else {
267 None
268 }
269 }
270 }
271 }
272
273 pub async fn search(
274 &self,
275 buffer: &BufferSnapshot,
276 subrange: Option<Range<usize>>,
277 ) -> Vec<Range<usize>> {
278 const YIELD_INTERVAL: usize = 20000;
279
280 if self.as_str().is_empty() {
281 return Default::default();
282 }
283
284 let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
285 let rope = if let Some(range) = subrange {
286 buffer.as_rope().slice(range)
287 } else {
288 buffer.as_rope().clone()
289 };
290
291 let mut matches = Vec::new();
292 match self {
293 Self::Text {
294 search, whole_word, ..
295 } => {
296 for (ix, mat) in search
297 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
298 .enumerate()
299 {
300 if (ix + 1) % YIELD_INTERVAL == 0 {
301 yield_now().await;
302 }
303
304 let mat = mat.unwrap();
305 if *whole_word {
306 let classifier = buffer.char_classifier_at(range_offset + mat.start());
307
308 let prev_kind = rope
309 .reversed_chars_at(mat.start())
310 .next()
311 .map(|c| classifier.kind(c));
312 let start_kind =
313 classifier.kind(rope.chars_at(mat.start()).next().unwrap());
314 let end_kind =
315 classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
316 let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
317 if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
318 continue;
319 }
320 }
321 matches.push(mat.start()..mat.end())
322 }
323 }
324
325 Self::Regex {
326 regex, multiline, ..
327 } => {
328 if *multiline {
329 let text = rope.to_string();
330 for (ix, mat) in regex.find_iter(&text).enumerate() {
331 if (ix + 1) % YIELD_INTERVAL == 0 {
332 yield_now().await;
333 }
334
335 matches.push(mat.start()..mat.end());
336 }
337 } else {
338 let mut line = String::new();
339 let mut line_offset = 0;
340 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
341 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
342 yield_now().await;
343 }
344
345 for (newline_ix, text) in chunk.split('\n').enumerate() {
346 if newline_ix > 0 {
347 for mat in regex.find_iter(&line) {
348 let start = line_offset + mat.start();
349 let end = line_offset + mat.end();
350 matches.push(start..end);
351 }
352
353 line_offset += line.len() + 1;
354 line.clear();
355 }
356 line.push_str(text);
357 }
358 }
359 }
360 }
361 }
362
363 matches
364 }
365
366 pub fn is_empty(&self) -> bool {
367 self.as_str().is_empty()
368 }
369
370 pub fn as_str(&self) -> &str {
371 self.as_inner().as_str()
372 }
373
374 pub fn whole_word(&self) -> bool {
375 match self {
376 Self::Text { whole_word, .. } => *whole_word,
377 Self::Regex { whole_word, .. } => *whole_word,
378 }
379 }
380
381 pub fn case_sensitive(&self) -> bool {
382 match self {
383 Self::Text { case_sensitive, .. } => *case_sensitive,
384 Self::Regex { case_sensitive, .. } => *case_sensitive,
385 }
386 }
387
388 pub fn include_ignored(&self) -> bool {
389 match self {
390 Self::Text {
391 include_ignored, ..
392 } => *include_ignored,
393 Self::Regex {
394 include_ignored, ..
395 } => *include_ignored,
396 }
397 }
398
399 pub fn is_regex(&self) -> bool {
400 matches!(self, Self::Regex { .. })
401 }
402
403 pub fn files_to_include(&self) -> &PathMatcher {
404 self.as_inner().files_to_include()
405 }
406
407 pub fn files_to_exclude(&self) -> &PathMatcher {
408 self.as_inner().files_to_exclude()
409 }
410
411 pub fn buffers(&self) -> Option<&Vec<Model<Buffer>>> {
412 self.as_inner().buffers.as_ref()
413 }
414
415 pub fn is_opened_only(&self) -> bool {
416 self.as_inner().buffers.is_some()
417 }
418
419 pub fn filters_path(&self) -> bool {
420 !(self.files_to_exclude().sources().is_empty()
421 && self.files_to_include().sources().is_empty())
422 }
423
424 pub fn file_matches(&self, file_path: &Path) -> bool {
425 let mut path = file_path.to_path_buf();
426 loop {
427 if self.files_to_exclude().is_match(&path) {
428 return false;
429 } else if self.files_to_include().sources().is_empty()
430 || self.files_to_include().is_match(&path)
431 {
432 return true;
433 } else if !path.pop() {
434 return false;
435 }
436 }
437 }
438 pub fn as_inner(&self) -> &SearchInputs {
439 match self {
440 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
441 }
442 }
443}
444
445pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
446 let globs = glob_set
447 .split(',')
448 .map(str::trim)
449 .filter(|&glob_str| (!glob_str.is_empty()))
450 .map(|glob_str| glob_str.to_owned())
451 .collect::<Vec<_>>();
452 Ok(PathMatcher::new(&globs)?)
453}
454
#[cfg(test)]
mod tests {
    use super::*;

    /// Every plain path must be accepted as a glob and must match itself.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];
        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed ones accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}