1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use fancy_regex::{Captures, Regex, RegexBuilder};
5use gpui::Model;
6use language::{Buffer, BufferSnapshot};
7use smol::future::yield_now;
8use std::{
9 borrow::Cow,
10 io::{BufRead, BufReader, Read},
11 ops::Range,
12 path::Path,
13 sync::{Arc, OnceLock},
14};
15use text::Anchor;
16use util::paths::PathMatcher;
17
/// Lazily compiled pattern that finds the escape sequences `\\`, `\n` and `\t`
/// inside a replacement string. It is initialized on first use by
/// `SearchQuery::replacement_for`.
static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: OnceLock<Regex> = OnceLock::new();
19
/// One item of search output.
pub enum SearchResult {
    /// Ranges reported for a single buffer.
    Buffer {
        /// The buffer the ranges refer to.
        buffer: Model<Buffer>,
        /// Anchor ranges inside `buffer`.
        ranges: Vec<Range<Anchor>>,
    },
    /// NOTE(review): presumably signals that the search stopped because a result
    /// limit was hit; nothing in this file produces it — confirm against the producer.
    LimitReached,
}
27
/// Names one of the inputs of a search.
///
/// NOTE(review): presumably the variants correspond to the query text, the
/// include filter and the exclude filter; this file never reads the enum —
/// confirm against callers.
#[derive(Clone, Copy, PartialEq)]
pub enum SearchInputKind {
    Query,
    Include,
    Exclude,
}
34
/// The inputs shared by both kinds of `SearchQuery`.
#[derive(Clone, Debug)]
pub struct SearchInputs {
    /// The query text as supplied to the constructor (for regex queries this is
    /// the text before any whole-word wrapping is applied).
    query: Arc<str>,
    /// Matcher consulted by `SearchQuery::file_matches` to accept paths.
    files_to_include: PathMatcher,
    /// Matcher consulted by `SearchQuery::file_matches` to reject paths.
    files_to_exclude: PathMatcher,
    /// Optional explicit list of buffers; `SearchQuery::is_opened_only` reports
    /// whether it is present.
    buffers: Option<Vec<Model<Buffer>>>,
}
42
43impl SearchInputs {
44 pub fn as_str(&self) -> &str {
45 self.query.as_ref()
46 }
47 pub fn files_to_include(&self) -> &PathMatcher {
48 &self.files_to_include
49 }
50 pub fn files_to_exclude(&self) -> &PathMatcher {
51 &self.files_to_exclude
52 }
53 pub fn buffers(&self) -> &Option<Vec<Model<Buffer>>> {
54 &self.buffers
55 }
56}
/// A ready-to-run search query: either literal text or a regular expression.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text query, built by `SearchQuery::text`.
    Text {
        /// Matcher built from the query text.
        search: Arc<AhoCorasick>,
        /// Replacement text; `None` until `with_replacement` is called.
        replacement: Option<String>,
        /// When set, `search` drops matches that do not start and end on a
        /// change of character kind.
        whole_word: bool,
        /// When unset, the matcher is built ASCII-case-insensitive.
        case_sensitive: bool,
        /// NOTE(review): presumably whether ignored files take part in the
        /// search; this file only stores and reports the flag.
        include_ignored: bool,
        /// Query text, path filters and optional buffer list.
        inner: SearchInputs,
    },

    /// Regular-expression query, built by `SearchQuery::regex`.
    Regex {
        /// The compiled pattern (including the whole-word wrapping, if any).
        regex: Regex,
        /// Replacement text; `None` until `with_replacement` is called.
        replacement: Option<String>,
        /// Set when the pattern contains a literal newline, `\n` or `\s`;
        /// `detect` and `search` then match against the whole text instead of
        /// line by line.
        multiline: bool,
        /// Whether the pattern was wrapped in word boundaries at construction.
        whole_word: bool,
        /// When unset, the pattern is compiled case-insensitive.
        case_sensitive: bool,
        /// NOTE(review): presumably whether ignored files take part in the
        /// search; this file only stores and reports the flag.
        include_ignored: bool,
        /// Query text, path filters and optional buffer list.
        inner: SearchInputs,
    },
}
78
79impl SearchQuery {
80 pub fn text(
81 query: impl ToString,
82 whole_word: bool,
83 case_sensitive: bool,
84 include_ignored: bool,
85 files_to_include: PathMatcher,
86 files_to_exclude: PathMatcher,
87 buffers: Option<Vec<Model<Buffer>>>,
88 ) -> Result<Self> {
89 let query = query.to_string();
90 let search = AhoCorasickBuilder::new()
91 .ascii_case_insensitive(!case_sensitive)
92 .build([&query])?;
93 let inner = SearchInputs {
94 query: query.into(),
95 files_to_exclude,
96 files_to_include,
97 buffers,
98 };
99 Ok(Self::Text {
100 search: Arc::new(search),
101 replacement: None,
102 whole_word,
103 case_sensitive,
104 include_ignored,
105 inner,
106 })
107 }
108
109 pub fn regex(
110 query: impl ToString,
111 whole_word: bool,
112 case_sensitive: bool,
113 include_ignored: bool,
114 files_to_include: PathMatcher,
115 files_to_exclude: PathMatcher,
116 buffers: Option<Vec<Model<Buffer>>>,
117 ) -> Result<Self> {
118 let mut query = query.to_string();
119 let initial_query = Arc::from(query.as_str());
120 if whole_word {
121 let mut word_query = String::new();
122 word_query.push_str("\\b");
123 word_query.push_str(&query);
124 word_query.push_str("\\b");
125 query = word_query
126 }
127
128 let multiline = query.contains('\n') || query.contains("\\n") || query.contains("\\s");
129 let regex = RegexBuilder::new(&query)
130 .case_insensitive(!case_sensitive)
131 .build()?;
132 let inner = SearchInputs {
133 query: initial_query,
134 files_to_exclude,
135 files_to_include,
136 buffers,
137 };
138 Ok(Self::Regex {
139 regex,
140 replacement: None,
141 multiline,
142 whole_word,
143 case_sensitive,
144 include_ignored,
145 inner,
146 })
147 }
148
149 pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
150 if message.regex {
151 Self::regex(
152 message.query,
153 message.whole_word,
154 message.case_sensitive,
155 message.include_ignored,
156 deserialize_path_matches(&message.files_to_include)?,
157 deserialize_path_matches(&message.files_to_exclude)?,
158 None, // search opened only don't need search remote
159 )
160 } else {
161 Self::text(
162 message.query,
163 message.whole_word,
164 message.case_sensitive,
165 message.include_ignored,
166 deserialize_path_matches(&message.files_to_include)?,
167 deserialize_path_matches(&message.files_to_exclude)?,
168 None, // search opened only don't need search remote
169 )
170 }
171 }
172
173 pub fn with_replacement(mut self, new_replacement: String) -> Self {
174 match self {
175 Self::Text {
176 ref mut replacement,
177 ..
178 }
179 | Self::Regex {
180 ref mut replacement,
181 ..
182 } => {
183 *replacement = Some(new_replacement);
184 self
185 }
186 }
187 }
188
189 pub fn to_proto(&self) -> proto::SearchQuery {
190 proto::SearchQuery {
191 query: self.as_str().to_string(),
192 regex: self.is_regex(),
193 whole_word: self.whole_word(),
194 case_sensitive: self.case_sensitive(),
195 include_ignored: self.include_ignored(),
196 files_to_include: self.files_to_include().sources().join(","),
197 files_to_exclude: self.files_to_exclude().sources().join(","),
198 }
199 }
200
201 pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
202 if self.as_str().is_empty() {
203 return Ok(false);
204 }
205
206 match self {
207 Self::Text { search, .. } => {
208 let mat = search.stream_find_iter(stream).next();
209 match mat {
210 Some(Ok(_)) => Ok(true),
211 Some(Err(err)) => Err(err.into()),
212 None => Ok(false),
213 }
214 }
215 Self::Regex {
216 regex, multiline, ..
217 } => {
218 let mut reader = BufReader::new(stream);
219 if *multiline {
220 let mut text = String::new();
221 if let Err(err) = reader.read_to_string(&mut text) {
222 Err(err.into())
223 } else {
224 Ok(regex.find(&text)?.is_some())
225 }
226 } else {
227 for line in reader.lines() {
228 let line = line?;
229 if regex.find(&line)?.is_some() {
230 return Ok(true);
231 }
232 }
233 Ok(false)
234 }
235 }
236 }
237 }
238 /// Returns the replacement text for this `SearchQuery`.
239 pub fn replacement(&self) -> Option<&str> {
240 match self {
241 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
242 replacement.as_deref()
243 }
244 }
245 }
246 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
247 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
248 match self {
249 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
250 SearchQuery::Regex {
251 regex, replacement, ..
252 } => {
253 if let Some(replacement) = replacement {
254 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX
255 .get_or_init(|| Regex::new(r"\\\\|\\n|\\t").unwrap())
256 .replace_all(replacement, |c: &Captures| {
257 match c.get(0).unwrap().as_str() {
258 r"\\" => "\\",
259 r"\n" => "\n",
260 r"\t" => "\t",
261 x => unreachable!("Unexpected escape sequence: {}", x),
262 }
263 });
264 Some(regex.replace(text, replacement))
265 } else {
266 None
267 }
268 }
269 }
270 }
271
272 pub async fn search(
273 &self,
274 buffer: &BufferSnapshot,
275 subrange: Option<Range<usize>>,
276 ) -> Vec<Range<usize>> {
277 const YIELD_INTERVAL: usize = 20000;
278
279 if self.as_str().is_empty() {
280 return Default::default();
281 }
282
283 let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
284 let rope = if let Some(range) = subrange {
285 buffer.as_rope().slice(range)
286 } else {
287 buffer.as_rope().clone()
288 };
289
290 let mut matches = Vec::new();
291 match self {
292 Self::Text {
293 search, whole_word, ..
294 } => {
295 for (ix, mat) in search
296 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
297 .enumerate()
298 {
299 if (ix + 1) % YIELD_INTERVAL == 0 {
300 yield_now().await;
301 }
302
303 let mat = mat.unwrap();
304 if *whole_word {
305 let classifier = buffer.char_classifier_at(range_offset + mat.start());
306
307 let prev_kind = rope
308 .reversed_chars_at(mat.start())
309 .next()
310 .map(|c| classifier.kind(c));
311 let start_kind =
312 classifier.kind(rope.chars_at(mat.start()).next().unwrap());
313 let end_kind =
314 classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
315 let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
316 if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
317 continue;
318 }
319 }
320 matches.push(mat.start()..mat.end())
321 }
322 }
323
324 Self::Regex {
325 regex, multiline, ..
326 } => {
327 if *multiline {
328 let text = rope.to_string();
329 for (ix, mat) in regex.find_iter(&text).enumerate() {
330 if (ix + 1) % YIELD_INTERVAL == 0 {
331 yield_now().await;
332 }
333
334 if let Ok(mat) = mat {
335 matches.push(mat.start()..mat.end());
336 }
337 }
338 } else {
339 let mut line = String::new();
340 let mut line_offset = 0;
341 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
342 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
343 yield_now().await;
344 }
345
346 for (newline_ix, text) in chunk.split('\n').enumerate() {
347 if newline_ix > 0 {
348 for mat in regex.find_iter(&line).flatten() {
349 let start = line_offset + mat.start();
350 let end = line_offset + mat.end();
351 matches.push(start..end);
352 }
353
354 line_offset += line.len() + 1;
355 line.clear();
356 }
357 line.push_str(text);
358 }
359 }
360 }
361 }
362 }
363
364 matches
365 }
366
367 pub fn is_empty(&self) -> bool {
368 self.as_str().is_empty()
369 }
370
371 pub fn as_str(&self) -> &str {
372 self.as_inner().as_str()
373 }
374
375 pub fn whole_word(&self) -> bool {
376 match self {
377 Self::Text { whole_word, .. } => *whole_word,
378 Self::Regex { whole_word, .. } => *whole_word,
379 }
380 }
381
382 pub fn case_sensitive(&self) -> bool {
383 match self {
384 Self::Text { case_sensitive, .. } => *case_sensitive,
385 Self::Regex { case_sensitive, .. } => *case_sensitive,
386 }
387 }
388
389 pub fn include_ignored(&self) -> bool {
390 match self {
391 Self::Text {
392 include_ignored, ..
393 } => *include_ignored,
394 Self::Regex {
395 include_ignored, ..
396 } => *include_ignored,
397 }
398 }
399
400 pub fn is_regex(&self) -> bool {
401 matches!(self, Self::Regex { .. })
402 }
403
404 pub fn files_to_include(&self) -> &PathMatcher {
405 self.as_inner().files_to_include()
406 }
407
408 pub fn files_to_exclude(&self) -> &PathMatcher {
409 self.as_inner().files_to_exclude()
410 }
411
412 pub fn buffers(&self) -> Option<&Vec<Model<Buffer>>> {
413 self.as_inner().buffers.as_ref()
414 }
415
416 pub fn is_opened_only(&self) -> bool {
417 self.as_inner().buffers.is_some()
418 }
419
420 pub fn filters_path(&self) -> bool {
421 !(self.files_to_exclude().sources().is_empty()
422 && self.files_to_include().sources().is_empty())
423 }
424
425 pub fn file_matches(&self, file_path: &Path) -> bool {
426 let mut path = file_path.to_path_buf();
427 loop {
428 if self.files_to_exclude().is_match(&path) {
429 return false;
430 } else if self.files_to_include().sources().is_empty()
431 || self.files_to_include().is_match(&path)
432 {
433 return true;
434 } else if !path.pop() {
435 return false;
436 }
437 }
438 }
439 pub fn as_inner(&self) -> &SearchInputs {
440 match self {
441 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
442 }
443 }
444}
445
446pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
447 let globs = glob_set
448 .split(',')
449 .map(str::trim)
450 .filter(|&glob_str| (!glob_str.is_empty()))
451 .map(|glob_str| glob_str.to_owned())
452 .collect::<Vec<_>>();
453 Ok(PathMatcher::new(&globs)?)
454}
455
#[cfg(test)]
mod tests {
    use super::*;

    /// Every plain path must be accepted as a matcher source and match itself.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        const VALID_PATHS: [&str; 7] = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in VALID_PATHS {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed ones accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        for invalid_glob in ["dir/[].txt", "dir/[a-z.txt", "dir/{file"] {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        for valid_glob in [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ] {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}