1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use gpui::Model;
5use language::{Buffer, BufferSnapshot};
6use regex::{Captures, Regex, RegexBuilder};
7use smol::future::yield_now;
8use std::{
9 borrow::Cow,
10 io::{BufRead, BufReader, Read},
11 ops::Range,
12 path::Path,
13 sync::{Arc, OnceLock},
14};
15use text::Anchor;
16use util::paths::PathMatcher;
17
/// Lazily-initialised regex used by `SearchQuery::replacement_for` to find the
/// escape sequences `\\`, `\n` and `\t` inside a regex replacement string.
/// It starts empty and is compiled on first use via `get_or_init`.
static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: OnceLock<Regex> = OnceLock::new();
19
/// A single item produced while running a search.
pub enum SearchResult {
    /// Matches found in one buffer.
    Buffer {
        /// The buffer the matches belong to.
        buffer: Model<Buffer>,
        /// Anchor ranges of the individual matches inside `buffer`.
        ranges: Vec<Range<Anchor>>,
    },
    /// NOTE(review): presumably emitted when the search stops early because a
    /// result cap was hit — the producer is not in this file; confirm there.
    LimitReached,
}
27
/// Identifies one of the three user-editable search inputs.
///
/// NOTE(review): not referenced elsewhere in this file; the variant meanings
/// below are inferred from their names and from the `SearchInputs` fields.
#[derive(Clone, Copy, PartialEq)]
pub enum SearchInputKind {
    /// The search query text.
    Query,
    /// The "files to include" filter.
    Include,
    /// The "files to exclude" filter.
    Exclude,
}
34
/// The raw inputs a `SearchQuery` was constructed from.
#[derive(Clone, Debug)]
pub struct SearchInputs {
    /// The query text as originally supplied (before any whole-word wrapping
    /// applied by `SearchQuery::regex`).
    query: Arc<str>,
    /// Matcher for paths that should be searched.
    files_to_include: PathMatcher,
    /// Matcher for paths that should be skipped.
    files_to_exclude: PathMatcher,
    /// When `Some`, the explicit set of buffers supplied by the caller
    /// (see `SearchQuery::is_opened_only`); `None` otherwise.
    buffers: Option<Vec<Model<Buffer>>>,
}
42
43impl SearchInputs {
44 pub fn as_str(&self) -> &str {
45 self.query.as_ref()
46 }
47 pub fn files_to_include(&self) -> &PathMatcher {
48 &self.files_to_include
49 }
50 pub fn files_to_exclude(&self) -> &PathMatcher {
51 &self.files_to_exclude
52 }
53 pub fn buffers(&self) -> &Option<Vec<Model<Buffer>>> {
54 &self.buffers
55 }
56}
/// A compiled search query: either a literal-text search or a regex search.
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search backed by an Aho-Corasick automaton.
    Text {
        /// Automaton built from the query text; ASCII case-insensitive when
        /// `case_sensitive` is `false`.
        search: Arc<AhoCorasick>,
        /// Replacement text, `None` until set through `with_replacement`.
        replacement: Option<String>,
        /// Whether matches must be delimited by a change in character kind
        /// (checked per match in `search`).
        whole_word: bool,
        /// Whether matching distinguishes letter case.
        case_sensitive: bool,
        /// Stored and serialized by this type but not interpreted here.
        /// NOTE(review): presumably controls whether ignored files are searched.
        include_ignored: bool,
        /// The original inputs this query was built from.
        inner: SearchInputs,
    },

    /// Regular-expression search.
    Regex {
        /// The compiled pattern (including any whole-word wrapping).
        regex: Regex,
        /// Replacement template, `None` until set through `with_replacement`.
        replacement: Option<String>,
        /// `true` when the pattern contains a newline or the sequence `\n`;
        /// selects whole-text matching instead of line-by-line matching.
        multiline: bool,
        /// Whether the pattern was wrapped in word boundaries at build time.
        whole_word: bool,
        /// Whether matching distinguishes letter case.
        case_sensitive: bool,
        /// Stored and serialized by this type but not interpreted here.
        /// NOTE(review): presumably controls whether ignored files are searched.
        include_ignored: bool,
        /// The original inputs this query was built from.
        inner: SearchInputs,
    },
}
78
79impl SearchQuery {
80 pub fn text(
81 query: impl ToString,
82 whole_word: bool,
83 case_sensitive: bool,
84 include_ignored: bool,
85 files_to_include: PathMatcher,
86 files_to_exclude: PathMatcher,
87 buffers: Option<Vec<Model<Buffer>>>,
88 ) -> Result<Self> {
89 let query = query.to_string();
90 let search = AhoCorasickBuilder::new()
91 .ascii_case_insensitive(!case_sensitive)
92 .build([&query])?;
93 let inner = SearchInputs {
94 query: query.into(),
95 files_to_exclude,
96 files_to_include,
97 buffers,
98 };
99 Ok(Self::Text {
100 search: Arc::new(search),
101 replacement: None,
102 whole_word,
103 case_sensitive,
104 include_ignored,
105 inner,
106 })
107 }
108
109 pub fn regex(
110 query: impl ToString,
111 whole_word: bool,
112 case_sensitive: bool,
113 include_ignored: bool,
114 files_to_include: PathMatcher,
115 files_to_exclude: PathMatcher,
116 buffers: Option<Vec<Model<Buffer>>>,
117 ) -> Result<Self> {
118 let mut query = query.to_string();
119 let initial_query = Arc::from(query.as_str());
120 if whole_word {
121 let mut word_query = String::new();
122 word_query.push_str("\\b");
123 word_query.push_str(&query);
124 word_query.push_str("\\b");
125 query = word_query
126 }
127
128 let multiline = query.contains('\n') || query.contains("\\n");
129 let regex = RegexBuilder::new(&query)
130 .case_insensitive(!case_sensitive)
131 .multi_line(multiline)
132 .build()?;
133 let inner = SearchInputs {
134 query: initial_query,
135 files_to_exclude,
136 files_to_include,
137 buffers,
138 };
139 Ok(Self::Regex {
140 regex,
141 replacement: None,
142 multiline,
143 whole_word,
144 case_sensitive,
145 include_ignored,
146 inner,
147 })
148 }
149
150 pub fn from_proto_v1(message: proto::SearchProject) -> Result<Self> {
151 if message.regex {
152 Self::regex(
153 message.query,
154 message.whole_word,
155 message.case_sensitive,
156 message.include_ignored,
157 deserialize_path_matches(&message.files_to_include)?,
158 deserialize_path_matches(&message.files_to_exclude)?,
159 None,
160 )
161 } else {
162 Self::text(
163 message.query,
164 message.whole_word,
165 message.case_sensitive,
166 message.include_ignored,
167 deserialize_path_matches(&message.files_to_include)?,
168 deserialize_path_matches(&message.files_to_exclude)?,
169 None,
170 )
171 }
172 }
173
174 pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
175 if message.regex {
176 Self::regex(
177 message.query,
178 message.whole_word,
179 message.case_sensitive,
180 message.include_ignored,
181 deserialize_path_matches(&message.files_to_include)?,
182 deserialize_path_matches(&message.files_to_exclude)?,
183 None, // search opened only don't need search remote
184 )
185 } else {
186 Self::text(
187 message.query,
188 message.whole_word,
189 message.case_sensitive,
190 message.include_ignored,
191 deserialize_path_matches(&message.files_to_include)?,
192 deserialize_path_matches(&message.files_to_exclude)?,
193 None, // search opened only don't need search remote
194 )
195 }
196 }
197 pub fn with_replacement(mut self, new_replacement: String) -> Self {
198 match self {
199 Self::Text {
200 ref mut replacement,
201 ..
202 }
203 | Self::Regex {
204 ref mut replacement,
205 ..
206 } => {
207 *replacement = Some(new_replacement);
208 self
209 }
210 }
211 }
212 pub fn to_protov1(&self, project_id: u64) -> proto::SearchProject {
213 proto::SearchProject {
214 project_id,
215 query: self.as_str().to_string(),
216 regex: self.is_regex(),
217 whole_word: self.whole_word(),
218 case_sensitive: self.case_sensitive(),
219 include_ignored: self.include_ignored(),
220 files_to_include: self.files_to_include().sources().join(","),
221 files_to_exclude: self.files_to_exclude().sources().join(","),
222 }
223 }
224
225 pub fn to_proto(&self) -> proto::SearchQuery {
226 proto::SearchQuery {
227 query: self.as_str().to_string(),
228 regex: self.is_regex(),
229 whole_word: self.whole_word(),
230 case_sensitive: self.case_sensitive(),
231 include_ignored: self.include_ignored(),
232 files_to_include: self.files_to_include().sources().join(","),
233 files_to_exclude: self.files_to_exclude().sources().join(","),
234 }
235 }
236
237 pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
238 if self.as_str().is_empty() {
239 return Ok(false);
240 }
241
242 match self {
243 Self::Text { search, .. } => {
244 let mat = search.stream_find_iter(stream).next();
245 match mat {
246 Some(Ok(_)) => Ok(true),
247 Some(Err(err)) => Err(err.into()),
248 None => Ok(false),
249 }
250 }
251 Self::Regex {
252 regex, multiline, ..
253 } => {
254 let mut reader = BufReader::new(stream);
255 if *multiline {
256 let mut text = String::new();
257 if let Err(err) = reader.read_to_string(&mut text) {
258 Err(err.into())
259 } else {
260 Ok(regex.find(&text).is_some())
261 }
262 } else {
263 for line in reader.lines() {
264 let line = line?;
265 if regex.find(&line).is_some() {
266 return Ok(true);
267 }
268 }
269 Ok(false)
270 }
271 }
272 }
273 }
274 /// Returns the replacement text for this `SearchQuery`.
275 pub fn replacement(&self) -> Option<&str> {
276 match self {
277 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
278 replacement.as_deref()
279 }
280 }
281 }
282 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
283 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
284 match self {
285 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
286 SearchQuery::Regex {
287 regex, replacement, ..
288 } => {
289 if let Some(replacement) = replacement {
290 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX
291 .get_or_init(|| Regex::new(r"\\\\|\\n|\\t").unwrap())
292 .replace_all(replacement, |c: &Captures| {
293 match c.get(0).unwrap().as_str() {
294 r"\\" => "\\",
295 r"\n" => "\n",
296 r"\t" => "\t",
297 x => unreachable!("Unexpected escape sequence: {}", x),
298 }
299 });
300 Some(regex.replace(text, replacement))
301 } else {
302 None
303 }
304 }
305 }
306 }
307
    /// Finds all matches of this query in `buffer`, optionally restricted to
    /// the byte range `subrange`.
    ///
    /// Returns byte ranges relative to the searched rope: when `subrange` is
    /// given, offsets are relative to `subrange.start`, not to the start of
    /// the buffer. An empty query yields no matches.
    ///
    /// The future periodically yields to the executor (every `YIELD_INTERVAL`
    /// matches, or chunks in single-line regex mode).
    ///
    /// # Panics
    /// Panics if the text-search stream reports an error.
    /// NOTE(review): presumably impossible for an in-memory rope — confirm.
    pub async fn search(
        &self,
        buffer: &BufferSnapshot,
        subrange: Option<Range<usize>>,
    ) -> Vec<Range<usize>> {
        // How many matches/chunks to process between cooperative yields.
        const YIELD_INTERVAL: usize = 20000;

        if self.as_str().is_empty() {
            return Default::default();
        }

        // Offset of the searched rope within the full buffer; only needed to
        // look up the char classifier at the right buffer position.
        let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
        let rope = if let Some(range) = subrange {
            buffer.as_rope().slice(range)
        } else {
            buffer.as_rope().clone()
        };

        let mut matches = Vec::new();
        match self {
            Self::Text {
                search, whole_word, ..
            } => {
                for (ix, mat) in search
                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
                    .enumerate()
                {
                    if (ix + 1) % YIELD_INTERVAL == 0 {
                        yield_now().await;
                    }

                    let mat = mat.unwrap();
                    if *whole_word {
                        let classifier = buffer.char_classifier_at(range_offset + mat.start());

                        // Compare the character kinds just outside the match
                        // with those at its first and last characters. The
                        // match is rejected if either edge has the same kind
                        // on both sides, i.e. it does not start or end on a
                        // kind transition.
                        let prev_kind = rope
                            .reversed_chars_at(mat.start())
                            .next()
                            .map(|c| classifier.kind(c));
                        let start_kind =
                            classifier.kind(rope.chars_at(mat.start()).next().unwrap());
                        let end_kind =
                            classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
                        let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
                            continue;
                        }
                    }
                    matches.push(mat.start()..mat.end())
                }
            }

            Self::Regex {
                regex, multiline, ..
            } => {
                if *multiline {
                    // Multiline patterns need the whole text at once.
                    let text = rope.to_string();
                    for (ix, mat) in regex.find_iter(&text).enumerate() {
                        if (ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        matches.push(mat.start()..mat.end());
                    }
                } else {
                    // Single-line patterns are matched one line at a time.
                    // `line` accumulates the current line across chunk
                    // boundaries; `line_offset` is its start within the rope.
                    let mut line = String::new();
                    let mut line_offset = 0;
                    // The extra trailing "\n" chunk flushes the final line.
                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        for (newline_ix, text) in chunk.split('\n').enumerate() {
                            // Every piece after the first follows a newline,
                            // so the accumulated line is complete: search it,
                            // then start a new one.
                            if newline_ix > 0 {
                                for mat in regex.find_iter(&line) {
                                    let start = line_offset + mat.start();
                                    let end = line_offset + mat.end();
                                    matches.push(start..end);
                                }

                                // `+ 1` accounts for the newline itself.
                                line_offset += line.len() + 1;
                                line.clear();
                            }
                            line.push_str(text);
                        }
                    }
                }
            }
        }

        matches
    }
400
401 pub fn is_empty(&self) -> bool {
402 self.as_str().is_empty()
403 }
404
405 pub fn as_str(&self) -> &str {
406 self.as_inner().as_str()
407 }
408
409 pub fn whole_word(&self) -> bool {
410 match self {
411 Self::Text { whole_word, .. } => *whole_word,
412 Self::Regex { whole_word, .. } => *whole_word,
413 }
414 }
415
416 pub fn case_sensitive(&self) -> bool {
417 match self {
418 Self::Text { case_sensitive, .. } => *case_sensitive,
419 Self::Regex { case_sensitive, .. } => *case_sensitive,
420 }
421 }
422
423 pub fn include_ignored(&self) -> bool {
424 match self {
425 Self::Text {
426 include_ignored, ..
427 } => *include_ignored,
428 Self::Regex {
429 include_ignored, ..
430 } => *include_ignored,
431 }
432 }
433
434 pub fn is_regex(&self) -> bool {
435 matches!(self, Self::Regex { .. })
436 }
437
438 pub fn files_to_include(&self) -> &PathMatcher {
439 self.as_inner().files_to_include()
440 }
441
442 pub fn files_to_exclude(&self) -> &PathMatcher {
443 self.as_inner().files_to_exclude()
444 }
445
446 pub fn buffers(&self) -> Option<&Vec<Model<Buffer>>> {
447 self.as_inner().buffers.as_ref()
448 }
449
450 pub fn is_opened_only(&self) -> bool {
451 self.as_inner().buffers.is_some()
452 }
453
454 pub fn filters_path(&self) -> bool {
455 !(self.files_to_exclude().sources().is_empty()
456 && self.files_to_include().sources().is_empty())
457 }
458
459 pub fn file_matches(&self, file_path: &Path) -> bool {
460 let mut path = file_path.to_path_buf();
461 loop {
462 if self.files_to_exclude().is_match(&path) {
463 return false;
464 } else if self.files_to_include().sources().is_empty()
465 || self.files_to_include().is_match(&path)
466 {
467 return true;
468 } else if !path.pop() {
469 return false;
470 }
471 }
472 }
473 pub fn as_inner(&self) -> &SearchInputs {
474 match self {
475 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
476 }
477 }
478}
479
480pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
481 let globs = glob_set
482 .split(',')
483 .map(str::trim)
484 .filter(|&glob_str| (!glob_str.is_empty()))
485 .map(|glob_str| glob_str.to_owned())
486 .collect::<Vec<_>>();
487 Ok(PathMatcher::new(&globs)?)
488}
489
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths (no glob metacharacters beyond a character class) must be
    /// accepted by `PathMatcher::new` and must match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed globs accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            assert!(
                PathMatcher::new(&[invalid_glob.to_owned()]).is_err(),
                "Invalid glob {invalid_glob} should not be accepted"
            );
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}