1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use gpui::Model;
5use language::{Buffer, BufferSnapshot};
6use regex::{Captures, Regex, RegexBuilder};
7use smol::future::yield_now;
8use std::{
9 borrow::Cow,
10 io::{BufRead, BufReader, Read},
11 ops::Range,
12 path::Path,
13 sync::{Arc, OnceLock},
14};
15use text::Anchor;
16use util::paths::PathMatcher;
17
18static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: OnceLock<Regex> = OnceLock::new();
19
20pub enum SearchResult {
21 Buffer {
22 buffer: Model<Buffer>,
23 ranges: Vec<Range<Anchor>>,
24 },
25 LimitReached,
26}
27
28#[derive(Clone, Debug)]
29pub struct SearchInputs {
30 query: Arc<str>,
31 files_to_include: PathMatcher,
32 files_to_exclude: PathMatcher,
33 buffers: Option<Vec<Model<Buffer>>>,
34}
35
36impl SearchInputs {
37 pub fn as_str(&self) -> &str {
38 self.query.as_ref()
39 }
40 pub fn files_to_include(&self) -> &PathMatcher {
41 &self.files_to_include
42 }
43 pub fn files_to_exclude(&self) -> &PathMatcher {
44 &self.files_to_exclude
45 }
46 pub fn buffers(&self) -> &Option<Vec<Model<Buffer>>> {
47 &self.buffers
48 }
49}
50#[derive(Clone, Debug)]
51pub enum SearchQuery {
52 Text {
53 search: Arc<AhoCorasick>,
54 replacement: Option<String>,
55 whole_word: bool,
56 case_sensitive: bool,
57 include_ignored: bool,
58 inner: SearchInputs,
59 },
60
61 Regex {
62 regex: Regex,
63 replacement: Option<String>,
64 multiline: bool,
65 whole_word: bool,
66 case_sensitive: bool,
67 include_ignored: bool,
68 inner: SearchInputs,
69 },
70}
71
72impl SearchQuery {
73 pub fn text(
74 query: impl ToString,
75 whole_word: bool,
76 case_sensitive: bool,
77 include_ignored: bool,
78 files_to_include: PathMatcher,
79 files_to_exclude: PathMatcher,
80 buffers: Option<Vec<Model<Buffer>>>,
81 ) -> Result<Self> {
82 let query = query.to_string();
83 let search = AhoCorasickBuilder::new()
84 .ascii_case_insensitive(!case_sensitive)
85 .build(&[&query])?;
86 let inner = SearchInputs {
87 query: query.into(),
88 files_to_exclude,
89 files_to_include,
90 buffers,
91 };
92 Ok(Self::Text {
93 search: Arc::new(search),
94 replacement: None,
95 whole_word,
96 case_sensitive,
97 include_ignored,
98 inner,
99 })
100 }
101
102 pub fn regex(
103 query: impl ToString,
104 whole_word: bool,
105 case_sensitive: bool,
106 include_ignored: bool,
107 files_to_include: PathMatcher,
108 files_to_exclude: PathMatcher,
109 buffers: Option<Vec<Model<Buffer>>>,
110 ) -> Result<Self> {
111 let mut query = query.to_string();
112 let initial_query = Arc::from(query.as_str());
113 if whole_word {
114 let mut word_query = String::new();
115 word_query.push_str("\\b");
116 word_query.push_str(&query);
117 word_query.push_str("\\b");
118 query = word_query
119 }
120
121 let multiline = query.contains('\n') || query.contains("\\n");
122 let regex = RegexBuilder::new(&query)
123 .case_insensitive(!case_sensitive)
124 .multi_line(multiline)
125 .build()?;
126 let inner = SearchInputs {
127 query: initial_query,
128 files_to_exclude,
129 files_to_include,
130 buffers,
131 };
132 Ok(Self::Regex {
133 regex,
134 replacement: None,
135 multiline,
136 whole_word,
137 case_sensitive,
138 include_ignored,
139 inner,
140 })
141 }
142
143 pub fn from_proto_v1(message: proto::SearchProject) -> Result<Self> {
144 if message.regex {
145 Self::regex(
146 message.query,
147 message.whole_word,
148 message.case_sensitive,
149 message.include_ignored,
150 deserialize_path_matches(&message.files_to_include)?,
151 deserialize_path_matches(&message.files_to_exclude)?,
152 None,
153 )
154 } else {
155 Self::text(
156 message.query,
157 message.whole_word,
158 message.case_sensitive,
159 message.include_ignored,
160 deserialize_path_matches(&message.files_to_include)?,
161 deserialize_path_matches(&message.files_to_exclude)?,
162 None,
163 )
164 }
165 }
166
167 pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
168 if message.regex {
169 Self::regex(
170 message.query,
171 message.whole_word,
172 message.case_sensitive,
173 message.include_ignored,
174 deserialize_path_matches(&message.files_to_include)?,
175 deserialize_path_matches(&message.files_to_exclude)?,
176 None, // search opened only don't need search remote
177 )
178 } else {
179 Self::text(
180 message.query,
181 message.whole_word,
182 message.case_sensitive,
183 message.include_ignored,
184 deserialize_path_matches(&message.files_to_include)?,
185 deserialize_path_matches(&message.files_to_exclude)?,
186 None, // search opened only don't need search remote
187 )
188 }
189 }
190 pub fn with_replacement(mut self, new_replacement: String) -> Self {
191 match self {
192 Self::Text {
193 ref mut replacement,
194 ..
195 }
196 | Self::Regex {
197 ref mut replacement,
198 ..
199 } => {
200 *replacement = Some(new_replacement);
201 self
202 }
203 }
204 }
205 pub fn to_protov1(&self, project_id: u64) -> proto::SearchProject {
206 proto::SearchProject {
207 project_id,
208 query: self.as_str().to_string(),
209 regex: self.is_regex(),
210 whole_word: self.whole_word(),
211 case_sensitive: self.case_sensitive(),
212 include_ignored: self.include_ignored(),
213 files_to_include: self.files_to_include().sources().join(","),
214 files_to_exclude: self.files_to_exclude().sources().join(","),
215 }
216 }
217
218 pub fn to_proto(&self) -> proto::SearchQuery {
219 proto::SearchQuery {
220 query: self.as_str().to_string(),
221 regex: self.is_regex(),
222 whole_word: self.whole_word(),
223 case_sensitive: self.case_sensitive(),
224 include_ignored: self.include_ignored(),
225 files_to_include: self.files_to_include().sources().join(","),
226 files_to_exclude: self.files_to_exclude().sources().join(","),
227 }
228 }
229
230 pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
231 if self.as_str().is_empty() {
232 return Ok(false);
233 }
234
235 match self {
236 Self::Text { search, .. } => {
237 let mat = search.stream_find_iter(stream).next();
238 match mat {
239 Some(Ok(_)) => Ok(true),
240 Some(Err(err)) => Err(err.into()),
241 None => Ok(false),
242 }
243 }
244 Self::Regex {
245 regex, multiline, ..
246 } => {
247 let mut reader = BufReader::new(stream);
248 if *multiline {
249 let mut text = String::new();
250 if let Err(err) = reader.read_to_string(&mut text) {
251 Err(err.into())
252 } else {
253 Ok(regex.find(&text).is_some())
254 }
255 } else {
256 for line in reader.lines() {
257 let line = line?;
258 if regex.find(&line).is_some() {
259 return Ok(true);
260 }
261 }
262 Ok(false)
263 }
264 }
265 }
266 }
267 /// Returns the replacement text for this `SearchQuery`.
268 pub fn replacement(&self) -> Option<&str> {
269 match self {
270 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
271 replacement.as_deref()
272 }
273 }
274 }
275 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
276 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
277 match self {
278 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
279 SearchQuery::Regex {
280 regex, replacement, ..
281 } => {
282 if let Some(replacement) = replacement {
283 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX
284 .get_or_init(|| Regex::new(r"\\\\|\\n|\\t").unwrap())
285 .replace_all(replacement, |c: &Captures| {
286 match c.get(0).unwrap().as_str() {
287 r"\\" => "\\",
288 r"\n" => "\n",
289 r"\t" => "\t",
290 x => unreachable!("Unexpected escape sequence: {}", x),
291 }
292 });
293 Some(regex.replace(text, replacement))
294 } else {
295 None
296 }
297 }
298 }
299 }
300
301 pub async fn search(
302 &self,
303 buffer: &BufferSnapshot,
304 subrange: Option<Range<usize>>,
305 ) -> Vec<Range<usize>> {
306 const YIELD_INTERVAL: usize = 20000;
307
308 if self.as_str().is_empty() {
309 return Default::default();
310 }
311
312 let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
313 let rope = if let Some(range) = subrange {
314 buffer.as_rope().slice(range)
315 } else {
316 buffer.as_rope().clone()
317 };
318
319 let mut matches = Vec::new();
320 match self {
321 Self::Text {
322 search, whole_word, ..
323 } => {
324 for (ix, mat) in search
325 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
326 .enumerate()
327 {
328 if (ix + 1) % YIELD_INTERVAL == 0 {
329 yield_now().await;
330 }
331
332 let mat = mat.unwrap();
333 if *whole_word {
334 let classifier = buffer.char_classifier_at(range_offset + mat.start());
335
336 let prev_kind = rope
337 .reversed_chars_at(mat.start())
338 .next()
339 .map(|c| classifier.kind(c));
340 let start_kind =
341 classifier.kind(rope.chars_at(mat.start()).next().unwrap());
342 let end_kind =
343 classifier.kind(rope.reversed_chars_at(mat.end()).next().unwrap());
344 let next_kind = rope.chars_at(mat.end()).next().map(|c| classifier.kind(c));
345 if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
346 continue;
347 }
348 }
349 matches.push(mat.start()..mat.end())
350 }
351 }
352
353 Self::Regex {
354 regex, multiline, ..
355 } => {
356 if *multiline {
357 let text = rope.to_string();
358 for (ix, mat) in regex.find_iter(&text).enumerate() {
359 if (ix + 1) % YIELD_INTERVAL == 0 {
360 yield_now().await;
361 }
362
363 matches.push(mat.start()..mat.end());
364 }
365 } else {
366 let mut line = String::new();
367 let mut line_offset = 0;
368 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
369 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
370 yield_now().await;
371 }
372
373 for (newline_ix, text) in chunk.split('\n').enumerate() {
374 if newline_ix > 0 {
375 for mat in regex.find_iter(&line) {
376 let start = line_offset + mat.start();
377 let end = line_offset + mat.end();
378 matches.push(start..end);
379 }
380
381 line_offset += line.len() + 1;
382 line.clear();
383 }
384 line.push_str(text);
385 }
386 }
387 }
388 }
389 }
390
391 matches
392 }
393
394 pub fn is_empty(&self) -> bool {
395 self.as_str().is_empty()
396 }
397
398 pub fn as_str(&self) -> &str {
399 self.as_inner().as_str()
400 }
401
402 pub fn whole_word(&self) -> bool {
403 match self {
404 Self::Text { whole_word, .. } => *whole_word,
405 Self::Regex { whole_word, .. } => *whole_word,
406 }
407 }
408
409 pub fn case_sensitive(&self) -> bool {
410 match self {
411 Self::Text { case_sensitive, .. } => *case_sensitive,
412 Self::Regex { case_sensitive, .. } => *case_sensitive,
413 }
414 }
415
416 pub fn include_ignored(&self) -> bool {
417 match self {
418 Self::Text {
419 include_ignored, ..
420 } => *include_ignored,
421 Self::Regex {
422 include_ignored, ..
423 } => *include_ignored,
424 }
425 }
426
427 pub fn is_regex(&self) -> bool {
428 matches!(self, Self::Regex { .. })
429 }
430
431 pub fn files_to_include(&self) -> &PathMatcher {
432 self.as_inner().files_to_include()
433 }
434
435 pub fn files_to_exclude(&self) -> &PathMatcher {
436 self.as_inner().files_to_exclude()
437 }
438
439 pub fn buffers(&self) -> Option<&Vec<Model<Buffer>>> {
440 self.as_inner().buffers.as_ref()
441 }
442
443 pub fn is_opened_only(&self) -> bool {
444 self.as_inner().buffers.is_some()
445 }
446
447 pub fn filters_path(&self) -> bool {
448 !(self.files_to_exclude().sources().is_empty()
449 && self.files_to_include().sources().is_empty())
450 }
451
452 pub fn file_matches(&self, file_path: &Path) -> bool {
453 let mut path = file_path.to_path_buf();
454 loop {
455 if self.files_to_exclude().is_match(&path) {
456 return false;
457 } else if self.files_to_include().sources().is_empty()
458 || self.files_to_include().is_match(&path)
459 {
460 return true;
461 } else if !path.pop() {
462 return false;
463 }
464 }
465 }
466 pub fn as_inner(&self) -> &SearchInputs {
467 match self {
468 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
469 }
470 }
471}
472
473pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
474 let globs = glob_set
475 .split(',')
476 .map(str::trim)
477 .filter_map(|glob_str| (!glob_str.is_empty()).then(|| glob_str.to_owned()))
478 .collect::<Vec<_>>();
479 Ok(PathMatcher::new(&globs)?)
480}
481
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted as patterns and must match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];
        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            );
        }
    }

    /// Malformed globs must be rejected; well-formed globs must be accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}