1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use gpui::Model;
5use language::{char_kind, Buffer, BufferSnapshot};
6use regex::{Captures, Regex, RegexBuilder};
7use smol::future::yield_now;
8use std::{
9 borrow::Cow,
10 io::{BufRead, BufReader, Read},
11 ops::Range,
12 path::Path,
13 sync::{Arc, OnceLock},
14};
15use text::Anchor;
16use util::paths::PathMatcher;
17
18static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: OnceLock<Regex> = OnceLock::new();
19
20pub enum SearchResult {
21 Buffer {
22 buffer: Model<Buffer>,
23 ranges: Vec<Range<Anchor>>,
24 },
25 LimitReached,
26}
27
28#[derive(Clone, Debug)]
29pub struct SearchInputs {
30 query: Arc<str>,
31 files_to_include: PathMatcher,
32 files_to_exclude: PathMatcher,
33}
34
35impl SearchInputs {
36 pub fn as_str(&self) -> &str {
37 self.query.as_ref()
38 }
39 pub fn files_to_include(&self) -> &PathMatcher {
40 &self.files_to_include
41 }
42 pub fn files_to_exclude(&self) -> &PathMatcher {
43 &self.files_to_exclude
44 }
45}
46#[derive(Clone, Debug)]
47pub enum SearchQuery {
48 Text {
49 search: Arc<AhoCorasick>,
50 replacement: Option<String>,
51 whole_word: bool,
52 case_sensitive: bool,
53 include_ignored: bool,
54 inner: SearchInputs,
55 },
56
57 Regex {
58 regex: Regex,
59 replacement: Option<String>,
60 multiline: bool,
61 whole_word: bool,
62 case_sensitive: bool,
63 include_ignored: bool,
64 inner: SearchInputs,
65 },
66}
67
68impl SearchQuery {
69 pub fn text(
70 query: impl ToString,
71 whole_word: bool,
72 case_sensitive: bool,
73 include_ignored: bool,
74 files_to_include: PathMatcher,
75 files_to_exclude: PathMatcher,
76 ) -> Result<Self> {
77 let query = query.to_string();
78 let search = AhoCorasickBuilder::new()
79 .ascii_case_insensitive(!case_sensitive)
80 .build(&[&query])?;
81 let inner = SearchInputs {
82 query: query.into(),
83 files_to_exclude,
84 files_to_include,
85 };
86 Ok(Self::Text {
87 search: Arc::new(search),
88 replacement: None,
89 whole_word,
90 case_sensitive,
91 include_ignored,
92 inner,
93 })
94 }
95
96 pub fn regex(
97 query: impl ToString,
98 whole_word: bool,
99 case_sensitive: bool,
100 include_ignored: bool,
101 files_to_include: PathMatcher,
102 files_to_exclude: PathMatcher,
103 ) -> Result<Self> {
104 let mut query = query.to_string();
105 let initial_query = Arc::from(query.as_str());
106 if whole_word {
107 let mut word_query = String::new();
108 word_query.push_str("\\b");
109 word_query.push_str(&query);
110 word_query.push_str("\\b");
111 query = word_query
112 }
113
114 let multiline = query.contains('\n') || query.contains("\\n");
115 let regex = RegexBuilder::new(&query)
116 .case_insensitive(!case_sensitive)
117 .multi_line(multiline)
118 .build()?;
119 let inner = SearchInputs {
120 query: initial_query,
121 files_to_exclude,
122 files_to_include,
123 };
124 Ok(Self::Regex {
125 regex,
126 replacement: None,
127 multiline,
128 whole_word,
129 case_sensitive,
130 include_ignored,
131 inner,
132 })
133 }
134
135 pub fn from_proto_v1(message: proto::SearchProject) -> Result<Self> {
136 if message.regex {
137 Self::regex(
138 message.query,
139 message.whole_word,
140 message.case_sensitive,
141 message.include_ignored,
142 deserialize_path_matches(&message.files_to_include)?,
143 deserialize_path_matches(&message.files_to_exclude)?,
144 )
145 } else {
146 Self::text(
147 message.query,
148 message.whole_word,
149 message.case_sensitive,
150 message.include_ignored,
151 deserialize_path_matches(&message.files_to_include)?,
152 deserialize_path_matches(&message.files_to_exclude)?,
153 )
154 }
155 }
156
157 pub fn from_proto(message: proto::SearchQuery) -> Result<Self> {
158 if message.regex {
159 Self::regex(
160 message.query,
161 message.whole_word,
162 message.case_sensitive,
163 message.include_ignored,
164 deserialize_path_matches(&message.files_to_include)?,
165 deserialize_path_matches(&message.files_to_exclude)?,
166 )
167 } else {
168 Self::text(
169 message.query,
170 message.whole_word,
171 message.case_sensitive,
172 message.include_ignored,
173 deserialize_path_matches(&message.files_to_include)?,
174 deserialize_path_matches(&message.files_to_exclude)?,
175 )
176 }
177 }
178 pub fn with_replacement(mut self, new_replacement: String) -> Self {
179 match self {
180 Self::Text {
181 ref mut replacement,
182 ..
183 }
184 | Self::Regex {
185 ref mut replacement,
186 ..
187 } => {
188 *replacement = Some(new_replacement);
189 self
190 }
191 }
192 }
193 pub fn to_protov1(&self, project_id: u64) -> proto::SearchProject {
194 proto::SearchProject {
195 project_id,
196 query: self.as_str().to_string(),
197 regex: self.is_regex(),
198 whole_word: self.whole_word(),
199 case_sensitive: self.case_sensitive(),
200 include_ignored: self.include_ignored(),
201 files_to_include: self.files_to_include().sources().join(","),
202 files_to_exclude: self.files_to_exclude().sources().join(","),
203 }
204 }
205
206 pub fn to_proto(&self) -> proto::SearchQuery {
207 proto::SearchQuery {
208 query: self.as_str().to_string(),
209 regex: self.is_regex(),
210 whole_word: self.whole_word(),
211 case_sensitive: self.case_sensitive(),
212 include_ignored: self.include_ignored(),
213 files_to_include: self.files_to_include().sources().join(","),
214 files_to_exclude: self.files_to_exclude().sources().join(","),
215 }
216 }
217
218 pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
219 if self.as_str().is_empty() {
220 return Ok(false);
221 }
222
223 match self {
224 Self::Text { search, .. } => {
225 let mat = search.stream_find_iter(stream).next();
226 match mat {
227 Some(Ok(_)) => Ok(true),
228 Some(Err(err)) => Err(err.into()),
229 None => Ok(false),
230 }
231 }
232 Self::Regex {
233 regex, multiline, ..
234 } => {
235 let mut reader = BufReader::new(stream);
236 if *multiline {
237 let mut text = String::new();
238 if let Err(err) = reader.read_to_string(&mut text) {
239 Err(err.into())
240 } else {
241 Ok(regex.find(&text).is_some())
242 }
243 } else {
244 for line in reader.lines() {
245 let line = line?;
246 if regex.find(&line).is_some() {
247 return Ok(true);
248 }
249 }
250 Ok(false)
251 }
252 }
253 }
254 }
255 /// Returns the replacement text for this `SearchQuery`.
256 pub fn replacement(&self) -> Option<&str> {
257 match self {
258 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
259 replacement.as_deref()
260 }
261 }
262 }
263 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
264 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
265 match self {
266 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
267 SearchQuery::Regex {
268 regex, replacement, ..
269 } => {
270 if let Some(replacement) = replacement {
271 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX
272 .get_or_init(|| Regex::new(r"\\\\|\\n|\\t").unwrap())
273 .replace_all(replacement, |c: &Captures| {
274 match c.get(0).unwrap().as_str() {
275 r"\\" => "\\",
276 r"\n" => "\n",
277 r"\t" => "\t",
278 x => unreachable!("Unexpected escape sequence: {}", x),
279 }
280 });
281 Some(regex.replace(text, replacement))
282 } else {
283 None
284 }
285 }
286 }
287 }
288
289 pub async fn search(
290 &self,
291 buffer: &BufferSnapshot,
292 subrange: Option<Range<usize>>,
293 ) -> Vec<Range<usize>> {
294 const YIELD_INTERVAL: usize = 20000;
295
296 if self.as_str().is_empty() {
297 return Default::default();
298 }
299
300 let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
301 let rope = if let Some(range) = subrange {
302 buffer.as_rope().slice(range)
303 } else {
304 buffer.as_rope().clone()
305 };
306
307 let mut matches = Vec::new();
308 match self {
309 Self::Text {
310 search, whole_word, ..
311 } => {
312 for (ix, mat) in search
313 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
314 .enumerate()
315 {
316 if (ix + 1) % YIELD_INTERVAL == 0 {
317 yield_now().await;
318 }
319
320 let mat = mat.unwrap();
321 if *whole_word {
322 let scope = buffer.language_scope_at(range_offset + mat.start());
323 let kind = |c| char_kind(&scope, c);
324
325 let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
326 let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
327 let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
328 let next_kind = rope.chars_at(mat.end()).next().map(kind);
329 if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
330 continue;
331 }
332 }
333 matches.push(mat.start()..mat.end())
334 }
335 }
336
337 Self::Regex {
338 regex, multiline, ..
339 } => {
340 if *multiline {
341 let text = rope.to_string();
342 for (ix, mat) in regex.find_iter(&text).enumerate() {
343 if (ix + 1) % YIELD_INTERVAL == 0 {
344 yield_now().await;
345 }
346
347 matches.push(mat.start()..mat.end());
348 }
349 } else {
350 let mut line = String::new();
351 let mut line_offset = 0;
352 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
353 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
354 yield_now().await;
355 }
356
357 for (newline_ix, text) in chunk.split('\n').enumerate() {
358 if newline_ix > 0 {
359 for mat in regex.find_iter(&line) {
360 let start = line_offset + mat.start();
361 let end = line_offset + mat.end();
362 matches.push(start..end);
363 }
364
365 line_offset += line.len() + 1;
366 line.clear();
367 }
368 line.push_str(text);
369 }
370 }
371 }
372 }
373 }
374
375 matches
376 }
377
378 pub fn is_empty(&self) -> bool {
379 self.as_str().is_empty()
380 }
381
382 pub fn as_str(&self) -> &str {
383 self.as_inner().as_str()
384 }
385
386 pub fn whole_word(&self) -> bool {
387 match self {
388 Self::Text { whole_word, .. } => *whole_word,
389 Self::Regex { whole_word, .. } => *whole_word,
390 }
391 }
392
393 pub fn case_sensitive(&self) -> bool {
394 match self {
395 Self::Text { case_sensitive, .. } => *case_sensitive,
396 Self::Regex { case_sensitive, .. } => *case_sensitive,
397 }
398 }
399
400 pub fn include_ignored(&self) -> bool {
401 match self {
402 Self::Text {
403 include_ignored, ..
404 } => *include_ignored,
405 Self::Regex {
406 include_ignored, ..
407 } => *include_ignored,
408 }
409 }
410
411 pub fn is_regex(&self) -> bool {
412 matches!(self, Self::Regex { .. })
413 }
414
415 pub fn files_to_include(&self) -> &PathMatcher {
416 self.as_inner().files_to_include()
417 }
418
419 pub fn files_to_exclude(&self) -> &PathMatcher {
420 self.as_inner().files_to_exclude()
421 }
422
423 pub fn filters_path(&self) -> bool {
424 !(self.files_to_exclude().sources().is_empty()
425 && self.files_to_include().sources().is_empty())
426 }
427
428 pub fn file_matches(&self, file_path: &Path) -> bool {
429 let mut path = file_path.to_path_buf();
430 loop {
431 if self.files_to_exclude().is_match(&path) {
432 return false;
433 } else if self.files_to_include().sources().is_empty()
434 || self.files_to_include().is_match(&path)
435 {
436 return true;
437 } else if !path.pop() {
438 return false;
439 }
440 }
441 }
442 pub fn as_inner(&self) -> &SearchInputs {
443 match self {
444 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
445 }
446 }
447}
448
449pub fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
450 let globs = glob_set
451 .split(',')
452 .map(str::trim)
453 .filter_map(|glob_str| (!glob_str.is_empty()).then(|| glob_str.to_owned()))
454 .collect::<Vec<_>>();
455 Ok(PathMatcher::new(&globs)?)
456}
457
#[cfg(test)]
mod tests {
    use super::*;

    /// Every listed path must be accepted by `PathMatcher::new` and match itself.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed ones accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted")
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}")
            }
        }
    }
}