1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use language::{char_kind, BufferSnapshot};
5use regex::{Captures, Regex, RegexBuilder};
6use smol::future::yield_now;
7use std::{
8 borrow::Cow,
9 io::{BufRead, BufReader, Read},
10 ops::Range,
11 path::Path,
12 sync::{Arc, OnceLock},
13};
14use util::paths::PathMatcher;
15
/// Lazily-initialized regex used by `SearchQuery::replacement_for` to locate the
/// escape sequences it expands in a replacement string (an escaped backslash,
/// `\n` and `\t`). The pattern itself is supplied at the first `get_or_init` call.
16static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: OnceLock<Regex> = OnceLock::new();
17
/// The user-supplied parts of a search: the query text and the path matchers
/// that decide which files take part in the search.
18#[derive(Clone, Debug)]
19pub struct SearchInputs {
    /// The query as passed to the `SearchQuery` constructor (for regex queries
    /// this is the text before any whole-word `\b` wrapping is applied).
20 query: Arc<str>,
    /// Paths to include; `SearchQuery::file_matches` treats an empty source
    /// list as "include everything".
21 files_to_include: PathMatcher,
    /// Paths to exclude; `SearchQuery::file_matches` rejects a path as soon as
    /// it, or one of its ancestors checked so far, matches.
22 files_to_exclude: PathMatcher,
23}
24
25impl SearchInputs {
26 pub fn as_str(&self) -> &str {
27 self.query.as_ref()
28 }
29 pub fn files_to_include(&self) -> &PathMatcher {
30 &self.files_to_include
31 }
32 pub fn files_to_exclude(&self) -> &PathMatcher {
33 &self.files_to_exclude
34 }
35}
/// A compiled search query: either a literal-text search or a regular-expression
/// search, together with its options and the original user inputs.
36#[derive(Clone, Debug)]
37pub enum SearchQuery {
    /// Literal text search backed by an Aho-Corasick automaton.
38 Text {
    /// Automaton built from the query text.
39 search: Arc<AhoCorasick>,
    /// Replacement text, if one was attached via `with_replacement`.
40 replacement: Option<String>,
    /// When set, `search` drops matches whose edges are not word boundaries.
41 whole_word: bool,
    /// When unset, the automaton is built ASCII-case-insensitive.
42 case_sensitive: bool,
    /// Stored and round-tripped through the proto message; not consulted in this
    /// file (presumably controls whether ignored files are searched; TODO confirm).
43 include_ignored: bool,
    /// The original query text and path matchers.
44 inner: SearchInputs,
45 },
46
    /// Regular-expression search.
47 Regex {
    /// Compiled pattern (already wrapped in `\b` when `whole_word` is set).
48 regex: Regex,
    /// Replacement text, if one was attached via `with_replacement`.
49 replacement: Option<String>,
    /// True when the pattern contains a newline or a `\n` escape; the text is
    /// then matched as a whole instead of line by line.
50 multiline: bool,
    /// Whether the pattern was wrapped in word boundaries at construction.
51 whole_word: bool,
    /// When unset, the regex is compiled case-insensitive.
52 case_sensitive: bool,
    /// Stored and round-tripped through the proto message; not consulted in this
    /// file (presumably controls whether ignored files are searched; TODO confirm).
53 include_ignored: bool,
    /// The original query text and path matchers.
54 inner: SearchInputs,
55 },
56}
57
58impl SearchQuery {
59 pub fn text(
60 query: impl ToString,
61 whole_word: bool,
62 case_sensitive: bool,
63 include_ignored: bool,
64 files_to_include: PathMatcher,
65 files_to_exclude: PathMatcher,
66 ) -> Result<Self> {
67 let query = query.to_string();
68 let search = AhoCorasickBuilder::new()
69 .ascii_case_insensitive(!case_sensitive)
70 .build(&[&query])?;
71 let inner = SearchInputs {
72 query: query.into(),
73 files_to_exclude,
74 files_to_include,
75 };
76 Ok(Self::Text {
77 search: Arc::new(search),
78 replacement: None,
79 whole_word,
80 case_sensitive,
81 include_ignored,
82 inner,
83 })
84 }
85
86 pub fn regex(
87 query: impl ToString,
88 whole_word: bool,
89 case_sensitive: bool,
90 include_ignored: bool,
91 files_to_include: PathMatcher,
92 files_to_exclude: PathMatcher,
93 ) -> Result<Self> {
94 let mut query = query.to_string();
95 let initial_query = Arc::from(query.as_str());
96 if whole_word {
97 let mut word_query = String::new();
98 word_query.push_str("\\b");
99 word_query.push_str(&query);
100 word_query.push_str("\\b");
101 query = word_query
102 }
103
104 let multiline = query.contains('\n') || query.contains("\\n");
105 let regex = RegexBuilder::new(&query)
106 .case_insensitive(!case_sensitive)
107 .multi_line(multiline)
108 .build()?;
109 let inner = SearchInputs {
110 query: initial_query,
111 files_to_exclude,
112 files_to_include,
113 };
114 Ok(Self::Regex {
115 regex,
116 replacement: None,
117 multiline,
118 whole_word,
119 case_sensitive,
120 include_ignored,
121 inner,
122 })
123 }
124
125 pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
126 if message.regex {
127 Self::regex(
128 message.query,
129 message.whole_word,
130 message.case_sensitive,
131 message.include_ignored,
132 deserialize_path_matches(&message.files_to_include)?,
133 deserialize_path_matches(&message.files_to_exclude)?,
134 )
135 } else {
136 Self::text(
137 message.query,
138 message.whole_word,
139 message.case_sensitive,
140 message.include_ignored,
141 deserialize_path_matches(&message.files_to_include)?,
142 deserialize_path_matches(&message.files_to_exclude)?,
143 )
144 }
145 }
146 pub fn with_replacement(mut self, new_replacement: String) -> Self {
147 match self {
148 Self::Text {
149 ref mut replacement,
150 ..
151 }
152 | Self::Regex {
153 ref mut replacement,
154 ..
155 } => {
156 *replacement = Some(new_replacement);
157 self
158 }
159 }
160 }
161 pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
162 proto::SearchProject {
163 project_id,
164 query: self.as_str().to_string(),
165 regex: self.is_regex(),
166 whole_word: self.whole_word(),
167 case_sensitive: self.case_sensitive(),
168 include_ignored: self.include_ignored(),
169 files_to_include: self.files_to_include().sources().join(","),
170 files_to_exclude: self.files_to_exclude().sources().join(","),
171 }
172 }
173
174 pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
175 if self.as_str().is_empty() {
176 return Ok(false);
177 }
178
179 match self {
180 Self::Text { search, .. } => {
181 let mat = search.stream_find_iter(stream).next();
182 match mat {
183 Some(Ok(_)) => Ok(true),
184 Some(Err(err)) => Err(err.into()),
185 None => Ok(false),
186 }
187 }
188 Self::Regex {
189 regex, multiline, ..
190 } => {
191 let mut reader = BufReader::new(stream);
192 if *multiline {
193 let mut text = String::new();
194 if let Err(err) = reader.read_to_string(&mut text) {
195 Err(err.into())
196 } else {
197 Ok(regex.find(&text).is_some())
198 }
199 } else {
200 for line in reader.lines() {
201 let line = line?;
202 if regex.find(&line).is_some() {
203 return Ok(true);
204 }
205 }
206 Ok(false)
207 }
208 }
209 }
210 }
211 /// Returns the replacement text for this `SearchQuery`.
212 pub fn replacement(&self) -> Option<&str> {
213 match self {
214 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
215 replacement.as_deref()
216 }
217 }
218 }
219 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
220 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
221 match self {
222 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
223 SearchQuery::Regex {
224 regex, replacement, ..
225 } => {
226 if let Some(replacement) = replacement {
227 let replacement = TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX
228 .get_or_init(|| Regex::new(r"\\\\|\\n|\\t").unwrap())
229 .replace_all(replacement, |c: &Captures| {
230 match c.get(0).unwrap().as_str() {
231 r"\\" => "\\",
232 r"\n" => "\n",
233 r"\t" => "\t",
234 x => unreachable!("Unexpected escape sequence: {}", x),
235 }
236 });
237 Some(regex.replace(text, replacement))
238 } else {
239 None
240 }
241 }
242 }
243 }
244
245 pub async fn search(
246 &self,
247 buffer: &BufferSnapshot,
248 subrange: Option<Range<usize>>,
249 ) -> Vec<Range<usize>> {
250 const YIELD_INTERVAL: usize = 20000;
251
252 if self.as_str().is_empty() {
253 return Default::default();
254 }
255
256 let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
257 let rope = if let Some(range) = subrange {
258 buffer.as_rope().slice(range)
259 } else {
260 buffer.as_rope().clone()
261 };
262
263 let mut matches = Vec::new();
264 match self {
265 Self::Text {
266 search, whole_word, ..
267 } => {
268 for (ix, mat) in search
269 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
270 .enumerate()
271 {
272 if (ix + 1) % YIELD_INTERVAL == 0 {
273 yield_now().await;
274 }
275
276 let mat = mat.unwrap();
277 if *whole_word {
278 let scope = buffer.language_scope_at(range_offset + mat.start());
279 let kind = |c| char_kind(&scope, c);
280
281 let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
282 let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
283 let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
284 let next_kind = rope.chars_at(mat.end()).next().map(kind);
285 if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
286 continue;
287 }
288 }
289 matches.push(mat.start()..mat.end())
290 }
291 }
292
293 Self::Regex {
294 regex, multiline, ..
295 } => {
296 if *multiline {
297 let text = rope.to_string();
298 for (ix, mat) in regex.find_iter(&text).enumerate() {
299 if (ix + 1) % YIELD_INTERVAL == 0 {
300 yield_now().await;
301 }
302
303 matches.push(mat.start()..mat.end());
304 }
305 } else {
306 let mut line = String::new();
307 let mut line_offset = 0;
308 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
309 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
310 yield_now().await;
311 }
312
313 for (newline_ix, text) in chunk.split('\n').enumerate() {
314 if newline_ix > 0 {
315 for mat in regex.find_iter(&line) {
316 let start = line_offset + mat.start();
317 let end = line_offset + mat.end();
318 matches.push(start..end);
319 }
320
321 line_offset += line.len() + 1;
322 line.clear();
323 }
324 line.push_str(text);
325 }
326 }
327 }
328 }
329 }
330
331 matches
332 }
333
334 pub fn is_empty(&self) -> bool {
335 self.as_str().is_empty()
336 }
337
338 pub fn as_str(&self) -> &str {
339 self.as_inner().as_str()
340 }
341
342 pub fn whole_word(&self) -> bool {
343 match self {
344 Self::Text { whole_word, .. } => *whole_word,
345 Self::Regex { whole_word, .. } => *whole_word,
346 }
347 }
348
349 pub fn case_sensitive(&self) -> bool {
350 match self {
351 Self::Text { case_sensitive, .. } => *case_sensitive,
352 Self::Regex { case_sensitive, .. } => *case_sensitive,
353 }
354 }
355
356 pub fn include_ignored(&self) -> bool {
357 match self {
358 Self::Text {
359 include_ignored, ..
360 } => *include_ignored,
361 Self::Regex {
362 include_ignored, ..
363 } => *include_ignored,
364 }
365 }
366
367 pub fn is_regex(&self) -> bool {
368 matches!(self, Self::Regex { .. })
369 }
370
371 pub fn files_to_include(&self) -> &PathMatcher {
372 self.as_inner().files_to_include()
373 }
374
375 pub fn files_to_exclude(&self) -> &PathMatcher {
376 self.as_inner().files_to_exclude()
377 }
378
379 pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
380 match file_path {
381 Some(file_path) => {
382 let mut path = file_path.to_path_buf();
383 loop {
384 if self.files_to_exclude().is_match(&path) {
385 return false;
386 } else if self.files_to_include().sources().is_empty()
387 || self.files_to_include().is_match(&path)
388 {
389 return true;
390 } else if !path.pop() {
391 return false;
392 }
393 }
394 }
395 None => self.files_to_include().sources().is_empty(),
396 }
397 }
398 pub fn as_inner(&self) -> &SearchInputs {
399 match self {
400 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
401 }
402 }
403}
404
405fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<PathMatcher> {
406 let globs = glob_set
407 .split(',')
408 .map(str::trim)
409 .filter_map(|glob_str| (!glob_str.is_empty()).then(|| glob_str.to_owned()))
410 .collect::<Vec<_>>();
411 Ok(PathMatcher::new(&globs)?)
412}
413
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted as globs and must match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];
        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(&[valid_path.to_owned()]) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    /// Malformed globs must be rejected and well-formed ones accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(&[invalid_glob.to_owned()]).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(&[valid_glob.to_owned()]) {
                panic!("Valid glob should be accepted, but got: {e}");
            }
        }
    }
}