1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::{Context, Result};
3use client::proto;
4use itertools::Itertools;
5use language::{char_kind, BufferSnapshot};
6use regex::{Regex, RegexBuilder};
7use smol::future::yield_now;
8use std::{
9 borrow::Cow,
10 io::{BufRead, BufReader, Read},
11 ops::Range,
12 path::Path,
13 sync::Arc,
14};
15use util::paths::PathMatcher;
16
/// The user-supplied parts of a search that every [`SearchQuery`] variant carries.
#[derive(Clone, Debug)]
pub struct SearchInputs {
    /// The query text as it was handed to the `SearchQuery` constructor.
    query: Arc<str>,
    /// Matchers for paths the search is restricted to; see [`SearchQuery::file_matches`].
    files_to_include: Vec<PathMatcher>,
    /// Matchers for paths the search skips; see [`SearchQuery::file_matches`].
    files_to_exclude: Vec<PathMatcher>,
}
23
24impl SearchInputs {
25 pub fn as_str(&self) -> &str {
26 self.query.as_ref()
27 }
28 pub fn files_to_include(&self) -> &[PathMatcher] {
29 &self.files_to_include
30 }
31 pub fn files_to_exclude(&self) -> &[PathMatcher] {
32 &self.files_to_exclude
33 }
34}
/// A compiled search: either a literal-text search or a regular-expression search.
///
/// Values are created with [`SearchQuery::text`], [`SearchQuery::regex`] or
/// [`SearchQuery::from_proto`].
#[derive(Clone, Debug)]
pub enum SearchQuery {
    /// Literal text search backed by an Aho-Corasick automaton.
    Text {
        /// Automaton built from the query string.
        search: Arc<AhoCorasick>,
        /// Replacement text, if one was set via [`SearchQuery::with_replacement`].
        replacement: Option<String>,
        /// Whether matches must be delimited by a change of character kind on both sides.
        whole_word: bool,
        /// Whether the search distinguishes letter case.
        case_sensitive: bool,
        /// The original query text and path filters.
        inner: SearchInputs,
    },

    /// Regular-expression search.
    Regex {
        /// The compiled pattern.
        regex: Regex,
        /// Replacement text, if one was set via [`SearchQuery::with_replacement`].
        replacement: Option<String>,
        /// Whether the pattern text contains a newline or the two-character escape `\n`;
        /// when set, matching runs over the whole text instead of line by line.
        multiline: bool,
        /// Whether the pattern was wrapped in word-boundary assertions.
        whole_word: bool,
        /// Whether the search distinguishes letter case.
        case_sensitive: bool,
        /// The original query text and path filters.
        inner: SearchInputs,
    },
}
54
55impl SearchQuery {
56 pub fn text(
57 query: impl ToString,
58 whole_word: bool,
59 case_sensitive: bool,
60 files_to_include: Vec<PathMatcher>,
61 files_to_exclude: Vec<PathMatcher>,
62 ) -> Result<Self> {
63 let query = query.to_string();
64 let search = AhoCorasickBuilder::new()
65 .ascii_case_insensitive(!case_sensitive)
66 .build(&[&query])?;
67 let inner = SearchInputs {
68 query: query.into(),
69 files_to_exclude,
70 files_to_include,
71 };
72 Ok(Self::Text {
73 search: Arc::new(search),
74 replacement: None,
75 whole_word,
76 case_sensitive,
77 inner,
78 })
79 }
80
81 pub fn regex(
82 query: impl ToString,
83 whole_word: bool,
84 case_sensitive: bool,
85 files_to_include: Vec<PathMatcher>,
86 files_to_exclude: Vec<PathMatcher>,
87 ) -> Result<Self> {
88 let mut query = query.to_string();
89 let initial_query = Arc::from(query.as_str());
90 if whole_word {
91 let mut word_query = String::new();
92 word_query.push_str("\\b");
93 word_query.push_str(&query);
94 word_query.push_str("\\b");
95 query = word_query
96 }
97
98 let multiline = query.contains('\n') || query.contains("\\n");
99 let regex = RegexBuilder::new(&query)
100 .case_insensitive(!case_sensitive)
101 .multi_line(multiline)
102 .build()?;
103 let inner = SearchInputs {
104 query: initial_query,
105 files_to_exclude,
106 files_to_include,
107 };
108 Ok(Self::Regex {
109 regex,
110 replacement: None,
111 multiline,
112 whole_word,
113 case_sensitive,
114 inner,
115 })
116 }
117
118 pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
119 if message.regex {
120 Self::regex(
121 message.query,
122 message.whole_word,
123 message.case_sensitive,
124 deserialize_path_matches(&message.files_to_include)?,
125 deserialize_path_matches(&message.files_to_exclude)?,
126 )
127 } else {
128 Self::text(
129 message.query,
130 message.whole_word,
131 message.case_sensitive,
132 deserialize_path_matches(&message.files_to_include)?,
133 deserialize_path_matches(&message.files_to_exclude)?,
134 )
135 }
136 }
137 pub fn with_replacement(mut self, new_replacement: String) -> Self {
138 match self {
139 Self::Text {
140 ref mut replacement,
141 ..
142 }
143 | Self::Regex {
144 ref mut replacement,
145 ..
146 } => {
147 *replacement = Some(new_replacement);
148 self
149 }
150 }
151 }
152 pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
153 proto::SearchProject {
154 project_id,
155 query: self.as_str().to_string(),
156 regex: self.is_regex(),
157 whole_word: self.whole_word(),
158 case_sensitive: self.case_sensitive(),
159 files_to_include: self
160 .files_to_include()
161 .iter()
162 .map(|matcher| matcher.to_string())
163 .join(","),
164 files_to_exclude: self
165 .files_to_exclude()
166 .iter()
167 .map(|matcher| matcher.to_string())
168 .join(","),
169 }
170 }
171
172 pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
173 if self.as_str().is_empty() {
174 return Ok(false);
175 }
176
177 match self {
178 Self::Text { search, .. } => {
179 let mat = search.stream_find_iter(stream).next();
180 match mat {
181 Some(Ok(_)) => Ok(true),
182 Some(Err(err)) => Err(err.into()),
183 None => Ok(false),
184 }
185 }
186 Self::Regex {
187 regex, multiline, ..
188 } => {
189 let mut reader = BufReader::new(stream);
190 if *multiline {
191 let mut text = String::new();
192 if let Err(err) = reader.read_to_string(&mut text) {
193 Err(err.into())
194 } else {
195 Ok(regex.find(&text).is_some())
196 }
197 } else {
198 for line in reader.lines() {
199 let line = line?;
200 if regex.find(&line).is_some() {
201 return Ok(true);
202 }
203 }
204 Ok(false)
205 }
206 }
207 }
208 }
209 /// Returns the replacement text for this `SearchQuery`.
210 pub fn replacement(&self) -> Option<&str> {
211 match self {
212 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
213 replacement.as_deref()
214 }
215 }
216 }
217 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
218 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
219 match self {
220 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
221 SearchQuery::Regex {
222 regex, replacement, ..
223 } => {
224 if let Some(replacement) = replacement {
225 Some(regex.replace(text, replacement))
226 } else {
227 None
228 }
229 }
230 }
231 }
232 pub async fn search(
233 &self,
234 buffer: &BufferSnapshot,
235 subrange: Option<Range<usize>>,
236 ) -> Vec<Range<usize>> {
237 const YIELD_INTERVAL: usize = 20000;
238
239 if self.as_str().is_empty() {
240 return Default::default();
241 }
242
243 let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
244 let rope = if let Some(range) = subrange {
245 buffer.as_rope().slice(range)
246 } else {
247 buffer.as_rope().clone()
248 };
249
250 let mut matches = Vec::new();
251 match self {
252 Self::Text {
253 search, whole_word, ..
254 } => {
255 for (ix, mat) in search
256 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
257 .enumerate()
258 {
259 if (ix + 1) % YIELD_INTERVAL == 0 {
260 yield_now().await;
261 }
262
263 let mat = mat.unwrap();
264 if *whole_word {
265 let scope = buffer.language_scope_at(range_offset + mat.start());
266 let kind = |c| char_kind(&scope, c);
267
268 let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
269 let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
270 let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
271 let next_kind = rope.chars_at(mat.end()).next().map(kind);
272 if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
273 continue;
274 }
275 }
276 matches.push(mat.start()..mat.end())
277 }
278 }
279
280 Self::Regex {
281 regex, multiline, ..
282 } => {
283 if *multiline {
284 let text = rope.to_string();
285 for (ix, mat) in regex.find_iter(&text).enumerate() {
286 if (ix + 1) % YIELD_INTERVAL == 0 {
287 yield_now().await;
288 }
289
290 matches.push(mat.start()..mat.end());
291 }
292 } else {
293 let mut line = String::new();
294 let mut line_offset = 0;
295 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
296 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
297 yield_now().await;
298 }
299
300 for (newline_ix, text) in chunk.split('\n').enumerate() {
301 if newline_ix > 0 {
302 for mat in regex.find_iter(&line) {
303 let start = line_offset + mat.start();
304 let end = line_offset + mat.end();
305 matches.push(start..end);
306 }
307
308 line_offset += line.len() + 1;
309 line.clear();
310 }
311 line.push_str(text);
312 }
313 }
314 }
315 }
316 }
317
318 matches
319 }
320
321 pub fn as_str(&self) -> &str {
322 self.as_inner().as_str()
323 }
324
325 pub fn whole_word(&self) -> bool {
326 match self {
327 Self::Text { whole_word, .. } => *whole_word,
328 Self::Regex { whole_word, .. } => *whole_word,
329 }
330 }
331
332 pub fn case_sensitive(&self) -> bool {
333 match self {
334 Self::Text { case_sensitive, .. } => *case_sensitive,
335 Self::Regex { case_sensitive, .. } => *case_sensitive,
336 }
337 }
338
339 pub fn is_regex(&self) -> bool {
340 matches!(self, Self::Regex { .. })
341 }
342
343 pub fn files_to_include(&self) -> &[PathMatcher] {
344 self.as_inner().files_to_include()
345 }
346
347 pub fn files_to_exclude(&self) -> &[PathMatcher] {
348 self.as_inner().files_to_exclude()
349 }
350
351 pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
352 match file_path {
353 Some(file_path) => {
354 !self
355 .files_to_exclude()
356 .iter()
357 .any(|exclude_glob| exclude_glob.is_match(file_path))
358 && (self.files_to_include().is_empty()
359 || self
360 .files_to_include()
361 .iter()
362 .any(|include_glob| include_glob.is_match(file_path)))
363 }
364 None => self.files_to_include().is_empty(),
365 }
366 }
367 pub fn as_inner(&self) -> &SearchInputs {
368 match self {
369 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
370 }
371 }
372}
373
374fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
375 glob_set
376 .split(',')
377 .map(str::trim)
378 .filter(|glob_str| !glob_str.is_empty())
379 .map(|glob_str| {
380 PathMatcher::new(glob_str)
381 .with_context(|| format!("deserializing path match glob {glob_str}"))
382 })
383 .collect()
384}
385
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths must be accepted as matchers and must match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];
        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(valid_path) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            );
        }
    }

    /// Malformed globs must be rejected and well-formed globs must be accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            if PathMatcher::new(invalid_glob).is_ok() {
                panic!("Invalid glob {invalid_glob} should not be accepted");
            }
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(valid_glob) {
                panic!("Valid glob {valid_glob} should be accepted, but got: {e}");
            }
        }
    }
}