1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::{Context, Result};
3use client::proto;
4use globset::{Glob, GlobMatcher};
5use itertools::Itertools;
6use language::{char_kind, BufferSnapshot};
7use regex::{Regex, RegexBuilder};
8use smol::future::yield_now;
9use std::{
10 borrow::Cow,
11 io::{BufRead, BufReader, Read},
12 ops::Range,
13 path::{Path, PathBuf},
14 sync::Arc,
15};
16use util::paths::PathMatcher;
17
18#[derive(Clone, Debug)]
19pub struct SearchInputs {
20 query: Arc<str>,
21 files_to_include: Vec<PathMatcher>,
22 files_to_exclude: Vec<PathMatcher>,
23}
24
25impl SearchInputs {
26 pub fn as_str(&self) -> &str {
27 self.query.as_ref()
28 }
29 pub fn files_to_include(&self) -> &[PathMatcher] {
30 &self.files_to_include
31 }
32 pub fn files_to_exclude(&self) -> &[PathMatcher] {
33 &self.files_to_exclude
34 }
35}
36#[derive(Clone, Debug)]
37pub enum SearchQuery {
38 Text {
39 search: Arc<AhoCorasick>,
40 replacement: Option<String>,
41 whole_word: bool,
42 case_sensitive: bool,
43 inner: SearchInputs,
44 },
45
46 Regex {
47 regex: Regex,
48 replacement: Option<String>,
49 multiline: bool,
50 whole_word: bool,
51 case_sensitive: bool,
52 inner: SearchInputs,
53 },
54}
55
56impl SearchQuery {
57 pub fn text(
58 query: impl ToString,
59 whole_word: bool,
60 case_sensitive: bool,
61 files_to_include: Vec<PathMatcher>,
62 files_to_exclude: Vec<PathMatcher>,
63 ) -> Result<Self> {
64 let query = query.to_string();
65 let search = AhoCorasickBuilder::new()
66 .ascii_case_insensitive(!case_sensitive)
67 .build(&[&query])?;
68 let inner = SearchInputs {
69 query: query.into(),
70 files_to_exclude,
71 files_to_include,
72 };
73 Ok(Self::Text {
74 search: Arc::new(search),
75 replacement: None,
76 whole_word,
77 case_sensitive,
78 inner,
79 })
80 }
81
82 pub fn regex(
83 query: impl ToString,
84 whole_word: bool,
85 case_sensitive: bool,
86 files_to_include: Vec<PathMatcher>,
87 files_to_exclude: Vec<PathMatcher>,
88 ) -> Result<Self> {
89 let mut query = query.to_string();
90 let initial_query = Arc::from(query.as_str());
91 if whole_word {
92 let mut word_query = String::new();
93 word_query.push_str("\\b");
94 word_query.push_str(&query);
95 word_query.push_str("\\b");
96 query = word_query
97 }
98
99 let multiline = query.contains('\n') || query.contains("\\n");
100 let regex = RegexBuilder::new(&query)
101 .case_insensitive(!case_sensitive)
102 .multi_line(multiline)
103 .build()?;
104 let inner = SearchInputs {
105 query: initial_query,
106 files_to_exclude,
107 files_to_include,
108 };
109 Ok(Self::Regex {
110 regex,
111 replacement: None,
112 multiline,
113 whole_word,
114 case_sensitive,
115 inner,
116 })
117 }
118
119 pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
120 if message.regex {
121 Self::regex(
122 message.query,
123 message.whole_word,
124 message.case_sensitive,
125 deserialize_path_matches(&message.files_to_include)?,
126 deserialize_path_matches(&message.files_to_exclude)?,
127 )
128 } else {
129 Self::text(
130 message.query,
131 message.whole_word,
132 message.case_sensitive,
133 deserialize_path_matches(&message.files_to_include)?,
134 deserialize_path_matches(&message.files_to_exclude)?,
135 )
136 }
137 }
138 pub fn with_replacement(mut self, new_replacement: String) -> Self {
139 match self {
140 Self::Text {
141 ref mut replacement,
142 ..
143 }
144 | Self::Regex {
145 ref mut replacement,
146 ..
147 } => {
148 *replacement = Some(new_replacement);
149 self
150 }
151 }
152 }
153 pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
154 proto::SearchProject {
155 project_id,
156 query: self.as_str().to_string(),
157 regex: self.is_regex(),
158 whole_word: self.whole_word(),
159 case_sensitive: self.case_sensitive(),
160 files_to_include: self
161 .files_to_include()
162 .iter()
163 .map(|matcher| matcher.to_string())
164 .join(","),
165 files_to_exclude: self
166 .files_to_exclude()
167 .iter()
168 .map(|matcher| matcher.to_string())
169 .join(","),
170 }
171 }
172
173 pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
174 if self.as_str().is_empty() {
175 return Ok(false);
176 }
177
178 match self {
179 Self::Text { search, .. } => {
180 let mat = search.stream_find_iter(stream).next();
181 match mat {
182 Some(Ok(_)) => Ok(true),
183 Some(Err(err)) => Err(err.into()),
184 None => Ok(false),
185 }
186 }
187 Self::Regex {
188 regex, multiline, ..
189 } => {
190 let mut reader = BufReader::new(stream);
191 if *multiline {
192 let mut text = String::new();
193 if let Err(err) = reader.read_to_string(&mut text) {
194 Err(err.into())
195 } else {
196 Ok(regex.find(&text).is_some())
197 }
198 } else {
199 for line in reader.lines() {
200 let line = line?;
201 if regex.find(&line).is_some() {
202 return Ok(true);
203 }
204 }
205 Ok(false)
206 }
207 }
208 }
209 }
210 /// Returns the replacement text for this `SearchQuery`.
211 pub fn replacement(&self) -> Option<&str> {
212 match self {
213 SearchQuery::Text { replacement, .. } | SearchQuery::Regex { replacement, .. } => {
214 replacement.as_deref()
215 }
216 }
217 }
218 /// Replaces search hits if replacement is set. `text` is assumed to be a string that matches this `SearchQuery` exactly, without any leftovers on either side.
219 pub fn replacement_for<'a>(&self, text: &'a str) -> Option<Cow<'a, str>> {
220 match self {
221 SearchQuery::Text { replacement, .. } => replacement.clone().map(Cow::from),
222 SearchQuery::Regex {
223 regex, replacement, ..
224 } => {
225 if let Some(replacement) = replacement {
226 Some(regex.replace(text, replacement))
227 } else {
228 None
229 }
230 }
231 }
232 }
233 pub async fn search(
234 &self,
235 buffer: &BufferSnapshot,
236 subrange: Option<Range<usize>>,
237 ) -> Vec<Range<usize>> {
238 const YIELD_INTERVAL: usize = 20000;
239
240 if self.as_str().is_empty() {
241 return Default::default();
242 }
243
244 let range_offset = subrange.as_ref().map(|r| r.start).unwrap_or(0);
245 let rope = if let Some(range) = subrange {
246 buffer.as_rope().slice(range)
247 } else {
248 buffer.as_rope().clone()
249 };
250
251 let mut matches = Vec::new();
252 match self {
253 Self::Text {
254 search, whole_word, ..
255 } => {
256 for (ix, mat) in search
257 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
258 .enumerate()
259 {
260 if (ix + 1) % YIELD_INTERVAL == 0 {
261 yield_now().await;
262 }
263
264 let mat = mat.unwrap();
265 if *whole_word {
266 let scope = buffer.language_scope_at(range_offset + mat.start());
267 let kind = |c| char_kind(&scope, c);
268
269 let prev_kind = rope.reversed_chars_at(mat.start()).next().map(kind);
270 let start_kind = kind(rope.chars_at(mat.start()).next().unwrap());
271 let end_kind = kind(rope.reversed_chars_at(mat.end()).next().unwrap());
272 let next_kind = rope.chars_at(mat.end()).next().map(kind);
273 if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
274 continue;
275 }
276 }
277 matches.push(mat.start()..mat.end())
278 }
279 }
280
281 Self::Regex {
282 regex, multiline, ..
283 } => {
284 if *multiline {
285 let text = rope.to_string();
286 for (ix, mat) in regex.find_iter(&text).enumerate() {
287 if (ix + 1) % YIELD_INTERVAL == 0 {
288 yield_now().await;
289 }
290
291 matches.push(mat.start()..mat.end());
292 }
293 } else {
294 let mut line = String::new();
295 let mut line_offset = 0;
296 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
297 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
298 yield_now().await;
299 }
300
301 for (newline_ix, text) in chunk.split('\n').enumerate() {
302 if newline_ix > 0 {
303 for mat in regex.find_iter(&line) {
304 let start = line_offset + mat.start();
305 let end = line_offset + mat.end();
306 matches.push(start..end);
307 }
308
309 line_offset += line.len() + 1;
310 line.clear();
311 }
312 line.push_str(text);
313 }
314 }
315 }
316 }
317 }
318
319 matches
320 }
321
322 pub fn as_str(&self) -> &str {
323 self.as_inner().as_str()
324 }
325
326 pub fn whole_word(&self) -> bool {
327 match self {
328 Self::Text { whole_word, .. } => *whole_word,
329 Self::Regex { whole_word, .. } => *whole_word,
330 }
331 }
332
333 pub fn case_sensitive(&self) -> bool {
334 match self {
335 Self::Text { case_sensitive, .. } => *case_sensitive,
336 Self::Regex { case_sensitive, .. } => *case_sensitive,
337 }
338 }
339
340 pub fn is_regex(&self) -> bool {
341 matches!(self, Self::Regex { .. })
342 }
343
344 pub fn files_to_include(&self) -> &[PathMatcher] {
345 self.as_inner().files_to_include()
346 }
347
348 pub fn files_to_exclude(&self) -> &[PathMatcher] {
349 self.as_inner().files_to_exclude()
350 }
351
352 pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
353 match file_path {
354 Some(file_path) => {
355 !self
356 .files_to_exclude()
357 .iter()
358 .any(|exclude_glob| exclude_glob.is_match(file_path))
359 && (self.files_to_include().is_empty()
360 || self
361 .files_to_include()
362 .iter()
363 .any(|include_glob| include_glob.is_match(file_path)))
364 }
365 None => self.files_to_include().is_empty(),
366 }
367 }
368 pub fn as_inner(&self) -> &SearchInputs {
369 match self {
370 Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
371 }
372 }
373}
374
375fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
376 glob_set
377 .split(',')
378 .map(str::trim)
379 .filter(|glob_str| !glob_str.is_empty())
380 .map(|glob_str| {
381 PathMatcher::new(glob_str)
382 .with_context(|| format!("deserializing path match glob {glob_str}"))
383 })
384 .collect()
385}
386
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain paths (no glob metacharacters needed) must be accepted and must
    /// match themselves.
    #[test]
    fn path_matcher_creation_for_valid_paths() {
        let valid_paths = [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ];

        for valid_path in valid_paths {
            let path_matcher = match PathMatcher::new(valid_path) {
                Ok(matcher) => matcher,
                Err(e) => panic!("Valid path {valid_path} should be accepted, but got: {e}"),
            };
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            );
        }
    }

    /// Malformed globs must be rejected and well-formed globs accepted.
    #[test]
    fn path_matcher_creation_for_globs() {
        let invalid_globs = ["dir/[].txt", "dir/[a-z.txt", "dir/{file"];
        for invalid_glob in invalid_globs {
            assert!(
                PathMatcher::new(invalid_glob).is_err(),
                "Invalid glob {invalid_glob} should not be accepted"
            );
        }

        let valid_globs = [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ];
        for valid_glob in valid_globs {
            if let Err(e) = PathMatcher::new(valid_glob) {
                panic!("Valid glob {valid_glob} should be accepted, but got: {e}");
            }
        }
    }
}