use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
use anyhow::{Context, Result};
use client::proto;
use globset::{Glob, GlobMatcher};
use itertools::Itertools;
use language::{char_kind, Rope};
use regex::{Regex, RegexBuilder};
use smol::future::yield_now;
use std::{
    io::{BufRead, BufReader, Read},
    ops::Range,
    path::{Path, PathBuf},
    sync::Arc,
};

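/// The parts of a search query shared between plain-text and regex searches:
/// the raw query string and the include/exclude path filters.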
#[derive(Clone, Debug)]
pub struct SearchInputs {
    query: Arc<str>,
    files_to_include: Vec<PathMatcher>,
    files_to_exclude: Vec<PathMatcher>,
}

impl SearchInputs {
    pub fn as_str(&self) -> &str {
        self.query.as_ref()
    }

    pub fn files_to_include(&self) -> &[PathMatcher] {
        &self.files_to_include
    }

    pub fn files_to_exclude(&self) -> &[PathMatcher] {
        &self.files_to_exclude
    }
}
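
/// A project-wide search query: either a plain-text search backed by an
/// Aho-Corasick automaton or a compiled regular expression, together with the
/// shared [`SearchInputs`].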
#[derive(Clone, Debug)]
pub enum SearchQuery {
    Text {
        search: Arc<AhoCorasick<usize>>,
        whole_word: bool,
        case_sensitive: bool,
        inner: SearchInputs,
    },
    Regex {
        regex: Regex,
        multiline: bool,
        whole_word: bool,
        case_sensitive: bool,
        inner: SearchInputs,
    },
}

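/// Matches file paths either literally (as a path prefix) or via a compiled
/// glob pattern.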
#[derive(Clone, Debug)]
pub struct PathMatcher {
    maybe_path: PathBuf,
    glob: GlobMatcher,
}

impl std::fmt::Display for PathMatcher {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.maybe_path.to_string_lossy().fmt(f)
    }
}

impl PathMatcher {
    pub fn new(maybe_glob: &str) -> Result<Self, globset::Error> {
        Ok(PathMatcher {
            glob: Glob::new(maybe_glob)?.compile_matcher(),
            maybe_path: PathBuf::from(maybe_glob),
        })
    }

    pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
        other.as_ref().starts_with(&self.maybe_path) || self.glob.is_match(other)
    }
}

impl SearchQuery {
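    /// Builds a plain-text query. Case-insensitive matching is handled by the
    /// Aho-Corasick automaton itself; whole-word filtering is applied later,
    /// when individual matches are inspected.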
    pub fn text(
        query: impl ToString,
        whole_word: bool,
        case_sensitive: bool,
        files_to_include: Vec<PathMatcher>,
        files_to_exclude: Vec<PathMatcher>,
    ) -> Self {
        let query = query.to_string();
        let search = AhoCorasickBuilder::new()
            .auto_configure(&[&query])
            .ascii_case_insensitive(!case_sensitive)
            .build(&[&query]);
        let inner = SearchInputs {
            query: query.into(),
            files_to_exclude,
            files_to_include,
        };
        Self::Text {
            search: Arc::new(search),
            whole_word,
            case_sensitive,
            inner,
        }
    }

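    /// Builds a regex query. When `whole_word` is set, the pattern is wrapped
    /// in `\b` word boundaries; multi-line mode is enabled automatically if
    /// the pattern contains a newline (literal or escaped).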
    pub fn regex(
        query: impl ToString,
        whole_word: bool,
        case_sensitive: bool,
        files_to_include: Vec<PathMatcher>,
        files_to_exclude: Vec<PathMatcher>,
    ) -> Result<Self> {
        let mut query = query.to_string();
        let initial_query = Arc::from(query.as_str());
        if whole_word {
            let mut word_query = String::new();
            word_query.push_str("\\b");
            word_query.push_str(&query);
            word_query.push_str("\\b");
            query = word_query
        }

        let multiline = query.contains('\n') || query.contains("\\n");
        let regex = RegexBuilder::new(&query)
            .case_insensitive(!case_sensitive)
            .multi_line(multiline)
            .build()?;
        let inner = SearchInputs {
            query: initial_query,
            files_to_exclude,
            files_to_include,
        };
        Ok(Self::Regex {
            regex,
            multiline,
            whole_word,
            case_sensitive,
            inner,
        })
    }

    pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
        if message.regex {
            Self::regex(
                message.query,
                message.whole_word,
                message.case_sensitive,
                deserialize_path_matches(&message.files_to_include)?,
                deserialize_path_matches(&message.files_to_exclude)?,
            )
        } else {
            Ok(Self::text(
                message.query,
                message.whole_word,
                message.case_sensitive,
                deserialize_path_matches(&message.files_to_include)?,
                deserialize_path_matches(&message.files_to_exclude)?,
            ))
        }
    }

    pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
        proto::SearchProject {
            project_id,
            query: self.as_str().to_string(),
            regex: self.is_regex(),
            whole_word: self.whole_word(),
            case_sensitive: self.case_sensitive(),
            files_to_include: self
                .files_to_include()
                .iter()
                .map(|matcher| matcher.to_string())
                .join(","),
            files_to_exclude: self
                .files_to_exclude()
                .iter()
                .map(|matcher| matcher.to_string())
                .join(","),
        }
    }

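    /// Reports whether the stream contains at least one match, without
    /// recording match positions. Note that whole-word filtering is not
    /// applied here for text queries.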
    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
        if self.as_str().is_empty() {
            return Ok(false);
        }

        match self {
            Self::Text { search, .. } => {
                let mat = search.stream_find_iter(stream).next();
                match mat {
                    Some(Ok(_)) => Ok(true),
                    Some(Err(err)) => Err(err.into()),
                    None => Ok(false),
                }
            }
            Self::Regex {
                regex, multiline, ..
            } => {
                let mut reader = BufReader::new(stream);
                if *multiline {
                    let mut text = String::new();
                    if let Err(err) = reader.read_to_string(&mut text) {
                        Err(err.into())
                    } else {
                        Ok(regex.find(&text).is_some())
                    }
                } else {
                    for line in reader.lines() {
                        let line = line?;
                        if regex.find(&line).is_some() {
                            return Ok(true);
                        }
                    }
                    Ok(false)
                }
            }
        }
    }

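    /// Returns the byte ranges of every match within `rope`, yielding to the
    /// executor periodically so long searches do not block other tasks.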
    pub async fn search(&self, rope: &Rope) -> Vec<Range<usize>> {
        const YIELD_INTERVAL: usize = 20000;

        if self.as_str().is_empty() {
            return Default::default();
        }

        let mut matches = Vec::new();
        match self {
            Self::Text {
                search, whole_word, ..
            } => {
                for (ix, mat) in search
                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
                    .enumerate()
                {
                    if (ix + 1) % YIELD_INTERVAL == 0 {
                        yield_now().await;
                    }

                    let mat = mat.unwrap();
                    if *whole_word {
                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(char_kind);
                        let start_kind = char_kind(rope.chars_at(mat.start()).next().unwrap());
                        let end_kind = char_kind(rope.reversed_chars_at(mat.end()).next().unwrap());
                        let next_kind = rope.chars_at(mat.end()).next().map(char_kind);
                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
                            continue;
                        }
                    }
                    matches.push(mat.start()..mat.end())
                }
            }
            Self::Regex {
                regex, multiline, ..
            } => {
                if *multiline {
                    let text = rope.to_string();
                    for (ix, mat) in regex.find_iter(&text).enumerate() {
                        if (ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        matches.push(mat.start()..mat.end());
                    }
                } else {
                    let mut line = String::new();
                    let mut line_offset = 0;
                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        for (newline_ix, text) in chunk.split('\n').enumerate() {
                            if newline_ix > 0 {
                                for mat in regex.find_iter(&line) {
                                    let start = line_offset + mat.start();
                                    let end = line_offset + mat.end();
                                    matches.push(start..end);
                                }

                                line_offset += line.len() + 1;
                                line.clear();
                            }
                            line.push_str(text);
                        }
                    }
                }
            }
        }
        matches
    }

    pub fn as_str(&self) -> &str {
        self.as_inner().as_str()
    }

    pub fn whole_word(&self) -> bool {
        match self {
            Self::Text { whole_word, .. } => *whole_word,
            Self::Regex { whole_word, .. } => *whole_word,
        }
    }

    pub fn case_sensitive(&self) -> bool {
        match self {
            Self::Text { case_sensitive, .. } => *case_sensitive,
            Self::Regex { case_sensitive, .. } => *case_sensitive,
        }
    }

    pub fn is_regex(&self) -> bool {
        matches!(self, Self::Regex { .. })
    }

    pub fn files_to_include(&self) -> &[PathMatcher] {
        self.as_inner().files_to_include()
    }

    pub fn files_to_exclude(&self) -> &[PathMatcher] {
        self.as_inner().files_to_exclude()
    }

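    /// Returns whether a file should be searched: it must not match any
    /// exclusion and, if an include list is present, must match at least one
    /// inclusion. Files without a path only match when no include list is set.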
    pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
        match file_path {
            Some(file_path) => {
                !self
                    .files_to_exclude()
                    .iter()
                    .any(|exclude_glob| exclude_glob.is_match(file_path))
                    && (self.files_to_include().is_empty()
                        || self
                            .files_to_include()
                            .iter()
                            .any(|include_glob| include_glob.is_match(file_path)))
            }
            None => self.files_to_include().is_empty(),
        }
    }

    pub fn as_inner(&self) -> &SearchInputs {
        match self {
            Self::Regex { inner, .. } | Self::Text { inner, .. } => inner,
        }
    }
}

fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
    glob_set
        .split(',')
        .map(str::trim)
        .filter(|glob_str| !glob_str.is_empty())
        .map(|glob_str| {
            PathMatcher::new(glob_str)
                .with_context(|| format!("deserializing path match glob {glob_str}"))
        })
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn path_matcher_creation_for_valid_paths() {
        for valid_path in [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ] {
            let path_matcher = PathMatcher::new(valid_path).unwrap_or_else(|e| {
                panic!("Valid path {valid_path} should be accepted, but got: {e}")
            });
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }
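
    // Illustrative sketch (not part of the original suite): shows how
    // `deserialize_path_matches` splits a comma-separated glob list, trimming
    // whitespace and dropping empty entries, and how the resulting matchers
    // behave against relative paths.
    #[test]
    fn deserialize_path_matches_splits_comma_separated_globs() {
        let matchers = deserialize_path_matches("*.rs, docs/**, ").unwrap();
        assert_eq!(matchers.len(), 2);
        assert!(matchers[0].is_match("main.rs"));
        assert!(matchers[1].is_match("docs/readme.md"));
    }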

    #[test]
    fn path_matcher_creation_for_globs() {
        for invalid_glob in ["dir/[].txt", "dir/[a-z.txt", "dir/{file"] {
            match PathMatcher::new(invalid_glob) {
                Ok(_) => panic!("Invalid glob {invalid_glob} should not be accepted"),
                Err(_expected) => {}
            }
        }

        for valid_glob in [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ] {
            match PathMatcher::new(valid_glob) {
                Ok(_expected) => {}
                Err(e) => panic!("Valid glob {valid_glob} should be accepted, but got: {e}"),
            }
        }
    }
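
    // Illustrative sketch (not part of the original suite): exercises
    // `SearchQuery::text` and `SearchQuery::regex` through `detect`, plus
    // include/exclude filtering via `file_matches`.
    #[test]
    fn queries_detect_and_filter() {
        // Case-insensitive text query against an in-memory byte stream.
        let text_query = SearchQuery::text("world", false, false, Vec::new(), Vec::new());
        assert!(text_query.detect("hello WORLD".as_bytes()).unwrap());
        assert!(!text_query.detect("goodbye".as_bytes()).unwrap());

        // Whole-word regex query: the pattern gets wrapped in `\b` boundaries.
        let regex_query =
            SearchQuery::regex("wor\\w+", true, false, Vec::new(), Vec::new()).unwrap();
        assert!(regex_query.detect("hello world".as_bytes()).unwrap());
        assert!(!regex_query.detect("password".as_bytes()).unwrap());

        // Include/exclude path filters applied through `file_matches`.
        let filtered_query = SearchQuery::text(
            "needle",
            false,
            false,
            vec![PathMatcher::new("src/**").unwrap()],
            vec![PathMatcher::new("**/*.md").unwrap()],
        );
        assert!(filtered_query.file_matches(Some(Path::new("src/main.rs"))));
        assert!(!filtered_query.file_matches(Some(Path::new("src/notes.md"))));
        assert!(!filtered_query.file_matches(Some(Path::new("tests/main.rs"))));
    }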
}