use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
use anyhow::{Context, Result};
use client::proto;
use globset::{Glob, GlobMatcher};
use itertools::Itertools;
use language::{char_kind, Rope};
use regex::{Regex, RegexBuilder};
use smol::future::yield_now;
use std::{
    io::{BufRead, BufReader, Read},
    ops::Range,
    path::{Path, PathBuf},
    sync::Arc,
};

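/// A search over project files: either a literal text query, matched with an
/// Aho-Corasick automaton, or a regular expression. Both variants carry the
/// whole-word and case-sensitivity options plus include/exclude path filters.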
#[derive(Clone, Debug)]
pub enum SearchQuery {
    Text {
        search: Arc<AhoCorasick<usize>>,
        query: Arc<str>,
        whole_word: bool,
        case_sensitive: bool,
        files_to_include: Vec<PathMatcher>,
        files_to_exclude: Vec<PathMatcher>,
    },
    Regex {
        regex: Regex,
        query: Arc<str>,
        multiline: bool,
        whole_word: bool,
        case_sensitive: bool,
        files_to_include: Vec<PathMatcher>,
        files_to_exclude: Vec<PathMatcher>,
    },
}

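/// Matches file paths against either a literal path prefix or a compiled glob,
/// keeping the original string for display and serialization.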
#[derive(Clone, Debug)]
pub struct PathMatcher {
    maybe_path: PathBuf,
    glob: GlobMatcher,
}

impl std::fmt::Display for PathMatcher {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.maybe_path.to_string_lossy().fmt(f)
    }
}

impl PathMatcher {
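    /// Compiles `maybe_glob` as a glob while also keeping it verbatim for
    /// prefix matching and display.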
    pub fn new(maybe_glob: &str) -> Result<Self, globset::Error> {
        Ok(PathMatcher {
            glob: Glob::new(&maybe_glob)?.compile_matcher(),
            maybe_path: PathBuf::from(maybe_glob),
        })
    }

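    /// Returns true if `other` starts with the literal path or matches the glob.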
    pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
        other.as_ref().starts_with(&self.maybe_path) || self.glob.is_match(other)
    }
}

impl SearchQuery {
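    /// Builds a plain-text query. Case-insensitivity is handled by the
    /// Aho-Corasick automaton itself.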
    pub fn text(
        query: impl ToString,
        whole_word: bool,
        case_sensitive: bool,
        files_to_include: Vec<PathMatcher>,
        files_to_exclude: Vec<PathMatcher>,
    ) -> Self {
        let query = query.to_string();
        let search = AhoCorasickBuilder::new()
            .auto_configure(&[&query])
            .ascii_case_insensitive(!case_sensitive)
            .build(&[&query]);
        Self::Text {
            search: Arc::new(search),
            query: Arc::from(query),
            whole_word,
            case_sensitive,
            files_to_include,
            files_to_exclude,
        }
    }

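    /// Builds a regex query. When `whole_word` is set, the pattern is wrapped
    /// in `\b` word boundaries; multiline matching is enabled whenever the
    /// pattern contains a literal or escaped newline.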
    pub fn regex(
        query: impl ToString,
        whole_word: bool,
        case_sensitive: bool,
        files_to_include: Vec<PathMatcher>,
        files_to_exclude: Vec<PathMatcher>,
    ) -> Result<Self> {
        let mut query = query.to_string();
        let initial_query = Arc::from(query.as_str());
        if whole_word {
            let mut word_query = String::new();
            word_query.push_str("\\b");
            word_query.push_str(&query);
            word_query.push_str("\\b");
            query = word_query
        }

        let multiline = query.contains('\n') || query.contains("\\n");
        let regex = RegexBuilder::new(&query)
            .case_insensitive(!case_sensitive)
            .multi_line(multiline)
            .build()?;
        Ok(Self::Regex {
            regex,
            query: initial_query,
            multiline,
            whole_word,
            case_sensitive,
            files_to_include,
            files_to_exclude,
        })
    }

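    /// Reconstructs a query from its RPC representation.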
    pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
        if message.regex {
            Self::regex(
                message.query,
                message.whole_word,
                message.case_sensitive,
                deserialize_path_matches(&message.files_to_include)?,
                deserialize_path_matches(&message.files_to_exclude)?,
            )
        } else {
            Ok(Self::text(
                message.query,
                message.whole_word,
                message.case_sensitive,
                deserialize_path_matches(&message.files_to_include)?,
                deserialize_path_matches(&message.files_to_exclude)?,
            ))
        }
    }

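    /// Serializes the query for the RPC protocol, joining the include and
    /// exclude matchers into comma-separated lists.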
    pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
        proto::SearchProject {
            project_id,
            query: self.as_str().to_string(),
            regex: self.is_regex(),
            whole_word: self.whole_word(),
            case_sensitive: self.case_sensitive(),
            files_to_include: self
                .files_to_include()
                .iter()
                .map(|matcher| matcher.to_string())
                .join(","),
            files_to_exclude: self
                .files_to_exclude()
                .iter()
                .map(|matcher| matcher.to_string())
                .join(","),
        }
    }

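    /// Reports whether the query matches anywhere in `stream` without recording
    /// match positions. Text queries are matched incrementally, non-multiline
    /// regexes line by line, and multiline regexes against the whole stream
    /// read into memory.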
    pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
        if self.as_str().is_empty() {
            return Ok(false);
        }

        match self {
            Self::Text { search, .. } => {
                let mat = search.stream_find_iter(stream).next();
                match mat {
                    Some(Ok(_)) => Ok(true),
                    Some(Err(err)) => Err(err.into()),
                    None => Ok(false),
                }
            }
            Self::Regex {
                regex, multiline, ..
            } => {
                let mut reader = BufReader::new(stream);
                if *multiline {
                    let mut text = String::new();
                    if let Err(err) = reader.read_to_string(&mut text) {
                        Err(err.into())
                    } else {
                        Ok(regex.find(&text).is_some())
                    }
                } else {
                    for line in reader.lines() {
                        let line = line?;
                        if regex.find(&line).is_some() {
                            return Ok(true);
                        }
                    }
                    Ok(false)
                }
            }
        }
    }

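    /// Returns the byte ranges of all matches in `rope`, yielding to the
    /// executor periodically so long searches don't starve other tasks.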
    pub async fn search(&self, rope: &Rope) -> Vec<Range<usize>> {
        const YIELD_INTERVAL: usize = 20000;

        if self.as_str().is_empty() {
            return Default::default();
        }

        let mut matches = Vec::new();
        match self {
            Self::Text {
                search, whole_word, ..
            } => {
                for (ix, mat) in search
                    .stream_find_iter(rope.bytes_in_range(0..rope.len()))
                    .enumerate()
                {
                    if (ix + 1) % YIELD_INTERVAL == 0 {
                        yield_now().await;
                    }

                    let mat = mat.unwrap();
                    if *whole_word {
                        let prev_kind = rope.reversed_chars_at(mat.start()).next().map(char_kind);
                        let start_kind = char_kind(rope.chars_at(mat.start()).next().unwrap());
                        let end_kind = char_kind(rope.reversed_chars_at(mat.end()).next().unwrap());
                        let next_kind = rope.chars_at(mat.end()).next().map(char_kind);
                        if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
                            continue;
                        }
                    }
                    matches.push(mat.start()..mat.end())
                }
            }
            Self::Regex {
                regex, multiline, ..
            } => {
                if *multiline {
                    let text = rope.to_string();
                    for (ix, mat) in regex.find_iter(&text).enumerate() {
                        if (ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        matches.push(mat.start()..mat.end());
                    }
                } else {
                    let mut line = String::new();
                    let mut line_offset = 0;
                    for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
                        if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
                            yield_now().await;
                        }

                        for (newline_ix, text) in chunk.split('\n').enumerate() {
                            if newline_ix > 0 {
                                for mat in regex.find_iter(&line) {
                                    let start = line_offset + mat.start();
                                    let end = line_offset + mat.end();
                                    matches.push(start..end);
                                }

                                line_offset += line.len() + 1;
                                line.clear();
                            }
                            line.push_str(text);
                        }
                    }
                }
            }
        }
        matches
    }

    pub fn as_str(&self) -> &str {
        match self {
            Self::Text { query, .. } => query.as_ref(),
            Self::Regex { query, .. } => query.as_ref(),
        }
    }

    pub fn whole_word(&self) -> bool {
        match self {
            Self::Text { whole_word, .. } => *whole_word,
            Self::Regex { whole_word, .. } => *whole_word,
        }
    }

    pub fn case_sensitive(&self) -> bool {
        match self {
            Self::Text { case_sensitive, .. } => *case_sensitive,
            Self::Regex { case_sensitive, .. } => *case_sensitive,
        }
    }

    pub fn is_regex(&self) -> bool {
        matches!(self, Self::Regex { .. })
    }

    pub fn files_to_include(&self) -> &[PathMatcher] {
        match self {
            Self::Text {
                files_to_include, ..
            } => files_to_include,
            Self::Regex {
                files_to_include, ..
            } => files_to_include,
        }
    }

    pub fn files_to_exclude(&self) -> &[PathMatcher] {
        match self {
            Self::Text {
                files_to_exclude, ..
            } => files_to_exclude,
            Self::Regex {
                files_to_exclude, ..
            } => files_to_exclude,
        }
    }

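    /// Returns whether a file should be searched: its path must not match any
    /// exclude matcher and, if include matchers are present, must match at
    /// least one of them. A file with no path matches only when there are no
    /// include matchers.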
    pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
        match file_path {
            Some(file_path) => {
                !self
                    .files_to_exclude()
                    .iter()
                    .any(|exclude_glob| exclude_glob.is_match(file_path))
                    && (self.files_to_include().is_empty()
                        || self
                            .files_to_include()
                            .iter()
                            .any(|include_glob| include_glob.is_match(file_path)))
            }
            None => self.files_to_include().is_empty(),
        }
    }
}

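/// Parses a comma-separated list of globs into `PathMatcher`s, skipping empty
/// entries.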
fn deserialize_path_matches(glob_set: &str) -> anyhow::Result<Vec<PathMatcher>> {
    glob_set
        .split(',')
        .map(str::trim)
        .filter(|glob_str| !glob_str.is_empty())
        .map(|glob_str| {
            PathMatcher::new(glob_str)
                .with_context(|| format!("deserializing path match glob {glob_str}"))
        })
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn path_matcher_creation_for_valid_paths() {
        for valid_path in [
            "file",
            "Cargo.toml",
            ".DS_Store",
            "~/dir/another_dir/",
            "./dir/file",
            "dir/[a-z].txt",
            "../dir/filé",
        ] {
            let path_matcher = PathMatcher::new(valid_path).unwrap_or_else(|e| {
                panic!("Valid path {valid_path} should be accepted, but got: {e}")
            });
            assert!(
                path_matcher.is_match(valid_path),
                "Path matcher for valid path {valid_path} should match itself"
            )
        }
    }

    #[test]
    fn path_matcher_creation_for_globs() {
        for invalid_glob in ["dir/[].txt", "dir/[a-z.txt", "dir/{file"] {
            match PathMatcher::new(invalid_glob) {
                Ok(_) => panic!("Invalid glob {invalid_glob} should not be accepted"),
                Err(_expected) => {}
            }
        }

        for valid_glob in [
            "dir/?ile",
            "dir/*.txt",
            "dir/**/file",
            "dir/[a-z].txt",
            "{dir,file}",
        ] {
            match PathMatcher::new(valid_glob) {
                Ok(_expected) => {}
                Err(e) => panic!("Valid glob {valid_glob} should be accepted, but got: {e}"),
            }
        }
    }
}