1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use globset::{Glob, GlobMatcher};
5use itertools::Itertools;
6use language::{char_kind, Rope};
7use regex::{Regex, RegexBuilder};
8use smol::future::yield_now;
9use std::{
10 io::{BufRead, BufReader, Read},
11 ops::Range,
12 path::Path,
13 sync::Arc,
14};
15
16#[derive(Clone, Debug)]
17pub enum SearchQuery {
18 Text {
19 search: Arc<AhoCorasick<usize>>,
20 query: Arc<str>,
21 whole_word: bool,
22 case_sensitive: bool,
23 files_to_include: Vec<GlobMatcher>,
24 files_to_exclude: Vec<GlobMatcher>,
25 },
26 Regex {
27 regex: Regex,
28 query: Arc<str>,
29 multiline: bool,
30 whole_word: bool,
31 case_sensitive: bool,
32 files_to_include: Vec<GlobMatcher>,
33 files_to_exclude: Vec<GlobMatcher>,
34 },
35}
36
37impl SearchQuery {
38 pub fn text(
39 query: impl ToString,
40 whole_word: bool,
41 case_sensitive: bool,
42 files_to_include: Vec<GlobMatcher>,
43 files_to_exclude: Vec<GlobMatcher>,
44 ) -> Self {
45 let query = query.to_string();
46 let search = AhoCorasickBuilder::new()
47 .auto_configure(&[&query])
48 .ascii_case_insensitive(!case_sensitive)
49 .build(&[&query]);
50 Self::Text {
51 search: Arc::new(search),
52 query: Arc::from(query),
53 whole_word,
54 case_sensitive,
55 files_to_include,
56 files_to_exclude,
57 }
58 }
59
60 pub fn regex(
61 query: impl ToString,
62 whole_word: bool,
63 case_sensitive: bool,
64 files_to_include: Vec<GlobMatcher>,
65 files_to_exclude: Vec<GlobMatcher>,
66 ) -> Result<Self> {
67 let mut query = query.to_string();
68 let initial_query = Arc::from(query.as_str());
69 if whole_word {
70 let mut word_query = String::new();
71 word_query.push_str("\\b");
72 word_query.push_str(&query);
73 word_query.push_str("\\b");
74 query = word_query
75 }
76
77 let multiline = query.contains('\n') || query.contains("\\n");
78 let regex = RegexBuilder::new(&query)
79 .case_insensitive(!case_sensitive)
80 .multi_line(multiline)
81 .build()?;
82 Ok(Self::Regex {
83 regex,
84 query: initial_query,
85 multiline,
86 whole_word,
87 case_sensitive,
88 files_to_include,
89 files_to_exclude,
90 })
91 }
92
93 pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
94 if message.regex {
95 Self::regex(
96 message.query,
97 message.whole_word,
98 message.case_sensitive,
99 deserialize_globs(&message.files_to_include)?,
100 deserialize_globs(&message.files_to_exclude)?,
101 )
102 } else {
103 Ok(Self::text(
104 message.query,
105 message.whole_word,
106 message.case_sensitive,
107 deserialize_globs(&message.files_to_include)?,
108 deserialize_globs(&message.files_to_exclude)?,
109 ))
110 }
111 }
112
113 pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
114 proto::SearchProject {
115 project_id,
116 query: self.as_str().to_string(),
117 regex: self.is_regex(),
118 whole_word: self.whole_word(),
119 case_sensitive: self.case_sensitive(),
120 files_to_include: self
121 .files_to_include()
122 .iter()
123 .map(|g| g.glob().to_string())
124 .join(","),
125 files_to_exclude: self
126 .files_to_exclude()
127 .iter()
128 .map(|g| g.glob().to_string())
129 .join(","),
130 }
131 }
132
133 pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
134 if self.as_str().is_empty() {
135 return Ok(false);
136 }
137
138 match self {
139 Self::Text { search, .. } => {
140 let mat = search.stream_find_iter(stream).next();
141 match mat {
142 Some(Ok(_)) => Ok(true),
143 Some(Err(err)) => Err(err.into()),
144 None => Ok(false),
145 }
146 }
147 Self::Regex {
148 regex, multiline, ..
149 } => {
150 let mut reader = BufReader::new(stream);
151 if *multiline {
152 let mut text = String::new();
153 if let Err(err) = reader.read_to_string(&mut text) {
154 Err(err.into())
155 } else {
156 Ok(regex.find(&text).is_some())
157 }
158 } else {
159 for line in reader.lines() {
160 let line = line?;
161 if regex.find(&line).is_some() {
162 return Ok(true);
163 }
164 }
165 Ok(false)
166 }
167 }
168 }
169 }
170
171 pub async fn search(&self, rope: &Rope) -> Vec<Range<usize>> {
172 const YIELD_INTERVAL: usize = 20000;
173
174 if self.as_str().is_empty() {
175 return Default::default();
176 }
177
178 let mut matches = Vec::new();
179 match self {
180 Self::Text {
181 search, whole_word, ..
182 } => {
183 for (ix, mat) in search
184 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
185 .enumerate()
186 {
187 if (ix + 1) % YIELD_INTERVAL == 0 {
188 yield_now().await;
189 }
190
191 let mat = mat.unwrap();
192 if *whole_word {
193 let prev_kind = rope.reversed_chars_at(mat.start()).next().map(char_kind);
194 let start_kind = char_kind(rope.chars_at(mat.start()).next().unwrap());
195 let end_kind = char_kind(rope.reversed_chars_at(mat.end()).next().unwrap());
196 let next_kind = rope.chars_at(mat.end()).next().map(char_kind);
197 if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
198 continue;
199 }
200 }
201 matches.push(mat.start()..mat.end())
202 }
203 }
204 Self::Regex {
205 regex, multiline, ..
206 } => {
207 if *multiline {
208 let text = rope.to_string();
209 for (ix, mat) in regex.find_iter(&text).enumerate() {
210 if (ix + 1) % YIELD_INTERVAL == 0 {
211 yield_now().await;
212 }
213
214 matches.push(mat.start()..mat.end());
215 }
216 } else {
217 let mut line = String::new();
218 let mut line_offset = 0;
219 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
220 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
221 yield_now().await;
222 }
223
224 for (newline_ix, text) in chunk.split('\n').enumerate() {
225 if newline_ix > 0 {
226 for mat in regex.find_iter(&line) {
227 let start = line_offset + mat.start();
228 let end = line_offset + mat.end();
229 matches.push(start..end);
230 }
231
232 line_offset += line.len() + 1;
233 line.clear();
234 }
235 line.push_str(text);
236 }
237 }
238 }
239 }
240 }
241 matches
242 }
243
244 pub fn as_str(&self) -> &str {
245 match self {
246 Self::Text { query, .. } => query.as_ref(),
247 Self::Regex { query, .. } => query.as_ref(),
248 }
249 }
250
251 pub fn whole_word(&self) -> bool {
252 match self {
253 Self::Text { whole_word, .. } => *whole_word,
254 Self::Regex { whole_word, .. } => *whole_word,
255 }
256 }
257
258 pub fn case_sensitive(&self) -> bool {
259 match self {
260 Self::Text { case_sensitive, .. } => *case_sensitive,
261 Self::Regex { case_sensitive, .. } => *case_sensitive,
262 }
263 }
264
265 pub fn is_regex(&self) -> bool {
266 matches!(self, Self::Regex { .. })
267 }
268
269 pub fn files_to_include(&self) -> &[GlobMatcher] {
270 match self {
271 Self::Text {
272 files_to_include, ..
273 } => files_to_include,
274 Self::Regex {
275 files_to_include, ..
276 } => files_to_include,
277 }
278 }
279
280 pub fn files_to_exclude(&self) -> &[GlobMatcher] {
281 match self {
282 Self::Text {
283 files_to_exclude, ..
284 } => files_to_exclude,
285 Self::Regex {
286 files_to_exclude, ..
287 } => files_to_exclude,
288 }
289 }
290
291 pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
292 match file_path {
293 Some(file_path) => {
294 !self
295 .files_to_exclude()
296 .iter()
297 .any(|exclude_glob| exclude_glob.is_match(file_path))
298 && (self.files_to_include().is_empty()
299 || self
300 .files_to_include()
301 .iter()
302 .any(|include_glob| include_glob.is_match(file_path)))
303 }
304 None => self.files_to_include().is_empty(),
305 }
306 }
307}
308
309fn deserialize_globs(glob_set: &str) -> Result<Vec<GlobMatcher>> {
310 glob_set
311 .split(',')
312 .map(str::trim)
313 .filter(|glob_str| !glob_str.is_empty())
314 .map(|glob_str| Ok(Glob::new(glob_str)?.compile_matcher()))
315 .collect()
316}