1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use client::proto;
4use itertools::Itertools;
5use language::{char_kind, Rope};
6use regex::{Regex, RegexBuilder};
7use smol::future::yield_now;
8use std::{
9 io::{BufRead, BufReader, Read},
10 ops::Range,
11 path::Path,
12 sync::Arc,
13};
14
15#[derive(Clone, Debug)]
16pub enum SearchQuery {
17 Text {
18 search: Arc<AhoCorasick<usize>>,
19 query: Arc<str>,
20 whole_word: bool,
21 case_sensitive: bool,
22 files_to_include: Vec<glob::Pattern>,
23 files_to_exclude: Vec<glob::Pattern>,
24 },
25 Regex {
26 regex: Regex,
27 query: Arc<str>,
28 multiline: bool,
29 whole_word: bool,
30 case_sensitive: bool,
31 files_to_include: Vec<glob::Pattern>,
32 files_to_exclude: Vec<glob::Pattern>,
33 },
34}
35
36impl SearchQuery {
37 pub fn text(
38 query: impl ToString,
39 whole_word: bool,
40 case_sensitive: bool,
41 files_to_include: Vec<glob::Pattern>,
42 files_to_exclude: Vec<glob::Pattern>,
43 ) -> Self {
44 let query = query.to_string();
45 let search = AhoCorasickBuilder::new()
46 .auto_configure(&[&query])
47 .ascii_case_insensitive(!case_sensitive)
48 .build(&[&query]);
49 Self::Text {
50 search: Arc::new(search),
51 query: Arc::from(query),
52 whole_word,
53 case_sensitive,
54 files_to_include,
55 files_to_exclude,
56 }
57 }
58
59 pub fn regex(
60 query: impl ToString,
61 whole_word: bool,
62 case_sensitive: bool,
63 files_to_include: Vec<glob::Pattern>,
64 files_to_exclude: Vec<glob::Pattern>,
65 ) -> Result<Self> {
66 let mut query = query.to_string();
67 let initial_query = Arc::from(query.as_str());
68 if whole_word {
69 let mut word_query = String::new();
70 word_query.push_str("\\b");
71 word_query.push_str(&query);
72 word_query.push_str("\\b");
73 query = word_query
74 }
75
76 let multiline = query.contains('\n') || query.contains("\\n");
77 let regex = RegexBuilder::new(&query)
78 .case_insensitive(!case_sensitive)
79 .multi_line(multiline)
80 .build()?;
81 Ok(Self::Regex {
82 regex,
83 query: initial_query,
84 multiline,
85 whole_word,
86 case_sensitive,
87 files_to_include,
88 files_to_exclude,
89 })
90 }
91
92 pub fn from_proto(message: proto::SearchProject) -> Result<Self> {
93 if message.regex {
94 Self::regex(
95 message.query,
96 message.whole_word,
97 message.case_sensitive,
98 message
99 .files_to_include
100 .split(',')
101 .map(str::trim)
102 .filter(|glob_str| !glob_str.is_empty())
103 .map(|glob_str| glob::Pattern::new(glob_str))
104 .collect::<Result<_, _>>()?,
105 message
106 .files_to_exclude
107 .split(',')
108 .map(str::trim)
109 .filter(|glob_str| !glob_str.is_empty())
110 .map(|glob_str| glob::Pattern::new(glob_str))
111 .collect::<Result<_, _>>()?,
112 )
113 } else {
114 Ok(Self::text(
115 message.query,
116 message.whole_word,
117 message.case_sensitive,
118 message
119 .files_to_include
120 .split(',')
121 .map(str::trim)
122 .filter(|glob_str| !glob_str.is_empty())
123 .map(|glob_str| glob::Pattern::new(glob_str))
124 .collect::<Result<_, _>>()?,
125 message
126 .files_to_exclude
127 .split(',')
128 .map(str::trim)
129 .filter(|glob_str| !glob_str.is_empty())
130 .map(|glob_str| glob::Pattern::new(glob_str))
131 .collect::<Result<_, _>>()?,
132 ))
133 }
134 }
135
136 pub fn to_proto(&self, project_id: u64) -> proto::SearchProject {
137 proto::SearchProject {
138 project_id,
139 query: self.as_str().to_string(),
140 regex: self.is_regex(),
141 whole_word: self.whole_word(),
142 case_sensitive: self.case_sensitive(),
143 files_to_include: self
144 .files_to_include()
145 .iter()
146 .map(ToString::to_string)
147 .join(","),
148 files_to_exclude: self
149 .files_to_exclude()
150 .iter()
151 .map(ToString::to_string)
152 .join(","),
153 }
154 }
155
156 pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
157 if self.as_str().is_empty() {
158 return Ok(false);
159 }
160
161 match self {
162 Self::Text { search, .. } => {
163 let mat = search.stream_find_iter(stream).next();
164 match mat {
165 Some(Ok(_)) => Ok(true),
166 Some(Err(err)) => Err(err.into()),
167 None => Ok(false),
168 }
169 }
170 Self::Regex {
171 regex, multiline, ..
172 } => {
173 let mut reader = BufReader::new(stream);
174 if *multiline {
175 let mut text = String::new();
176 if let Err(err) = reader.read_to_string(&mut text) {
177 Err(err.into())
178 } else {
179 Ok(regex.find(&text).is_some())
180 }
181 } else {
182 for line in reader.lines() {
183 let line = line?;
184 if regex.find(&line).is_some() {
185 return Ok(true);
186 }
187 }
188 Ok(false)
189 }
190 }
191 }
192 }
193
194 pub async fn search(&self, rope: &Rope) -> Vec<Range<usize>> {
195 const YIELD_INTERVAL: usize = 20000;
196
197 if self.as_str().is_empty() {
198 return Default::default();
199 }
200
201 let mut matches = Vec::new();
202 match self {
203 Self::Text {
204 search, whole_word, ..
205 } => {
206 for (ix, mat) in search
207 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
208 .enumerate()
209 {
210 if (ix + 1) % YIELD_INTERVAL == 0 {
211 yield_now().await;
212 }
213
214 let mat = mat.unwrap();
215 if *whole_word {
216 let prev_kind = rope.reversed_chars_at(mat.start()).next().map(char_kind);
217 let start_kind = char_kind(rope.chars_at(mat.start()).next().unwrap());
218 let end_kind = char_kind(rope.reversed_chars_at(mat.end()).next().unwrap());
219 let next_kind = rope.chars_at(mat.end()).next().map(char_kind);
220 if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
221 continue;
222 }
223 }
224 matches.push(mat.start()..mat.end())
225 }
226 }
227 Self::Regex {
228 regex, multiline, ..
229 } => {
230 if *multiline {
231 let text = rope.to_string();
232 for (ix, mat) in regex.find_iter(&text).enumerate() {
233 if (ix + 1) % YIELD_INTERVAL == 0 {
234 yield_now().await;
235 }
236
237 matches.push(mat.start()..mat.end());
238 }
239 } else {
240 let mut line = String::new();
241 let mut line_offset = 0;
242 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
243 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
244 yield_now().await;
245 }
246
247 for (newline_ix, text) in chunk.split('\n').enumerate() {
248 if newline_ix > 0 {
249 for mat in regex.find_iter(&line) {
250 let start = line_offset + mat.start();
251 let end = line_offset + mat.end();
252 matches.push(start..end);
253 }
254
255 line_offset += line.len() + 1;
256 line.clear();
257 }
258 line.push_str(text);
259 }
260 }
261 }
262 }
263 }
264 matches
265 }
266
267 pub fn as_str(&self) -> &str {
268 match self {
269 Self::Text { query, .. } => query.as_ref(),
270 Self::Regex { query, .. } => query.as_ref(),
271 }
272 }
273
274 pub fn whole_word(&self) -> bool {
275 match self {
276 Self::Text { whole_word, .. } => *whole_word,
277 Self::Regex { whole_word, .. } => *whole_word,
278 }
279 }
280
281 pub fn case_sensitive(&self) -> bool {
282 match self {
283 Self::Text { case_sensitive, .. } => *case_sensitive,
284 Self::Regex { case_sensitive, .. } => *case_sensitive,
285 }
286 }
287
288 pub fn is_regex(&self) -> bool {
289 matches!(self, Self::Regex { .. })
290 }
291
292 pub fn files_to_include(&self) -> &[glob::Pattern] {
293 match self {
294 Self::Text {
295 files_to_include, ..
296 } => files_to_include,
297 Self::Regex {
298 files_to_include, ..
299 } => files_to_include,
300 }
301 }
302
303 pub fn files_to_exclude(&self) -> &[glob::Pattern] {
304 match self {
305 Self::Text {
306 files_to_exclude, ..
307 } => files_to_exclude,
308 Self::Regex {
309 files_to_exclude, ..
310 } => files_to_exclude,
311 }
312 }
313
314 pub fn file_matches(&self, file_path: Option<&Path>) -> bool {
315 match file_path {
316 Some(file_path) => {
317 !self
318 .files_to_exclude()
319 .iter()
320 .any(|exclude_glob| exclude_glob.matches_path(file_path))
321 && (self.files_to_include().is_empty()
322 || self
323 .files_to_include()
324 .iter()
325 .any(|include_glob| include_glob.matches_path(file_path)))
326 }
327 None => self.files_to_include().is_empty(),
328 }
329 }
330}