1use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
2use anyhow::Result;
3use language::{char_kind, Rope};
4use regex::{Regex, RegexBuilder};
5use smol::future::yield_now;
6use std::{
7 io::{BufRead, BufReader, Read},
8 ops::Range,
9 sync::Arc,
10};
11
12#[derive(Clone)]
13pub enum SearchQuery {
14 Text {
15 search: Arc<AhoCorasick<usize>>,
16 query: String,
17 whole_word: bool,
18 },
19 Regex {
20 multiline: bool,
21 regex: Regex,
22 },
23}
24
25impl SearchQuery {
26 pub fn text(query: impl ToString, whole_word: bool, case_sensitive: bool) -> Self {
27 let query = query.to_string();
28 let search = AhoCorasickBuilder::new()
29 .auto_configure(&[&query])
30 .ascii_case_insensitive(!case_sensitive)
31 .build(&[&query]);
32 Self::Text {
33 search: Arc::new(search),
34 query,
35 whole_word,
36 }
37 }
38
39 pub fn regex(query: impl ToString, whole_word: bool, case_sensitive: bool) -> Result<Self> {
40 let mut query = query.to_string();
41 if whole_word {
42 let mut word_query = String::new();
43 word_query.push_str("\\b");
44 word_query.push_str(&query);
45 word_query.push_str("\\b");
46 query = word_query
47 }
48
49 let multiline = query.contains("\n") || query.contains("\\n");
50 let regex = RegexBuilder::new(&query)
51 .case_insensitive(!case_sensitive)
52 .multi_line(multiline)
53 .build()?;
54 Ok(Self::Regex { multiline, regex })
55 }
56
57 pub fn detect<T: Read>(&self, stream: T) -> Result<bool> {
58 if self.as_str().is_empty() {
59 return Ok(false);
60 }
61
62 match self {
63 SearchQuery::Text { search, .. } => {
64 let mat = search.stream_find_iter(stream).next();
65 match mat {
66 Some(Ok(_)) => Ok(true),
67 Some(Err(err)) => Err(err.into()),
68 None => Ok(false),
69 }
70 }
71 SearchQuery::Regex { multiline, regex } => {
72 let mut reader = BufReader::new(stream);
73 if *multiline {
74 let mut text = String::new();
75 if let Err(err) = reader.read_to_string(&mut text) {
76 Err(err.into())
77 } else {
78 Ok(regex.find(&text).is_some())
79 }
80 } else {
81 for line in reader.lines() {
82 let line = line?;
83 if regex.find(&line).is_some() {
84 return Ok(true);
85 }
86 }
87 Ok(false)
88 }
89 }
90 }
91 }
92
93 pub async fn search(&self, rope: &Rope) -> Vec<Range<usize>> {
94 const YIELD_INTERVAL: usize = 20000;
95
96 if self.as_str().is_empty() {
97 return Default::default();
98 }
99
100 let mut matches = Vec::new();
101 match self {
102 SearchQuery::Text {
103 search, whole_word, ..
104 } => {
105 for (ix, mat) in search
106 .stream_find_iter(rope.bytes_in_range(0..rope.len()))
107 .enumerate()
108 {
109 if (ix + 1) % YIELD_INTERVAL == 0 {
110 yield_now().await;
111 }
112
113 let mat = mat.unwrap();
114 if *whole_word {
115 let prev_kind = rope.reversed_chars_at(mat.start()).next().map(char_kind);
116 let start_kind = char_kind(rope.chars_at(mat.start()).next().unwrap());
117 let end_kind = char_kind(rope.reversed_chars_at(mat.end()).next().unwrap());
118 let next_kind = rope.chars_at(mat.end()).next().map(char_kind);
119 if Some(start_kind) == prev_kind || Some(end_kind) == next_kind {
120 continue;
121 }
122 }
123 matches.push(mat.start()..mat.end())
124 }
125 }
126 SearchQuery::Regex { multiline, regex } => {
127 if *multiline {
128 let text = rope.to_string();
129 for (ix, mat) in regex.find_iter(&text).enumerate() {
130 if (ix + 1) % YIELD_INTERVAL == 0 {
131 yield_now().await;
132 }
133
134 matches.push(mat.start()..mat.end());
135 }
136 } else {
137 let mut line = String::new();
138 let mut line_offset = 0;
139 for (chunk_ix, chunk) in rope.chunks().chain(["\n"]).enumerate() {
140 if (chunk_ix + 1) % YIELD_INTERVAL == 0 {
141 yield_now().await;
142 }
143
144 for (newline_ix, text) in chunk.split('\n').enumerate() {
145 if newline_ix > 0 {
146 for mat in regex.find_iter(&line) {
147 let start = line_offset + mat.start();
148 let end = line_offset + mat.end();
149 matches.push(start..end);
150 }
151
152 line_offset += line.len() + 1;
153 line.clear();
154 }
155 line.push_str(text);
156 }
157 }
158 }
159 }
160 }
161 matches
162 }
163
164 fn as_str(&self) -> &str {
165 match self {
166 SearchQuery::Text { query, .. } => query.as_str(),
167 SearchQuery::Regex { regex, .. } => regex.as_str(),
168 }
169 }
170}