1use std::ops::Range;
2
3use anyhow::Result;
4use collections::HashMap;
5use edit_prediction_context::{EditPredictionExcerpt, EditPredictionExcerptOptions};
6use futures::{
7 StreamExt,
8 channel::mpsc::{self, UnboundedSender},
9};
10use gpui::{AppContext, AsyncApp, Entity};
11use language::{Anchor, Buffer, BufferSnapshot, OffsetRangeExt, ToPoint as _};
12use project::{
13 Project, WorktreeSettings,
14 search::{SearchQuery, SearchResult},
15};
16use util::{
17 ResultExt as _,
18 paths::{PathMatcher, PathStyle},
19};
20use workspace::item::Settings as _;
21
22pub async fn run_retrieval_searches(
23 project: Entity<Project>,
24 regex_by_glob: HashMap<String, String>,
25 cx: &mut AsyncApp,
26) -> Result<HashMap<Entity<Buffer>, Vec<Range<Anchor>>>> {
27 let (exclude_matcher, path_style) = project.update(cx, |project, cx| {
28 let global_settings = WorktreeSettings::get_global(cx);
29 let exclude_patterns = global_settings
30 .file_scan_exclusions
31 .sources()
32 .iter()
33 .chain(global_settings.private_files.sources().iter());
34 let path_style = project.path_style(cx);
35 anyhow::Ok((PathMatcher::new(exclude_patterns, path_style)?, path_style))
36 })??;
37
38 let (results_tx, mut results_rx) = mpsc::unbounded();
39
40 for (glob, regex) in regex_by_glob {
41 let exclude_matcher = exclude_matcher.clone();
42 let results_tx = results_tx.clone();
43 let project = project.clone();
44 cx.spawn(async move |cx| {
45 run_query(
46 &glob,
47 ®ex,
48 results_tx.clone(),
49 path_style,
50 exclude_matcher,
51 &project,
52 cx,
53 )
54 .await
55 .log_err();
56 })
57 .detach()
58 }
59 drop(results_tx);
60
61 cx.background_spawn(async move {
62 let mut results: HashMap<Entity<Buffer>, Vec<Range<Anchor>>> = HashMap::default();
63 let mut snapshots = HashMap::default();
64
65 let mut total_bytes = 0;
66 'outer: while let Some((buffer, snapshot, excerpts)) = results_rx.next().await {
67 snapshots.insert(buffer.entity_id(), snapshot);
68 let existing = results.entry(buffer).or_default();
69 existing.reserve(excerpts.len());
70
71 for (range, size) in excerpts {
72 // Blunt trimming of the results until we have a proper algorithmic filtering step
73 if (total_bytes + size) > MAX_RESULTS_LEN {
74 log::trace!("Combined results reached limit of {MAX_RESULTS_LEN}B");
75 break 'outer;
76 }
77 total_bytes += size;
78 existing.push(range);
79 }
80 }
81
82 for (buffer, ranges) in results.iter_mut() {
83 if let Some(snapshot) = snapshots.get(&buffer.entity_id()) {
84 ranges.sort_unstable_by(|a, b| {
85 a.start
86 .cmp(&b.start, snapshot)
87 .then(b.end.cmp(&b.end, snapshot))
88 });
89
90 let mut index = 1;
91 while index < ranges.len() {
92 if ranges[index - 1]
93 .end
94 .cmp(&ranges[index].start, snapshot)
95 .is_gt()
96 {
97 let removed = ranges.remove(index);
98 ranges[index - 1].end = removed.end;
99 } else {
100 index += 1;
101 }
102 }
103 }
104 }
105
106 Ok(results)
107 })
108 .await
109}
110
/// Passed as `min_bytes` when selecting the excerpt around a search match.
const MIN_EXCERPT_LEN: usize = 16;
/// Passed as `max_bytes` when selecting the excerpt around a search match.
const MAX_EXCERPT_LEN: usize = 768;
/// Upper bound on the combined size of all excerpts kept by `run_retrieval_searches`;
/// currently room for five maximum-length excerpts.
const MAX_RESULTS_LEN: usize = MAX_EXCERPT_LEN * 5;
114
115async fn run_query(
116 glob: &str,
117 regex: &str,
118 results_tx: UnboundedSender<(Entity<Buffer>, BufferSnapshot, Vec<(Range<Anchor>, usize)>)>,
119 path_style: PathStyle,
120 exclude_matcher: PathMatcher,
121 project: &Entity<Project>,
122 cx: &mut AsyncApp,
123) -> Result<()> {
124 let include_matcher = PathMatcher::new(vec![glob], path_style)?;
125
126 let query = SearchQuery::regex(
127 regex,
128 false,
129 true,
130 false,
131 true,
132 include_matcher,
133 exclude_matcher,
134 true,
135 None,
136 )?;
137
138 let results = project.update(cx, |project, cx| project.search(query, cx))?;
139 futures::pin_mut!(results);
140
141 while let Some(SearchResult::Buffer { buffer, ranges }) = results.next().await {
142 if results_tx.is_closed() {
143 break;
144 }
145
146 if ranges.is_empty() {
147 continue;
148 }
149
150 let snapshot = buffer.read_with(cx, |buffer, _cx| buffer.snapshot())?;
151 let results_tx = results_tx.clone();
152
153 cx.background_spawn(async move {
154 let mut excerpts = Vec::with_capacity(ranges.len());
155
156 for range in ranges {
157 let offset_range = range.to_offset(&snapshot);
158 let query_point = (offset_range.start + offset_range.len() / 2).to_point(&snapshot);
159
160 let excerpt = EditPredictionExcerpt::select_from_buffer(
161 query_point,
162 &snapshot,
163 &EditPredictionExcerptOptions {
164 max_bytes: MAX_EXCERPT_LEN,
165 min_bytes: MIN_EXCERPT_LEN,
166 target_before_cursor_over_total_bytes: 0.5,
167 },
168 None,
169 );
170
171 if let Some(excerpt) = excerpt
172 && !excerpt.line_range.is_empty()
173 {
174 excerpts.push((
175 snapshot.anchor_after(excerpt.range.start)
176 ..snapshot.anchor_before(excerpt.range.end),
177 excerpt.range.len(),
178 ));
179 }
180 }
181
182 let send_result = results_tx.unbounded_send((buffer, snapshot, excerpts));
183
184 if let Err(err) = send_result
185 && !err.is_disconnected()
186 {
187 log::error!("{err}");
188 }
189 })
190 .detach();
191 }
192
193 anyhow::Ok(())
194}