retrieval_search.rs

  1use std::ops::Range;
  2
  3use anyhow::Result;
  4use collections::HashMap;
  5use edit_prediction_context::{EditPredictionExcerpt, EditPredictionExcerptOptions};
  6use futures::{
  7    StreamExt,
  8    channel::mpsc::{self, UnboundedSender},
  9};
 10use gpui::{AppContext, AsyncApp, Entity};
 11use language::{Anchor, Buffer, BufferSnapshot, OffsetRangeExt, ToPoint as _};
 12use project::{
 13    Project, WorktreeSettings,
 14    search::{SearchQuery, SearchResult},
 15};
 16use util::{
 17    ResultExt as _,
 18    paths::{PathMatcher, PathStyle},
 19};
 20use workspace::item::Settings as _;
 21
 22pub async fn run_retrieval_searches(
 23    project: Entity<Project>,
 24    regex_by_glob: HashMap<String, String>,
 25    cx: &mut AsyncApp,
 26) -> Result<HashMap<Entity<Buffer>, Vec<Range<Anchor>>>> {
 27    let (exclude_matcher, path_style) = project.update(cx, |project, cx| {
 28        let global_settings = WorktreeSettings::get_global(cx);
 29        let exclude_patterns = global_settings
 30            .file_scan_exclusions
 31            .sources()
 32            .iter()
 33            .chain(global_settings.private_files.sources().iter());
 34        let path_style = project.path_style(cx);
 35        anyhow::Ok((PathMatcher::new(exclude_patterns, path_style)?, path_style))
 36    })??;
 37
 38    let (results_tx, mut results_rx) = mpsc::unbounded();
 39
 40    for (glob, regex) in regex_by_glob {
 41        let exclude_matcher = exclude_matcher.clone();
 42        let results_tx = results_tx.clone();
 43        let project = project.clone();
 44        cx.spawn(async move |cx| {
 45            run_query(
 46                &glob,
 47                &regex,
 48                results_tx.clone(),
 49                path_style,
 50                exclude_matcher,
 51                &project,
 52                cx,
 53            )
 54            .await
 55            .log_err();
 56        })
 57        .detach()
 58    }
 59    drop(results_tx);
 60
 61    cx.background_spawn(async move {
 62        let mut results: HashMap<Entity<Buffer>, Vec<Range<Anchor>>> = HashMap::default();
 63        let mut snapshots = HashMap::default();
 64
 65        let mut total_bytes = 0;
 66        'outer: while let Some((buffer, snapshot, excerpts)) = results_rx.next().await {
 67            snapshots.insert(buffer.entity_id(), snapshot);
 68            let existing = results.entry(buffer).or_default();
 69            existing.reserve(excerpts.len());
 70
 71            for (range, size) in excerpts {
 72                // Blunt trimming of the results until we have a proper algorithmic filtering step
 73                if (total_bytes + size) > MAX_RESULTS_LEN {
 74                    log::trace!("Combined results reached limit of {MAX_RESULTS_LEN}B");
 75                    break 'outer;
 76                }
 77                total_bytes += size;
 78                existing.push(range);
 79            }
 80        }
 81
 82        for (buffer, ranges) in results.iter_mut() {
 83            if let Some(snapshot) = snapshots.get(&buffer.entity_id()) {
 84                ranges.sort_unstable_by(|a, b| {
 85                    a.start
 86                        .cmp(&b.start, snapshot)
 87                        .then(b.end.cmp(&b.end, snapshot))
 88                });
 89
 90                let mut index = 1;
 91                while index < ranges.len() {
 92                    if ranges[index - 1]
 93                        .end
 94                        .cmp(&ranges[index].start, snapshot)
 95                        .is_gt()
 96                    {
 97                        let removed = ranges.remove(index);
 98                        ranges[index - 1].end = removed.end;
 99                    } else {
100                        index += 1;
101                    }
102                }
103            }
104        }
105
106        Ok(results)
107    })
108    .await
109}
110
/// Lower bound, in bytes, requested for each excerpt (`min_bytes` in the excerpt options
/// built by `run_query`).
const MIN_EXCERPT_LEN: usize = 16;
/// Upper bound, in bytes, requested for each excerpt (`max_bytes` in the excerpt options
/// built by `run_query`).
const MAX_EXCERPT_LEN: usize = 768;
/// Cap on the combined byte size of all excerpts accepted by `run_retrieval_searches`:
/// room for five maximum-length excerpts.
const MAX_RESULTS_LEN: usize = 5 * MAX_EXCERPT_LEN;
114
115async fn run_query(
116    glob: &str,
117    regex: &str,
118    results_tx: UnboundedSender<(Entity<Buffer>, BufferSnapshot, Vec<(Range<Anchor>, usize)>)>,
119    path_style: PathStyle,
120    exclude_matcher: PathMatcher,
121    project: &Entity<Project>,
122    cx: &mut AsyncApp,
123) -> Result<()> {
124    let include_matcher = PathMatcher::new(vec![glob], path_style)?;
125
126    let query = SearchQuery::regex(
127        regex,
128        false,
129        true,
130        false,
131        true,
132        include_matcher,
133        exclude_matcher,
134        true,
135        None,
136    )?;
137
138    let results = project.update(cx, |project, cx| project.search(query, cx))?;
139    futures::pin_mut!(results);
140
141    while let Some(SearchResult::Buffer { buffer, ranges }) = results.next().await {
142        if results_tx.is_closed() {
143            break;
144        }
145
146        if ranges.is_empty() {
147            continue;
148        }
149
150        let snapshot = buffer.read_with(cx, |buffer, _cx| buffer.snapshot())?;
151        let results_tx = results_tx.clone();
152
153        cx.background_spawn(async move {
154            let mut excerpts = Vec::with_capacity(ranges.len());
155
156            for range in ranges {
157                let offset_range = range.to_offset(&snapshot);
158                let query_point = (offset_range.start + offset_range.len() / 2).to_point(&snapshot);
159
160                let excerpt = EditPredictionExcerpt::select_from_buffer(
161                    query_point,
162                    &snapshot,
163                    &EditPredictionExcerptOptions {
164                        max_bytes: MAX_EXCERPT_LEN,
165                        min_bytes: MIN_EXCERPT_LEN,
166                        target_before_cursor_over_total_bytes: 0.5,
167                    },
168                    None,
169                );
170
171                if let Some(excerpt) = excerpt
172                    && !excerpt.line_range.is_empty()
173                {
174                    excerpts.push((
175                        snapshot.anchor_after(excerpt.range.start)
176                            ..snapshot.anchor_before(excerpt.range.end),
177                        excerpt.range.len(),
178                    ));
179                }
180            }
181
182            let send_result = results_tx.unbounded_send((buffer, snapshot, excerpts));
183
184            if let Err(err) = send_result
185                && !err.is_disconnected()
186            {
187                log::error!("{err}");
188            }
189        })
190        .detach();
191    }
192
193    anyhow::Ok(())
194}