edit_prediction_context.rs

  1use crate::assemble_excerpts::assemble_excerpt_ranges;
  2use anyhow::Result;
  3use collections::HashMap;
  4use futures::{FutureExt, StreamExt as _, channel::mpsc, future};
  5use gpui::{
  6    App, AppContext, AsyncApp, Context, Entity, EntityId, EventEmitter, Task, TaskExt, WeakEntity,
  7};
  8use language::{Anchor, Buffer, BufferSnapshot, OffsetRangeExt as _, Point, ToOffset as _};
  9use project::{LocationLink, Project, ProjectPath};
 10use smallvec::SmallVec;
 11use std::{
 12    collections::hash_map,
 13    ops::Range,
 14    path::Path,
 15    sync::Arc,
 16    time::{Duration, Instant},
 17};
 18use util::paths::PathStyle;
 19use util::rel_path::RelPath;
 20use util::{RangeExt as _, ResultExt};
 21
 22mod assemble_excerpts;
 23#[cfg(test)]
 24mod edit_prediction_context_tests;
 25#[cfg(test)]
 26mod fake_definition_lsp;
 27
 28pub use zeta_prompt::{RelatedExcerpt, RelatedFile};
 29
/// Default number of rows above and below the cursor row that are scanned for
/// identifiers; a new `RelatedExcerptStore` starts with this value.
const IDENTIFIER_LINE_COUNT: u32 = 3;
 31
/// Collects excerpts from other buffers that are related to a cursor position,
/// found by resolving the definitions and type definitions of identifiers near
/// the cursor.
pub struct RelatedExcerptStore {
    /// Weak handle to the project used to issue definition requests.
    project: WeakEntity<Project>,
    /// Buffers (with their excerpt ranges) produced by the most recent refresh
    /// or installed through `set_related_files`.
    related_buffers: Vec<RelatedBuffer>,
    /// Definition lookups keyed by identifier; replaced wholesale at the end of
    /// every refresh.
    cache: HashMap<Identifier, Arc<CacheEntry>>,
    /// Sends refresh requests to the debounced task spawned in `new`.
    update_tx: mpsc::UnboundedSender<(Entity<Buffer>, Anchor)>,
    /// Number of rows on each side of the cursor row scanned for identifiers.
    identifier_line_count: u32,
}
 39
/// A buffer that contains one or more excerpts related to the cursor position.
struct RelatedBuffer {
    /// The buffer the excerpts are taken from.
    buffer: Entity<Buffer>,
    /// Path reported in the resulting `RelatedFile`; when built by a refresh it
    /// is the worktree root name joined to the worktree-relative path with `/`.
    path: Arc<Path>,
    /// Anchored excerpt ranges inside `buffer`.
    anchor_ranges: Vec<Range<Anchor>>,
    /// Order value for each excerpt; parallel to `anchor_ranges`.
    excerpt_orders: Vec<usize>,
    /// Excerpts materialized for a specific buffer version, if any.
    cached_file: Option<CachedRelatedFile>,
}
 47
/// Excerpt text computed from a `RelatedBuffer`'s anchor ranges, together with
/// the buffer version it was computed against.
struct CachedRelatedFile {
    /// The materialized excerpts (row range, text and order).
    excerpts: Vec<RelatedExcerpt>,
    /// Buffer version at the time `excerpts` was computed; compared against the
    /// current version to decide whether the cache is still usable.
    buffer_version: clock::Global,
}
 52
/// Events emitted by `RelatedExcerptStore` while it refreshes its excerpts.
pub enum RelatedExcerptStoreEvent {
    /// Emitted when a refresh begins, before identifiers are collected.
    StartedRefresh,
    /// Emitted after the store's cache and related buffers have been replaced.
    FinishedRefresh {
        /// Number of identifiers whose definitions were served from the cache.
        cache_hit_count: usize,
        /// Number of identifiers whose definitions had to be requested.
        cache_miss_count: usize,
        /// Sum of the cache-miss latencies divided by the number of cache
        /// misses (zero when there were none).
        mean_definition_latency: Duration,
        /// Largest latency observed among the cache misses.
        max_definition_latency: Duration,
    },
}
 62
/// An identifier occurrence in a buffer; used as the key of the definition cache.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
struct Identifier {
    /// The identifier's text.
    pub name: String,
    /// Anchored range of the occurrence in its buffer.
    pub range: Range<Anchor>,
}
 68
/// How the definitions for a single identifier are obtained during a refresh.
enum DefinitionTask {
    /// The identifier was already in the cache; the entry is reused.
    CacheHit(Arc<CacheEntry>),
    /// The identifier was not cached; both lookups are pending project tasks.
    CacheMiss {
        /// Pending "go to definition" request.
        definitions: Task<Result<Option<Vec<LocationLink>>>>,
        /// Pending "go to type definition" request.
        type_definitions: Task<Result<Option<Vec<LocationLink>>>>,
    },
}
 76
/// Cached lookup results for one identifier.
#[derive(Debug)]
struct CacheEntry {
    /// Locations returned by the definition request that passed
    /// `process_definition`.
    definitions: SmallVec<[CachedDefinition; 1]>,
    /// Locations returned by the type-definition request, excluding any that
    /// have the same buffer and range as an entry in `definitions`.
    type_definitions: SmallVec<[CachedDefinition; 1]>,
}
 82
/// A single definition target retained in the cache.
#[derive(Clone, Debug)]
struct CachedDefinition {
    /// Worktree id and worktree-relative path of the target buffer's file.
    path: ProjectPath,
    /// The buffer containing the definition.
    buffer: Entity<Buffer>,
    /// Anchored range of the definition target inside `buffer`.
    anchor_range: Range<Anchor>,
}
 89
/// Quiet period that must pass after the latest refresh request before the
/// store actually fetches excerpts.
const DEBOUNCE_DURATION: Duration = Duration::from_millis(100);
 91
// Lets `RelatedExcerptStore` emit `RelatedExcerptStoreEvent`s through its context.
impl EventEmitter<RelatedExcerptStoreEvent> for RelatedExcerptStore {}
 93
 94impl RelatedExcerptStore {
    /// Creates a store for `project` and spawns the task that services
    /// refresh requests.
    ///
    /// The spawned task debounces requests: every request received while the
    /// timer is pending replaces the remembered buffer/position and restarts the
    /// `DEBOUNCE_DURATION` timer, so only the most recent request is fetched.
    pub fn new(project: &Entity<Project>, cx: &mut Context<Self>) -> Self {
        let (update_tx, mut update_rx) = mpsc::unbounded::<(Entity<Buffer>, Anchor)>();
        cx.spawn(async move |this, cx| {
            let executor = cx.background_executor().clone();
            // Wait for the first request of a burst.
            while let Some((mut buffer, mut position)) = update_rx.next().await {
                let mut timer = executor.timer(DEBOUNCE_DURATION).fuse();
                loop {
                    // `select_biased!` polls the branches in order, so a newer
                    // request is picked up before an expired timer.
                    futures::select_biased! {
                        next = update_rx.next() => {
                            if let Some((new_buffer, new_position)) = next {
                                // A newer request supersedes the pending one and
                                // restarts the quiet period.
                                buffer = new_buffer;
                                position = new_position;
                                timer = executor.timer(DEBOUNCE_DURATION).fuse();
                            } else {
                                // The sender was dropped; nothing left to do.
                                return anyhow::Ok(());
                            }
                        }
                        _ = timer => break,
                    }
                }

                // An error here ends the task; it is logged by `detach_and_log_err`.
                Self::fetch_excerpts(this.clone(), buffer, position, cx).await?;
            }
            anyhow::Ok(())
        })
        .detach_and_log_err(cx);

        RelatedExcerptStore {
            project: project.downgrade(),
            update_tx,
            related_buffers: Vec::new(),
            cache: Default::default(),
            identifier_line_count: IDENTIFIER_LINE_COUNT,
        }
    }
130
    /// Sets how many rows on each side of the cursor row are scanned for
    /// identifiers; the value is read at the start of each subsequent fetch.
    pub fn set_identifier_line_count(&mut self, count: u32) {
        self.identifier_line_count = count;
    }
134
135    pub fn refresh(&mut self, buffer: Entity<Buffer>, position: Anchor, _: &mut Context<Self>) {
136        self.update_tx.unbounded_send((buffer, position)).ok();
137    }
138
139    pub fn related_files(&mut self, cx: &App) -> Vec<RelatedFile> {
140        self.related_buffers
141            .iter_mut()
142            .map(|related| related.related_file(cx))
143            .collect()
144    }
145
146    pub fn related_files_with_buffers(
147        &mut self,
148        cx: &App,
149    ) -> impl Iterator<Item = (RelatedFile, Entity<Buffer>)> {
150        self.related_buffers
151            .iter_mut()
152            .map(|related| (related.related_file(cx), related.buffer.clone()))
153    }
154
155    pub fn set_related_files(&mut self, files: Vec<RelatedFile>, cx: &App) {
156        self.related_buffers = files
157            .into_iter()
158            .filter_map(|file| {
159                let project = self.project.upgrade()?;
160                let project = project.read(cx);
161                let worktree = project.worktrees(cx).find(|wt| {
162                    let root_name = wt.read(cx).root_name().as_unix_str();
163                    file.path
164                        .components()
165                        .next()
166                        .is_some_and(|c| c.as_os_str() == root_name)
167                })?;
168                let worktree = worktree.read(cx);
169                let relative_path = file
170                    .path
171                    .strip_prefix(worktree.root_name().as_unix_str())
172                    .ok()?;
173                let relative_path = RelPath::new(relative_path, PathStyle::Posix).ok()?;
174                let project_path = ProjectPath {
175                    worktree_id: worktree.id(),
176                    path: relative_path.into_owned().into(),
177                };
178                let buffer = project.get_open_buffer(&project_path, cx)?;
179                let snapshot = buffer.read(cx).snapshot();
180                let mut anchor_ranges = Vec::with_capacity(file.excerpts.len());
181                let mut excerpt_orders = Vec::with_capacity(file.excerpts.len());
182                for excerpt in &file.excerpts {
183                    let start = snapshot.anchor_before(Point::new(excerpt.row_range.start, 0));
184                    let end_row = excerpt.row_range.end;
185                    let end_col = snapshot.line_len(end_row);
186                    let end = snapshot.anchor_after(Point::new(end_row, end_col));
187                    anchor_ranges.push(start..end);
188                    excerpt_orders.push(excerpt.order);
189                }
190                Some(RelatedBuffer {
191                    buffer,
192                    path: file.path.clone(),
193                    anchor_ranges,
194                    excerpt_orders,
195                    cached_file: None,
196                })
197            })
198            .collect();
199    }
200
    /// Recomputes the related excerpts for `position` in `buffer` and stores
    /// them on `this`.
    ///
    /// Steps: collect identifiers near the cursor and rank them by distance,
    /// resolve each identifier's definitions and type definitions (reusing
    /// cached entries where available), rebuild the related buffers from the
    /// results, then replace the store's cache and related buffers. Emits
    /// `StartedRefresh` at the beginning and `FinishedRefresh` at the end.
    ///
    /// Returns `Ok(())` without doing anything if the project has been dropped.
    /// Returns an error if the store entity can no longer be accessed.
    async fn fetch_excerpts(
        this: WeakEntity<Self>,
        buffer: Entity<Buffer>,
        position: Anchor,
        cx: &mut AsyncApp,
    ) -> Result<()> {
        let (project, snapshot, identifier_line_count) = this.read_with(cx, |this, cx| {
            (
                this.project.upgrade(),
                buffer.read(cx).snapshot(),
                this.identifier_line_count,
            )
        })?;
        let Some(project) = project else {
            return Ok(());
        };

        let file = snapshot.file().cloned();
        if let Some(file) = &file {
            log::debug!("retrieving_context buffer:{}", file.path().as_unix_str());
        }

        this.update(cx, |_, cx| {
            cx.emit(RelatedExcerptStoreEvent::StartedRefresh);
        })?;

        // Identifier discovery and ranking only need the snapshot, so they run
        // on the background executor.
        let identifiers_with_ranks = cx
            .background_spawn(async move {
                let cursor_offset = position.to_offset(&snapshot);
                let identifiers =
                    identifiers_for_position(&snapshot, position, identifier_line_count);

                // Compute byte distance from cursor to each identifier, then sort by
                // distance so we can assign ordinal ranks. Identifiers at the same
                // distance share the same rank.
                let mut identifiers_with_distance: Vec<(Identifier, usize)> = identifiers
                    .into_iter()
                    .map(|id| {
                        let start = id.range.start.to_offset(&snapshot);
                        let end = id.range.end.to_offset(&snapshot);
                        let distance = if cursor_offset < start {
                            start - cursor_offset
                        } else if cursor_offset > end {
                            cursor_offset - end
                        } else {
                            0
                        };
                        (id, distance)
                    })
                    .collect();
                identifiers_with_distance.sort_by_key(|(_, distance)| *distance);

                // Despite its name, this map stores ranks: the rank of a group of
                // equidistant identifiers is the number of entries inserted before
                // the group started.
                let mut cursor_distances: HashMap<Identifier, usize> = HashMap::default();
                let mut current_rank = 0;
                let mut previous_distance = None;
                for (identifier, distance) in &identifiers_with_distance {
                    if previous_distance != Some(*distance) {
                        current_rank = cursor_distances.len();
                        previous_distance = Some(*distance);
                    }
                    cursor_distances.insert(identifier.clone(), current_rank);
                }

                (identifiers_with_distance, cursor_distances)
            })
            .await;

        let (identifiers_with_distance, cursor_distances) = identifiers_with_ranks;

        let async_cx = cx.clone();
        // All cache-miss latencies are measured from this instant.
        let start_time = Instant::now();
        // Build one future per identifier: either an immediate cache hit or a
        // pair of definition / type-definition requests.
        let futures = this.update(cx, |this, cx| {
            identifiers_with_distance
                .into_iter()
                .filter_map(|(identifier, _)| {
                    let task = if let Some(entry) = this.cache.get(&identifier) {
                        DefinitionTask::CacheHit(entry.clone())
                    } else {
                        // If the project can no longer be updated, `.ok()?` drops
                        // this identifier from the refresh.
                        let definitions = this
                            .project
                            .update(cx, |project, cx| {
                                project.definitions(&buffer, identifier.range.start, cx)
                            })
                            .ok()?;
                        let type_definitions = this
                            .project
                            .update(cx, |project, cx| {
                                project.type_definitions(&buffer, identifier.range.start, cx)
                            })
                            .ok()?;
                        DefinitionTask::CacheMiss {
                            definitions,
                            type_definitions,
                        }
                    };

                    let cx = async_cx.clone();
                    let project = project.clone();
                    Some(async move {
                        match task {
                            DefinitionTask::CacheHit(cache_entry) => {
                                // `None` duration marks a cache hit.
                                Some((identifier, cache_entry, None))
                            }
                            DefinitionTask::CacheMiss {
                                definitions,
                                type_definitions,
                            } => {
                                let (definition_locations, type_definition_locations) =
                                    futures::join!(definitions, type_definitions);
                                let duration = start_time.elapsed();

                                // Failed or empty lookups are logged and treated as
                                // "no locations".
                                let definition_locations =
                                    definition_locations.log_err().flatten().unwrap_or_default();
                                let type_definition_locations = type_definition_locations
                                    .log_err()
                                    .flatten()
                                    .unwrap_or_default();

                                Some(cx.update(|cx| {
                                    let definitions: SmallVec<[CachedDefinition; 1]> =
                                        definition_locations
                                            .into_iter()
                                            .filter_map(|location| {
                                                process_definition(location, &project, cx)
                                            })
                                            .collect();

                                    // Skip type definitions that point at exactly the
                                    // same buffer range as one of the definitions.
                                    let type_definitions: SmallVec<[CachedDefinition; 1]> =
                                        type_definition_locations
                                            .into_iter()
                                            .filter_map(|location| {
                                                process_definition(location, &project, cx)
                                            })
                                            .filter(|type_def| {
                                                !definitions.iter().any(|def| {
                                                    def.buffer.entity_id()
                                                        == type_def.buffer.entity_id()
                                                        && def.anchor_range == type_def.anchor_range
                                                })
                                            })
                                            .collect();

                                    (
                                        identifier,
                                        Arc::new(CacheEntry {
                                            definitions,
                                            type_definitions,
                                        }),
                                        Some(duration),
                                    )
                                }))
                            }
                        }
                    })
                })
                .collect::<Vec<_>>()
        })?;

        let mut cache_hit_count = 0;
        let mut cache_miss_count = 0;
        let mut mean_definition_latency = Duration::ZERO;
        let mut max_definition_latency = Duration::ZERO;
        // The new cache holds only identifiers seen in this refresh.
        let mut new_cache = HashMap::default();
        new_cache.reserve(futures.len());
        for (identifier, entry, duration) in future::join_all(futures).await.into_iter().flatten() {
            new_cache.insert(identifier, entry);
            if let Some(duration) = duration {
                cache_miss_count += 1;
                mean_definition_latency += duration;
                max_definition_latency = max_definition_latency.max(duration);
            } else {
                cache_hit_count += 1;
            }
        }
        // Turn the accumulated sum into a mean; `max(1)` avoids dividing by zero
        // when every identifier was a cache hit.
        mean_definition_latency /= cache_miss_count.max(1) as u32;

        let (new_cache, related_buffers) =
            rebuild_related_files(&project, new_cache, &cursor_distances, cx).await?;

        if let Some(file) = &file {
            log::debug!(
                "finished retrieving context buffer:{}, latency:{:?}",
                file.path().as_unix_str(),
                start_time.elapsed()
            );
        }

        this.update(cx, |this, cx| {
            this.cache = new_cache;
            this.related_buffers = related_buffers;
            cx.emit(RelatedExcerptStoreEvent::FinishedRefresh {
                cache_hit_count,
                cache_miss_count,
                mean_definition_latency,
                max_definition_latency,
            });
        })?;

        anyhow::Ok(())
    }
401}
402
403async fn rebuild_related_files(
404    project: &Entity<Project>,
405    mut new_entries: HashMap<Identifier, Arc<CacheEntry>>,
406    cursor_distances: &HashMap<Identifier, usize>,
407    cx: &mut AsyncApp,
408) -> Result<(HashMap<Identifier, Arc<CacheEntry>>, Vec<RelatedBuffer>)> {
409    let mut snapshots = HashMap::default();
410    let mut worktree_root_names = HashMap::default();
411    for entry in new_entries.values() {
412        for definition in entry
413            .definitions
414            .iter()
415            .chain(entry.type_definitions.iter())
416        {
417            if let hash_map::Entry::Vacant(e) = snapshots.entry(definition.buffer.entity_id()) {
418                definition
419                    .buffer
420                    .read_with(cx, |buffer, _| buffer.parsing_idle())
421                    .await;
422                e.insert(
423                    definition
424                        .buffer
425                        .read_with(cx, |buffer, _| buffer.snapshot()),
426                );
427            }
428            let worktree_id = definition.path.worktree_id;
429            if let hash_map::Entry::Vacant(e) =
430                worktree_root_names.entry(definition.path.worktree_id)
431            {
432                project.read_with(cx, |project, cx| {
433                    if let Some(worktree) = project.worktree_for_id(worktree_id, cx) {
434                        e.insert(worktree.read(cx).root_name().as_unix_str().to_string());
435                    }
436                });
437            }
438        }
439    }
440
441    let cursor_distances = cursor_distances.clone();
442    Ok(cx
443        .background_spawn(async move {
444            let mut ranges_by_buffer =
445                HashMap::<EntityId, (Entity<Buffer>, Vec<(Range<Point>, usize)>)>::default();
446            let mut paths_by_buffer = HashMap::default();
447            let mut min_rank_by_buffer = HashMap::<EntityId, usize>::default();
448            for (identifier, entry) in new_entries.iter_mut() {
449                let rank = cursor_distances
450                    .get(identifier)
451                    .copied()
452                    .unwrap_or(usize::MAX);
453                for definition in entry
454                    .definitions
455                    .iter()
456                    .chain(entry.type_definitions.iter())
457                {
458                    let Some(snapshot) = snapshots.get(&definition.buffer.entity_id()) else {
459                        continue;
460                    };
461                    paths_by_buffer.insert(definition.buffer.entity_id(), definition.path.clone());
462
463                    let buffer_rank = min_rank_by_buffer
464                        .entry(definition.buffer.entity_id())
465                        .or_insert(usize::MAX);
466                    *buffer_rank = (*buffer_rank).min(rank);
467
468                    ranges_by_buffer
469                        .entry(definition.buffer.entity_id())
470                        .or_insert_with(|| (definition.buffer.clone(), Vec::new()))
471                        .1
472                        .push((definition.anchor_range.to_point(snapshot), rank));
473                }
474            }
475
476            let mut related_buffers: Vec<RelatedBuffer> = ranges_by_buffer
477                .into_iter()
478                .filter_map(|(entity_id, (buffer, ranges))| {
479                    let snapshot = snapshots.get(&entity_id)?;
480                    let project_path = paths_by_buffer.get(&entity_id)?;
481                    let assembled = assemble_excerpt_ranges(snapshot, ranges);
482                    let root_name = worktree_root_names.get(&project_path.worktree_id)?;
483
484                    let path: Arc<Path> = Path::new(&format!(
485                        "{}/{}",
486                        root_name,
487                        project_path.path.as_unix_str()
488                    ))
489                    .into();
490
491                    let mut anchor_ranges = Vec::with_capacity(assembled.len());
492                    let mut excerpt_orders = Vec::with_capacity(assembled.len());
493                    for (row_range, order) in assembled {
494                        let start = snapshot.anchor_before(Point::new(row_range.start, 0));
495                        let end_col = snapshot.line_len(row_range.end);
496                        let end = snapshot.anchor_after(Point::new(row_range.end, end_col));
497                        anchor_ranges.push(start..end);
498                        excerpt_orders.push(order);
499                    }
500
501                    let mut related_buffer = RelatedBuffer {
502                        buffer,
503                        path,
504                        anchor_ranges,
505                        excerpt_orders,
506                        cached_file: None,
507                    };
508                    related_buffer.fill_cache(snapshot);
509                    Some(related_buffer)
510                })
511                .collect();
512
513            related_buffers.sort_by(|a, b| {
514                let rank_a = min_rank_by_buffer
515                    .get(&a.buffer.entity_id())
516                    .copied()
517                    .unwrap_or(usize::MAX);
518                let rank_b = min_rank_by_buffer
519                    .get(&b.buffer.entity_id())
520                    .copied()
521                    .unwrap_or(usize::MAX);
522                rank_a.cmp(&rank_b).then_with(|| a.path.cmp(&b.path))
523            });
524
525            (new_entries, related_buffers)
526        })
527        .await)
528}
529
530impl RelatedBuffer {
531    fn related_file(&mut self, cx: &App) -> RelatedFile {
532        let buffer = self.buffer.read(cx);
533        let path = self.path.clone();
534        let cached = if let Some(cached) = &self.cached_file
535            && buffer.version() == cached.buffer_version
536        {
537            cached
538        } else {
539            self.fill_cache(buffer)
540        };
541        let related_file = RelatedFile {
542            path,
543            excerpts: cached.excerpts.clone(),
544            max_row: buffer.max_point().row,
545            in_open_source_repo: false,
546        };
547        return related_file;
548    }
549
550    fn fill_cache(&mut self, buffer: &text::BufferSnapshot) -> &CachedRelatedFile {
551        let excerpts = self
552            .anchor_ranges
553            .iter()
554            .zip(self.excerpt_orders.iter())
555            .map(|(range, &order)| {
556                let start = range.start.to_point(buffer);
557                let end = range.end.to_point(buffer);
558                RelatedExcerpt {
559                    row_range: start.row..end.row,
560                    text: buffer.text_for_range(start..end).collect::<String>().into(),
561                    order,
562                }
563            })
564            .collect::<Vec<_>>();
565        self.cached_file = Some(CachedRelatedFile {
566            excerpts: excerpts,
567            buffer_version: buffer.version().clone(),
568        });
569        self.cached_file.as_ref().unwrap()
570    }
571}
572
573use language::ToPoint as _;
574
/// Longest definition target range (measured in buffer offsets) that
/// `process_definition` accepts; longer targets are discarded.
const MAX_TARGET_LEN: usize = 128;
576
577fn process_definition(
578    location: LocationLink,
579    project: &Entity<Project>,
580    cx: &mut App,
581) -> Option<CachedDefinition> {
582    let buffer = location.target.buffer.read(cx);
583    let anchor_range = location.target.range;
584    let file = buffer.file()?;
585    let worktree = project.read(cx).worktree_for_id(file.worktree_id(cx), cx)?;
586    if worktree.read(cx).is_single_file() {
587        return None;
588    }
589
590    // If the target range is large, it likely means we requested the definition of an entire module.
591    // For individual definitions, the target range should be small as it only covers the symbol.
592    let buffer = location.target.buffer.read(cx);
593    let target_len = anchor_range.to_offset(&buffer).len();
594    if target_len > MAX_TARGET_LEN {
595        return None;
596    }
597
598    Some(CachedDefinition {
599        path: ProjectPath {
600            worktree_id: file.worktree_id(cx),
601            path: file.path().clone(),
602        },
603        buffer: location.target.buffer,
604        anchor_range,
605    })
606}
607
608/// Gets all of the identifiers that are present in the given line, and its containing
609/// outline items.
610fn identifiers_for_position(
611    buffer: &BufferSnapshot,
612    position: Anchor,
613    identifier_line_count: u32,
614) -> Vec<Identifier> {
615    let offset = position.to_offset(buffer);
616    let point = buffer.offset_to_point(offset);
617
618    // Search for identifiers on lines adjacent to the cursor.
619    let start = Point::new(point.row.saturating_sub(identifier_line_count), 0);
620    let end = Point::new(point.row + identifier_line_count + 1, 0).min(buffer.max_point());
621    let line_range = start..end;
622    let mut ranges = vec![line_range.to_offset(&buffer)];
623
624    // Search for identifiers mentioned in headers/signatures of containing outline items.
625    let outline_items = buffer.outline_items_as_offsets_containing(offset..offset, false, None);
626    for item in outline_items {
627        if let Some(body_range) = item.body_range(&buffer) {
628            ranges.push(item.range.start..body_range.start.to_offset(&buffer));
629        } else {
630            ranges.push(item.range.clone());
631        }
632    }
633
634    ranges.sort_by(|a, b| a.start.cmp(&b.start).then(b.end.cmp(&a.end)));
635    ranges.dedup_by(|a, b| {
636        if a.start <= b.end {
637            b.start = b.start.min(a.start);
638            b.end = b.end.max(a.end);
639            true
640        } else {
641            false
642        }
643    });
644
645    let mut identifiers = Vec::new();
646    let outer_range =
647        ranges.first().map_or(0, |r| r.start)..ranges.last().map_or(buffer.len(), |r| r.end);
648
649    let mut captures = buffer.captures(outer_range.clone(), |grammar| {
650        grammar
651            .highlights_config
652            .as_ref()
653            .map(|config| &config.query)
654    });
655
656    for range in ranges {
657        captures.set_byte_range(range.start..outer_range.end);
658
659        let mut last_range = None;
660        while let Some(capture) = captures.peek() {
661            let node_range = capture.node.byte_range();
662            if node_range.start > range.end {
663                break;
664            }
665            let config = captures.grammars()[capture.grammar_index]
666                .highlights_config
667                .as_ref();
668
669            if let Some(config) = config
670                && config.identifier_capture_indices.contains(&capture.index)
671                && range.contains_inclusive(&node_range)
672                && Some(&node_range) != last_range.as_ref()
673            {
674                let name = buffer.text_for_range(node_range.clone()).collect();
675                identifiers.push(Identifier {
676                    range: buffer.anchor_after(node_range.start)
677                        ..buffer.anchor_before(node_range.end),
678                    name,
679                });
680                last_range = Some(node_range);
681            }
682
683            captures.advance();
684        }
685    }
686
687    identifiers
688}