1use crate::assemble_excerpts::assemble_excerpt_ranges;
2use anyhow::Result;
3use collections::HashMap;
4use futures::{FutureExt, StreamExt as _, channel::mpsc, future};
5use gpui::{
6 App, AppContext, AsyncApp, Context, Entity, EntityId, EventEmitter, Task, TaskExt, WeakEntity,
7};
8use language::{Anchor, Buffer, BufferSnapshot, OffsetRangeExt as _, Point, ToOffset as _};
9use project::{LocationLink, Project, ProjectPath};
10use smallvec::SmallVec;
11use std::{
12 collections::hash_map,
13 ops::Range,
14 path::Path,
15 sync::Arc,
16 time::{Duration, Instant},
17};
18use util::paths::PathStyle;
19use util::rel_path::RelPath;
20use util::{RangeExt as _, ResultExt};
21
22mod assemble_excerpts;
23#[cfg(test)]
24mod edit_prediction_context_tests;
25#[cfg(test)]
26mod fake_definition_lsp;
27
28pub use zeta_prompt::{RelatedExcerpt, RelatedFile};
29
/// Default number of lines above and below the cursor that are scanned for identifiers.
const IDENTIFIER_LINE_COUNT: u32 = 3;
31
/// Tracks excerpts from other buffers that are related to the user's cursor
/// position, found by resolving definitions of identifiers near the cursor.
pub struct RelatedExcerptStore {
    project: WeakEntity<Project>,
    /// Buffers currently contributing related excerpts, sorted by relevance rank.
    related_buffers: Vec<RelatedBuffer>,
    /// Cache of previously resolved definitions, keyed by identifier.
    cache: HashMap<Identifier, Arc<CacheEntry>>,
    /// Sends `(buffer, position)` refresh requests to the debounced background task.
    update_tx: mpsc::UnboundedSender<(Entity<Buffer>, Anchor)>,
    /// Number of lines above/below the cursor to scan for identifiers.
    identifier_line_count: u32,
}
39
/// A buffer holding one or more excerpts related to the cursor position.
struct RelatedBuffer {
    buffer: Entity<Buffer>,
    /// Full path, prefixed with the worktree root name.
    path: Arc<Path>,
    /// Anchor range of each excerpt within `buffer`.
    anchor_ranges: Vec<Range<Anchor>>,
    /// `order` value for each excerpt; parallel to `anchor_ranges`.
    excerpt_orders: Vec<usize>,
    /// Rendered excerpt text, cached until the buffer's version changes.
    cached_file: Option<CachedRelatedFile>,
}
47
/// Excerpts rendered from a buffer at a specific version; invalidated when the
/// buffer's version advances.
struct CachedRelatedFile {
    excerpts: Vec<RelatedExcerpt>,
    /// Version of the buffer at the time the excerpts were rendered.
    buffer_version: clock::Global,
}
52
/// Events emitted by [`RelatedExcerptStore`] around each refresh cycle.
pub enum RelatedExcerptStoreEvent {
    /// A refresh began after the debounce interval elapsed.
    StartedRefresh,
    /// A refresh finished, with statistics about definition-cache usage and
    /// LSP definition-request latency.
    FinishedRefresh {
        cache_hit_count: usize,
        cache_miss_count: usize,
        mean_definition_latency: Duration,
        max_definition_latency: Duration,
    },
}
62
/// An identifier occurrence in the current buffer: its text and its anchor range.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
struct Identifier {
    pub name: String,
    pub range: Range<Anchor>,
}
68
/// Work needed to resolve an identifier's definitions: either a cache hit, or
/// pending definition/type-definition requests to the project.
enum DefinitionTask {
    CacheHit(Arc<CacheEntry>),
    CacheMiss {
        definitions: Task<Result<Option<Vec<LocationLink>>>>,
        type_definitions: Task<Result<Option<Vec<LocationLink>>>>,
    },
}
76
/// Resolved definitions and type definitions for a single identifier.
#[derive(Debug)]
struct CacheEntry {
    definitions: SmallVec<[CachedDefinition; 1]>,
    /// Type definitions, deduplicated against `definitions`.
    type_definitions: SmallVec<[CachedDefinition; 1]>,
}
82
/// Location of a single resolved definition: its project path, the buffer it
/// lives in, and the anchor range of the symbol within that buffer.
#[derive(Clone, Debug)]
struct CachedDefinition {
    path: ProjectPath,
    buffer: Entity<Buffer>,
    anchor_range: Range<Anchor>,
}
89
/// How long to wait after the latest `refresh` request before fetching excerpts.
const DEBOUNCE_DURATION: Duration = Duration::from_millis(100);
91
// Lets observers subscribe to `RelatedExcerptStoreEvent`s through gpui.
impl EventEmitter<RelatedExcerptStoreEvent> for RelatedExcerptStore {}
93
94impl RelatedExcerptStore {
    /// Creates a store whose background task listens for `refresh` requests,
    /// debounces them, and then fetches related excerpts for the most recent
    /// `(buffer, position)` pair.
    pub fn new(project: &Entity<Project>, cx: &mut Context<Self>) -> Self {
        let (update_tx, mut update_rx) = mpsc::unbounded::<(Entity<Buffer>, Anchor)>();
        cx.spawn(async move |this, cx| {
            let executor = cx.background_executor().clone();
            while let Some((mut buffer, mut position)) = update_rx.next().await {
                // Debounce: each newer request restarts the timer, so only the
                // latest request within DEBOUNCE_DURATION is processed.
                let mut timer = executor.timer(DEBOUNCE_DURATION).fuse();
                loop {
                    // select_biased favors draining pending requests over the
                    // timer, so we always act on the newest position.
                    futures::select_biased! {
                        next = update_rx.next() => {
                            if let Some((new_buffer, new_position)) = next {
                                buffer = new_buffer;
                                position = new_position;
                                timer = executor.timer(DEBOUNCE_DURATION).fuse();
                            } else {
                                // All senders dropped: shut the task down.
                                return anyhow::Ok(());
                            }
                        }
                        _ = timer => break,
                    }
                }

                Self::fetch_excerpts(this.clone(), buffer, position, cx).await?;
            }
            anyhow::Ok(())
        })
        .detach_and_log_err(cx);

        RelatedExcerptStore {
            project: project.downgrade(),
            update_tx,
            related_buffers: Vec::new(),
            cache: Default::default(),
            identifier_line_count: IDENTIFIER_LINE_COUNT,
        }
    }
130
    /// Overrides how many lines around the cursor are scanned for identifiers.
    pub fn set_identifier_line_count(&mut self, count: u32) {
        self.identifier_line_count = count;
    }
134
135 pub fn refresh(&mut self, buffer: Entity<Buffer>, position: Anchor, _: &mut Context<Self>) {
136 self.update_tx.unbounded_send((buffer, position)).ok();
137 }
138
139 pub fn related_files(&mut self, cx: &App) -> Vec<RelatedFile> {
140 self.related_buffers
141 .iter_mut()
142 .map(|related| related.related_file(cx))
143 .collect()
144 }
145
146 pub fn related_files_with_buffers(
147 &mut self,
148 cx: &App,
149 ) -> impl Iterator<Item = (RelatedFile, Entity<Buffer>)> {
150 self.related_buffers
151 .iter_mut()
152 .map(|related| (related.related_file(cx), related.buffer.clone()))
153 }
154
    /// Replaces the store's related buffers from externally provided
    /// `RelatedFile`s. Files whose worktree can't be matched, whose path can't
    /// be made relative, or whose buffer isn't already open are silently
    /// dropped.
    pub fn set_related_files(&mut self, files: Vec<RelatedFile>, cx: &App) {
        self.related_buffers = files
            .into_iter()
            .filter_map(|file| {
                let project = self.project.upgrade()?;
                let project = project.read(cx);
                // `file.path` is prefixed with a worktree root name; find the
                // worktree whose root matches the first path component.
                let worktree = project.worktrees(cx).find(|wt| {
                    let root_name = wt.read(cx).root_name().as_unix_str();
                    file.path
                        .components()
                        .next()
                        .is_some_and(|c| c.as_os_str() == root_name)
                })?;
                let worktree = worktree.read(cx);
                let relative_path = file
                    .path
                    .strip_prefix(worktree.root_name().as_unix_str())
                    .ok()?;
                let relative_path = RelPath::new(relative_path, PathStyle::Posix).ok()?;
                let project_path = ProjectPath {
                    worktree_id: worktree.id(),
                    path: relative_path.into_owned().into(),
                };
                // Only buffers that are already open are restored.
                let buffer = project.get_open_buffer(&project_path, cx)?;
                let snapshot = buffer.read(cx).snapshot();
                // Convert each excerpt's row range into anchors spanning from
                // the start of the first row to the end of the last row.
                let mut anchor_ranges = Vec::with_capacity(file.excerpts.len());
                let mut excerpt_orders = Vec::with_capacity(file.excerpts.len());
                for excerpt in &file.excerpts {
                    let start = snapshot.anchor_before(Point::new(excerpt.row_range.start, 0));
                    let end_row = excerpt.row_range.end;
                    let end_col = snapshot.line_len(end_row);
                    let end = snapshot.anchor_after(Point::new(end_row, end_col));
                    anchor_ranges.push(start..end);
                    excerpt_orders.push(excerpt.order);
                }
                Some(RelatedBuffer {
                    buffer,
                    path: file.path.clone(),
                    anchor_ranges,
                    excerpt_orders,
                    cached_file: None,
                })
            })
            .collect();
    }
200
    /// Collects identifiers around `position`, resolves their definitions and
    /// type definitions through the project (reusing the store's cache), then
    /// rebuilds the related-file list. Emits `StartedRefresh` when work begins
    /// and `FinishedRefresh` with cache and latency statistics when done.
    async fn fetch_excerpts(
        this: WeakEntity<Self>,
        buffer: Entity<Buffer>,
        position: Anchor,
        cx: &mut AsyncApp,
    ) -> Result<()> {
        let (project, snapshot, identifier_line_count) = this.read_with(cx, |this, cx| {
            (
                this.project.upgrade(),
                buffer.read(cx).snapshot(),
                this.identifier_line_count,
            )
        })?;
        let Some(project) = project else {
            // Project was dropped; nothing to refresh.
            return Ok(());
        };

        let file = snapshot.file().cloned();
        if let Some(file) = &file {
            log::debug!("retrieving_context buffer:{}", file.path().as_unix_str());
        }

        this.update(cx, |_, cx| {
            cx.emit(RelatedExcerptStoreEvent::StartedRefresh);
        })?;

        // Identifier extraction and ranking runs on the background executor.
        let identifiers_with_ranks = cx
            .background_spawn(async move {
                let cursor_offset = position.to_offset(&snapshot);
                let identifiers =
                    identifiers_for_position(&snapshot, position, identifier_line_count);

                // Compute byte distance from cursor to each identifier, then sort by
                // distance so we can assign ordinal ranks. Identifiers at the same
                // distance share the same rank.
                let mut identifiers_with_distance: Vec<(Identifier, usize)> = identifiers
                    .into_iter()
                    .map(|id| {
                        let start = id.range.start.to_offset(&snapshot);
                        let end = id.range.end.to_offset(&snapshot);
                        let distance = if cursor_offset < start {
                            start - cursor_offset
                        } else if cursor_offset > end {
                            cursor_offset - end
                        } else {
                            // Cursor is inside the identifier.
                            0
                        };
                        (id, distance)
                    })
                    .collect();
                identifiers_with_distance.sort_by_key(|(_, distance)| *distance);

                let mut cursor_distances: HashMap<Identifier, usize> = HashMap::default();
                let mut current_rank = 0;
                let mut previous_distance = None;
                for (identifier, distance) in &identifiers_with_distance {
                    if previous_distance != Some(*distance) {
                        // New distance value: rank equals the number of
                        // identifiers already ranked.
                        current_rank = cursor_distances.len();
                        previous_distance = Some(*distance);
                    }
                    cursor_distances.insert(identifier.clone(), current_rank);
                }

                (identifiers_with_distance, cursor_distances)
            })
            .await;

        let (identifiers_with_distance, cursor_distances) = identifiers_with_ranks;

        let async_cx = cx.clone();
        let start_time = Instant::now();
        // For each identifier, either reuse the cached entry or kick off
        // definition + type-definition requests, yielding one future per
        // identifier.
        let futures = this.update(cx, |this, cx| {
            identifiers_with_distance
                .into_iter()
                .filter_map(|(identifier, _)| {
                    let task = if let Some(entry) = this.cache.get(&identifier) {
                        DefinitionTask::CacheHit(entry.clone())
                    } else {
                        let definitions = this
                            .project
                            .update(cx, |project, cx| {
                                project.definitions(&buffer, identifier.range.start, cx)
                            })
                            .ok()?;
                        let type_definitions = this
                            .project
                            .update(cx, |project, cx| {
                                project.type_definitions(&buffer, identifier.range.start, cx)
                            })
                            .ok()?;
                        DefinitionTask::CacheMiss {
                            definitions,
                            type_definitions,
                        }
                    };

                    let cx = async_cx.clone();
                    let project = project.clone();
                    Some(async move {
                        match task {
                            DefinitionTask::CacheHit(cache_entry) => {
                                // `None` duration marks this as a cache hit below.
                                Some((identifier, cache_entry, None))
                            }
                            DefinitionTask::CacheMiss {
                                definitions,
                                type_definitions,
                            } => {
                                let (definition_locations, type_definition_locations) =
                                    futures::join!(definitions, type_definitions);
                                let duration = start_time.elapsed();

                                let definition_locations =
                                    definition_locations.log_err().flatten().unwrap_or_default();
                                let type_definition_locations = type_definition_locations
                                    .log_err()
                                    .flatten()
                                    .unwrap_or_default();

                                Some(cx.update(|cx| {
                                    let definitions: SmallVec<[CachedDefinition; 1]> =
                                        definition_locations
                                            .into_iter()
                                            .filter_map(|location| {
                                                process_definition(location, &project, cx)
                                            })
                                            .collect();

                                    // Drop type definitions that duplicate a
                                    // regular definition (same buffer + range).
                                    let type_definitions: SmallVec<[CachedDefinition; 1]> =
                                        type_definition_locations
                                            .into_iter()
                                            .filter_map(|location| {
                                                process_definition(location, &project, cx)
                                            })
                                            .filter(|type_def| {
                                                !definitions.iter().any(|def| {
                                                    def.buffer.entity_id()
                                                        == type_def.buffer.entity_id()
                                                        && def.anchor_range == type_def.anchor_range
                                                })
                                            })
                                            .collect();

                                    (
                                        identifier,
                                        Arc::new(CacheEntry {
                                            definitions,
                                            type_definitions,
                                        }),
                                        Some(duration),
                                    )
                                }))
                            }
                        }
                    })
                })
                .collect::<Vec<_>>()
        })?;

        // Await all lookups and accumulate cache/latency statistics.
        let mut cache_hit_count = 0;
        let mut cache_miss_count = 0;
        let mut mean_definition_latency = Duration::ZERO;
        let mut max_definition_latency = Duration::ZERO;
        let mut new_cache = HashMap::default();
        new_cache.reserve(futures.len());
        for (identifier, entry, duration) in future::join_all(futures).await.into_iter().flatten() {
            new_cache.insert(identifier, entry);
            if let Some(duration) = duration {
                cache_miss_count += 1;
                mean_definition_latency += duration;
                max_definition_latency = max_definition_latency.max(duration);
            } else {
                cache_hit_count += 1;
            }
        }
        // `.max(1)` guards the division when every lookup was a cache hit.
        mean_definition_latency /= cache_miss_count.max(1) as u32;

        let (new_cache, related_buffers) =
            rebuild_related_files(&project, new_cache, &cursor_distances, cx).await?;

        if let Some(file) = &file {
            log::debug!(
                "finished retrieving context buffer:{}, latency:{:?}",
                file.path().as_unix_str(),
                start_time.elapsed()
            );
        }

        this.update(cx, |this, cx| {
            // Replace (rather than merge) the cache so stale identifiers age out.
            this.cache = new_cache;
            this.related_buffers = related_buffers;
            cx.emit(RelatedExcerptStoreEvent::FinishedRefresh {
                cache_hit_count,
                cache_miss_count,
                mean_definition_latency,
                max_definition_latency,
            });
        })?;

        anyhow::Ok(())
    }
401}
402
/// Groups the resolved definitions in `new_entries` by buffer, assembles their
/// ranges into excerpts, and produces `RelatedBuffer`s sorted by each buffer's
/// best (lowest) identifier rank, tie-broken by path. Returns the cache map
/// unchanged alongside the rebuilt buffer list.
async fn rebuild_related_files(
    project: &Entity<Project>,
    mut new_entries: HashMap<Identifier, Arc<CacheEntry>>,
    cursor_distances: &HashMap<Identifier, usize>,
    cx: &mut AsyncApp,
) -> Result<(HashMap<Identifier, Arc<CacheEntry>>, Vec<RelatedBuffer>)> {
    // Gather snapshots and worktree root names on the foreground, since the
    // background closure below can't access entities directly.
    let mut snapshots = HashMap::default();
    let mut worktree_root_names = HashMap::default();
    for entry in new_entries.values() {
        for definition in entry
            .definitions
            .iter()
            .chain(entry.type_definitions.iter())
        {
            if let hash_map::Entry::Vacant(e) = snapshots.entry(definition.buffer.entity_id()) {
                // Wait for parsing so the snapshot has up-to-date syntax data.
                definition
                    .buffer
                    .read_with(cx, |buffer, _| buffer.parsing_idle())
                    .await;
                e.insert(
                    definition
                        .buffer
                        .read_with(cx, |buffer, _| buffer.snapshot()),
                );
            }
            let worktree_id = definition.path.worktree_id;
            if let hash_map::Entry::Vacant(e) =
                worktree_root_names.entry(definition.path.worktree_id)
            {
                // NOTE(review): the value returned by `read_with` appears to be
                // discarded here — confirm errors can safely be ignored.
                project.read_with(cx, |project, cx| {
                    if let Some(worktree) = project.worktree_for_id(worktree_id, cx) {
                        e.insert(worktree.read(cx).root_name().as_unix_str().to_string());
                    }
                });
            }
        }
    }

    let cursor_distances = cursor_distances.clone();
    Ok(cx
        .background_spawn(async move {
            // Collect, per buffer: its definition ranges (with ranks), its
            // project path, and the minimum rank seen among its identifiers.
            let mut ranges_by_buffer =
                HashMap::<EntityId, (Entity<Buffer>, Vec<(Range<Point>, usize)>)>::default();
            let mut paths_by_buffer = HashMap::default();
            let mut min_rank_by_buffer = HashMap::<EntityId, usize>::default();
            for (identifier, entry) in new_entries.iter_mut() {
                let rank = cursor_distances
                    .get(identifier)
                    .copied()
                    .unwrap_or(usize::MAX);
                for definition in entry
                    .definitions
                    .iter()
                    .chain(entry.type_definitions.iter())
                {
                    let Some(snapshot) = snapshots.get(&definition.buffer.entity_id()) else {
                        continue;
                    };
                    paths_by_buffer.insert(definition.buffer.entity_id(), definition.path.clone());

                    let buffer_rank = min_rank_by_buffer
                        .entry(definition.buffer.entity_id())
                        .or_insert(usize::MAX);
                    *buffer_rank = (*buffer_rank).min(rank);

                    ranges_by_buffer
                        .entry(definition.buffer.entity_id())
                        .or_insert_with(|| (definition.buffer.clone(), Vec::new()))
                        .1
                        .push((definition.anchor_range.to_point(snapshot), rank));
                }
            }

            let mut related_buffers: Vec<RelatedBuffer> = ranges_by_buffer
                .into_iter()
                .filter_map(|(entity_id, (buffer, ranges))| {
                    let snapshot = snapshots.get(&entity_id)?;
                    let project_path = paths_by_buffer.get(&entity_id)?;
                    // Merge/organize raw ranges into display excerpts.
                    let assembled = assemble_excerpt_ranges(snapshot, ranges);
                    let root_name = worktree_root_names.get(&project_path.worktree_id)?;

                    // Re-prefix the relative path with the worktree root name.
                    let path: Arc<Path> = Path::new(&format!(
                        "{}/{}",
                        root_name,
                        project_path.path.as_unix_str()
                    ))
                    .into();

                    // Anchor each excerpt from the start of its first row to
                    // the end of its last row.
                    let mut anchor_ranges = Vec::with_capacity(assembled.len());
                    let mut excerpt_orders = Vec::with_capacity(assembled.len());
                    for (row_range, order) in assembled {
                        let start = snapshot.anchor_before(Point::new(row_range.start, 0));
                        let end_col = snapshot.line_len(row_range.end);
                        let end = snapshot.anchor_after(Point::new(row_range.end, end_col));
                        anchor_ranges.push(start..end);
                        excerpt_orders.push(order);
                    }

                    let mut related_buffer = RelatedBuffer {
                        buffer,
                        path,
                        anchor_ranges,
                        excerpt_orders,
                        cached_file: None,
                    };
                    // Pre-render the excerpt text while we hold the snapshot.
                    related_buffer.fill_cache(snapshot);
                    Some(related_buffer)
                })
                .collect();

            // Most relevant buffers (lowest rank) first; path as a stable
            // tie-breaker.
            related_buffers.sort_by(|a, b| {
                let rank_a = min_rank_by_buffer
                    .get(&a.buffer.entity_id())
                    .copied()
                    .unwrap_or(usize::MAX);
                let rank_b = min_rank_by_buffer
                    .get(&b.buffer.entity_id())
                    .copied()
                    .unwrap_or(usize::MAX);
                rank_a.cmp(&rank_b).then_with(|| a.path.cmp(&b.path))
            });

            (new_entries, related_buffers)
        })
        .await)
}
529
530impl RelatedBuffer {
531 fn related_file(&mut self, cx: &App) -> RelatedFile {
532 let buffer = self.buffer.read(cx);
533 let path = self.path.clone();
534 let cached = if let Some(cached) = &self.cached_file
535 && buffer.version() == cached.buffer_version
536 {
537 cached
538 } else {
539 self.fill_cache(buffer)
540 };
541 let related_file = RelatedFile {
542 path,
543 excerpts: cached.excerpts.clone(),
544 max_row: buffer.max_point().row,
545 in_open_source_repo: false,
546 };
547 return related_file;
548 }
549
550 fn fill_cache(&mut self, buffer: &text::BufferSnapshot) -> &CachedRelatedFile {
551 let excerpts = self
552 .anchor_ranges
553 .iter()
554 .zip(self.excerpt_orders.iter())
555 .map(|(range, &order)| {
556 let start = range.start.to_point(buffer);
557 let end = range.end.to_point(buffer);
558 RelatedExcerpt {
559 row_range: start.row..end.row,
560 text: buffer.text_for_range(start..end).collect::<String>().into(),
561 order,
562 }
563 })
564 .collect::<Vec<_>>();
565 self.cached_file = Some(CachedRelatedFile {
566 excerpts: excerpts,
567 buffer_version: buffer.version().clone(),
568 });
569 self.cached_file.as_ref().unwrap()
570 }
571}
572
573use language::ToPoint as _;
574
/// Maximum byte length of a definition's target range; larger ranges are
/// assumed to cover an entire module rather than a single symbol.
const MAX_TARGET_LEN: usize = 128;
576
577fn process_definition(
578 location: LocationLink,
579 project: &Entity<Project>,
580 cx: &mut App,
581) -> Option<CachedDefinition> {
582 let buffer = location.target.buffer.read(cx);
583 let anchor_range = location.target.range;
584 let file = buffer.file()?;
585 let worktree = project.read(cx).worktree_for_id(file.worktree_id(cx), cx)?;
586 if worktree.read(cx).is_single_file() {
587 return None;
588 }
589
590 // If the target range is large, it likely means we requested the definition of an entire module.
591 // For individual definitions, the target range should be small as it only covers the symbol.
592 let buffer = location.target.buffer.read(cx);
593 let target_len = anchor_range.to_offset(&buffer).len();
594 if target_len > MAX_TARGET_LEN {
595 return None;
596 }
597
598 Some(CachedDefinition {
599 path: ProjectPath {
600 worktree_id: file.worktree_id(cx),
601 path: file.path().clone(),
602 },
603 buffer: location.target.buffer,
604 anchor_range,
605 })
606}
607
/// Gets all of the identifiers that are present in the given line, and its containing
/// outline items.
fn identifiers_for_position(
    buffer: &BufferSnapshot,
    position: Anchor,
    identifier_line_count: u32,
) -> Vec<Identifier> {
    let offset = position.to_offset(buffer);
    let point = buffer.offset_to_point(offset);

    // Search for identifiers on lines adjacent to the cursor.
    let start = Point::new(point.row.saturating_sub(identifier_line_count), 0);
    let end = Point::new(point.row + identifier_line_count + 1, 0).min(buffer.max_point());
    let line_range = start..end;
    let mut ranges = vec![line_range.to_offset(&buffer)];

    // Search for identifiers mentioned in headers/signatures of containing outline items.
    let outline_items = buffer.outline_items_as_offsets_containing(offset..offset, false, None);
    for item in outline_items {
        if let Some(body_range) = item.body_range(&buffer) {
            // Only the item's header (everything before the body) is scanned.
            ranges.push(item.range.start..body_range.start.to_offset(&buffer));
        } else {
            ranges.push(item.range.clone());
        }
    }

    // Sort by start (ties broken by larger end first), then merge overlapping
    // or adjacent ranges so each region is scanned once.
    ranges.sort_by(|a, b| a.start.cmp(&b.start).then(b.end.cmp(&a.end)));
    ranges.dedup_by(|a, b| {
        if a.start <= b.end {
            b.start = b.start.min(a.start);
            b.end = b.end.max(a.end);
            true
        } else {
            false
        }
    });

    let mut identifiers = Vec::new();
    let outer_range =
        ranges.first().map_or(0, |r| r.start)..ranges.last().map_or(buffer.len(), |r| r.end);

    // One capture query over the whole span; re-seek it for each sub-range.
    let mut captures = buffer.captures(outer_range.clone(), |grammar| {
        grammar
            .highlights_config
            .as_ref()
            .map(|config| &config.query)
    });

    for range in ranges {
        captures.set_byte_range(range.start..outer_range.end);

        // Used to skip duplicate captures of the same node.
        let mut last_range = None;
        while let Some(capture) = captures.peek() {
            let node_range = capture.node.byte_range();
            if node_range.start > range.end {
                break;
            }
            let config = captures.grammars()[capture.grammar_index]
                .highlights_config
                .as_ref();

            // Keep captures that the grammar marks as identifiers and that lie
            // entirely within the current range.
            if let Some(config) = config
                && config.identifier_capture_indices.contains(&capture.index)
                && range.contains_inclusive(&node_range)
                && Some(&node_range) != last_range.as_ref()
            {
                let name = buffer.text_for_range(node_range.clone()).collect();
                identifiers.push(Identifier {
                    range: buffer.anchor_after(node_range.start)
                        ..buffer.anchor_before(node_range.end),
                    name,
                });
                last_range = Some(node_range);
            }

            captures.advance();
        }
    }

    identifiers
}