From c94a9b7a948f2454e349ab10bf9d6021a964a0f5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 24 Feb 2026 20:27:57 -0800 Subject: [PATCH] Edit prediction: prioritize related excerpts that are referenced near the cursor (#50050) We store the byte distance between the cursor and references to each definition. When including excerpts in the prompt, we prioritize them in order of proximity. I've updated the Edit Prediction Context view to display each excerpt's `order` and to sort the files by the lowest order among their excerpts. Release Notes: - N/A --- .../src/assemble_excerpts.rs | 42 +- .../src/edit_prediction_context.rs | 126 ++++-- .../src/edit_prediction_context_tests.rs | 320 +++++++++++--- .../src/edit_prediction_context_view.rs | 56 ++- crates/zeta_prompt/src/zeta_prompt.rs | 393 ++++++++++++++++-- 5 files changed, 796 insertions(+), 141 deletions(-) diff --git a/crates/edit_prediction_context/src/assemble_excerpts.rs b/crates/edit_prediction_context/src/assemble_excerpts.rs index 3366b1fdd0fed167368157175f0f88e579e310d3..97b83653f53e693218189a938b02b0411fa78a33 100644 --- a/crates/edit_prediction_context/src/assemble_excerpts.rs +++ b/crates/edit_prediction_context/src/assemble_excerpts.rs @@ -8,16 +8,18 @@ const MAX_OUTLINE_ITEM_BODY_SIZE: usize = 24; pub fn assemble_excerpt_ranges( buffer: &BufferSnapshot, - mut input_ranges: Vec>, -) -> Vec> { + input_ranges: Vec<(Range, usize)>, +) -> Vec<(Range, usize)> { + let mut input_ranges: Vec<(Range, usize)> = input_ranges + .into_iter() + .map(|(range, order)| (clip_range_to_lines(&range, false, buffer), order)) + .collect(); merge_ranges(&mut input_ranges); - let mut outline_ranges = Vec::new(); + let mut outline_ranges: Vec<(Range, usize)> = Vec::new(); let outline_items = buffer.outline_items_as_points_containing(0..buffer.len(), false, None); let mut outline_ix = 0; - for input_range in &mut input_ranges { - *input_range = clip_range_to_lines(input_range, false, buffer); - + for 
(input_range, input_order) in &mut input_ranges { while let Some(outline_item) = outline_items.get(outline_ix) { let item_range = clip_range_to_lines(&outline_item.range, false, buffer); @@ -36,6 +38,7 @@ pub fn assemble_excerpt_ranges( add_outline_item( item_range.clone(), body_range.clone(), + *input_order, buffer, &mut outline_ranges, ); @@ -57,6 +60,7 @@ pub fn assemble_excerpt_ranges( next_outline_item .body_range(buffer) .map(|body| clip_range_to_lines(&body, true, buffer)), + *input_order, buffer, &mut outline_ranges, ); @@ -70,12 +74,12 @@ pub fn assemble_excerpt_ranges( } } - input_ranges.extend_from_slice(&outline_ranges); + input_ranges.extend(outline_ranges); merge_ranges(&mut input_ranges); input_ranges .into_iter() - .map(|range| range.start.row..range.end.row) + .map(|(range, order)| (range.start.row..range.end.row, order)) .collect() } @@ -102,8 +106,9 @@ fn clip_range_to_lines( fn add_outline_item( mut item_range: Range, body_range: Option>, + order: usize, buffer: &BufferSnapshot, - outline_ranges: &mut Vec>, + outline_ranges: &mut Vec<(Range, usize)>, ) { if let Some(mut body_range) = body_range { if body_range.start.column > 0 { @@ -113,38 +118,39 @@ fn add_outline_item( let head_range = item_range.start..body_range.start; if head_range.start < head_range.end { - outline_ranges.push(head_range); + outline_ranges.push((head_range, order)); } let tail_range = body_range.end..item_range.end; if tail_range.start < tail_range.end { - outline_ranges.push(tail_range); + outline_ranges.push((tail_range, order)); } } else { item_range.start.column = 0; item_range.end.column = buffer.line_len(item_range.end.row); - outline_ranges.push(item_range); + outline_ranges.push((item_range, order)); } } -pub fn merge_ranges(ranges: &mut Vec>) { - ranges.sort_unstable_by(|a, b| a.start.cmp(&b.start).then(b.end.cmp(&a.end))); +pub fn merge_ranges(ranges: &mut Vec<(Range, usize)>) { + ranges.sort_unstable_by(|(a, _), (b, _)| 
a.start.cmp(&b.start).then(b.end.cmp(&a.end))); let mut index = 1; while index < ranges.len() { - let mut prev_range_end = ranges[index - 1].end; + let mut prev_range_end = ranges[index - 1].0.end; if prev_range_end.column > 0 { prev_range_end += Point::new(1, 0); } if (prev_range_end + Point::new(1, 0)) - .cmp(&ranges[index].start) + .cmp(&ranges[index].0.start) .is_ge() { let removed = ranges.remove(index); - if removed.end.cmp(&ranges[index - 1].end).is_gt() { - ranges[index - 1].end = removed.end; + if removed.0.end.cmp(&ranges[index - 1].0.end).is_gt() { + ranges[index - 1].0.end = removed.0.end; } + ranges[index - 1].1 = ranges[index - 1].1.min(removed.1); } else { index += 1; } diff --git a/crates/edit_prediction_context/src/edit_prediction_context.rs b/crates/edit_prediction_context/src/edit_prediction_context.rs index 5805e93330504fef1ce70e899d413faf9e89aed2..b93fef49296e493b4f06e93e8d855d6a8e111e97 100644 --- a/crates/edit_prediction_context/src/edit_prediction_context.rs +++ b/crates/edit_prediction_context/src/edit_prediction_context.rs @@ -39,6 +39,7 @@ struct RelatedBuffer { buffer: Entity, path: Arc, anchor_ranges: Vec>, + excerpt_orders: Vec, cached_file: Option, } @@ -174,21 +175,21 @@ impl RelatedExcerptStore { }; let buffer = project.get_open_buffer(&project_path, cx)?; let snapshot = buffer.read(cx).snapshot(); - let anchor_ranges = file - .excerpts - .iter() - .map(|excerpt| { - let start = snapshot.anchor_before(Point::new(excerpt.row_range.start, 0)); - let end_row = excerpt.row_range.end; - let end_col = snapshot.line_len(end_row); - let end = snapshot.anchor_after(Point::new(end_row, end_col)); - start..end - }) - .collect(); + let mut anchor_ranges = Vec::with_capacity(file.excerpts.len()); + let mut excerpt_orders = Vec::with_capacity(file.excerpts.len()); + for excerpt in &file.excerpts { + let start = snapshot.anchor_before(Point::new(excerpt.row_range.start, 0)); + let end_row = excerpt.row_range.end; + let end_col = 
snapshot.line_len(end_row); + let end = snapshot.anchor_after(Point::new(end_row, end_col)); + anchor_ranges.push(start..end); + excerpt_orders.push(excerpt.order); + } Some(RelatedBuffer { buffer, path: file.path.clone(), anchor_ranges, + excerpt_orders, cached_file: None, }) }) @@ -221,18 +222,55 @@ impl RelatedExcerptStore { cx.emit(RelatedExcerptStoreEvent::StartedRefresh); })?; - let identifiers = cx + let identifiers_with_ranks = cx .background_spawn(async move { - identifiers_for_position(&snapshot, position, identifier_line_count) + let cursor_offset = position.to_offset(&snapshot); + let identifiers = + identifiers_for_position(&snapshot, position, identifier_line_count); + + // Compute byte distance from cursor to each identifier, then sort by + // distance so we can assign ordinal ranks. Identifiers at the same + // distance share the same rank. + let mut identifiers_with_distance: Vec<(Identifier, usize)> = identifiers + .into_iter() + .map(|id| { + let start = id.range.start.to_offset(&snapshot); + let end = id.range.end.to_offset(&snapshot); + let distance = if cursor_offset < start { + start - cursor_offset + } else if cursor_offset > end { + cursor_offset - end + } else { + 0 + }; + (id, distance) + }) + .collect(); + identifiers_with_distance.sort_by_key(|(_, distance)| *distance); + + let mut cursor_distances: HashMap = HashMap::default(); + let mut current_rank = 0; + let mut previous_distance = None; + for (identifier, distance) in &identifiers_with_distance { + if previous_distance != Some(*distance) { + current_rank = cursor_distances.len(); + previous_distance = Some(*distance); + } + cursor_distances.insert(identifier.clone(), current_rank); + } + + (identifiers_with_distance, cursor_distances) }) .await; + let (identifiers_with_distance, cursor_distances) = identifiers_with_ranks; + let async_cx = cx.clone(); let start_time = Instant::now(); let futures = this.update(cx, |this, cx| { - identifiers + identifiers_with_distance .into_iter() - 
.filter_map(|identifier| { + .filter_map(|(identifier, _)| { let task = if let Some(entry) = this.cache.get(&identifier) { DefinitionTask::CacheHit(entry.clone()) } else { @@ -334,7 +372,8 @@ impl RelatedExcerptStore { } mean_definition_latency /= cache_miss_count.max(1) as u32; - let (new_cache, related_buffers) = rebuild_related_files(&project, new_cache, cx).await?; + let (new_cache, related_buffers) = + rebuild_related_files(&project, new_cache, &cursor_distances, cx).await?; if let Some(file) = &file { log::debug!( @@ -362,6 +401,7 @@ impl RelatedExcerptStore { async fn rebuild_related_files( project: &Entity, mut new_entries: HashMap>, + cursor_distances: &HashMap, cx: &mut AsyncApp, ) -> Result<(HashMap>, Vec)> { let mut snapshots = HashMap::default(); @@ -396,12 +436,18 @@ async fn rebuild_related_files( } } + let cursor_distances = cursor_distances.clone(); Ok(cx .background_spawn(async move { let mut ranges_by_buffer = - HashMap::, Vec>)>::default(); + HashMap::, Vec<(Range, usize)>)>::default(); let mut paths_by_buffer = HashMap::default(); - for entry in new_entries.values_mut() { + let mut min_rank_by_buffer = HashMap::::default(); + for (identifier, entry) in new_entries.iter_mut() { + let rank = cursor_distances + .get(identifier) + .copied() + .unwrap_or(usize::MAX); for definition in entry .definitions .iter() @@ -412,11 +458,16 @@ async fn rebuild_related_files( }; paths_by_buffer.insert(definition.buffer.entity_id(), definition.path.clone()); + let buffer_rank = min_rank_by_buffer + .entry(definition.buffer.entity_id()) + .or_insert(usize::MAX); + *buffer_rank = (*buffer_rank).min(rank); + ranges_by_buffer .entry(definition.buffer.entity_id()) .or_insert_with(|| (definition.buffer.clone(), Vec::new())) .1 - .push(definition.anchor_range.to_point(snapshot)); + .push((definition.anchor_range.to_point(snapshot), rank)); } } @@ -425,7 +476,7 @@ async fn rebuild_related_files( .filter_map(|(entity_id, (buffer, ranges))| { let snapshot = 
snapshots.get(&entity_id)?; let project_path = paths_by_buffer.get(&entity_id)?; - let row_ranges = assemble_excerpt_ranges(snapshot, ranges); + let assembled = assemble_excerpt_ranges(snapshot, ranges); let root_name = worktree_root_names.get(&project_path.worktree_id)?; let path: Arc = Path::new(&format!( @@ -435,20 +486,21 @@ async fn rebuild_related_files( )) .into(); - let anchor_ranges = row_ranges - .into_iter() - .map(|row_range| { - let start = snapshot.anchor_before(Point::new(row_range.start, 0)); - let end_col = snapshot.line_len(row_range.end); - let end = snapshot.anchor_after(Point::new(row_range.end, end_col)); - start..end - }) - .collect(); + let mut anchor_ranges = Vec::with_capacity(assembled.len()); + let mut excerpt_orders = Vec::with_capacity(assembled.len()); + for (row_range, order) in assembled { + let start = snapshot.anchor_before(Point::new(row_range.start, 0)); + let end_col = snapshot.line_len(row_range.end); + let end = snapshot.anchor_after(Point::new(row_range.end, end_col)); + anchor_ranges.push(start..end); + excerpt_orders.push(order); + } let mut related_buffer = RelatedBuffer { buffer, path, anchor_ranges, + excerpt_orders, cached_file: None, }; related_buffer.fill_cache(snapshot); @@ -456,7 +508,17 @@ async fn rebuild_related_files( }) .collect(); - related_buffers.sort_by_key(|related| related.path.clone()); + related_buffers.sort_by(|a, b| { + let rank_a = min_rank_by_buffer + .get(&a.buffer.entity_id()) + .copied() + .unwrap_or(usize::MAX); + let rank_b = min_rank_by_buffer + .get(&b.buffer.entity_id()) + .copied() + .unwrap_or(usize::MAX); + rank_a.cmp(&rank_b).then_with(|| a.path.cmp(&b.path)) + }); (new_entries, related_buffers) }) @@ -487,12 +549,14 @@ impl RelatedBuffer { let excerpts = self .anchor_ranges .iter() - .map(|range| { + .zip(self.excerpt_orders.iter()) + .map(|(range, &order)| { let start = range.start.to_point(buffer); let end = range.end.to_point(buffer); RelatedExcerpt { row_range: start.row..end.row, 
text: buffer.text_for_range(start..end).collect::().into(), + order, } }) .collect::>(); diff --git a/crates/edit_prediction_context/src/edit_prediction_context_tests.rs b/crates/edit_prediction_context/src/edit_prediction_context_tests.rs index b619fa729449f2e232a8c8231f416f5a15c5271f..01c4c76e82eb0851b7552b3d9117af1212a8b3da 100644 --- a/crates/edit_prediction_context/src/edit_prediction_context_tests.rs +++ b/crates/edit_prediction_context/src/edit_prediction_context_tests.rs @@ -48,6 +48,24 @@ async fn test_edit_prediction_context(cx: &mut TestAppContext) { assert_related_files( &excerpts, &[ + ( + "root/src/person.rs", + &[ + indoc! {" + pub struct Person { + first_name: String, + last_name: String, + email: String, + age: u32, + } + + impl Person { + pub fn get_first_name(&self) -> &str { + &self.first_name + }"}, + "}", + ], + ), ( "root/src/company.rs", &[indoc! {" @@ -71,24 +89,6 @@ async fn test_edit_prediction_context(cx: &mut TestAppContext) { }"}, ], ), - ( - "root/src/person.rs", - &[ - indoc! {" - pub struct Person { - first_name: String, - last_name: String, - email: String, - age: u32, - } - - impl Person { - pub fn get_first_name(&self) -> &str { - &self.first_name - }"}, - "}", - ], - ), ], ); }); @@ -112,6 +112,24 @@ async fn test_edit_prediction_context(cx: &mut TestAppContext) { assert_related_files( &excerpts, &[ + ( + "root/src/person.rs", + &[ + indoc! {" + pub struct Person { + first_name: String, + last_name: String, + email: String, + age: u32, + } + + impl Person { + pub fn get_first_name(&self) -> &str { + &self.first_name + }"}, + "}", + ], + ), ( "root/src/company.rs", &[indoc! {" @@ -136,24 +154,6 @@ async fn test_edit_prediction_context(cx: &mut TestAppContext) { }"}, ], ), - ( - "root/src/person.rs", - &[ - indoc! 
{" - pub struct Person { - first_name: String, - last_name: String, - email: String, - age: u32, - } - - impl Person { - pub fn get_first_name(&self) -> &str { - &self.first_name - }"}, - "}", - ], - ), ], ); }); @@ -290,20 +290,21 @@ fn test_assemble_excerpts(cx: &mut TestAppContext) { let (input, ranges) = marked_text_ranges(&input, false); let buffer = cx.new(|cx| Buffer::local(input, cx).with_language(rust_lang(), cx)); buffer.read_with(cx, |buffer, _cx| { - let ranges: Vec> = ranges + let ranges: Vec<(Range, usize)> = ranges .into_iter() - .map(|range| range.to_point(&buffer)) + .map(|range| (range.to_point(&buffer), 0)) .collect(); - let row_ranges = assemble_excerpt_ranges(&buffer.snapshot(), ranges); - let excerpts: Vec = row_ranges + let assembled = assemble_excerpt_ranges(&buffer.snapshot(), ranges); + let excerpts: Vec = assembled .into_iter() - .map(|row_range| { + .map(|(row_range, order)| { let start = Point::new(row_range.start, 0); let end = Point::new(row_range.end, buffer.line_len(row_range.end)); RelatedExcerpt { row_range, text: buffer.text_for_range(start..end).collect::().into(), + order, } }) .collect(); @@ -620,7 +621,6 @@ async fn test_type_definition_deduplication(cx: &mut TestAppContext) { assert_related_files( &excerpts, &[ - ("root/src/main.rs", &["fn work() {", "}"]), ( "root/src/types.rs", &[indoc! {" @@ -628,6 +628,194 @@ async fn test_type_definition_deduplication(cx: &mut TestAppContext) { value: i32, }"}], ), + ("root/src/main.rs", &["fn work() {", "}"]), + ], + ); + }); +} + +#[gpui::test] +async fn test_definitions_ranked_by_cursor_proximity(cx: &mut TestAppContext) { + init_test(cx); + let fs = FakeFs::new(cx.executor()); + + // helpers.rs has an impl block whose body exceeds the test + // MAX_OUTLINE_ITEM_BODY_SIZE (24 bytes), so assemble_excerpt_ranges + // splits it into header + individual children + closing brace. main.rs + // references two of the three methods on separate lines at varying + // distances from the cursor. 
This exercises: + // 1. File ordering by closest identifier rank. + // 2. Per-excerpt ordering within a file — child excerpts carry the rank + // of the identifier that discovered them. + // 3. Parent excerpt (impl header / closing brace) inheriting the minimum + // order of its children. + fs.insert_tree( + path!("/root"), + json!({ + "src": { + "helpers.rs": indoc! {r#" + pub struct Helpers { + value: i32, + } + + impl Helpers { + pub fn alpha(&self) -> i32 { + let intermediate = self.value; + intermediate + 1 + } + + pub fn beta(&self) -> i32 { + let intermediate = self.value; + intermediate + 2 + } + + pub fn gamma(&self) -> i32 { + let intermediate = self.value; + intermediate + 3 + } + } + "#}, + "main.rs": indoc! {r#" + use super::helpers::Helpers; + + fn process(h: Helpers) { + let a = h.alpha(); + let b = h.gamma(); + } + "#}, + }, + }), + ) + .await; + + let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await; + let mut servers = setup_fake_lsp(&project, cx); + + let (buffer, _handle) = project + .update(cx, |project, cx| { + project.open_local_buffer_with_lsp(path!("/root/src/main.rs"), cx) + }) + .await + .unwrap(); + + let _server = servers.next().await.unwrap(); + cx.run_until_parked(); + + // Place cursor on "h.alpha()". `alpha` is at distance 0, `gamma` is + // farther below. Both resolve to methods inside `impl Helpers` in + // helpers.rs. The impl header and closing brace excerpts should inherit + // the min order of their children (alpha's order). 
+ let related_excerpt_store = cx.new(|cx| RelatedExcerptStore::new(&project, cx)); + related_excerpt_store.update(cx, |store, cx| { + let position = { + let buffer = buffer.read(cx); + let offset = buffer.text().find("h.alpha()").unwrap(); + buffer.anchor_before(offset) + }; + + store.set_identifier_line_count(1); + store.refresh(buffer.clone(), position, cx); + }); + + cx.executor().advance_clock(DEBOUNCE_DURATION); + related_excerpt_store.update(cx, |store, cx| { + let files = store.related_files(cx); + + // helpers.rs has 4 excerpts: the struct+impl header merged with + // the alpha method header (order 1 from alpha), alpha's closing + // brace (order 1), gamma's method header (order 6), and the + // gamma+impl closing brace (order 1, inherited from alpha which + // is also a child of the impl). + let alpha_order = 1; + let gamma_order = 6; + assert_related_files_with_orders( + &files, + &[ + ( + "root/src/helpers.rs", + &[ + ( + indoc! {" + pub struct Helpers { + value: i32, + } + + impl Helpers { + pub fn alpha(&self) -> i32 {"}, + alpha_order, + ), + (" }", alpha_order), + (" pub fn gamma(&self) -> i32 {", gamma_order), + ( + indoc! {" + } + }"}, + alpha_order, + ), + ], + ), + ( + "root/src/main.rs", + &[("fn process(h: Helpers) {", 8), ("}", 8)], + ), + ], + ); + }); + + // Now move cursor to "h.gamma()" — gamma becomes closest, reranking the + // excerpts so that the gamma method excerpt has the best order and the + // alpha method excerpt has a worse order. + related_excerpt_store.update(cx, |store, cx| { + let position = { + let buffer = buffer.read(cx); + let offset = buffer.text().find("h.gamma()").unwrap(); + buffer.anchor_before(offset) + }; + + store.set_identifier_line_count(1); + store.refresh(buffer.clone(), position, cx); + }); + + cx.executor().advance_clock(DEBOUNCE_DURATION); + related_excerpt_store.update(cx, |store, cx| { + let files = store.related_files(cx); + + // Now gamma is closest. 
The alpha method excerpts carry alpha's + // rank (3), and the gamma method excerpts carry gamma's rank (1). + // The impl closing brace merges with gamma's closing brace and + // inherits gamma's order (the best child). + let alpha_order = 3; + let gamma_order = 1; + assert_related_files_with_orders( + &files, + &[ + ( + "root/src/helpers.rs", + &[ + ( + indoc! {" + pub struct Helpers { + value: i32, + } + + impl Helpers { + pub fn alpha(&self) -> i32 {"}, + alpha_order, + ), + (" }", alpha_order), + (" pub fn gamma(&self) -> i32 {", gamma_order), + ( + indoc! {" + } + }"}, + gamma_order, + ), + ], + ), + ( + "root/src/main.rs", + &[("fn process(h: Helpers) {", 8), ("}", 8)], + ), ], ); }); @@ -788,30 +976,56 @@ fn test_project_1() -> serde_json::Value { } fn assert_related_files(actual_files: &[RelatedFile], expected_files: &[(&str, &[&str])]) { - let actual_files = actual_files + let expected_with_orders: Vec<(&str, Vec<(&str, usize)>)> = expected_files + .iter() + .map(|(path, texts)| (*path, texts.iter().map(|text| (*text, 0)).collect())) + .collect(); + let expected_refs: Vec<(&str, &[(&str, usize)])> = expected_with_orders + .iter() + .map(|(path, excerpts)| (*path, excerpts.as_slice())) + .collect(); + assert_related_files_impl(actual_files, &expected_refs, false) +} + +fn assert_related_files_with_orders( + actual_files: &[RelatedFile], + expected_files: &[(&str, &[(&str, usize)])], +) { + assert_related_files_impl(actual_files, expected_files, true) +} + +fn assert_related_files_impl( + actual_files: &[RelatedFile], + expected_files: &[(&str, &[(&str, usize)])], + check_orders: bool, +) { + let actual: Vec<(&str, Vec<(String, usize)>)> = actual_files .iter() .map(|file| { let excerpts = file .excerpts .iter() - .map(|excerpt| excerpt.text.to_string()) - .collect::>(); + .map(|excerpt| { + let order = if check_orders { excerpt.order } else { 0 }; + (excerpt.text.to_string(), order) + }) + .collect(); (file.path.to_str().unwrap(), excerpts) }) - 
.collect::>(); - let expected_excerpts = expected_files + .collect(); + let expected: Vec<(&str, Vec<(String, usize)>)> = expected_files .iter() - .map(|(path, texts)| { + .map(|(path, excerpts)| { ( *path, - texts + excerpts .iter() - .map(|line| line.to_string()) - .collect::>(), + .map(|(text, order)| (text.to_string(), *order)) + .collect(), ) }) - .collect::>(); - pretty_assertions::assert_eq!(actual_files, expected_excerpts) + .collect(); + pretty_assertions::assert_eq!(actual, expected) } fn assert_definitions(definitions: &[LocationLink], first_lines: &[&str], cx: &mut TestAppContext) { diff --git a/crates/edit_prediction_ui/src/edit_prediction_context_view.rs b/crates/edit_prediction_ui/src/edit_prediction_context_view.rs index 6ad816c36ddea3f0493ce853fd6f0efd4b8e0dc7..48e74dcdcc102f9ed7844f1b8829e0182fe2c97b 100644 --- a/crates/edit_prediction_ui/src/edit_prediction_context_view.rs +++ b/crates/edit_prediction_ui/src/edit_prediction_context_view.rs @@ -8,14 +8,17 @@ use std::{ use anyhow::Result; use client::{Client, UserStore}; -use editor::{Editor, PathKey}; +use editor::{ + Editor, PathKey, + display_map::{BlockPlacement, BlockProperties, BlockStyle}, +}; use futures::StreamExt as _; use gpui::{ Animation, AnimationExt, App, AppContext as _, Context, Entity, EventEmitter, FocusHandle, Focusable, InteractiveElement as _, IntoElement as _, ParentElement as _, SharedString, Styled as _, Task, TextAlign, Window, actions, div, pulsating_between, }; -use multi_buffer::MultiBuffer; +use multi_buffer::{Anchor, MultiBuffer}; use project::Project; use text::Point; use ui::{ @@ -165,8 +168,14 @@ impl EditPredictionContextView { } cx.spawn_in(window, async move |this, cx| { - let mut paths = Vec::new(); + let mut paths: Vec<(PathKey, _, Vec<_>, Vec, usize)> = Vec::new(); for (related_file, buffer) in related_files { + let orders = related_file + .excerpts + .iter() + .map(|excerpt| excerpt.order) + .collect::>(); + let min_order = 
orders.iter().copied().min().unwrap_or(usize::MAX); let point_ranges = related_file .excerpts .iter() @@ -175,20 +184,53 @@ impl EditPredictionContextView { }) .collect::>(); cx.update(|_, cx| { - let path = PathKey::for_buffer(&buffer, cx); - paths.push((path, buffer, point_ranges)); + let path = if let Some(file) = buffer.read(cx).file() { + PathKey::with_sort_prefix(min_order as u64, file.path().clone()) + } else { + PathKey::for_buffer(&buffer, cx) + }; + paths.push((path, buffer, point_ranges, orders, min_order)); })?; } + paths.sort_by_key(|(_, _, _, _, min_order)| *min_order); + + let mut excerpt_anchors_with_orders: Vec<(Anchor, usize)> = Vec::new(); + multibuffer.update(cx, |multibuffer, cx| { multibuffer.clear(cx); - for (path, buffer, ranges) in paths { - multibuffer.set_excerpts_for_path(path, buffer, ranges, 0, cx); + for (path, buffer, ranges, orders, _) in paths { + let (anchor_ranges, _) = + multibuffer.set_excerpts_for_path(path, buffer, ranges, 0, cx); + for (anchor_range, order) in anchor_ranges.into_iter().zip(orders) { + excerpt_anchors_with_orders.push((anchor_range.start, order)); + } } }); editor.update_in(cx, |editor, window, cx| { + let blocks = excerpt_anchors_with_orders + .into_iter() + .map(|(anchor, order)| { + let label = SharedString::from(format!("order: {order}")); + BlockProperties { + placement: BlockPlacement::Above(anchor), + height: Some(1), + style: BlockStyle::Sticky, + render: Arc::new(move |cx| { + div() + .pl(cx.anchor_x) + .text_ui_xs(cx) + .text_color(cx.editor_style.status.info) + .child(label.clone()) + .into_any_element() + }), + priority: 0, + } + }) + .collect::>(); + editor.insert_blocks(blocks, None, cx); editor.move_to_beginning(&Default::default(), window, cx); })?; diff --git a/crates/zeta_prompt/src/zeta_prompt.rs b/crates/zeta_prompt/src/zeta_prompt.rs index 7391683d34d8010336c6f81e6da50be6e6c11c15..95110bae009d1fc40766f741e4aad06b4c10ca6c 100644 --- a/crates/zeta_prompt/src/zeta_prompt.rs +++ 
b/crates/zeta_prompt/src/zeta_prompt.rs @@ -213,6 +213,8 @@ pub struct RelatedFile { pub struct RelatedExcerpt { pub row_range: Range, pub text: Arc, + #[serde(default)] + pub order: usize, } pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool { @@ -419,53 +421,167 @@ fn format_edit_history_within_budget( result } +fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize { + let needs_newline = !excerpt.text.ends_with('\n'); + let needs_ellipsis = excerpt.row_range.end < file_max_row; + let len = excerpt.text.len() + + if needs_newline { "\n".len() } else { 0 } + + if needs_ellipsis { "...\n".len() } else { 0 }; + estimate_tokens(len) +} + fn format_related_files_within_budget( related_files: &[RelatedFile], file_marker: &str, max_tokens: usize, ) -> String { - let mut result = String::new(); - let mut total_tokens = 0; + // Collect the distinct order values across all excerpts, sorted ascending. + let mut order_levels: Vec = related_files + .iter() + .flat_map(|f| f.excerpts.iter().map(|e| e.order)) + .collect(); + order_levels.sort_unstable(); + order_levels.dedup(); - for file in related_files { - let path_str = file.path.to_string_lossy(); - let header = format!("{}{}\n", file_marker, path_str); - let header_tokens = estimate_tokens(header.len()); + // Pre-compute file header strings and their token costs. + let file_headers: Vec = related_files + .iter() + .map(|file| { + let path_str = file.path.to_string_lossy(); + format!("{}{}\n", file_marker, path_str) + }) + .collect(); + + // Track which excerpts are included per file. + let mut included: Vec> = related_files + .iter() + .map(|file| vec![false; file.excerpts.len()]) + .collect(); + let mut file_included: Vec = vec![false; related_files.len()]; + let mut total_tokens = 0; - if total_tokens + header_tokens > max_tokens { - break; + // Process order levels from best (lowest) to worst. 
At each level, try to + // include all not-yet-included excerpts with that order across all files. + // If the full level doesn't fit, include a partial prefix (top-to-bottom + // within each file) and stop — don't proceed to worse order levels. + 'outer: for &order in &order_levels { + // Gather the work for this order level: for each file that has excerpts + // at this order, collect the not-yet-included excerpt indices (in their + // original positional order) and the token cost to add them (including + // the file header if the file isn't already included). + struct FileWork { + file_idx: usize, + excerpt_indices: Vec, + header_cost: usize, + excerpt_costs: Vec, } - let mut file_tokens = header_tokens; - let mut excerpts_to_include = 0; - - for excerpt in &file.excerpts { - let needs_newline = !excerpt.text.ends_with('\n'); - let needs_ellipsis = excerpt.row_range.end < file.max_row; - let excerpt_len = excerpt.text.len() - + if needs_newline { "\n".len() } else { 0 } - + if needs_ellipsis { "...\n".len() } else { 0 }; - - let excerpt_tokens = estimate_tokens(excerpt_len); - if total_tokens + file_tokens + excerpt_tokens > max_tokens { - break; + let mut work_items: Vec = Vec::new(); + for (file_idx, file) in related_files.iter().enumerate() { + let mut excerpt_indices = Vec::new(); + let mut excerpt_costs = Vec::new(); + for (eidx, excerpt) in file.excerpts.iter().enumerate() { + if excerpt.order == order && !included[file_idx][eidx] { + excerpt_indices.push(eidx); + excerpt_costs.push(excerpt_rendered_tokens(excerpt, file.max_row)); + } } - file_tokens += excerpt_tokens; - excerpts_to_include += 1; + if excerpt_indices.is_empty() { + continue; + } + let header_cost = if file_included[file_idx] { + 0 + } else { + estimate_tokens(file_headers[file_idx].len()) + }; + work_items.push(FileWork { + file_idx, + excerpt_indices, + header_cost, + excerpt_costs, + }); } - if excerpts_to_include > 0 { - total_tokens += file_tokens; - result.push_str(&header); - for 
excerpt in file.excerpts.iter().take(excerpts_to_include) { - result.push_str(&excerpt.text); - if !result.ends_with('\n') { - result.push('\n'); + // Compute the total cost for this entire order level. + let level_cost: usize = work_items + .iter() + .map(|w| w.header_cost + w.excerpt_costs.iter().sum::()) + .sum(); + + if total_tokens + level_cost <= max_tokens { + // The whole level fits — include everything. + for work in &work_items { + total_tokens += work.header_cost; + file_included[work.file_idx] = true; + for (i, &eidx) in work.excerpt_indices.iter().enumerate() { + included[work.file_idx][eidx] = true; + total_tokens += work.excerpt_costs[i]; } - if excerpt.row_range.end < file.max_row { - result.push_str("...\n"); + } + } else { + // The whole level doesn't fit. Include as many excerpts as possible + // from each file (in positional order), then stop entirely. + for work in &work_items { + let available = max_tokens.saturating_sub(total_tokens); + let mut file_cost = work.header_cost; + + let mut count = 0; + for i in 0..work.excerpt_indices.len() { + if file_cost + work.excerpt_costs[i] > available { + break; + } + file_cost += work.excerpt_costs[i]; + count += 1; + } + + if count > 0 { + total_tokens += work.header_cost; + file_included[work.file_idx] = true; + for (i, &eidx) in work.excerpt_indices.iter().take(count).enumerate() { + included[work.file_idx][eidx] = true; + total_tokens += work.excerpt_costs[i]; + } } } + break 'outer; + } + } + + // Determine file rendering order: by the best (lowest) order of any + // included excerpt, breaking ties by original file index. 
+ let mut file_order: Vec<(usize, usize)> = Vec::new(); + for (file_idx, file) in related_files.iter().enumerate() { + if !file_included[file_idx] { + continue; + } + let best_order = file + .excerpts + .iter() + .enumerate() + .filter(|(eidx, _)| included[file_idx][*eidx]) + .map(|(_, e)| e.order) + .min() + .unwrap_or(usize::MAX); + file_order.push((file_idx, best_order)); + } + file_order.sort_by_key(|&(file_idx, best_order)| (best_order, file_idx)); + + // Render included files and excerpts in positional order within each file. + let mut result = String::new(); + for &(file_idx, _) in &file_order { + let file = &related_files[file_idx]; + result.push_str(&file_headers[file_idx]); + for (eidx, excerpt) in file.excerpts.iter().enumerate() { + if !included[file_idx][eidx] { + continue; + } + result.push_str(&excerpt.text); + if !result.ends_with('\n') { + result.push('\n'); + } + if excerpt.row_range.end < file.max_row { + result.push_str("...\n"); + } } } @@ -1136,6 +1252,7 @@ mod tests { excerpts: vec![RelatedExcerpt { row_range: 0..content.lines().count() as u32, text: content.into(), + order: 0, }], in_open_source_repo: false, } @@ -1244,14 +1361,17 @@ mod tests { RelatedExcerpt { row_range: 0..10, text: "first excerpt\n".into(), + order: 0, }, RelatedExcerpt { row_range: 10..20, text: "second excerpt\n".into(), + order: 0, }, RelatedExcerpt { row_range: 20..30, text: "third excerpt\n".into(), + order: 0, }, ], }], @@ -1291,6 +1411,149 @@ mod tests { ); } + #[test] + fn test_truncation_prioritizes_lower_order_excerpts() { + // Two files: file_a has a high-order excerpt, file_b has a low-order one. + // With tight budget, only the lower-order excerpt from file_b should be included. 
+ let input = make_input( + "x", + 0..1, + 0, + vec![], + vec![ + RelatedFile { + path: Path::new("file_a.rs").into(), + max_row: 10, + in_open_source_repo: false, + excerpts: vec![RelatedExcerpt { + row_range: 0..10, + text: "low priority content\n".into(), + order: 5, + }], + }, + RelatedFile { + path: Path::new("file_b.rs").into(), + max_row: 10, + in_open_source_repo: false, + excerpts: vec![RelatedExcerpt { + row_range: 0..10, + text: "high priority content\n".into(), + order: 1, + }], + }, + ], + ); + + // With large budget, both files included; file_b (order 1) renders before file_a (order 5). + assert_eq!( + format_with_budget(&input, 10000), + indoc! {r#" + <|file_sep|>file_b.rs + high priority content + <|file_sep|>file_a.rs + low priority content + <|file_sep|>test.rs + <|fim_prefix|> + <|fim_middle|>current + <|user_cursor|>x + <|fim_suffix|> + <|fim_middle|>updated + "#} + ); + + // With tight budget, only file_b (lower order) fits. + // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files. + // file_b header (7) + excerpt (7) = 14 tokens, which fits. + // file_a would need another 14 tokens, which doesn't fit. + assert_eq!( + format_with_budget(&input, 52), + indoc! {r#" + <|file_sep|>file_b.rs + high priority content + <|file_sep|>test.rs + <|fim_prefix|> + <|fim_middle|>current + <|user_cursor|>x + <|fim_suffix|> + <|fim_middle|>updated + "#} + ); + } + + #[test] + fn test_truncation_drops_high_order_excerpts_within_file() { + // A single file has excerpts at order 1 and order 3. With a tight budget, + // only the order-1 excerpts are included while the order-3 excerpt is + // dropped — even though they belong to the same file. This also preserves + // the parent invariant: parent outline items have order ≤ their best + // child, so they're always included when any child is. 
+ let input = make_input( + "x", + 0..1, + 0, + vec![], + vec![RelatedFile { + path: Path::new("mod.rs").into(), + max_row: 30, + in_open_source_repo: false, + excerpts: vec![ + RelatedExcerpt { + row_range: 0..5, + text: "mod header\n".into(), + order: 1, + }, + RelatedExcerpt { + row_range: 5..15, + text: "important fn\n".into(), + order: 1, + }, + RelatedExcerpt { + row_range: 15..30, + text: "less important fn\n".into(), + order: 3, + }, + ], + }], + ); + + // With large budget, all three excerpts included. + assert_eq!( + format_with_budget(&input, 10000), + indoc! {r#" + <|file_sep|>mod.rs + mod header + ... + important fn + ... + less important fn + <|file_sep|>test.rs + <|fim_prefix|> + <|fim_middle|>current + <|user_cursor|>x + <|fim_suffix|> + <|fim_middle|>updated + "#} + ); + + // With tight budget, only order<=1 excerpts included (header + important fn). + assert_eq!( + format_with_budget(&input, 55), + indoc! {r#" + <|file_sep|>mod.rs + mod header + ... + important fn + ... + <|file_sep|>test.rs + <|fim_prefix|> + <|fim_middle|>current + <|user_cursor|>x + <|fim_suffix|> + <|fim_middle|>updated + "#} + ); + } + #[test] fn test_truncation_drops_older_events_first() { let input = make_input( @@ -1463,6 +1726,72 @@ mod tests { ); } + #[test] + fn test_seed_coder_truncation_prioritizes_lower_order() { + let input = make_input( + "code", + 0..4, + 2, + vec![], + vec![ + RelatedFile { + path: Path::new("low_prio.rs").into(), + max_row: 5, + in_open_source_repo: false, + excerpts: vec![RelatedExcerpt { + row_range: 0..5, + text: "low prio\n".into(), + order: 10, + }], + }, + RelatedFile { + path: Path::new("high_prio.rs").into(), + max_row: 5, + in_open_source_repo: false, + excerpts: vec![RelatedExcerpt { + row_range: 0..5, + text: "high prio\n".into(), + order: 1, + }], + }, + ], + ); + + // With large budget, both included; high_prio first due to lower order. + assert_eq!( + format_seed_coder(&input), + indoc! 
{r#" + <[fim-suffix]> + <[fim-prefix]>high_prio.rs + high prio + low_prio.rs + low prio + + test.rs + <<<<<<< CURRENT + co<|user_cursor|>de + ======= + <[fim-middle]>"#} + ); + + // With tight budget, only high_prio included. + // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files. + // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19. + assert_eq!( + format_seed_coder_with_budget(&input, 44), + indoc! {r#" + <[fim-suffix]> + <[fim-prefix]>high_prio.rs + high prio + + test.rs + <<<<<<< CURRENT + co<|user_cursor|>de + ======= + <[fim-middle]>"#} + ); + } + #[test] fn test_seed_coder_clean_output() { let output_with_marker = "new code\n>>>>>>> UPDATED\n";