From 892e0e956505ed455466fce769e01cb4abc7aadf Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Fri, 20 Feb 2026 02:28:35 +0200 Subject: [PATCH] Draft the overly large excerpt mitigation --- crates/multi_buffer/src/path_key.rs | 105 ++++++++++++++++++++++------ crates/search/src/project_search.rs | 63 ++++++++++++++++- 2 files changed, 144 insertions(+), 24 deletions(-) diff --git a/crates/multi_buffer/src/path_key.rs b/crates/multi_buffer/src/path_key.rs index a85a67fd6edde174fa0bf57d4776f300948f1281..fc9c18aed4a31ef2f100d80d3ee3f5e2da8f5b67 100644 --- a/crates/multi_buffer/src/path_key.rs +++ b/crates/multi_buffer/src/path_key.rs @@ -1,5 +1,13 @@ use std::{mem, ops::Range, sync::Arc}; +/// When incrementally updating excerpts, we try to expand new ranges to match +/// overlapping existing excerpts so we can reuse them (avoiding rewrap churn). +/// But if an existing excerpt is much larger than the new range, blindly +/// expanding creates huge excerpts with many irrelevant lines above/below the +/// actual matches. This constant caps how many extra rows we allow on each +/// side of the *original* new range when expanding toward an existing excerpt. +const MAX_EXCERPT_EXPANSION_ROWS: u32 = 10; + use collections::HashSet; use gpui::{App, AppContext, Context, Entity}; use itertools::Itertools; @@ -13,6 +21,22 @@ use crate::{ Anchor, ExcerptId, ExcerptRange, ExpandExcerptDirection, MultiBuffer, build_excerpt_ranges, }; +struct ExistingExcerpt { + id: ExcerptId, + range: Range, +} + +struct ExpandedExcerpts { + ranges: Vec>, + counts: Vec, + /// Existing excerpts that overlap new ranges but are too large to expand + /// to within `MAX_EXCERPT_EXPANSION_ROWS`. These must be removed before + /// `update_path_excerpts` runs, so the new ranges get inserted fresh + /// instead of being unioned back to the old size by the partial-overlap + /// branch. + stale_excerpt_ids: Vec, +} + #[derive(Debug, Clone)] pub struct PathExcerptInsertResult { pub inserted_ranges: Vec>, @@ -297,14 +321,22 @@ impl MultiBuffer { counts: Vec, cx: &mut Context, ) -> PathExcerptInsertResult { - let (new, counts) = - self.expand_new_ranges_to_existing(&path, buffer_snapshot, new, counts, cx); + let expanded = self.expand_new_ranges_to_existing(&path, buffer_snapshot, new, counts, cx); + if !expanded.stale_excerpt_ids.is_empty() { + if let Some(existing) = self.excerpts_by_path.get_mut(&path) { + existing.retain(|id| !expanded.stale_excerpt_ids.contains(id)); + } + for &id in &expanded.stale_excerpt_ids { + self.paths_by_excerpt.remove(&id); + } + self.remove_excerpts(expanded.stale_excerpt_ids, cx); + } let (excerpt_ids, added_new_excerpt) = - self.update_path_excerpts(path, buffer, buffer_snapshot, new, cx); + self.update_path_excerpts(path, buffer, buffer_snapshot, expanded.ranges, cx); let mut inserted_ranges = Vec::new(); let mut ranges = ranges.into_iter(); - for (&excerpt_id, range_count) in excerpt_ids.iter().zip(counts.into_iter()) { + for (&excerpt_id, range_count) in excerpt_ids.iter().zip(expanded.counts.into_iter()) { for range in ranges.by_ref().take(range_count) { let range = Anchor::range_in_buffer( excerpt_id, @@ -333,44 +365,67 @@ impl MultiBuffer { mut new: Vec>, counts: Vec, cx: &App, - ) -> (Vec>, Vec) { + ) -> ExpandedExcerpts { let existing = self.excerpts_by_path.get(path).cloned().unwrap_or_default(); if existing.is_empty() || new.is_empty() { - return (new, counts); + return ExpandedExcerpts { + ranges: new, + counts, + stale_excerpt_ids: Vec::new(), + }; } let snapshot = self.snapshot(cx); let buffer_id = buffer_snapshot.remote_id(); - let existing_ranges: Vec> = existing + let existing_excerpts = existing .iter() .filter_map(|&id| { let excerpt = snapshot.excerpt(id)?; - (excerpt.buffer_id == buffer_id) - .then(|| excerpt.range.context.to_point(buffer_snapshot)) + (excerpt.buffer_id == buffer_id).then(|| ExistingExcerpt { + id, + range: excerpt.range.context.to_point(buffer_snapshot), + }) }) - .collect(); + .collect::>(); let mut changed = false; + let mut stale_excerpt_ids = Vec::new(); for new_range in &mut new { - for existing_range in &existing_ranges { - if new_range.context.start <= existing_range.end - && new_range.context.end >= existing_range.start + let original_start_row = new_range.context.start.row; + let original_end_row = new_range.context.end.row; + for existing in &existing_excerpts { + let overlaps = new_range.context.start <= existing.range.end + && new_range.context.end >= existing.range.start; + if !overlaps { + continue; + } + + let start_excess = original_start_row.saturating_sub(existing.range.start.row); + let end_excess = existing.range.end.row.saturating_sub(original_end_row); + + if start_excess <= MAX_EXCERPT_EXPANSION_ROWS + && end_excess <= MAX_EXCERPT_EXPANSION_ROWS { - let expanded_start = new_range.context.start.min(existing_range.start); - let expanded_end = new_range.context.end.max(existing_range.end); - if expanded_start != new_range.context.start - || expanded_end != new_range.context.end - { - new_range.context.start = expanded_start; - new_range.context.end = expanded_end; + if existing.range.start < new_range.context.start { + new_range.context.start = existing.range.start; changed = true; } + if existing.range.end > new_range.context.end { + new_range.context.end = existing.range.end; + changed = true; + } + } else { + stale_excerpt_ids.push(existing.id); } } } - if !changed { - return (new, counts); + if !changed && stale_excerpt_ids.is_empty() { + return ExpandedExcerpts { + ranges: new, + counts, + stale_excerpt_ids: Vec::new(), + }; } let mut result_ranges: Vec> = Vec::new(); @@ -389,7 +444,11 @@ impl MultiBuffer { result_counts.push(count); } - (result_ranges, result_counts) + ExpandedExcerpts { + ranges: result_ranges, + counts: result_counts, + stale_excerpt_ids, + } } fn update_path_excerpts( diff --git a/crates/search/src/project_search.rs b/crates/search/src/project_search.rs index c1eb04826e4b3b6973a24a8f330a7da235fa4236..eb338090209b9c625189ed49a5b262896823ab9e 100644 --- a/crates/search/src/project_search.rs +++ b/crates/search/src/project_search.rs @@ -2669,7 +2669,7 @@ pub mod tests { use editor::{DisplayPoint, display_map::DisplayRow}; use gpui::{Action, TestAppContext, VisualTestContext, WindowHandle}; use language::{FakeLspAdapter, rust_lang}; - use multi_buffer::Event as MultiBufferEvent; + use multi_buffer::{Event as MultiBufferEvent, ToPoint}; use pretty_assertions::assert_eq; use project::FakeFs; use serde_json::json; @@ -5351,6 +5351,67 @@ pub mod tests { assert_eq!(take_excerpt_changes(), Vec::new()); } + #[gpui::test] + async fn test_incremental_search_does_not_overly_expand_excerpts(cx: &mut TestAppContext) { + init_test(cx); + + // File where "o" matches on every line (creating a huge merged excerpt) + // but "on" only matches near the end. Without the expansion cap, + // narrowing "o" → "on" would preserve the old huge excerpt, placing + // 40+ irrelevant lines above the first actual match. + let mut lines: Vec = (0..50).map(|i| format!("line {i}: block")).collect(); + lines[45] = "line 45: configure one".into(); + + let fs = FakeFs::new(cx.background_executor.clone()); + fs.insert_tree( + path!("/dir"), + json!({ + "big.rs": lines.join("\n"), + }), + ) + .await; + let project = Project::test(fs.clone(), [path!("/dir").as_ref()], cx).await; + let window = + cx.add_window(|window, cx| MultiWorkspace::test_new(project.clone(), window, cx)); + let workspace = window + .read_with(cx, |mw, _| mw.workspace().clone()) + .unwrap(); + let search = cx.new(|cx| ProjectSearch::new(project.clone(), cx)); + let search_view = cx.add_window(|window, cx| { + ProjectSearchView::new(workspace.downgrade(), search.clone(), window, cx, None) + }); + + // "o" matches on every line, creating one large merged excerpt. + perform_search(search_view, "o", cx); + let broad_count = read_match_count(search_view, cx); + assert!( + broad_count > 40, + "expected many matches for 'o', got {broad_count}" + ); + + // Narrow to "on" — only line 45 matches. + perform_incremental_search(search_view, "on", cx); + assert_all_highlights_match_query(search_view, "on", cx); + assert_eq!(read_match_count(search_view, cx), 2); + + // The first match must be close to the top of the multibuffer. + // Before the fix, the excerpt expanded to the old [0..49] range, + // placing 40+ empty lines above the first hit. + let first_match_row = search_view + .read_with(cx, |search_view, cx| { + let search = search_view.entity.read(cx); + let snapshot = search.excerpts.read(cx).snapshot(cx); + let first_range = &search.match_ranges[0]; + first_range.start.to_point(&snapshot).row + }) + .unwrap(); + assert!( + first_match_row <= 15, + "first match at multibuffer row {first_match_row} is too far from the excerpt start; \ + the excerpt was over-expanded" + ); + } + #[gpui::test] async fn test_search_on_input_keeps_focus_confirm_shifts_it(cx: &mut TestAppContext) { init_test(cx);