Draft the overly large excerpt mitigation

Kirill Bulatov created

Change summary

crates/multi_buffer/src/path_key.rs | 105 ++++++++++++++++++++++++------
crates/search/src/project_search.rs |  63 ++++++++++++++++++
2 files changed, 144 insertions(+), 24 deletions(-)

Detailed changes

crates/multi_buffer/src/path_key.rs 🔗

@@ -1,5 +1,13 @@
 use std::{mem, ops::Range, sync::Arc};
 
+/// When incrementally updating excerpts, we try to expand new ranges to match
+/// overlapping existing excerpts so we can reuse them (avoiding rewrap churn).
+/// But if an existing excerpt is much larger than the new range, blindly
+/// expanding creates huge excerpts with many irrelevant lines above/below the
+/// actual matches. This constant caps how many extra rows we allow on each
+/// side of the *original* new range when expanding toward an existing excerpt.
+const MAX_EXCERPT_EXPANSION_ROWS: u32 = 10;
+
 use collections::HashSet;
 use gpui::{App, AppContext, Context, Entity};
 use itertools::Itertools;
@@ -13,6 +21,22 @@ use crate::{
     Anchor, ExcerptId, ExcerptRange, ExpandExcerptDirection, MultiBuffer, build_excerpt_ranges,
 };
 
+struct ExistingExcerpt {
+    id: ExcerptId,
+    range: Range<Point>,
+}
+
+struct ExpandedExcerpts {
+    ranges: Vec<ExcerptRange<Point>>,
+    counts: Vec<usize>,
+    /// Existing excerpts that overlap new ranges but are too large to expand
+    /// to within `MAX_EXCERPT_EXPANSION_ROWS`. These must be removed before
+    /// `update_path_excerpts` runs, so the new ranges get inserted fresh
+    /// instead of being unioned back to the old size by the partial-overlap
+    /// branch.
+    stale_excerpt_ids: Vec<ExcerptId>,
+}
+
 #[derive(Debug, Clone)]
 pub struct PathExcerptInsertResult {
     pub inserted_ranges: Vec<Range<Anchor>>,
@@ -297,14 +321,22 @@ impl MultiBuffer {
         counts: Vec<usize>,
         cx: &mut Context<Self>,
     ) -> PathExcerptInsertResult {
-        let (new, counts) =
-            self.expand_new_ranges_to_existing(&path, buffer_snapshot, new, counts, cx);
+        let expanded = self.expand_new_ranges_to_existing(&path, buffer_snapshot, new, counts, cx);
+        if !expanded.stale_excerpt_ids.is_empty() {
+            if let Some(existing) = self.excerpts_by_path.get_mut(&path) {
+                existing.retain(|id| !expanded.stale_excerpt_ids.contains(id));
+            }
+            for &id in &expanded.stale_excerpt_ids {
+                self.paths_by_excerpt.remove(&id);
+            }
+            self.remove_excerpts(expanded.stale_excerpt_ids, cx);
+        }
         let (excerpt_ids, added_new_excerpt) =
-            self.update_path_excerpts(path, buffer, buffer_snapshot, new, cx);
+            self.update_path_excerpts(path, buffer, buffer_snapshot, expanded.ranges, cx);
 
         let mut inserted_ranges = Vec::new();
         let mut ranges = ranges.into_iter();
-        for (&excerpt_id, range_count) in excerpt_ids.iter().zip(counts.into_iter()) {
+        for (&excerpt_id, range_count) in excerpt_ids.iter().zip(expanded.counts.into_iter()) {
             for range in ranges.by_ref().take(range_count) {
                 let range = Anchor::range_in_buffer(
                     excerpt_id,
@@ -333,44 +365,67 @@ impl MultiBuffer {
         mut new: Vec<ExcerptRange<Point>>,
         counts: Vec<usize>,
         cx: &App,
-    ) -> (Vec<ExcerptRange<Point>>, Vec<usize>) {
+    ) -> ExpandedExcerpts {
         let existing = self.excerpts_by_path.get(path).cloned().unwrap_or_default();
         if existing.is_empty() || new.is_empty() {
-            return (new, counts);
+            return ExpandedExcerpts {
+                ranges: new,
+                counts,
+                stale_excerpt_ids: Vec::new(),
+            };
         }
 
         let snapshot = self.snapshot(cx);
         let buffer_id = buffer_snapshot.remote_id();
-        let existing_ranges: Vec<Range<Point>> = existing
+        let existing_excerpts = existing
             .iter()
             .filter_map(|&id| {
                 let excerpt = snapshot.excerpt(id)?;
-                (excerpt.buffer_id == buffer_id)
-                    .then(|| excerpt.range.context.to_point(buffer_snapshot))
+                (excerpt.buffer_id == buffer_id).then(|| ExistingExcerpt {
+                    id,
+                    range: excerpt.range.context.to_point(buffer_snapshot),
+                })
             })
-            .collect();
+            .collect::<Vec<_>>();
 
         let mut changed = false;
+        let mut stale_excerpt_ids = Vec::new();
         for new_range in &mut new {
-            for existing_range in &existing_ranges {
-                if new_range.context.start <= existing_range.end
-                    && new_range.context.end >= existing_range.start
+            let original_start_row = new_range.context.start.row;
+            let original_end_row = new_range.context.end.row;
+            for existing in &existing_excerpts {
+                let overlaps = new_range.context.start <= existing.range.end
+                    && new_range.context.end >= existing.range.start;
+                if !overlaps {
+                    continue;
+                }
+
+                let start_excess = original_start_row.saturating_sub(existing.range.start.row);
+                let end_excess = existing.range.end.row.saturating_sub(original_end_row);
+
+                if start_excess <= MAX_EXCERPT_EXPANSION_ROWS
+                    && end_excess <= MAX_EXCERPT_EXPANSION_ROWS
                 {
-                    let expanded_start = new_range.context.start.min(existing_range.start);
-                    let expanded_end = new_range.context.end.max(existing_range.end);
-                    if expanded_start != new_range.context.start
-                        || expanded_end != new_range.context.end
-                    {
-                        new_range.context.start = expanded_start;
-                        new_range.context.end = expanded_end;
+                    if existing.range.start < new_range.context.start {
+                        new_range.context.start = existing.range.start;
                         changed = true;
                     }
+                    if existing.range.end > new_range.context.end {
+                        new_range.context.end = existing.range.end;
+                        changed = true;
+                    }
+                } else {
+                    stale_excerpt_ids.push(existing.id);
                 }
             }
         }
 
-        if !changed {
-            return (new, counts);
+        if !changed && stale_excerpt_ids.is_empty() {
+            return ExpandedExcerpts {
+                ranges: new,
+                counts,
+                stale_excerpt_ids: Vec::new(),
+            };
         }
 
         let mut result_ranges: Vec<ExcerptRange<Point>> = Vec::new();
@@ -389,7 +444,11 @@ impl MultiBuffer {
             result_counts.push(count);
         }
 
-        (result_ranges, result_counts)
+        ExpandedExcerpts {
+            ranges: result_ranges,
+            counts: result_counts,
+            stale_excerpt_ids,
+        }
     }
 
     fn update_path_excerpts(

crates/search/src/project_search.rs 🔗

@@ -2669,7 +2669,7 @@ pub mod tests {
     use editor::{DisplayPoint, display_map::DisplayRow};
     use gpui::{Action, TestAppContext, VisualTestContext, WindowHandle};
     use language::{FakeLspAdapter, rust_lang};
-    use multi_buffer::Event as MultiBufferEvent;
+    use multi_buffer::{Event as MultiBufferEvent, ToPoint};
     use pretty_assertions::assert_eq;
     use project::FakeFs;
     use serde_json::json;
@@ -5351,6 +5351,67 @@ pub mod tests {
         assert_eq!(take_excerpt_changes(), Vec::new());
     }
 
+    #[gpui::test]
+    async fn test_incremental_search_does_not_overly_expand_excerpts(cx: &mut TestAppContext) {
+        init_test(cx);
+
+        // File where "o" matches on every line (creating a huge merged excerpt)
+        // but "on" only matches near the end. Without the expansion cap,
+        // narrowing "o" → "on" would preserve the old huge excerpt, placing
+        // 40+ irrelevant lines above the first actual match.
+        let mut lines: Vec<String> = (0..50).map(|i| format!("line {i}: block")).collect();
+        lines[45] = "line 45: configure one".into();
+
+        let fs = FakeFs::new(cx.background_executor.clone());
+        fs.insert_tree(
+            path!("/dir"),
+            json!({
+                "big.rs": lines.join("\n"),
+            }),
+        )
+        .await;
+        let project = Project::test(fs.clone(), [path!("/dir").as_ref()], cx).await;
+        let window =
+            cx.add_window(|window, cx| MultiWorkspace::test_new(project.clone(), window, cx));
+        let workspace = window
+            .read_with(cx, |mw, _| mw.workspace().clone())
+            .unwrap();
+        let search = cx.new(|cx| ProjectSearch::new(project.clone(), cx));
+        let search_view = cx.add_window(|window, cx| {
+            ProjectSearchView::new(workspace.downgrade(), search.clone(), window, cx, None)
+        });
+
+        // "o" matches on every line, creating one large merged excerpt.
+        perform_search(search_view, "o", cx);
+        let broad_count = read_match_count(search_view, cx);
+        assert!(
+            broad_count > 40,
+            "expected many matches for 'o', got {broad_count}"
+        );
+
+        // Narrow to "on" — only line 45 matches.
+        perform_incremental_search(search_view, "on", cx);
+        assert_all_highlights_match_query(search_view, "on", cx);
+        assert_eq!(read_match_count(search_view, cx), 2);
+
+        // The first match must be close to the top of the multibuffer.
+        // Before the fix, the excerpt expanded to the old [0..49] range,
+        // placing 40+ empty lines above the first hit.
+        let first_match_row = search_view
+            .read_with(cx, |search_view, cx| {
+                let search = search_view.entity.read(cx);
+                let snapshot = search.excerpts.read(cx).snapshot(cx);
+                let first_range = &search.match_ranges[0];
+                first_range.start.to_point(&snapshot).row
+            })
+            .unwrap();
+        assert!(
+            first_match_row <= 15,
+            "first match at multibuffer row {first_match_row} is too far from the excerpt start; \
+             the excerpt was over-expanded"
+        );
+    }
+
     #[gpui::test]
     async fn test_search_on_input_keeps_focus_confirm_shifts_it(cx: &mut TestAppContext) {
         init_test(cx);