git: Optimize `patch_for_range` functions (#48852)

Cole Miller created

These are heavily used by the side-by-side diff. Previously, we were
iterating over all hunks for each call. Now we skip hunks that can't
affect the provided range.

Release Notes:

- N/A

Change summary

crates/buffer_diff/src/buffer_diff.rs   | 386 +++++++++++++++++++++++++-
crates/editor/src/split.rs              | 105 +++++-
crates/multi_buffer/src/multi_buffer.rs |  41 ++
3 files changed, 482 insertions(+), 50 deletions(-)

Detailed changes

crates/buffer_diff/src/buffer_diff.rs 🔗

@@ -362,7 +362,7 @@ impl BufferDiffSnapshot {
         (new_id == old_id && new_version == old_version) || (new_empty && old_empty)
     }
 
-    #[allow(unused)]
+    /// Returns the last hunk whose start is less than or equal to the given position.
     fn hunk_before_base_text_offset<'a>(
         &self,
         target: usize,
@@ -375,16 +375,11 @@ impl BufferDiffSnapshot {
         {
             cursor.prev();
         }
-        let result = cursor
+        cursor
             .item()
-            .filter(|hunk| target >= hunk.diff_base_byte_range.start);
-        if cursor.item().is_none() {
-            cursor.reset();
-        }
-        result
+            .filter(|hunk| target >= hunk.diff_base_byte_range.start)
     }
 
-    #[allow(unused)]
     fn hunk_before_buffer_anchor<'a>(
         &self,
         target: Anchor,
@@ -398,13 +393,9 @@ impl BufferDiffSnapshot {
         {
             cursor.prev();
         }
-        let result = cursor
+        cursor
             .item()
-            .filter(|hunk| target.cmp(&hunk.buffer_range.start, buffer).is_ge());
-        if cursor.item().is_none() {
-            cursor.reset();
-        }
-        result
+            .filter(|hunk| target.cmp(&hunk.buffer_range.start, buffer).is_ge())
     }
 
     /// Returns a patch mapping the provided main buffer snapshot to the base text of this diff.
@@ -413,7 +404,79 @@ impl BufferDiffSnapshot {
     /// but not necessarily for points outside that range.
     pub fn patch_for_buffer_range<'a>(
         &'a self,
-        _range: RangeInclusive<Point>,
+        range: RangeInclusive<Point>,
+        buffer: &'a text::BufferSnapshot,
+    ) -> Patch<Point> {
+        if !self.inner.base_text_exists {
+            return Patch::new(vec![Edit {
+                old: Point::zero()..buffer.max_point(),
+                new: Point::zero()..Point::zero(),
+            }]);
+        }
+
+        let mut edits_since_diff = Patch::new(
+            buffer
+                .edits_since::<Point>(&self.inner.buffer_snapshot.version)
+                .collect::<Vec<_>>(),
+        );
+        edits_since_diff.invert();
+
+        let mut start_point = edits_since_diff.old_to_new(*range.start());
+        if let Some(first_edit) = edits_since_diff.edits().first() {
+            start_point = start_point.min(first_edit.new.start);
+        }
+
+        let original_snapshot = self.original_buffer_snapshot();
+        let base_text = self.base_text();
+
+        let mut cursor = self.inner.hunks.cursor(original_snapshot);
+        self.hunk_before_buffer_anchor(
+            original_snapshot.anchor_before(start_point),
+            &mut cursor,
+            original_snapshot,
+        );
+        if cursor.item().is_none() {
+            cursor.next();
+        }
+
+        let mut prefix_edit = cursor.prev_item().map(|prev_hunk| Edit {
+            old: Point::zero()..prev_hunk.buffer_range.end.to_point(original_snapshot),
+            new: Point::zero()..prev_hunk.diff_base_byte_range.end.to_point(base_text),
+        });
+
+        let mut range_end = edits_since_diff.old_to_new(*range.end());
+        if let Some(last_edit) = edits_since_diff.edits().last() {
+            range_end = range_end.max(last_edit.new.end);
+        }
+        let range_end = original_snapshot.anchor_before(range_end);
+
+        let hunk_iter = std::iter::from_fn(move || {
+            if let Some(edit) = prefix_edit.take() {
+                return Some(edit);
+            }
+            let hunk = cursor.item()?;
+            if hunk
+                .buffer_range
+                .start
+                .cmp(&range_end, original_snapshot)
+                .is_gt()
+            {
+                return None;
+            }
+            let edit = Edit {
+                old: hunk.buffer_range.to_point(original_snapshot),
+                new: hunk.diff_base_byte_range.to_point(base_text),
+            };
+            cursor.next();
+            Some(edit)
+        });
+
+        edits_since_diff.compose(hunk_iter)
+    }
+
+    #[cfg(test)]
+    pub(crate) fn patch_for_buffer_range_naive<'a>(
+        &'a self,
         buffer: &'a text::BufferSnapshot,
     ) -> Patch<Point> {
         let original_snapshot = self.original_buffer_snapshot();
@@ -455,13 +518,87 @@ impl BufferDiffSnapshot {
         )
     }
 
-    /// Returns a patch mapping the base text of this diff to the provided buffer snapshot.
+    /// Returns a patch mapping the base text of this diff to the provided main buffer snapshot.
     ///
     /// The returned patch is guaranteed to be accurate for all base text points in the provided range,
     /// but not necessarily for points outside that range.
     pub fn patch_for_base_text_range<'a>(
         &'a self,
-        _range: RangeInclusive<Point>,
+        range: RangeInclusive<Point>,
+        buffer: &'a text::BufferSnapshot,
+    ) -> Patch<Point> {
+        if !self.inner.base_text_exists {
+            return Patch::new(vec![Edit {
+                old: Point::zero()..Point::zero(),
+                new: Point::zero()..buffer.max_point(),
+            }]);
+        }
+
+        let edits_since_diff = buffer
+            .edits_since::<Point>(&self.inner.buffer_snapshot.version)
+            .collect::<Vec<_>>();
+
+        let mut hunk_patch = Vec::new();
+        let mut cursor = self.inner.hunks.cursor(self.original_buffer_snapshot());
+        let hunk_before = self
+            .hunk_before_base_text_offset(range.start().to_offset(self.base_text()), &mut cursor);
+
+        if let Some(hunk) = hunk_before
+            && let Some(first_edit) = edits_since_diff.first()
+            && hunk
+                .buffer_range
+                .start
+                .to_point(self.original_buffer_snapshot())
+                > first_edit.old.start
+        {
+            cursor.reset();
+            self.hunk_before_buffer_anchor(
+                self.original_buffer_snapshot()
+                    .anchor_before(first_edit.old.start),
+                &mut cursor,
+                self.original_buffer_snapshot(),
+            );
+        }
+        if cursor.item().is_none() {
+            cursor.next();
+        }
+        if let Some(prev_hunk) = cursor.prev_item() {
+            hunk_patch.push(Edit {
+                old: Point::zero()
+                    ..prev_hunk
+                        .diff_base_byte_range
+                        .end
+                        .to_point(self.base_text()),
+                new: Point::zero()
+                    ..prev_hunk
+                        .buffer_range
+                        .end
+                        .to_point(self.original_buffer_snapshot()),
+            })
+        }
+        let range_end = range.end().to_offset(self.base_text());
+        while let Some(hunk) = cursor.item()
+            && (hunk.diff_base_byte_range.start <= range_end
+                || edits_since_diff.last().is_some_and(|last_edit| {
+                    hunk.buffer_range
+                        .start
+                        .to_point(self.original_buffer_snapshot())
+                        <= last_edit.old.end
+                }))
+        {
+            hunk_patch.push(Edit {
+                old: hunk.diff_base_byte_range.to_point(self.base_text()),
+                new: hunk.buffer_range.to_point(self.original_buffer_snapshot()),
+            });
+            cursor.next();
+        }
+
+        Patch::new(hunk_patch).compose(edits_since_diff)
+    }
+
+    #[cfg(test)]
+    pub(crate) fn patch_for_base_text_range_naive<'a>(
+        &'a self,
         buffer: &'a text::BufferSnapshot,
     ) -> Patch<Point> {
         let original_snapshot = self.original_buffer_snapshot();
@@ -3543,4 +3680,219 @@ mod tests {
             Point::new(1, 4)..Point::new(4, 0),
         );
     }
+
+    #[gpui::test(iterations = 100)]
+    async fn test_patch_for_range_random(cx: &mut TestAppContext, mut rng: StdRng) {
+        fn gen_line(rng: &mut StdRng) -> String {
+            if rng.random_bool(0.2) {
+                "\n".to_owned()
+            } else {
+                let c = rng.random_range('A'..='Z');
+                format!("{c}{c}{c}\n")
+            }
+        }
+
+        fn gen_text(rng: &mut StdRng, line_count: usize) -> String {
+            (0..line_count).map(|_| gen_line(rng)).collect()
+        }
+
+        fn gen_edits_from(rng: &mut StdRng, base: &str) -> String {
+            let mut old_lines: Vec<&str> = base.lines().collect();
+            let mut result = String::new();
+
+            while !old_lines.is_empty() {
+                let unchanged_count = rng.random_range(0..=old_lines.len());
+                for _ in 0..unchanged_count {
+                    if old_lines.is_empty() {
+                        break;
+                    }
+                    result.push_str(old_lines.remove(0));
+                    result.push('\n');
+                }
+
+                if old_lines.is_empty() {
+                    break;
+                }
+
+                let deleted_count = rng.random_range(0..=old_lines.len().min(3));
+                for _ in 0..deleted_count {
+                    if old_lines.is_empty() {
+                        break;
+                    }
+                    old_lines.remove(0);
+                }
+
+                let minimum_added = if deleted_count == 0 { 1 } else { 0 };
+                let added_count = rng.random_range(minimum_added..=3);
+                for _ in 0..added_count {
+                    result.push_str(&gen_line(rng));
+                }
+            }
+
+            result
+        }
+
+        fn random_point_in_text(rng: &mut StdRng, lines: &[&str]) -> Point {
+            if lines.is_empty() {
+                return Point::zero();
+            }
+            let row = rng.random_range(0..lines.len() as u32);
+            let line = lines[row as usize];
+            let col = if line.is_empty() {
+                0
+            } else {
+                rng.random_range(0..=line.len() as u32)
+            };
+            Point::new(row, col)
+        }
+
+        fn random_range_in_text(rng: &mut StdRng, lines: &[&str]) -> RangeInclusive<Point> {
+            let start = random_point_in_text(rng, lines);
+            let end = random_point_in_text(rng, lines);
+            if start <= end {
+                start..=end
+            } else {
+                end..=start
+            }
+        }
+
+        fn points_in_range(range: &RangeInclusive<Point>, lines: &[&str]) -> Vec<Point> {
+            let mut points = Vec::new();
+            for row in range.start().row..=range.end().row {
+                if row as usize >= lines.len() {
+                    points.push(Point::new(row, 0));
+                    continue;
+                }
+                let line = lines[row as usize];
+                let start_col = if row == range.start().row {
+                    range.start().column
+                } else {
+                    0
+                };
+                let end_col = if row == range.end().row {
+                    range.end().column
+                } else {
+                    line.len() as u32
+                };
+                for col in start_col..=end_col {
+                    points.push(Point::new(row, col));
+                }
+            }
+            points
+        }
+
+        let rng = &mut rng;
+
+        let line_count = rng.random_range(5..20);
+        let base_text = gen_text(rng, line_count);
+        let initial_buffer_text = gen_edits_from(rng, &base_text);
+
+        let mut buffer = Buffer::new(
+            ReplicaId::LOCAL,
+            BufferId::new(1).unwrap(),
+            initial_buffer_text.clone(),
+        );
+
+        let diff = BufferDiffSnapshot::new_sync(&buffer, base_text.clone(), cx);
+
+        let edit_count = rng.random_range(1..=5);
+        for _ in 0..edit_count {
+            let buffer_text = buffer.text();
+            if buffer_text.is_empty() {
+                buffer.edit([(0..0, gen_line(rng))]);
+            } else {
+                let lines: Vec<&str> = buffer_text.lines().collect();
+                let start_row = rng.random_range(0..lines.len());
+                let end_row = rng.random_range(start_row..=lines.len().min(start_row + 3));
+
+                let start_col = if start_row < lines.len() {
+                    rng.random_range(0..=lines[start_row].len())
+                } else {
+                    0
+                };
+                let end_col = if end_row < lines.len() {
+                    rng.random_range(0..=lines[end_row].len())
+                } else {
+                    0
+                };
+
+                let start_offset = buffer
+                    .point_to_offset(Point::new(start_row as u32, start_col as u32))
+                    .min(buffer.len());
+                let end_offset = buffer
+                    .point_to_offset(Point::new(end_row as u32, end_col as u32))
+                    .min(buffer.len());
+
+                let (start, end) = if start_offset <= end_offset {
+                    (start_offset, end_offset)
+                } else {
+                    (end_offset, start_offset)
+                };
+
+                let new_text = if rng.random_bool(0.3) {
+                    String::new()
+                } else {
+                    let line_count = rng.random_range(0..=2);
+                    gen_text(rng, line_count)
+                };
+
+                buffer.edit([(start..end, new_text)]);
+            }
+        }
+
+        let buffer_snapshot = buffer.snapshot();
+
+        let buffer_text = buffer_snapshot.text();
+        let buffer_lines: Vec<&str> = buffer_text.lines().collect();
+        let base_lines: Vec<&str> = base_text.lines().collect();
+
+        let test_count = 10;
+        for _ in 0..test_count {
+            let range = random_range_in_text(rng, &buffer_lines);
+            let points = points_in_range(&range, &buffer_lines);
+
+            let optimized_patch = diff.patch_for_buffer_range(range.clone(), &buffer_snapshot);
+            let naive_patch = diff.patch_for_buffer_range_naive(&buffer_snapshot);
+
+            for point in points {
+                let optimized_edit = optimized_patch.edit_for_old_position(point);
+                let naive_edit = naive_patch.edit_for_old_position(point);
+
+                assert_eq!(
+                    optimized_edit,
+                    naive_edit,
+                    "patch_for_buffer_range mismatch at point {:?} in range {:?}\nbase_text: {:?}\ninitial_buffer: {:?}\ncurrent_buffer: {:?}",
+                    point,
+                    range,
+                    base_text,
+                    initial_buffer_text,
+                    buffer_snapshot.text()
+                );
+            }
+        }
+
+        for _ in 0..test_count {
+            let range = random_range_in_text(rng, &base_lines);
+            let points = points_in_range(&range, &base_lines);
+
+            let optimized_patch = diff.patch_for_base_text_range(range.clone(), &buffer_snapshot);
+            let naive_patch = diff.patch_for_base_text_range_naive(&buffer_snapshot);
+
+            for point in points {
+                let optimized_edit = optimized_patch.edit_for_old_position(point);
+                let naive_edit = naive_patch.edit_for_old_position(point);
+
+                assert_eq!(
+                    optimized_edit,
+                    naive_edit,
+                    "patch_for_base_text_range mismatch at point {:?} in range {:?}\nbase_text: {:?}\ninitial_buffer: {:?}\ncurrent_buffer: {:?}",
+                    point,
+                    range,
+                    base_text,
+                    initial_buffer_text,
+                    buffer_snapshot.text()
+                );
+            }
+        }
+    }
 }

crates/editor/src/split.rs 🔗

@@ -173,42 +173,94 @@ fn patches_for_range<F>(
 where
     F: Fn(&BufferDiffSnapshot, RangeInclusive<Point>, &text::BufferSnapshot) -> Patch<Point>,
 {
+    struct PendingExcerpt<'a> {
+        source_excerpt_id: ExcerptId,
+        target_excerpt_id: ExcerptId,
+        source_buffer: &'a text::BufferSnapshot,
+        target_buffer: &'a text::BufferSnapshot,
+        buffer_point_range: Range<Point>,
+        source_context_range: Range<Point>,
+    }
+
     let mut result = Vec::new();
-    let mut patches = HashMap::default();
+    let mut current_buffer_id: Option<BufferId> = None;
+    let mut pending_excerpts: Vec<PendingExcerpt> = Vec::new();
+    let mut union_context_start: Option<Point> = None;
+    let mut union_context_end: Option<Point> = None;
+
+    let flush_buffer = |pending: &mut Vec<PendingExcerpt>,
+                        union_start: Point,
+                        union_end: Point,
+                        result: &mut Vec<CompanionExcerptPatch>| {
+        let Some(first) = pending.first() else {
+            return;
+        };
 
-    for (source_buffer, buffer_offset_range, source_excerpt_id) in
-        source_snapshot.range_to_buffer_ranges(source_bounds)
+        let diff = source_snapshot
+            .diff_for_buffer_id(first.source_buffer.remote_id())
+            .unwrap();
+        let rhs_buffer = if first.source_buffer.remote_id() == diff.base_text().remote_id() {
+            first.target_buffer
+        } else {
+            first.source_buffer
+        };
+
+        let patch = translate_fn(diff, union_start..=union_end, rhs_buffer);
+
+        for excerpt in pending.drain(..) {
+            result.push(patch_for_excerpt(
+                source_snapshot,
+                target_snapshot,
+                excerpt.source_excerpt_id,
+                excerpt.target_excerpt_id,
+                excerpt.target_buffer,
+                excerpt.source_context_range,
+                &patch,
+                excerpt.buffer_point_range,
+            ));
+        }
+    };
+
+    for (source_buffer, buffer_offset_range, source_excerpt_id, source_context_range) in
+        source_snapshot.range_to_buffer_ranges_with_context(source_bounds)
     {
         let target_excerpt_id = excerpt_map.get(&source_excerpt_id).copied().unwrap();
         let target_buffer = target_snapshot
             .buffer_for_excerpt(target_excerpt_id)
             .unwrap();
-        let patch = patches.entry(source_buffer.remote_id()).or_insert_with(|| {
-            let diff = source_snapshot
-                .diff_for_buffer_id(source_buffer.remote_id())
-                .unwrap();
-            let rhs_buffer = if source_buffer.remote_id() == diff.base_text().remote_id() {
-                &target_buffer
-            } else {
-                source_buffer
-            };
-            // TODO(split-diff) pass only the union of the ranges for the affected excerpts
-            translate_fn(diff, Point::zero()..=source_buffer.max_point(), rhs_buffer)
-        });
+
+        let buffer_id = source_buffer.remote_id();
+
+        if current_buffer_id != Some(buffer_id) {
+            if let (Some(start), Some(end)) = (union_context_start.take(), union_context_end.take())
+            {
+                flush_buffer(&mut pending_excerpts, start, end, &mut result);
+            }
+            current_buffer_id = Some(buffer_id);
+        }
+
         let buffer_point_range = buffer_offset_range.to_point(source_buffer);
+        let source_context_range = source_context_range.to_point(source_buffer);
+
+        union_context_start = Some(union_context_start.map_or(source_context_range.start, |s| {
+            s.min(source_context_range.start)
+        }));
+        union_context_end = Some(union_context_end.map_or(source_context_range.end, |e| {
+            e.max(source_context_range.end)
+        }));
 
-        // TODO(split-diff) maybe narrow the patch to only the edited part of the excerpt
-        // (less useful for project diff, but important if we want to do singleton side-by-side diff)
-        result.push(patch_for_excerpt(
-            source_snapshot,
-            target_snapshot,
+        pending_excerpts.push(PendingExcerpt {
             source_excerpt_id,
             target_excerpt_id,
             source_buffer,
             target_buffer,
-            patch,
             buffer_point_range,
-        ));
+            source_context_range,
+        });
+    }
+
+    if let (Some(start), Some(end)) = (union_context_start, union_context_end) {
+        flush_buffer(&mut pending_excerpts, start, end, &mut result);
     }
 
     result
@@ -219,8 +271,8 @@ fn patch_for_excerpt(
     target_snapshot: &MultiBufferSnapshot,
     source_excerpt_id: ExcerptId,
     target_excerpt_id: ExcerptId,
-    source_buffer: &text::BufferSnapshot,
     target_buffer: &text::BufferSnapshot,
+    source_context_range: Range<Point>,
     patch: &Patch<Point>,
     source_edited_range: Range<Point>,
 ) -> CompanionExcerptPatch {
@@ -228,11 +280,8 @@ fn patch_for_excerpt(
         .range_for_excerpt(source_excerpt_id)
         .unwrap();
     let source_excerpt_start_in_multibuffer = source_multibuffer_range.start;
-    let source_context_range = source_snapshot
-        .context_range_for_excerpt(source_excerpt_id)
-        .unwrap();
-    let source_excerpt_start_in_buffer = source_context_range.start.to_point(&source_buffer);
-    let source_excerpt_end_in_buffer = source_context_range.end.to_point(&source_buffer);
+    let source_excerpt_start_in_buffer = source_context_range.start;
+    let source_excerpt_end_in_buffer = source_context_range.end;
     let target_multibuffer_range = target_snapshot
         .range_for_excerpt(target_excerpt_id)
         .unwrap();

crates/multi_buffer/src/multi_buffer.rs 🔗

@@ -4018,6 +4018,25 @@ impl MultiBufferSnapshot {
         &self,
         range: R,
     ) -> Vec<(&BufferSnapshot, Range<BufferOffset>, ExcerptId)>
+    where
+        R: RangeBounds<T>,
+        T: ToOffset,
+    {
+        self.range_to_buffer_ranges_with_context(range)
+            .into_iter()
+            .map(|(buffer, range, id, _context)| (buffer, range, id))
+            .collect()
+    }
+
+    pub fn range_to_buffer_ranges_with_context<R, T>(
+        &self,
+        range: R,
+    ) -> Vec<(
+        &BufferSnapshot,
+        Range<BufferOffset>,
+        ExcerptId,
+        Range<text::Anchor>,
+    )>
     where
         R: RangeBounds<T>,
         T: ToOffset,
@@ -4037,7 +4056,12 @@ impl MultiBufferSnapshot {
         let mut cursor = self.cursor::<MultiBufferOffset, BufferOffset>();
         cursor.seek(&start);
 
-        let mut result: Vec<(&BufferSnapshot, Range<BufferOffset>, ExcerptId)> = Vec::new();
+        let mut result: Vec<(
+            &BufferSnapshot,
+            Range<BufferOffset>,
+            ExcerptId,
+            Range<text::Anchor>,
+        )> = Vec::new();
         while let Some(region) = cursor.region() {
             let dominated_by_end_bound = match end_bound {
                 Bound::Included(end) => region.range.start > end,
@@ -4062,12 +4086,13 @@ impl MultiBufferSnapshot {
                     .buffer_range
                     .end
                     .min(region.buffer_range.start + end_overshoot);
-                if let Some(prev) = result.last_mut().filter(|(_, prev_range, excerpt_id)| {
+                let context = region.excerpt.range.context.clone();
+                if let Some(prev) = result.last_mut().filter(|(_, prev_range, excerpt_id, _)| {
                     *excerpt_id == region.excerpt.id && prev_range.end == start
                 }) {
                     prev.1.end = end;
                 } else {
-                    result.push((region.buffer, start..end, region.excerpt.id));
+                    result.push((region.buffer, start..end, region.excerpt.id, context));
                 }
             }
             cursor.next();
@@ -4075,13 +4100,19 @@ impl MultiBufferSnapshot {
 
         if let Some(excerpt) = cursor.excerpt() {
             let dominated_by_prev_excerpt =
-                result.last().is_some_and(|(_, _, id)| *id == excerpt.id);
+                result.last().is_some_and(|(_, _, id, _)| *id == excerpt.id);
             if !dominated_by_prev_excerpt && excerpt.text_summary.len == 0 {
                 let excerpt_position = self.len();
                 if bounds.contains(&excerpt_position) {
                     let buffer_offset =
                         BufferOffset(excerpt.range.context.start.to_offset(&excerpt.buffer));
-                    result.push((&excerpt.buffer, buffer_offset..buffer_offset, excerpt.id));
+                    let context = excerpt.range.context.clone();
+                    result.push((
+                        &excerpt.buffer,
+                        buffer_offset..buffer_offset,
+                        excerpt.id,
+                        context,
+                    ));
                 }
             }
         }