diff.rs

  1use anyhow::Result;
  2use buffer_diff::{BufferDiff, InternalDiffHunk};
  3use gpui::{App, AppContext, AsyncApp, Context, Entity, Task};
  4use itertools::Itertools;
  5use language::{
  6    Anchor, Buffer, Capability, LanguageRegistry, OffsetRangeExt as _, Point, TextBuffer,
  7};
  8use multi_buffer::{MultiBuffer, PathKey, excerpt_context_lines};
  9use std::{cmp::Reverse, ops::Range, path::Path, sync::Arc};
 10use streaming_diff::LineOperation;
 11use sum_tree::SumTree;
 12use util::ResultExt;
 13
/// A diff between a base text and a buffer, displayed through a [`MultiBuffer`].
///
/// A diff starts out either `Pending` (see [`Diff::new`]) or already
/// `Finalized` (see [`Diff::finalized`]); a pending diff can be turned into a
/// finalized one with [`Diff::finalize`].
pub enum Diff {
    /// A diff that still accepts updates via [`Diff::update_pending`] and
    /// [`Diff::reveal_range`].
    Pending(PendingDiff),
    /// A diff that no longer accepts updates; `update_pending` and
    /// `reveal_range` are no-ops for this variant.
    Finalized(FinalizedDiff),
}
 18
 19impl Diff {
 20    pub fn finalized(
 21        path: String,
 22        old_text: Option<String>,
 23        new_text: String,
 24        language_registry: Arc<LanguageRegistry>,
 25        cx: &mut Context<Self>,
 26    ) -> Self {
 27        let multibuffer = cx.new(|_cx| MultiBuffer::without_headers(Capability::ReadOnly));
 28        let new_buffer = cx.new(|cx| Buffer::local(new_text, cx));
 29        let base_text = old_text.clone().unwrap_or(String::new()).into();
 30        let task = cx.spawn({
 31            let multibuffer = multibuffer.clone();
 32            let path = path.clone();
 33            let buffer = new_buffer.clone();
 34            async move |_, cx| {
 35                let language = language_registry
 36                    .load_language_for_file_path(Path::new(&path))
 37                    .await
 38                    .log_err();
 39
 40                buffer.update(cx, |buffer, cx| buffer.set_language(language.clone(), cx));
 41                buffer.update(cx, |buffer, _| buffer.parsing_idle()).await;
 42
 43                let diff = build_buffer_diff(
 44                    old_text.unwrap_or("".into()).into(),
 45                    &buffer,
 46                    Some(language_registry.clone()),
 47                    cx,
 48                )
 49                .await?;
 50
 51                multibuffer.update(cx, |multibuffer, cx| {
 52                    let hunk_ranges = {
 53                        let buffer = buffer.read(cx);
 54                        diff.read(cx)
 55                            .snapshot(cx)
 56                            .hunks_intersecting_range(
 57                                Anchor::min_for_buffer(buffer.remote_id())
 58                                    ..Anchor::max_for_buffer(buffer.remote_id()),
 59                                buffer,
 60                            )
 61                            .map(|diff_hunk| diff_hunk.buffer_range.to_point(buffer))
 62                            .collect::<Vec<_>>()
 63                    };
 64
 65                    multibuffer.set_excerpts_for_path(
 66                        PathKey::for_buffer(&buffer, cx),
 67                        buffer.clone(),
 68                        hunk_ranges,
 69                        excerpt_context_lines(cx),
 70                        cx,
 71                    );
 72                    multibuffer.add_diff(diff, cx);
 73                });
 74
 75                anyhow::Ok(())
 76            }
 77        });
 78
 79        Self::Finalized(FinalizedDiff {
 80            multibuffer,
 81            path,
 82            base_text,
 83            new_buffer,
 84            _update_diff: task,
 85        })
 86    }
 87
 88    pub fn new(buffer: Entity<Buffer>, cx: &mut Context<Self>) -> Self {
 89        let buffer_text_snapshot = buffer.read(cx).text_snapshot();
 90        let language = buffer.read(cx).language().cloned();
 91        let language_registry = buffer.read(cx).language_registry();
 92        let buffer_diff = cx.new(|cx| {
 93            let mut diff = BufferDiff::new_unchanged(&buffer_text_snapshot, cx);
 94            diff.language_changed(language.clone(), language_registry.clone(), cx);
 95            let secondary_diff = cx.new(|cx| {
 96                // For the secondary diff buffer we skip assigning the language as we do not really need to perform any syntax highlighting on
 97                // it. As a result, by skipping it we are potentially shaving off a lot of RSS plus we get a snappier feel for large diff
 98                // view multibuffers.
 99                BufferDiff::new_unchanged(&buffer_text_snapshot, cx)
100            });
101            diff.set_secondary_diff(secondary_diff);
102            diff
103        });
104
105        let multibuffer = cx.new(|cx| {
106            let mut multibuffer = MultiBuffer::without_headers(Capability::ReadOnly);
107            multibuffer.set_all_diff_hunks_expanded(cx);
108            multibuffer.add_diff(buffer_diff.clone(), cx);
109            multibuffer
110        });
111
112        Self::Pending(PendingDiff {
113            multibuffer,
114            base_text: Arc::from(buffer_text_snapshot.text().as_str()),
115            // _subscription: cx.observe(&buffer, |this, _, cx| {
116            //     if let Diff::Pending(diff) = this {
117            //         diff.update(cx);
118            //     }
119            // }),
120            new_buffer: buffer,
121            diff: buffer_diff,
122            revealed_ranges: Vec::new(),
123            update_diff: Task::ready(Ok(())),
124            pending_update: None,
125            is_updating: false,
126        })
127    }
128
129    pub fn reveal_range(&mut self, range: Range<Anchor>, cx: &mut Context<Self>) {
130        if let Self::Pending(diff) = self {
131            diff.reveal_range(range, cx);
132        }
133    }
134
135    pub fn finalize(&mut self, cx: &mut Context<Self>) {
136        if let Self::Pending(diff) = self {
137            *self = Self::Finalized(diff.finalize(cx));
138        }
139    }
140
141    /// Returns the original text before any edits were applied.
142    pub fn base_text(&self) -> &Arc<str> {
143        match self {
144            Self::Pending(PendingDiff { base_text, .. }) => base_text,
145            Self::Finalized(FinalizedDiff { base_text, .. }) => base_text,
146        }
147    }
148
149    /// Returns the buffer being edited (for pending diffs) or the snapshot buffer (for finalized diffs).
150    pub fn buffer(&self) -> &Entity<Buffer> {
151        match self {
152            Self::Pending(PendingDiff { new_buffer, .. }) => new_buffer,
153            Self::Finalized(FinalizedDiff { new_buffer, .. }) => new_buffer,
154        }
155    }
156
157    pub fn file_path(&self, cx: &App) -> Option<String> {
158        match self {
159            Self::Pending(PendingDiff { new_buffer, .. }) => new_buffer
160                .read(cx)
161                .file()
162                .map(|file| file.full_path(cx).to_string_lossy().into_owned()),
163            Self::Finalized(FinalizedDiff { path, .. }) => Some(path.clone()),
164        }
165    }
166
167    pub fn multibuffer(&self) -> &Entity<MultiBuffer> {
168        match self {
169            Self::Pending(PendingDiff { multibuffer, .. }) => multibuffer,
170            Self::Finalized(FinalizedDiff { multibuffer, .. }) => multibuffer,
171        }
172    }
173
174    pub fn to_markdown(&self, cx: &App) -> String {
175        let buffer_text = self
176            .multibuffer()
177            .read(cx)
178            .all_buffers()
179            .iter()
180            .map(|buffer| buffer.read(cx).text())
181            .join("\n");
182        let path = match self {
183            Diff::Pending(PendingDiff {
184                new_buffer: buffer, ..
185            }) => buffer
186                .read(cx)
187                .file()
188                .map(|file| file.path().display(file.path_style(cx))),
189            Diff::Finalized(FinalizedDiff { path, .. }) => Some(path.as_str().into()),
190        };
191        format!(
192            "Diff: {}\n```\n{}\n```\n",
193            path.unwrap_or("untitled".into()),
194            buffer_text
195        )
196    }
197
198    pub fn has_revealed_range(&self, cx: &App) -> bool {
199        self.multibuffer().read(cx).paths().next().is_some()
200    }
201
202    pub fn needs_update(&self, old_text: &str, new_text: &str, cx: &App) -> bool {
203        match self {
204            Diff::Pending(PendingDiff {
205                base_text,
206                new_buffer,
207                ..
208            }) => {
209                base_text.as_ref() != old_text
210                    || !new_buffer.read(cx).as_rope().chunks().equals_str(new_text)
211            }
212            Diff::Finalized(FinalizedDiff {
213                base_text,
214                new_buffer,
215                ..
216            }) => {
217                base_text.as_ref() != old_text
218                    || !new_buffer.read(cx).as_rope().chunks().equals_str(new_text)
219            }
220        }
221    }
222
223    pub fn update_pending(
224        &mut self,
225        operations: Vec<LineOperation>,
226        snapshot: text::BufferSnapshot,
227        cx: &mut Context<Diff>,
228    ) {
229        match self {
230            Diff::Pending(diff) => diff.update(operations, snapshot, cx),
231            Diff::Finalized(_) => {}
232        }
233    }
234}
235
/// State of a diff that is still receiving updates.
pub struct PendingDiff {
    /// Multibuffer that displays excerpts of `new_buffer` along with `diff`.
    multibuffer: Entity<MultiBuffer>,
    /// Text of the buffer captured when the diff was created.
    base_text: Arc<str>,
    /// The buffer whose contents are compared against `base_text`.
    new_buffer: Entity<Buffer>,
    /// The buffer diff shown in `multibuffer`; it has a secondary diff attached.
    diff: Entity<BufferDiff>,
    /// Ranges added through `reveal_range`; they are shown in addition to the
    /// ranges of the diff hunks.
    revealed_ranges: Vec<Range<Anchor>>,
    /// The most recently spawned task that applies an update to `diff`.
    update_diff: Task<Result<()>>,
    // The latest update waiting to be processed. Storing only the latest means
    // intermediate chunks are coalesced when the worker task can't keep up.
    pending_update: Option<PendingUpdate>,
    /// `true` while an update task is in flight; new updates are then only
    /// queued in `pending_update` and picked up when the task finishes.
    is_updating: bool,
}
249
/// A queued update for a [`PendingDiff`], waiting for the worker task.
struct PendingUpdate {
    /// Line operations from which the diff hunks are computed.
    operations: Vec<LineOperation>,
    /// Snapshot the operations' old rows are resolved against.
    base_snapshot: text::BufferSnapshot,
    /// Snapshot of the edited buffer, captured when the update was queued;
    /// the operations' new rows are resolved against it.
    text_snapshot: text::BufferSnapshot,
}
255
256fn compute_hunks(
257    diff_base: &text::BufferSnapshot,
258    buffer: &text::BufferSnapshot,
259    line_operations: Vec<LineOperation>,
260) -> SumTree<buffer_diff::InternalDiffHunk> {
261    let mut tree = SumTree::new(buffer);
262
263    let mut old_row = 0u32;
264    let mut new_row = 0u32;
265
266    // Merge adjacent Delete+Insert into a single Modified hunk
267    let mut pending_delete_lines: Option<u32> = None;
268
269    let flush_delete = |pending_delete_lines: &mut Option<u32>,
270                        old_row: &mut u32,
271                        new_row: u32,
272                        tree: &mut SumTree<InternalDiffHunk>,
273                        diff_base: &text::BufferSnapshot,
274                        buffer: &text::BufferSnapshot| {
275        if let Some(del_lines) = pending_delete_lines.take() {
276            let old_start =
277                diff_base.point_to_offset(Point::new(*old_row, 0).min(diff_base.max_point()));
278            let old_end = diff_base
279                .point_to_offset(Point::new(*old_row + del_lines, 0).min(diff_base.max_point()));
280            let new_pos = buffer.anchor_before(Point::new(new_row, 0).min(buffer.max_point()));
281            tree.push(
282                InternalDiffHunk {
283                    buffer_range: new_pos..new_pos,
284                    diff_base_byte_range: old_start..old_end,
285                    base_word_diffs: Vec::new(),
286                    buffer_word_diffs: Vec::new(),
287                },
288                buffer,
289            );
290            *old_row += del_lines;
291        }
292    };
293
294    for operation in line_operations {
295        match operation {
296            LineOperation::Delete { lines } => {
297                // Accumulate deletions — they might be followed by an Insert (= modification)
298                *pending_delete_lines.get_or_insert(0) += lines;
299            }
300            LineOperation::Insert { lines } => {
301                let old_start =
302                    diff_base.point_to_offset(Point::new(old_row, 0).min(diff_base.max_point()));
303                let (old_end, del_lines) = if let Some(del_lines) = pending_delete_lines.take() {
304                    // Delete followed by Insert = Modified hunk
305                    let old_end = diff_base.point_to_offset(
306                        Point::new(old_row + del_lines, 0).min(diff_base.max_point()),
307                    );
308                    (old_end, del_lines)
309                } else {
310                    // Pure insertion
311                    (old_start, 0)
312                };
313                let new_start =
314                    buffer.anchor_before(Point::new(new_row, 0).min(buffer.max_point()));
315                let new_end =
316                    buffer.anchor_before(Point::new(new_row + lines, 0).min(buffer.max_point()));
317                tree.push(
318                    InternalDiffHunk {
319                        buffer_range: new_start..new_end,
320                        diff_base_byte_range: old_start..old_end,
321                        base_word_diffs: Vec::new(),
322                        buffer_word_diffs: Vec::new(),
323                    },
324                    buffer,
325                );
326                old_row += del_lines;
327                new_row += lines;
328            }
329            LineOperation::Keep { lines } => {
330                // Flush any pending deletion before a Keep
331                flush_delete(
332                    &mut pending_delete_lines,
333                    &mut old_row,
334                    new_row,
335                    &mut tree,
336                    diff_base,
337                    buffer,
338                );
339                // Keep = unchanged, no hunk to push
340                old_row += lines;
341                new_row += lines;
342            }
343        }
344    }
345
346    // Flush any trailing deletion
347    flush_delete(
348        &mut pending_delete_lines,
349        &mut old_row,
350        new_row,
351        &mut tree,
352        diff_base,
353        buffer,
354    );
355
356    tree
357}
358
359impl PendingDiff {
360    pub fn update(
361        &mut self,
362        operations: Vec<LineOperation>,
363        base_snapshot: text::BufferSnapshot,
364        cx: &mut Context<Diff>,
365    ) {
366        // Capture the buffer snapshot now, synchronously, so it matches the
367        // line operations. Capturing it inside the spawned task would race with
368        // subsequent chunks arriving before the task starts.
369        let text_snapshot = self.new_buffer.read(cx).text_snapshot();
370        self.pending_update = Some(PendingUpdate {
371            operations,
372            base_snapshot,
373            text_snapshot,
374        });
375        if !self.is_updating {
376            self.flush_pending_update(cx);
377        }
378    }
379
380    fn flush_pending_update(&mut self, cx: &mut Context<Diff>) {
381        let Some(PendingUpdate {
382            operations,
383            base_snapshot,
384            text_snapshot,
385        }) = self.pending_update.take()
386        else {
387            self.is_updating = false;
388            return;
389        };
390        self.is_updating = true;
391
392        let buffer_diff = self.diff.clone();
393        let base_text = self.base_text.clone();
394        let language = self.new_buffer.read(cx).language().cloned();
395        self.update_diff = cx.spawn(async move |diff, cx| {
396            let snapshot = text_snapshot.clone();
397            let update = buffer_diff
398                .update(cx, |diff, cx| {
399                    diff.update_diff_impl(
400                        text_snapshot.clone(),
401                        Some(base_text.clone()),
402                        None,
403                        language,
404                        move |_d, _b, _o| compute_hunks(&base_snapshot, &text_snapshot, operations),
405                        cx,
406                    )
407                })
408                .await;
409            let (task1, task2) = buffer_diff.update(cx, |diff, cx| {
410                let task1 = diff.set_snapshot(update.clone(), &snapshot, cx);
411                let task2 = diff
412                    .secondary_diff()
413                    .unwrap()
414                    .update(cx, |diff, cx| diff.set_snapshot(update, &snapshot, cx));
415                (task1, task2)
416            });
417            task1.await;
418            task2.await;
419            diff.update(cx, |diff, cx| {
420                if let Diff::Pending(diff) = diff {
421                    diff.update_visible_ranges(cx);
422                    // Pick up any update that arrived while this task was running.
423                    diff.flush_pending_update(cx);
424                }
425            })
426        });
427    }
428
429    pub fn reveal_range(&mut self, range: Range<Anchor>, cx: &mut Context<Diff>) {
430        self.revealed_ranges.push(range);
431        self.update_visible_ranges(cx);
432    }
433
434    fn finalize(&self, cx: &mut Context<Diff>) -> FinalizedDiff {
435        let ranges = self.excerpt_ranges(cx);
436        let base_text = self.base_text.clone();
437        let new_buffer = self.new_buffer.read(cx);
438        let language_registry = new_buffer.language_registry();
439
440        let path = new_buffer
441            .file()
442            .map(|file| file.path().display(file.path_style(cx)))
443            .unwrap_or("untitled".into())
444            .into();
445        let replica_id = new_buffer.replica_id();
446
447        // Replace the buffer in the multibuffer with the snapshot
448        let buffer = cx.new(|cx| {
449            let language = self.new_buffer.read(cx).language().cloned();
450            let buffer = TextBuffer::new_normalized(
451                replica_id,
452                cx.entity_id().as_non_zero_u64().into(),
453                self.new_buffer.read(cx).line_ending(),
454                self.new_buffer.read(cx).as_rope().clone(),
455            );
456            let mut buffer = Buffer::build(buffer, None, Capability::ReadWrite);
457            buffer.set_language(language, cx);
458            buffer
459        });
460
461        let buffer_diff = cx.spawn({
462            let buffer = buffer.clone();
463            async move |_this, cx| {
464                buffer.update(cx, |buffer, _| buffer.parsing_idle()).await;
465                build_buffer_diff(base_text, &buffer, language_registry, cx).await
466            }
467        });
468
469        let update_diff = cx.spawn(async move |this, cx| {
470            let buffer_diff = buffer_diff.await?;
471            this.update(cx, |this, cx| {
472                this.multibuffer().update(cx, |multibuffer, cx| {
473                    let path_key = PathKey::for_buffer(&buffer, cx);
474                    multibuffer.clear(cx);
475                    multibuffer.set_excerpts_for_path(
476                        path_key,
477                        buffer,
478                        ranges,
479                        excerpt_context_lines(cx),
480                        cx,
481                    );
482                    multibuffer.add_diff(buffer_diff.clone(), cx);
483                });
484
485                cx.notify();
486            })
487        });
488
489        FinalizedDiff {
490            path,
491            base_text: self.base_text.clone(),
492            multibuffer: self.multibuffer.clone(),
493            new_buffer: self.new_buffer.clone(),
494            _update_diff: update_diff,
495        }
496    }
497
498    fn update_visible_ranges(&mut self, cx: &mut Context<Diff>) {
499        let ranges = self.excerpt_ranges(cx);
500        self.multibuffer.update(cx, |multibuffer, cx| {
501            multibuffer.set_excerpts_for_path(
502                PathKey::for_buffer(&self.new_buffer, cx),
503                self.new_buffer.clone(),
504                ranges,
505                excerpt_context_lines(cx),
506                cx,
507            );
508            let end = multibuffer.len(cx);
509            Some(multibuffer.snapshot(cx).offset_to_point(end).row + 1)
510        });
511        cx.notify();
512    }
513
514    fn excerpt_ranges(&self, cx: &App) -> Vec<Range<Point>> {
515        let buffer = self.new_buffer.read(cx);
516        let mut ranges = self
517            .diff
518            .read(cx)
519            .snapshot(cx)
520            .hunks_intersecting_range(
521                Anchor::min_for_buffer(buffer.remote_id())
522                    ..Anchor::max_for_buffer(buffer.remote_id()),
523                buffer,
524            )
525            .map(|diff_hunk| diff_hunk.buffer_range.to_point(buffer))
526            .collect::<Vec<_>>();
527        ranges.extend(
528            self.revealed_ranges
529                .iter()
530                .map(|range| range.to_point(buffer)),
531        );
532        ranges.sort_unstable_by_key(|range| (range.start, Reverse(range.end)));
533
534        // Merge adjacent ranges
535        let mut ranges = ranges.into_iter().peekable();
536        let mut merged_ranges = Vec::new();
537        while let Some(mut range) = ranges.next() {
538            while let Some(next_range) = ranges.peek() {
539                if range.end >= next_range.start {
540                    range.end = range.end.max(next_range.end);
541                    ranges.next();
542                } else {
543                    break;
544                }
545            }
546
547            merged_ranges.push(range);
548        }
549        merged_ranges
550    }
551}
552
/// State of a diff that no longer receives updates.
pub struct FinalizedDiff {
    /// Path reported for this diff. When finalized from a pending diff whose
    /// buffer has no file, this is `"untitled"`.
    path: String,
    /// The text the diff is computed against.
    base_text: Arc<str>,
    /// The buffer holding the new side of the diff.
    new_buffer: Entity<Buffer>,
    /// Multibuffer through which the diff is displayed.
    multibuffer: Entity<MultiBuffer>,
    /// Task that builds the buffer diff and populates `multibuffer`; it is
    /// stored alongside the diff it belongs to.
    _update_diff: Task<Result<()>>,
}
560
561async fn build_buffer_diff(
562    old_text: Arc<str>,
563    buffer: &Entity<Buffer>,
564    language_registry: Option<Arc<LanguageRegistry>>,
565    cx: &mut AsyncApp,
566) -> Result<Entity<BufferDiff>> {
567    let language = cx.update(|cx| buffer.read(cx).language().cloned());
568    let text_snapshot = cx.update(|cx| buffer.read(cx).text_snapshot());
569    let buffer = cx.update(|cx| buffer.read(cx).snapshot());
570
571    let secondary_diff = cx.new(|cx| BufferDiff::new(&buffer, cx));
572
573    let update = secondary_diff
574        .update(cx, |secondary_diff, cx| {
575            secondary_diff.update_diff(
576                text_snapshot.clone(),
577                Some(old_text),
578                Some(false),
579                language.clone(),
580                cx,
581            )
582        })
583        .await;
584
585    secondary_diff
586        .update(cx, |secondary_diff, cx| {
587            secondary_diff.set_snapshot(update.clone(), &buffer, cx)
588        })
589        .await;
590
591    let diff = cx.new(|cx| BufferDiff::new(&buffer, cx));
592    diff.update(cx, |diff, cx| {
593        diff.language_changed(language, language_registry, cx);
594        diff.set_secondary_diff(secondary_diff);
595        diff.set_snapshot(update.clone(), &buffer, cx)
596    })
597    .await;
598    Ok(diff)
599}
600
#[cfg(test)]
mod tests {
    use gpui::{AppContext as _, TestAppContext};
    use language::Buffer;

    use crate::Diff;

    /// Smoke test: creating a pending diff for a buffer and then editing that
    /// buffer must run to completion. No assertions are made about the
    /// resulting diff; the test only fails if something panics.
    #[gpui::test]
    async fn test_pending_diff(cx: &mut TestAppContext) {
        let buffer = cx.new(|cx| Buffer::local("hello!", cx));
        // Keep the diff entity alive for the duration of the test.
        let _diff = cx.new(|cx| Diff::new(buffer.clone(), cx));
        buffer.update(cx, |buffer, cx| {
            buffer.set_text("HELLO!", cx);
        });
        // Drain any work scheduled by the edit above.
        cx.run_until_parked();
    }
}