git: Show pure white space changes in word diffs (#45090)

Anthony Eid created

Closes #44624

Before this change, white space would be trimmed from word diff ranges.
Users found this behavior confusing, so we're changing it to be more
inline with how GitHub treats whitespace in their word diffs.

Release Notes:

- git: Word diffs won't filter out pure whitespace diffs now

Change summary

crates/buffer_diff/src/buffer_diff.rs         | 11 +++
crates/language/src/text_diff.rs              | 61 ++------------------
crates/multi_buffer/src/multi_buffer_tests.rs | 13 ++++
3 files changed, 30 insertions(+), 55 deletions(-)

Detailed changes

crates/buffer_diff/src/buffer_diff.rs 🔗

@@ -2155,7 +2155,7 @@ mod tests {
         let range = diff_1.inner.compare(&empty_diff.inner, &buffer).unwrap();
         assert_eq!(range.to_point(&buffer), Point::new(0, 0)..Point::new(8, 0));
 
-        // Edit does not affect the diff.
+        // Edit does affects the diff because it recalculates word diffs.
         buffer.edit_via_marked_text(
             &"
                 one
@@ -2170,7 +2170,14 @@ mod tests {
             .unindent(),
         );
         let diff_2 = BufferDiffSnapshot::new_sync(buffer.clone(), base_text.clone(), cx);
-        assert_eq!(None, diff_2.inner.compare(&diff_1.inner, &buffer));
+        assert_eq!(
+            Point::new(4, 0)..Point::new(5, 0),
+            diff_2
+                .inner
+                .compare(&diff_1.inner, &buffer)
+                .unwrap()
+                .to_point(&buffer)
+        );
 
         // Edit turns a deletion hunk into a modification.
         buffer.edit_via_marked_text(

crates/language/src/text_diff.rs 🔗

@@ -48,7 +48,6 @@ pub fn text_diff(old_text: &str, new_text: &str) -> Vec<(Range<usize>, Arc<str>)
 ///
 /// Returns a tuple of (old_ranges, new_ranges) where each vector contains
 /// the byte ranges of changed words in the respective text.
-/// Whitespace-only changes are excluded from the results.
 pub fn word_diff_ranges(
     old_text: &str,
     new_text: &str,
@@ -62,23 +61,23 @@ pub fn word_diff_ranges(
     let mut new_ranges: Vec<Range<usize>> = Vec::new();
 
     diff_internal(&input, |old_byte_range, new_byte_range, _, _| {
-        for range in split_on_whitespace(old_text, &old_byte_range) {
+        if !old_byte_range.is_empty() {
             if let Some(last) = old_ranges.last_mut()
-                && last.end >= range.start
+                && last.end >= old_byte_range.start
             {
-                last.end = range.end;
+                last.end = old_byte_range.end;
             } else {
-                old_ranges.push(range);
+                old_ranges.push(old_byte_range);
             }
         }
 
-        for range in split_on_whitespace(new_text, &new_byte_range) {
+        if !new_byte_range.is_empty() {
             if let Some(last) = new_ranges.last_mut()
-                && last.end >= range.start
+                && last.end >= new_byte_range.start
             {
-                last.end = range.end;
+                last.end = new_byte_range.end;
             } else {
-                new_ranges.push(range);
+                new_ranges.push(new_byte_range);
             }
         }
     });
@@ -86,50 +85,6 @@ pub fn word_diff_ranges(
     (old_ranges, new_ranges)
 }
 
-fn split_on_whitespace(text: &str, range: &Range<usize>) -> Vec<Range<usize>> {
-    if range.is_empty() {
-        return Vec::new();
-    }
-
-    let slice = &text[range.clone()];
-    let mut ranges = Vec::new();
-    let mut offset = 0;
-
-    for line in slice.lines() {
-        let line_start = offset;
-        let line_end = line_start + line.len();
-        offset = line_end + 1;
-        let trimmed = line.trim();
-
-        if !trimmed.is_empty() {
-            let leading = line.len() - line.trim_start().len();
-            let trailing = line.len() - line.trim_end().len();
-            let trimmed_start = range.start + line_start + leading;
-            let trimmed_end = range.start + line_end - trailing;
-
-            let original_line_start = text[..range.start + line_start]
-                .rfind('\n')
-                .map(|i| i + 1)
-                .unwrap_or(0);
-            let original_line_end = text[range.start + line_start..]
-                .find('\n')
-                .map(|i| range.start + line_start + i)
-                .unwrap_or(text.len());
-            let original_line = &text[original_line_start..original_line_end];
-            let original_trimmed_start =
-                original_line_start + (original_line.len() - original_line.trim_start().len());
-            let original_trimmed_end =
-                original_line_end - (original_line.len() - original_line.trim_end().len());
-
-            if trimmed_start > original_trimmed_start || trimmed_end < original_trimmed_end {
-                ranges.push(trimmed_start..trimmed_end);
-            }
-        }
-    }
-
-    ranges
-}
-
 pub struct DiffOptions {
     pub language_scope: Option<LanguageScope>,
     pub max_word_diff_len: usize,

crates/multi_buffer/src/multi_buffer_tests.rs 🔗

@@ -4480,6 +4480,19 @@ async fn test_word_diff_simple_replacement(cx: &mut TestAppContext) {
     assert_eq!(word_diffs, vec!["world", "bar", "WORLD", "BAR"]);
 }
 
+#[gpui::test]
+async fn test_word_diff_white_space(cx: &mut TestAppContext) {
+    let settings_store = cx.update(|cx| SettingsStore::test(cx));
+    cx.set_global(settings_store);
+
+    let base_text = "hello world foo bar\n";
+    let modified_text = "    hello world foo bar\n";
+
+    let word_diffs = collect_word_diffs(base_text, modified_text, cx);
+
+    assert_eq!(word_diffs, vec!["    "]);
+}
+
 #[gpui::test]
 async fn test_word_diff_consecutive_modified_lines(cx: &mut TestAppContext) {
     let settings_store = cx.update(|cx| SettingsStore::test(cx));