Add APIs for stripping trailing whitespace from a buffer

Max Brunsfeld created

Change summary

crates/language/src/buffer.rs       | 122 +++++++++++++++++++++++++++---
crates/language/src/buffer_tests.rs | 120 ++++++++++++++++++++++++++++++
2 files changed, 229 insertions(+), 13 deletions(-)

Detailed changes

crates/language/src/buffer.rs 🔗

@@ -305,7 +305,7 @@ pub struct Chunk<'a> {
 }
 
 pub struct Diff {
-    base_version: clock::Global,
+    pub(crate) base_version: clock::Global,
     line_ending: LineEnding,
     edits: Vec<(Range<usize>, Arc<str>)>,
 }
@@ -1154,20 +1154,77 @@ impl Buffer {
         })
     }
 
-    pub fn apply_diff(&mut self, diff: Diff, cx: &mut ModelContext<Self>) -> Option<&Transaction> {
-        if self.version == diff.base_version {
-            self.finalize_last_transaction();
-            self.start_transaction();
-            self.text.set_line_ending(diff.line_ending);
-            self.edit(diff.edits, None, cx);
-            if self.end_transaction(cx).is_some() {
-                self.finalize_last_transaction()
-            } else {
-                None
+    pub fn normalize_whitespace(&self, cx: &AppContext) -> Task<Diff> {
+        let old_text = self.as_rope().clone();
+        let line_ending = self.line_ending();
+        let base_version = self.version();
+        cx.background().spawn(async move {
+            let ranges = trailing_whitespace_ranges(&old_text);
+            let empty = Arc::<str>::from("");
+            Diff {
+                base_version,
+                line_ending,
+                edits: ranges
+                    .into_iter()
+                    .map(|range| (range, empty.clone()))
+                    .collect(),
             }
-        } else {
-            None
+        })
+    }
+
+    pub fn apply_diff(&mut self, diff: Diff, cx: &mut ModelContext<Self>) -> Option<&Transaction> {
+        if self.version != diff.base_version {
+            return None;
         }
+
+        self.finalize_last_transaction();
+        self.start_transaction();
+        self.text.set_line_ending(diff.line_ending);
+        self.edit(diff.edits, None, cx);
+        self.end_transaction(cx)?;
+        self.finalize_last_transaction()
+    }
+
+    pub fn apply_diff_force(
+        &mut self,
+        diff: Diff,
+        cx: &mut ModelContext<Self>,
+    ) -> Option<&Transaction> {
+        // Check for any edits to the buffer that have occurred since this diff
+        // was computed.
+        let snapshot = self.snapshot();
+        let mut edits_since = snapshot.edits_since::<usize>(&diff.base_version).peekable();
+        let mut delta = 0;
+        let adjusted_edits = diff.edits.into_iter().filter_map(|(range, new_text)| {
+            while let Some(edit_since) = edits_since.peek() {
+                // If the edit occurs after a diff hunk, then it does not
+                // affect that hunk.
+                if edit_since.old.start > range.end {
+                    break;
+                }
+                // If the edit precedes the diff hunk, then adjust the hunk
+                // to reflect the edit.
+                else if edit_since.old.end < range.start {
+                    delta += edit_since.new_len() as i64 - edit_since.old_len() as i64;
+                    edits_since.next();
+                }
+                // If the edit intersects a diff hunk, then discard that hunk.
+                else {
+                    return None;
+                }
+            }
+
+            let start = (range.start as i64 + delta) as usize;
+            let end = (range.end as i64 + delta) as usize;
+            Some((start..end, new_text))
+        });
+
+        self.finalize_last_transaction();
+        self.start_transaction();
+        self.text.set_line_ending(diff.line_ending);
+        self.edit(adjusted_edits, None, cx);
+        self.end_transaction(cx)?;
+        self.finalize_last_transaction()
     }
 
     pub fn is_dirty(&self) -> bool {
@@ -2840,3 +2897,42 @@ pub fn char_kind(c: char) -> CharKind {
         CharKind::Punctuation
     }
 }
+
+/// Find all of the ranges of whitespace that occur at the ends of lines
+/// in the given rope.
+///
+/// This could also be done with a regex search, but this implementation
+/// avoids copying text.
+pub fn trailing_whitespace_ranges(rope: &Rope) -> Vec<Range<usize>> {
+    let mut ranges = Vec::new();
+
+    let mut offset = 0;
+    let mut prev_chunk_trailing_whitespace_range = 0..0;
+    for chunk in rope.chunks() {
+        let mut prev_line_trailing_whitespace_range = 0..0;
+        for (i, line) in chunk.split('\n').enumerate() {
+            let line_end_offset = offset + line.len();
+            let trimmed_line_len = line.trim_end_matches(|c| matches!(c, ' ' | '\t')).len();
+            let mut trailing_whitespace_range = (offset + trimmed_line_len)..line_end_offset;
+
+            if i == 0 && trimmed_line_len == 0 {
+                trailing_whitespace_range.start = prev_chunk_trailing_whitespace_range.start;
+            }
+            if !prev_line_trailing_whitespace_range.is_empty() {
+                ranges.push(prev_line_trailing_whitespace_range);
+            }
+
+            offset = line_end_offset + 1;
+            prev_line_trailing_whitespace_range = trailing_whitespace_range;
+        }
+
+        offset -= 1;
+        prev_chunk_trailing_whitespace_range = prev_line_trailing_whitespace_range;
+    }
+
+    if !prev_chunk_trailing_whitespace_range.is_empty() {
+        ranges.push(prev_chunk_trailing_whitespace_range);
+    }
+
+    ranges
+}

crates/language/src/buffer_tests.rs 🔗

@@ -6,6 +6,7 @@ use gpui::{ModelHandle, MutableAppContext};
 use indoc::indoc;
 use proto::deserialize_operation;
 use rand::prelude::*;
+use regex::RegexBuilder;
 use settings::Settings;
 use std::{
     cell::RefCell,
@@ -18,6 +19,13 @@ use text::network::Network;
 use unindent::Unindent as _;
 use util::{assert_set_eq, post_inc, test::marked_text_ranges, RandomCharIter};
 
+lazy_static! {
+    static ref TRAILING_WHITESPACE_REGEX: Regex = RegexBuilder::new("[ \t]+$")
+        .multi_line(true)
+        .build()
+        .unwrap();
+}
+
 #[cfg(test)]
 #[ctor::ctor]
 fn init_logger() {
@@ -211,6 +219,79 @@ async fn test_apply_diff(cx: &mut gpui::TestAppContext) {
     });
 }
 
+#[gpui::test(iterations = 10)]
+async fn test_normalize_whitespace(cx: &mut gpui::TestAppContext) {
+    let text = [
+        "zero",     //
+        "one  ",    // 2 trailing spaces
+        "two",      //
+        "three   ", // 3 trailing spaces
+        "four",     //
+        "five    ", // 4 trailing spaces
+    ]
+    .join("\n");
+
+    let buffer = cx.add_model(|cx| Buffer::new(0, text, cx));
+
+    // Spawn a task to format the buffer's whitespace.
+    // Pause so that the formatting task starts running.
+    let format = buffer.read_with(cx, |buffer, cx| buffer.normalize_whitespace(cx));
+    smol::future::yield_now().await;
+
+    // Edit the buffer while the normalization task is running.
+    let version_before_edit = buffer.read_with(cx, |buffer, _| buffer.version());
+    buffer.update(cx, |buffer, cx| {
+        buffer.edit(
+            [
+                (Point::new(0, 1)..Point::new(0, 1), "EE"),
+                (Point::new(3, 5)..Point::new(3, 5), "EEE"),
+            ],
+            None,
+            cx,
+        );
+    });
+
+    let format_diff = format.await;
+    buffer.update(cx, |buffer, cx| {
+        let version_before_format = format_diff.base_version.clone();
+        buffer.apply_diff_force(format_diff, cx);
+
+        // The outcome depends on the order of concurrent tasks.
+        //
+        // If the edit occurred while searching for trailing whitespace ranges,
+        // then the trailing whitespace region touched by the edit is left intact.
+        if version_before_format == version_before_edit {
+            assert_eq!(
+                buffer.text(),
+                [
+                    "zEEero",      //
+                    "one",         //
+                    "two",         //
+                    "threeEEE   ", //
+                    "four",        //
+                    "five",        //
+                ]
+                .join("\n")
+            );
+        }
+        // Otherwise, all trailing whitespace is removed.
+        else {
+            assert_eq!(
+                buffer.text(),
+                [
+                    "zEEero",   //
+                    "one",      //
+                    "two",      //
+                    "threeEEE", //
+                    "four",     //
+                    "five",     //
+                ]
+                .join("\n")
+            );
+        }
+    });
+}
+
 #[gpui::test]
 async fn test_reparse(cx: &mut gpui::TestAppContext) {
     let text = "fn a() {}";
@@ -1943,6 +2024,45 @@ fn test_contiguous_ranges() {
     );
 }
 
+#[gpui::test(iterations = 500)]
+fn test_trailing_whitespace_ranges(mut rng: StdRng) {
+    // Generate a random multi-line string containing
+    // some lines with trailing whitespace.
+    let mut text = String::new();
+    for _ in 0..rng.gen_range(0..16) {
+        for _ in 0..rng.gen_range(0..36) {
+            text.push(match rng.gen_range(0..10) {
+                0..=1 => ' ',
+                3 => '\t',
+                _ => rng.gen_range('a'..'z'),
+            });
+        }
+        text.push('\n');
+    }
+
+    match rng.gen_range(0..10) {
+        // sometimes remove the last newline
+        0..=1 => drop(text.pop()), //
+
+        // sometimes add extra newlines
+        2..=3 => text.push_str(&"\n".repeat(rng.gen_range(1..5))),
+        _ => {}
+    }
+
+    let rope = Rope::from(text.as_str());
+    let actual_ranges = trailing_whitespace_ranges(&rope);
+    let expected_ranges = TRAILING_WHITESPACE_REGEX
+        .find_iter(&text)
+        .map(|m| m.range())
+        .collect::<Vec<_>>();
+    assert_eq!(
+        actual_ranges,
+        expected_ranges,
+        "wrong ranges for text lines:\n{:?}",
+        text.split("\n").collect::<Vec<_>>()
+    );
+}
+
 fn ruby_lang() -> Language {
     Language::new(
         LanguageConfig {