From b00e467ede67d8205707f0a9aff64b3818d1763b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 23 Feb 2023 19:45:48 -0800 Subject: [PATCH] Add APIs for stripping trailing whitespace from a buffer --- crates/language/src/buffer.rs | 122 +++++++++++++++++++++++++--- crates/language/src/buffer_tests.rs | 120 +++++++++++++++++++++++++++ 2 files changed, 229 insertions(+), 13 deletions(-) diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index 03d28025917842f37af861d8dba57dbf7e720339..95bb514dd49199293f41b2be7e32af0cacf8cab6 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -305,7 +305,7 @@ pub struct Chunk<'a> { } pub struct Diff { - base_version: clock::Global, + pub(crate) base_version: clock::Global, line_ending: LineEnding, edits: Vec<(Range<usize>, Arc<str>)>, } @@ -1154,20 +1154,77 @@ impl Buffer { }) } - pub fn apply_diff(&mut self, diff: Diff, cx: &mut ModelContext<Self>) -> Option<&Transaction> { - if self.version == diff.base_version { - self.finalize_last_transaction(); - self.start_transaction(); - self.text.set_line_ending(diff.line_ending); - self.edit(diff.edits, None, cx); - if self.end_transaction(cx).is_some() { - self.finalize_last_transaction() - } else { - None + pub fn normalize_whitespace(&self, cx: &AppContext) -> Task<Diff> { + let old_text = self.as_rope().clone(); + let line_ending = self.line_ending(); + let base_version = self.version(); + cx.background().spawn(async move { + let ranges = trailing_whitespace_ranges(&old_text); + let empty = Arc::<str>::from(""); + Diff { + base_version, + line_ending, + edits: ranges + .into_iter() + .map(|range| (range, empty.clone())) + .collect(), } - } else { - None + }) + } + + pub fn apply_diff(&mut self, diff: Diff, cx: &mut ModelContext<Self>) -> Option<&Transaction> { + if self.version != diff.base_version { + return None; } + + self.finalize_last_transaction(); + self.start_transaction(); + self.text.set_line_ending(diff.line_ending); + self.edit(diff.edits, None, cx); 
+ self.end_transaction(cx)?; + self.finalize_last_transaction() + } + + pub fn apply_diff_force( + &mut self, + diff: Diff, + cx: &mut ModelContext<Self>, + ) -> Option<&Transaction> { + // Check for any edits to the buffer that have occurred since this diff + // was computed. + let snapshot = self.snapshot(); + let mut edits_since = snapshot.edits_since::<usize>(&diff.base_version).peekable(); + let mut delta = 0; + let adjusted_edits = diff.edits.into_iter().filter_map(|(range, new_text)| { + while let Some(edit_since) = edits_since.peek() { + // If the edit occurs after a diff hunk, then it does not + // affect that hunk. + if edit_since.old.start > range.end { + break; + } + // If the edit precedes the diff hunk, then adjust the hunk + // to reflect the edit. + else if edit_since.old.end < range.start { + delta += edit_since.new_len() as i64 - edit_since.old_len() as i64; + edits_since.next(); + } + // If the edit intersects a diff hunk, then discard that hunk. + else { + return None; + } + } + + let start = (range.start as i64 + delta) as usize; + let end = (range.end as i64 + delta) as usize; + Some((start..end, new_text)) + }); + + self.finalize_last_transaction(); + self.start_transaction(); + self.text.set_line_ending(diff.line_ending); + self.edit(adjusted_edits, None, cx); + self.end_transaction(cx)?; + self.finalize_last_transaction() } pub fn is_dirty(&self) -> bool { @@ -2840,3 +2897,42 @@ pub fn char_kind(c: char) -> CharKind { CharKind::Punctuation } } + +/// Find all of the ranges of whitespace that occur at the ends of lines +/// in the given rope. +/// +/// This could also be done with a regex search, but this implementation +/// avoids copying text. 
+pub fn trailing_whitespace_ranges(rope: &Rope) -> Vec<Range<usize>> { + let mut ranges = Vec::new(); + + let mut offset = 0; + let mut prev_chunk_trailing_whitespace_range = 0..0; + for chunk in rope.chunks() { + let mut prev_line_trailing_whitespace_range = 0..0; + for (i, line) in chunk.split('\n').enumerate() { + let line_end_offset = offset + line.len(); + let trimmed_line_len = line.trim_end_matches(|c| matches!(c, ' ' | '\t')).len(); + let mut trailing_whitespace_range = (offset + trimmed_line_len)..line_end_offset; + + if i == 0 && trimmed_line_len == 0 { + trailing_whitespace_range.start = prev_chunk_trailing_whitespace_range.start; + } + if !prev_line_trailing_whitespace_range.is_empty() { + ranges.push(prev_line_trailing_whitespace_range); + } + + offset = line_end_offset + 1; + prev_line_trailing_whitespace_range = trailing_whitespace_range; + } + + offset -= 1; + prev_chunk_trailing_whitespace_range = prev_line_trailing_whitespace_range; + } + + if !prev_chunk_trailing_whitespace_range.is_empty() { + ranges.push(prev_chunk_trailing_whitespace_range); + } + + ranges +} diff --git a/crates/language/src/buffer_tests.rs b/crates/language/src/buffer_tests.rs index 36add5f1f3fe036b222c9751dfe3496c1596d677..6bdff1ea2823f70d9f6c3ee2d464a37869e97112 100644 --- a/crates/language/src/buffer_tests.rs +++ b/crates/language/src/buffer_tests.rs @@ -6,6 +6,7 @@ use gpui::{ModelHandle, MutableAppContext}; use indoc::indoc; use proto::deserialize_operation; use rand::prelude::*; +use regex::RegexBuilder; use settings::Settings; use std::{ cell::RefCell, @@ -18,6 +19,13 @@ use text::network::Network; use unindent::Unindent as _; use util::{assert_set_eq, post_inc, test::marked_text_ranges, RandomCharIter}; +lazy_static! 
{ + static ref TRAILING_WHITESPACE_REGEX: Regex = RegexBuilder::new("[ \t]+$") + .multi_line(true) + .build() + .unwrap(); +} + #[cfg(test)] #[ctor::ctor] fn init_logger() { @@ -211,6 +219,79 @@ async fn test_apply_diff(cx: &mut gpui::TestAppContext) { }); } +#[gpui::test(iterations = 10)] +async fn test_normalize_whitespace(cx: &mut gpui::TestAppContext) { + let text = [ + "zero", // + "one  ", // 2 trailing spaces + "two", // + "three   ", // 3 trailing spaces + "four", // + "five    ", // 4 trailing spaces + ] + .join("\n"); + + let buffer = cx.add_model(|cx| Buffer::new(0, text, cx)); + + // Spawn a task to format the buffer's whitespace. + // Pause so that the formatting task starts running. + let format = buffer.read_with(cx, |buffer, cx| buffer.normalize_whitespace(cx)); + smol::future::yield_now().await; + + // Edit the buffer while the normalization task is running. + let version_before_edit = buffer.read_with(cx, |buffer, _| buffer.version()); + buffer.update(cx, |buffer, cx| { + buffer.edit( + [ + (Point::new(0, 1)..Point::new(0, 1), "EE"), + (Point::new(3, 5)..Point::new(3, 5), "EEE"), + ], + None, + cx, + ); + }); + + let format_diff = format.await; + buffer.update(cx, |buffer, cx| { + let version_before_format = format_diff.base_version.clone(); + buffer.apply_diff_force(format_diff, cx); + + // The outcome depends on the order of concurrent tasks. + // + // If the edit occurred while searching for trailing whitespace ranges, + // then the trailing whitespace region touched by the edit is left intact. + if version_before_format == version_before_edit { + assert_eq!( + buffer.text(), + [ + "zEEero", // + "one", // + "two", // + "threeEEE   ", // + "four", // + "five", // + ] + .join("\n") + ); + } + // Otherwise, all trailing whitespace is removed. 
+ else { + assert_eq!( + buffer.text(), + [ + "zEEero", // + "one", // + "two", // + "threeEEE", // + "four", // + "five", // + ] + .join("\n") + ); + } + }); +} + #[gpui::test] async fn test_reparse(cx: &mut gpui::TestAppContext) { let text = "fn a() {}"; @@ -1943,6 +2024,45 @@ fn test_contiguous_ranges() { ); } +#[gpui::test(iterations = 500)] +fn test_trailing_whitespace_ranges(mut rng: StdRng) { + // Generate a random multi-line string containing + // some lines with trailing whitespace. + let mut text = String::new(); + for _ in 0..rng.gen_range(0..16) { + for _ in 0..rng.gen_range(0..36) { + text.push(match rng.gen_range(0..10) { + 0..=1 => ' ', + 3 => '\t', + _ => rng.gen_range('a'..'z'), + }); + } + text.push('\n'); + } + + match rng.gen_range(0..10) { + // sometimes remove the last newline + 0..=1 => drop(text.pop()), // + + // sometimes add extra newlines + 2..=3 => text.push_str(&"\n".repeat(rng.gen_range(1..5))), + _ => {} + } + + let rope = Rope::from(text.as_str()); + let actual_ranges = trailing_whitespace_ranges(&rope); + let expected_ranges = TRAILING_WHITESPACE_REGEX + .find_iter(&text) + .map(|m| m.range()) + .collect::<Vec<_>>(); + assert_eq!( + actual_ranges, + expected_ranges, + "wrong ranges for text lines:\n{:?}", + text.split("\n").collect::<Vec<_>>() + ); +} + fn ruby_lang() -> Language { Language::new( LanguageConfig {