From e48973f75a57718015698f4707bacbe2bcc753b5 Mon Sep 17 00:00:00 2001
From: Antonio Scandurra
Date: Fri, 14 May 2021 11:33:39 +0200
Subject: [PATCH] Start on a SumTree-based Rope implementation

---
 zed/src/editor/buffer/mod.rs  |   1 +
 zed/src/editor/buffer/rope.rs | 353 ++++++++++++++++++++++++++++++++++
 zed/src/sum_tree/mod.rs       |  48 +++++
 3 files changed, 402 insertions(+)
 create mode 100644 zed/src/editor/buffer/rope.rs

diff --git a/zed/src/editor/buffer/mod.rs b/zed/src/editor/buffer/mod.rs
index 23077a25a900adf02cc8901983bac6f183f83130..5cb2e1920d88712380eb6b7f4c5cfc97c0d73b64 100644
--- a/zed/src/editor/buffer/mod.rs
+++ b/zed/src/editor/buffer/mod.rs
@@ -1,5 +1,6 @@
 mod anchor;
 mod point;
+mod rope;
 mod selection;
 
 pub use anchor::*;
diff --git a/zed/src/editor/buffer/rope.rs b/zed/src/editor/buffer/rope.rs
new file mode 100644
index 0000000000000000000000000000000000000000..bb441e96d34797d1af5097fce675977453bce457
--- /dev/null
+++ b/zed/src/editor/buffer/rope.rs
@@ -0,0 +1,353 @@
+use super::Point;
+use crate::sum_tree::{self, SeekBias, SumTree};
+use arrayvec::ArrayString;
+use std::{cmp, ops::Range, str};
+
+/// Target chunk size in bytes; a chunk holds at most `2 * CHUNK_BASE` bytes.
+/// Kept tiny under test so that short inputs exercise chunk splitting.
+#[cfg(test)]
+const CHUNK_BASE: usize = 2;
+
+#[cfg(not(test))]
+const CHUNK_BASE: usize = 8;
+
+/// Text stored as a `SumTree` of small, fixed-capacity string chunks.
+///
+/// All offsets taken by this type are byte offsets into the UTF-8 text.
+#[derive(Clone, Default, Debug)]
+pub struct Rope {
+    chunks: SumTree<Chunk>,
+}
+
+impl Rope {
+    /// Creates an empty rope.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Appends all of `rope`'s chunks after this rope's chunks.
+    pub fn append(&mut self, rope: Rope) {
+        self.chunks.push_tree(rope.chunks, &());
+    }
+
+    /// Appends `text`, topping up or rebalancing the last chunk first.
+    pub fn push(&mut self, mut text: &str) {
+        let mut suffix = ArrayString::<[_; 2 * CHUNK_BASE]>::new();
+        self.chunks.with_last_mut(
+            |chunk| {
+                if chunk.0.len() + text.len() <= 2 * CHUNK_BASE {
+                    // Everything fits into the last chunk.
+                    chunk.0.push_str(text);
+                    text = "";
+                } else {
+                    // Top the last chunk up to `CHUNK_BASE` bytes, backing off
+                    // to the nearest char boundary of `text`.
+                    let mut append_len = CHUNK_BASE.saturating_sub(chunk.0.len());
+                    while !text.is_char_boundary(append_len) {
+                        append_len -= 1;
+                    }
+
+                    if append_len > 0 {
+                        let split = text.split_at(append_len);
+                        chunk.0.push_str(split.0);
+                        text = split.1;
+                    } else {
+                        // Nothing could be appended: trim the last chunk to at
+                        // most `CHUNK_BASE` bytes and re-chunk the remainder
+                        // together with `text` below.
+                        let mut take_len = CHUNK_BASE;
+                        while !chunk.0.is_char_boundary(take_len) {
+                            take_len -= 1;
+                        }
+
+                        let split = chunk.0.split_at(take_len);
+                        suffix.push_str(split.1);
+                        chunk.0 = ArrayString::from(split.0).unwrap();
+                    }
+                }
+            },
+            &(),
+        );
+
+        let mut chunks = vec![];
+        let mut chunk = ArrayString::new();
+        for ch in suffix.chars().chain(text.chars()) {
+            // Never flush an empty chunk: under test a single char can be
+            // wider than `CHUNK_BASE` bytes (it still fits the capacity).
+            if !chunk.is_empty() && chunk.len() + ch.len_utf8() > CHUNK_BASE {
+                chunks.push(Chunk(chunk));
+                chunk = ArrayString::new();
+            }
+            chunk.push(ch);
+        }
+        if !chunk.is_empty() {
+            chunks.push(Chunk(chunk));
+        }
+        self.chunks.extend(chunks, &());
+    }
+
+    /// Returns a new rope containing the text in `range`.
+    ///
+    /// Both ends of `range` are byte offsets and must lie on char boundaries.
+    pub fn slice(&self, range: Range<usize>) -> Rope {
+        let mut slice = Rope::new();
+        let mut cursor = self.chunks.cursor::<usize, usize>();
+
+        cursor.slice(&range.start, SeekBias::Left, &());
+        if let Some(start_chunk) = cursor.item() {
+            let start_ix = range.start - cursor.start();
+            let end_ix = cmp::min(range.end, cursor.end()) - cursor.start();
+            slice.push(&start_chunk.0[start_ix..end_ix]);
+        }
+
+        if range.end > cursor.end() {
+            cursor.next();
+            slice.append(Rope {
+                chunks: cursor.slice(&range.end, SeekBias::Left, &()),
+            });
+            if let Some(end_chunk) = cursor.item() {
+                slice.push(&end_chunk.0[..range.end - cursor.start()]);
+            }
+        }
+
+        slice
+    }
+
+    /// Returns the summary of the whole rope.
+    pub fn summary(&self) -> TextSummary {
+        self.chunks.summary()
+    }
+
+    /// Iterates over all chars of the rope.
+    pub fn chars(&self) -> Chars {
+        self.chars_at(0)
+    }
+
+    /// Iterates over the chars starting at byte offset `start`.
+    pub fn chars_at(&self, start: usize) -> Chars {
+        Chars::new(self, start)
+    }
+
+    fn text(&self) -> String {
+        let mut text = String::new();
+        for chunk in self.chunks.cursor::<(), ()>() {
+            text.push_str(&chunk.0);
+        }
+        text
+    }
+}
+
+impl<'a> From<&'a str> for Rope {
+    fn from(text: &'a str) -> Self {
+        let mut rope = Self::new();
+        rope.push(text);
+        rope
+    }
+}
+
+/// A piece of rope text holding at most `2 * CHUNK_BASE` bytes.
+#[derive(Clone, Debug, Default)]
+struct Chunk(ArrayString<[u8; 2 * CHUNK_BASE]>);
+
+impl sum_tree::Item for Chunk {
+    type Summary = TextSummary;
+
+    fn summary(&self) -> Self::Summary {
+        let mut chars = 0;
+        let mut bytes = 0;
+        let mut lines = Point::new(0, 0);
+        let mut first_line_len = 0;
+        let mut rightmost_point = Point::new(0, 0);
+        for c in self.0.chars() {
+            chars += 1;
+            bytes += c.len_utf8();
+            if c == '\n' {
+                lines.row += 1;
+                lines.column = 0;
+            } else {
+                lines.column += 1;
+                if lines.row == 0 {
+                    first_line_len = lines.column;
+                }
+                if lines.column > rightmost_point.column {
+                    rightmost_point = lines;
+                }
+            }
+        }
+
+        TextSummary {
+            chars,
+            bytes,
+            lines,
+            first_line_len,
+            rightmost_point,
+        }
+    }
+}
+
+/// Aggregate measurements of a run of text.
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+pub struct TextSummary {
+    /// Number of chars.
+    pub chars: usize,
+    /// Number of UTF-8 bytes.
+    pub bytes: usize,
+    /// Number of newlines (`row`) and chars after the last one (`column`).
+    pub lines: Point,
+    /// Number of chars before the first newline.
+    pub first_line_len: u32,
+    /// Point with the greatest column reached by any line.
+    pub rightmost_point: Point,
+}
+
+impl sum_tree::Summary for TextSummary {
+    type Context = ();
+
+    fn add_summary(&mut self, summary: &Self, _: &Self::Context) {
+        *self += summary;
+    }
+}
+
+impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
+    fn add_assign(&mut self, other: &'a Self) {
+        let joined_line_len = self.lines.column + other.first_line_len;
+        if joined_line_len > self.rightmost_point.column {
+            self.rightmost_point = Point::new(self.lines.row, joined_line_len);
+        }
+        if other.rightmost_point.column > self.rightmost_point.column {
+            self.rightmost_point = self.lines + &other.rightmost_point;
+        }
+
+        if self.lines.row == 0 {
+            self.first_line_len += other.first_line_len;
+        }
+
+        self.chars += other.chars;
+        self.bytes += other.bytes;
+        self.lines += &other.lines;
+    }
+}
+
+impl std::ops::AddAssign<Self> for TextSummary {
+    fn add_assign(&mut self, other: Self) {
+        *self += &other;
+    }
+}
+
+impl<'a> sum_tree::Dimension<'a, TextSummary> for usize {
+    // Offsets are measured in bytes because chunks are sliced with byte
+    // indices; counting chars here would mis-seek on non-ASCII text.
+    fn add_summary(&mut self, summary: &'a TextSummary) {
+        *self += summary.bytes;
+    }
+}
+
+/// Iterator over the chars of a `Rope`.
+pub struct Chars<'a> {
+    cursor: sum_tree::Cursor<'a, Chunk, usize, usize>,
+    chars: str::Chars<'a>,
+}
+
+impl<'a> Chars<'a> {
+    /// Creates an iterator over `rope` starting at byte offset `start`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `start` is past the end of `rope` or not on a char boundary.
+    pub fn new(rope: &'a Rope, start: usize) -> Self {
+        let mut cursor = rope.chunks.cursor::<usize, usize>();
+        cursor.slice(&start, SeekBias::Left, &());
+        let chars = match cursor.item() {
+            Some(chunk) => chunk.0[start - cursor.start()..].chars(),
+            None => {
+                // No chunk under the cursor, e.g. because the rope is empty.
+                assert!(start <= rope.summary().bytes, "invalid index");
+                "".chars()
+            }
+        };
+        Self { cursor, chars }
+    }
+}
+
+impl<'a> Iterator for Chars<'a> {
+    type Item = char;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if let Some(ch) = self.chars.next() {
+            return Some(ch);
+        }
+
+        // The cursor still points at the chunk `self.chars` came from, so
+        // advance it before reading; otherwise that chunk is yielded twice.
+        loop {
+            self.cursor.next();
+            self.chars = self.cursor.item()?.0.chars();
+            if let Some(ch) = self.chars.next() {
+                return Some(ch);
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::util::RandomCharIter;
+
+    use super::*;
+    use rand::prelude::*;
+    use std::env;
+
+    #[test]
+    fn test_chars() {
+        let text = "hello\nworld";
+        let rope = Rope::from(text);
+        assert_eq!(rope.chars().collect::<String>(), text);
+        for start in 0..=text.len() {
+            assert_eq!(rope.chars_at(start).collect::<String>(), &text[start..]);
+        }
+        assert_eq!(Rope::new().chars().count(), 0);
+    }
+
+    #[test]
+    fn test_random() {
+        let iterations = env::var("ITERATIONS")
+            .map(|i| i.parse().expect("invalid `ITERATIONS` variable"))
+            .unwrap_or(100);
+        let operations = env::var("OPERATIONS")
+            .map(|i| i.parse().expect("invalid `OPERATIONS` variable"))
+            .unwrap_or(10);
+        let seed_range = if let Ok(seed) = env::var("SEED") {
+            let seed = seed.parse().expect("invalid `SEED` variable");
+            seed..seed + 1
+        } else {
+            0..iterations
+        };
+
+        for seed in seed_range {
+            dbg!(seed);
+            let mut rng = StdRng::seed_from_u64(seed);
+            let mut expected = String::new();
+            let mut actual = Rope::new();
+            for _ in 0..operations {
+                let end_ix = rng.gen_range(0..=expected.len());
+                let start_ix = rng.gen_range(0..=end_ix);
+                let len = rng.gen_range(0..=5);
+                let new_text: String = RandomCharIter::new(&mut rng).take(len).collect();
+
+                let mut new_actual = Rope::new();
+                new_actual.append(actual.slice(0..start_ix));
+                new_actual.push(&new_text);
+                new_actual.append(actual.slice(end_ix..actual.summary().bytes));
+                actual = new_actual;
+
+                let mut new_expected = String::new();
+                new_expected.push_str(&expected[..start_ix]);
+                new_expected.push_str(&new_text);
+                new_expected.push_str(&expected[end_ix..]);
+                expected = new_expected;
+
+                assert_eq!(actual.text(), expected);
+            }
+        }
+    }
+}
diff --git a/zed/src/sum_tree/mod.rs b/zed/src/sum_tree/mod.rs
index e70440f16f0c0bfeb811c8ad1aa6167adc85b253..7511da6fc61ee3042a0806382e5b7533970f1ffe 100644
--- a/zed/src/sum_tree/mod.rs
+++ b/zed/src/sum_tree/mod.rs
@@ -101,6 +101,54 @@ impl<T: Item> SumTree<T> {
         self.rightmost_leaf().0.items().last()
     }
 
+    /// Mutates the last item of the tree in place via `f`, then recomputes
+    /// the summaries along the rightmost path. Does nothing if the tree is empty.
+    pub fn with_last_mut(
+        &mut self,
+        f: impl FnOnce(&mut T),
+        ctx: &<T::Summary as Summary>::Context,
+    ) {
+        self.with_last_mut_recursive(f, ctx);
+    }
+
+    /// Applies `f` to the last item of this subtree and returns the subtree's
+    /// updated summary, or `None` if the subtree is an empty leaf.
+    fn with_last_mut_recursive(
+        &mut self,
+        f: impl FnOnce(&mut T),
+        ctx: &<T::Summary as Summary>::Context,
+    ) -> Option<T::Summary> {
+        match Arc::make_mut(&mut self.0) {
+            Node::Internal {
+                summary,
+                child_summaries,
+                child_trees,
+                ..
+            } => {
+                let last_summary = child_summaries.last_mut().unwrap();
+                let last_child = child_trees.last_mut().unwrap();
+                *last_summary = last_child.with_last_mut_recursive(f, ctx).unwrap();
+                *summary = sum(child_summaries.iter(), ctx);
+                Some(summary.clone())
+            }
+            Node::Leaf {
+                summary,
+                items,
+                item_summaries,
+            } => {
+                if let Some((item, item_summary)) = items.last_mut().zip(item_summaries.last_mut())
+                {
+                    (f)(item);
+                    *item_summary = item.summary();
+                    *summary = sum(item_summaries.iter(), ctx);
+                    Some(summary.clone())
+                } else {
+                    None
+                }
+            }
+        }
+    }
+
     pub fn extent<'a, D: Dimension<'a, T::Summary>>(&'a self) -> D {
         let mut extent = D::default();
         match self.0.as_ref() {