diff --git a/crates/clock/src/clock.rs b/crates/clock/src/clock.rs index cb0808abcae1709020f3fd3077436aeb1140a140..57612c5ff70ad7088dc4ff4bc348377b78184bae 100644 --- a/crates/clock/src/clock.rs +++ b/crates/clock/src/clock.rs @@ -61,8 +61,8 @@ pub type Seq = u32; /// used to determine the ordering of events in the editor. #[derive(Clone, Copy, Eq, Hash, PartialEq, Serialize, Deserialize)] pub struct Lamport { - pub replica_id: ReplicaId, pub value: Seq, + pub replica_id: ReplicaId, } /// A [version vector](https://en.wikipedia.org/wiki/Version_vector). diff --git a/crates/language/src/proto.rs b/crates/language/src/proto.rs index 132f971675ede12bb8ef5f941b57415f22d7ba88..89c44513067f6d2309d68a9f38984988358d8877 100644 --- a/crates/language/src/proto.rs +++ b/crates/language/src/proto.rs @@ -496,7 +496,7 @@ pub fn deserialize_anchor(anchor: proto::Anchor) -> Option { }; Some(Anchor::new( timestamp, - anchor.offset as usize, + anchor.offset as u32, bias, buffer_id, )) diff --git a/crates/text/src/anchor.rs b/crates/text/src/anchor.rs index 22a5c3090a1ef9e1c3581893ae8cbe16f79d776b..63e0570e91ef08dfce02fbbca25e97ee7519dc0a 100644 --- a/crates/text/src/anchor.rs +++ b/crates/text/src/anchor.rs @@ -20,7 +20,7 @@ pub struct Anchor { /// The byte offset into the text inserted in the operation /// at `timestamp`. - pub offset: usize, + pub offset: u32, /// Whether this anchor stays attached to the character *before* or *after* /// the offset. pub bias: Bias, @@ -49,7 +49,7 @@ impl Anchor { pub const MIN: Self = Self { timestamp_replica_id: clock::Lamport::MIN.replica_id, timestamp_value: clock::Lamport::MIN.value, - offset: usize::MIN, + offset: u32::MIN, bias: Bias::Left, buffer_id: None, }; @@ -57,14 +57,14 @@ impl Anchor { pub const MAX: Self = Self { timestamp_replica_id: clock::Lamport::MAX.replica_id, timestamp_value: clock::Lamport::MAX.value, - offset: usize::MAX, + offset: u32::MAX, bias: Bias::Right, buffer_id: None, }; pub fn new( timestamp: clock::Lamport, - offset: usize, + offset: u32, bias: Bias, buffer_id: Option, ) -> Self { @@ -81,7 +81,7 @@ impl Anchor { Self { timestamp_replica_id: clock::Lamport::MIN.replica_id, timestamp_value: clock::Lamport::MIN.value, - offset: usize::MIN, + offset: u32::MIN, bias: Bias::Left, buffer_id: Some(buffer_id), } @@ -91,7 +91,7 @@ impl Anchor { Self { timestamp_replica_id: clock::Lamport::MAX.replica_id, timestamp_value: clock::Lamport::MAX.value, - offset: usize::MAX, + offset: u32::MAX, bias: Bias::Right, buffer_id: Some(buffer_id), } @@ -190,13 +190,13 @@ impl Anchor { pub fn is_min(&self) -> bool { self.timestamp() == clock::Lamport::MIN - && self.offset == usize::MIN + && self.offset == u32::MIN && self.bias == Bias::Left } pub fn is_max(&self) -> bool { self.timestamp() == clock::Lamport::MAX - && self.offset == usize::MAX + && self.offset == u32::MAX && self.bias == Bias::Right } diff --git a/crates/text/src/locator.rs b/crates/text/src/locator.rs index cc94441a3d1ea2654875cd286d91b9dc2334ab53..be72c4dd9564d4c6024ce0206ff0eb99b0cd457b 100644 --- a/crates/text/src/locator.rs +++ b/crates/text/src/locator.rs @@ -8,18 +8,32 @@ use std::iter; /// /// The initial location for a collection should be `Locator::between(Locator::min(), Locator::max())`, /// leaving room for items to be inserted before and after it. -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Locator(SmallVec<[u64; 4]>); +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Locator(SmallVec<[u64; 2]>); + +impl Clone for Locator { + fn clone(&self) -> Self { + // We manually implement clone to avoid the overhead of SmallVec's clone implementation. + // Using `from_slice` is faster than `clone` for SmallVec as we can use our `Copy` implementation of u64. + Self { + 0: SmallVec::from_slice(&self.0), + } + } + + fn clone_from(&mut self, source: &Self) { + self.0.clone_from(&source.0); + } +} impl Locator { pub const fn min() -> Self { - // SAFETY: 1 is <= 4 - Self(unsafe { SmallVec::from_const_with_len_unchecked([u64::MIN; 4], 1) }) + // SAFETY: 1 is <= 2 + Self(unsafe { SmallVec::from_const_with_len_unchecked([u64::MIN; 2], 1) }) } pub const fn max() -> Self { - // SAFETY: 1 is <= 4 - Self(unsafe { SmallVec::from_const_with_len_unchecked([u64::MAX; 4], 1) }) + // SAFETY: 1 is <= 2 + Self(unsafe { SmallVec::from_const_with_len_unchecked([u64::MAX; 2], 1) }) } pub const fn min_ref() -> &'static Self { @@ -40,6 +54,7 @@ impl Locator { let rhs = rhs.0.iter().copied().chain(iter::repeat(u64::MAX)); let mut location = SmallVec::new(); for (lhs, rhs) in lhs.zip(rhs) { + // This shift is essential! It optimizes for the common case of sequential typing. let mid = lhs + ((rhs.saturating_sub(lhs)) >> 48); location.push(mid); if mid > lhs { @@ -127,4 +142,36 @@ mod tests { ); } } + + // Simulates 100,000 sequential forward appends (the pattern used when + // building a buffer's initial fragments and when + // `push_fragments_for_insertion` chains new text fragments). + #[test] + fn test_sequential_forward_append_stays_at_depth_1() { + let mut prev = Locator::min(); + let max = Locator::max(); + for _ in 0..100_000 { + let loc = Locator::between(&prev, &max); + assert_eq!(loc.len(), 1, "sequential forward append grew past depth 1"); + prev = loc; + } + } + + // Simulates the most common real editing pattern: a fragment is split + // (producing a depth-2 prefix), then 10,000 new fragments are inserted + // sequentially forward within that split region. + #[test] + fn test_typing_at_cursor_stays_at_depth_2() { + let initial = Locator::between(&Locator::min(), &Locator::max()); + let prefix = Locator::between(&Locator::min(), &initial); + assert_eq!(prefix.len(), 2); + + let suffix_id = initial; + let mut prev = prefix; + for _ in 0..10_000 { + let loc = Locator::between(&prev, &suffix_id); + assert_eq!(loc.len(), 2, "forward typing after split grew past depth 2"); + prev = loc; + } + } } diff --git a/crates/text/src/tests.rs b/crates/text/src/tests.rs index c9e04e407ffdb8ffde6b139e01d78822e54e1a4b..194ac2a40d5ac96a39177eedd35b991ded30de38 100644 --- a/crates/text/src/tests.rs +++ b/crates/text/src/tests.rs @@ -810,3 +810,188 @@ fn test_random_concurrent_edits(mut rng: StdRng) { buffer.check_invariants(); } } + +#[test] +fn test_new_normalized_splits_large_base_text() { + // ASCII text that exceeds max_insertion_len + let text = "abcdefghij".repeat(10); // 100 bytes + let rope = Rope::from(text.as_str()); + let buffer = Buffer::new_normalized( + ReplicaId::LOCAL, + BufferId::new(1).unwrap(), + LineEnding::Unix, + rope, + ); + assert_eq!(buffer.text(), text); + buffer.check_invariants(); + + // Verify anchors at various positions, including across chunk boundaries + for offset in [0, 1, 15, 16, 17, 50, 99] { + let anchor = buffer.anchor_before(offset); + assert_eq!( + anchor.to_offset(&buffer), + offset, + "anchor_before({offset}) round-tripped incorrectly" + ); + let anchor = buffer.anchor_after(offset); + assert_eq!( + anchor.to_offset(&buffer), + offset, + "anchor_after({offset}) round-tripped incorrectly" + ); + } + + // Verify editing works after a split initialization + let mut buffer = buffer; + buffer.edit([(50..60, "XYZ")]); + let mut expected = text; + expected.replace_range(50..60, "XYZ"); + assert_eq!(buffer.text(), expected); + buffer.check_invariants(); +} + +#[test] +fn test_new_normalized_splits_large_base_text_with_multibyte_chars() { + // Use multi-byte chars (é is 2 bytes in UTF-8) so that a naive byte-level + // split would land in the middle of a character. + let unit = "ééééééééé"; // 9 chars × 2 bytes = 18 bytes + let text = unit.repeat(6); // 108 bytes + let rope = Rope::from(text.as_str()); + let buffer = Buffer::new_normalized( + ReplicaId::LOCAL, + BufferId::new(1).unwrap(), + LineEnding::Unix, + rope, + ); + assert_eq!(buffer.text(), text); + buffer.check_invariants(); + + // Every anchor should resolve correctly even though chunks had to be + // rounded down to a char boundary. + let snapshot = buffer.snapshot(); + for offset in (0..text.len()).filter(|o| text.is_char_boundary(*o)) { + let anchor = snapshot.anchor_before(offset); + assert_eq!( + anchor.to_offset(snapshot), + offset, + "anchor round-trip failed at byte offset {offset}" + ); + } +} + +#[test] +fn test_new_normalized_small_text_unchanged() { + // Text that fits in a single chunk should produce exactly one fragment, + // matching the original single-fragment behaviour. + let text = "hello world"; + let rope = Rope::from(text); + let buffer = Buffer::new_normalized( + ReplicaId::LOCAL, + BufferId::new(1).unwrap(), + LineEnding::Unix, + rope, + ); + assert_eq!(buffer.text(), text); + buffer.check_invariants(); + assert_eq!(buffer.snapshot().fragments.items(&None).len(), 1); +} + +#[test] +fn test_edit_splits_large_insertion() { + let mut buffer = Buffer::new(ReplicaId::LOCAL, BufferId::new(1).unwrap(), "abcdefghij"); + + let large_text: Arc = "X".repeat(100).into(); + let edits = vec![(3..7, large_text.clone())]; + + buffer.edit(edits); + + let expected = format!("abc{}hij", large_text); + assert_eq!(buffer.text(), expected); + buffer.check_invariants(); + + // Anchors should resolve correctly throughout the buffer. + for offset in [0, 3, 50, 103, expected.len()] { + let anchor = buffer.anchor_before(offset); + assert_eq!( + anchor.to_offset(&buffer), + offset, + "anchor_before({offset}) round-tripped incorrectly" + ); + } +} + +#[test] +fn test_edit_splits_large_insertion_with_multibyte_chars() { + let mut buffer = Buffer::new(ReplicaId::LOCAL, BufferId::new(1).unwrap(), "abcdefghij"); + + // 4-byte chars so that naive byte splits would land mid-character. + let large_text: Arc = "😀".repeat(30).into(); // 30 × 4 = 120 bytes + let edits = vec![(5..5, large_text.clone())]; + + buffer.edit(edits); + + let expected = format!("abcde{}fghij", large_text); + assert_eq!(buffer.text(), expected); + buffer.check_invariants(); +} + +#[test] +fn test_edit_splits_large_insertion_among_multiple_edits() { + let mut buffer = Buffer::new(ReplicaId::LOCAL, BufferId::new(1).unwrap(), "ABCDEFGHIJ"); + + let large_text: Arc = "x".repeat(60).into(); + // Three edits: small, large, small. The large one must be split while + // preserving the correct positions of the surrounding edits. + let edits = vec![ + (1..2, Arc::from("y")), // replace "B" with "y" + (4..6, large_text.clone()), // replace "EF" with 60 x's + (9..9, Arc::from("z")), // insert "z" before "J" + ]; + + buffer.edit(edits); + + // Original: A B C D E F G H I J + // After (1..2, "y"): A y C D E F G H I J + // After (4..6, large): A y C D <60 x's> G H I J + // After (9..9, "z"): A y C D <60 x's> G H I z J + let expected = format!("AyCD{}GHIzJ", large_text); + assert_eq!(buffer.text(), expected); + buffer.check_invariants(); +} + +#[test] +fn test_edit_splits_multiple_large_insertions() { + let mut buffer = Buffer::new(ReplicaId::LOCAL, BufferId::new(1).unwrap(), "ABCDE"); + + let text1: Arc = "a".repeat(40).into(); + let text2: Arc = "b".repeat(40).into(); + let edits = vec![ + (1..2, text1.clone()), // replace "B" with 40 a's + (3..4, text2.clone()), // replace "D" with 40 b's + ]; + + buffer.edit(edits); + + let expected = format!("A{}C{}E", text1, text2); + assert_eq!(buffer.text(), expected); + buffer.check_invariants(); +} + +#[test] +fn test_edit_undo_after_split() { + let mut buffer = Buffer::new(ReplicaId::LOCAL, BufferId::new(1).unwrap(), "hello world"); + buffer.set_group_interval(Duration::from_secs(0)); + let original = buffer.text(); + + let large_text: Arc = "Z".repeat(50).into(); + let edits = vec![(5..6, large_text)]; + buffer.edit(edits); + assert_ne!(buffer.text(), original); + buffer.check_invariants(); + + // Undo should restore the original text even though the edit was split + // into multiple internal operations grouped in one transaction. + buffer.undo(); + assert_eq!(buffer.text(), original); + buffer.check_invariants(); +} diff --git a/crates/text/src/text.rs b/crates/text/src/text.rs index 2c51a0d5e5b29bc08fdacc6b8b90edd8f65cd83d..a5bdbe443bbaa4723c8d3104bfed28e4c2fe8fdb 100644 --- a/crates/text/src/text.rs +++ b/crates/text/src/text.rs @@ -48,6 +48,12 @@ use util::RandomCharIter; static LINE_SEPARATORS_REGEX: LazyLock = LazyLock::new(|| Regex::new(r"\r\n|\r").expect("Failed to create LINE_SEPARATORS_REGEX")); +/// The maximum length of a single insertion operation. +/// Fragments larger than this will be split into multiple smaller +/// fragments. This allows us to use relative `u32` offsets instead of `usize`, +/// reducing memory usage. +const MAX_INSERTION_LEN: usize = if cfg!(test) { 16 } else { u32::MAX as usize }; + pub type TransactionId = clock::Lamport; pub struct Buffer { @@ -155,18 +161,38 @@ struct History { #[derive(Clone, Debug, Eq, PartialEq)] struct InsertionSlice { - edit_id: clock::Lamport, - insertion_id: clock::Lamport, - range: Range, + // Inline the lamports to allow the replica ids to share the same alignment + // saving 4 bytes space edit_id: clock::Lamport, + edit_id_value: clock::Seq, + edit_id_replica_id: ReplicaId, + // insertion_id: clock::Lamport, + insertion_id_value: clock::Seq, + insertion_id_replica_id: ReplicaId, + range: Range, } impl Ord for InsertionSlice { fn cmp(&self, other: &Self) -> Ordering { - self.edit_id - .cmp(&other.edit_id) - .then_with(|| self.insertion_id.cmp(&other.insertion_id)) - .then_with(|| self.range.start.cmp(&other.range.start)) - .then_with(|| self.range.end.cmp(&other.range.end)) + Lamport { + value: self.edit_id_value, + replica_id: self.edit_id_replica_id, + } + .cmp(&Lamport { + value: other.edit_id_value, + replica_id: other.edit_id_replica_id, + }) + .then_with(|| { + Lamport { + value: self.insertion_id_value, + replica_id: self.insertion_id_replica_id, + } + .cmp(&Lamport { + value: other.insertion_id_value, + replica_id: other.insertion_id_replica_id, + }) + }) + .then_with(|| self.range.start.cmp(&other.range.start)) + .then_with(|| self.range.end.cmp(&other.range.end)) } } @@ -179,8 +205,10 @@ impl PartialOrd for InsertionSlice { impl InsertionSlice { fn from_fragment(edit_id: clock::Lamport, fragment: &Fragment) -> Self { Self { - edit_id, - insertion_id: fragment.timestamp, + edit_id_value: edit_id.value, + edit_id_replica_id: edit_id.replica_id, + insertion_id_value: fragment.timestamp.value, + insertion_id_replica_id: fragment.timestamp.replica_id, range: fragment.insertion_offset..fragment.insertion_offset + fragment.len, } } @@ -309,6 +337,7 @@ impl History { fn finalize_last_transaction(&mut self) -> Option<&Transaction> { self.undo_stack.last_mut().map(|entry| { + entry.transaction.edit_ids.shrink_to_fit(); entry.suppress_grouping = true; &entry.transaction }) @@ -489,7 +518,7 @@ struct Edits<'a, D: TextDimension, F: FnMut(&FragmentSummary) -> bool> { since: &'a clock::Global, old_end: D, new_end: D, - range: Range<(&'a Locator, usize)>, + range: Range<(&'a Locator, u32)>, buffer_id: BufferId, } @@ -536,18 +565,18 @@ impl Edit<(D1, D2)> { } #[derive(Eq, PartialEq, Clone, Debug)] -pub struct Fragment { - pub id: Locator, - pub timestamp: clock::Lamport, - pub insertion_offset: usize, - pub len: usize, - pub visible: bool, - pub deletions: SmallVec<[clock::Lamport; 2]>, - pub max_undos: clock::Global, +struct Fragment { + id: Locator, + timestamp: clock::Lamport, + insertion_offset: u32, + len: u32, + visible: bool, + deletions: SmallVec<[clock::Lamport; 2]>, + max_undos: clock::Global, } #[derive(Eq, PartialEq, Clone, Debug)] -pub struct FragmentSummary { +struct FragmentSummary { text: FragmentTextSummary, max_id: Locator, max_version: clock::Global, @@ -575,14 +604,14 @@ impl<'a> sum_tree::Dimension<'a, FragmentSummary> for FragmentTextSummary { #[derive(Eq, PartialEq, Clone, Debug)] struct InsertionFragment { timestamp: clock::Lamport, - split_offset: usize, + split_offset: u32, fragment_id: Locator, } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] struct InsertionFragmentKey { timestamp: clock::Lamport, - split_offset: usize, + split_offset: u32, } #[derive(Clone, Debug, Eq, PartialEq)] @@ -740,18 +769,37 @@ impl Buffer { let insertion_timestamp = clock::Lamport::new(ReplicaId::LOCAL); lamport_clock.observe(insertion_timestamp); version.observe(insertion_timestamp); - let fragment_id = Locator::between(&Locator::min(), &Locator::max()); - let fragment = Fragment { - id: fragment_id, - timestamp: insertion_timestamp, - insertion_offset: 0, - len: visible_text.len(), - visible: true, - deletions: Default::default(), - max_undos: Default::default(), - }; - insertions.push(InsertionFragment::new(&fragment), ()); - fragments.push(fragment, &None); + + let mut insertion_offset: u32 = 0; + let mut text_offset: usize = 0; + let mut prev_locator = Locator::min(); + + while text_offset < visible_text.len() { + let target_end = visible_text.len().min(text_offset + MAX_INSERTION_LEN); + let chunk_end = if target_end == visible_text.len() { + target_end + } else { + visible_text.floor_char_boundary(target_end) + }; + let chunk_len = chunk_end - text_offset; + + let fragment_id = Locator::between(&prev_locator, &Locator::max()); + let fragment = Fragment { + id: fragment_id.clone(), + timestamp: insertion_timestamp, + insertion_offset, + len: chunk_len as u32, + visible: true, + deletions: Default::default(), + max_undos: Default::default(), + }; + insertions.push(InsertionFragment::new(&fragment), ()); + fragments.push(fragment, &None); + + prev_locator = fragment_id; + insertion_offset += chunk_len as u32; + text_offset = chunk_end; + } } Buffer { @@ -853,7 +901,7 @@ impl Buffer { new_text: Vec::with_capacity(edits.len()), }; let mut new_insertions = Vec::new(); - let mut insertion_offset = 0; + let mut insertion_offset: u32 = 0; let mut insertion_slices = Vec::new(); let mut edits = edits @@ -879,8 +927,9 @@ impl Buffer { if fragment_start > old_fragments.start().visible { if fragment_end > fragment_start { let mut suffix = old_fragments.item().unwrap().clone(); - suffix.len = fragment_end - fragment_start; - suffix.insertion_offset += fragment_start - old_fragments.start().visible; + suffix.len = (fragment_end - fragment_start) as u32; + suffix.insertion_offset += + (fragment_start - old_fragments.start().visible) as u32; new_insertions.push(InsertionFragment::insert_new(&suffix)); new_ropes.push_fragment(&suffix, suffix.visible); new_fragments.push(suffix, &None); @@ -899,8 +948,8 @@ impl Buffer { // Preserve any portion of the current fragment that precedes this range. if fragment_start < range.start { let mut prefix = old_fragments.item().unwrap().clone(); - prefix.len = range.start - fragment_start; - prefix.insertion_offset += fragment_start - old_fragments.start().visible; + prefix.len = (range.start - fragment_start) as u32; + prefix.insertion_offset += (fragment_start - old_fragments.start().visible) as u32; prefix.id = Locator::between(&new_fragments.summary().max_id, &prefix.id); new_insertions.push(InsertionFragment::insert_new(&prefix)); new_ropes.push_fragment(&prefix, prefix.visible); @@ -912,29 +961,24 @@ impl Buffer { if !new_text.is_empty() { let new_start = new_fragments.summary().text.visible; - let fragment = Fragment { - id: Locator::between( - &new_fragments.summary().max_id, - old_fragments - .item() - .map_or(&Locator::max(), |old_fragment| &old_fragment.id), - ), + let next_fragment_id = old_fragments + .item() + .map_or(Locator::max_ref(), |old_fragment| &old_fragment.id); + Self::push_fragments_for_insertion( + new_text.as_ref(), timestamp, - insertion_offset, - len: new_text.len(), - deletions: Default::default(), - max_undos: Default::default(), - visible: true, - }; + &mut insertion_offset, + &mut new_fragments, + &mut new_insertions, + &mut insertion_slices, + &mut new_ropes, + next_fragment_id, + timestamp, + ); edits_patch.push(Edit { old: fragment_start..fragment_start, new: new_start..new_start + new_text.len(), }); - insertion_slices.push(InsertionSlice::from_fragment(timestamp, &fragment)); - new_insertions.push(InsertionFragment::insert_new(&fragment)); - new_ropes.push_str(new_text.as_ref()); - new_fragments.push(fragment, &None); - insertion_offset += new_text.len(); } // Advance through every fragment that intersects this range, marking the intersecting @@ -945,8 +989,9 @@ impl Buffer { let mut intersection = fragment.clone(); let intersection_end = cmp::min(range.end, fragment_end); if fragment.visible { - intersection.len = intersection_end - fragment_start; - intersection.insertion_offset += fragment_start - old_fragments.start().visible; + intersection.len = (intersection_end - fragment_start) as u32; + intersection.insertion_offset += + (fragment_start - old_fragments.start().visible) as u32; intersection.id = Locator::between(&new_fragments.summary().max_id, &intersection.id); intersection.deletions.push(timestamp); @@ -983,8 +1028,8 @@ impl Buffer { let fragment_end = old_fragments.end().visible; if fragment_end > fragment_start { let mut suffix = old_fragments.item().unwrap().clone(); - suffix.len = fragment_end - fragment_start; - suffix.insertion_offset += fragment_start - old_fragments.start().visible; + suffix.len = (fragment_end - fragment_start) as u32; + suffix.insertion_offset += (fragment_start - old_fragments.start().visible) as u32; new_insertions.push(InsertionFragment::insert_new(&suffix)); new_ropes.push_fragment(&suffix, suffix.visible); new_fragments.push(suffix, &None); @@ -1075,7 +1120,7 @@ impl Buffer { let mut insertion_slices = Vec::new(); let cx = Some(version.clone()); let mut new_insertions = Vec::new(); - let mut insertion_offset = 0; + let mut insertion_offset: u32 = 0; let mut new_ropes = RopeBuilder::new(self.visible_text.cursor(0), self.deleted_text.cursor(0)); let mut old_fragments = self @@ -1097,9 +1142,9 @@ impl Buffer { if fragment_start > old_fragments.start().0.full_offset() { if fragment_end > fragment_start { let mut suffix = old_fragments.item().unwrap().clone(); - suffix.len = fragment_end.0 - fragment_start.0; + suffix.len = (fragment_end.0 - fragment_start.0) as u32; suffix.insertion_offset += - fragment_start - old_fragments.start().0.full_offset(); + (fragment_start - old_fragments.start().0.full_offset()) as u32; new_insertions.push(InsertionFragment::insert_new(&suffix)); new_ropes.push_fragment(&suffix, suffix.visible); new_fragments.push(suffix, &None); @@ -1118,8 +1163,9 @@ impl Buffer { let fragment_end = old_fragments.end().0.full_offset(); if fragment_end == range.start && fragment_end > fragment_start { let mut fragment = old_fragments.item().unwrap().clone(); - fragment.len = fragment_end.0 - fragment_start.0; - fragment.insertion_offset += fragment_start - old_fragments.start().0.full_offset(); + fragment.len = (fragment_end.0 - fragment_start.0) as u32; + fragment.insertion_offset += + (fragment_start - old_fragments.start().0.full_offset()) as u32; new_insertions.push(InsertionFragment::insert_new(&fragment)); new_ropes.push_fragment(&fragment, fragment.visible); new_fragments.push(fragment, &None); @@ -1144,8 +1190,9 @@ impl Buffer { // Preserve any portion of the current fragment that precedes this range. if fragment_start < range.start { let mut prefix = old_fragments.item().unwrap().clone(); - prefix.len = range.start.0 - fragment_start.0; - prefix.insertion_offset += fragment_start - old_fragments.start().0.full_offset(); + prefix.len = (range.start.0 - fragment_start.0) as u32; + prefix.insertion_offset += + (fragment_start - old_fragments.start().0.full_offset()) as u32; prefix.id = Locator::between(&new_fragments.summary().max_id, &prefix.id); new_insertions.push(InsertionFragment::insert_new(&prefix)); fragment_start = range.start; @@ -1160,29 +1207,24 @@ impl Buffer { old_start += fragment_start.0 - old_fragments.start().0.full_offset().0; } let new_start = new_fragments.summary().text.visible; - let fragment = Fragment { - id: Locator::between( - &new_fragments.summary().max_id, - old_fragments - .item() - .map_or(&Locator::max(), |old_fragment| &old_fragment.id), - ), + let next_fragment_id = old_fragments + .item() + .map_or(Locator::max_ref(), |old_fragment| &old_fragment.id); + Self::push_fragments_for_insertion( + new_text, timestamp, - insertion_offset, - len: new_text.len(), - deletions: Default::default(), - max_undos: Default::default(), - visible: true, - }; + &mut insertion_offset, + &mut new_fragments, + &mut new_insertions, + &mut insertion_slices, + &mut new_ropes, + next_fragment_id, + timestamp, + ); edits_patch.push(Edit { old: old_start..old_start, new: new_start..new_start + new_text.len(), }); - insertion_slices.push(InsertionSlice::from_fragment(timestamp, &fragment)); - new_insertions.push(InsertionFragment::insert_new(&fragment)); - new_ropes.push_str(new_text); - new_fragments.push(fragment, &None); - insertion_offset += new_text.len(); } // Advance through every fragment that intersects this range, marking the intersecting @@ -1193,9 +1235,9 @@ impl Buffer { let mut intersection = fragment.clone(); let intersection_end = cmp::min(range.end, fragment_end); if fragment.was_visible(version, &self.undo_map) { - intersection.len = intersection_end.0 - fragment_start.0; + intersection.len = (intersection_end.0 - fragment_start.0) as u32; intersection.insertion_offset += - fragment_start - old_fragments.start().0.full_offset(); + (fragment_start - old_fragments.start().0.full_offset()) as u32; intersection.id = Locator::between(&new_fragments.summary().max_id, &intersection.id); intersection.deletions.push(timestamp); @@ -1208,7 +1250,7 @@ impl Buffer { + (fragment_start.0 - old_fragments.start().0.full_offset().0); let new_start = new_fragments.summary().text.visible; edits_patch.push(Edit { - old: old_start..old_start + intersection.len, + old: old_start..old_start + intersection.len as usize, new: new_start..new_start, }); } @@ -1229,8 +1271,9 @@ impl Buffer { let fragment_end = old_fragments.end().0.full_offset(); if fragment_end > fragment_start { let mut suffix = old_fragments.item().unwrap().clone(); - suffix.len = fragment_end.0 - fragment_start.0; - suffix.insertion_offset += fragment_start - old_fragments.start().0.full_offset(); + suffix.len = (fragment_end.0 - fragment_start.0) as u32; + suffix.insertion_offset += + (fragment_start - old_fragments.start().0.full_offset()) as u32; new_insertions.push(InsertionFragment::insert_new(&suffix)); new_ropes.push_fragment(&suffix, suffix.visible); new_fragments.push(suffix, &None); @@ -1252,6 +1295,49 @@ impl Buffer { self.subscriptions.publish_mut(&edits_patch) } + fn push_fragments_for_insertion( + new_text: &str, + timestamp: clock::Lamport, + insertion_offset: &mut u32, + new_fragments: &mut SumTree, + new_insertions: &mut Vec>, + insertion_slices: &mut Vec, + new_ropes: &mut RopeBuilder, + next_fragment_id: &Locator, + edit_timestamp: clock::Lamport, + ) { + let mut text_offset = 0; + while text_offset < new_text.len() { + let target_end = new_text.len().min(text_offset + MAX_INSERTION_LEN); + let chunk_end = if target_end == new_text.len() { + target_end + } else { + new_text.floor_char_boundary(target_end) + }; + if chunk_end == text_offset { + break; + } + let chunk_len = chunk_end - text_offset; + + let fragment = Fragment { + id: Locator::between(&new_fragments.summary().max_id, next_fragment_id), + timestamp, + insertion_offset: *insertion_offset, + len: chunk_len as u32, + deletions: Default::default(), + max_undos: Default::default(), + visible: true, + }; + insertion_slices.push(InsertionSlice::from_fragment(edit_timestamp, &fragment)); + new_insertions.push(InsertionFragment::insert_new(&fragment)); + new_fragments.push(fragment, &None); + + *insertion_offset += chunk_len as u32; + text_offset = chunk_end; + } + new_ropes.push_str(new_text); + } + fn fragment_ids_for_edits<'a>( &'a self, edit_ids: impl Iterator, @@ -1260,38 +1346,56 @@ impl Buffer { let mut insertion_slices = Vec::new(); for edit_id in edit_ids { let insertion_slice = InsertionSlice { - edit_id: *edit_id, - insertion_id: clock::Lamport::MIN, + edit_id_value: edit_id.value, + edit_id_replica_id: edit_id.replica_id, + insertion_id_value: Lamport::MIN.value, + insertion_id_replica_id: Lamport::MIN.replica_id, range: 0..0, }; let slices = self .snapshot .insertion_slices .iter_from(&insertion_slice) - .take_while(|slice| slice.edit_id == *edit_id); + .take_while(|slice| { + Lamport { + value: slice.edit_id_value, + replica_id: slice.edit_id_replica_id, + } == *edit_id + }); insertion_slices.extend(slices) } - insertion_slices - .sort_unstable_by_key(|s| (s.insertion_id, s.range.start, Reverse(s.range.end))); + insertion_slices.sort_unstable_by_key(|s| { + ( + Lamport { + value: s.insertion_id_value, + replica_id: s.insertion_id_replica_id, + }, + s.range.start, + Reverse(s.range.end), + ) + }); // Get all of the fragments corresponding to these insertion slices. let mut fragment_ids = Vec::new(); let mut insertions_cursor = self.insertions.cursor::(()); for insertion_slice in &insertion_slices { - if insertion_slice.insertion_id != insertions_cursor.start().timestamp + let insertion_id = Lamport { + value: insertion_slice.insertion_id_value, + replica_id: insertion_slice.insertion_id_replica_id, + }; + if insertion_id != insertions_cursor.start().timestamp || insertion_slice.range.start > insertions_cursor.start().split_offset { insertions_cursor.seek_forward( &InsertionFragmentKey { - timestamp: insertion_slice.insertion_id, + timestamp: insertion_id, split_offset: insertion_slice.range.start, }, Bias::Left, ); } while let Some(item) = insertions_cursor.item() { - if item.timestamp != insertion_slice.insertion_id - || item.split_offset >= insertion_slice.range.end + if item.timestamp != insertion_id || item.split_offset >= insertion_slice.range.end { break; } @@ -1330,13 +1434,13 @@ impl Buffer { let new_start = new_fragments.summary().text.visible; if fragment_was_visible && !fragment.visible { edits.push(Edit { - old: old_start..old_start + fragment.len, + old: old_start..old_start + fragment.len as usize, new: new_start..new_start, }); } else if !fragment_was_visible && fragment.visible { edits.push(Edit { old: old_start..old_start, - new: new_start..new_start + fragment.len, + new: new_start..new_start + fragment.len as usize, }); } new_ropes.push_fragment(&fragment, fragment_was_visible); @@ -1582,7 +1686,12 @@ impl Buffer { cursor.seek_forward(&Some(fragment_id), Bias::Left); let fragment = cursor.item()?; let start_offset = cursor.start().1; - let end_offset = start_offset + if fragment.visible { fragment.len } else { 0 }; + let end_offset = start_offset + + if fragment.visible { + fragment.len as usize + } else { + 0 + }; Some(start_offset..end_offset) }); @@ -2038,10 +2147,6 @@ impl BufferSnapshot { self.deleted_text.to_string() } - pub fn fragments(&self) -> impl Iterator { - self.fragments.iter() - } - pub fn text_summary(&self) -> TextSummary { self.visible_text.summary() } @@ -2287,7 +2392,7 @@ impl BufferSnapshot { let fragment = fragment_cursor.item().unwrap(); let mut fragment_offset = fragment_cursor.start().1; if fragment.visible { - fragment_offset += anchor.offset - insertion.split_offset; + fragment_offset += (anchor.offset - insertion.split_offset) as usize; } position.add_assign(&text_cursor.summary(fragment_offset)); @@ -2332,7 +2437,7 @@ impl BufferSnapshot { let fragment = item.unwrap(); let mut fragment_offset = start.1; if fragment.visible { - fragment_offset += anchor.offset - insertion.split_offset; + fragment_offset += (anchor.offset - insertion.split_offset) as usize; } fragment_offset } @@ -2457,7 +2562,7 @@ impl BufferSnapshot { let overshoot = offset - start; Anchor::new( fragment.timestamp, - fragment.insertion_offset + overshoot, + fragment.insertion_offset + overshoot as u32, bias, Some(self.remote_id), ) @@ -2546,7 +2651,7 @@ impl BufferSnapshot { let mut visible_start = start.1.visible; let mut deleted_start = start.1.deleted; if let Some(fragment) = item { - let overshoot = range.start.offset - fragment.insertion_offset; + let overshoot = (range.start.offset - fragment.insertion_offset) as usize; if fragment.visible { visible_start += overshoot; } else { @@ -2706,7 +2811,7 @@ impl<'a> RopeBuilder<'a> { fn push_fragment(&mut self, fragment: &Fragment, was_visible: bool) { debug_assert!(fragment.len > 0); - self.push(fragment.len, was_visible, fragment.visible) + self.push(fragment.len as usize, was_visible, fragment.visible) } fn push(&mut self, len: usize, was_visible: bool, is_visible: bool) { @@ -2781,7 +2886,8 @@ impl bool> Iterator for Ed if fragment.id == *self.range.end.0 { visible_end = cmp::min( visible_end, - cursor.start().visible + (self.range.end.1 - fragment.insertion_offset), + cursor.start().visible + + (self.range.end.1 - fragment.insertion_offset) as usize, ); } @@ -2807,7 +2913,8 @@ impl bool> Iterator for Ed if fragment.id == *self.range.end.0 { deleted_end = cmp::min( deleted_end, - cursor.start().deleted + (self.range.end.1 - fragment.insertion_offset), + cursor.start().deleted + + (self.range.end.1 - fragment.insertion_offset) as usize, ); } @@ -2872,7 +2979,7 @@ impl sum_tree::Item for Fragment { FragmentSummary { max_id: self.id.clone(), text: FragmentTextSummary { - visible: self.len, + visible: self.len as usize, deleted: 0, }, max_version, @@ -2884,7 +2991,7 @@ impl sum_tree::Item for Fragment { max_id: self.id.clone(), text: FragmentTextSummary { visible: 0, - deleted: self.len, + deleted: self.len as usize, }, max_version, min_insertion_version,