text: Narrow insertion-relative offsets from `usize` to `u32` (#49801)

Created by Lukas Wirth

Reduces memory usage of `InsertionSlice` from 32 to 24 bytes, and of
`Fragment` from 120 to 96 bytes, by narrowing offsets that are relative
to individual insertion operations from `usize` to `u32`. These offsets
are bounded by the size of a single insertion, not the total buffer
size, so `u32` is sufficient.

To prevent any single insertion from exceeding `u32::MAX` bytes,
`Buffer::new_normalized`, `apply_local_edit`, and `apply_remote_edit`
now split large text insertions into multiple fragments via
`push_fragments_for_insertion`.

Release Notes:

- N/A (internal memory optimization; no user-facing behavior changes)

Change summary

crates/clock/src/clock.rs    |   2 
crates/language/src/proto.rs |   2 
crates/text/src/anchor.rs    |  16 
crates/text/src/locator.rs   |  59 +++++
crates/text/src/tests.rs     | 185 ++++++++++++++++++++
crates/text/src/text.rs      | 339 ++++++++++++++++++++++++-------------
6 files changed, 471 insertions(+), 132 deletions(-)

Detailed changes

crates/clock/src/clock.rs 🔗

@@ -61,8 +61,8 @@ pub type Seq = u32;
 /// used to determine the ordering of events in the editor.
 #[derive(Clone, Copy, Eq, Hash, PartialEq, Serialize, Deserialize)]
 pub struct Lamport {
-    pub replica_id: ReplicaId,
     pub value: Seq,
+    pub replica_id: ReplicaId,
 }
 
 /// A [version vector](https://en.wikipedia.org/wiki/Version_vector).

crates/language/src/proto.rs 🔗

@@ -496,7 +496,7 @@ pub fn deserialize_anchor(anchor: proto::Anchor) -> Option<Anchor> {
     };
     Some(Anchor::new(
         timestamp,
-        anchor.offset as usize,
+        anchor.offset as u32,
         bias,
         buffer_id,
     ))

crates/text/src/anchor.rs 🔗

@@ -20,7 +20,7 @@ pub struct Anchor {
 
     /// The byte offset into the text inserted in the operation
     /// at `timestamp`.
-    pub offset: usize,
+    pub offset: u32,
     /// Whether this anchor stays attached to the character *before* or *after*
     /// the offset.
     pub bias: Bias,
@@ -49,7 +49,7 @@ impl Anchor {
     pub const MIN: Self = Self {
         timestamp_replica_id: clock::Lamport::MIN.replica_id,
         timestamp_value: clock::Lamport::MIN.value,
-        offset: usize::MIN,
+        offset: u32::MIN,
         bias: Bias::Left,
         buffer_id: None,
     };
@@ -57,14 +57,14 @@ impl Anchor {
     pub const MAX: Self = Self {
         timestamp_replica_id: clock::Lamport::MAX.replica_id,
         timestamp_value: clock::Lamport::MAX.value,
-        offset: usize::MAX,
+        offset: u32::MAX,
         bias: Bias::Right,
         buffer_id: None,
     };
 
     pub fn new(
         timestamp: clock::Lamport,
-        offset: usize,
+        offset: u32,
         bias: Bias,
         buffer_id: Option<BufferId>,
     ) -> Self {
@@ -81,7 +81,7 @@ impl Anchor {
         Self {
             timestamp_replica_id: clock::Lamport::MIN.replica_id,
             timestamp_value: clock::Lamport::MIN.value,
-            offset: usize::MIN,
+            offset: u32::MIN,
             bias: Bias::Left,
             buffer_id: Some(buffer_id),
         }
@@ -91,7 +91,7 @@ impl Anchor {
         Self {
             timestamp_replica_id: clock::Lamport::MAX.replica_id,
             timestamp_value: clock::Lamport::MAX.value,
-            offset: usize::MAX,
+            offset: u32::MAX,
             bias: Bias::Right,
             buffer_id: Some(buffer_id),
         }
@@ -190,13 +190,13 @@ impl Anchor {
 
     pub fn is_min(&self) -> bool {
         self.timestamp() == clock::Lamport::MIN
-            && self.offset == usize::MIN
+            && self.offset == u32::MIN
             && self.bias == Bias::Left
     }
 
     pub fn is_max(&self) -> bool {
         self.timestamp() == clock::Lamport::MAX
-            && self.offset == usize::MAX
+            && self.offset == u32::MAX
             && self.bias == Bias::Right
     }
 

crates/text/src/locator.rs 🔗

@@ -8,18 +8,32 @@ use std::iter;
 ///
 /// The initial location for a collection should be `Locator::between(Locator::min(), Locator::max())`,
 /// leaving room for items to be inserted before and after it.
-#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub struct Locator(SmallVec<[u64; 4]>);
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Locator(SmallVec<[u64; 2]>);
+
+impl Clone for Locator {
+    fn clone(&self) -> Self {
+        // We manually implement clone to avoid the overhead of SmallVec's clone implementation.
+        // Using `from_slice` is faster than `clone` for SmallVec as we can use our `Copy` implementation of u64.
+        Self {
+            0: SmallVec::from_slice(&self.0),
+        }
+    }
+
+    fn clone_from(&mut self, source: &Self) {
+        self.0.clone_from(&source.0);
+    }
+}
 
 impl Locator {
     pub const fn min() -> Self {
-        // SAFETY: 1 is <= 4
-        Self(unsafe { SmallVec::from_const_with_len_unchecked([u64::MIN; 4], 1) })
+        // SAFETY: 1 is <= 2
+        Self(unsafe { SmallVec::from_const_with_len_unchecked([u64::MIN; 2], 1) })
     }
 
     pub const fn max() -> Self {
-        // SAFETY: 1 is <= 4
-        Self(unsafe { SmallVec::from_const_with_len_unchecked([u64::MAX; 4], 1) })
+        // SAFETY: 1 is <= 2
+        Self(unsafe { SmallVec::from_const_with_len_unchecked([u64::MAX; 2], 1) })
     }
 
     pub const fn min_ref() -> &'static Self {
@@ -40,6 +54,7 @@ impl Locator {
         let rhs = rhs.0.iter().copied().chain(iter::repeat(u64::MAX));
         let mut location = SmallVec::new();
         for (lhs, rhs) in lhs.zip(rhs) {
+            // This shift is essential! It optimizes for the common case of sequential typing.
             let mid = lhs + ((rhs.saturating_sub(lhs)) >> 48);
             location.push(mid);
             if mid > lhs {
@@ -127,4 +142,36 @@ mod tests {
             );
         }
     }
+
+    // Simulates 100,000 sequential forward appends (the pattern used when
+    // building a buffer's initial fragments and when
+    // `push_fragments_for_insertion` chains new text fragments).
+    #[test]
+    fn test_sequential_forward_append_stays_at_depth_1() {
+        let mut prev = Locator::min();
+        let max = Locator::max();
+        for _ in 0..100_000 {
+            let loc = Locator::between(&prev, &max);
+            assert_eq!(loc.len(), 1, "sequential forward append grew past depth 1");
+            prev = loc;
+        }
+    }
+
+    // Simulates the most common real editing pattern: a fragment is split
+    // (producing a depth-2 prefix), then 10,000 new fragments are inserted
+    // sequentially forward within that split region.
+    #[test]
+    fn test_typing_at_cursor_stays_at_depth_2() {
+        let initial = Locator::between(&Locator::min(), &Locator::max());
+        let prefix = Locator::between(&Locator::min(), &initial);
+        assert_eq!(prefix.len(), 2);
+
+        let suffix_id = initial;
+        let mut prev = prefix;
+        for _ in 0..10_000 {
+            let loc = Locator::between(&prev, &suffix_id);
+            assert_eq!(loc.len(), 2, "forward typing after split grew past depth 2");
+            prev = loc;
+        }
+    }
 }

crates/text/src/tests.rs 🔗

@@ -810,3 +810,188 @@ fn test_random_concurrent_edits(mut rng: StdRng) {
         buffer.check_invariants();
     }
 }
+
+#[test]
+fn test_new_normalized_splits_large_base_text() {
+    // ASCII text that exceeds max_insertion_len
+    let text = "abcdefghij".repeat(10); // 100 bytes
+    let rope = Rope::from(text.as_str());
+    let buffer = Buffer::new_normalized(
+        ReplicaId::LOCAL,
+        BufferId::new(1).unwrap(),
+        LineEnding::Unix,
+        rope,
+    );
+    assert_eq!(buffer.text(), text);
+    buffer.check_invariants();
+
+    // Verify anchors at various positions, including across chunk boundaries
+    for offset in [0, 1, 15, 16, 17, 50, 99] {
+        let anchor = buffer.anchor_before(offset);
+        assert_eq!(
+            anchor.to_offset(&buffer),
+            offset,
+            "anchor_before({offset}) round-tripped incorrectly"
+        );
+        let anchor = buffer.anchor_after(offset);
+        assert_eq!(
+            anchor.to_offset(&buffer),
+            offset,
+            "anchor_after({offset}) round-tripped incorrectly"
+        );
+    }
+
+    // Verify editing works after a split initialization
+    let mut buffer = buffer;
+    buffer.edit([(50..60, "XYZ")]);
+    let mut expected = text;
+    expected.replace_range(50..60, "XYZ");
+    assert_eq!(buffer.text(), expected);
+    buffer.check_invariants();
+}
+
+#[test]
+fn test_new_normalized_splits_large_base_text_with_multibyte_chars() {
+    // Use multi-byte chars (é is 2 bytes in UTF-8) so that a naive byte-level
+    // split would land in the middle of a character.
+    let unit = "ééééééééé"; // 9 chars × 2 bytes = 18 bytes
+    let text = unit.repeat(6); // 108 bytes
+    let rope = Rope::from(text.as_str());
+    let buffer = Buffer::new_normalized(
+        ReplicaId::LOCAL,
+        BufferId::new(1).unwrap(),
+        LineEnding::Unix,
+        rope,
+    );
+    assert_eq!(buffer.text(), text);
+    buffer.check_invariants();
+
+    // Every anchor should resolve correctly even though chunks had to be
+    // rounded down to a char boundary.
+    let snapshot = buffer.snapshot();
+    for offset in (0..text.len()).filter(|o| text.is_char_boundary(*o)) {
+        let anchor = snapshot.anchor_before(offset);
+        assert_eq!(
+            anchor.to_offset(snapshot),
+            offset,
+            "anchor round-trip failed at byte offset {offset}"
+        );
+    }
+}
+
+#[test]
+fn test_new_normalized_small_text_unchanged() {
+    // Text that fits in a single chunk should produce exactly one fragment,
+    // matching the original single-fragment behaviour.
+    let text = "hello world";
+    let rope = Rope::from(text);
+    let buffer = Buffer::new_normalized(
+        ReplicaId::LOCAL,
+        BufferId::new(1).unwrap(),
+        LineEnding::Unix,
+        rope,
+    );
+    assert_eq!(buffer.text(), text);
+    buffer.check_invariants();
+    assert_eq!(buffer.snapshot().fragments.items(&None).len(), 1);
+}
+
+#[test]
+fn test_edit_splits_large_insertion() {
+    let mut buffer = Buffer::new(ReplicaId::LOCAL, BufferId::new(1).unwrap(), "abcdefghij");
+
+    let large_text: Arc<str> = "X".repeat(100).into();
+    let edits = vec![(3..7, large_text.clone())];
+
+    buffer.edit(edits);
+
+    let expected = format!("abc{}hij", large_text);
+    assert_eq!(buffer.text(), expected);
+    buffer.check_invariants();
+
+    // Anchors should resolve correctly throughout the buffer.
+    for offset in [0, 3, 50, 103, expected.len()] {
+        let anchor = buffer.anchor_before(offset);
+        assert_eq!(
+            anchor.to_offset(&buffer),
+            offset,
+            "anchor_before({offset}) round-tripped incorrectly"
+        );
+    }
+}
+
+#[test]
+fn test_edit_splits_large_insertion_with_multibyte_chars() {
+    let mut buffer = Buffer::new(ReplicaId::LOCAL, BufferId::new(1).unwrap(), "abcdefghij");
+
+    // 4-byte chars so that naive byte splits would land mid-character.
+    let large_text: Arc<str> = "😀".repeat(30).into(); // 30 × 4 = 120 bytes
+    let edits = vec![(5..5, large_text.clone())];
+
+    buffer.edit(edits);
+
+    let expected = format!("abcde{}fghij", large_text);
+    assert_eq!(buffer.text(), expected);
+    buffer.check_invariants();
+}
+
+#[test]
+fn test_edit_splits_large_insertion_among_multiple_edits() {
+    let mut buffer = Buffer::new(ReplicaId::LOCAL, BufferId::new(1).unwrap(), "ABCDEFGHIJ");
+
+    let large_text: Arc<str> = "x".repeat(60).into();
+    // Three edits: small, large, small. The large one must be split while
+    // preserving the correct positions of the surrounding edits.
+    let edits = vec![
+        (1..2, Arc::from("y")),     // replace "B" with "y"
+        (4..6, large_text.clone()), // replace "EF" with 60 x's
+        (9..9, Arc::from("z")),     // insert "z" before "J"
+    ];
+
+    buffer.edit(edits);
+
+    // Original: A B C D E F G H I J
+    // After (1..2, "y"):       A y C D E F G H I J
+    // After (4..6, large):     A y C D <60 x's> G H I J
+    // After (9..9, "z"):       A y C D <60 x's> G H I z J
+    let expected = format!("AyCD{}GHIzJ", large_text);
+    assert_eq!(buffer.text(), expected);
+    buffer.check_invariants();
+}
+
+#[test]
+fn test_edit_splits_multiple_large_insertions() {
+    let mut buffer = Buffer::new(ReplicaId::LOCAL, BufferId::new(1).unwrap(), "ABCDE");
+
+    let text1: Arc<str> = "a".repeat(40).into();
+    let text2: Arc<str> = "b".repeat(40).into();
+    let edits = vec![
+        (1..2, text1.clone()), // replace "B" with 40 a's
+        (3..4, text2.clone()), // replace "D" with 40 b's
+    ];
+
+    buffer.edit(edits);
+
+    let expected = format!("A{}C{}E", text1, text2);
+    assert_eq!(buffer.text(), expected);
+    buffer.check_invariants();
+}
+
+#[test]
+fn test_edit_undo_after_split() {
+    let mut buffer = Buffer::new(ReplicaId::LOCAL, BufferId::new(1).unwrap(), "hello world");
+    buffer.set_group_interval(Duration::from_secs(0));
+    let original = buffer.text();
+
+    let large_text: Arc<str> = "Z".repeat(50).into();
+    let edits = vec![(5..6, large_text)];
+    buffer.edit(edits);
+    assert_ne!(buffer.text(), original);
+    buffer.check_invariants();
+
+    // Undo should restore the original text even though the edit was split
+    // into multiple internal operations grouped in one transaction.
+    buffer.undo();
+    assert_eq!(buffer.text(), original);
+    buffer.check_invariants();
+}

crates/text/src/text.rs 🔗

@@ -48,6 +48,12 @@ use util::RandomCharIter;
 static LINE_SEPARATORS_REGEX: LazyLock<Regex> =
     LazyLock::new(|| Regex::new(r"\r\n|\r").expect("Failed to create LINE_SEPARATORS_REGEX"));
 
+/// The maximum length of a single insertion operation.
+/// Fragments larger than this will be split into multiple smaller
+/// fragments. This allows us to use relative `u32` offsets instead of `usize`,
+/// reducing memory usage.
+const MAX_INSERTION_LEN: usize = if cfg!(test) { 16 } else { u32::MAX as usize };
+
 pub type TransactionId = clock::Lamport;
 
 pub struct Buffer {
@@ -155,18 +161,38 @@ struct History {
 
 #[derive(Clone, Debug, Eq, PartialEq)]
 struct InsertionSlice {
-    edit_id: clock::Lamport,
-    insertion_id: clock::Lamport,
-    range: Range<usize>,
+    // Inline the lamports to allow the replica ids to share the same alignment
+    // saving 4 bytes space edit_id: clock::Lamport,
+    edit_id_value: clock::Seq,
+    edit_id_replica_id: ReplicaId,
+    // insertion_id: clock::Lamport,
+    insertion_id_value: clock::Seq,
+    insertion_id_replica_id: ReplicaId,
+    range: Range<u32>,
 }
 
 impl Ord for InsertionSlice {
     fn cmp(&self, other: &Self) -> Ordering {
-        self.edit_id
-            .cmp(&other.edit_id)
-            .then_with(|| self.insertion_id.cmp(&other.insertion_id))
-            .then_with(|| self.range.start.cmp(&other.range.start))
-            .then_with(|| self.range.end.cmp(&other.range.end))
+        Lamport {
+            value: self.edit_id_value,
+            replica_id: self.edit_id_replica_id,
+        }
+        .cmp(&Lamport {
+            value: other.edit_id_value,
+            replica_id: other.edit_id_replica_id,
+        })
+        .then_with(|| {
+            Lamport {
+                value: self.insertion_id_value,
+                replica_id: self.insertion_id_replica_id,
+            }
+            .cmp(&Lamport {
+                value: other.insertion_id_value,
+                replica_id: other.insertion_id_replica_id,
+            })
+        })
+        .then_with(|| self.range.start.cmp(&other.range.start))
+        .then_with(|| self.range.end.cmp(&other.range.end))
     }
 }
 
@@ -179,8 +205,10 @@ impl PartialOrd for InsertionSlice {
 impl InsertionSlice {
     fn from_fragment(edit_id: clock::Lamport, fragment: &Fragment) -> Self {
         Self {
-            edit_id,
-            insertion_id: fragment.timestamp,
+            edit_id_value: edit_id.value,
+            edit_id_replica_id: edit_id.replica_id,
+            insertion_id_value: fragment.timestamp.value,
+            insertion_id_replica_id: fragment.timestamp.replica_id,
             range: fragment.insertion_offset..fragment.insertion_offset + fragment.len,
         }
     }
@@ -309,6 +337,7 @@ impl History {
 
     fn finalize_last_transaction(&mut self) -> Option<&Transaction> {
         self.undo_stack.last_mut().map(|entry| {
+            entry.transaction.edit_ids.shrink_to_fit();
             entry.suppress_grouping = true;
             &entry.transaction
         })
@@ -489,7 +518,7 @@ struct Edits<'a, D: TextDimension, F: FnMut(&FragmentSummary) -> bool> {
     since: &'a clock::Global,
     old_end: D,
     new_end: D,
-    range: Range<(&'a Locator, usize)>,
+    range: Range<(&'a Locator, u32)>,
     buffer_id: BufferId,
 }
 
@@ -536,18 +565,18 @@ impl<D1, D2> Edit<(D1, D2)> {
 }
 
 #[derive(Eq, PartialEq, Clone, Debug)]
-pub struct Fragment {
-    pub id: Locator,
-    pub timestamp: clock::Lamport,
-    pub insertion_offset: usize,
-    pub len: usize,
-    pub visible: bool,
-    pub deletions: SmallVec<[clock::Lamport; 2]>,
-    pub max_undos: clock::Global,
+struct Fragment {
+    id: Locator,
+    timestamp: clock::Lamport,
+    insertion_offset: u32,
+    len: u32,
+    visible: bool,
+    deletions: SmallVec<[clock::Lamport; 2]>,
+    max_undos: clock::Global,
 }
 
 #[derive(Eq, PartialEq, Clone, Debug)]
-pub struct FragmentSummary {
+struct FragmentSummary {
     text: FragmentTextSummary,
     max_id: Locator,
     max_version: clock::Global,
@@ -575,14 +604,14 @@ impl<'a> sum_tree::Dimension<'a, FragmentSummary> for FragmentTextSummary {
 #[derive(Eq, PartialEq, Clone, Debug)]
 struct InsertionFragment {
     timestamp: clock::Lamport,
-    split_offset: usize,
+    split_offset: u32,
     fragment_id: Locator,
 }
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
 struct InsertionFragmentKey {
     timestamp: clock::Lamport,
-    split_offset: usize,
+    split_offset: u32,
 }
 
 #[derive(Clone, Debug, Eq, PartialEq)]
@@ -740,18 +769,37 @@ impl Buffer {
             let insertion_timestamp = clock::Lamport::new(ReplicaId::LOCAL);
             lamport_clock.observe(insertion_timestamp);
             version.observe(insertion_timestamp);
-            let fragment_id = Locator::between(&Locator::min(), &Locator::max());
-            let fragment = Fragment {
-                id: fragment_id,
-                timestamp: insertion_timestamp,
-                insertion_offset: 0,
-                len: visible_text.len(),
-                visible: true,
-                deletions: Default::default(),
-                max_undos: Default::default(),
-            };
-            insertions.push(InsertionFragment::new(&fragment), ());
-            fragments.push(fragment, &None);
+
+            let mut insertion_offset: u32 = 0;
+            let mut text_offset: usize = 0;
+            let mut prev_locator = Locator::min();
+
+            while text_offset < visible_text.len() {
+                let target_end = visible_text.len().min(text_offset + MAX_INSERTION_LEN);
+                let chunk_end = if target_end == visible_text.len() {
+                    target_end
+                } else {
+                    visible_text.floor_char_boundary(target_end)
+                };
+                let chunk_len = chunk_end - text_offset;
+
+                let fragment_id = Locator::between(&prev_locator, &Locator::max());
+                let fragment = Fragment {
+                    id: fragment_id.clone(),
+                    timestamp: insertion_timestamp,
+                    insertion_offset,
+                    len: chunk_len as u32,
+                    visible: true,
+                    deletions: Default::default(),
+                    max_undos: Default::default(),
+                };
+                insertions.push(InsertionFragment::new(&fragment), ());
+                fragments.push(fragment, &None);
+
+                prev_locator = fragment_id;
+                insertion_offset += chunk_len as u32;
+                text_offset = chunk_end;
+            }
         }
 
         Buffer {
@@ -853,7 +901,7 @@ impl Buffer {
             new_text: Vec::with_capacity(edits.len()),
         };
         let mut new_insertions = Vec::new();
-        let mut insertion_offset = 0;
+        let mut insertion_offset: u32 = 0;
         let mut insertion_slices = Vec::new();
 
         let mut edits = edits
@@ -879,8 +927,9 @@ impl Buffer {
                 if fragment_start > old_fragments.start().visible {
                     if fragment_end > fragment_start {
                         let mut suffix = old_fragments.item().unwrap().clone();
-                        suffix.len = fragment_end - fragment_start;
-                        suffix.insertion_offset += fragment_start - old_fragments.start().visible;
+                        suffix.len = (fragment_end - fragment_start) as u32;
+                        suffix.insertion_offset +=
+                            (fragment_start - old_fragments.start().visible) as u32;
                         new_insertions.push(InsertionFragment::insert_new(&suffix));
                         new_ropes.push_fragment(&suffix, suffix.visible);
                         new_fragments.push(suffix, &None);
@@ -899,8 +948,8 @@ impl Buffer {
             // Preserve any portion of the current fragment that precedes this range.
             if fragment_start < range.start {
                 let mut prefix = old_fragments.item().unwrap().clone();
-                prefix.len = range.start - fragment_start;
-                prefix.insertion_offset += fragment_start - old_fragments.start().visible;
+                prefix.len = (range.start - fragment_start) as u32;
+                prefix.insertion_offset += (fragment_start - old_fragments.start().visible) as u32;
                 prefix.id = Locator::between(&new_fragments.summary().max_id, &prefix.id);
                 new_insertions.push(InsertionFragment::insert_new(&prefix));
                 new_ropes.push_fragment(&prefix, prefix.visible);
@@ -912,29 +961,24 @@ impl Buffer {
             if !new_text.is_empty() {
                 let new_start = new_fragments.summary().text.visible;
 
-                let fragment = Fragment {
-                    id: Locator::between(
-                        &new_fragments.summary().max_id,
-                        old_fragments
-                            .item()
-                            .map_or(&Locator::max(), |old_fragment| &old_fragment.id),
-                    ),
+                let next_fragment_id = old_fragments
+                    .item()
+                    .map_or(Locator::max_ref(), |old_fragment| &old_fragment.id);
+                Self::push_fragments_for_insertion(
+                    new_text.as_ref(),
                     timestamp,
-                    insertion_offset,
-                    len: new_text.len(),
-                    deletions: Default::default(),
-                    max_undos: Default::default(),
-                    visible: true,
-                };
+                    &mut insertion_offset,
+                    &mut new_fragments,
+                    &mut new_insertions,
+                    &mut insertion_slices,
+                    &mut new_ropes,
+                    next_fragment_id,
+                    timestamp,
+                );
                 edits_patch.push(Edit {
                     old: fragment_start..fragment_start,
                     new: new_start..new_start + new_text.len(),
                 });
-                insertion_slices.push(InsertionSlice::from_fragment(timestamp, &fragment));
-                new_insertions.push(InsertionFragment::insert_new(&fragment));
-                new_ropes.push_str(new_text.as_ref());
-                new_fragments.push(fragment, &None);
-                insertion_offset += new_text.len();
             }
 
             // Advance through every fragment that intersects this range, marking the intersecting
@@ -945,8 +989,9 @@ impl Buffer {
                 let mut intersection = fragment.clone();
                 let intersection_end = cmp::min(range.end, fragment_end);
                 if fragment.visible {
-                    intersection.len = intersection_end - fragment_start;
-                    intersection.insertion_offset += fragment_start - old_fragments.start().visible;
+                    intersection.len = (intersection_end - fragment_start) as u32;
+                    intersection.insertion_offset +=
+                        (fragment_start - old_fragments.start().visible) as u32;
                     intersection.id =
                         Locator::between(&new_fragments.summary().max_id, &intersection.id);
                     intersection.deletions.push(timestamp);
@@ -983,8 +1028,8 @@ impl Buffer {
             let fragment_end = old_fragments.end().visible;
             if fragment_end > fragment_start {
                 let mut suffix = old_fragments.item().unwrap().clone();
-                suffix.len = fragment_end - fragment_start;
-                suffix.insertion_offset += fragment_start - old_fragments.start().visible;
+                suffix.len = (fragment_end - fragment_start) as u32;
+                suffix.insertion_offset += (fragment_start - old_fragments.start().visible) as u32;
                 new_insertions.push(InsertionFragment::insert_new(&suffix));
                 new_ropes.push_fragment(&suffix, suffix.visible);
                 new_fragments.push(suffix, &None);
@@ -1075,7 +1120,7 @@ impl Buffer {
         let mut insertion_slices = Vec::new();
         let cx = Some(version.clone());
         let mut new_insertions = Vec::new();
-        let mut insertion_offset = 0;
+        let mut insertion_offset: u32 = 0;
         let mut new_ropes =
             RopeBuilder::new(self.visible_text.cursor(0), self.deleted_text.cursor(0));
         let mut old_fragments = self
@@ -1097,9 +1142,9 @@ impl Buffer {
                 if fragment_start > old_fragments.start().0.full_offset() {
                     if fragment_end > fragment_start {
                         let mut suffix = old_fragments.item().unwrap().clone();
-                        suffix.len = fragment_end.0 - fragment_start.0;
+                        suffix.len = (fragment_end.0 - fragment_start.0) as u32;
                         suffix.insertion_offset +=
-                            fragment_start - old_fragments.start().0.full_offset();
+                            (fragment_start - old_fragments.start().0.full_offset()) as u32;
                         new_insertions.push(InsertionFragment::insert_new(&suffix));
                         new_ropes.push_fragment(&suffix, suffix.visible);
                         new_fragments.push(suffix, &None);
@@ -1118,8 +1163,9 @@ impl Buffer {
             let fragment_end = old_fragments.end().0.full_offset();
             if fragment_end == range.start && fragment_end > fragment_start {
                 let mut fragment = old_fragments.item().unwrap().clone();
-                fragment.len = fragment_end.0 - fragment_start.0;
-                fragment.insertion_offset += fragment_start - old_fragments.start().0.full_offset();
+                fragment.len = (fragment_end.0 - fragment_start.0) as u32;
+                fragment.insertion_offset +=
+                    (fragment_start - old_fragments.start().0.full_offset()) as u32;
                 new_insertions.push(InsertionFragment::insert_new(&fragment));
                 new_ropes.push_fragment(&fragment, fragment.visible);
                 new_fragments.push(fragment, &None);
@@ -1144,8 +1190,9 @@ impl Buffer {
             // Preserve any portion of the current fragment that precedes this range.
             if fragment_start < range.start {
                 let mut prefix = old_fragments.item().unwrap().clone();
-                prefix.len = range.start.0 - fragment_start.0;
-                prefix.insertion_offset += fragment_start - old_fragments.start().0.full_offset();
+                prefix.len = (range.start.0 - fragment_start.0) as u32;
+                prefix.insertion_offset +=
+                    (fragment_start - old_fragments.start().0.full_offset()) as u32;
                 prefix.id = Locator::between(&new_fragments.summary().max_id, &prefix.id);
                 new_insertions.push(InsertionFragment::insert_new(&prefix));
                 fragment_start = range.start;
@@ -1160,29 +1207,24 @@ impl Buffer {
                     old_start += fragment_start.0 - old_fragments.start().0.full_offset().0;
                 }
                 let new_start = new_fragments.summary().text.visible;
-                let fragment = Fragment {
-                    id: Locator::between(
-                        &new_fragments.summary().max_id,
-                        old_fragments
-                            .item()
-                            .map_or(&Locator::max(), |old_fragment| &old_fragment.id),
-                    ),
+                let next_fragment_id = old_fragments
+                    .item()
+                    .map_or(Locator::max_ref(), |old_fragment| &old_fragment.id);
+                Self::push_fragments_for_insertion(
+                    new_text,
                     timestamp,
-                    insertion_offset,
-                    len: new_text.len(),
-                    deletions: Default::default(),
-                    max_undos: Default::default(),
-                    visible: true,
-                };
+                    &mut insertion_offset,
+                    &mut new_fragments,
+                    &mut new_insertions,
+                    &mut insertion_slices,
+                    &mut new_ropes,
+                    next_fragment_id,
+                    timestamp,
+                );
                 edits_patch.push(Edit {
                     old: old_start..old_start,
                     new: new_start..new_start + new_text.len(),
                 });
-                insertion_slices.push(InsertionSlice::from_fragment(timestamp, &fragment));
-                new_insertions.push(InsertionFragment::insert_new(&fragment));
-                new_ropes.push_str(new_text);
-                new_fragments.push(fragment, &None);
-                insertion_offset += new_text.len();
             }
 
             // Advance through every fragment that intersects this range, marking the intersecting
@@ -1193,9 +1235,9 @@ impl Buffer {
                 let mut intersection = fragment.clone();
                 let intersection_end = cmp::min(range.end, fragment_end);
                 if fragment.was_visible(version, &self.undo_map) {
-                    intersection.len = intersection_end.0 - fragment_start.0;
+                    intersection.len = (intersection_end.0 - fragment_start.0) as u32;
                     intersection.insertion_offset +=
-                        fragment_start - old_fragments.start().0.full_offset();
+                        (fragment_start - old_fragments.start().0.full_offset()) as u32;
                     intersection.id =
                         Locator::between(&new_fragments.summary().max_id, &intersection.id);
                     intersection.deletions.push(timestamp);
@@ -1208,7 +1250,7 @@ impl Buffer {
                             + (fragment_start.0 - old_fragments.start().0.full_offset().0);
                         let new_start = new_fragments.summary().text.visible;
                         edits_patch.push(Edit {
-                            old: old_start..old_start + intersection.len,
+                            old: old_start..old_start + intersection.len as usize,
                             new: new_start..new_start,
                         });
                     }
@@ -1229,8 +1271,9 @@ impl Buffer {
             let fragment_end = old_fragments.end().0.full_offset();
             if fragment_end > fragment_start {
                 let mut suffix = old_fragments.item().unwrap().clone();
-                suffix.len = fragment_end.0 - fragment_start.0;
-                suffix.insertion_offset += fragment_start - old_fragments.start().0.full_offset();
+                suffix.len = (fragment_end.0 - fragment_start.0) as u32;
+                suffix.insertion_offset +=
+                    (fragment_start - old_fragments.start().0.full_offset()) as u32;
                 new_insertions.push(InsertionFragment::insert_new(&suffix));
                 new_ropes.push_fragment(&suffix, suffix.visible);
                 new_fragments.push(suffix, &None);
@@ -1252,6 +1295,49 @@ impl Buffer {
         self.subscriptions.publish_mut(&edits_patch)
     }
 
+    fn push_fragments_for_insertion(
+        new_text: &str,
+        timestamp: clock::Lamport,
+        insertion_offset: &mut u32,
+        new_fragments: &mut SumTree<Fragment>,
+        new_insertions: &mut Vec<sum_tree::Edit<InsertionFragment>>,
+        insertion_slices: &mut Vec<InsertionSlice>,
+        new_ropes: &mut RopeBuilder,
+        next_fragment_id: &Locator,
+        edit_timestamp: clock::Lamport,
+    ) {
+        let mut text_offset = 0;
+        while text_offset < new_text.len() {
+            let target_end = new_text.len().min(text_offset + MAX_INSERTION_LEN);
+            let chunk_end = if target_end == new_text.len() {
+                target_end
+            } else {
+                new_text.floor_char_boundary(target_end)
+            };
+            if chunk_end == text_offset {
+                break;
+            }
+            let chunk_len = chunk_end - text_offset;
+
+            let fragment = Fragment {
+                id: Locator::between(&new_fragments.summary().max_id, next_fragment_id),
+                timestamp,
+                insertion_offset: *insertion_offset,
+                len: chunk_len as u32,
+                deletions: Default::default(),
+                max_undos: Default::default(),
+                visible: true,
+            };
+            insertion_slices.push(InsertionSlice::from_fragment(edit_timestamp, &fragment));
+            new_insertions.push(InsertionFragment::insert_new(&fragment));
+            new_fragments.push(fragment, &None);
+
+            *insertion_offset += chunk_len as u32;
+            text_offset = chunk_end;
+        }
+        new_ropes.push_str(new_text);
+    }
+
     fn fragment_ids_for_edits<'a>(
         &'a self,
         edit_ids: impl Iterator<Item = &'a clock::Lamport>,
@@ -1260,38 +1346,56 @@ impl Buffer {
         let mut insertion_slices = Vec::new();
         for edit_id in edit_ids {
             let insertion_slice = InsertionSlice {
-                edit_id: *edit_id,
-                insertion_id: clock::Lamport::MIN,
+                edit_id_value: edit_id.value,
+                edit_id_replica_id: edit_id.replica_id,
+                insertion_id_value: Lamport::MIN.value,
+                insertion_id_replica_id: Lamport::MIN.replica_id,
                 range: 0..0,
             };
             let slices = self
                 .snapshot
                 .insertion_slices
                 .iter_from(&insertion_slice)
-                .take_while(|slice| slice.edit_id == *edit_id);
+                .take_while(|slice| {
+                    Lamport {
+                        value: slice.edit_id_value,
+                        replica_id: slice.edit_id_replica_id,
+                    } == *edit_id
+                });
             insertion_slices.extend(slices)
         }
-        insertion_slices
-            .sort_unstable_by_key(|s| (s.insertion_id, s.range.start, Reverse(s.range.end)));
+        insertion_slices.sort_unstable_by_key(|s| {
+            (
+                Lamport {
+                    value: s.insertion_id_value,
+                    replica_id: s.insertion_id_replica_id,
+                },
+                s.range.start,
+                Reverse(s.range.end),
+            )
+        });
 
         // Get all of the fragments corresponding to these insertion slices.
         let mut fragment_ids = Vec::new();
         let mut insertions_cursor = self.insertions.cursor::<InsertionFragmentKey>(());
         for insertion_slice in &insertion_slices {
-            if insertion_slice.insertion_id != insertions_cursor.start().timestamp
+            let insertion_id = Lamport {
+                value: insertion_slice.insertion_id_value,
+                replica_id: insertion_slice.insertion_id_replica_id,
+            };
+            if insertion_id != insertions_cursor.start().timestamp
                 || insertion_slice.range.start > insertions_cursor.start().split_offset
             {
                 insertions_cursor.seek_forward(
                     &InsertionFragmentKey {
-                        timestamp: insertion_slice.insertion_id,
+                        timestamp: insertion_id,
                         split_offset: insertion_slice.range.start,
                     },
                     Bias::Left,
                 );
             }
             while let Some(item) = insertions_cursor.item() {
-                if item.timestamp != insertion_slice.insertion_id
-                    || item.split_offset >= insertion_slice.range.end
+                if item.timestamp != insertion_id || item.split_offset >= insertion_slice.range.end
                 {
                     break;
                 }
@@ -1330,13 +1434,13 @@ impl Buffer {
                 let new_start = new_fragments.summary().text.visible;
                 if fragment_was_visible && !fragment.visible {
                     edits.push(Edit {
-                        old: old_start..old_start + fragment.len,
+                        old: old_start..old_start + fragment.len as usize,
                         new: new_start..new_start,
                     });
                 } else if !fragment_was_visible && fragment.visible {
                     edits.push(Edit {
                         old: old_start..old_start,
-                        new: new_start..new_start + fragment.len,
+                        new: new_start..new_start + fragment.len as usize,
                     });
                 }
                 new_ropes.push_fragment(&fragment, fragment_was_visible);
@@ -1582,7 +1686,12 @@ impl Buffer {
                 cursor.seek_forward(&Some(fragment_id), Bias::Left);
                 let fragment = cursor.item()?;
                 let start_offset = cursor.start().1;
-                let end_offset = start_offset + if fragment.visible { fragment.len } else { 0 };
+                let end_offset = start_offset
+                    + if fragment.visible {
+                        fragment.len as usize
+                    } else {
+                        0
+                    };
                 Some(start_offset..end_offset)
             });
 
@@ -2038,10 +2147,6 @@ impl BufferSnapshot {
         self.deleted_text.to_string()
     }
 
-    pub fn fragments(&self) -> impl Iterator<Item = &Fragment> {
-        self.fragments.iter()
-    }
-
     pub fn text_summary(&self) -> TextSummary {
         self.visible_text.summary()
     }
@@ -2287,7 +2392,7 @@ impl BufferSnapshot {
             let fragment = fragment_cursor.item().unwrap();
             let mut fragment_offset = fragment_cursor.start().1;
             if fragment.visible {
-                fragment_offset += anchor.offset - insertion.split_offset;
+                fragment_offset += (anchor.offset - insertion.split_offset) as usize;
             }
 
             position.add_assign(&text_cursor.summary(fragment_offset));
@@ -2332,7 +2437,7 @@ impl BufferSnapshot {
             let fragment = item.unwrap();
             let mut fragment_offset = start.1;
             if fragment.visible {
-                fragment_offset += anchor.offset - insertion.split_offset;
+                fragment_offset += (anchor.offset - insertion.split_offset) as usize;
             }
             fragment_offset
         }
@@ -2457,7 +2562,7 @@ impl BufferSnapshot {
             let overshoot = offset - start;
             Anchor::new(
                 fragment.timestamp,
-                fragment.insertion_offset + overshoot,
+                fragment.insertion_offset + overshoot as u32,
                 bias,
                 Some(self.remote_id),
             )
@@ -2546,7 +2651,7 @@ impl BufferSnapshot {
         let mut visible_start = start.1.visible;
         let mut deleted_start = start.1.deleted;
         if let Some(fragment) = item {
-            let overshoot = range.start.offset - fragment.insertion_offset;
+            let overshoot = (range.start.offset - fragment.insertion_offset) as usize;
             if fragment.visible {
                 visible_start += overshoot;
             } else {
@@ -2706,7 +2811,7 @@ impl<'a> RopeBuilder<'a> {
 
     fn push_fragment(&mut self, fragment: &Fragment, was_visible: bool) {
         debug_assert!(fragment.len > 0);
-        self.push(fragment.len, was_visible, fragment.visible)
+        self.push(fragment.len as usize, was_visible, fragment.visible)
     }
 
     fn push(&mut self, len: usize, was_visible: bool, is_visible: bool) {
@@ -2781,7 +2886,8 @@ impl<D: TextDimension + Ord, F: FnMut(&FragmentSummary) -> bool> Iterator for Ed
                 if fragment.id == *self.range.end.0 {
                     visible_end = cmp::min(
                         visible_end,
-                        cursor.start().visible + (self.range.end.1 - fragment.insertion_offset),
+                        cursor.start().visible
+                            + (self.range.end.1 - fragment.insertion_offset) as usize,
                     );
                 }
 
@@ -2807,7 +2913,8 @@ impl<D: TextDimension + Ord, F: FnMut(&FragmentSummary) -> bool> Iterator for Ed
                 if fragment.id == *self.range.end.0 {
                     deleted_end = cmp::min(
                         deleted_end,
-                        cursor.start().deleted + (self.range.end.1 - fragment.insertion_offset),
+                        cursor.start().deleted
+                            + (self.range.end.1 - fragment.insertion_offset) as usize,
                     );
                 }
 
@@ -2872,7 +2979,7 @@ impl sum_tree::Item for Fragment {
             FragmentSummary {
                 max_id: self.id.clone(),
                 text: FragmentTextSummary {
-                    visible: self.len,
+                    visible: self.len as usize,
                     deleted: 0,
                 },
                 max_version,
@@ -2884,7 +2991,7 @@ impl sum_tree::Item for Fragment {
                 max_id: self.id.clone(),
                 text: FragmentTextSummary {
                     visible: 0,
-                    deleted: self.len,
+                    deleted: self.len as usize,
                 },
                 max_version,
                 min_insertion_version,