rope: Improve prepend performance for small inputs on small ropes (#50389)

Lukas Wirth created

Release Notes:

- Improved performance of prepending small strings to small ropes

Change summary

crates/rope/src/chunk.rs        |  45 +++++++++++
crates/rope/src/rope.rs         | 135 +++++++++++++++++++++++++++++++++++
crates/sum_tree/src/sum_tree.rs |  45 +++++++++++
3 files changed, 225 insertions(+)

Detailed changes

crates/rope/src/chunk.rs 🔗

@@ -102,6 +102,11 @@ impl Chunk {
         self.append(Chunk::new(text).as_slice());
     }
 
+    #[inline(always)]
+    pub fn prepend_str(&mut self, text: &str) { // Inserts `text` in front of this chunk's existing contents.
+        self.prepend(Chunk::new(text).as_slice()); // Wraps `text` in a temporary chunk and delegates to `prepend`.
+    }
+
     #[inline(always)]
     pub fn append(&mut self, slice: ChunkSlice) {
         if slice.is_empty() {
@@ -116,6 +121,28 @@ impl Chunk {
         self.text.push_str(slice.text);
     }
 
+    #[inline(always)]
+    pub fn prepend(&mut self, slice: ChunkSlice) { // Inserts `slice` before this chunk's current text and updates the four bitmaps to match.
+        if slice.is_empty() { // Nothing to insert; leave the chunk untouched.
+            return;
+        }
+        if self.text.is_empty() { // No existing text: the result is simply a chunk built from the slice's text.
+            *self = Chunk::new(slice.text);
+            return;
+        }
+
+        let shift = slice.text.len(); // Byte length of the prefix; every existing bitmap is shifted left by this amount.
+        self.chars = slice.chars | (self.chars << shift); // Prefix bits take the low positions, the shifted old bits sit above them.
+        self.chars_utf16 = slice.chars_utf16 | (self.chars_utf16 << shift);
+        self.newlines = slice.newlines | (self.newlines << shift);
+        self.tabs = slice.tabs | (self.tabs << shift);
+
+        let mut new_text = ArrayString::<MAX_BASE>::new(); // Text is rebuilt as prefix followed by the old contents.
+        new_text.push_str(slice.text); // NOTE(review): presumably panics if prefix + old text exceeds MAX_BASE bytes, so callers must check capacity first; confirm.
+        new_text.push_str(&self.text);
+        self.text = new_text;
+    }
+
     #[inline(always)]
     pub fn as_slice(&self) -> ChunkSlice<'_> {
         ChunkSlice {
@@ -890,6 +917,24 @@ mod tests {
         verify_chunk(chunk1.as_slice(), &(str1 + &str2[start_offset..end_offset]));
     }
 
+    #[gpui::test(iterations = 1000)]
+    fn test_prepend_random_strings(mut rng: StdRng) { // Randomized check that `Chunk::prepend` yields prefix + original text.
+        let len1 = rng.random_range(0..=MAX_BASE);
+        let len2 = rng.random_range(0..=MAX_BASE).saturating_sub(len1); // Keeps len1 + len2 <= MAX_BASE so the combined text fits in one chunk.
+        let str1 = random_string_with_utf8_len(&mut rng, len1); // NOTE(review): presumably a string of exactly `len1` UTF-8 bytes; confirm in the helper.
+        let str2 = random_string_with_utf8_len(&mut rng, len2);
+        let mut chunk1 = Chunk::new(&str1);
+        let chunk2 = Chunk::new(&str2);
+        let char_offsets = char_offsets_with_end(&str2);
+        let start_index = rng.random_range(0..char_offsets.len());
+        let start_offset = char_offsets[start_index];
+        let end_offset = char_offsets[rng.random_range(start_index..char_offsets.len())]; // Drawn from start_index onward, so end_offset >= start_offset (the slice may be empty).
+        let slice = chunk2.slice(start_offset..end_offset);
+        let prefix_text = &str2[start_offset..end_offset];
+        chunk1.prepend(slice);
+        verify_chunk(chunk1.as_slice(), &(prefix_text.to_owned() + &str1)); // Expected contents: the sliced prefix followed by the original text.
+    }
+
     /// Return the byte offsets for each character in a string.
     ///
     /// These are valid offsets to split the string.

crates/rope/src/rope.rs 🔗

@@ -167,6 +167,11 @@ impl Rope {
             (),
         );
 
+        if text.is_empty() {
+            self.check_invariants();
+            return;
+        }
+
         #[cfg(all(test, not(rust_analyzer)))]
         const NUM_CHUNKS: usize = 16;
         #[cfg(not(all(test, not(rust_analyzer))))]
@@ -269,6 +274,23 @@ impl Rope {
     }
 
     pub fn push_front(&mut self, text: &str) { // Inserts `text` at the start of the rope.
+        if text.is_empty() { // Prepending nothing is a no-op.
+            return;
+        }
+        if self.is_empty() { // With no existing content, prepending is delegated to `push`.
+            self.push(text);
+            return;
+        }
+        if self
+            .chunks
+            .first()
+            .is_some_and(|c| c.text.len() + text.len() <= chunk::MAX_BASE) // Fast path applies only when the first chunk can hold `text` as well.
+        {
+            self.chunks
+                .update_first(|first_chunk| first_chunk.prepend_str(text), ()); // Edits the first chunk in place instead of building a new rope.
+            self.check_invariants();
+            return;
+        }
         let suffix = mem::replace(self, Rope::from(text)); // Fallback: `self` becomes a rope of `text`, and the old rope is taken out as `suffix`.
         self.append(suffix);
     }
@@ -2339,6 +2361,119 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_push_front_empty_text_on_empty_rope() { // Prepending "" to an empty rope must leave it empty.
+        let mut rope = Rope::new();
+        rope.push_front("");
+        assert_eq!(rope.text(), "");
+        assert_eq!(rope.len(), 0);
+    }
+
+    #[test]
+    fn test_push_front_empty_text_on_nonempty_rope() { // Prepending "" must not change existing content.
+        let mut rope = Rope::from("hello");
+        rope.push_front("");
+        assert_eq!(rope.text(), "hello");
+    }
+
+    #[test]
+    fn test_push_front_on_empty_rope() { // Prepending to an empty rope yields exactly the prepended text.
+        let mut rope = Rope::new();
+        rope.push_front("hello");
+        assert_eq!(rope.text(), "hello");
+        assert_eq!(rope.len(), 5);
+        assert_eq!(rope.max_point(), Point::new(0, 5)); // Single line of five bytes: row 0, column 5.
+    }
+
+    #[test]
+    fn test_push_front_single_space() { // A one-byte prefix on a short rope; both text and length must reflect it.
+        let mut rope = Rope::from("hint");
+        rope.push_front(" ");
+        assert_eq!(rope.text(), " hint");
+        assert_eq!(rope.len(), 5);
+    }
+
+    #[gpui::test(iterations = 50)]
+    fn test_push_front_random(mut rng: StdRng) { // Randomized check of repeated `push_front` calls against a plain `String` model.
+        let initial_len = rng.random_range(0..=64);
+        let initial_text: String = RandomCharIter::new(&mut rng).take(initial_len).collect(); // `initial_len` counts chars, not bytes.
+        let mut rope = Rope::from(initial_text.as_str());
+
+        let mut expected = initial_text; // Reference model that receives the same prepends as the rope.
+
+        for _ in 0..rng.random_range(1..=10) { // Between 1 and 10 successive prepends onto the same rope.
+            let prefix_len = rng.random_range(0..=32); // Zero is allowed, so empty prefixes are exercised too.
+            let prefix: String = RandomCharIter::new(&mut rng).take(prefix_len).collect();
+
+            rope.push_front(&prefix);
+            expected.insert_str(0, &prefix);
+
+            assert_eq!(
+                rope.text(),
+                expected,
+                "text mismatch after push_front({:?})",
+                prefix
+            );
+            assert_eq!(rope.len(), expected.len());
+
+            let actual_summary = rope.summary();
+            let expected_summary = TextSummary::from(expected.as_str()); // Summary computed from the model string, used as ground truth.
+            assert_eq!(
+                actual_summary.len, expected_summary.len,
+                "len mismatch for {:?}",
+                expected
+            );
+            assert_eq!(
+                actual_summary.lines, expected_summary.lines,
+                "lines mismatch for {:?}",
+                expected
+            );
+            assert_eq!(
+                actual_summary.chars, expected_summary.chars,
+                "chars mismatch for {:?}",
+                expected
+            );
+            assert_eq!(
+                actual_summary.longest_row, expected_summary.longest_row,
+                "longest_row mismatch for {:?}",
+                expected
+            );
+
+            // Verify offset-to-point and point-to-offset round-trip at boundaries.
+            for (ix, _) in expected.char_indices().chain(Some((expected.len(), '\0'))) { // Every char boundary plus the end-of-text offset.
+                assert_eq!(
+                    rope.point_to_offset(rope.offset_to_point(ix)),
+                    ix,
+                    "offset round-trip failed at {} for {:?}",
+                    ix,
+                    expected
+                );
+            }
+        }
+    }
+
+    #[gpui::test(iterations = 50)]
+    fn test_push_front_large_prefix(mut rng: StdRng) { // Checks a single `push_front` with a long prefix onto a short rope.
+        let initial_len = rng.random_range(0..=32);
+        let initial_text: String = RandomCharIter::new(&mut rng).take(initial_len).collect();
+        let mut rope = Rope::from(initial_text.as_str());
+
+        let prefix_len = rng.random_range(64..=256); // NOTE(review): presumably large enough to overflow the first chunk and force the non-in-place path; confirm against MAX_BASE in test builds.
+        let prefix: String = RandomCharIter::new(&mut rng).take(prefix_len).collect();
+
+        rope.push_front(&prefix);
+        let expected = format!("{}{}", prefix, initial_text);
+
+        assert_eq!(rope.text(), expected);
+        assert_eq!(rope.len(), expected.len());
+
+        let actual_summary = rope.summary();
+        let expected_summary = TextSummary::from(expected.as_str()); // Summary computed from the expected string, used as ground truth.
+        assert_eq!(actual_summary.len, expected_summary.len);
+        assert_eq!(actual_summary.lines, expected_summary.lines);
+        assert_eq!(actual_summary.chars, expected_summary.chars);
+    }
+
     fn clip_offset(text: &str, mut offset: usize, bias: Bias) -> usize {
         while !text.is_char_boundary(offset) {
             match bias {

crates/sum_tree/src/sum_tree.rs 🔗

@@ -661,6 +661,51 @@ impl<T: Item> SumTree<T> {
         }
     }
 
+    pub fn update_first( // Applies `f` to the first item of the tree and refreshes the summaries above it.
+        &mut self,
+        f: impl FnOnce(&mut T),
+        cx: <T::Summary as Summary>::Context<'_>,
+    ) {
+        self.update_first_recursive(f, cx); // The returned summary is only needed by recursive callers, so it is dropped here.
+    }
+
+    fn update_first_recursive( // Walks down the first-child path, applies `f` to the first leaf item, and returns this node's new summary.
+        &mut self,
+        f: impl FnOnce(&mut T),
+        cx: <T::Summary as Summary>::Context<'_>,
+    ) -> Option<T::Summary> {
+        match Arc::make_mut(&mut self.0) { // Mutable access to the node; `Arc::make_mut` clones it first if the Arc is shared.
+            Node::Internal {
+                summary,
+                child_summaries,
+                child_trees,
+                ..
+            } => {
+                let first_summary = child_summaries.first_mut().unwrap(); // Panics if an internal node has no children.
+                let first_child = child_trees.first_mut().unwrap();
+                *first_summary = first_child.update_first_recursive(f, cx).unwrap(); // Panics if the subtree returns None (its first leaf has no items).
+                *summary = sum(child_summaries.iter(), cx); // Recompute this node's summary from all child summaries.
+                Some(summary.clone())
+            }
+            Node::Leaf {
+                summary,
+                items,
+                item_summaries,
+            } => {
+                if let Some((item, item_summary)) =
+                    items.first_mut().zip(item_summaries.first_mut())
+                {
+                    (f)(item);
+                    *item_summary = item.summary(cx); // Refresh the cached summary of the item that was just mutated.
+                    *summary = sum(item_summaries.iter(), cx); // Recompute the leaf summary from all item summaries.
+                    Some(summary.clone())
+                } else {
+                    None // Leaf has no items: `f` is never called and nothing changes.
+                }
+            }
+        }
+    }
+
     pub fn extent<'a, D: Dimension<'a, T::Summary>>(
         &'a self,
         cx: <T::Summary as Summary>::Context<'_>,