multi_buffer: Assert char boundary for panic due to point_to_buffer_offset (#40777)

Smit Barmase and Lukas Wirth created

This is an attempt to figure out what's wrong with `point_to_buffer_offset`
in the crash reported at https://github.com/zed-industries/zed/issues/40453.
It has two branches that return an offset, and we want to know which of the
two is the bad one.

Release Notes:

- N/A

Co-authored-by: Lukas Wirth <lukas@zed.dev>

Change summary

crates/multi_buffer/src/multi_buffer.rs | 15 ++++++++++++++-
crates/rope/src/rope.rs                 | 23 +++++++++++++++++++++++
crates/text/src/text.rs                 | 11 +----------
3 files changed, 38 insertions(+), 11 deletions(-)

Detailed changes

crates/multi_buffer/src/multi_buffer.rs 🔗

@@ -4454,10 +4454,23 @@ impl MultiBufferSnapshot {
             && region.has_trailing_newline
             && !region.is_main_buffer
         {
-            return Some((&cursor.excerpt()?.buffer, cursor.main_buffer_position()?));
+            let main_buffer_position = cursor.main_buffer_position()?;
+            let buffer_snapshot = &cursor.excerpt()?.buffer;
+            // remove this assert once we figure out the cause of the panics for #40453
+            buffer_snapshot
+                .text
+                .as_rope()
+                .assert_char_boundary(main_buffer_position);
+            return Some((buffer_snapshot, main_buffer_position));
         } else if buffer_offset > region.buffer.len() {
             return None;
         }
+        // remove this assert once we figure out the cause of the panics for #40453
+        region
+            .buffer
+            .text
+            .as_rope()
+            .assert_char_boundary(buffer_offset);
         Some((region.buffer, buffer_offset))
     }
 

crates/rope/src/rope.rs 🔗

@@ -47,6 +47,29 @@ impl Rope {
             .unwrap_or(false)
     }
 
+    /// Panics unless `offset` lies on a character boundary of this rope.
+    ///
+    /// The check is delegated to `is_char_boundary`. Building the diagnostic
+    /// message lives in a separate `#[cold]`, never-inlined helper so that
+    /// only the boundary check itself is inlined into callers.
+    ///
+    /// # Panics
+    ///
+    /// Panics when `is_char_boundary(offset)` returns `false`. The message
+    /// names the character that contains `offset` together with its byte
+    /// range, or reports the offset as out of bounds when no character
+    /// starts at the floored position.
+    #[track_caller]
+    #[inline(always)]
+    pub fn assert_char_boundary(&self, offset: usize) {
+        if self.is_char_boundary(offset) {
+            return;
+        }
+        panic_char_boundary(self, offset);
+
+        // `#[track_caller]` must be repeated here: without it the panic
+        // location would be this helper rather than the caller of
+        // `assert_char_boundary`.
+        #[cold]
+        #[inline(never)]
+        #[track_caller]
+        fn panic_char_boundary(rope: &Rope, offset: usize) -> ! {
+            // Find the start of the character that contains `offset`.
+            let char_start = rope.floor_char_boundary(offset);
+            match rope.chars_at(char_start).next() {
+                Some(ch) => {
+                    let char_range = char_start..char_start + ch.len_utf8();
+                    panic!(
+                        "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
+                        offset, ch, char_range,
+                    )
+                }
+                // `floor_char_boundary` clamps to `len()`, so there is no
+                // character to report when `offset` is past the end.
+                None => panic!(
+                    "offset {} is out of bounds (len {})",
+                    offset,
+                    rope.len(),
+                ),
+            }
+        }
+    }
+
     pub fn floor_char_boundary(&self, index: usize) -> usize {
         if index >= self.len() {
             self.len()

crates/text/src/text.rs 🔗

@@ -2402,17 +2402,8 @@ impl BufferSnapshot {
         } else {
             if offset > self.visible_text.len() {
                 panic!("offset {} is out of bounds", offset)
-            } else if !self.visible_text.is_char_boundary(offset) {
-                // find the character
-                let char_start = self.visible_text.floor_char_boundary(offset);
-                // `char_start` must be less than len and a char boundary
-                let ch = self.visible_text.chars_at(char_start).next().unwrap();
-                let char_range = char_start..char_start + ch.len_utf8();
-                panic!(
-                    "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
-                    offset, ch, char_range,
-                );
             }
+            self.visible_text.assert_char_boundary(offset);
             let (start, _, item) = self.fragments.find::<usize, _>(&None, &offset, bias);
             let fragment = item.unwrap();
             let overshoot = offset - start;