rope: Improve chunk slicing panic messages (#41023)

Lukas Wirth created

We still see a number of panics here, but the default slicing panic
message doesn't say which end of the range is invalid.

Release Notes:

- N/A

Change summary

crates/rope/src/chunk.rs | 132 ++++++++++++++++++++++++++++++++++++++---
crates/rope/src/rope.rs  |  29 ++++----
2 files changed, 134 insertions(+), 27 deletions(-)

Detailed changes

crates/rope/src/chunk.rs 🔗

@@ -98,6 +98,65 @@ impl Chunk {
     pub fn is_char_boundary(&self, offset: usize) -> bool {
         (1 as Bitmap).unbounded_shl(offset as u32) & self.chars != 0 || offset == self.text.len()
     }
+
+    pub fn floor_char_boundary(&self, index: usize) -> usize {
+        #[inline]
+        pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
+            // This is bit magic equivalent to: b < 128 || b >= 192
+            (u8 as i8) >= -0x40
+        }
+
+        if index >= self.text.len() {
+            self.text.len()
+        } else {
+            let mut i = index;
+            while i > 0 {
+                if is_utf8_char_boundary(self.text.as_bytes()[i]) {
+                    break;
+                }
+                i -= 1;
+            }
+
+            i
+        }
+    }
+
+    #[track_caller]
+    #[inline(always)]
+    pub fn assert_char_boundary(&self, offset: usize) {
+        if self.is_char_boundary(offset) {
+            return;
+        }
+        panic_char_boundary(self, offset);
+
+        #[cold]
+        #[inline(never)]
+        fn panic_char_boundary(chunk: &Chunk, offset: usize) {
+            if offset > chunk.text.len() {
+                panic!(
+                    "byte index {} is out of bounds of `{:?}` (length: {})",
+                    offset,
+                    chunk.text,
+                    chunk.text.len()
+                );
+            }
+            // find the character
+            let char_start = chunk.floor_char_boundary(offset);
+            // `char_start` must be less than len and a char boundary
+            let ch = chunk
+                .text
+                .get(char_start..)
+                .unwrap()
+                .chars()
+                .next()
+                .unwrap();
+            let char_range = char_start..char_start + ch.len_utf8();
+            panic!(
+                "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
+                offset, ch, char_range,
+            );
+        }
+    }
 }
 
 #[derive(Clone, Copy, Debug)]
@@ -167,12 +226,6 @@ impl<'a> ChunkSlice<'a> {
 
     #[inline(always)]
     pub fn slice(self, range: Range<usize>) -> Self {
-        debug_assert!(
-            self.is_char_boundary(range.end),
-            "Invalid range end {} in {:?}",
-            range.end,
-            self
-        );
         let mask = (1 as Bitmap)
             .unbounded_shl(range.end as u32)
             .wrapping_sub(1);
@@ -185,12 +238,8 @@ impl<'a> ChunkSlice<'a> {
                 text: "",
             }
         } else {
-            debug_assert!(
-                self.is_char_boundary(range.start),
-                "Invalid range start {} in {:?}",
-                range.start,
-                self
-            );
+            self.assert_char_boundary(range.start);
+            self.assert_char_boundary(range.end);
             Self {
                 chars: (self.chars & mask) >> range.start,
                 chars_utf16: (self.chars_utf16 & mask) >> range.start,
@@ -340,6 +389,65 @@ impl<'a> ChunkSlice<'a> {
         }
     }
 
+    #[track_caller]
+    #[inline(always)]
+    pub fn assert_char_boundary(&self, offset: usize) {
+        if self.is_char_boundary(offset) {
+            return;
+        }
+        panic_char_boundary(self, offset);
+
+        #[cold]
+        #[inline(never)]
+        fn panic_char_boundary(chunk: &ChunkSlice, offset: usize) {
+            if offset > chunk.text.len() {
+                panic!(
+                    "byte index {} is out of bounds of `{:?}` (length: {})",
+                    offset,
+                    chunk.text,
+                    chunk.text.len()
+                );
+            }
+            // find the character
+            let char_start = chunk.floor_char_boundary(offset);
+            // `char_start` must be less than len and a char boundary
+            let ch = chunk
+                .text
+                .get(char_start..)
+                .unwrap()
+                .chars()
+                .next()
+                .unwrap();
+            let char_range = char_start..char_start + ch.len_utf8();
+            panic!(
+                "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
+                offset, ch, char_range,
+            );
+        }
+    }
+
+    pub fn floor_char_boundary(&self, index: usize) -> usize {
+        #[inline]
+        pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
+            // This is bit magic equivalent to: b < 128 || b >= 192
+            (u8 as i8) >= -0x40
+        }
+
+        if index >= self.text.len() {
+            self.text.len()
+        } else {
+            let mut i = index;
+            while i > 0 {
+                if is_utf8_char_boundary(self.text.as_bytes()[i]) {
+                    break;
+                }
+                i -= 1;
+            }
+
+            i
+        }
+    }
+
     #[inline(always)]
     pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
         let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);

crates/rope/src/rope.rs 🔗

@@ -51,23 +51,22 @@ impl Rope {
     #[track_caller]
     #[inline(always)]
     pub fn assert_char_boundary(&self, offset: usize) {
-        if self.is_char_boundary(offset) {
+        if self.chunks.is_empty() && offset == 0 {
             return;
         }
-        panic_char_boundary(self, offset);
-
-        #[cold]
-        #[inline(never)]
-        fn panic_char_boundary(rope: &Rope, offset: usize) {
-            // find the character
-            let char_start = rope.floor_char_boundary(offset);
-            // `char_start` must be less than len and a char boundary
-            let ch = rope.chars_at(char_start).next().unwrap();
-            let char_range = char_start..char_start + ch.len_utf8();
-            panic!(
-                "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
-                offset, ch, char_range,
-            );
+        let (start, _, item) = self.chunks.find::<usize, _>((), &offset, Bias::Left);
+        match item {
+            Some(chunk) => {
+                let chunk_offset = offset - start;
+                chunk.assert_char_boundary(chunk_offset);
+            }
+            None => {
+                panic!(
+                    "byte index {} is out of bounds of rope (length: {})",
+                    offset,
+                    self.len()
+                );
+            }
         }
     }