rope: Turn `ChunkSlice::slice` panics into error logs (#43538)

Lukas Wirth created

While logically not really correct, it's better than tearing down the
application until we figure out the root cause here.

Release Notes:

- N/A *or* Added/Fixed/Improved ...

Change summary

crates/rope/src/chunk.rs | 139 +++++++++++++++++++++--------------------
crates/rope/src/rope.rs  |   4 
2 files changed, 74 insertions(+), 69 deletions(-)

Detailed changes

crates/rope/src/chunk.rs 🔗

@@ -127,39 +127,15 @@ impl Chunk {
 
     #[track_caller]
     #[inline(always)]
-    pub fn assert_char_boundary(&self, offset: usize) {
+    pub fn assert_char_boundary<const PANIC: bool>(&self, offset: usize) -> bool {
         if self.is_char_boundary(offset) {
-            return;
+            return true;
         }
-        panic_char_boundary(self, offset);
-
-        #[cold]
-        #[inline(never)]
-        #[track_caller]
-        fn panic_char_boundary(chunk: &Chunk, offset: usize) {
-            if offset > chunk.text.len() {
-                panic!(
-                    "byte index {} is out of bounds of `{:?}` (length: {})",
-                    offset,
-                    chunk.text,
-                    chunk.text.len()
-                );
-            }
-            // find the character
-            let char_start = chunk.floor_char_boundary(offset);
-            // `char_start` must be less than len and a char boundary
-            let ch = chunk
-                .text
-                .get(char_start..)
-                .unwrap()
-                .chars()
-                .next()
-                .unwrap();
-            let char_range = char_start..char_start + ch.len_utf8();
-            panic!(
-                "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
-                offset, ch, char_range,
-            );
+        if PANIC {
+            panic_char_boundary(&self.text, offset);
+        } else {
+            log_err_char_boundary(&self.text, offset);
+            false
         }
     }
 }
@@ -230,10 +206,7 @@ impl<'a> ChunkSlice<'a> {
     }
 
     #[inline(always)]
-    pub fn slice(self, range: Range<usize>) -> Self {
-        let mask = (1 as Bitmap)
-            .unbounded_shl(range.end as u32)
-            .wrapping_sub(1);
+    pub fn slice(self, mut range: Range<usize>) -> Self {
         if range.start == MAX_BASE {
             Self {
                 chars: 0,
@@ -243,8 +216,15 @@ impl<'a> ChunkSlice<'a> {
                 text: "",
             }
         } else {
-            self.assert_char_boundary(range.start);
-            self.assert_char_boundary(range.end);
+            if !self.assert_char_boundary::<false>(range.start) {
+                range.start = self.text.ceil_char_boundary(range.start);
+            }
+            if !self.assert_char_boundary::<false>(range.end) {
+                range.end = self.text.floor_char_boundary(range.end);
+            }
+            let mask = (1 as Bitmap)
+                .unbounded_shl(range.end as u32)
+                .wrapping_sub(1);
             Self {
                 chars: (self.chars & mask) >> range.start,
                 chars_utf16: (self.chars_utf16 & mask) >> range.start,
@@ -381,38 +361,15 @@ impl<'a> ChunkSlice<'a> {
 
     #[track_caller]
     #[inline(always)]
-    pub fn assert_char_boundary(&self, offset: usize) {
+    pub fn assert_char_boundary<const PANIC: bool>(&self, offset: usize) -> bool {
         if self.is_char_boundary(offset) {
-            return;
+            return true;
         }
-        panic_char_boundary(self, offset);
-
-        #[cold]
-        #[inline(never)]
-        fn panic_char_boundary(chunk: &ChunkSlice, offset: usize) {
-            if offset > chunk.text.len() {
-                panic!(
-                    "byte index {} is out of bounds of `{:?}` (length: {})",
-                    offset,
-                    chunk.text,
-                    chunk.text.len()
-                );
-            }
-            // find the character
-            let char_start = chunk.floor_char_boundary(offset);
-            // `char_start` must be less than len and a char boundary
-            let ch = chunk
-                .text
-                .get(char_start..)
-                .unwrap()
-                .chars()
-                .next()
-                .unwrap();
-            let char_range = char_start..char_start + ch.len_utf8();
-            panic!(
-                "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
-                offset, ch, char_range,
-            );
+        if PANIC {
+            panic_char_boundary(self.text, offset);
+        } else {
+            log_err_char_boundary(self.text, offset);
+            false
         }
     }
 
@@ -696,6 +653,54 @@ fn nth_set_bit(v: u128, n: usize) -> usize {
     }
 }
 
+/// Aborts with a panic explaining why `offset` cannot be used to split `text`.
+///
+/// # Panics
+///
+/// Always panics. When `offset` exceeds `text.len()` the message reports the
+/// out-of-bounds index; otherwise it names the character that `offset` falls
+/// inside, together with that character's byte range. The `unwrap`s below
+/// assume a character starts at or before `offset`.
+#[cold]
+#[inline(never)]
+#[track_caller]
+fn panic_char_boundary(text: &str, offset: usize) -> ! {
+    let len = text.len();
+    if offset <= len {
+        // Step back to the byte where the straddled character begins.
+        let start = text.floor_char_boundary(offset);
+        // `start` is a char boundary, so slicing from it cannot fail.
+        let tail = text.get(start..).unwrap();
+        let straddled = tail.chars().next().unwrap();
+        let end = start + straddled.len_utf8();
+        panic!(
+            "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
+            offset,
+            straddled,
+            start..end,
+        );
+    }
+    panic!(
+        "byte index {} is out of bounds of `{:?}` (length: {})",
+        offset, text, len
+    );
+}
+
+/// Logs an error describing why `offset` is not a valid char boundary in `text`.
+///
+/// Non-panicking counterpart of `panic_char_boundary`: it emits the same
+/// messages through `log::error!` and then returns, so the caller can recover
+/// (for example by snapping the offset to a neighbouring boundary).
+///
+/// This function must never panic itself; every lookup below is checked.
+#[cold]
+#[inline(never)]
+#[track_caller]
+fn log_err_char_boundary(text: &str, offset: usize) {
+    if offset > text.len() {
+        log::error!(
+            "byte index {} is out of bounds of `{:?}` (length: {})",
+            offset,
+            text,
+            text.len()
+        );
+        // Nothing more to report. Falling through would clamp to `text.len()`,
+        // find no character there, and panic on the lookup below.
+        return;
+    }
+    // find the character
+    let char_start = text.floor_char_boundary(offset);
+    // `char_start` is a char boundary at or before `offset`. A character may
+    // still be missing (e.g. `char_start == text.len()`), so do not unwrap.
+    let Some(ch) = text.get(char_start..).and_then(|rest| rest.chars().next()) else {
+        log::error!(
+            "byte index {} is not a char boundary of `{:?}`",
+            offset,
+            text
+        );
+        return;
+    };
+    let char_range = char_start..char_start + ch.len_utf8();
+    log::error!(
+        "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
+        offset,
+        ch,
+        char_range,
+    );
+}
+
 #[inline(always)]
 fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
     let v = v.reverse_bits();

crates/rope/src/rope.rs 🔗

@@ -58,7 +58,7 @@ impl Rope {
         match item {
             Some(chunk) => {
                 let chunk_offset = offset - start;
-                chunk.assert_char_boundary(chunk_offset);
+                chunk.assert_char_boundary::<true>(chunk_offset);
             }
             None => {
                 panic!(
@@ -716,7 +716,7 @@ impl<'a> Chunks<'a> {
         };
         let chunk_offset = offset - chunks.start();
         if let Some(chunk) = chunks.item() {
-            chunk.assert_char_boundary(chunk_offset);
+            chunk.assert_char_boundary::<true>(chunk_offset);
         }
         Self {
             chunks,