From b519ab2758864410b4269692ae04124886b3a0d4 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Thu, 23 Oct 2025 18:17:11 +0200 Subject: [PATCH] rope: Improve chunk slicing panic messages (#41023) We still see a bunch of panics here, but the default slicing panic doesn't tell us which side of the range is bad. Release Notes: - N/A --- crates/rope/src/chunk.rs | 132 +++++++++++++++++++++++++++++++++++---- crates/rope/src/rope.rs | 29 +++++---- 2 files changed, 134 insertions(+), 27 deletions(-) diff --git a/crates/rope/src/chunk.rs b/crates/rope/src/chunk.rs index 51904cd8e2217dc56947f6026fff674147cffea5..d0be336c9faf2c5834182387307a7775ba00db38 100644 --- a/crates/rope/src/chunk.rs +++ b/crates/rope/src/chunk.rs @@ -98,6 +98,65 @@ impl Chunk { pub fn is_char_boundary(&self, offset: usize) -> bool { (1 as Bitmap).unbounded_shl(offset as u32) & self.chars != 0 || offset == self.text.len() } + + pub fn floor_char_boundary(&self, index: usize) -> usize { + #[inline] + pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool { + // This is bit magic equivalent to: b < 128 || b >= 192 + (u8 as i8) >= -0x40 + } + + if index >= self.text.len() { + self.text.len() + } else { + let mut i = index; + while i > 0 { + if is_utf8_char_boundary(self.text.as_bytes()[i]) { + break; + } + i -= 1; + } + + i + } + } + + #[track_caller] + #[inline(always)] + pub fn assert_char_boundary(&self, offset: usize) { + if self.is_char_boundary(offset) { + return; + } + panic_char_boundary(self, offset); + + #[cold] + #[inline(never)] + fn panic_char_boundary(chunk: &Chunk, offset: usize) { + if offset > chunk.text.len() { + panic!( + "byte index {} is out of bounds of `{:?}` (length: {})", + offset, + chunk.text, + chunk.text.len() + ); + } + // find the character + let char_start = chunk.floor_char_boundary(offset); + // `char_start` must be less than len and a char boundary + let ch = chunk + .text + .get(char_start..)
+ .unwrap() + .chars() + .next() + .unwrap(); + let char_range = char_start..char_start + ch.len_utf8(); + panic!( + "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})", + offset, ch, char_range, + ); + } + } } #[derive(Clone, Copy, Debug)] @@ -167,12 +226,6 @@ impl<'a> ChunkSlice<'a> { #[inline(always)] pub fn slice(self, range: Range<usize>) -> Self { - debug_assert!( - self.is_char_boundary(range.end), - "Invalid range end {} in {:?}", - range.end, - self - ); let mask = (1 as Bitmap) .unbounded_shl(range.end as u32) .wrapping_sub(1); @@ -185,12 +238,8 @@ impl<'a> ChunkSlice<'a> { text: "", } } else { - debug_assert!( - self.is_char_boundary(range.start), - "Invalid range start {} in {:?}", - range.start, - self - ); + self.assert_char_boundary(range.start); + self.assert_char_boundary(range.end); Self { chars: (self.chars & mask) >> range.start, chars_utf16: (self.chars_utf16 & mask) >> range.start, @@ -340,6 +389,65 @@ impl<'a> ChunkSlice<'a> { } } + #[track_caller] + #[inline(always)] + pub fn assert_char_boundary(&self, offset: usize) { + if self.is_char_boundary(offset) { + return; + } + panic_char_boundary(self, offset); + + #[cold] + #[inline(never)] + fn panic_char_boundary(chunk: &ChunkSlice, offset: usize) { + if offset > chunk.text.len() { + panic!( + "byte index {} is out of bounds of `{:?}` (length: {})", + offset, + chunk.text, + chunk.text.len() + ); + } + // find the character + let char_start = chunk.floor_char_boundary(offset); + // `char_start` must be less than len and a char boundary + let ch = chunk + .text + .get(char_start..)
+ .unwrap() + .chars() + .next() + .unwrap(); + let char_range = char_start..char_start + ch.len_utf8(); + panic!( + "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})", + offset, ch, char_range, + ); + } + } + + pub fn floor_char_boundary(&self, index: usize) -> usize { + #[inline] + pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool { + // This is bit magic equivalent to: b < 128 || b >= 192 + (u8 as i8) >= -0x40 + } + + if index >= self.text.len() { + self.text.len() + } else { + let mut i = index; + while i > 0 { + if is_utf8_char_boundary(self.text.as_bytes()[i]) { + break; + } + i -= 1; + } + + i + } + } + #[inline(always)] pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 { let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1); diff --git a/crates/rope/src/rope.rs b/crates/rope/src/rope.rs index 23eda84481ced1228cd54741f48009e012edc0e5..5a43e22ea5ef43c5b31aeb63d52dcecdea72f5fe 100644 --- a/crates/rope/src/rope.rs +++ b/crates/rope/src/rope.rs @@ -51,23 +51,22 @@ impl Rope { #[track_caller] #[inline(always)] pub fn assert_char_boundary(&self, offset: usize) { - if self.is_char_boundary(offset) { + if self.chunks.is_empty() && offset == 0 { return; } - panic_char_boundary(self, offset); - - #[cold] - #[inline(never)] - fn panic_char_boundary(rope: &Rope, offset: usize) { - // find the character - let char_start = rope.floor_char_boundary(offset); - // `char_start` must be less than len and a char boundary - let ch = rope.chars_at(char_start).next().unwrap(); - let char_range = char_start..char_start + ch.len_utf8(); - panic!( - "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})", - offset, ch, char_range, - ); + let (start, _, item) = self.chunks.find::<usize, _>((), &offset, Bias::Left); + match item { + Some(chunk) => { + let chunk_offset = offset - start; + chunk.assert_char_boundary(chunk_offset); + } + None => { + panic!( + "byte index {} is out of bounds of rope (length: {})", + offset,
+ self.len() + ); + } } }