@@ -98,6 +98,65 @@ impl Chunk {
+ /// Returns `true` when bit `offset` of the `chars` bitmap is set, or when
+ /// `offset` equals the length of `text`.
pub fn is_char_boundary(&self, offset: usize) -> bool {
+ // `unbounded_shl` yields 0 once the shift reaches the bit width of `Bitmap`,
+ // so such offsets can only pass through the length comparison.
(1 as Bitmap).unbounded_shl(offset as u32) & self.chars != 0 || offset == self.text.len()
}
+
+ /// Returns the closest byte index at or before `index` whose byte starts a
+ /// UTF-8 sequence in `self.text`.
+ ///
+ /// An `index` at or past the end of the text yields `self.text.len()`.
+ pub fn floor_char_boundary(&self, index: usize) -> usize {
+     /// Reports whether `byte` is anything other than a UTF-8 continuation byte.
+     #[inline]
+     const fn is_utf8_char_boundary(byte: u8) -> bool {
+         // This is bit magic equivalent to: byte < 128 || byte >= 192
+         (byte as i8) >= -0x40
+     }
+
+     if index >= self.text.len() {
+         return self.text.len();
+     }
+
+     // Search backwards from `index` for the nearest byte that starts a
+     // character; if none is found the result falls back to 0.
+     self.text.as_bytes()[..=index]
+         .iter()
+         .rposition(|&byte| is_utf8_char_boundary(byte))
+         .unwrap_or(0)
+ }
+
+ /// Panics unless `offset` is a char boundary of this chunk's text.
+ ///
+ /// # Panics
+ ///
+ /// Panics with an out-of-bounds message when `offset > self.text.len()`, and
+ /// otherwise with a message naming the character (and its byte range) that
+ /// `offset` falls inside.
+ #[track_caller]
+ #[inline(always)]
+ pub fn assert_char_boundary(&self, offset: usize) {
+     if self.is_char_boundary(offset) {
+         return;
+     }
+     panic_char_boundary(self, offset);
+
+     // The failure path is marked `#[cold]` and `#[inline(never)]`.
+     // `#[track_caller]` is repeated on the helper because the caller location
+     // is only forwarded through functions that carry the attribute; without
+     // it the panic would be reported at the `panic!` below.
+     #[cold]
+     #[inline(never)]
+     #[track_caller]
+     fn panic_char_boundary(chunk: &Chunk, offset: usize) -> ! {
+         if offset > chunk.text.len() {
+             panic!(
+                 "byte index {} is out of bounds of `{:?}` (length: {})",
+                 offset,
+                 chunk.text,
+                 chunk.text.len()
+             );
+         }
+         // find the character
+         let char_start = chunk.floor_char_boundary(offset);
+         // `char_start` must be less than len and a char boundary
+         let ch = chunk
+             .text
+             .get(char_start..)
+             .expect("floor_char_boundary returns an in-bounds char boundary")
+             .chars()
+             .next()
+             .expect("a character starts at `char_start`");
+         let char_range = char_start..char_start + ch.len_utf8();
+         panic!(
+             "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
+             offset, ch, char_range,
+         );
+     }
+ }
}
#[derive(Clone, Copy, Debug)]
@@ -167,12 +226,6 @@ impl<'a> ChunkSlice<'a> {
#[inline(always)]
pub fn slice(self, range: Range<usize>) -> Self {
- debug_assert!(
- self.is_char_boundary(range.end),
- "Invalid range end {} in {:?}",
- range.end,
- self
- );
let mask = (1 as Bitmap)
.unbounded_shl(range.end as u32)
.wrapping_sub(1);
@@ -185,12 +238,8 @@ impl<'a> ChunkSlice<'a> {
text: "",
}
} else {
- debug_assert!(
- self.is_char_boundary(range.start),
- "Invalid range start {} in {:?}",
- range.start,
- self
- );
+ self.assert_char_boundary(range.start);
+ self.assert_char_boundary(range.end);
Self {
chars: (self.chars & mask) >> range.start,
chars_utf16: (self.chars_utf16 & mask) >> range.start,
@@ -340,6 +389,65 @@ impl<'a> ChunkSlice<'a> {
}
}
+ /// Panics unless `offset` is a char boundary of this slice's text.
+ ///
+ /// # Panics
+ ///
+ /// Panics with an out-of-bounds message when `offset > self.text.len()`, and
+ /// otherwise with a message naming the character (and its byte range) that
+ /// `offset` falls inside.
+ #[track_caller]
+ #[inline(always)]
+ pub fn assert_char_boundary(&self, offset: usize) {
+     if self.is_char_boundary(offset) {
+         return;
+     }
+     panic_char_boundary(self, offset);
+
+     // The failure path is marked `#[cold]` and `#[inline(never)]`.
+     // `#[track_caller]` is repeated on the helper because the caller location
+     // is only forwarded through functions that carry the attribute; without
+     // it the panic would be reported at the `panic!` below.
+     #[cold]
+     #[inline(never)]
+     #[track_caller]
+     fn panic_char_boundary(chunk: &ChunkSlice, offset: usize) -> ! {
+         if offset > chunk.text.len() {
+             panic!(
+                 "byte index {} is out of bounds of `{:?}` (length: {})",
+                 offset,
+                 chunk.text,
+                 chunk.text.len()
+             );
+         }
+         // find the character
+         let char_start = chunk.floor_char_boundary(offset);
+         // `char_start` must be less than len and a char boundary
+         let ch = chunk
+             .text
+             .get(char_start..)
+             .expect("floor_char_boundary returns an in-bounds char boundary")
+             .chars()
+             .next()
+             .expect("a character starts at `char_start`");
+         let char_range = char_start..char_start + ch.len_utf8();
+         panic!(
+             "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
+             offset, ch, char_range,
+         );
+     }
+ }
+
+ /// Returns the closest byte index at or before `index` whose byte starts a
+ /// UTF-8 sequence in `self.text`.
+ ///
+ /// An `index` at or past the end of the text yields `self.text.len()`.
+ pub fn floor_char_boundary(&self, index: usize) -> usize {
+     /// Reports whether `byte` is anything other than a UTF-8 continuation byte.
+     #[inline]
+     const fn is_utf8_char_boundary(byte: u8) -> bool {
+         // This is bit magic equivalent to: byte < 128 || byte >= 192
+         (byte as i8) >= -0x40
+     }
+
+     if index >= self.text.len() {
+         return self.text.len();
+     }
+
+     // Search backwards from `index` for the nearest byte that starts a
+     // character; if none is found the result falls back to 0.
+     self.text.as_bytes()[..=index]
+         .iter()
+         .rposition(|&byte| is_utf8_char_boundary(byte))
+         .unwrap_or(0)
+ }
+
#[inline(always)]
pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
@@ -51,23 +51,22 @@ impl Rope {
+ /// Panics unless `offset` passes the char-boundary assertion of the chunk
+ /// that `self.chunks.find` returns for it.
+ ///
+ /// # Panics
+ ///
+ /// Panics with an out-of-bounds message when no chunk is found for `offset`,
+ /// or from the chunk's own `assert_char_boundary` when that check fails.
#[track_caller]
#[inline(always)]
pub fn assert_char_boundary(&self, offset: usize) {
- if self.is_char_boundary(offset) {
+ // Nothing to look up when there are no chunks and `offset` is 0.
+ if self.chunks.is_empty() && offset == 0 {
return;
}
- panic_char_boundary(self, offset);
-
- #[cold]
- #[inline(never)]
- fn panic_char_boundary(rope: &Rope, offset: usize) {
- // find the character
- let char_start = rope.floor_char_boundary(offset);
- // `char_start` must be less than len and a char boundary
- let ch = rope.chars_at(char_start).next().unwrap();
- let char_range = char_start..char_start + ch.len_utf8();
- panic!(
- "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
- offset, ch, char_range,
- );
+ // Left-biased lookup of the chunk for `offset`; `start` is presumably that
+ // chunk's starting byte offset within the rope — TODO confirm against `find`.
+ let (start, _, item) = self.chunks.find::<usize, _>((), &offset, Bias::Left);
+ match item {
+ Some(chunk) => {
+ // Re-express `offset` relative to the chunk before delegating.
+ let chunk_offset = offset - start;
+ chunk.assert_char_boundary(chunk_offset);
+ }
+ None => {
+ panic!(
+ "byte index {} is out of bounds of rope (length: {})",
+ offset,
+ self.len()
+ );
+ }
}
}