diff --git a/crates/rope/src/chunk.rs b/crates/rope/src/chunk.rs
index 7ada5c2052481408bc5af56740f8e35916623f14..95df79d64bb401edf6220ba573be854297226cfe 100644
--- a/crates/rope/src/chunk.rs
+++ b/crates/rope/src/chunk.rs
@@ -127,39 +127,19 @@ impl Chunk {
 
+    /// Checks that `offset` lies on a char boundary of this chunk's text.
+    ///
+    /// Returns `true` when it does. Otherwise panics if `PANIC` is `true`; if `PANIC` is
+    /// `false`, logs an error and returns `false`.
     #[track_caller]
     #[inline(always)]
-    pub fn assert_char_boundary(&self, offset: usize) {
+    pub fn assert_char_boundary<const PANIC: bool>(&self, offset: usize) -> bool {
         if self.is_char_boundary(offset) {
-            return;
+            return true;
         }
-        panic_char_boundary(self, offset);
-
-        #[cold]
-        #[inline(never)]
-        #[track_caller]
-        fn panic_char_boundary(chunk: &Chunk, offset: usize) {
-            if offset > chunk.text.len() {
-                panic!(
-                    "byte index {} is out of bounds of `{:?}` (length: {})",
-                    offset,
-                    chunk.text,
-                    chunk.text.len()
-                );
-            }
-            // find the character
-            let char_start = chunk.floor_char_boundary(offset);
-            // `char_start` must be less than len and a char boundary
-            let ch = chunk
-                .text
-                .get(char_start..)
-                .unwrap()
-                .chars()
-                .next()
-                .unwrap();
-            let char_range = char_start..char_start + ch.len_utf8();
-            panic!(
-                "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
-                offset, ch, char_range,
-            );
+        if PANIC {
+            panic_char_boundary(&self.text, offset);
+        } else {
+            log_err_char_boundary(&self.text, offset);
+            false
         }
     }
 }
@@ -230,10 +210,7 @@ impl<'a> ChunkSlice<'a> {
     }
 
     #[inline(always)]
-    pub fn slice(self, range: Range<usize>) -> Self {
-        let mask = (1 as Bitmap)
-            .unbounded_shl(range.end as u32)
-            .wrapping_sub(1);
+    pub fn slice(self, mut range: Range<usize>) -> Self {
         if range.start == MAX_BASE {
             Self {
                 chars: 0,
@@ -243,8 +220,17 @@ impl<'a> ChunkSlice<'a> {
                 text: "",
             }
         } else {
-            self.assert_char_boundary(range.start);
-            self.assert_char_boundary(range.end);
+            // Recover from a misaligned range instead of panicking: the failed check logs
+            // an error, then the start is moved up and the end down to a char boundary.
+            if !self.assert_char_boundary::<false>(range.start) {
+                range.start = self.text.ceil_char_boundary(range.start);
+            }
+            if !self.assert_char_boundary::<false>(range.end) {
+                range.end = self.text.floor_char_boundary(range.end);
+            }
+            let mask = (1 as Bitmap)
+                .unbounded_shl(range.end as u32)
+                .wrapping_sub(1);
             Self {
                 chars: (self.chars & mask) >> range.start,
                 chars_utf16: (self.chars_utf16 & mask) >> range.start,
@@ -381,38 +367,19 @@ impl<'a> ChunkSlice<'a> {
 
+    /// Checks that `offset` lies on a char boundary of this slice's text.
+    ///
+    /// Returns `true` when it does. Otherwise panics if `PANIC` is `true`; if `PANIC` is
+    /// `false`, logs an error and returns `false`.
     #[track_caller]
     #[inline(always)]
-    pub fn assert_char_boundary(&self, offset: usize) {
+    pub fn assert_char_boundary<const PANIC: bool>(&self, offset: usize) -> bool {
         if self.is_char_boundary(offset) {
-            return;
+            return true;
         }
-        panic_char_boundary(self, offset);
-
-        #[cold]
-        #[inline(never)]
-        fn panic_char_boundary(chunk: &ChunkSlice, offset: usize) {
-            if offset > chunk.text.len() {
-                panic!(
-                    "byte index {} is out of bounds of `{:?}` (length: {})",
-                    offset,
-                    chunk.text,
-                    chunk.text.len()
-                );
-            }
-            // find the character
-            let char_start = chunk.floor_char_boundary(offset);
-            // `char_start` must be less than len and a char boundary
-            let ch = chunk
-                .text
-                .get(char_start..)
-                .unwrap()
-                .chars()
-                .next()
-                .unwrap();
-            let char_range = char_start..char_start + ch.len_utf8();
-            panic!(
-                "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
-                offset, ch, char_range,
-            );
+        if PANIC {
+            panic_char_boundary(self.text, offset);
+        } else {
+            log_err_char_boundary(self.text, offset);
+            false
         }
     }
 
@@ -696,6 +663,61 @@ fn nth_set_bit(v: u128, n: usize) -> usize {
     }
 }
 
+/// Panics with a message explaining why `offset` is not a char boundary of `text`.
+#[cold]
+#[inline(never)]
+#[track_caller]
+fn panic_char_boundary(text: &str, offset: usize) -> ! {
+    if offset > text.len() {
+        panic!(
+            "byte index {} is out of bounds of `{:?}` (length: {})",
+            offset,
+            text,
+            text.len()
+        );
+    }
+    // find the character
+    let char_start = text.floor_char_boundary(offset);
+    // `char_start` must be less than len and a char boundary
+    let ch = text.get(char_start..).unwrap().chars().next().unwrap();
+    let char_range = char_start..char_start + ch.len_utf8();
+    panic!(
+        "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
+        offset, ch, char_range,
+    );
+}
+
+/// Logs an error explaining why `offset` is not a char boundary of `text`.
+///
+/// Unlike [`panic_char_boundary`], this returns so that the caller can recover.
+#[cold]
+#[inline(never)]
+#[track_caller]
+fn log_err_char_boundary(text: &str, offset: usize) {
+    if offset > text.len() {
+        log::error!(
+            "byte index {} is out of bounds of `{:?}` (length: {})",
+            offset,
+            text,
+            text.len()
+        );
+        // Stop here: past the end there is no character to report, and falling
+        // through would `unwrap` a `None` below and panic.
+        return;
+    }
+    // find the character
+    let char_start = text.floor_char_boundary(offset);
+    // `char_start` must be less than len and a char boundary
+    let ch = text.get(char_start..).unwrap().chars().next().unwrap();
+    let char_range = char_start..char_start + ch.len_utf8();
+    log::error!(
+        "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
+        offset,
+        ch,
+        char_range,
+    );
+}
+
 #[inline(always)]
 fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
     let v = v.reverse_bits();
diff --git a/crates/rope/src/rope.rs b/crates/rope/src/rope.rs
index 32894fb84469287fb1474efc57d8180bdee13466..8379045be245cadaf79800f1d57ff418cdd24b40 100644
--- a/crates/rope/src/rope.rs
+++ b/crates/rope/src/rope.rs
@@ -58,7 +58,7 @@ impl Rope {
         match item {
             Some(chunk) => {
                 let chunk_offset = offset - start;
-                chunk.assert_char_boundary(chunk_offset);
+                chunk.assert_char_boundary::<true>(chunk_offset);
             }
             None => {
                 panic!(
@@ -716,7 +716,7 @@ impl<'a> Chunks<'a> {
         };
         let chunk_offset = offset - chunks.start();
         if let Some(chunk) = chunks.item() {
-            chunk.assert_char_boundary(chunk_offset);
+            chunk.assert_char_boundary::<true>(chunk_offset);
         }
         Self {
             chunks,