@@ -248,10 +248,8 @@ impl<'a> Iterator for InlayChunks<'a> {
// Determine split index handling edge cases
let split_index = if desired_bytes >= chunk.text.len() {
chunk.text.len()
- } else if chunk.text.is_char_boundary(desired_bytes) {
- desired_bytes
} else {
- find_next_utf8_boundary(chunk.text, desired_bytes)
+ chunk.text.ceil_char_boundary(desired_bytes)
};
let (prefix, suffix) = chunk.text.split_at(split_index);
@@ -373,10 +371,8 @@ impl<'a> Iterator for InlayChunks<'a> {
.next()
.map(|c| c.len_utf8())
.unwrap_or(1)
- } else if inlay_chunk.is_char_boundary(next_inlay_highlight_endpoint) {
- next_inlay_highlight_endpoint
} else {
- find_next_utf8_boundary(inlay_chunk, next_inlay_highlight_endpoint)
+ inlay_chunk.ceil_char_boundary(next_inlay_highlight_endpoint)
};
let (chunk, remainder) = inlay_chunk.split_at(split_index);
@@ -1146,31 +1142,6 @@ fn push_isomorphic(sum_tree: &mut SumTree<Transform>, summary: TextSummary) {
}
}
-/// Given a byte index that is NOT a UTF-8 boundary, find the next one.
-/// Assumes: 0 < byte_index < text.len() and !text.is_char_boundary(byte_index)
-#[inline(always)]
-fn find_next_utf8_boundary(text: &str, byte_index: usize) -> usize {
- let bytes = text.as_bytes();
- let mut idx = byte_index + 1;
-
- // Scan forward until we find a boundary
- while idx < text.len() {
- if is_utf8_char_boundary(bytes[idx]) {
- return idx;
- }
- idx += 1;
- }
-
- // Hit the end, return the full length
- text.len()
-}
-
-// Private helper function taken from Rust's core::num module (which is both Apache2 and MIT licensed)
-const fn is_utf8_char_boundary(byte: u8) -> bool {
- // This is bit magic equivalent to: b < 128 || b >= 192
- (byte as i8) >= -0x40
-}
-
#[cfg(test)]
mod tests {
use super::*;
@@ -110,18 +110,12 @@ impl Chunk {
}
pub fn floor_char_boundary(&self, index: usize) -> usize {
- #[inline]
- pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
- // This is bit magic equivalent to: b < 128 || b >= 192
- (u8 as i8) >= -0x40
- }
-
if index >= self.text.len() {
self.text.len()
} else {
let mut i = index;
while i > 0 {
- if is_utf8_char_boundary(self.text.as_bytes()[i]) {
+ if util::is_utf8_char_boundary(self.text.as_bytes()[i]) {
break;
}
i -= 1;
@@ -423,25 +417,7 @@ impl<'a> ChunkSlice<'a> {
}
pub fn floor_char_boundary(&self, index: usize) -> usize {
- #[inline]
- pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
- // This is bit magic equivalent to: b < 128 || b >= 192
- (u8 as i8) >= -0x40
- }
-
- if index >= self.text.len() {
- self.text.len()
- } else {
- let mut i = index;
- while i > 0 {
- if is_utf8_char_boundary(self.text.as_bytes()[i]) {
- break;
- }
- i -= 1;
- }
-
- i
- }
+ self.text.floor_char_boundary(index)
}
#[inline(always)]
@@ -74,29 +74,9 @@ impl Rope {
if index >= self.len() {
self.len()
} else {
- #[inline]
- pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
- // This is bit magic equivalent to: b < 128 || b >= 192
- (u8 as i8) >= -0x40
- }
-
let (start, _, item) = self.chunks.find::<usize, _>((), &index, Bias::Left);
let chunk_offset = index - start;
- let lower_idx = item.map(|chunk| {
- let lower_bound = chunk_offset.saturating_sub(3);
- chunk
- .text
- .as_bytes()
- .get(lower_bound..=chunk_offset)
- .map(|it| {
- let new_idx = it
- .iter()
- .rposition(|&b| is_utf8_char_boundary(b))
- .unwrap_or(0);
- lower_bound + new_idx
- })
- .unwrap_or(chunk.text.len())
- });
+ let lower_idx = item.map(|chunk| chunk.text.floor_char_boundary(chunk_offset));
lower_idx.map_or_else(|| self.len(), |idx| start + idx)
}
}
@@ -105,22 +85,9 @@ impl Rope {
if index > self.len() {
self.len()
} else {
- #[inline]
- pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
- // This is bit magic equivalent to: b < 128 || b >= 192
- (u8 as i8) >= -0x40
- }
-
let (start, _, item) = self.chunks.find::<usize, _>((), &index, Bias::Left);
let chunk_offset = index - start;
- let upper_idx = item.map(|chunk| {
- let upper_bound = Ord::min(chunk_offset + 4, chunk.text.len());
- chunk.text.as_bytes()[chunk_offset..upper_bound]
- .iter()
- .position(|&b| is_utf8_char_boundary(b))
- .map_or(upper_bound, |pos| pos + chunk_offset)
- });
-
+ let upper_idx = item.map(|chunk| chunk.text.ceil_char_boundary(chunk_offset));
upper_idx.map_or_else(|| self.len(), |idx| start + idx)
}
}
@@ -2186,79 +2153,43 @@ mod tests {
#[test]
fn test_floor_char_boundary() {
- // polyfill of str::floor_char_boundary
- fn floor_char_boundary(str: &str, index: usize) -> usize {
- if index >= str.len() {
- str.len()
- } else {
- let lower_bound = index.saturating_sub(3);
- let new_index = str.as_bytes()[lower_bound..=index]
- .iter()
- .rposition(|b| (*b as i8) >= -0x40);
-
- lower_bound + new_index.unwrap()
- }
- }
-
let fixture = "å°";
let rope = Rope::from("å°");
for b in 0..=fixture.len() {
- assert_eq!(
- rope.floor_char_boundary(b),
- floor_char_boundary(&fixture, b)
- );
+ assert_eq!(rope.floor_char_boundary(b), fixture.floor_char_boundary(b));
}
let fixture = "";
let rope = Rope::from("");
for b in 0..=fixture.len() {
- assert_eq!(
- rope.floor_char_boundary(b),
- floor_char_boundary(&fixture, b)
- );
+ assert_eq!(rope.floor_char_boundary(b), fixture.floor_char_boundary(b));
}
let fixture = "š“š š”š¢šµš£ā«ļøāŖļøš¤\nš³ļøāā§ļøšš³ļøāšš“āā ļøā³ļøš¬šš“š³ļøš©";
let rope = Rope::from("š“š š”š¢šµš£ā«ļøāŖļøš¤\nš³ļøāā§ļøšš³ļøāšš“āā ļøā³ļøš¬šš“š³ļøš©");
for b in 0..=fixture.len() {
- assert_eq!(
- rope.floor_char_boundary(b),
- floor_char_boundary(&fixture, b)
- );
+ assert_eq!(rope.floor_char_boundary(b), fixture.floor_char_boundary(b));
}
}
#[test]
fn test_ceil_char_boundary() {
- // polyfill of str::ceil_char_boundary
- fn ceil_char_boundary(str: &str, index: usize) -> usize {
- if index > str.len() {
- str.len()
- } else {
- let upper_bound = Ord::min(index + 4, str.len());
- str.as_bytes()[index..upper_bound]
- .iter()
- .position(|b| (*b as i8) >= -0x40)
- .map_or(upper_bound, |pos| pos + index)
- }
- }
-
let fixture = "å°";
let rope = Rope::from("å°");
for b in 0..=fixture.len() {
- assert_eq!(rope.ceil_char_boundary(b), ceil_char_boundary(&fixture, b));
+ assert_eq!(rope.ceil_char_boundary(b), fixture.ceil_char_boundary(b));
}
let fixture = "";
let rope = Rope::from("");
for b in 0..=fixture.len() {
- assert_eq!(rope.ceil_char_boundary(b), ceil_char_boundary(&fixture, b));
+ assert_eq!(rope.ceil_char_boundary(b), fixture.ceil_char_boundary(b));
}
let fixture = "š“š š”š¢šµš£ā«ļøāŖļøš¤\nš³ļøāā§ļøšš³ļøāšš“āā ļøā³ļøš¬šš“š³ļøš©";
let rope = Rope::from("š“š š”š¢šµš£ā«ļøāŖļøš¤\nš³ļøāā§ļøšš³ļøāšš“āā ļøā³ļøš¬šš“š³ļøš©");
for b in 0..=fixture.len() {
- assert_eq!(rope.ceil_char_boundary(b), ceil_char_boundary(&fixture, b));
+ assert_eq!(rope.ceil_char_boundary(b), fixture.ceil_char_boundary(b));
}
}
@@ -51,6 +51,12 @@ macro_rules! debug_panic {
};
}
+#[inline]
+pub const fn is_utf8_char_boundary(u8: u8) -> bool {
+ // Bit magic for `byte < 128 || byte >= 192`: as `i8`, only continuation bytes (0x80..=0xBF) fall below -0x40
+ (u8 as i8) >= -0x40
+}
+
pub fn truncate(s: &str, max_chars: usize) -> &str {
match s.char_indices().nth(max_chars) {
None => s,