@@ -29,7 +29,9 @@ pub fn refresh_matching_bracket_highlights(
if (editor.cursor_shape == CursorShape::Block || editor.cursor_shape == CursorShape::Hollow)
&& head < snapshot.buffer_snapshot.len()
{
- tail += 1;
+ if let Some(tail_ch) = snapshot.buffer_snapshot.chars_at(tail).next() {
+ tail += tail_ch.len_utf8();
+ }
}
if let Some((opening_range, closing_range)) = snapshot
@@ -4121,8 +4121,7 @@ impl BufferSnapshot {
range: Range<T>,
) -> impl Iterator<Item = BracketMatch> + '_ {
// Find bracket pairs that *inclusively* contain the given range.
- let range = range.start.to_offset(self).saturating_sub(1)
- ..self.len().min(range.end.to_offset(self) + 1);
+ let range = range.start.to_previous_offset(self)..range.end.to_next_offset(self);
self.all_bracket_ranges(range)
.filter(|pair| !pair.newline_only)
}
@@ -4131,8 +4130,7 @@ impl BufferSnapshot {
&self,
range: Range<T>,
) -> impl Iterator<Item = (Range<usize>, DebuggerTextObject)> + '_ {
- let range = range.start.to_offset(self).saturating_sub(1)
- ..self.len().min(range.end.to_offset(self) + 1);
+ let range = range.start.to_previous_offset(self)..range.end.to_next_offset(self);
let mut matches = self.syntax.matches_with_options(
range.clone(),
@@ -5259,7 +5259,8 @@ impl MultiBufferSnapshot {
} else {
Anchor::max()
};
- // TODO this is a hack, remove it
+
+ // TODO this is a hack, because all APIs should be able to handle ExcerptId::min and max.
if let Some((excerpt_id, _, _)) = self.as_singleton() {
anchor.excerpt_id = *excerpt_id;
}
@@ -30,6 +30,76 @@ impl Rope {
Self::default()
}
+ pub fn is_char_boundary(&self, offset: usize) -> bool {
+ if self.chunks.is_empty() {
+ return offset == 0;
+ }
+ let mut cursor = self.chunks.cursor::<usize>(&());
+ cursor.seek(&offset, Bias::Left);
+ let chunk_offset = offset - cursor.start();
+ cursor
+ .item()
+ .map(|chunk| chunk.text.is_char_boundary(chunk_offset))
+ .unwrap_or(false)
+ }
+
+ pub fn floor_char_boundary(&self, index: usize) -> usize {
+ if index >= self.len() {
+ self.len()
+ } else {
+ #[inline]
+ pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
+ // This is bit magic equivalent to: b < 128 || b >= 192
+ (u8 as i8) >= -0x40
+ }
+
+ let mut cursor = self.chunks.cursor::<usize>(&());
+ cursor.seek(&index, Bias::Left);
+ let chunk_offset = index - cursor.start();
+ let lower_idx = cursor.item().map(|chunk| {
+ let lower_bound = chunk_offset.saturating_sub(3);
+ chunk
+ .text
+ .as_bytes()
+ .get(lower_bound..=chunk_offset)
+ .map(|it| {
+ let new_idx = it
+ .iter()
+ .rposition(|&b| is_utf8_char_boundary(b))
+ .unwrap_or(0);
+ lower_bound + new_idx
+ })
+ .unwrap_or(chunk.text.len())
+ });
+ lower_idx.map_or_else(|| self.len(), |idx| cursor.start() + idx)
+ }
+ }
+
+ pub fn ceil_char_boundary(&self, index: usize) -> usize {
+ if index > self.len() {
+ self.len()
+ } else {
+ #[inline]
+ pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
+ // This is bit magic equivalent to: b < 128 || b >= 192
+ (u8 as i8) >= -0x40
+ }
+
+ let mut cursor = self.chunks.cursor::<usize>(&());
+ cursor.seek(&index, Bias::Left);
+ let chunk_offset = index - cursor.start();
+ let upper_idx = cursor.item().map(|chunk| {
+ let upper_bound = Ord::min(chunk_offset + 4, chunk.text.len());
+ chunk.text.as_bytes()[chunk_offset..upper_bound]
+ .iter()
+ .position(|&b| is_utf8_char_boundary(b))
+ .map_or(upper_bound, |pos| pos + chunk_offset)
+ });
+
+ upper_idx.map_or_else(|| self.len(), |idx| cursor.start() + idx)
+ }
+ }
+
pub fn append(&mut self, rope: Rope) {
if let Some(chunk) = rope.chunks.first()
&& (self
@@ -2069,6 +2139,103 @@ mod tests {
assert!(!rope.reversed_chunks_in_range(0..0).equals_str("foo"));
}
+ #[test]
+ fn test_is_char_boundary() {
+ let fixture = "å°";
+ let rope = Rope::from("å°");
+ for b in 0..=fixture.len() {
+ assert_eq!(rope.is_char_boundary(b), fixture.is_char_boundary(b));
+ }
+ let fixture = "";
+ let rope = Rope::from("");
+ for b in 0..=fixture.len() {
+ assert_eq!(rope.is_char_boundary(b), fixture.is_char_boundary(b));
+ }
+ let fixture = "š“š š”š¢šµš£ā«ļøāŖļøš¤\nš³ļøāā§ļøšš³ļøāšš“āā ļøā³ļøš¬šš“š³ļøš©";
+ let rope = Rope::from("š“š š”š¢šµš£ā«ļøāŖļøš¤\nš³ļøāā§ļøšš³ļøāšš“āā ļøā³ļøš¬šš“š³ļøš©");
+ for b in 0..=fixture.len() {
+ assert_eq!(rope.is_char_boundary(b), fixture.is_char_boundary(b));
+ }
+ }
+
+ #[test]
+ fn test_floor_char_boundary() {
+ // polyfill of str::floor_char_boundary
+ fn floor_char_boundary(str: &str, index: usize) -> usize {
+ if index >= str.len() {
+ str.len()
+ } else {
+ let lower_bound = index.saturating_sub(3);
+ let new_index = str.as_bytes()[lower_bound..=index]
+ .iter()
+ .rposition(|b| (*b as i8) >= -0x40);
+
+ lower_bound + new_index.unwrap()
+ }
+ }
+
+ let fixture = "å°";
+ let rope = Rope::from("å°");
+ for b in 0..=fixture.len() {
+ assert_eq!(
+ rope.floor_char_boundary(b),
+ floor_char_boundary(&fixture, b)
+ );
+ }
+
+ let fixture = "";
+ let rope = Rope::from("");
+ for b in 0..=fixture.len() {
+ assert_eq!(
+ rope.floor_char_boundary(b),
+ floor_char_boundary(&fixture, b)
+ );
+ }
+
+ let fixture = "š“š š”š¢šµš£ā«ļøāŖļøš¤\nš³ļøāā§ļøšš³ļøāšš“āā ļøā³ļøš¬šš“š³ļøš©";
+ let rope = Rope::from("š“š š”š¢šµš£ā«ļøāŖļøš¤\nš³ļøāā§ļøšš³ļøāšš“āā ļøā³ļøš¬šš“š³ļøš©");
+ for b in 0..=fixture.len() {
+ assert_eq!(
+ rope.floor_char_boundary(b),
+ floor_char_boundary(&fixture, b)
+ );
+ }
+ }
+
+ #[test]
+ fn test_ceil_char_boundary() {
+ // polyfill of str::ceil_char_boundary
+ fn ceil_char_boundary(str: &str, index: usize) -> usize {
+ if index > str.len() {
+ str.len()
+ } else {
+ let upper_bound = Ord::min(index + 4, str.len());
+ str.as_bytes()[index..upper_bound]
+ .iter()
+ .position(|b| (*b as i8) >= -0x40)
+ .map_or(upper_bound, |pos| pos + index)
+ }
+ }
+
+ let fixture = "å°";
+ let rope = Rope::from("å°");
+ for b in 0..=fixture.len() {
+ assert_eq!(rope.ceil_char_boundary(b), ceil_char_boundary(&fixture, b));
+ }
+
+ let fixture = "";
+ let rope = Rope::from("");
+ for b in 0..=fixture.len() {
+ assert_eq!(rope.ceil_char_boundary(b), ceil_char_boundary(&fixture, b));
+ }
+
+ let fixture = "š“š š”š¢šµš£ā«ļøāŖļøš¤\nš³ļøāā§ļøšš³ļøāšš“āā ļøā³ļøš¬šš“š³ļøš©";
+ let rope = Rope::from("š“š š”š¢šµš£ā«ļøāŖļøš¤\nš³ļøāā§ļøšš³ļøāšš“āā ļøā³ļøš¬šš“š³ļøš©");
+ for b in 0..=fixture.len() {
+ assert_eq!(rope.ceil_char_boundary(b), ceil_char_boundary(&fixture, b));
+ }
+ }
+
fn clip_offset(text: &str, mut offset: usize, bias: Bias) -> usize {
while !text.is_char_boundary(offset) {
match bias {
@@ -2400,6 +2400,17 @@ impl BufferSnapshot {
} else if bias == Bias::Right && offset == self.len() {
Anchor::MAX
} else {
+ if !self.visible_text.is_char_boundary(offset) {
+ // find the character
+ let char_start = self.visible_text.floor_char_boundary(offset);
+ // `char_start` must be less than len and a char boundary
+ let ch = self.visible_text.chars_at(char_start).next().unwrap();
+ let char_range = char_start..char_start + ch.len_utf8();
+ panic!(
+ "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
+ offset, ch, char_range,
+ );
+ }
let mut fragment_cursor = self.fragments.cursor::<usize>(&None);
fragment_cursor.seek(&offset, bias);
let fragment = fragment_cursor.item().unwrap();
@@ -3065,6 +3076,16 @@ impl operation_queue::Operation for Operation {
pub trait ToOffset {
fn to_offset(&self, snapshot: &BufferSnapshot) -> usize;
+ fn to_next_offset(&self, snapshot: &BufferSnapshot) -> usize {
+ snapshot
+ .visible_text
+ .ceil_char_boundary(self.to_offset(snapshot) + 1)
+ }
+ fn to_previous_offset(&self, snapshot: &BufferSnapshot) -> usize {
+ snapshot
+ .visible_text
+ .floor_char_boundary(self.to_offset(snapshot).saturating_sub(1))
+ }
}
impl ToOffset for Point {