diff --git a/crates/editor/src/highlight_matching_bracket.rs b/crates/editor/src/highlight_matching_bracket.rs index aa4e616924ad6bd47627bfd95e9a5c58587afc25..aa1647cac0ba3e9d7644871faeac783e44501c57 100644 --- a/crates/editor/src/highlight_matching_bracket.rs +++ b/crates/editor/src/highlight_matching_bracket.rs @@ -29,7 +29,9 @@ pub fn refresh_matching_bracket_highlights( if (editor.cursor_shape == CursorShape::Block || editor.cursor_shape == CursorShape::Hollow) && head < snapshot.buffer_snapshot.len() { - tail += 1; + if let Some(tail_ch) = snapshot.buffer_snapshot.chars_at(tail).next() { + tail += tail_ch.len_utf8(); + } } if let Some((opening_range, closing_range)) = snapshot diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index d5d83da47bc18a4fd15f59df2ddb2238ceb768d4..1f5c35f8df9288182b294b8e52a9cd7bdb531124 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -4121,8 +4121,7 @@ impl BufferSnapshot { range: Range, ) -> impl Iterator + '_ { // Find bracket pairs that *inclusively* contain the given range. - let range = range.start.to_offset(self).saturating_sub(1) - ..self.len().min(range.end.to_offset(self) + 1); + let range = range.start.to_previous_offset(self)..range.end.to_next_offset(self); self.all_bracket_ranges(range) .filter(|pair| !pair.newline_only) } @@ -4131,8 +4130,7 @@ impl BufferSnapshot { &self, range: Range, ) -> impl Iterator, DebuggerTextObject)> + '_ { - let range = range.start.to_offset(self).saturating_sub(1) - ..self.len().min(range.end.to_offset(self) + 1); + let range = range.start.to_previous_offset(self)..range.end.to_next_offset(self); let mut matches = self.syntax.matches_with_options( range.clone(), diff --git a/crates/multi_buffer/src/multi_buffer.rs b/crates/multi_buffer/src/multi_buffer.rs index c79bc03489be89ad00d10392c520fe13e7748a60..8d72e919a4802c1875c956fc3e6d72fc0e9a2ade 100644 --- a/crates/multi_buffer/src/multi_buffer.rs +++ b/crates/multi_buffer/src/multi_buffer.rs @@ -5259,7 +5259,8 @@ impl MultiBufferSnapshot { } else { Anchor::max() }; - // TODO this is a hack, remove it + + // TODO this is a hack, because all APIs should be able to handle ExcerptId::min and max. if let Some((excerpt_id, _, _)) = self.as_singleton() { anchor.excerpt_id = *excerpt_id; } diff --git a/crates/rope/src/rope.rs b/crates/rope/src/rope.rs index 9c98985989c2ac0fdcd5a39342dd9911d64dd01a..d802f972168348b15f003e1fd2fa567c1f9d75bb 100644 --- a/crates/rope/src/rope.rs +++ b/crates/rope/src/rope.rs @@ -30,6 +30,76 @@ impl Rope { Self::default() } + pub fn is_char_boundary(&self, offset: usize) -> bool { + if self.chunks.is_empty() { + return offset == 0; + } + let mut cursor = self.chunks.cursor::(&()); + cursor.seek(&offset, Bias::Left); + let chunk_offset = offset - cursor.start(); + cursor + .item() + .map(|chunk| chunk.text.is_char_boundary(chunk_offset)) + .unwrap_or(false) + } + + pub fn floor_char_boundary(&self, index: usize) -> usize { + if index >= self.len() { + self.len() + } else { + #[inline] + pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool { + // This is bit magic equivalent to: b < 128 || b >= 192 + (u8 as i8) >= -0x40 + } + + let mut cursor = self.chunks.cursor::(&()); + cursor.seek(&index, Bias::Left); + let chunk_offset = index - cursor.start(); + let lower_idx = cursor.item().map(|chunk| { + let lower_bound = chunk_offset.saturating_sub(3); + chunk + .text + .as_bytes() + .get(lower_bound..=chunk_offset) + .map(|it| { + let new_idx = it + .iter() + .rposition(|&b| is_utf8_char_boundary(b)) + .unwrap_or(0); + lower_bound + new_idx + }) + .unwrap_or(chunk.text.len()) + }); + lower_idx.map_or_else(|| self.len(), |idx| cursor.start() + idx) + } + } + + pub fn ceil_char_boundary(&self, index: usize) -> usize { + if index > self.len() { + self.len() + } else { + #[inline] + pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool { + // This is bit magic equivalent to: b < 128 || b >= 192 + (u8 as i8) >= -0x40 + } + + let mut cursor = self.chunks.cursor::(&()); + cursor.seek(&index, Bias::Left); + let chunk_offset = index - cursor.start(); + let upper_idx = cursor.item().map(|chunk| { + let upper_bound = Ord::min(chunk_offset + 4, chunk.text.len()); + chunk.text.as_bytes()[chunk_offset..upper_bound] + .iter() + .position(|&b| is_utf8_char_boundary(b)) + .map_or(upper_bound, |pos| pos + chunk_offset) + }); + + upper_idx.map_or_else(|| self.len(), |idx| cursor.start() + idx) + } + } + pub fn append(&mut self, rope: Rope) { if let Some(chunk) = rope.chunks.first() && (self @@ -2069,6 +2139,103 @@ mod tests { assert!(!rope.reversed_chunks_in_range(0..0).equals_str("foo")); } + #[test] + fn test_is_char_boundary() { + let fixture = "地"; + let rope = Rope::from("地"); + for b in 0..=fixture.len() { + assert_eq!(rope.is_char_boundary(b), fixture.is_char_boundary(b)); + } + let fixture = ""; + let rope = Rope::from(""); + for b in 0..=fixture.len() { + assert_eq!(rope.is_char_boundary(b), fixture.is_char_boundary(b)); + } + let fixture = "šŸ”“šŸŸ šŸŸ”šŸŸ¢šŸ”µšŸŸ£āš«ļøāšŖļøšŸŸ¤\nšŸ³ļøā€āš§ļøšŸšŸ³ļøā€šŸŒˆšŸ“ā€ā˜ ļøā›³ļøšŸ“¬šŸ“­šŸ“šŸ³ļøšŸš©"; + let rope = Rope::from("šŸ”“šŸŸ šŸŸ”šŸŸ¢šŸ”µšŸŸ£āš«ļøāšŖļøšŸŸ¤\nšŸ³ļøā€āš§ļøšŸšŸ³ļøā€šŸŒˆšŸ“ā€ā˜ ļøā›³ļøšŸ“¬šŸ“­šŸ“šŸ³ļøšŸš©"); + for b in 0..=fixture.len() { + assert_eq!(rope.is_char_boundary(b), fixture.is_char_boundary(b)); + } + } + + #[test] + fn test_floor_char_boundary() { + // polyfill of str::floor_char_boundary + fn floor_char_boundary(str: &str, index: usize) -> usize { + if index >= str.len() { + str.len() + } else { + let lower_bound = index.saturating_sub(3); + let new_index = str.as_bytes()[lower_bound..=index] + .iter() + .rposition(|b| (*b as i8) >= -0x40); + + lower_bound + new_index.unwrap() + } + } + + let fixture = "地"; + let rope = Rope::from("地"); + for b in 0..=fixture.len() { + assert_eq!( + rope.floor_char_boundary(b), + floor_char_boundary(&fixture, b) + ); + } + + let fixture = ""; + let rope = Rope::from(""); + for b in 0..=fixture.len() { + assert_eq!( + rope.floor_char_boundary(b), + floor_char_boundary(&fixture, b) + ); + } + + let fixture = "šŸ”“šŸŸ šŸŸ”šŸŸ¢šŸ”µšŸŸ£āš«ļøāšŖļøšŸŸ¤\nšŸ³ļøā€āš§ļøšŸšŸ³ļøā€šŸŒˆšŸ“ā€ā˜ ļøā›³ļøšŸ“¬šŸ“­šŸ“šŸ³ļøšŸš©"; + let rope = Rope::from("šŸ”“šŸŸ šŸŸ”šŸŸ¢šŸ”µšŸŸ£āš«ļøāšŖļøšŸŸ¤\nšŸ³ļøā€āš§ļøšŸšŸ³ļøā€šŸŒˆšŸ“ā€ā˜ ļøā›³ļøšŸ“¬šŸ“­šŸ“šŸ³ļøšŸš©"); + for b in 0..=fixture.len() { + assert_eq!( + rope.floor_char_boundary(b), + floor_char_boundary(&fixture, b) + ); + } + } + + #[test] + fn test_ceil_char_boundary() { + // polyfill of str::ceil_char_boundary + fn ceil_char_boundary(str: &str, index: usize) -> usize { + if index > str.len() { + str.len() + } else { + let upper_bound = Ord::min(index + 4, str.len()); + str.as_bytes()[index..upper_bound] + .iter() + .position(|b| (*b as i8) >= -0x40) + .map_or(upper_bound, |pos| pos + index) + } + } + + let fixture = "地"; + let rope = Rope::from("地"); + for b in 0..=fixture.len() { + assert_eq!(rope.ceil_char_boundary(b), ceil_char_boundary(&fixture, b)); + } + + let fixture = ""; + let rope = Rope::from(""); + for b in 0..=fixture.len() { + assert_eq!(rope.ceil_char_boundary(b), ceil_char_boundary(&fixture, b)); + } + + let fixture = "šŸ”“šŸŸ šŸŸ”šŸŸ¢šŸ”µšŸŸ£āš«ļøāšŖļøšŸŸ¤\nšŸ³ļøā€āš§ļøšŸšŸ³ļøā€šŸŒˆšŸ“ā€ā˜ ļøā›³ļøšŸ“¬šŸ“­šŸ“šŸ³ļøšŸš©"; + let rope = Rope::from("šŸ”“šŸŸ šŸŸ”šŸŸ¢šŸ”µšŸŸ£āš«ļøāšŖļøšŸŸ¤\nšŸ³ļøā€āš§ļøšŸšŸ³ļøā€šŸŒˆšŸ“ā€ā˜ ļøā›³ļøšŸ“¬šŸ“­šŸ“šŸ³ļøšŸš©"); + for b in 0..=fixture.len() { + assert_eq!(rope.ceil_char_boundary(b), ceil_char_boundary(&fixture, b)); + } + } + fn clip_offset(text: &str, mut offset: usize, bias: Bias) -> usize { while !text.is_char_boundary(offset) { match bias { diff --git a/crates/text/src/text.rs b/crates/text/src/text.rs index 590c30c8a73c13180e4d09dda1b3a071ef46ad7f..bbf9e2e3812238eebbdf92c63bd3b23819a9dac0 100644 --- a/crates/text/src/text.rs +++ b/crates/text/src/text.rs @@ -2400,6 +2400,17 @@ impl BufferSnapshot { } else if bias == Bias::Right && offset == self.len() { Anchor::MAX } else { + if !self.visible_text.is_char_boundary(offset) { + // find the character + let char_start = self.visible_text.floor_char_boundary(offset); + // `char_start` must be less than len and a char boundary + let ch = self.visible_text.chars_at(char_start).next().unwrap(); + let char_range = char_start..char_start + ch.len_utf8(); + panic!( + "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})", + offset, ch, char_range, + ); + } let mut fragment_cursor = self.fragments.cursor::(&None); fragment_cursor.seek(&offset, bias); let fragment = fragment_cursor.item().unwrap(); @@ -3065,6 +3076,16 @@ impl operation_queue::Operation for Operation { pub trait ToOffset { fn to_offset(&self, snapshot: &BufferSnapshot) -> usize; + fn to_next_offset(&self, snapshot: &BufferSnapshot) -> usize { + snapshot + .visible_text + .ceil_char_boundary(self.to_offset(snapshot) + 1) + } + fn to_previous_offset(&self, snapshot: &BufferSnapshot) -> usize { + snapshot + .visible_text + .floor_char_boundary(self.to_offset(snapshot).saturating_sub(1)) + } } impl ToOffset for Point {