Introduce `Rope::clip_offset_utf16`

Antonio Scandurra created

Change summary

crates/editor/src/multi_buffer.rs |  4 +-
crates/text/src/rope.rs           | 59 +++++++++++++++++++++++++++++---
crates/text/src/tests.rs          | 10 ++--
3 files changed, 59 insertions(+), 14 deletions(-)

Detailed changes

crates/editor/src/multi_buffer.rs 🔗

@@ -1888,7 +1888,7 @@ impl MultiBufferSnapshot {
                 .offset_to_offset_utf16(excerpt_start_offset + overshoot);
             *start_offset_utf16 + (buffer_offset_utf16 - excerpt_start_offset_utf16)
         } else {
-            OffsetUtf16(self.excerpts.summary().text.len_utf16)
+            self.excerpts.summary().text.len_utf16
         }
     }
 
@@ -2935,7 +2935,7 @@ impl<'a> sum_tree::SeekTarget<'a, ExcerptSummary, ExcerptSummary> for Option<&'a
 
 impl<'a> sum_tree::Dimension<'a, ExcerptSummary> for OffsetUtf16 {
     fn add_summary(&mut self, summary: &'a ExcerptSummary, _: &()) {
-        self.0 += summary.text.len_utf16;
+        *self += summary.text.len_utf16;
     }
 }
 

crates/text/src/rope.rs 🔗

@@ -166,7 +166,7 @@ impl Rope {
 
     pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
         if offset >= self.summary().len {
-            return OffsetUtf16(self.summary().len_utf16);
+            return self.summary().len_utf16;
         }
         let mut cursor = self.chunks.cursor::<(usize, OffsetUtf16)>();
         cursor.seek(&offset, Bias::Left, &());
@@ -178,7 +178,7 @@ impl Rope {
     }
 
     pub fn offset_utf16_to_offset(&self, offset: OffsetUtf16) -> usize {
-        if offset.0 >= self.summary().len_utf16 {
+        if offset >= self.summary().len_utf16 {
             return self.summary().len;
         }
         let mut cursor = self.chunks.cursor::<(OffsetUtf16, usize)>();
@@ -291,6 +291,17 @@ impl Rope {
         }
     }
 
+    pub fn clip_offset_utf16(&self, offset: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
+        let mut cursor = self.chunks.cursor::<OffsetUtf16>();
+        cursor.seek(&offset, Bias::Right, &());
+        if let Some(chunk) = cursor.item() {
+            let overshoot = offset - cursor.start();
+            *cursor.start() + chunk.clip_offset_utf16(overshoot, bias)
+        } else {
+            self.summary().len_utf16
+        }
+    }
+
     pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
         let mut cursor = self.chunks.cursor::<Point>();
         cursor.seek(&point, Bias::Right, &());
@@ -765,6 +776,18 @@ impl Chunk {
         }
         unreachable!()
     }
+
+    fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
+        let mut code_units = self.0.encode_utf16();
+        let mut offset = code_units.by_ref().take(target.0 as usize).count();
+        if char::decode_utf16(code_units).next().transpose().is_err() {
+            match bias {
+                Bias::Left => offset -= 1,
+                Bias::Right => offset += 1,
+            }
+        }
+        OffsetUtf16(offset)
+    }
 }
 
 impl sum_tree::Item for Chunk {
@@ -802,7 +825,7 @@ impl sum_tree::Summary for ChunkSummary {
 #[derive(Clone, Debug, Default, Eq, PartialEq)]
 pub struct TextSummary {
     pub len: usize,
-    pub len_utf16: usize,
+    pub len_utf16: OffsetUtf16,
     pub lines: Point,
     pub lines_utf16: PointUtf16,
     pub first_line_chars: u32,
@@ -813,7 +836,7 @@ pub struct TextSummary {
 
 impl<'a> From<&'a str> for TextSummary {
     fn from(text: &'a str) -> Self {
-        let mut len_utf16 = 0;
+        let mut len_utf16 = OffsetUtf16(0);
         let mut lines = Point::new(0, 0);
         let mut lines_utf16 = PointUtf16::new(0, 0);
         let mut first_line_chars = 0;
@@ -821,7 +844,7 @@ impl<'a> From<&'a str> for TextSummary {
         let mut longest_row = 0;
         let mut longest_row_chars = 0;
         for c in text.chars() {
-            len_utf16 += c.len_utf16();
+            len_utf16.0 += c.len_utf16();
 
             if c == '\n' {
                 lines += Point::new(1, 0);
@@ -961,13 +984,13 @@ impl TextDimension for usize {
 
 impl<'a> sum_tree::Dimension<'a, ChunkSummary> for OffsetUtf16 {
     fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
-        self.0 += summary.text.len_utf16;
+        *self += summary.text.len_utf16;
     }
 }
 
 impl TextDimension for OffsetUtf16 {
     fn from_text_summary(summary: &TextSummary) -> Self {
-        Self(summary.len_utf16)
+        summary.len_utf16
     }
 
     fn add_assign(&mut self, other: &Self) {
@@ -1075,6 +1098,19 @@ mod tests {
             rope.clip_point_utf16(PointUtf16::new(0, 3), Bias::Right),
             PointUtf16::new(0, 2)
         );
+
+        assert_eq!(
+            rope.clip_offset_utf16(OffsetUtf16(1), Bias::Left),
+            OffsetUtf16(0)
+        );
+        assert_eq!(
+            rope.clip_offset_utf16(OffsetUtf16(1), Bias::Right),
+            OffsetUtf16(2)
+        );
+        assert_eq!(
+            rope.clip_offset_utf16(OffsetUtf16(3), Bias::Right),
+            OffsetUtf16(2)
+        );
     }
 
     #[gpui::test(iterations = 100)]
@@ -1174,8 +1210,16 @@ mod tests {
                 offset_utf16.0 += ch.len_utf16();
             }
 
+            let mut offset_utf16 = OffsetUtf16(0);
             let mut point_utf16 = PointUtf16::zero();
             for unit in expected.encode_utf16() {
+                let left_offset = actual.clip_offset_utf16(offset_utf16, Bias::Left);
+                let right_offset = actual.clip_offset_utf16(offset_utf16, Bias::Right);
+                assert!(right_offset >= left_offset);
+                // Ensure translating UTF-16 offsets to UTF-8 offsets doesn't panic.
+                actual.offset_utf16_to_offset(left_offset);
+                actual.offset_utf16_to_offset(right_offset);
+
                 let left_point = actual.clip_point_utf16(point_utf16, Bias::Left);
                 let right_point = actual.clip_point_utf16(point_utf16, Bias::Right);
                 assert!(right_point >= left_point);
@@ -1183,6 +1227,7 @@ mod tests {
                 actual.point_utf16_to_offset(left_point);
                 actual.point_utf16_to_offset(right_point);
 
+                offset_utf16.0 += 1;
                 if unit == b'\n' as u16 {
                     point_utf16 += PointUtf16::new(1, 0);
                 } else {

crates/text/src/tests.rs 🔗

@@ -248,7 +248,7 @@ fn test_text_summary_for_range() {
         buffer.text_summary_for_range::<TextSummary, _>(1..3),
         TextSummary {
             len: 2,
-            len_utf16: 2,
+            len_utf16: OffsetUtf16(2),
             lines: Point::new(1, 0),
             lines_utf16: PointUtf16::new(1, 0),
             first_line_chars: 1,
@@ -261,7 +261,7 @@ fn test_text_summary_for_range() {
         buffer.text_summary_for_range::<TextSummary, _>(1..12),
         TextSummary {
             len: 11,
-            len_utf16: 11,
+            len_utf16: OffsetUtf16(11),
             lines: Point::new(3, 0),
             lines_utf16: PointUtf16::new(3, 0),
             first_line_chars: 1,
@@ -274,7 +274,7 @@ fn test_text_summary_for_range() {
         buffer.text_summary_for_range::<TextSummary, _>(0..20),
         TextSummary {
             len: 20,
-            len_utf16: 20,
+            len_utf16: OffsetUtf16(20),
             lines: Point::new(4, 1),
             lines_utf16: PointUtf16::new(4, 1),
             first_line_chars: 2,
@@ -287,7 +287,7 @@ fn test_text_summary_for_range() {
         buffer.text_summary_for_range::<TextSummary, _>(0..22),
         TextSummary {
             len: 22,
-            len_utf16: 22,
+            len_utf16: OffsetUtf16(22),
             lines: Point::new(4, 3),
             lines_utf16: PointUtf16::new(4, 3),
             first_line_chars: 2,
@@ -300,7 +300,7 @@ fn test_text_summary_for_range() {
         buffer.text_summary_for_range::<TextSummary, _>(7..22),
         TextSummary {
             len: 15,
-            len_utf16: 15,
+            len_utf16: OffsetUtf16(15),
             lines: Point::new(2, 3),
             lines_utf16: PointUtf16::new(2, 3),
             first_line_chars: 4,