Clamp UTF-16 coordinates while performing LSP edits rather than panicking

Julia created

Change summary

crates/project/src/project.rs |  2 +-
crates/rope/src/rope.rs       | 36 ++++++++++++++++++++++++------------
crates/text/src/text.rs       | 22 +++++++++++++++++++++-
3 files changed, 46 insertions(+), 14 deletions(-)

Detailed changes

crates/project/src/project.rs 🔗

@@ -5743,7 +5743,7 @@ impl Project {
                 // of any anchors positioned in the unchanged regions.
                 if range.end.row > range.start.row {
                     let mut offset = range.start.to_offset(&snapshot);
-                    let old_text = snapshot.text_for_range(range).collect::<String>();
+                    let old_text = snapshot.text_for_clamped_range(range).collect::<String>();
 
                     let diff = TextDiff::from_lines(old_text.as_str(), &new_text);
                     let mut moved_since_edit = true;

crates/rope/src/rope.rs 🔗

@@ -259,7 +259,7 @@ impl Rope {
                 .map_or(0, |chunk| chunk.point_to_offset(overshoot))
     }
 
-    pub fn point_utf16_to_offset(&self, point: PointUtf16) -> usize {
+    pub fn point_utf16_to_offset(&self, point: PointUtf16, clamp: bool) -> usize {
         if point >= self.summary().lines_utf16() {
             return self.summary().len;
         }
@@ -269,7 +269,7 @@ impl Rope {
         cursor.start().1
             + cursor
                 .item()
-                .map_or(0, |chunk| chunk.point_utf16_to_offset(overshoot))
+                .map_or(0, |chunk| chunk.point_utf16_to_offset(overshoot, clamp))
     }
 
     pub fn point_utf16_to_point(&self, point: PointUtf16) -> Point {
@@ -711,29 +711,41 @@ impl Chunk {
         point_utf16
     }
 
-    fn point_utf16_to_offset(&self, target: PointUtf16) -> usize {
+    fn point_utf16_to_offset(&self, target: PointUtf16, clamp: bool) -> usize {
         let mut offset = 0;
         let mut point = PointUtf16::new(0, 0);
         for ch in self.0.chars() {
-            if point >= target {
-                if point > target {
-                    panic!("point {:?} is inside of character {:?}", target, ch);
-                }
+            if point == target {
                 break;
             }
 
             if ch == '\n' {
                 point.row += 1;
+                point.column = 0;
+
                 if point.row > target.row {
+                    if clamp {
+                        //Return the offset up to but not including the newline
+                        return offset;
+                    }
                     panic!(
                         "point {:?} is beyond the end of a line with length {}",
                         target, point.column
                     );
                 }
-                point.column = 0;
             } else {
                 point.column += ch.len_utf16() as u32;
             }
+
+            if point > target {
+                if clamp {
+                    //Return the offset before adding the len of the codepoint which
+                    //we have landed within, bias left
+                    return offset;
+                }
+                panic!("point {:?} is inside of character {:?}", target, ch);
+            }
+
             offset += ch.len_utf8();
         }
         offset
@@ -1210,7 +1222,7 @@ mod tests {
                     point
                 );
                 assert_eq!(
-                    actual.point_utf16_to_offset(point_utf16),
+                    actual.point_utf16_to_offset(point_utf16, false),
                     ix,
                     "point_utf16_to_offset({:?})",
                     point_utf16
@@ -1250,9 +1262,9 @@ mod tests {
                 let left_point = actual.clip_point_utf16(point_utf16, Bias::Left);
                 let right_point = actual.clip_point_utf16(point_utf16, Bias::Right);
                 assert!(right_point >= left_point);
-                // Ensure translating UTF-16 points to offsets doesn't panic.
-                actual.point_utf16_to_offset(left_point);
-                actual.point_utf16_to_offset(right_point);
+                // Ensure translating valid UTF-16 points to offsets doesn't panic.
+                actual.point_utf16_to_offset(left_point, false);
+                actual.point_utf16_to_offset(right_point, false);
 
                 offset_utf16.0 += 1;
                 if unit == b'\n' as u16 {

crates/text/src/text.rs 🔗

@@ -1591,7 +1591,11 @@ impl BufferSnapshot {
     }
 
     pub fn point_utf16_to_offset(&self, point: PointUtf16) -> usize {
-        self.visible_text.point_utf16_to_offset(point)
+        self.visible_text.point_utf16_to_offset(point, false)
+    }
+
+    pub fn point_utf16_to_offset_clamped(&self, point: PointUtf16) -> usize {
+        self.visible_text.point_utf16_to_offset(point, true)
     }
 
     pub fn point_utf16_to_point(&self, point: PointUtf16) -> Point {
@@ -1649,6 +1653,12 @@ impl BufferSnapshot {
         self.visible_text.chunks_in_range(start..end)
     }
 
+    pub fn text_for_clamped_range<T: ToOffsetClamped>(&self, range: Range<T>) -> Chunks<'_> {
+        let start = range.start.to_offset_clamped(self);
+        let end = range.end.to_offset_clamped(self);
+        self.visible_text.chunks_in_range(start..end)
+    }
+
     pub fn line_len(&self, row: u32) -> u32 {
         let row_start_offset = Point::new(row, 0).to_offset(self);
         let row_end_offset = if row >= self.max_point().row {
@@ -2390,6 +2400,16 @@ impl<'a, T: ToOffset> ToOffset for &'a T {
     }
 }
 
+pub trait ToOffsetClamped {
+    fn to_offset_clamped(&self, snapshot: &BufferSnapshot) -> usize;
+}
+
+impl ToOffsetClamped for PointUtf16 {
+    fn to_offset_clamped<'a>(&self, snapshot: &BufferSnapshot) -> usize {
+        snapshot.point_utf16_to_offset_clamped(*self)
+    }
+}
+
 pub trait ToPoint {
     fn to_point(&self, snapshot: &BufferSnapshot) -> Point;
 }