Handle newlines better in parse_edits (#48960) (cherry-pick to preview) (#49369)

zed-zippy[bot] , Eric Holk , and Ben Kunkle created

Cherry-pick of #48960 to preview

----
Release Notes:

- Fix a potential crash around multibyte characters in edit predictions

---------

Co-authored-by: Ben Kunkle <ben@zed.dev>

Co-authored-by: Eric Holk <eric@zed.dev>
Co-authored-by: Ben Kunkle <ben@zed.dev>

Change summary

crates/edit_prediction/src/zeta1.rs | 50 +++++++++++++++++++++++++++++-
1 file changed, 47 insertions(+), 3 deletions(-)

Detailed changes

crates/edit_prediction/src/zeta1.rs 🔗

@@ -318,14 +318,32 @@ pub(crate) fn parse_edits(
 
     let content_start = start_markers
         .first()
-        .map(|e| e.0 + EDITABLE_REGION_START_MARKER.len() + 1) // +1 to skip \n after marker
+        .map(|e| e.0 + EDITABLE_REGION_START_MARKER.len())
+        .map(|start| {
+            if content.len() > start
+                && content.is_char_boundary(start)
+                && content[start..].starts_with('\n')
+            {
+                start + 1
+            } else {
+                start
+            }
+        })
         .unwrap_or(0);
     let content_end = end_markers
         .first()
-        .map(|e| e.0.saturating_sub(1)) // -1 to exclude \n before marker
+        .map(|e| {
+            if e.0 > 0 && content.is_char_boundary(e.0 - 1) && content[e.0 - 1..].starts_with('\n')
+            {
+                e.0 - 1
+            } else {
+                e.0
+            }
+        })
         .unwrap_or(content.strip_suffix("\n").unwrap_or(&content).len());
 
-    // if there is a single newline between markers, content_start will be 1 more than content_end. .min ensures empty slice in that case
+    // min to account for content_end and content_start both accounting for the same newline in the following case:
+    // <|editable_region_start|>\n<|editable_region_end|>
     let new_text = &content[content_start.min(content_end)..content_end];
 
     let old_text = snapshot
@@ -733,4 +751,30 @@ mod tests {
         assert_eq!(range.to_offset(&snapshot), 0..text.len(),);
         assert_eq!(new_text.as_ref(), "");
     }
+
+    #[gpui::test]
+    fn test_parse_edits_multibyte_char_before_end_marker(cx: &mut App) {
+        let text = "// café";
+        let buffer = cx.new(|cx| Buffer::local(text, cx));
+        let snapshot = buffer.read(cx).snapshot();
+
+        let output = "<|editable_region_start|>\n// café<|editable_region_end|>";
+        let editable_range = 0..text.len();
+
+        let edits = parse_edits(output, editable_range, &snapshot).unwrap();
+        assert_eq!(edits, vec![]);
+    }
+
+    #[gpui::test]
+    fn test_parse_edits_multibyte_char_after_start_marker(cx: &mut App) {
+        let text = "é is great";
+        let buffer = cx.new(|cx| Buffer::local(text, cx));
+        let snapshot = buffer.read(cx).snapshot();
+
+        let output = "<|editable_region_start|>é is great\n<|editable_region_end|>";
+        let editable_range = 0..text.len();
+
+        let edits = parse_edits(output, editable_range, &snapshot).unwrap();
+        assert!(edits.is_empty());
+    }
 }