diff --git a/crates/edit_prediction/src/zeta1.rs b/crates/edit_prediction/src/zeta1.rs index 3ab56d77280981197b0fb289cbca28f9e98b41ff..927d206c257fda4253b13261c013cef869b787e4 100644 --- a/crates/edit_prediction/src/zeta1.rs +++ b/crates/edit_prediction/src/zeta1.rs @@ -323,14 +323,32 @@ pub(crate) fn parse_edits( let content_start = start_markers .first() - .map(|e| e.0 + EDITABLE_REGION_START_MARKER.len() + 1) // +1 to skip \n after marker + .map(|e| e.0 + EDITABLE_REGION_START_MARKER.len()) + .map(|start| { + if content.len() > start + && content.is_char_boundary(start) + && content[start..].starts_with('\n') + { + start + 1 + } else { + start + } + }) .unwrap_or(0); let content_end = end_markers .first() - .map(|e| e.0.saturating_sub(1)) // -1 to exclude \n before marker + .map(|e| { + if e.0 > 0 && content.is_char_boundary(e.0 - 1) && content[e.0 - 1..].starts_with('\n') + { + e.0 - 1 + } else { + e.0 + } + }) .unwrap_or(content.strip_suffix("\n").unwrap_or(&content).len()); - // if there is a single newline between markers, content_start will be 1 more than content_end. .min ensures empty slice in that case + // min to account for content_end and content_start both accounting for the same newline in the following case: + // <|editable_region_start|>\n<|editable_region_end|> let new_text = &content[content_start.min(content_end)..content_end]; let old_text = snapshot @@ -738,4 +756,30 @@ mod tests { assert_eq!(range.to_offset(&snapshot), 0..text.len(),); assert_eq!(new_text.as_ref(), ""); } + + #[gpui::test] + fn test_parse_edits_multibyte_char_before_end_marker(cx: &mut App) { + let text = "// café"; + let buffer = cx.new(|cx| Buffer::local(text, cx)); + let snapshot = buffer.read(cx).snapshot(); + + let output = "<|editable_region_start|>\n// café<|editable_region_end|>"; + let editable_range = 0..text.len(); + + let edits = parse_edits(output, editable_range, &snapshot).unwrap(); + assert_eq!(edits, vec![]); + } + + #[gpui::test] + fn test_parse_edits_multibyte_char_after_start_marker(cx: &mut App) { + let text = "é is great"; + let buffer = cx.new(|cx| Buffer::local(text, cx)); + let snapshot = buffer.read(cx).snapshot(); + + let output = "<|editable_region_start|>é is great\n<|editable_region_end|>"; + let editable_range = 0..text.len(); + + let edits = parse_edits(output, editable_range, &snapshot).unwrap(); + assert!(edits.is_empty()); + } }