ep: Add two new prompt formats (#51836)

Oleksiy Syvokon created

V0316 differences from V0306:
- Use a repeated marker (`<|marker_N|><|marker_N|>`) instead of `NO_EDITS` to signal that no edit is needed
- All intermediate markers are now emitted (should be more friendly to
speculative decoding)
- No newline padding around marker tags
- No git conflict markers; we now use SeedCoder's EOS tag instead

V0317 is like V0316, but marker numbers are relative to the block that
contains the cursor, which opens with `<|marker-0|>`
(`<|marker-2|> ... <|marker-1|> ... <|marker-0|> ... <|marker+1|> ... <|marker+2|>`)

Release Notes:

- N/A

Change summary

crates/edit_prediction/src/example_spec.rs      |  33 
crates/edit_prediction_cli/src/format_prompt.rs |  28 
crates/zeta_prompt/src/multi_region.rs          | 858 +++++++++++++++++++
crates/zeta_prompt/src/zeta_prompt.rs           | 164 +++
4 files changed, 1,078 insertions(+), 5 deletions(-)

Detailed changes

crates/edit_prediction/src/example_spec.rs 🔗

@@ -26,6 +26,14 @@ pub fn encode_cursor_in_patch(patch: &str, cursor_offset: Option<usize>) -> Stri
     let mut line_start_offset = 0usize;
 
     for line in patch.lines() {
+        if matches!(
+            DiffLine::parse(line),
+            DiffLine::Garbage(content)
+                if content.starts_with('#') && content.contains(CURSOR_POSITION_MARKER)
+        ) {
+            continue;
+        }
+
         if !result.is_empty() {
             result.push('\n');
         }
@@ -846,6 +854,31 @@ mod tests {
         assert_eq!(results, vec![(clean_patch, None)]);
     }
 
+    /// Encoding a patch that already contains a `#` cursor line, and then
+    /// encoding the result again, must yield the same text with exactly one
+    /// line containing the cursor marker.
+    #[test]
+    fn test_encode_cursor_in_patch_is_idempotent() {
+        let patch = indoc! {r#"
+            --- a/test.rs
+            +++ b/test.rs
+            @@ -1,2 +1,2 @@
+            -fn old() {}
+            +fn new_name() {}
+            #       ^[CURSOR_POSITION]
+        "#};
+
+        // Cursor sits at the start of "name" within the added line.
+        let cursor_offset = "fn new_name() {}".find("name").unwrap();
+        let encoded_once = encode_cursor_in_patch(patch, Some(cursor_offset));
+        let encoded_twice = encode_cursor_in_patch(&encoded_once, Some(cursor_offset));
+
+        assert_eq!(encoded_once, encoded_twice);
+        // The pre-existing cursor line must have been replaced, not duplicated.
+        assert_eq!(
+            encoded_once
+                .lines()
+                .filter(|line| line.contains(CURSOR_POSITION_MARKER))
+                .count(),
+            1
+        );
+    }
+
     #[test]
     fn test_from_markdown_accepted_prediction_marker() {
         let markdown = indoc! {r#"

crates/edit_prediction_cli/src/format_prompt.rs 🔗

@@ -136,6 +136,34 @@ pub fn zeta2_output_for_patch(
             },
         )?;
 
+    if version == ZetaFormat::V0317SeedMultiRegions {
+        let cursor_in_new = cursor_offset.map(|cursor_offset| {
+            let hunk_start = first_hunk_offset.unwrap_or(0);
+            result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
+        });
+        return multi_region::encode_from_old_and_new_v0317(
+            &old_editable_region,
+            &result,
+            cursor_in_new,
+            zeta_prompt::CURSOR_MARKER,
+            multi_region::V0317_END_MARKER,
+        );
+    }
+
+    if version == ZetaFormat::V0316SeedMultiRegions {
+        let cursor_in_new = cursor_offset.map(|cursor_offset| {
+            let hunk_start = first_hunk_offset.unwrap_or(0);
+            result.floor_char_boundary((hunk_start + cursor_offset).min(result.len()))
+        });
+        return multi_region::encode_from_old_and_new_v0316(
+            &old_editable_region,
+            &result,
+            cursor_in_new,
+            zeta_prompt::CURSOR_MARKER,
+            multi_region::V0316_END_MARKER,
+        );
+    }
+
     if version == ZetaFormat::V0306SeedMultiRegions {
         let cursor_in_new = cursor_offset.map(|cursor_offset| {
             let hunk_start = first_hunk_offset.unwrap_or(0);

crates/zeta_prompt/src/multi_region.rs 🔗

@@ -2,13 +2,26 @@ use anyhow::{Context as _, Result, anyhow};
 
 pub const MARKER_TAG_PREFIX: &str = "<|marker_";
+/// Closing text of a marker tag; used when parsing both absolute and
+/// cursor-relative tags.
 pub const MARKER_TAG_SUFFIX: &str = "|>";
+/// Opening text of a cursor-relative marker tag such as `<|marker-1|>`; the
+/// signed number follows directly. Note that this is also a prefix of
+/// `MARKER_TAG_PREFIX`.
+pub const RELATIVE_MARKER_TAG_PREFIX: &str = "<|marker";
 const MIN_BLOCK_LINES: usize = 3;
 const MAX_BLOCK_LINES: usize = 8;
+/// End tag for V0316 targets, intended to be passed as the `end_marker`
+/// argument of `encode_from_old_and_new_v0316`.
+pub const V0316_END_MARKER: &str = "<[end▁of▁sentence]>";
+/// End tag for V0317 targets (currently the same string as the V0316 one),
+/// intended to be passed as the `end_marker` argument of
+/// `encode_from_old_and_new_v0317`.
+pub const V0317_END_MARKER: &str = "<[end▁of▁sentence]>";
 
+/// Builds the absolute marker tag for `number` by wrapping it in
+/// `MARKER_TAG_PREFIX` and `MARKER_TAG_SUFFIX`, e.g. `<|marker_1|>` for 1.
 pub fn marker_tag(number: usize) -> String {
     format!("{MARKER_TAG_PREFIX}{number}{MARKER_TAG_SUFFIX}")
 }
 
+/// Builds the cursor-relative marker tag for `delta`.
+///
+/// Positive deltas get an explicit `+` (`<|marker+1|>`), negative deltas keep
+/// the `-` produced by integer formatting (`<|marker-1|>`), and zero is
+/// written as `<|marker-0|>`.
+pub fn marker_tag_relative(delta: isize) -> String {
+    if delta > 0 {
+        format!("<|marker+{delta}|>")
+    } else if delta == 0 {
+        // Zero has no sign of its own, so the `-0` spelling is fixed here.
+        String::from("<|marker-0|>")
+    } else {
+        format!("<|marker{delta}|>")
+    }
+}
+
 /// Compute byte offsets within `editable_text` where marker boundaries should
 /// be placed.
 ///
@@ -367,6 +380,622 @@ pub fn extract_editable_region_from_markers(text: &str) -> Option<String> {
     Some(result)
 }
 
+/// An absolute marker tag found in a piece of text, as produced by
+/// `collect_marker_tags`.
+struct MarkerTag {
+    /// Number parsed from between the tag prefix and suffix.
+    number: usize,
+    /// Byte offset of the first byte of the tag.
+    tag_start: usize,
+    /// Byte offset just past the tag's closing suffix.
+    tag_end: usize,
+}
+
+/// A cursor-relative marker tag found in a piece of text, as produced by
+/// `collect_relative_marker_tags`.
+struct RelativeMarkerTag {
+    /// Signed number parsed from between the tag prefix and suffix.
+    delta: isize,
+    /// Byte offset of the first byte of the tag.
+    tag_start: usize,
+    /// Byte offset just past the tag's closing suffix.
+    tag_end: usize,
+}
+
+/// Scans `text` from left to right and returns every well-formed absolute
+/// marker tag (`MARKER_TAG_PREFIX`, a `usize`, `MARKER_TAG_SUFFIX`) in order
+/// of appearance. Malformed candidates are skipped.
+fn collect_marker_tags(text: &str) -> Vec<MarkerTag> {
+    let mut markers = Vec::new();
+    let mut search_from = 0;
+    while let Some(rel_pos) = text[search_from..].find(MARKER_TAG_PREFIX) {
+        let tag_start = search_from + rel_pos;
+        let num_start = tag_start + MARKER_TAG_PREFIX.len();
+        // The number is whatever sits between the prefix and the next suffix.
+        if let Some(suffix_rel) = text[num_start..].find(MARKER_TAG_SUFFIX) {
+            let num_end = num_start + suffix_rel;
+            if let Ok(number) = text[num_start..num_end].parse::<usize>() {
+                let tag_end = num_end + MARKER_TAG_SUFFIX.len();
+                markers.push(MarkerTag {
+                    number,
+                    tag_start,
+                    tag_end,
+                });
+                // Resume after the whole tag so collected tags never overlap.
+                search_from = tag_end;
+                continue;
+            }
+        }
+        // No suffix, or a payload that is not a number: step past the prefix
+        // only and keep searching.
+        search_from = tag_start + MARKER_TAG_PREFIX.len();
+    }
+    markers
+}
+
+/// Scans `text` from left to right and returns every well-formed
+/// cursor-relative marker tag in order of appearance.
+///
+/// A tag is `RELATIVE_MARKER_TAG_PREFIX`, a payload that parses as `isize`
+/// (so `+1`, `-1` and `-0` are accepted), and `MARKER_TAG_SUFFIX`. Absolute
+/// tags such as `<|marker_1|>` also start with the relative prefix, but their
+/// `_1` payload does not parse and they are skipped.
+fn collect_relative_marker_tags(text: &str) -> Vec<RelativeMarkerTag> {
+    let mut markers = Vec::new();
+    let mut search_from = 0;
+    while let Some(rel_pos) = text[search_from..].find(RELATIVE_MARKER_TAG_PREFIX) {
+        let tag_start = search_from + rel_pos;
+        let payload_start = tag_start + RELATIVE_MARKER_TAG_PREFIX.len();
+        if let Some(suffix_rel) = text[payload_start..].find(MARKER_TAG_SUFFIX) {
+            let payload_end = payload_start + suffix_rel;
+            let payload = &text[payload_start..payload_end];
+            if let Ok(delta) = payload.parse::<isize>() {
+                let tag_end = payload_end + MARKER_TAG_SUFFIX.len();
+                markers.push(RelativeMarkerTag {
+                    delta,
+                    tag_start,
+                    tag_end,
+                });
+                // Resume after the whole tag so collected tags never overlap.
+                search_from = tag_end;
+                continue;
+            }
+        }
+        // No suffix, or a payload that is not a signed number: step past the
+        // prefix only and keep searching.
+        search_from = tag_start + RELATIVE_MARKER_TAG_PREFIX.len();
+    }
+    markers
+}
+
+/// Returns the 1-based number of the marker whose offset is closest to the
+/// cursor.
+///
+/// A missing cursor is treated as offset 0. When two markers are equally
+/// close the earlier one wins, and an empty `marker_offsets` yields 1.
+pub fn nearest_marker_number(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
+    let cursor = cursor_offset.unwrap_or(0);
+    marker_offsets
+        .iter()
+        .enumerate()
+        // Absolute distance, computed through signed casts.
+        // NOTE(review): assumes offsets and cursor fit in `isize`.
+        .min_by_key(|(_, offset)| (**offset as isize - cursor as isize).unsigned_abs())
+        // Marker numbers are 1-based, slice indices are 0-based.
+        .map(|(idx, _)| idx + 1)
+        .unwrap_or(1)
+}
+
+/// Returns the index `i` of the block that contains the cursor, i.e. the
+/// first `i` with `marker_offsets[i] <= cursor < marker_offsets[i + 1]`.
+///
+/// A missing cursor is treated as offset 0. If no block contains the cursor
+/// the result falls back to `marker_offsets.len() - 2` (the last block),
+/// saturating at 0 when there are fewer than two offsets.
+fn cursor_block_index(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
+    let cursor = cursor_offset.unwrap_or(0);
+    marker_offsets
+        .windows(2)
+        .position(|window| cursor >= window[0] && cursor < window[1])
+        .unwrap_or_else(|| marker_offsets.len().saturating_sub(2))
+}
+
+/// Write the editable region content with V0317 byte-exact marker tags, where
+/// marker numbers are relative to the cursor block.
+///
+/// A tag is appended to `output` for every offset returned by
+/// `compute_marker_offsets`, each followed by the exact bytes of
+/// `editable_text` up to the next offset; the final tag has no content after
+/// it. `cursor_marker` is inserted at most once, at
+/// `cursor_offset_in_editable`, in the first block whose range contains it.
+///
+/// NOTE(review): the cursor offset is used to slice a `&str`, so it presumably
+/// must lie on a char boundary of `editable_text`; confirm against callers.
+pub fn write_editable_with_markers_v0317(
+    output: &mut String,
+    editable_text: &str,
+    cursor_offset_in_editable: usize,
+    cursor_marker: &str,
+) {
+    let marker_offsets = compute_marker_offsets(editable_text);
+    // The marker that opens the cursor block is the one written with delta 0.
+    let anchor_idx = cursor_block_index(Some(cursor_offset_in_editable), &marker_offsets);
+    let mut cursor_placed = false;
+
+    for (i, &offset) in marker_offsets.iter().enumerate() {
+        let marker_delta = i as isize - anchor_idx as isize;
+        output.push_str(&marker_tag_relative(marker_delta));
+
+        // The last offset only closes the region; it has no block after it.
+        if let Some(&next_offset) = marker_offsets.get(i + 1) {
+            let block = &editable_text[offset..next_offset];
+            // NOTE(review): this range is inclusive at both ends while
+            // `cursor_block_index` uses a half-open range, so a cursor sitting
+            // exactly on a block boundary is written at the end of the block
+            // before the anchor. Confirm this is intended.
+            if !cursor_placed
+                && cursor_offset_in_editable >= offset
+                && cursor_offset_in_editable <= next_offset
+            {
+                cursor_placed = true;
+                let cursor_in_block = cursor_offset_in_editable - offset;
+                output.push_str(&block[..cursor_in_block]);
+                output.push_str(cursor_marker);
+                output.push_str(&block[cursor_in_block..]);
+            } else {
+                output.push_str(block);
+            }
+        }
+    }
+}
+
+/// Write the editable region content with V0316 byte-exact marker tags.
+///
+/// Unlike the V0306 version, markers are pure delimiters with no newline
+/// padding. The content between markers is the exact bytes from the editable
+/// text.
+///
+/// Markers are numbered from 1 in the order of the offsets returned by
+/// `compute_marker_offsets`; the final marker has no content after it.
+/// `cursor_marker` is inserted at most once, at `cursor_offset_in_editable`,
+/// in the first block whose range contains it.
+///
+/// NOTE(review): the cursor offset is used to slice a `&str`, so it presumably
+/// must lie on a char boundary of `editable_text`; confirm against callers.
+pub fn write_editable_with_markers_v0316(
+    output: &mut String,
+    editable_text: &str,
+    cursor_offset_in_editable: usize,
+    cursor_marker: &str,
+) {
+    let marker_offsets = compute_marker_offsets(editable_text);
+    let mut cursor_placed = false;
+    for (i, &offset) in marker_offsets.iter().enumerate() {
+        let marker_num = i + 1;
+        output.push_str(&marker_tag(marker_num));
+
+        // The last offset only closes the region; it has no block after it.
+        if let Some(&next_offset) = marker_offsets.get(i + 1) {
+            let block = &editable_text[offset..next_offset];
+            // Inclusive at both ends: a cursor exactly on a block boundary is
+            // written at the end of the earlier block.
+            if !cursor_placed
+                && cursor_offset_in_editable >= offset
+                && cursor_offset_in_editable <= next_offset
+            {
+                cursor_placed = true;
+                let cursor_in_block = cursor_offset_in_editable - offset;
+                output.push_str(&block[..cursor_in_block]);
+                output.push_str(cursor_marker);
+                output.push_str(&block[cursor_in_block..]);
+            } else {
+                output.push_str(block);
+            }
+        }
+    }
+}
+
+/// Parse V0316 model output and reconstruct the full new editable region.
+///
+/// V0316 differences from V0306:
+/// - No newline stripping or normalization (byte-exact content).
+/// - The no-edit signal is `start_num == end_num` (any repeated marker).
+/// - Intermediate marker tags are used for block-level extraction.
+///
+/// The first and last marker numbers select the byte range of `old_editable`
+/// to replace (via `compute_marker_offsets`); the replacement is the
+/// concatenation of everything between consecutive marker tags in `output`.
+/// Text before the first tag and after the last tag is ignored.
+///
+/// # Errors
+///
+/// Returns an error when `output` contains fewer than two marker tags, when a
+/// marker number is 0 or has no matching offset in `old_editable`, or when
+/// the start marker's offset lies after the end marker's offset.
+pub fn apply_marker_span_v0316(old_editable: &str, output: &str) -> Result<String> {
+    let markers = collect_marker_tags(output);
+
+    if markers.is_empty() {
+        return Err(anyhow!("no marker tags found in output"));
+    }
+
+    if markers.len() == 1 {
+        return Err(anyhow!(
+            "only one marker tag found in output, expected at least two"
+        ));
+    }
+
+    let start_num = markers
+        .first()
+        .map(|marker| marker.number)
+        .context("missing first marker")?;
+    let end_num = markers
+        .last()
+        .map(|marker| marker.number)
+        .context("missing last marker")?;
+
+    // No-edit signal: start_num == end_num
+    // Any content between the tags is discarded in this case.
+    if start_num == end_num {
+        return Ok(old_editable.to_string());
+    }
+
+    // Validate monotonically increasing with no gaps
+    // A mismatch is only reported on stderr; parsing continues using the
+    // first and last marker numbers.
+    let expected_nums: Vec<usize> = (start_num..=end_num).collect();
+    let actual_nums: Vec<usize> = markers.iter().map(|m| m.number).collect();
+    if actual_nums != expected_nums {
+        eprintln!(
+            "V0316 marker sequence validation failed: expected {:?}, got {:?}. Attempting best-effort parse.",
+            expected_nums, actual_nums
+        );
+    }
+
+    let marker_offsets = compute_marker_offsets(old_editable);
+
+    // Marker numbers are 1-based; convert to indices into `marker_offsets`.
+    let start_idx = start_num
+        .checked_sub(1)
+        .context("marker numbers are 1-indexed")?;
+    let end_idx = end_num
+        .checked_sub(1)
+        .context("marker numbers are 1-indexed")?;
+
+    let start_byte = *marker_offsets
+        .get(start_idx)
+        .context("start marker number out of range")?;
+    let end_byte = *marker_offsets
+        .get(end_idx)
+        .context("end marker number out of range")?;
+
+    if start_byte > end_byte {
+        return Err(anyhow!("start marker must come before end marker"));
+    }
+
+    // Extract byte-exact content between consecutive markers
+    let mut new_content = String::new();
+    for i in 0..markers.len() - 1 {
+        let content_start = markers[i].tag_end;
+        let content_end = markers[i + 1].tag_start;
+        // Tags are collected in order without overlap, so this guard is
+        // purely defensive.
+        if content_start <= content_end {
+            new_content.push_str(&output[content_start..content_end]);
+        }
+    }
+
+    // Splice into old_editable
+    let mut result = String::new();
+    result.push_str(&old_editable[..start_byte]);
+    result.push_str(&new_content);
+    result.push_str(&old_editable[end_byte..]);
+
+    Ok(result)
+}
+
+/// Parse V0317 model output and reconstruct the full new editable region.
+///
+/// V0317 differences from V0316:
+/// - Marker ids are relative to the cursor block (e.g. -2, -1, 0, +1, +2).
+/// - No-edit signal is any repeated relative marker tag.
+///
+/// Relative deltas are resolved against the block of `old_editable` that
+/// contains `cursor_offset_in_old` (a missing cursor is treated as offset 0).
+/// The first and last resolved markers select the byte range to replace; the
+/// replacement is the concatenation of everything between consecutive marker
+/// tags in `output`.
+///
+/// # Errors
+///
+/// Returns an error when `output` contains fewer than two relative marker
+/// tags, when a delta resolves to an index before the first marker or past
+/// the last one, or when the start marker's offset lies after the end
+/// marker's offset.
+pub fn apply_marker_span_v0317(
+    old_editable: &str,
+    output: &str,
+    cursor_offset_in_old: Option<usize>,
+) -> Result<String> {
+    let markers = collect_relative_marker_tags(output);
+
+    if markers.is_empty() {
+        return Err(anyhow!("no marker tags found in output"));
+    }
+
+    if markers.len() == 1 {
+        return Err(anyhow!(
+            "only one marker tag found in output, expected at least two"
+        ));
+    }
+
+    let marker_offsets = compute_marker_offsets(old_editable);
+    // Index of the marker that delta 0 refers to.
+    let anchor_idx = cursor_block_index(cursor_offset_in_old, &marker_offsets);
+
+    let start_delta = markers
+        .first()
+        .map(|marker| marker.delta)
+        .context("missing first marker")?;
+    let end_delta = markers
+        .last()
+        .map(|marker| marker.delta)
+        .context("missing last marker")?;
+
+    // No-edit signal: first and last tag carry the same delta. Any content
+    // between the tags is discarded.
+    if start_delta == end_delta {
+        return Ok(old_editable.to_string());
+    }
+
+    // Turn the relative deltas back into absolute indices into
+    // `marker_offsets`.
+    let start_idx_isize = anchor_idx as isize + start_delta;
+    let end_idx_isize = anchor_idx as isize + end_delta;
+    if start_idx_isize < 0 || end_idx_isize < 0 {
+        return Err(anyhow!("relative marker maps before first marker"));
+    }
+
+    let start_idx = usize::try_from(start_idx_isize).context("invalid start marker index")?;
+    let end_idx = usize::try_from(end_idx_isize).context("invalid end marker index")?;
+
+    let start_byte = *marker_offsets
+        .get(start_idx)
+        .context("start marker number out of range")?;
+    let end_byte = *marker_offsets
+        .get(end_idx)
+        .context("end marker number out of range")?;
+
+    if start_byte > end_byte {
+        return Err(anyhow!("start marker must come before end marker"));
+    }
+
+    // Concatenate the byte-exact content between consecutive tags.
+    let mut new_content = String::new();
+    for i in 0..markers.len() - 1 {
+        let content_start = markers[i].tag_end;
+        let content_end = markers[i + 1].tag_start;
+        // Tags are collected in order without overlap, so this guard is
+        // purely defensive.
+        if content_start <= content_end {
+            new_content.push_str(&output[content_start..content_end]);
+        }
+    }
+
+    // Splice the replacement between the untouched head and tail.
+    let mut result = String::new();
+    result.push_str(&old_editable[..start_byte]);
+    result.push_str(&new_content);
+    result.push_str(&old_editable[end_byte..]);
+
+    Ok(result)
+}
+
+/// Encode the V0316 training target from old and new editable text.
+///
+/// V0316 differences from V0306:
+/// - No-edit signal: `<|marker_C|><|marker_C|>{end_marker}` where C is nearest
+///   to cursor.
+/// - All intermediate markers are emitted with byte-exact content.
+/// - No newline padding around marker tags.
+///
+/// When the texts differ, the output covers the smallest run of blocks of
+/// `old_editable` that contains the change: one marker tag per block followed
+/// by that block's share of the new text, then a closing marker tag and
+/// `end_marker`. `cursor_marker` is inserted at most once, at
+/// `cursor_offset_in_new`, if that offset falls inside the emitted span.
+///
+/// NOTE(review): the indexing below assumes `compute_marker_offsets` returns
+/// at least one offset; confirm for empty `old_editable`.
+pub fn encode_from_old_and_new_v0316(
+    old_editable: &str,
+    new_editable: &str,
+    cursor_offset_in_new: Option<usize>,
+    cursor_marker: &str,
+    end_marker: &str,
+) -> Result<String> {
+    let marker_offsets = compute_marker_offsets(old_editable);
+
+    if old_editable == new_editable {
+        let marker_num = nearest_marker_number(cursor_offset_in_new, &marker_offsets);
+        let tag = marker_tag(marker_num);
+        return Ok(format!("{tag}{tag}{end_marker}"));
+    }
+
+    // Length in bytes of the identical head of both texts.
+    let common_prefix = old_editable
+        .bytes()
+        .zip(new_editable.bytes())
+        .take_while(|(a, b)| a == b)
+        .count();
+
+    // Length in bytes of the identical tail, capped so that it cannot overlap
+    // the common prefix in either text.
+    let old_remaining = old_editable.len() - common_prefix;
+    let new_remaining = new_editable.len() - common_prefix;
+    let max_suffix = old_remaining.min(new_remaining);
+    let common_suffix = old_editable.as_bytes()[old_editable.len() - max_suffix..]
+        .iter()
+        .rev()
+        .zip(
+            new_editable.as_bytes()[new_editable.len() - max_suffix..]
+                .iter()
+                .rev(),
+        )
+        .take_while(|(a, b)| a == b)
+        .count();
+
+    let change_end_in_old = old_editable.len() - common_suffix;
+
+    // Last marker at or before the change, and first marker at or after it.
+    let start_marker_idx = marker_offsets
+        .iter()
+        .rposition(|&offset| offset <= common_prefix)
+        .unwrap_or(0);
+    let end_marker_idx = marker_offsets
+        .iter()
+        .position(|&offset| offset >= change_end_in_old)
+        .unwrap_or(marker_offsets.len() - 1);
+
+    let old_start = marker_offsets[start_marker_idx];
+    let old_end = marker_offsets[end_marker_idx];
+
+    // The head is unchanged, so the span starts at the same offset in the new
+    // text; its end is located by keeping the same distance from the end.
+    let new_start = old_start;
+    let new_end = new_editable
+        .len()
+        .saturating_sub(old_editable.len().saturating_sub(old_end));
+
+    let new_span = &new_editable[new_start..new_end];
+    let old_span = &old_editable[old_start..old_end];
+
+    // Compute common prefix/suffix within the span for accurate boundary mapping
+    let span_common_prefix = old_span
+        .bytes()
+        .zip(new_span.bytes())
+        .take_while(|(a, b)| a == b)
+        .count();
+
+    let span_old_remaining = old_span.len() - span_common_prefix;
+    let span_new_remaining = new_span.len() - span_common_prefix;
+    let span_max_suffix = span_old_remaining.min(span_new_remaining);
+    let span_common_suffix = old_span.as_bytes()[old_span.len() - span_max_suffix..]
+        .iter()
+        .rev()
+        .zip(
+            new_span.as_bytes()[new_span.len() - span_max_suffix..]
+                .iter()
+                .rev(),
+        )
+        .take_while(|(a, b)| a == b)
+        .count();
+
+    let mut result = String::new();
+    // End of the previously emitted block, relative to the start of `new_span`.
+    let mut prev_new_rel = 0usize;
+    let mut cursor_placed = false;
+
+    for block_idx in start_marker_idx..end_marker_idx {
+        let marker_num = block_idx + 1;
+        result.push_str(&marker_tag(marker_num));
+
+        let new_rel_end = if block_idx + 1 == end_marker_idx {
+            // Last block: extends to end of new span
+            new_span.len()
+        } else {
+            // Map the intermediate boundary from old to new coordinates
+            let old_rel = marker_offsets[block_idx + 1] - old_start;
+            let mapped = map_boundary_offset(
+                old_rel,
+                old_span.len(),
+                new_span.len(),
+                span_common_prefix,
+                span_common_suffix,
+            );
+            // Snap down to a char boundary so the slice below cannot split a
+            // character
+            new_span.floor_char_boundary(mapped)
+        };
+
+        // Ensure monotonicity (each block gets at least zero content)
+        let new_rel_end = new_rel_end.max(prev_new_rel);
+
+        let block_content = &new_span[prev_new_rel..new_rel_end];
+
+        if !cursor_placed {
+            if let Some(cursor_offset) = cursor_offset_in_new {
+                // Block range in `new_editable` coordinates, inclusive at both
+                // ends.
+                let abs_start = new_start + prev_new_rel;
+                let abs_end = new_start + new_rel_end;
+                if cursor_offset >= abs_start && cursor_offset <= abs_end {
+                    cursor_placed = true;
+                    let cursor_in_block = cursor_offset - abs_start;
+                    let bounded = cursor_in_block.min(block_content.len());
+                    result.push_str(&block_content[..bounded]);
+                    result.push_str(cursor_marker);
+                    result.push_str(&block_content[bounded..]);
+                    prev_new_rel = new_rel_end;
+                    continue;
+                }
+            }
+        }
+
+        result.push_str(block_content);
+        prev_new_rel = new_rel_end;
+    }
+
+    // Final closing marker
+    let end_marker_num = end_marker_idx + 1;
+    result.push_str(&marker_tag(end_marker_num));
+    result.push_str(end_marker);
+
+    Ok(result)
+}
+
+/// Encode the V0317 training target from old and new editable text.
+///
+/// V0317 differences from V0316:
+/// - Marker ids are relative to cursor block (..., -2, -1, 0, +1, +2, ...).
+/// - No-edit signal: repeated cursor-relative marker.
+///
+/// The no-edit output is always `<|marker-0|><|marker-0|>{end_marker}`.
+/// Otherwise the output covers the smallest run of blocks of `old_editable`
+/// that contains the change: one relative marker tag per block followed by
+/// that block's share of the new text, then a closing tag and `end_marker`.
+///
+/// NOTE(review): the anchor block is found from `cursor_offset_in_new` against
+/// offsets computed from `old_editable`, while `apply_marker_span_v0317`
+/// anchors on the cursor offset in the old text. The two agree only if both
+/// offsets fall into the same block; verify against callers.
+///
+/// NOTE(review): the indexing below assumes `compute_marker_offsets` returns
+/// at least one offset; confirm for empty `old_editable`.
+pub fn encode_from_old_and_new_v0317(
+    old_editable: &str,
+    new_editable: &str,
+    cursor_offset_in_new: Option<usize>,
+    cursor_marker: &str,
+    end_marker: &str,
+) -> Result<String> {
+    let marker_offsets = compute_marker_offsets(old_editable);
+    let anchor_idx = cursor_block_index(cursor_offset_in_new, &marker_offsets);
+
+    if old_editable == new_editable {
+        let tag = marker_tag_relative(0);
+        return Ok(format!("{tag}{tag}{end_marker}"));
+    }
+
+    // Length in bytes of the identical head of both texts.
+    let common_prefix = old_editable
+        .bytes()
+        .zip(new_editable.bytes())
+        .take_while(|(a, b)| a == b)
+        .count();
+
+    // Length in bytes of the identical tail, capped so that it cannot overlap
+    // the common prefix in either text.
+    let old_remaining = old_editable.len() - common_prefix;
+    let new_remaining = new_editable.len() - common_prefix;
+    let max_suffix = old_remaining.min(new_remaining);
+    let common_suffix = old_editable.as_bytes()[old_editable.len() - max_suffix..]
+        .iter()
+        .rev()
+        .zip(
+            new_editable.as_bytes()[new_editable.len() - max_suffix..]
+                .iter()
+                .rev(),
+        )
+        .take_while(|(a, b)| a == b)
+        .count();
+
+    let change_end_in_old = old_editable.len() - common_suffix;
+
+    // Last marker at or before the change, and first marker at or after it.
+    let start_marker_idx = marker_offsets
+        .iter()
+        .rposition(|&offset| offset <= common_prefix)
+        .unwrap_or(0);
+    let end_marker_idx = marker_offsets
+        .iter()
+        .position(|&offset| offset >= change_end_in_old)
+        .unwrap_or(marker_offsets.len() - 1);
+
+    let old_start = marker_offsets[start_marker_idx];
+    let old_end = marker_offsets[end_marker_idx];
+
+    // The head is unchanged, so the span starts at the same offset in the new
+    // text; its end is located by keeping the same distance from the end.
+    let new_start = old_start;
+    let new_end = new_editable
+        .len()
+        .saturating_sub(old_editable.len().saturating_sub(old_end));
+
+    let new_span = &new_editable[new_start..new_end];
+    let old_span = &old_editable[old_start..old_end];
+
+    // Common prefix/suffix within the span, used to map the intermediate
+    // block boundaries from old to new coordinates.
+    let span_common_prefix = old_span
+        .bytes()
+        .zip(new_span.bytes())
+        .take_while(|(a, b)| a == b)
+        .count();
+
+    let span_old_remaining = old_span.len() - span_common_prefix;
+    let span_new_remaining = new_span.len() - span_common_prefix;
+    let span_max_suffix = span_old_remaining.min(span_new_remaining);
+    let span_common_suffix = old_span.as_bytes()[old_span.len() - span_max_suffix..]
+        .iter()
+        .rev()
+        .zip(
+            new_span.as_bytes()[new_span.len() - span_max_suffix..]
+                .iter()
+                .rev(),
+        )
+        .take_while(|(a, b)| a == b)
+        .count();
+
+    let mut result = String::new();
+    // End of the previously emitted block, relative to the start of `new_span`.
+    let mut prev_new_rel = 0usize;
+    let mut cursor_placed = false;
+
+    for block_idx in start_marker_idx..end_marker_idx {
+        let marker_delta = block_idx as isize - anchor_idx as isize;
+        result.push_str(&marker_tag_relative(marker_delta));
+
+        let new_rel_end = if block_idx + 1 == end_marker_idx {
+            // Last block: takes everything up to the end of the new span.
+            new_span.len()
+        } else {
+            let old_rel = marker_offsets[block_idx + 1] - old_start;
+            let mapped = map_boundary_offset(
+                old_rel,
+                old_span.len(),
+                new_span.len(),
+                span_common_prefix,
+                span_common_suffix,
+            );
+            // Snap down to a char boundary so the slice below cannot split a
+            // character.
+            new_span.floor_char_boundary(mapped)
+        };
+
+        // Never move backwards: a block may be empty but not negative.
+        let new_rel_end = new_rel_end.max(prev_new_rel);
+        let block_content = &new_span[prev_new_rel..new_rel_end];
+
+        if !cursor_placed {
+            if let Some(cursor_offset) = cursor_offset_in_new {
+                // Block range in `new_editable` coordinates, inclusive at both
+                // ends.
+                let abs_start = new_start + prev_new_rel;
+                let abs_end = new_start + new_rel_end;
+                if cursor_offset >= abs_start && cursor_offset <= abs_end {
+                    cursor_placed = true;
+                    let cursor_in_block = cursor_offset - abs_start;
+                    let bounded = cursor_in_block.min(block_content.len());
+                    result.push_str(&block_content[..bounded]);
+                    result.push_str(cursor_marker);
+                    result.push_str(&block_content[bounded..]);
+                    prev_new_rel = new_rel_end;
+                    continue;
+                }
+            }
+        }
+
+        result.push_str(block_content);
+        prev_new_rel = new_rel_end;
+    }
+
+    // Closing marker, also expressed relative to the anchor block.
+    let end_marker_delta = end_marker_idx as isize - anchor_idx as isize;
+    result.push_str(&marker_tag_relative(end_marker_delta));
+    result.push_str(end_marker);
+
+    Ok(result)
+}
+
+/// Map a byte offset from old span coordinates to new span coordinates,
+/// using common prefix/suffix within the span for accuracy.
+///
+/// - Offsets inside the common prefix are returned unchanged.
+/// - Offsets at or after the start of the common suffix keep their distance
+///   from the end of the span.
+/// - Offsets inside the changed middle are scaled linearly (integer division,
+///   rounding down) from the old changed length to the new changed length.
+///
+/// NOTE(review): assumes `span_common_prefix + span_common_suffix` does not
+/// exceed either span length and `old_rel <= old_span_len`, as the callers in
+/// this file arrange; other inputs can underflow the subtractions.
+fn map_boundary_offset(
+    old_rel: usize,
+    old_span_len: usize,
+    new_span_len: usize,
+    span_common_prefix: usize,
+    span_common_suffix: usize,
+) -> usize {
+    if old_rel <= span_common_prefix {
+        old_rel
+    } else if old_rel >= old_span_len - span_common_suffix {
+        new_span_len - (old_span_len - old_rel)
+    } else {
+        // Within the changed region: proportional mapping
+        let old_changed_start = span_common_prefix;
+        let old_changed_len = old_span_len
+            .saturating_sub(span_common_prefix)
+            .saturating_sub(span_common_suffix);
+        let new_changed_start = span_common_prefix;
+        let new_changed_len = new_span_len
+            .saturating_sub(span_common_prefix)
+            .saturating_sub(span_common_suffix);
+
+        if old_changed_len == 0 {
+            // Nothing to scale against; pin to the start of the changed region.
+            new_changed_start
+        } else {
+            new_changed_start + ((old_rel - old_changed_start) * new_changed_len / old_changed_len)
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -554,4 +1183,233 @@ mod tests {
             "line1\nline2"
         );
     }
+
+    /// Writing with V0316 markers must add nothing but tags and the cursor
+    /// marker: removing both gives back the editable text unchanged.
+    #[test]
+    fn test_write_editable_with_markers_v0316_byte_exact() {
+        let editable = "aaa\nbbb\nccc\n";
+        let mut output = String::new();
+        write_editable_with_markers_v0316(&mut output, editable, 4, "<|user_cursor|>");
+        // Should have marker tags with no extra newlines
+        assert!(output.starts_with("<|marker_1|>"));
+        assert!(output.contains("<|user_cursor|>"));
+        // Content should be byte-exact - no extra newlines added by markers
+        let stripped = output.replace("<|user_cursor|>", "");
+        let stripped = strip_marker_tags(&stripped);
+        assert_eq!(stripped, editable);
+    }
+
+    /// A single block delimited by markers 1 and 2 replaces the matching span
+    /// of the old text with the content between the tags.
+    #[test]
+    fn test_apply_marker_span_v0316_basic() {
+        let old = "aaa\nbbb\nccc\n";
+        let output = "<|marker_1|>aaa\nBBB\nccc\n<|marker_2|>";
+        let result = apply_marker_span_v0316(old, output).unwrap();
+        assert_eq!(result, "aaa\nBBB\nccc\n");
+    }
+
+    /// A repeated marker with nothing in between means "no edit": the old
+    /// text is returned unchanged.
+    #[test]
+    fn test_apply_marker_span_v0316_no_edit() {
+        let old = "aaa\nbbb\nccc\n";
+        let output = "<|marker_1|><|marker_1|>";
+        let result = apply_marker_span_v0316(old, output).unwrap();
+        assert_eq!(result, old);
+    }
+
+    /// The no-edit signal works for any repeated marker number, and content
+    /// between the two tags is ignored.
+    #[test]
+    fn test_apply_marker_span_v0316_no_edit_any_marker() {
+        let old = "aaa\nbbb\nccc\n";
+        let output = "<|marker_2|>ignored content<|marker_2|>";
+        let result = apply_marker_span_v0316(old, output).unwrap();
+        assert_eq!(result, old);
+    }
+
+    /// Output that spans every block, with an intermediate tag at each old
+    /// block boundary, reconstructs the complete new text.
+    #[test]
+    fn test_apply_marker_span_v0316_multi_block() {
+        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
+        let marker_offsets = compute_marker_offsets(old);
+        // The test is only meaningful when the text splits into several blocks.
+        assert!(
+            marker_offsets.len() >= 3,
+            "expected at least 3 offsets, got {:?}",
+            marker_offsets
+        );
+
+        // Build output spanning all blocks with new content
+        let new_content = "LINE1\nLINE2\nLINE3\n\nLINE5\nLINE6\nLINE7\nLINE8\n";
+        let mut output = String::new();
+        output.push_str("<|marker_1|>");
+        // Split new_content at old block boundaries
+        for i in 0..marker_offsets.len() - 1 {
+            // Marker 1 was already written above.
+            if i > 0 {
+                output.push_str(&marker_tag(i + 1));
+            }
+            let start = marker_offsets[i];
+            let end = marker_offsets[i + 1];
+            let block_len = end - start;
+            // Use same length blocks from new content (they happen to be same length)
+            output.push_str(&new_content[start..start + block_len]);
+        }
+        let last_marker_num = marker_offsets.len();
+        output.push_str(&marker_tag(last_marker_num));
+        let result = apply_marker_span_v0316(old, &output).unwrap();
+        assert_eq!(result, new_content);
+    }
+
+    /// Content between the tags is spliced in exactly as given; a missing
+    /// trailing newline is not restored.
+    #[test]
+    fn test_apply_marker_span_v0316_byte_exact_no_normalization() {
+        let old = "aaa\nbbb\nccc\n";
+        // Content doesn't end with \n - should NOT be normalized
+        let output = "<|marker_1|>aaa\nBBB\nccc<|marker_2|>";
+        let result = apply_marker_span_v0316(old, output).unwrap();
+        // V0316 is byte-exact: the missing trailing \n is NOT added
+        assert_eq!(result, "aaa\nBBB\nccc");
+    }
+
+    /// Encoding identical old and new text yields a target that ends with the
+    /// end marker and decodes back to the old text.
+    #[test]
+    fn test_encode_v0316_no_edits() {
+        let old = "aaa\nbbb\nccc\n";
+        let result =
+            encode_from_old_and_new_v0316(old, old, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
+        // Should be <|marker_K|><|marker_K|><|end|> where K is nearest to cursor
+        assert!(result.ends_with("<|end|>"));
+        // Parse it and verify it's a no-edit
+        let stripped = result.strip_suffix("<|end|>").unwrap();
+        let result_parsed = apply_marker_span_v0316(old, stripped).unwrap();
+        assert_eq!(result_parsed, old);
+    }
+
+    /// A one-line change in a short text is encoded between markers 1 and 2
+    /// and terminated by the end marker.
+    #[test]
+    fn test_encode_v0316_with_change() {
+        let old = "aaa\nbbb\nccc\n";
+        let new = "aaa\nBBB\nccc\n";
+        let result =
+            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
+        assert!(result.contains("<|marker_1|>"));
+        assert!(result.contains("<|marker_2|>"));
+        assert!(result.ends_with("<|end|>"));
+    }
+
+    /// Encoding a single-line change and applying the encoded target to the
+    /// old text must reproduce the new text.
+    #[test]
+    fn test_roundtrip_v0316() {
+        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
+        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
+        let encoded =
+            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
+        // The decoder takes the target without the trailing end marker.
+        let stripped = encoded
+            .strip_suffix("<|end|>")
+            .expect("should have end marker");
+        let reconstructed = apply_marker_span_v0316(old, stripped).unwrap();
+        assert_eq!(reconstructed, new);
+    }
+
+    /// With a cursor offset of 5 in the new text, the cursor marker is
+    /// written after the first `B` of the changed line. Despite its name,
+    /// this test only checks the encoder output and does not decode it.
+    #[test]
+    fn test_roundtrip_v0316_with_cursor() {
+        let old = "aaa\nbbb\nccc\n";
+        let new = "aaa\nBBB\nccc\n";
+        let result =
+            encode_from_old_and_new_v0316(old, new, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
+        assert!(result.contains("<|user_cursor|>"), "result: {result}");
+        assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
+    }
+
+    #[test]
+    fn test_roundtrip_v0316_multi_block_change() {
+        // Changes `line2` and `line6`, which the test name implies fall in different blocks.
+        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
+        let new = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
+        let encoded =
+            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
+        let Some(span) = encoded.strip_suffix("<|end|>") else {
+            panic!("should have end marker");
+        };
+        assert_eq!(apply_marker_span_v0316(old, span).unwrap(), new);
+    }
+
+    #[test]
+    fn test_nearest_marker_number() {
+        let offsets = vec![0, 10, 20, 30];
+        // Each pair is (cursor offset, expected marker number); numbering starts at 1.
+        let cases = [(Some(0), 1), (Some(9), 2), (Some(15), 2), (Some(25), 3), (Some(30), 4)];
+        for (cursor, expected) in cases {
+            assert_eq!(nearest_marker_number(cursor, &offsets), expected, "cursor: {cursor:?}");
+        }
+        assert_eq!(nearest_marker_number(None, &offsets), 1);
+    }
+
+    #[test]
+    fn test_marker_tag_relative_formats_as_expected() {
+        let up_to_zero = [(-2, "<|marker-2|>"), (-1, "<|marker-1|>"), (0, "<|marker-0|>")];
+        let after_zero = [(1, "<|marker+1|>"), (2, "<|marker+2|>")];
+        for (delta, tag) in up_to_zero.into_iter().chain(after_zero) {
+            assert_eq!(marker_tag_relative(delta), tag);
+        }
+    }
+
+    #[test]
+    fn test_write_editable_with_markers_v0317_includes_relative_markers_and_cursor() {
+        let editable = "aaa\nbbb\nccc\n";
+        let mut output = String::new();
+        write_editable_with_markers_v0317(&mut output, editable, 4, "<|user_cursor|>");
+
+        assert!(output.contains("<|marker-0|>"));
+        assert!(output.contains("<|user_cursor|>"));
+
+        // Removing the cursor marker and every collected marker tag must give back the
+        // original editable text unchanged.
+        let without_cursor = output.replace("<|user_cursor|>", "");
+        let mut plain = without_cursor.clone();
+        for marker in collect_relative_marker_tags(&without_cursor).iter() {
+            let tag = &without_cursor[marker.tag_start..marker.tag_end];
+            plain = plain.replace(tag, "");
+        }
+        assert_eq!(plain, editable);
+    }
+
+    #[test]
+    fn test_apply_marker_span_v0317_basic() {
+        // The span between `<|marker-0|>` and `<|marker+1|>` carries the edited text.
+        let base = "aaa\nbbb\nccc\n";
+        let output = "<|marker-0|>aaa\nBBB\nccc\n<|marker+1|>";
+        assert_eq!(apply_marker_span_v0317(base, output, Some(0)).unwrap(), "aaa\nBBB\nccc\n");
+    }
+
+    #[test]
+    fn test_apply_marker_span_v0317_no_edit() {
+        // A marker tag repeated back-to-back is the "no edit" encoding.
+        let base = "aaa\nbbb\nccc\n";
+        let output = "<|marker-0|><|marker-0|>";
+        assert_eq!(apply_marker_span_v0317(base, output, Some(0)).unwrap(), base);
+    }
+
+    #[test]
+    fn test_encode_v0317_no_edits() {
+        // With no change, the output is the `-0` marker repeated, followed by the end marker.
+        let src = "aaa\nbbb\nccc\n";
+        let got = encode_from_old_and_new_v0317(src, src, Some(5), "<|user_cursor|>", "<|end|>");
+        assert_eq!(got.unwrap(), "<|marker-0|><|marker-0|><|end|>");
+    }
+
+    #[test]
+    fn test_roundtrip_v0317() {
+        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
+        let new = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
+        let cursor = Some(6);
+
+        let encoded =
+            encode_from_old_and_new_v0317(old, new, cursor, "<|user_cursor|>", "<|end|>").unwrap();
+        let Some(span) = encoded.strip_suffix("<|end|>") else {
+            panic!("should have end marker");
+        };
+        // The cursor marker is dropped before the span is applied.
+        let span = span.replace("<|user_cursor|>", "");
+        assert_eq!(apply_marker_span_v0317(old, &span, cursor).unwrap(), new);
+    }
+
+    #[test]
+    fn test_roundtrip_v0317_with_cursor_marker() {
+        let (old, new) = ("aaa\nbbb\nccc\n", "aaa\nBBB\nccc\n");
+        let encoded =
+            encode_from_old_and_new_v0317(old, new, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
+        // The encoded edit should carry both the cursor marker and the `-0` marker tag.
+        assert!(encoded.contains("<|user_cursor|>"), "result: {encoded}");
+        assert!(encoded.contains("<|marker-0|>"), "result: {encoded}");
+    }
 }

crates/zeta_prompt/src/zeta_prompt.rs 🔗

@@ -82,7 +82,12 @@ pub enum ZetaFormat {
     v0226Hashline,
     V0304VariableEdit,
     V0304SeedNoEdits,
+    /// Multi-block marker spans with NO_EDITS sentinel.
     V0306SeedMultiRegions,
+    /// Byte-exact marker spans; all intermediate markers emitted; repeated marker means no-edit.
+    V0316SeedMultiRegions,
+    /// V0316, but marker numbers are relative to the cursor block (e.g. -1, -0, +1).
+    V0317SeedMultiRegions,
 }
 
 impl std::fmt::Display for ZetaFormat {
@@ -220,6 +225,30 @@ pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str]
         ZetaFormat::v0226Hashline => hashline::special_tokens(),
         ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
         ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
+        ZetaFormat::V0316SeedMultiRegions => {
+            static TOKENS: &[&str] = &[
+                seed_coder::FIM_SUFFIX,
+                seed_coder::FIM_PREFIX,
+                seed_coder::FIM_MIDDLE,
+                seed_coder::FILE_MARKER,
+                multi_region::V0316_END_MARKER,
+                CURSOR_MARKER,
+                multi_region::MARKER_TAG_PREFIX,
+            ];
+            TOKENS
+        }
+        ZetaFormat::V0317SeedMultiRegions => {
+            static TOKENS: &[&str] = &[
+                seed_coder::FIM_SUFFIX,
+                seed_coder::FIM_PREFIX,
+                seed_coder::FIM_MIDDLE,
+                seed_coder::FILE_MARKER,
+                multi_region::V0317_END_MARKER,
+                CURSOR_MARKER,
+                multi_region::RELATIVE_MARKER_TAG_PREFIX,
+            ];
+            TOKENS
+        }
         ZetaFormat::V0306SeedMultiRegions => {
             static TOKENS: &[&str] = &[
                 seed_coder::FIM_SUFFIX,
@@ -248,6 +277,8 @@ pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
         | ZetaFormat::V0211SeedCoder
         | ZetaFormat::v0226Hashline
         | ZetaFormat::V0306SeedMultiRegions
+        | ZetaFormat::V0316SeedMultiRegions
+        | ZetaFormat::V0317SeedMultiRegions
         | ZetaFormat::V0304SeedNoEdits => (350, 150),
         ZetaFormat::V0304VariableEdit => (1024, 0),
     }
@@ -266,6 +297,8 @@ pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
         | ZetaFormat::V0304VariableEdit
         | ZetaFormat::V0306SeedMultiRegions
         | ZetaFormat::V0304SeedNoEdits => &[],
+        ZetaFormat::V0316SeedMultiRegions => &[multi_region::V0316_END_MARKER],
+        ZetaFormat::V0317SeedMultiRegions => &[multi_region::V0317_END_MARKER],
     }
 }
 
@@ -288,7 +321,9 @@ pub fn excerpt_ranges_for_format(
         | ZetaFormat::V0211SeedCoder
         | ZetaFormat::v0226Hashline
         | ZetaFormat::V0304SeedNoEdits
-        | ZetaFormat::V0306SeedMultiRegions => (
+        | ZetaFormat::V0306SeedMultiRegions
+        | ZetaFormat::V0316SeedMultiRegions
+        | ZetaFormat::V0317SeedMultiRegions => (
             ranges.editable_350.clone(),
             ranges.editable_350_context_150.clone(),
         ),
@@ -371,6 +406,22 @@ pub fn write_cursor_excerpt_section_for_format(
                 cursor_offset,
             ));
         }
+        ZetaFormat::V0316SeedMultiRegions => {
+            prompt.push_str(&build_v0316_cursor_prefix(
+                path,
+                context,
+                editable_range,
+                cursor_offset,
+            ));
+        }
+        ZetaFormat::V0317SeedMultiRegions => {
+            prompt.push_str(&build_v0317_cursor_prefix(
+                path,
+                context,
+                editable_range,
+                cursor_offset,
+            ));
+        }
     }
 }
 
@@ -403,6 +454,60 @@ fn build_v0306_cursor_prefix(
     section
 }
 
+/// Builds the V0316 cursor-excerpt section: file marker + path line, the context preceding the
+/// editable region, then the editable region as written by `write_editable_with_markers_v0316`.
+fn build_v0316_cursor_prefix(
+    path: &Path,
+    context: &str,
+    editable_range: &Range<usize>,
+    cursor_offset: usize,
+) -> String {
+    let mut section = String::new();
+    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path.to_string_lossy()).ok();
+    section.push_str(&context[..editable_range.start]);
+    // Saturate instead of underflowing when the cursor sits before the editable region; this
+    // matches how `parse_zeta2_model_output` derives the in-editable cursor offset.
+    let cursor_in_editable = cursor_offset.saturating_sub(editable_range.start);
+    multi_region::write_editable_with_markers_v0316(
+        &mut section,
+        &context[editable_range.clone()],
+        cursor_in_editable,
+        CURSOR_MARKER,
+    );
+    // Guarantee the section is newline-terminated.
+    if !section.ends_with('\n') {
+        section.push('\n');
+    }
+    section
+}
+
+/// Builds the V0317 cursor-excerpt section: file marker + path line, the context preceding the
+/// editable region, then the editable region as written by `write_editable_with_markers_v0317`.
+fn build_v0317_cursor_prefix(
+    path: &Path,
+    context: &str,
+    editable_range: &Range<usize>,
+    cursor_offset: usize,
+) -> String {
+    let mut section = String::new();
+    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path.to_string_lossy()).ok();
+    section.push_str(&context[..editable_range.start]);
+    // Saturate instead of underflowing when the cursor sits before the editable region; this
+    // matches how `parse_zeta2_model_output` derives the in-editable cursor offset.
+    let cursor_in_editable = cursor_offset.saturating_sub(editable_range.start);
+    multi_region::write_editable_with_markers_v0317(
+        &mut section,
+        &context[editable_range.clone()],
+        cursor_in_editable,
+        CURSOR_MARKER,
+    );
+    // Guarantee the section is newline-terminated.
+    if !section.ends_with('\n') {
+        section.push('\n');
+    }
+    section
+}
+
 fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
     let start_row = text[0..range.start].matches('\n').count() as u32;
     let mut end_row = start_row + text[range.clone()].matches('\n').count() as u32;
@@ -439,7 +544,9 @@ pub fn format_prompt_with_budget_for_format(
     let prompt = match format {
         ZetaFormat::V0211SeedCoder
         | ZetaFormat::V0304SeedNoEdits
-        | ZetaFormat::V0306SeedMultiRegions => {
+        | ZetaFormat::V0306SeedMultiRegions
+        | ZetaFormat::V0316SeedMultiRegions
+        | ZetaFormat::V0317SeedMultiRegions => {
             let mut cursor_section = String::new();
             write_cursor_excerpt_section_for_format(
                 format,
@@ -533,7 +640,9 @@ pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
         | ZetaFormat::v0226Hashline
         | ZetaFormat::V0304SeedNoEdits
         | ZetaFormat::V0304VariableEdit
-        | ZetaFormat::V0306SeedMultiRegions => 6,
+        | ZetaFormat::V0306SeedMultiRegions
+        | ZetaFormat::V0316SeedMultiRegions
+        | ZetaFormat::V0317SeedMultiRegions => 6,
     }
 }
 
@@ -552,7 +661,10 @@ pub fn get_prefill_for_format(
         | ZetaFormat::V0211SeedCoder
         | ZetaFormat::v0226Hashline
         | ZetaFormat::V0304VariableEdit => String::new(),
-        ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => String::new(),
+        ZetaFormat::V0304SeedNoEdits
+        | ZetaFormat::V0306SeedMultiRegions
+        | ZetaFormat::V0316SeedMultiRegions
+        | ZetaFormat::V0317SeedMultiRegions => String::new(),
     }
 }
 
@@ -564,6 +676,8 @@ pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str>
         ZetaFormat::V0211SeedCoder
         | ZetaFormat::V0304SeedNoEdits
         | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
+        ZetaFormat::V0316SeedMultiRegions => Some(multi_region::V0316_END_MARKER),
+        ZetaFormat::V0317SeedMultiRegions => Some(multi_region::V0317_END_MARKER),
         ZetaFormat::V0112MiddleAtEnd
         | ZetaFormat::V0113Ordered
         | ZetaFormat::V0114180EditableRegion
@@ -591,6 +705,33 @@ pub fn encode_patch_as_output_for_format(
         ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
             Ok(seed_coder::no_edits(patch))
         }
+        ZetaFormat::V0316SeedMultiRegions => {
+            let empty_patch = patch.lines().count() <= 3;
+            if empty_patch {
+                let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
+                let marker_num =
+                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
+                let tag = multi_region::marker_tag(marker_num);
+                Ok(Some(format!(
+                    "{tag}{tag}{}",
+                    multi_region::V0316_END_MARKER
+                )))
+            } else {
+                Ok(None)
+            }
+        }
+        ZetaFormat::V0317SeedMultiRegions => {
+            let empty_patch = patch.lines().count() <= 3;
+            if empty_patch {
+                let tag = multi_region::marker_tag_relative(0);
+                Ok(Some(format!(
+                    "{tag}{tag}{}",
+                    multi_region::V0317_END_MARKER
+                )))
+            } else {
+                Ok(None)
+            }
+        }
         _ => Ok(None),
     }
 }
@@ -613,10 +754,11 @@ pub fn parse_zeta2_model_output(
         None => output,
     };
 
-    let (context, editable_range_in_context, context_range, _) =
+    let (context, editable_range_in_context, context_range, cursor_offset) =
         resolve_cursor_region(prompt_inputs, format);
     let context_start = context_range.start;
     let old_editable_region = &context[editable_range_in_context.clone()];
+    let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range_in_context.start);
 
     let (range_in_context, output) = match format {
         ZetaFormat::v0226Hashline => (
@@ -644,6 +786,18 @@ pub fn parse_zeta2_model_output(
                 multi_region::apply_marker_span(old_editable_region, output)?
             },
         ),
+        ZetaFormat::V0316SeedMultiRegions => (
+            editable_range_in_context,
+            multi_region::apply_marker_span_v0316(old_editable_region, output)?,
+        ),
+        ZetaFormat::V0317SeedMultiRegions => (
+            editable_range_in_context,
+            multi_region::apply_marker_span_v0317(
+                old_editable_region,
+                output,
+                Some(cursor_offset_in_editable),
+            )?,
+        ),
         _ => (editable_range_in_context, output.to_string()),
     };