@@ -2,13 +2,26 @@ use anyhow::{Context as _, Result, anyhow};
pub const MARKER_TAG_PREFIX: &str = "<|marker_"; // opening of an absolute tag; `marker_tag` appends the number and the suffix
pub const MARKER_TAG_SUFFIX: &str = "|>"; // closing delimiter searched for by both the absolute and the relative tag collectors
+pub const RELATIVE_MARKER_TAG_PREFIX: &str = "<|marker"; // opening of a relative tag such as `<|marker+1|>`; note it is also a prefix of MARKER_TAG_PREFIX
const MIN_BLOCK_LINES: usize = 3; // NOTE(review): not referenced in the visible code; presumably a block-size bound used by compute_marker_offsets — confirm
const MAX_BLOCK_LINES: usize = 8; // NOTE(review): not referenced in the visible code; presumably a block-size bound used by compute_marker_offsets — confirm
+pub const V0316_END_MARKER: &str = "<[end▁of▁sentence]>"; // end-of-output marker for the V0316 format
+pub const V0317_END_MARKER: &str = "<[end▁of▁sentence]>"; // end-of-output marker for the V0317 format; same literal as V0316_END_MARKER
/// Builds the absolute marker tag for `number` by joining the tag prefix,
/// the decimal number and the tag suffix (e.g. `<|marker_3|>`).
pub fn marker_tag(number: usize) -> String {
    let digits = number.to_string();
    [MARKER_TAG_PREFIX, digits.as_str(), MARKER_TAG_SUFFIX].concat()
}
+/// Builds a cursor-relative marker tag.
+///
+/// Positive deltas get an explicit `+` sign, negative deltas keep their `-`
+/// sign, and zero is spelled `-0` (i.e. `<|marker-0|>`).
+pub fn marker_tag_relative(delta: isize) -> String {
+    match delta {
+        0 => String::from("<|marker-0|>"),
+        ahead if ahead > 0 => format!("<|marker+{ahead}|>"),
+        behind => format!("<|marker{behind}|>"),
+    }
+}
+
/// Compute byte offsets within `editable_text` where marker boundaries should
/// be placed.
///
@@ -367,6 +380,622 @@ pub fn extract_editable_region_from_markers(text: &str) -> Option<String> {
Some(result)
}
+struct MarkerTag { // one absolute `<|marker_N|>` tag found by collect_marker_tags
+ number: usize, // N, parsed from the text between the prefix and the suffix
+ tag_start: usize, // byte offset of the first byte of the tag prefix
+ tag_end: usize, // byte offset just past the tag suffix
+}
+
+struct RelativeMarkerTag { // one relative tag (e.g. `<|marker+1|>`) found by collect_relative_marker_tags
+ delta: isize, // signed payload parsed from the text between the prefix and the suffix
+ tag_start: usize, // byte offset of the first byte of the tag prefix
+ tag_end: usize, // byte offset just past the tag suffix
+}
+
+/// Scans `text` from left to right and returns every well-formed absolute
+/// marker tag (prefix, unsigned number, suffix) together with its byte range.
+///
+/// A prefix whose payload does not parse as `usize`, or that has no suffix
+/// after it, is skipped and scanning resumes right after that prefix.
+fn collect_marker_tags(text: &str) -> Vec<MarkerTag> {
+    let mut found = Vec::new();
+    let mut scan_pos = 0;
+    while let Some(hit) = text[scan_pos..].find(MARKER_TAG_PREFIX) {
+        let tag_start = scan_pos + hit;
+        let digits_start = tag_start + MARKER_TAG_PREFIX.len();
+        // Default resume point: just past this prefix. Overridden below when
+        // the tag turns out to be complete and valid.
+        scan_pos = digits_start;
+
+        let parsed = text[digits_start..]
+            .find(MARKER_TAG_SUFFIX)
+            .map(|suffix_rel| digits_start + suffix_rel)
+            .and_then(|digits_end| {
+                text[digits_start..digits_end]
+                    .parse::<usize>()
+                    .ok()
+                    .map(|number| (number, digits_end + MARKER_TAG_SUFFIX.len()))
+            });
+
+        if let Some((number, tag_end)) = parsed {
+            found.push(MarkerTag {
+                number,
+                tag_start,
+                tag_end,
+            });
+            scan_pos = tag_end;
+        }
+    }
+    found
+}
+
+/// Scans `text` from left to right and returns every relative marker tag
+/// whose payload (the text between the relative prefix and the suffix) parses
+/// as `isize`, together with its byte range.
+///
+/// Candidates that fail to parse (for example absolute `<|marker_3|>` tags,
+/// whose payload starts with `_`) are skipped and scanning resumes right
+/// after the prefix.
+fn collect_relative_marker_tags(text: &str) -> Vec<RelativeMarkerTag> {
+    let prefix_len = RELATIVE_MARKER_TAG_PREFIX.len();
+    let suffix_len = MARKER_TAG_SUFFIX.len();
+    let mut tags = Vec::new();
+    let mut scan_pos = 0;
+    while let Some(found_at) = text[scan_pos..].find(RELATIVE_MARKER_TAG_PREFIX) {
+        let tag_start = scan_pos + found_at;
+        let payload_start = tag_start + prefix_len;
+
+        let delta_and_end = match text[payload_start..].find(MARKER_TAG_SUFFIX) {
+            Some(suffix_at) => {
+                let payload_end = payload_start + suffix_at;
+                match text[payload_start..payload_end].parse::<isize>() {
+                    Ok(delta) => Some((delta, payload_end + suffix_len)),
+                    Err(_) => None,
+                }
+            }
+            None => None,
+        };
+
+        scan_pos = match delta_and_end {
+            Some((delta, tag_end)) => {
+                tags.push(RelativeMarkerTag {
+                    delta,
+                    tag_start,
+                    tag_end,
+                });
+                tag_end
+            }
+            None => payload_start,
+        };
+    }
+    tags
+}
+
+/// Returns the 1-based number of the marker offset closest to the cursor.
+///
+/// A missing cursor is treated as offset 0. Ties go to the earliest marker,
+/// and an empty `marker_offsets` yields 1.
+pub fn nearest_marker_number(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
+    let cursor = cursor_offset.unwrap_or(0);
+    let mut closest: Option<(usize, usize)> = None;
+    for (idx, &offset) in marker_offsets.iter().enumerate() {
+        let distance = (offset as isize - cursor as isize).unsigned_abs();
+        // Strict comparison keeps the first of several equally close markers.
+        let is_closer = match closest {
+            Some((_, best_distance)) => distance < best_distance,
+            None => true,
+        };
+        if is_closer {
+            closest = Some((idx, distance));
+        }
+    }
+    match closest {
+        Some((idx, _)) => idx + 1,
+        None => 1,
+    }
+}
+
+/// Returns the index of the block `[marker_offsets[i], marker_offsets[i + 1])`
+/// that contains the cursor (a missing cursor counts as offset 0).
+///
+/// When no half-open block contains the cursor, falls back to the last block
+/// (`len - 2`, saturating at 0).
+fn cursor_block_index(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
+    let cursor = cursor_offset.unwrap_or(0);
+    for (block, bounds) in marker_offsets.windows(2).enumerate() {
+        if (bounds[0]..bounds[1]).contains(&cursor) {
+            return block;
+        }
+    }
+    marker_offsets.len().saturating_sub(2)
+}
+
+/// Write the editable region content with V0317 byte-exact marker tags, where
+/// marker numbers are relative to the cursor block.
+///
+/// Every marker offset gets a tag `<|marker±k|>` whose `k` is its distance
+/// from the block returned by `cursor_block_index`; the text between tags is
+/// copied verbatim. `cursor_marker` is inserted inside the anchor block (the
+/// one tagged `<|marker-0|>`), so a cursor sitting exactly on a block boundary
+/// is rendered at the start of the block it anchors rather than at the end of
+/// the preceding one.
+///
+/// If the cursor offset lies outside the anchor block's byte range (e.g. past
+/// the end of `editable_text`), no cursor marker is written.
+pub fn write_editable_with_markers_v0317(
+    output: &mut String,
+    editable_text: &str,
+    cursor_offset_in_editable: usize,
+    cursor_marker: &str,
+) {
+    let marker_offsets = compute_marker_offsets(editable_text);
+    let anchor_idx = cursor_block_index(Some(cursor_offset_in_editable), &marker_offsets);
+
+    for (i, &offset) in marker_offsets.iter().enumerate() {
+        let marker_delta = i as isize - anchor_idx as isize;
+        output.push_str(&marker_tag_relative(marker_delta));
+
+        // The final offset only closes the last block; no content follows it.
+        let Some(&next_offset) = marker_offsets.get(i + 1) else {
+            continue;
+        };
+        let block = &editable_text[offset..next_offset];
+
+        // Only the anchor block may carry the cursor. The inclusive upper
+        // bound covers a cursor at the very end of the last block.
+        let holds_cursor =
+            i == anchor_idx && (offset..=next_offset).contains(&cursor_offset_in_editable);
+        if holds_cursor {
+            let (before, after) = block.split_at(cursor_offset_in_editable - offset);
+            output.push_str(before);
+            output.push_str(cursor_marker);
+            output.push_str(after);
+        } else {
+            output.push_str(block);
+        }
+    }
+}
+
+/// Write the editable region content with V0316 byte-exact marker tags.
+///
+/// Tags are numbered from 1 in marker-offset order and act as pure
+/// delimiters: the text between two tags is copied from `editable_text`
+/// without any added newlines. `cursor_marker` is inserted once, in the first
+/// block whose inclusive byte range contains the cursor offset.
+pub fn write_editable_with_markers_v0316(
+    output: &mut String,
+    editable_text: &str,
+    cursor_offset_in_editable: usize,
+    cursor_marker: &str,
+) {
+    let marker_offsets = compute_marker_offsets(editable_text);
+    let mut cursor_pending = true;
+    let mut block_ends = marker_offsets.iter().copied().skip(1);
+
+    for (index, block_start) in marker_offsets.iter().copied().enumerate() {
+        output.push_str(&marker_tag(index + 1));
+
+        // The last offset has no following block; only its tag is written.
+        let Some(block_end) = block_ends.next() else {
+            continue;
+        };
+        let block = &editable_text[block_start..block_end];
+
+        let holds_cursor =
+            cursor_pending && (block_start..=block_end).contains(&cursor_offset_in_editable);
+        if !holds_cursor {
+            output.push_str(block);
+            continue;
+        }
+
+        cursor_pending = false;
+        let split_at = cursor_offset_in_editable - block_start;
+        output.push_str(&block[..split_at]);
+        output.push_str(cursor_marker);
+        output.push_str(&block[split_at..]);
+    }
+}
+
+/// Parse V0316 model output and reconstruct the full new editable region.
+///
+/// V0316 differences from V0306:
+/// - No newline stripping or normalization (byte-exact content).
+/// - The no-edit signal is `start_num == end_num` (any repeated marker).
+/// - Intermediate marker tags are used for block-level extraction.
+///
+/// The first and last tags in `output` select the byte range of
+/// `old_editable` to replace (via `compute_marker_offsets`); the replacement
+/// is the concatenation of everything found between consecutive tags.
+///
+/// # Errors
+/// Fails when `output` has fewer than two tags, when a marker number is 0 or
+/// beyond the markers of `old_editable`, or when the start marker lies after
+/// the end marker. A non-contiguous marker sequence is only logged to stderr
+/// and parsing continues on a best-effort basis.
+pub fn apply_marker_span_v0316(old_editable: &str, output: &str) -> Result<String> {
+    let markers = collect_marker_tags(output);
+
+    let (start_num, end_num) = match markers.as_slice() {
+        [] => return Err(anyhow!("no marker tags found in output")),
+        [_] => {
+            return Err(anyhow!(
+                "only one marker tag found in output, expected at least two"
+            ));
+        }
+        [first, .., last] => (first.number, last.number),
+    };
+
+    // No-edit signal: start_num == end_num
+    if start_num == end_num {
+        return Ok(old_editable.to_string());
+    }
+
+    // Validate monotonically increasing with no gaps. The comparison is lazy
+    // on purpose: marker numbers come from model output, so materializing
+    // `start_num..=end_num` could try to allocate an arbitrarily large vector.
+    let is_contiguous = markers
+        .iter()
+        .map(|marker| marker.number)
+        .eq(start_num..=end_num);
+    if !is_contiguous {
+        let actual_nums: Vec<usize> = markers.iter().map(|marker| marker.number).collect();
+        eprintln!(
+            "V0316 marker sequence validation failed: expected {}..={}, got {:?}. Attempting best-effort parse.",
+            start_num, end_num, actual_nums
+        );
+    }
+
+    let marker_offsets = compute_marker_offsets(old_editable);
+
+    let start_idx = start_num
+        .checked_sub(1)
+        .context("marker numbers are 1-indexed")?;
+    let end_idx = end_num
+        .checked_sub(1)
+        .context("marker numbers are 1-indexed")?;
+
+    let start_byte = *marker_offsets
+        .get(start_idx)
+        .context("start marker number out of range")?;
+    let end_byte = *marker_offsets
+        .get(end_idx)
+        .context("end marker number out of range")?;
+
+    if start_byte > end_byte {
+        return Err(anyhow!("start marker must come before end marker"));
+    }
+
+    // Extract byte-exact content between consecutive markers. Tags are
+    // collected left to right without overlap, so each gap is a valid range.
+    let new_content: String = markers
+        .windows(2)
+        .map(|pair| &output[pair[0].tag_end..pair[1].tag_start])
+        .collect();
+
+    // Splice into old_editable
+    let mut result = String::with_capacity(
+        start_byte + new_content.len() + (old_editable.len() - end_byte),
+    );
+    result.push_str(&old_editable[..start_byte]);
+    result.push_str(&new_content);
+    result.push_str(&old_editable[end_byte..]);
+
+    Ok(result)
+}
+
+/// Parse V0317 model output and reconstruct the full new editable region.
+///
+/// V0317 differences from V0316:
+/// - Marker ids are relative to the cursor block (e.g. -2, -1, 0, +1, +2).
+/// - No-edit signal is any repeated relative marker tag.
+///
+/// Relative ids are resolved against the block that `cursor_block_index`
+/// returns for `cursor_offset_in_old`. The first and last tags select the
+/// byte range of `old_editable` to replace; the replacement is the
+/// concatenation of everything found between consecutive tags.
+///
+/// # Errors
+/// Fails when `output` has fewer than two relative tags, when a delta cannot
+/// be added to the anchor index without overflow, when a resolved marker
+/// index is negative or beyond the markers of `old_editable`, or when the
+/// start marker lies after the end marker.
+pub fn apply_marker_span_v0317(
+    old_editable: &str,
+    output: &str,
+    cursor_offset_in_old: Option<usize>,
+) -> Result<String> {
+    let markers = collect_relative_marker_tags(output);
+
+    let (start_delta, end_delta) = match markers.as_slice() {
+        [] => return Err(anyhow!("no marker tags found in output")),
+        [_] => {
+            return Err(anyhow!(
+                "only one marker tag found in output, expected at least two"
+            ));
+        }
+        [first, .., last] => (first.delta, last.delta),
+    };
+
+    if start_delta == end_delta {
+        return Ok(old_editable.to_string());
+    }
+
+    let marker_offsets = compute_marker_offsets(old_editable);
+    let anchor_idx = cursor_block_index(cursor_offset_in_old, &marker_offsets);
+
+    // Deltas come straight from model output and may be as large as
+    // `isize::MAX`, so the addition must be checked rather than allowed to
+    // overflow (panic in debug builds, wrap-around in release builds).
+    let anchor = isize::try_from(anchor_idx).context("anchor block index out of range")?;
+    let start_idx_isize = anchor
+        .checked_add(start_delta)
+        .context("start marker delta out of range")?;
+    let end_idx_isize = anchor
+        .checked_add(end_delta)
+        .context("end marker delta out of range")?;
+    if start_idx_isize < 0 || end_idx_isize < 0 {
+        return Err(anyhow!("relative marker maps before first marker"));
+    }
+
+    let start_idx = usize::try_from(start_idx_isize).context("invalid start marker index")?;
+    let end_idx = usize::try_from(end_idx_isize).context("invalid end marker index")?;
+
+    let start_byte = *marker_offsets
+        .get(start_idx)
+        .context("start marker number out of range")?;
+    let end_byte = *marker_offsets
+        .get(end_idx)
+        .context("end marker number out of range")?;
+
+    if start_byte > end_byte {
+        return Err(anyhow!("start marker must come before end marker"));
+    }
+
+    // Tags are collected left to right without overlap, so the gap between
+    // two consecutive tags is always a valid byte range of `output`.
+    let new_content: String = markers
+        .windows(2)
+        .map(|pair| &output[pair[0].tag_end..pair[1].tag_start])
+        .collect();
+
+    let mut result = String::with_capacity(
+        start_byte + new_content.len() + (old_editable.len() - end_byte),
+    );
+    result.push_str(&old_editable[..start_byte]);
+    result.push_str(&new_content);
+    result.push_str(&old_editable[end_byte..]);
+
+    Ok(result)
+}
+
+/// Encode the V0316 training target from old and new editable text.
+///
+/// V0316 differences from V0306:
+/// - No-edit signal: `<|marker_C|><|marker_C|>{end_marker}` where C is nearest
+///   to cursor.
+/// - All intermediate markers are emitted with byte-exact content.
+/// - No newline padding around marker tags.
+///
+/// # Errors
+/// Fails when `compute_marker_offsets` yields no offsets for `old_editable`.
+pub fn encode_from_old_and_new_v0316(
+    old_editable: &str,
+    new_editable: &str,
+    cursor_offset_in_new: Option<usize>,
+    cursor_marker: &str,
+    end_marker: &str,
+) -> Result<String> {
+    let marker_offsets = compute_marker_offsets(old_editable);
+
+    if old_editable == new_editable {
+        let marker_num = nearest_marker_number(cursor_offset_in_new, &marker_offsets);
+        let tag = marker_tag(marker_num);
+        return Ok(format!("{tag}{tag}{end_marker}"));
+    }
+
+    // Absolute tags are 1-based marker indices.
+    encode_changed_span(
+        old_editable,
+        new_editable,
+        cursor_offset_in_new,
+        cursor_marker,
+        end_marker,
+        &marker_offsets,
+        |marker_idx| marker_tag(marker_idx + 1),
+    )
+}
+
+/// Encode the V0317 training target from old and new editable text.
+///
+/// V0317 differences from V0316:
+/// - Marker ids are relative to cursor block (..., -2, -1, 0, +1, +2, ...).
+/// - No-edit signal: repeated cursor-relative marker.
+///
+/// NOTE(review): the anchor block is derived from `cursor_offset_in_new`
+/// measured against marker offsets of `old_editable`; confirm this matches the
+/// cursor offset the decoder passes to `apply_marker_span_v0317`.
+///
+/// # Errors
+/// Fails when `compute_marker_offsets` yields no offsets for `old_editable`.
+pub fn encode_from_old_and_new_v0317(
+    old_editable: &str,
+    new_editable: &str,
+    cursor_offset_in_new: Option<usize>,
+    cursor_marker: &str,
+    end_marker: &str,
+) -> Result<String> {
+    let marker_offsets = compute_marker_offsets(old_editable);
+    let anchor_idx = cursor_block_index(cursor_offset_in_new, &marker_offsets);
+
+    if old_editable == new_editable {
+        let tag = marker_tag_relative(0);
+        return Ok(format!("{tag}{tag}{end_marker}"));
+    }
+
+    // Relative tags are the signed distance from the anchor block.
+    encode_changed_span(
+        old_editable,
+        new_editable,
+        cursor_offset_in_new,
+        cursor_marker,
+        end_marker,
+        &marker_offsets,
+        |marker_idx| marker_tag_relative(marker_idx as isize - anchor_idx as isize),
+    )
+}
+
+/// Returns the lengths of the longest common byte prefix and the longest
+/// common byte suffix of `old` and `new`, with the suffix limited so that it
+/// never overlaps the prefix in either input.
+fn common_affix_lens(old: &[u8], new: &[u8]) -> (usize, usize) {
+    let prefix = old
+        .iter()
+        .zip(new.iter())
+        .take_while(|(a, b)| a == b)
+        .count();
+    let max_suffix = (old.len() - prefix).min(new.len() - prefix);
+    let suffix = old
+        .iter()
+        .rev()
+        .zip(new.iter().rev())
+        .take(max_suffix)
+        .take_while(|(a, b)| a == b)
+        .count();
+    (prefix, suffix)
+}
+
+/// Shared encoder for the V0316/V0317 targets when old and new text differ.
+///
+/// Finds the smallest run of old blocks that covers the change, then emits
+/// `tag(start) block tag(start + 1) block ... tag(end) end_marker`, where each
+/// block is a byte-exact slice of `new_editable` and `tag_for_marker` turns a
+/// 0-based marker index into its tag text. The cursor marker is inserted once,
+/// in the first emitted block whose inclusive range contains the cursor.
+fn encode_changed_span(
+    old_editable: &str,
+    new_editable: &str,
+    cursor_offset_in_new: Option<usize>,
+    cursor_marker: &str,
+    end_marker: &str,
+    marker_offsets: &[usize],
+    tag_for_marker: impl Fn(usize) -> String,
+) -> Result<String> {
+    let (common_prefix, common_suffix) =
+        common_affix_lens(old_editable.as_bytes(), new_editable.as_bytes());
+    let change_end_in_old = old_editable.len() - common_suffix;
+
+    let last_marker_idx = marker_offsets.len().saturating_sub(1);
+    let mut start_marker_idx = marker_offsets
+        .iter()
+        .rposition(|&offset| offset <= common_prefix)
+        .unwrap_or(0);
+    let mut end_marker_idx = marker_offsets
+        .iter()
+        .position(|&offset| offset >= change_end_in_old)
+        .unwrap_or(last_marker_idx);
+
+    // A pure insertion exactly on a marker boundary (including prepending or
+    // appending to the region) makes both searches land on the same marker.
+    // The span would then hold no block, the inserted text would be dropped,
+    // and the output would be a single undecodable tag. Widen the span by one
+    // neighbouring block so the insertion is carried inside it.
+    if start_marker_idx == end_marker_idx {
+        if end_marker_idx < last_marker_idx {
+            end_marker_idx += 1;
+        } else if start_marker_idx > 0 {
+            start_marker_idx -= 1;
+        }
+    }
+
+    let old_start = *marker_offsets
+        .get(start_marker_idx)
+        .context("no marker offsets available for start marker")?;
+    let old_end = *marker_offsets
+        .get(end_marker_idx)
+        .context("no marker offsets available for end marker")?;
+
+    // Text before `old_start` is shared, and so is the tail after `old_end`,
+    // so the span in the new text starts at the same offset and ends the same
+    // distance from the end.
+    let new_start = old_start;
+    let new_end = new_editable
+        .len()
+        .saturating_sub(old_editable.len().saturating_sub(old_end));
+
+    let new_span = &new_editable[new_start..new_end];
+    let old_span = &old_editable[old_start..old_end];
+
+    // Compute common prefix/suffix within the span for accurate boundary mapping
+    let (span_common_prefix, span_common_suffix) =
+        common_affix_lens(old_span.as_bytes(), new_span.as_bytes());
+
+    let mut result = String::new();
+    let mut prev_new_rel = 0usize;
+    let mut cursor_placed = false;
+
+    for block_idx in start_marker_idx..end_marker_idx {
+        result.push_str(&tag_for_marker(block_idx));
+
+        let new_rel_end = if block_idx + 1 == end_marker_idx {
+            // Last block: extends to end of new span
+            new_span.len()
+        } else {
+            // Map the intermediate boundary from old to new coordinates
+            let old_rel = marker_offsets[block_idx + 1] - old_start;
+            let mapped = map_boundary_offset(
+                old_rel,
+                old_span.len(),
+                new_span.len(),
+                span_common_prefix,
+                span_common_suffix,
+            );
+            // Never split a multi-byte character
+            new_span.floor_char_boundary(mapped)
+        };
+
+        // Ensure monotonicity (each block gets at least zero content)
+        let new_rel_end = new_rel_end.max(prev_new_rel);
+        let block_content = &new_span[prev_new_rel..new_rel_end];
+
+        // Offset of the cursor inside this block, if it has not been placed
+        // yet and falls within the block's inclusive range.
+        let cursor_in_block = match cursor_offset_in_new {
+            Some(cursor_offset) if !cursor_placed => {
+                let abs_start = new_start + prev_new_rel;
+                let abs_end = new_start + new_rel_end;
+                (abs_start..=abs_end)
+                    .contains(&cursor_offset)
+                    .then(|| cursor_offset - abs_start)
+            }
+            _ => None,
+        };
+
+        match cursor_in_block {
+            Some(cursor_in_block) => {
+                cursor_placed = true;
+                let bounded = cursor_in_block.min(block_content.len());
+                result.push_str(&block_content[..bounded]);
+                result.push_str(cursor_marker);
+                result.push_str(&block_content[bounded..]);
+            }
+            None => result.push_str(block_content),
+        }
+        prev_new_rel = new_rel_end;
+    }
+
+    // Final closing marker
+    result.push_str(&tag_for_marker(end_marker_idx));
+    result.push_str(end_marker);
+
+    Ok(result)
+}
+
+/// Translates `old_rel`, a byte offset inside the old span, into the
+/// corresponding byte offset inside the new span.
+///
+/// - Offsets inside the shared prefix map to themselves.
+/// - Offsets inside the shared suffix keep their distance from the span end.
+/// - Offsets inside the changed middle are scaled proportionally (integer
+///   division, rounding down) onto the changed middle of the new span.
+fn map_boundary_offset(
+    old_rel: usize,
+    old_span_len: usize,
+    new_span_len: usize,
+    span_common_prefix: usize,
+    span_common_suffix: usize,
+) -> usize {
+    // Shared prefix: identical coordinates on both sides.
+    if old_rel <= span_common_prefix {
+        return old_rel;
+    }
+
+    // Shared suffix: preserve the distance to the end of the span.
+    let suffix_start_in_old = old_span_len - span_common_suffix;
+    if old_rel >= suffix_start_in_old {
+        let distance_to_end = old_span_len - old_rel;
+        return new_span_len - distance_to_end;
+    }
+
+    // Changed middle: proportional mapping.
+    let old_changed_len = old_span_len
+        .saturating_sub(span_common_prefix)
+        .saturating_sub(span_common_suffix);
+    if old_changed_len == 0 {
+        return span_common_prefix;
+    }
+    let new_changed_len = new_span_len
+        .saturating_sub(span_common_prefix)
+        .saturating_sub(span_common_suffix);
+    let into_changed = old_rel - span_common_prefix;
+    span_common_prefix + (into_changed * new_changed_len / old_changed_len)
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -554,4 +1183,233 @@ mod tests {
"line1\nline2"
);
}
+
+ #[test]
+ fn test_write_editable_with_markers_v0316_byte_exact() { // removing the cursor marker and all marker tags must give back the input unchanged
+ let editable = "aaa\nbbb\nccc\n";
+ let mut output = String::new();
+ write_editable_with_markers_v0316(&mut output, editable, 4, "<|user_cursor|>");
+ // Should have marker tags with no extra newlines
+ assert!(output.starts_with("<|marker_1|>"));
+ assert!(output.contains("<|user_cursor|>"));
+ // Content should be byte-exact - no extra newlines added by markers
+ let stripped = output.replace("<|user_cursor|>", "");
+ let stripped = strip_marker_tags(&stripped);
+ assert_eq!(stripped, editable);
+ }
+
+ #[test]
+ fn test_apply_marker_span_v0316_basic() { // content between markers 1 and 2 replaces the whole old text
+ let old = "aaa\nbbb\nccc\n";
+ let output = "<|marker_1|>aaa\nBBB\nccc\n<|marker_2|>";
+ let result = apply_marker_span_v0316(old, output).unwrap();
+ assert_eq!(result, "aaa\nBBB\nccc\n");
+ }
+
+ #[test]
+ fn test_apply_marker_span_v0316_no_edit() { // a repeated marker number is the no-edit signal: old text is returned as-is
+ let old = "aaa\nbbb\nccc\n";
+ let output = "<|marker_1|><|marker_1|>";
+ let result = apply_marker_span_v0316(old, output).unwrap();
+ assert_eq!(result, old);
+ }
+
+ #[test]
+ fn test_apply_marker_span_v0316_no_edit_any_marker() { // no-edit holds for any repeated marker; text between the two tags is ignored
+ let old = "aaa\nbbb\nccc\n";
+ let output = "<|marker_2|>ignored content<|marker_2|>";
+ let result = apply_marker_span_v0316(old, output).unwrap();
+ assert_eq!(result, old);
+ }
+
+ #[test]
+ fn test_apply_marker_span_v0316_multi_block() { // output spanning every block, with intermediate tags, reassembles to the full new text
+ let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
+ let marker_offsets = compute_marker_offsets(old);
+ assert!(
+ marker_offsets.len() >= 3,
+ "expected at least 3 offsets, got {:?}",
+ marker_offsets
+ );
+
+ // Build output spanning all blocks with new content
+ let new_content = "LINE1\nLINE2\nLINE3\n\nLINE5\nLINE6\nLINE7\nLINE8\n";
+ let mut output = String::new();
+ output.push_str("<|marker_1|>");
+ // Split new_content at old block boundaries
+ for i in 0..marker_offsets.len() - 1 {
+ if i > 0 {
+ output.push_str(&marker_tag(i + 1));
+ }
+ let start = marker_offsets[i];
+ let end = marker_offsets[i + 1];
+ let block_len = end - start;
+ // Use same length blocks from new content (they happen to be same length)
+ output.push_str(&new_content[start..start + block_len]);
+ }
+ let last_marker_num = marker_offsets.len();
+ output.push_str(&marker_tag(last_marker_num));
+ let result = apply_marker_span_v0316(old, &output).unwrap();
+ assert_eq!(result, new_content);
+ }
+
+ #[test]
+ fn test_apply_marker_span_v0316_byte_exact_no_normalization() { // a missing trailing newline in the model output is preserved, not repaired
+ let old = "aaa\nbbb\nccc\n";
+ // Content doesn't end with \n - should NOT be normalized
+ let output = "<|marker_1|>aaa\nBBB\nccc<|marker_2|>";
+ let result = apply_marker_span_v0316(old, output).unwrap();
+ // V0316 is byte-exact: the missing trailing \n is NOT added
+ assert_eq!(result, "aaa\nBBB\nccc");
+ }
+
+ #[test]
+ fn test_encode_v0316_no_edits() { // encoding identical old/new text must decode back to the old text
+ let old = "aaa\nbbb\nccc\n";
+ let result =
+ encode_from_old_and_new_v0316(old, old, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
+ // Should be <|marker_K|><|marker_K|><|end|> where K is nearest to cursor
+ assert!(result.ends_with("<|end|>"));
+ // Parse it and verify it's a no-edit
+ let stripped = result.strip_suffix("<|end|>").unwrap();
+ let result_parsed = apply_marker_span_v0316(old, stripped).unwrap();
+ assert_eq!(result_parsed, old);
+ }
+
+ #[test]
+ fn test_encode_v0316_with_change() { // a one-line change is wrapped in markers 1 and 2 and terminated by the end marker
+ let old = "aaa\nbbb\nccc\n";
+ let new = "aaa\nBBB\nccc\n";
+ let result =
+ encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
+ assert!(result.contains("<|marker_1|>"));
+ assert!(result.contains("<|marker_2|>"));
+ assert!(result.ends_with("<|end|>"));
+ }
+
+ #[test]
+ fn test_roundtrip_v0316() { // encode then apply must reproduce the new text for a single-line change
+ let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
+ let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
+ let encoded =
+ encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
+ let stripped = encoded
+ .strip_suffix("<|end|>")
+ .expect("should have end marker");
+ let reconstructed = apply_marker_span_v0316(old, stripped).unwrap();
+ assert_eq!(reconstructed, new);
+ }
+
+ #[test]
+ fn test_roundtrip_v0316_with_cursor() { // checks cursor marker placement in the encoded text only; the output is not decoded here
+ let old = "aaa\nbbb\nccc\n";
+ let new = "aaa\nBBB\nccc\n";
+ let result =
+ encode_from_old_and_new_v0316(old, new, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
+ assert!(result.contains("<|user_cursor|>"), "result: {result}");
+ assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
+ }
+
+ #[test]
+ fn test_roundtrip_v0316_multi_block_change() { // encode then apply must reproduce the new text when two separate lines change
+ let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
+ let new = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
+ let encoded =
+ encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
+ let stripped = encoded
+ .strip_suffix("<|end|>")
+ .expect("should have end marker");
+ let reconstructed = apply_marker_span_v0316(old, stripped).unwrap();
+ assert_eq!(reconstructed, new);
+ }
+
+ #[test]
+ fn test_nearest_marker_number() { // nearest marker is 1-based; ties go to the earlier marker; None behaves like offset 0
+ let offsets = vec![0, 10, 20, 30];
+ assert_eq!(nearest_marker_number(Some(0), &offsets), 1);
+ assert_eq!(nearest_marker_number(Some(9), &offsets), 2);
+ assert_eq!(nearest_marker_number(Some(15), &offsets), 2);
+ assert_eq!(nearest_marker_number(Some(25), &offsets), 3);
+ assert_eq!(nearest_marker_number(Some(30), &offsets), 4);
+ assert_eq!(nearest_marker_number(None, &offsets), 1);
+ }
+
+ #[test]
+ fn test_marker_tag_relative_formats_as_expected() { // negative keeps `-`, zero is written `-0`, positive gets an explicit `+`
+ assert_eq!(marker_tag_relative(-2), "<|marker-2|>");
+ assert_eq!(marker_tag_relative(-1), "<|marker-1|>");
+ assert_eq!(marker_tag_relative(0), "<|marker-0|>");
+ assert_eq!(marker_tag_relative(1), "<|marker+1|>");
+ assert_eq!(marker_tag_relative(2), "<|marker+2|>");
+ }
+
+ #[test]
+ fn test_write_editable_with_markers_v0317_includes_relative_markers_and_cursor() { // output has the `-0` tag and the cursor; removing both kinds of marker restores the input
+ let editable = "aaa\nbbb\nccc\n";
+ let mut output = String::new();
+ write_editable_with_markers_v0317(&mut output, editable, 4, "<|user_cursor|>");
+
+ assert!(output.contains("<|marker-0|>"));
+ assert!(output.contains("<|user_cursor|>"));
+
+ let stripped = output.replace("<|user_cursor|>", "");
+ let stripped =
+ collect_relative_marker_tags(&stripped)
+ .iter()
+ .fold(stripped.clone(), |acc, marker| { // remove each collected tag's text from the accumulated string
+ let tag = &stripped[marker.tag_start..marker.tag_end];
+ acc.replace(tag, "")
+ });
+ assert_eq!(stripped, editable);
+ }
+
+ #[test]
+ fn test_apply_marker_span_v0317_basic() { // with the cursor at offset 0, tags `-0`..`+1` replace the whole old text
+ let old = "aaa\nbbb\nccc\n";
+ let output = "<|marker-0|>aaa\nBBB\nccc\n<|marker+1|>";
+ let result = apply_marker_span_v0317(old, output, Some(0)).unwrap();
+ assert_eq!(result, "aaa\nBBB\nccc\n");
+ }
+
+ #[test]
+ fn test_apply_marker_span_v0317_no_edit() { // a repeated relative tag is the no-edit signal: old text is returned as-is
+ let old = "aaa\nbbb\nccc\n";
+ let output = "<|marker-0|><|marker-0|>";
+ let result = apply_marker_span_v0317(old, output, Some(0)).unwrap();
+ assert_eq!(result, old);
+ }
+
+ #[test]
+ fn test_encode_v0317_no_edits() { // identical old/new text encodes as the `-0` tag twice followed by the end marker
+ let old = "aaa\nbbb\nccc\n";
+ let result =
+ encode_from_old_and_new_v0317(old, old, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
+ assert_eq!(result, "<|marker-0|><|marker-0|><|end|>");
+ }
+
+ #[test]
+ fn test_roundtrip_v0317() { // encode, drop end and cursor markers, then apply with the same cursor: must reproduce the new text
+ let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
+ let new = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
+ let cursor = Some(6);
+
+ let encoded =
+ encode_from_old_and_new_v0317(old, new, cursor, "<|user_cursor|>", "<|end|>").unwrap();
+ let stripped = encoded
+ .strip_suffix("<|end|>")
+ .expect("should have end marker");
+ let stripped = stripped.replace("<|user_cursor|>", "");
+ let reconstructed = apply_marker_span_v0317(old, &stripped, cursor).unwrap();
+ assert_eq!(reconstructed, new);
+ }
+
+ #[test]
+ fn test_roundtrip_v0317_with_cursor_marker() { // checks the encoded text contains the cursor marker and the `-0` tag; the output is not decoded here
+ let old = "aaa\nbbb\nccc\n";
+ let new = "aaa\nBBB\nccc\n";
+ let result =
+ encode_from_old_and_new_v0317(old, new, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
+ assert!(result.contains("<|user_cursor|>"), "result: {result}");
+ assert!(result.contains("<|marker-0|>"), "result: {result}");
+ }
}
@@ -82,7 +82,12 @@ pub enum ZetaFormat {
v0226Hashline,
V0304VariableEdit,
V0304SeedNoEdits,
+ /// Multi-block marker spans with NO_EDITS sentinel.
V0306SeedMultiRegions,
+ /// Byte-exact marker spans; all intermediate markers emitted; repeated marker means no-edit.
+ V0316SeedMultiRegions,
+ /// V0316, but marker numbers are relative to the cursor block (e.g. -1, -0, +1).
+ V0317SeedMultiRegions,
}
impl std::fmt::Display for ZetaFormat {
@@ -220,6 +225,30 @@ pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str]
ZetaFormat::v0226Hashline => hashline::special_tokens(),
ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
+ ZetaFormat::V0316SeedMultiRegions => {
+ static TOKENS: &[&str] = &[
+ seed_coder::FIM_SUFFIX,
+ seed_coder::FIM_PREFIX,
+ seed_coder::FIM_MIDDLE,
+ seed_coder::FILE_MARKER,
+ multi_region::V0316_END_MARKER,
+ CURSOR_MARKER,
+ multi_region::MARKER_TAG_PREFIX,
+ ];
+ TOKENS
+ }
+ ZetaFormat::V0317SeedMultiRegions => {
+ static TOKENS: &[&str] = &[
+ seed_coder::FIM_SUFFIX,
+ seed_coder::FIM_PREFIX,
+ seed_coder::FIM_MIDDLE,
+ seed_coder::FILE_MARKER,
+ multi_region::V0317_END_MARKER,
+ CURSOR_MARKER,
+ multi_region::RELATIVE_MARKER_TAG_PREFIX,
+ ];
+ TOKENS
+ }
ZetaFormat::V0306SeedMultiRegions => {
static TOKENS: &[&str] = &[
seed_coder::FIM_SUFFIX,
@@ -248,6 +277,8 @@ pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
| ZetaFormat::V0211SeedCoder
| ZetaFormat::v0226Hashline
| ZetaFormat::V0306SeedMultiRegions
+ | ZetaFormat::V0316SeedMultiRegions
+ | ZetaFormat::V0317SeedMultiRegions
| ZetaFormat::V0304SeedNoEdits => (350, 150),
ZetaFormat::V0304VariableEdit => (1024, 0),
}
@@ -266,6 +297,8 @@ pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
| ZetaFormat::V0304VariableEdit
| ZetaFormat::V0306SeedMultiRegions
| ZetaFormat::V0304SeedNoEdits => &[],
+ ZetaFormat::V0316SeedMultiRegions => &[multi_region::V0316_END_MARKER],
+ ZetaFormat::V0317SeedMultiRegions => &[multi_region::V0317_END_MARKER],
}
}
@@ -288,7 +321,9 @@ pub fn excerpt_ranges_for_format(
| ZetaFormat::V0211SeedCoder
| ZetaFormat::v0226Hashline
| ZetaFormat::V0304SeedNoEdits
- | ZetaFormat::V0306SeedMultiRegions => (
+ | ZetaFormat::V0306SeedMultiRegions
+ | ZetaFormat::V0316SeedMultiRegions
+ | ZetaFormat::V0317SeedMultiRegions => (
ranges.editable_350.clone(),
ranges.editable_350_context_150.clone(),
),
@@ -371,6 +406,22 @@ pub fn write_cursor_excerpt_section_for_format(
cursor_offset,
));
}
+ ZetaFormat::V0316SeedMultiRegions => {
+ prompt.push_str(&build_v0316_cursor_prefix(
+ path,
+ context,
+ editable_range,
+ cursor_offset,
+ ));
+ }
+ ZetaFormat::V0317SeedMultiRegions => {
+ prompt.push_str(&build_v0317_cursor_prefix(
+ path,
+ context,
+ editable_range,
+ cursor_offset,
+ ));
+ }
}
}
@@ -403,6 +454,60 @@ fn build_v0306_cursor_prefix(
section
}
+/// Builds the cursor-excerpt section for the V0316 format: a file-marker
+/// header line with the path, the context before the editable range, then the
+/// editable range rendered with absolute marker tags and the cursor marker.
+/// The section always ends with a newline.
+///
+/// `cursor_offset` is an offset into `context` and is expected to be at or
+/// after `editable_range.start`.
+fn build_v0316_cursor_prefix(
+    path: &Path,
+    context: &str,
+    editable_range: &Range<usize>,
+    cursor_offset: usize,
+) -> String {
+    // Header line: file marker immediately followed by the (lossy) path.
+    let mut section = String::new();
+    section.push_str(seed_coder::FILE_MARKER);
+    section.push_str(&path.to_string_lossy());
+    section.push('\n');
+
+    let leading_context = &context[..editable_range.start];
+    section.push_str(leading_context);
+
+    let editable_text = &context[editable_range.clone()];
+    let cursor_in_editable = cursor_offset - editable_range.start;
+    multi_region::write_editable_with_markers_v0316(
+        &mut section,
+        editable_text,
+        cursor_in_editable,
+        CURSOR_MARKER,
+    );
+
+    let newline_terminated = section.ends_with('\n');
+    if !newline_terminated {
+        section.push('\n');
+    }
+    section
+}
+
+/// Builds the cursor-excerpt section for the V0317 format: a file-marker
+/// header line with the path, the context before the editable range, then the
+/// editable range rendered with cursor-relative marker tags and the cursor
+/// marker. The section always ends with a newline.
+///
+/// `cursor_offset` is an offset into `context` and is expected to be at or
+/// after `editable_range.start`.
+fn build_v0317_cursor_prefix(
+    path: &Path,
+    context: &str,
+    editable_range: &Range<usize>,
+    cursor_offset: usize,
+) -> String {
+    let editable_start = editable_range.start;
+
+    // Header line followed by everything that precedes the editable region.
+    let mut section = format!("{}{}\n", seed_coder::FILE_MARKER, path.to_string_lossy());
+    section.push_str(&context[..editable_start]);
+
+    let editable_text = &context[editable_range.clone()];
+    multi_region::write_editable_with_markers_v0317(
+        &mut section,
+        editable_text,
+        cursor_offset - editable_start,
+        CURSOR_MARKER,
+    );
+
+    match section.chars().next_back() {
+        Some('\n') => {}
+        _ => section.push('\n'),
+    }
+    section
+}
+
fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
let start_row = text[0..range.start].matches('\n').count() as u32;
let mut end_row = start_row + text[range.clone()].matches('\n').count() as u32;
@@ -439,7 +544,9 @@ pub fn format_prompt_with_budget_for_format(
let prompt = match format {
ZetaFormat::V0211SeedCoder
| ZetaFormat::V0304SeedNoEdits
- | ZetaFormat::V0306SeedMultiRegions => {
+ | ZetaFormat::V0306SeedMultiRegions
+ | ZetaFormat::V0316SeedMultiRegions
+ | ZetaFormat::V0317SeedMultiRegions => {
let mut cursor_section = String::new();
write_cursor_excerpt_section_for_format(
format,
@@ -533,7 +640,9 @@ pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
| ZetaFormat::v0226Hashline
| ZetaFormat::V0304SeedNoEdits
| ZetaFormat::V0304VariableEdit
- | ZetaFormat::V0306SeedMultiRegions => 6,
+ | ZetaFormat::V0306SeedMultiRegions
+ | ZetaFormat::V0316SeedMultiRegions
+ | ZetaFormat::V0317SeedMultiRegions => 6,
}
}
@@ -552,7 +661,10 @@ pub fn get_prefill_for_format(
| ZetaFormat::V0211SeedCoder
| ZetaFormat::v0226Hashline
| ZetaFormat::V0304VariableEdit => String::new(),
- ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => String::new(),
+ ZetaFormat::V0304SeedNoEdits
+ | ZetaFormat::V0306SeedMultiRegions
+ | ZetaFormat::V0316SeedMultiRegions
+ | ZetaFormat::V0317SeedMultiRegions => String::new(),
}
}
@@ -564,6 +676,8 @@ pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str>
ZetaFormat::V0211SeedCoder
| ZetaFormat::V0304SeedNoEdits
| ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
+ ZetaFormat::V0316SeedMultiRegions => Some(multi_region::V0316_END_MARKER),
+ ZetaFormat::V0317SeedMultiRegions => Some(multi_region::V0317_END_MARKER),
ZetaFormat::V0112MiddleAtEnd
| ZetaFormat::V0113Ordered
| ZetaFormat::V0114180EditableRegion
@@ -591,6 +705,33 @@ pub fn encode_patch_as_output_for_format(
ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
Ok(seed_coder::no_edits(patch))
}
+ ZetaFormat::V0316SeedMultiRegions => {
+ let empty_patch = patch.lines().count() <= 3;
+ if empty_patch {
+ let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
+ let marker_num =
+ multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
+ let tag = multi_region::marker_tag(marker_num);
+ Ok(Some(format!(
+ "{tag}{tag}{}",
+ multi_region::V0316_END_MARKER
+ )))
+ } else {
+ Ok(None)
+ }
+ }
+ ZetaFormat::V0317SeedMultiRegions => {
+ let empty_patch = patch.lines().count() <= 3;
+ if empty_patch {
+ let tag = multi_region::marker_tag_relative(0);
+ Ok(Some(format!(
+ "{tag}{tag}{}",
+ multi_region::V0317_END_MARKER
+ )))
+ } else {
+ Ok(None)
+ }
+ }
_ => Ok(None),
}
}
@@ -613,10 +754,11 @@ pub fn parse_zeta2_model_output(
None => output,
};
- let (context, editable_range_in_context, context_range, _) =
+ let (context, editable_range_in_context, context_range, cursor_offset) =
resolve_cursor_region(prompt_inputs, format);
let context_start = context_range.start;
let old_editable_region = &context[editable_range_in_context.clone()];
+ let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range_in_context.start);
let (range_in_context, output) = match format {
ZetaFormat::v0226Hashline => (
@@ -644,6 +786,18 @@ pub fn parse_zeta2_model_output(
multi_region::apply_marker_span(old_editable_region, output)?
},
),
+ ZetaFormat::V0316SeedMultiRegions => (
+ editable_range_in_context,
+ multi_region::apply_marker_span_v0316(old_editable_region, output)?,
+ ),
+ ZetaFormat::V0317SeedMultiRegions => (
+ editable_range_in_context,
+ multi_region::apply_marker_span_v0317(
+ old_editable_region,
+ output,
+ Some(cursor_offset_in_editable),
+ )?,
+ ),
_ => (editable_range_in_context, output.to_string()),
};