multi_region.rs

   1use anyhow::{Context as _, Result, anyhow};
   2
/// Opening part of an absolute marker tag; `marker_tag` renders `<|marker_N|>`.
pub const MARKER_TAG_PREFIX: &str = "<|marker_";
/// Closing part of a marker tag; also used to find the end of relative tags.
pub const MARKER_TAG_SUFFIX: &str = "|>";
/// Opening part searched for when collecting cursor-relative tags such as
/// `<|marker+1|>` or `<|marker-2|>`.
pub const RELATIVE_MARKER_TAG_PREFIX: &str = "<|marker";
/// Number of lines a block must hold before `compute_marker_offsets` may
/// place the next interior boundary.
const MIN_BLOCK_LINES: usize = 3;
/// Line count at which `compute_marker_offsets` forces a block boundary.
const MAX_BLOCK_LINES: usize = 8;
// NOTE(review): presumably the `end_marker` values handed to the V0316 / V0317
// encoders; their use is not visible in this part of the file. Both currently
// hold the same string.
pub const V0316_END_MARKER: &str = "<[end▁of▁sentence]>";
pub const V0317_END_MARKER: &str = "<[end▁of▁sentence]>";
  10
  11pub fn marker_tag(number: usize) -> String {
  12    format!("{MARKER_TAG_PREFIX}{number}{MARKER_TAG_SUFFIX}")
  13}
  14
  15pub fn marker_tag_relative(delta: isize) -> String {
  16    if delta > 0 {
  17        format!("<|marker+{delta}|>")
  18    } else if delta == 0 {
  19        String::from("<|marker-0|>")
  20    } else {
  21        format!("<|marker{delta}|>")
  22    }
  23}
  24
  25/// Compute byte offsets within `editable_text` where marker boundaries should
  26/// be placed.
  27///
  28/// Returns a sorted `Vec<usize>` that always starts with `0` and ends with
  29/// `editable_text.len()`. Interior offsets are placed at line boundaries
  30/// (right after a `\n`), preferring blank-line boundaries when available and
  31/// respecting `MIN_BLOCK_LINES` / `MAX_BLOCK_LINES` constraints.
  32pub fn compute_marker_offsets(editable_text: &str) -> Vec<usize> {
  33    if editable_text.is_empty() {
  34        return vec![0, 0];
  35    }
  36
  37    let mut offsets = vec![0usize];
  38    let mut lines_since_last_marker = 0usize;
  39    let mut byte_offset = 0usize;
  40
  41    for line in editable_text.split('\n') {
  42        let line_end = byte_offset + line.len() + 1;
  43        let is_past_end = line_end > editable_text.len();
  44        let actual_line_end = line_end.min(editable_text.len());
  45        lines_since_last_marker += 1;
  46
  47        let is_blank = line.trim().is_empty();
  48
  49        if !is_past_end && lines_since_last_marker >= MIN_BLOCK_LINES {
  50            if is_blank {
  51                // Blank-line boundary found. We'll place the marker when we
  52                // find the next non-blank line (handled below).
  53            } else if lines_since_last_marker >= MAX_BLOCK_LINES {
  54                offsets.push(actual_line_end);
  55                lines_since_last_marker = 0;
  56            }
  57        }
  58
  59        // Non-blank line immediately following blank line(s): split here so
  60        // the new block starts with this line.
  61        if !is_blank && byte_offset > 0 && lines_since_last_marker >= MIN_BLOCK_LINES {
  62            let before = &editable_text[..byte_offset];
  63            let has_preceding_blank_line = before
  64                .strip_suffix('\n')
  65                .map(|stripped| {
  66                    let last_line = match stripped.rfind('\n') {
  67                        Some(pos) => &stripped[pos + 1..],
  68                        None => stripped,
  69                    };
  70                    last_line.trim().is_empty()
  71                })
  72                .unwrap_or(false);
  73
  74            if has_preceding_blank_line {
  75                offsets.push(byte_offset);
  76                lines_since_last_marker = 1;
  77            }
  78        }
  79
  80        byte_offset = actual_line_end;
  81
  82        // Re-check after blank-line logic since lines_since_last_marker may
  83        // have been reset.
  84        if !is_past_end && lines_since_last_marker >= MAX_BLOCK_LINES {
  85            if *offsets.last().unwrap_or(&0) != actual_line_end {
  86                offsets.push(actual_line_end);
  87                lines_since_last_marker = 0;
  88            }
  89        }
  90    }
  91
  92    let end = editable_text.len();
  93    if *offsets.last().unwrap_or(&0) != end {
  94        offsets.push(end);
  95    }
  96
  97    offsets
  98}
  99
 100/// Write the editable region content with marker tags, inserting the cursor
 101/// marker at the given offset within the editable text.
 102pub fn write_editable_with_markers(
 103    output: &mut String,
 104    editable_text: &str,
 105    cursor_offset_in_editable: usize,
 106    cursor_marker: &str,
 107) {
 108    let marker_offsets = compute_marker_offsets(editable_text);
 109    let mut cursor_placed = false;
 110    for (i, &offset) in marker_offsets.iter().enumerate() {
 111        let marker_num = i + 1;
 112        if !output.is_empty() && !output.ends_with('\n') {
 113            output.push('\n');
 114        }
 115        output.push_str(&marker_tag(marker_num));
 116
 117        if let Some(&next_offset) = marker_offsets.get(i + 1) {
 118            output.push('\n');
 119            let block = &editable_text[offset..next_offset];
 120            if !cursor_placed
 121                && cursor_offset_in_editable >= offset
 122                && cursor_offset_in_editable <= next_offset
 123            {
 124                cursor_placed = true;
 125                let cursor_in_block = cursor_offset_in_editable - offset;
 126                output.push_str(&block[..cursor_in_block]);
 127                output.push_str(cursor_marker);
 128                output.push_str(&block[cursor_in_block..]);
 129            } else {
 130                output.push_str(block);
 131            }
 132        }
 133    }
 134}
 135
 136/// Strip any `<|marker_N|>` tags from `text`.
 137///
 138/// When a marker tag sits on its own line (followed by `\n`), the trailing
 139/// newline is also removed so the surrounding lines stay joined naturally.
 140fn strip_marker_tags(text: &str) -> String {
 141    let mut result = String::with_capacity(text.len());
 142    let mut pos = 0;
 143    let bytes = text.as_bytes();
 144    while let Some(rel) = text[pos..].find(MARKER_TAG_PREFIX) {
 145        result.push_str(&text[pos..pos + rel]);
 146        let num_start = pos + rel + MARKER_TAG_PREFIX.len();
 147        if let Some(suffix_rel) = text[num_start..].find(MARKER_TAG_SUFFIX) {
 148            let mut tag_end = num_start + suffix_rel + MARKER_TAG_SUFFIX.len();
 149            if bytes.get(tag_end) == Some(&b'\n') {
 150                tag_end += 1;
 151            }
 152            pos = tag_end;
 153        } else {
 154            result.push_str(MARKER_TAG_PREFIX);
 155            pos = num_start;
 156        }
 157    }
 158    result.push_str(&text[pos..]);
 159    result
 160}
 161
 162/// Parse model output that uses the marker format.
 163///
 164/// Returns `(start_marker_num, end_marker_num, content_between_markers)`.
 165/// The leading format-level newline after the start marker is stripped.
 166/// Trailing newlines are preserved so blank-line endings in the editable
 167/// region are not lost.
 168///
 169/// Any extra intermediate marker tags that the model may have inserted
 170/// between the first and last markers are stripped from the returned content.
 171pub fn extract_marker_span(text: &str) -> Result<(usize, usize, String)> {
 172    let first_tag_start = text
 173        .find(MARKER_TAG_PREFIX)
 174        .context("no start marker found in output")?;
 175    let first_num_start = first_tag_start + MARKER_TAG_PREFIX.len();
 176    let first_num_end = text[first_num_start..]
 177        .find(MARKER_TAG_SUFFIX)
 178        .map(|i| i + first_num_start)
 179        .context("malformed start marker tag")?;
 180    let start_num: usize = text[first_num_start..first_num_end]
 181        .parse()
 182        .context("start marker number is not a valid integer")?;
 183    let first_tag_end = first_num_end + MARKER_TAG_SUFFIX.len();
 184
 185    let last_tag_start = text
 186        .rfind(MARKER_TAG_PREFIX)
 187        .context("no end marker found in output")?;
 188    let last_num_start = last_tag_start + MARKER_TAG_PREFIX.len();
 189    let last_num_end = text[last_num_start..]
 190        .find(MARKER_TAG_SUFFIX)
 191        .map(|i| i + last_num_start)
 192        .context("malformed end marker tag")?;
 193    let end_num: usize = text[last_num_start..last_num_end]
 194        .parse()
 195        .context("end marker number is not a valid integer")?;
 196
 197    if start_num == end_num {
 198        return Err(anyhow!(
 199            "start and end markers are the same (marker {})",
 200            start_num
 201        ));
 202    }
 203
 204    let mut content_start = first_tag_end;
 205    if text.as_bytes().get(content_start) == Some(&b'\n') {
 206        content_start += 1;
 207    }
 208    let content_end = last_tag_start;
 209
 210    let content = &text[content_start..content_end.max(content_start)];
 211    let content = strip_marker_tags(content);
 212    Ok((start_num, end_num, content))
 213}
 214
 215/// Given old editable text and model output with marker span, reconstruct the
 216/// full new editable region.
 217pub fn apply_marker_span(old_editable: &str, output: &str) -> Result<String> {
 218    let (start_num, end_num, raw_new_span) = extract_marker_span(output)?;
 219    let marker_offsets = compute_marker_offsets(old_editable);
 220
 221    let start_idx = start_num
 222        .checked_sub(1)
 223        .context("marker numbers are 1-indexed")?;
 224    let end_idx = end_num
 225        .checked_sub(1)
 226        .context("marker numbers are 1-indexed")?;
 227    let start_byte = *marker_offsets
 228        .get(start_idx)
 229        .context("start marker number out of range")?;
 230    let end_byte = *marker_offsets
 231        .get(end_idx)
 232        .context("end marker number out of range")?;
 233
 234    if start_byte > end_byte {
 235        return Err(anyhow!("start marker must come before end marker"));
 236    }
 237
 238    let old_span = &old_editable[start_byte..end_byte];
 239    let mut new_span = raw_new_span;
 240    if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
 241        new_span.push('\n');
 242    }
 243    if !old_span.ends_with('\n') && new_span.ends_with('\n') {
 244        new_span.pop();
 245    }
 246
 247    let mut result = String::new();
 248    result.push_str(&old_editable[..start_byte]);
 249    result.push_str(&new_span);
 250    result.push_str(&old_editable[end_byte..]);
 251
 252    Ok(result)
 253}
 254
 255/// Compare old and new editable text, find the minimal marker span that covers
 256/// all changes, and encode the result with marker tags.
 257pub fn encode_from_old_and_new(
 258    old_editable: &str,
 259    new_editable: &str,
 260    cursor_offset_in_new: Option<usize>,
 261    cursor_marker: &str,
 262    end_marker: &str,
 263    no_edits_marker: &str,
 264) -> Result<String> {
 265    if old_editable == new_editable {
 266        return Ok(format!("{no_edits_marker}{end_marker}"));
 267    }
 268
 269    let marker_offsets = compute_marker_offsets(old_editable);
 270
 271    let common_prefix = old_editable
 272        .bytes()
 273        .zip(new_editable.bytes())
 274        .take_while(|(a, b)| a == b)
 275        .count();
 276
 277    let old_remaining = old_editable.len() - common_prefix;
 278    let new_remaining = new_editable.len() - common_prefix;
 279    let max_suffix = old_remaining.min(new_remaining);
 280    let common_suffix = old_editable.as_bytes()[old_editable.len() - max_suffix..]
 281        .iter()
 282        .rev()
 283        .zip(
 284            new_editable.as_bytes()[new_editable.len() - max_suffix..]
 285                .iter()
 286                .rev(),
 287        )
 288        .take_while(|(a, b)| a == b)
 289        .count();
 290
 291    let change_end_in_old = old_editable.len() - common_suffix;
 292
 293    let start_marker_idx = marker_offsets
 294        .iter()
 295        .rposition(|&offset| offset <= common_prefix)
 296        .unwrap_or(0);
 297    let end_marker_idx = marker_offsets
 298        .iter()
 299        .position(|&offset| offset >= change_end_in_old)
 300        .unwrap_or(marker_offsets.len() - 1);
 301
 302    let old_start = marker_offsets[start_marker_idx];
 303    let old_end = marker_offsets[end_marker_idx];
 304
 305    let new_start = old_start;
 306    let new_end = new_editable
 307        .len()
 308        .saturating_sub(old_editable.len().saturating_sub(old_end));
 309
 310    let new_span = &new_editable[new_start..new_end];
 311
 312    let start_marker_num = start_marker_idx + 1;
 313    let end_marker_num = end_marker_idx + 1;
 314
 315    let mut result = String::new();
 316    result.push_str(&marker_tag(start_marker_num));
 317    result.push('\n');
 318
 319    if let Some(cursor_offset) = cursor_offset_in_new {
 320        if cursor_offset >= new_start && cursor_offset <= new_end {
 321            let cursor_in_span = cursor_offset - new_start;
 322            let bounded = cursor_in_span.min(new_span.len());
 323            result.push_str(&new_span[..bounded]);
 324            result.push_str(cursor_marker);
 325            result.push_str(&new_span[bounded..]);
 326        } else {
 327            result.push_str(new_span);
 328        }
 329    } else {
 330        result.push_str(new_span);
 331    }
 332
 333    if !result.ends_with('\n') {
 334        result.push('\n');
 335    }
 336    result.push_str(&marker_tag(end_marker_num));
 337    result.push('\n');
 338    result.push_str(end_marker);
 339
 340    Ok(result)
 341}
 342
 343/// Extract the full editable region from text that uses marker tags.
 344///
 345/// Returns the concatenation of all block contents between the first and last
 346/// markers, with intermediate marker tags stripped.
 347pub fn extract_editable_region_from_markers(text: &str) -> Option<String> {
 348    let first_marker_start = text.find(MARKER_TAG_PREFIX)?;
 349
 350    let mut markers: Vec<(usize, usize)> = Vec::new();
 351    let mut search_start = first_marker_start;
 352    while let Some(rel_pos) = text[search_start..].find(MARKER_TAG_PREFIX) {
 353        let tag_start = search_start + rel_pos;
 354        let num_start = tag_start + MARKER_TAG_PREFIX.len();
 355        let num_end = text[num_start..].find(MARKER_TAG_SUFFIX)?;
 356        let tag_end = num_start + num_end + MARKER_TAG_SUFFIX.len();
 357        markers.push((tag_start, tag_end));
 358        search_start = tag_end;
 359    }
 360
 361    if markers.len() < 2 {
 362        return None;
 363    }
 364
 365    let (_, first_tag_end) = markers[0];
 366    let (last_tag_start, _) = markers[markers.len() - 1];
 367
 368    let mut content_start = first_tag_end;
 369    if text.as_bytes().get(content_start) == Some(&b'\n') {
 370        content_start += 1;
 371    }
 372    let mut content_end = last_tag_start;
 373    if content_end > content_start && text.as_bytes().get(content_end - 1) == Some(&b'\n') {
 374        content_end -= 1;
 375    }
 376
 377    let raw = &text[content_start..content_end];
 378    let result = strip_marker_tags(raw);
 379    let result = result.strip_suffix('\n').unwrap_or(&result).to_string();
 380    Some(result)
 381}
 382
 383struct MarkerTag {
 384    number: usize,
 385    tag_start: usize,
 386    tag_end: usize,
 387}
 388
 389struct RelativeMarkerTag {
 390    delta: isize,
 391    tag_start: usize,
 392    tag_end: usize,
 393}
 394
 395fn collect_marker_tags(text: &str) -> Vec<MarkerTag> {
 396    let mut markers = Vec::new();
 397    let mut search_from = 0;
 398    while let Some(rel_pos) = text[search_from..].find(MARKER_TAG_PREFIX) {
 399        let tag_start = search_from + rel_pos;
 400        let num_start = tag_start + MARKER_TAG_PREFIX.len();
 401        if let Some(suffix_rel) = text[num_start..].find(MARKER_TAG_SUFFIX) {
 402            let num_end = num_start + suffix_rel;
 403            if let Ok(number) = text[num_start..num_end].parse::<usize>() {
 404                let tag_end = num_end + MARKER_TAG_SUFFIX.len();
 405                markers.push(MarkerTag {
 406                    number,
 407                    tag_start,
 408                    tag_end,
 409                });
 410                search_from = tag_end;
 411                continue;
 412            }
 413        }
 414        search_from = tag_start + MARKER_TAG_PREFIX.len();
 415    }
 416    markers
 417}
 418
 419fn collect_relative_marker_tags(text: &str) -> Vec<RelativeMarkerTag> {
 420    let mut markers = Vec::new();
 421    let mut search_from = 0;
 422    while let Some(rel_pos) = text[search_from..].find(RELATIVE_MARKER_TAG_PREFIX) {
 423        let tag_start = search_from + rel_pos;
 424        let payload_start = tag_start + RELATIVE_MARKER_TAG_PREFIX.len();
 425        if let Some(suffix_rel) = text[payload_start..].find(MARKER_TAG_SUFFIX) {
 426            let payload_end = payload_start + suffix_rel;
 427            let payload = &text[payload_start..payload_end];
 428            if let Ok(delta) = payload.parse::<isize>() {
 429                let tag_end = payload_end + MARKER_TAG_SUFFIX.len();
 430                markers.push(RelativeMarkerTag {
 431                    delta,
 432                    tag_start,
 433                    tag_end,
 434                });
 435                search_from = tag_end;
 436                continue;
 437            }
 438        }
 439        search_from = tag_start + RELATIVE_MARKER_TAG_PREFIX.len();
 440    }
 441    markers
 442}
 443
 444pub fn nearest_marker_number(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
 445    let cursor = cursor_offset.unwrap_or(0);
 446    marker_offsets
 447        .iter()
 448        .enumerate()
 449        .min_by_key(|(_, offset)| (**offset as isize - cursor as isize).unsigned_abs())
 450        .map(|(idx, _)| idx + 1)
 451        .unwrap_or(1)
 452}
 453
 454fn cursor_block_index(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
 455    let cursor = cursor_offset.unwrap_or(0);
 456    marker_offsets
 457        .windows(2)
 458        .position(|window| cursor >= window[0] && cursor < window[1])
 459        .unwrap_or_else(|| marker_offsets.len().saturating_sub(2))
 460}
 461
 462/// Write the editable region content with V0317 byte-exact marker tags, where
 463/// marker numbers are relative to the cursor block.
 464pub fn write_editable_with_markers_v0317(
 465    output: &mut String,
 466    editable_text: &str,
 467    cursor_offset_in_editable: usize,
 468    cursor_marker: &str,
 469) {
 470    let marker_offsets = compute_marker_offsets(editable_text);
 471    let anchor_idx = cursor_block_index(Some(cursor_offset_in_editable), &marker_offsets);
 472    let mut cursor_placed = false;
 473
 474    for (i, &offset) in marker_offsets.iter().enumerate() {
 475        let marker_delta = i as isize - anchor_idx as isize;
 476        output.push_str(&marker_tag_relative(marker_delta));
 477
 478        if let Some(&next_offset) = marker_offsets.get(i + 1) {
 479            let block = &editable_text[offset..next_offset];
 480            if !cursor_placed
 481                && cursor_offset_in_editable >= offset
 482                && cursor_offset_in_editable <= next_offset
 483            {
 484                cursor_placed = true;
 485                let cursor_in_block = cursor_offset_in_editable - offset;
 486                output.push_str(&block[..cursor_in_block]);
 487                output.push_str(cursor_marker);
 488                output.push_str(&block[cursor_in_block..]);
 489            } else {
 490                output.push_str(block);
 491            }
 492        }
 493    }
 494}
 495
 496/// Write the editable region content with V0316 byte-exact marker tags.
 497///
 498/// Unlike the V0306 version, markers are pure delimiters with no newline
 499/// padding. The content between markers is the exact bytes from the editable
 500/// text.
 501pub fn write_editable_with_markers_v0316(
 502    output: &mut String,
 503    editable_text: &str,
 504    cursor_offset_in_editable: usize,
 505    cursor_marker: &str,
 506) {
 507    let marker_offsets = compute_marker_offsets(editable_text);
 508    let mut cursor_placed = false;
 509    for (i, &offset) in marker_offsets.iter().enumerate() {
 510        let marker_num = i + 1;
 511        output.push_str(&marker_tag(marker_num));
 512
 513        if let Some(&next_offset) = marker_offsets.get(i + 1) {
 514            let block = &editable_text[offset..next_offset];
 515            if !cursor_placed
 516                && cursor_offset_in_editable >= offset
 517                && cursor_offset_in_editable <= next_offset
 518            {
 519                cursor_placed = true;
 520                let cursor_in_block = cursor_offset_in_editable - offset;
 521                output.push_str(&block[..cursor_in_block]);
 522                output.push_str(cursor_marker);
 523                output.push_str(&block[cursor_in_block..]);
 524            } else {
 525                output.push_str(block);
 526            }
 527        }
 528    }
 529}
 530
 531/// Parse V0316 model output and reconstruct the full new editable region.
 532///
 533/// V0316 differences from V0306:
 534/// - No newline stripping or normalization (byte-exact content).
 535/// - The no-edit signal is `start_num == end_num` (any repeated marker).
 536/// - Intermediate marker tags are used for block-level extraction.
 537pub fn apply_marker_span_v0316(old_editable: &str, output: &str) -> Result<String> {
 538    let markers = collect_marker_tags(output);
 539
 540    if markers.is_empty() {
 541        return Err(anyhow!("no marker tags found in output"));
 542    }
 543
 544    if markers.len() == 1 {
 545        return Err(anyhow!(
 546            "only one marker tag found in output, expected at least two"
 547        ));
 548    }
 549
 550    let start_num = markers
 551        .first()
 552        .map(|marker| marker.number)
 553        .context("missing first marker")?;
 554    let end_num = markers
 555        .last()
 556        .map(|marker| marker.number)
 557        .context("missing last marker")?;
 558
 559    // No-edit signal: start_num == end_num
 560    if start_num == end_num {
 561        return Ok(old_editable.to_string());
 562    }
 563
 564    // Validate monotonically increasing with no gaps
 565    let expected_nums: Vec<usize> = (start_num..=end_num).collect();
 566    let actual_nums: Vec<usize> = markers.iter().map(|m| m.number).collect();
 567    if actual_nums != expected_nums {
 568        eprintln!(
 569            "V0316 marker sequence validation failed: expected {:?}, got {:?}. Attempting best-effort parse.",
 570            expected_nums, actual_nums
 571        );
 572    }
 573
 574    let marker_offsets = compute_marker_offsets(old_editable);
 575
 576    let start_idx = start_num
 577        .checked_sub(1)
 578        .context("marker numbers are 1-indexed")?;
 579    let end_idx = end_num
 580        .checked_sub(1)
 581        .context("marker numbers are 1-indexed")?;
 582
 583    let start_byte = *marker_offsets
 584        .get(start_idx)
 585        .context("start marker number out of range")?;
 586    let end_byte = *marker_offsets
 587        .get(end_idx)
 588        .context("end marker number out of range")?;
 589
 590    if start_byte > end_byte {
 591        return Err(anyhow!("start marker must come before end marker"));
 592    }
 593
 594    // Extract byte-exact content between consecutive markers
 595    let mut new_content = String::new();
 596    for i in 0..markers.len() - 1 {
 597        let content_start = markers[i].tag_end;
 598        let content_end = markers[i + 1].tag_start;
 599        if content_start <= content_end {
 600            new_content.push_str(&output[content_start..content_end]);
 601        }
 602    }
 603
 604    // Splice into old_editable
 605    let mut result = String::new();
 606    result.push_str(&old_editable[..start_byte]);
 607    result.push_str(&new_content);
 608    result.push_str(&old_editable[end_byte..]);
 609
 610    Ok(result)
 611}
 612
 613/// Parse V0317 model output and reconstruct the full new editable region.
 614///
 615/// V0317 differences from V0316:
 616/// - Marker ids are relative to the cursor block (e.g. -2, -1, 0, +1, +2).
 617/// - No-edit signal is any repeated relative marker tag.
 618pub fn apply_marker_span_v0317(
 619    old_editable: &str,
 620    output: &str,
 621    cursor_offset_in_old: Option<usize>,
 622) -> Result<String> {
 623    let markers = collect_relative_marker_tags(output);
 624
 625    if markers.is_empty() {
 626        return Err(anyhow!("no marker tags found in output"));
 627    }
 628
 629    if markers.len() == 1 {
 630        return Err(anyhow!(
 631            "only one marker tag found in output, expected at least two"
 632        ));
 633    }
 634
 635    let marker_offsets = compute_marker_offsets(old_editable);
 636    let anchor_idx = cursor_block_index(cursor_offset_in_old, &marker_offsets);
 637
 638    let start_delta = markers
 639        .first()
 640        .map(|marker| marker.delta)
 641        .context("missing first marker")?;
 642    let end_delta = markers
 643        .last()
 644        .map(|marker| marker.delta)
 645        .context("missing last marker")?;
 646
 647    if start_delta == end_delta {
 648        return Ok(old_editable.to_string());
 649    }
 650
 651    let start_idx_isize = anchor_idx as isize + start_delta;
 652    let end_idx_isize = anchor_idx as isize + end_delta;
 653    if start_idx_isize < 0 || end_idx_isize < 0 {
 654        return Err(anyhow!("relative marker maps before first marker"));
 655    }
 656
 657    let start_idx = usize::try_from(start_idx_isize).context("invalid start marker index")?;
 658    let end_idx = usize::try_from(end_idx_isize).context("invalid end marker index")?;
 659
 660    let start_byte = *marker_offsets
 661        .get(start_idx)
 662        .context("start marker number out of range")?;
 663    let end_byte = *marker_offsets
 664        .get(end_idx)
 665        .context("end marker number out of range")?;
 666
 667    if start_byte > end_byte {
 668        return Err(anyhow!("start marker must come before end marker"));
 669    }
 670
 671    let mut new_content = String::new();
 672    for i in 0..markers.len() - 1 {
 673        let content_start = markers[i].tag_end;
 674        let content_end = markers[i + 1].tag_start;
 675        if content_start <= content_end {
 676            new_content.push_str(&output[content_start..content_end]);
 677        }
 678    }
 679
 680    let mut result = String::new();
 681    result.push_str(&old_editable[..start_byte]);
 682    result.push_str(&new_content);
 683    result.push_str(&old_editable[end_byte..]);
 684
 685    Ok(result)
 686}
 687
 688/// Encode the V0316 training target from old and new editable text.
 689///
 690/// V0316 differences from V0306:
 691/// - No-edit signal: `<|marker_C|><|marker_C|>{end_marker}` where C is nearest
 692///   to cursor.
 693/// - All intermediate markers are emitted with byte-exact content.
 694/// - No newline padding around marker tags.
 695pub fn encode_from_old_and_new_v0316(
 696    old_editable: &str,
 697    new_editable: &str,
 698    cursor_offset_in_new: Option<usize>,
 699    cursor_marker: &str,
 700    end_marker: &str,
 701) -> Result<String> {
 702    let marker_offsets = compute_marker_offsets(old_editable);
 703
 704    if old_editable == new_editable {
 705        let marker_num = nearest_marker_number(cursor_offset_in_new, &marker_offsets);
 706        let tag = marker_tag(marker_num);
 707        return Ok(format!("{tag}{tag}{end_marker}"));
 708    }
 709
 710    let common_prefix = old_editable
 711        .bytes()
 712        .zip(new_editable.bytes())
 713        .take_while(|(a, b)| a == b)
 714        .count();
 715
 716    let old_remaining = old_editable.len() - common_prefix;
 717    let new_remaining = new_editable.len() - common_prefix;
 718    let max_suffix = old_remaining.min(new_remaining);
 719    let common_suffix = old_editable.as_bytes()[old_editable.len() - max_suffix..]
 720        .iter()
 721        .rev()
 722        .zip(
 723            new_editable.as_bytes()[new_editable.len() - max_suffix..]
 724                .iter()
 725                .rev(),
 726        )
 727        .take_while(|(a, b)| a == b)
 728        .count();
 729
 730    let change_end_in_old = old_editable.len() - common_suffix;
 731
 732    let start_marker_idx = marker_offsets
 733        .iter()
 734        .rposition(|&offset| offset <= common_prefix)
 735        .unwrap_or(0);
 736    let end_marker_idx = marker_offsets
 737        .iter()
 738        .position(|&offset| offset >= change_end_in_old)
 739        .unwrap_or(marker_offsets.len() - 1);
 740
 741    let old_start = marker_offsets[start_marker_idx];
 742    let old_end = marker_offsets[end_marker_idx];
 743
 744    let new_start = old_start;
 745    let new_end = new_editable
 746        .len()
 747        .saturating_sub(old_editable.len().saturating_sub(old_end));
 748
 749    let new_span = &new_editable[new_start..new_end];
 750    let old_span = &old_editable[old_start..old_end];
 751
 752    // Compute common prefix/suffix within the span for accurate boundary mapping
 753    let span_common_prefix = old_span
 754        .bytes()
 755        .zip(new_span.bytes())
 756        .take_while(|(a, b)| a == b)
 757        .count();
 758
 759    let span_old_remaining = old_span.len() - span_common_prefix;
 760    let span_new_remaining = new_span.len() - span_common_prefix;
 761    let span_max_suffix = span_old_remaining.min(span_new_remaining);
 762    let span_common_suffix = old_span.as_bytes()[old_span.len() - span_max_suffix..]
 763        .iter()
 764        .rev()
 765        .zip(
 766            new_span.as_bytes()[new_span.len() - span_max_suffix..]
 767                .iter()
 768                .rev(),
 769        )
 770        .take_while(|(a, b)| a == b)
 771        .count();
 772
 773    let mut result = String::new();
 774    let mut prev_new_rel = 0usize;
 775    let mut cursor_placed = false;
 776
 777    for block_idx in start_marker_idx..end_marker_idx {
 778        let marker_num = block_idx + 1;
 779        result.push_str(&marker_tag(marker_num));
 780
 781        let new_rel_end = if block_idx + 1 == end_marker_idx {
 782            // Last block: extends to end of new span
 783            new_span.len()
 784        } else {
 785            // Map the intermediate boundary from old to new coordinates
 786            let old_rel = marker_offsets[block_idx + 1] - old_start;
 787            let mapped = map_boundary_offset(
 788                old_rel,
 789                old_span.len(),
 790                new_span.len(),
 791                span_common_prefix,
 792                span_common_suffix,
 793            );
 794            // Ensure char boundary safety and monotonicity
 795            new_span.floor_char_boundary(mapped)
 796        };
 797
 798        // Ensure monotonicity (each block gets at least zero content)
 799        let new_rel_end = new_rel_end.max(prev_new_rel);
 800
 801        let block_content = &new_span[prev_new_rel..new_rel_end];
 802
 803        if !cursor_placed {
 804            if let Some(cursor_offset) = cursor_offset_in_new {
 805                let abs_start = new_start + prev_new_rel;
 806                let abs_end = new_start + new_rel_end;
 807                if cursor_offset >= abs_start && cursor_offset <= abs_end {
 808                    cursor_placed = true;
 809                    let cursor_in_block = cursor_offset - abs_start;
 810                    let bounded = cursor_in_block.min(block_content.len());
 811                    result.push_str(&block_content[..bounded]);
 812                    result.push_str(cursor_marker);
 813                    result.push_str(&block_content[bounded..]);
 814                    prev_new_rel = new_rel_end;
 815                    continue;
 816                }
 817            }
 818        }
 819
 820        result.push_str(block_content);
 821        prev_new_rel = new_rel_end;
 822    }
 823
 824    // Final closing marker
 825    let end_marker_num = end_marker_idx + 1;
 826    result.push_str(&marker_tag(end_marker_num));
 827    result.push_str(end_marker);
 828
 829    Ok(result)
 830}
 831
 832/// Encode the V0317 training target from old and new editable text.
 833///
 834/// V0317 differences from V0316:
 835/// - Marker ids are relative to cursor block (..., -2, -1, 0, +1, +2, ...).
 836/// - No-edit signal: repeated cursor-relative marker.
 837pub fn encode_from_old_and_new_v0317(
 838    old_editable: &str,
 839    new_editable: &str,
 840    cursor_offset_in_new: Option<usize>,
 841    cursor_marker: &str,
 842    end_marker: &str,
 843) -> Result<String> {
 844    let marker_offsets = compute_marker_offsets(old_editable);
 845    let anchor_idx = cursor_block_index(cursor_offset_in_new, &marker_offsets);
 846
 847    if old_editable == new_editable {
 848        let tag = marker_tag_relative(0);
 849        return Ok(format!("{tag}{tag}{end_marker}"));
 850    }
 851
 852    let common_prefix = old_editable
 853        .bytes()
 854        .zip(new_editable.bytes())
 855        .take_while(|(a, b)| a == b)
 856        .count();
 857
 858    let old_remaining = old_editable.len() - common_prefix;
 859    let new_remaining = new_editable.len() - common_prefix;
 860    let max_suffix = old_remaining.min(new_remaining);
 861    let common_suffix = old_editable.as_bytes()[old_editable.len() - max_suffix..]
 862        .iter()
 863        .rev()
 864        .zip(
 865            new_editable.as_bytes()[new_editable.len() - max_suffix..]
 866                .iter()
 867                .rev(),
 868        )
 869        .take_while(|(a, b)| a == b)
 870        .count();
 871
 872    let change_end_in_old = old_editable.len() - common_suffix;
 873
 874    let start_marker_idx = marker_offsets
 875        .iter()
 876        .rposition(|&offset| offset <= common_prefix)
 877        .unwrap_or(0);
 878    let end_marker_idx = marker_offsets
 879        .iter()
 880        .position(|&offset| offset >= change_end_in_old)
 881        .unwrap_or(marker_offsets.len() - 1);
 882
 883    let old_start = marker_offsets[start_marker_idx];
 884    let old_end = marker_offsets[end_marker_idx];
 885
 886    let new_start = old_start;
 887    let new_end = new_editable
 888        .len()
 889        .saturating_sub(old_editable.len().saturating_sub(old_end));
 890
 891    let new_span = &new_editable[new_start..new_end];
 892    let old_span = &old_editable[old_start..old_end];
 893
 894    let span_common_prefix = old_span
 895        .bytes()
 896        .zip(new_span.bytes())
 897        .take_while(|(a, b)| a == b)
 898        .count();
 899
 900    let span_old_remaining = old_span.len() - span_common_prefix;
 901    let span_new_remaining = new_span.len() - span_common_prefix;
 902    let span_max_suffix = span_old_remaining.min(span_new_remaining);
 903    let span_common_suffix = old_span.as_bytes()[old_span.len() - span_max_suffix..]
 904        .iter()
 905        .rev()
 906        .zip(
 907            new_span.as_bytes()[new_span.len() - span_max_suffix..]
 908                .iter()
 909                .rev(),
 910        )
 911        .take_while(|(a, b)| a == b)
 912        .count();
 913
 914    let mut result = String::new();
 915    let mut prev_new_rel = 0usize;
 916    let mut cursor_placed = false;
 917
 918    for block_idx in start_marker_idx..end_marker_idx {
 919        let marker_delta = block_idx as isize - anchor_idx as isize;
 920        result.push_str(&marker_tag_relative(marker_delta));
 921
 922        let new_rel_end = if block_idx + 1 == end_marker_idx {
 923            new_span.len()
 924        } else {
 925            let old_rel = marker_offsets[block_idx + 1] - old_start;
 926            let mapped = map_boundary_offset(
 927                old_rel,
 928                old_span.len(),
 929                new_span.len(),
 930                span_common_prefix,
 931                span_common_suffix,
 932            );
 933            new_span.floor_char_boundary(mapped)
 934        };
 935
 936        let new_rel_end = new_rel_end.max(prev_new_rel);
 937        let block_content = &new_span[prev_new_rel..new_rel_end];
 938
 939        if !cursor_placed {
 940            if let Some(cursor_offset) = cursor_offset_in_new {
 941                let abs_start = new_start + prev_new_rel;
 942                let abs_end = new_start + new_rel_end;
 943                if cursor_offset >= abs_start && cursor_offset <= abs_end {
 944                    cursor_placed = true;
 945                    let cursor_in_block = cursor_offset - abs_start;
 946                    let bounded = cursor_in_block.min(block_content.len());
 947                    result.push_str(&block_content[..bounded]);
 948                    result.push_str(cursor_marker);
 949                    result.push_str(&block_content[bounded..]);
 950                    prev_new_rel = new_rel_end;
 951                    continue;
 952                }
 953            }
 954        }
 955
 956        result.push_str(block_content);
 957        prev_new_rel = new_rel_end;
 958    }
 959
 960    let end_marker_delta = end_marker_idx as isize - anchor_idx as isize;
 961    result.push_str(&marker_tag_relative(end_marker_delta));
 962    result.push_str(end_marker);
 963
 964    Ok(result)
 965}
 966
 967/// Map a byte offset from old span coordinates to new span coordinates,
 968/// using common prefix/suffix within the span for accuracy.
 969fn map_boundary_offset(
 970    old_rel: usize,
 971    old_span_len: usize,
 972    new_span_len: usize,
 973    span_common_prefix: usize,
 974    span_common_suffix: usize,
 975) -> usize {
 976    if old_rel <= span_common_prefix {
 977        old_rel
 978    } else if old_rel >= old_span_len - span_common_suffix {
 979        new_span_len - (old_span_len - old_rel)
 980    } else {
 981        // Within the changed region: proportional mapping
 982        let old_changed_start = span_common_prefix;
 983        let old_changed_len = old_span_len
 984            .saturating_sub(span_common_prefix)
 985            .saturating_sub(span_common_suffix);
 986        let new_changed_start = span_common_prefix;
 987        let new_changed_len = new_span_len
 988            .saturating_sub(span_common_prefix)
 989            .saturating_sub(span_common_suffix);
 990
 991        if old_changed_len == 0 {
 992            new_changed_start
 993        } else {
 994            new_changed_start + ((old_rel - old_changed_start) * new_changed_len / old_changed_len)
 995        }
 996    }
 997}
 998
 999#[cfg(test)]
1000mod tests {
1001    use super::*;
1002
1003    #[test]
1004    fn test_compute_marker_offsets_small_block() {
1005        let text = "aaa\nbbb\nccc\n";
1006        let offsets = compute_marker_offsets(text);
1007        assert_eq!(offsets, vec![0, text.len()]);
1008    }
1009
1010    #[test]
1011    fn test_compute_marker_offsets_blank_line_split() {
1012        let text = "aaa\nbbb\nccc\n\nddd\neee\nfff\n";
1013        let offsets = compute_marker_offsets(text);
1014        assert_eq!(offsets[0], 0);
1015        assert!(offsets.contains(&13), "offsets: {:?}", offsets);
1016        assert_eq!(*offsets.last().unwrap(), text.len());
1017    }
1018
1019    #[test]
1020    fn test_compute_marker_offsets_max_lines_split() {
1021        let text = "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n";
1022        let offsets = compute_marker_offsets(text);
1023        assert!(offsets.len() >= 3, "offsets: {:?}", offsets);
1024    }
1025
1026    #[test]
1027    fn test_compute_marker_offsets_empty() {
1028        let offsets = compute_marker_offsets("");
1029        assert_eq!(offsets, vec![0, 0]);
1030    }
1031
1032    #[test]
1033    fn test_extract_marker_span() {
1034        let text = "<|marker_2|>\n    new content\n<|marker_3|>\n";
1035        let (start, end, content) = extract_marker_span(text).unwrap();
1036        assert_eq!(start, 2);
1037        assert_eq!(end, 3);
1038        assert_eq!(content, "    new content\n");
1039    }
1040
1041    #[test]
1042    fn test_extract_marker_span_multi_line() {
1043        let text = "<|marker_1|>\nline1\nline2\nline3\n<|marker_4|>";
1044        let (start, end, content) = extract_marker_span(text).unwrap();
1045        assert_eq!(start, 1);
1046        assert_eq!(end, 4);
1047        assert_eq!(content, "line1\nline2\nline3\n");
1048    }
1049
1050    #[test]
1051    fn test_apply_marker_span_basic() {
1052        let old = "aaa\nbbb\nccc\n";
1053        let output = "<|marker_1|>\naaa\nBBB\nccc\n<|marker_2|>";
1054        let result = apply_marker_span(old, output).unwrap();
1055        assert_eq!(result, "aaa\nBBB\nccc\n");
1056    }
1057
1058    #[test]
1059    fn test_apply_marker_span_preserves_trailing_blank_line() {
1060        let old = "/\nresult\n\n";
1061        let output = "<|marker_1|>\n//\nresult\n\n<|marker_2|>";
1062        let result = apply_marker_span(old, output).unwrap();
1063        assert_eq!(result, "//\nresult\n\n");
1064    }
1065
1066    #[test]
1067    fn test_encode_no_edits() {
1068        let old = "aaa\nbbb\nccc\n";
1069        let result = encode_from_old_and_new(
1070            old,
1071            old,
1072            None,
1073            "<|user_cursor|>",
1074            ">>>>>>> UPDATED\n",
1075            "NO_EDITS\n",
1076        )
1077        .unwrap();
1078        assert_eq!(result, "NO_EDITS\n>>>>>>> UPDATED\n");
1079    }
1080
1081    #[test]
1082    fn test_encode_with_change() {
1083        let old = "aaa\nbbb\nccc\n";
1084        let new = "aaa\nBBB\nccc\n";
1085        let result = encode_from_old_and_new(
1086            old,
1087            new,
1088            None,
1089            "<|user_cursor|>",
1090            ">>>>>>> UPDATED\n",
1091            "NO_EDITS\n",
1092        )
1093        .unwrap();
1094        assert!(result.contains("<|marker_1|>"));
1095        assert!(result.contains("<|marker_2|>"));
1096        assert!(result.contains("aaa\nBBB\nccc\n"));
1097        assert!(result.ends_with(">>>>>>> UPDATED\n"));
1098    }
1099
1100    #[test]
1101    fn test_roundtrip_encode_apply() {
1102        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
1103        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
1104        let encoded = encode_from_old_and_new(
1105            old,
1106            new,
1107            None,
1108            "<|user_cursor|>",
1109            ">>>>>>> UPDATED\n",
1110            "NO_EDITS\n",
1111        )
1112        .unwrap();
1113        let output = encoded
1114            .strip_suffix(">>>>>>> UPDATED\n")
1115            .expect("should have end marker");
1116        let reconstructed = apply_marker_span(old, output).unwrap();
1117        assert_eq!(reconstructed, new);
1118    }
1119
1120    #[test]
1121    fn test_extract_editable_region_from_markers_multi() {
1122        let text = "prefix\n<|marker_1|>\naaa\nbbb\n<|marker_2|>\nccc\nddd\n<|marker_3|>\nsuffix";
1123        let parsed = extract_editable_region_from_markers(text).unwrap();
1124        assert_eq!(parsed, "aaa\nbbb\nccc\nddd");
1125    }
1126
1127    #[test]
1128    fn test_extract_editable_region_two_markers() {
1129        let text = "<|marker_1|>\none\ntwo three\n<|marker_2|>";
1130        let parsed = extract_editable_region_from_markers(text).unwrap();
1131        assert_eq!(parsed, "one\ntwo three");
1132    }
1133
1134    #[test]
1135    fn test_encode_with_cursor() {
1136        let old = "aaa\nbbb\nccc\n";
1137        let new = "aaa\nBBB\nccc\n";
1138        let result = encode_from_old_and_new(
1139            old,
1140            new,
1141            Some(5),
1142            "<|user_cursor|>",
1143            ">>>>>>> UPDATED\n",
1144            "NO_EDITS\n",
1145        )
1146        .unwrap();
1147        assert!(result.contains("<|user_cursor|>"), "result: {result}");
1148        assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
1149    }
1150
1151    #[test]
1152    fn test_extract_marker_span_strips_intermediate_markers() {
1153        let text = "<|marker_2|>\nline1\n<|marker_3|>\nline2\n<|marker_4|>";
1154        let (start, end, content) = extract_marker_span(text).unwrap();
1155        assert_eq!(start, 2);
1156        assert_eq!(end, 4);
1157        assert_eq!(content, "line1\nline2\n");
1158    }
1159
1160    #[test]
1161    fn test_extract_marker_span_strips_multiple_intermediate_markers() {
1162        let text = "<|marker_1|>\naaa\n<|marker_2|>\nbbb\n<|marker_3|>\nccc\n<|marker_4|>";
1163        let (start, end, content) = extract_marker_span(text).unwrap();
1164        assert_eq!(start, 1);
1165        assert_eq!(end, 4);
1166        assert_eq!(content, "aaa\nbbb\nccc\n");
1167    }
1168
1169    #[test]
1170    fn test_apply_marker_span_with_extra_intermediate_marker() {
1171        let old = "aaa\nbbb\nccc\n";
1172        let output = "<|marker_1|>\naaa\n<|marker_1|>\nBBB\nccc\n<|marker_2|>";
1173        let result = apply_marker_span(old, output).unwrap();
1174        assert_eq!(result, "aaa\nBBB\nccc\n");
1175    }
1176
1177    #[test]
1178    fn test_strip_marker_tags_inline() {
1179        assert_eq!(strip_marker_tags("no markers here"), "no markers here");
1180        assert_eq!(strip_marker_tags("before<|marker_5|>after"), "beforeafter");
1181        assert_eq!(
1182            strip_marker_tags("line1\n<|marker_3|>\nline2"),
1183            "line1\nline2"
1184        );
1185    }
1186
1187    #[test]
1188    fn test_write_editable_with_markers_v0316_byte_exact() {
1189        let editable = "aaa\nbbb\nccc\n";
1190        let mut output = String::new();
1191        write_editable_with_markers_v0316(&mut output, editable, 4, "<|user_cursor|>");
1192        // Should have marker tags with no extra newlines
1193        assert!(output.starts_with("<|marker_1|>"));
1194        assert!(output.contains("<|user_cursor|>"));
1195        // Content should be byte-exact - no extra newlines added by markers
1196        let stripped = output.replace("<|user_cursor|>", "");
1197        let stripped = strip_marker_tags(&stripped);
1198        assert_eq!(stripped, editable);
1199    }
1200
1201    #[test]
1202    fn test_apply_marker_span_v0316_basic() {
1203        let old = "aaa\nbbb\nccc\n";
1204        let output = "<|marker_1|>aaa\nBBB\nccc\n<|marker_2|>";
1205        let result = apply_marker_span_v0316(old, output).unwrap();
1206        assert_eq!(result, "aaa\nBBB\nccc\n");
1207    }
1208
1209    #[test]
1210    fn test_apply_marker_span_v0316_no_edit() {
1211        let old = "aaa\nbbb\nccc\n";
1212        let output = "<|marker_1|><|marker_1|>";
1213        let result = apply_marker_span_v0316(old, output).unwrap();
1214        assert_eq!(result, old);
1215    }
1216
1217    #[test]
1218    fn test_apply_marker_span_v0316_no_edit_any_marker() {
1219        let old = "aaa\nbbb\nccc\n";
1220        let output = "<|marker_2|>ignored content<|marker_2|>";
1221        let result = apply_marker_span_v0316(old, output).unwrap();
1222        assert_eq!(result, old);
1223    }
1224
1225    #[test]
1226    fn test_apply_marker_span_v0316_multi_block() {
1227        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
1228        let marker_offsets = compute_marker_offsets(old);
1229        assert!(
1230            marker_offsets.len() >= 3,
1231            "expected at least 3 offsets, got {:?}",
1232            marker_offsets
1233        );
1234
1235        // Build output spanning all blocks with new content
1236        let new_content = "LINE1\nLINE2\nLINE3\n\nLINE5\nLINE6\nLINE7\nLINE8\n";
1237        let mut output = String::new();
1238        output.push_str("<|marker_1|>");
1239        // Split new_content at old block boundaries
1240        for i in 0..marker_offsets.len() - 1 {
1241            if i > 0 {
1242                output.push_str(&marker_tag(i + 1));
1243            }
1244            let start = marker_offsets[i];
1245            let end = marker_offsets[i + 1];
1246            let block_len = end - start;
1247            // Use same length blocks from new content (they happen to be same length)
1248            output.push_str(&new_content[start..start + block_len]);
1249        }
1250        let last_marker_num = marker_offsets.len();
1251        output.push_str(&marker_tag(last_marker_num));
1252        let result = apply_marker_span_v0316(old, &output).unwrap();
1253        assert_eq!(result, new_content);
1254    }
1255
1256    #[test]
1257    fn test_apply_marker_span_v0316_byte_exact_no_normalization() {
1258        let old = "aaa\nbbb\nccc\n";
1259        // Content doesn't end with \n - should NOT be normalized
1260        let output = "<|marker_1|>aaa\nBBB\nccc<|marker_2|>";
1261        let result = apply_marker_span_v0316(old, output).unwrap();
1262        // V0316 is byte-exact: the missing trailing \n is NOT added
1263        assert_eq!(result, "aaa\nBBB\nccc");
1264    }
1265
1266    #[test]
1267    fn test_encode_v0316_no_edits() {
1268        let old = "aaa\nbbb\nccc\n";
1269        let result =
1270            encode_from_old_and_new_v0316(old, old, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1271        // Should be <|marker_K|><|marker_K|><|end|> where K is nearest to cursor
1272        assert!(result.ends_with("<|end|>"));
1273        // Parse it and verify it's a no-edit
1274        let stripped = result.strip_suffix("<|end|>").unwrap();
1275        let result_parsed = apply_marker_span_v0316(old, stripped).unwrap();
1276        assert_eq!(result_parsed, old);
1277    }
1278
1279    #[test]
1280    fn test_encode_v0316_with_change() {
1281        let old = "aaa\nbbb\nccc\n";
1282        let new = "aaa\nBBB\nccc\n";
1283        let result =
1284            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1285        assert!(result.contains("<|marker_1|>"));
1286        assert!(result.contains("<|marker_2|>"));
1287        assert!(result.ends_with("<|end|>"));
1288    }
1289
1290    #[test]
1291    fn test_roundtrip_v0316() {
1292        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
1293        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
1294        let encoded =
1295            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1296        let stripped = encoded
1297            .strip_suffix("<|end|>")
1298            .expect("should have end marker");
1299        let reconstructed = apply_marker_span_v0316(old, stripped).unwrap();
1300        assert_eq!(reconstructed, new);
1301    }
1302
1303    #[test]
1304    fn test_roundtrip_v0316_with_cursor() {
1305        let old = "aaa\nbbb\nccc\n";
1306        let new = "aaa\nBBB\nccc\n";
1307        let result =
1308            encode_from_old_and_new_v0316(old, new, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1309        assert!(result.contains("<|user_cursor|>"), "result: {result}");
1310        assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
1311    }
1312
1313    #[test]
1314    fn test_roundtrip_v0316_multi_block_change() {
1315        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
1316        let new = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
1317        let encoded =
1318            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1319        let stripped = encoded
1320            .strip_suffix("<|end|>")
1321            .expect("should have end marker");
1322        let reconstructed = apply_marker_span_v0316(old, stripped).unwrap();
1323        assert_eq!(reconstructed, new);
1324    }
1325
1326    #[test]
1327    fn test_nearest_marker_number() {
1328        let offsets = vec![0, 10, 20, 30];
1329        assert_eq!(nearest_marker_number(Some(0), &offsets), 1);
1330        assert_eq!(nearest_marker_number(Some(9), &offsets), 2);
1331        assert_eq!(nearest_marker_number(Some(15), &offsets), 2);
1332        assert_eq!(nearest_marker_number(Some(25), &offsets), 3);
1333        assert_eq!(nearest_marker_number(Some(30), &offsets), 4);
1334        assert_eq!(nearest_marker_number(None, &offsets), 1);
1335    }
1336
1337    #[test]
1338    fn test_marker_tag_relative_formats_as_expected() {
1339        assert_eq!(marker_tag_relative(-2), "<|marker-2|>");
1340        assert_eq!(marker_tag_relative(-1), "<|marker-1|>");
1341        assert_eq!(marker_tag_relative(0), "<|marker-0|>");
1342        assert_eq!(marker_tag_relative(1), "<|marker+1|>");
1343        assert_eq!(marker_tag_relative(2), "<|marker+2|>");
1344    }
1345
1346    #[test]
1347    fn test_write_editable_with_markers_v0317_includes_relative_markers_and_cursor() {
1348        let editable = "aaa\nbbb\nccc\n";
1349        let mut output = String::new();
1350        write_editable_with_markers_v0317(&mut output, editable, 4, "<|user_cursor|>");
1351
1352        assert!(output.contains("<|marker-0|>"));
1353        assert!(output.contains("<|user_cursor|>"));
1354
1355        let stripped = output.replace("<|user_cursor|>", "");
1356        let stripped =
1357            collect_relative_marker_tags(&stripped)
1358                .iter()
1359                .fold(stripped.clone(), |acc, marker| {
1360                    let tag = &stripped[marker.tag_start..marker.tag_end];
1361                    acc.replace(tag, "")
1362                });
1363        assert_eq!(stripped, editable);
1364    }
1365
1366    #[test]
1367    fn test_apply_marker_span_v0317_basic() {
1368        let old = "aaa\nbbb\nccc\n";
1369        let output = "<|marker-0|>aaa\nBBB\nccc\n<|marker+1|>";
1370        let result = apply_marker_span_v0317(old, output, Some(0)).unwrap();
1371        assert_eq!(result, "aaa\nBBB\nccc\n");
1372    }
1373
1374    #[test]
1375    fn test_apply_marker_span_v0317_no_edit() {
1376        let old = "aaa\nbbb\nccc\n";
1377        let output = "<|marker-0|><|marker-0|>";
1378        let result = apply_marker_span_v0317(old, output, Some(0)).unwrap();
1379        assert_eq!(result, old);
1380    }
1381
1382    #[test]
1383    fn test_encode_v0317_no_edits() {
1384        let old = "aaa\nbbb\nccc\n";
1385        let result =
1386            encode_from_old_and_new_v0317(old, old, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1387        assert_eq!(result, "<|marker-0|><|marker-0|><|end|>");
1388    }
1389
1390    #[test]
1391    fn test_roundtrip_v0317() {
1392        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
1393        let new = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
1394        let cursor = Some(6);
1395
1396        let encoded =
1397            encode_from_old_and_new_v0317(old, new, cursor, "<|user_cursor|>", "<|end|>").unwrap();
1398        let stripped = encoded
1399            .strip_suffix("<|end|>")
1400            .expect("should have end marker");
1401        let stripped = stripped.replace("<|user_cursor|>", "");
1402        let reconstructed = apply_marker_span_v0317(old, &stripped, cursor).unwrap();
1403        assert_eq!(reconstructed, new);
1404    }
1405
1406    #[test]
1407    fn test_roundtrip_v0317_with_cursor_marker() {
1408        let old = "aaa\nbbb\nccc\n";
1409        let new = "aaa\nBBB\nccc\n";
1410        let result =
1411            encode_from_old_and_new_v0317(old, new, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1412        assert!(result.contains("<|user_cursor|>"), "result: {result}");
1413        assert!(result.contains("<|marker-0|>"), "result: {result}");
1414    }
1415}