multi_region.rs

   1use anyhow::{Context as _, Result, anyhow};
   2
   /// Opening text of an absolute marker tag; the marker number follows it.
   pub const MARKER_TAG_PREFIX: &str = "<|marker_";
   /// Closing text that terminates every marker tag.
   pub const MARKER_TAG_SUFFIX: &str = "|>";
   /// Opening text of a relative marker tag; a signed delta follows it.
   pub const RELATIVE_MARKER_TAG_PREFIX: &str = "<|marker";
   /// Smallest block, in lines, produced by the V0316/V0317 sizing rules.
   const V0316_MIN_BLOCK_LINES: usize = 3;
   /// Number of lines after which the V0316/V0317 rules force a split.
   const V0316_MAX_BLOCK_LINES: usize = 8;
   /// Smallest block, in lines, produced by the V0318 sizing rules.
   const V0318_MIN_BLOCK_LINES: usize = 6;
   /// Number of lines after which the V0318 rules force a split.
   const V0318_MAX_BLOCK_LINES: usize = 16;
   /// How many lines a boundary may be moved forward to reach a good start line.
   const MAX_NUDGE_LINES: usize = 5;
   /// End marker text for V0316.
   // NOTE(review): not referenced in the visible part of this file; presumably
   // passed by callers as `end_marker` — confirm.
   pub const V0316_END_MARKER: &str = "<[end▁of▁sentence]>";
   /// End marker text for V0317 (same text as V0316).
   pub const V0317_END_MARKER: &str = "<[end▁of▁sentence]>";
   /// End marker text for V0318 (same text as V0316).
   pub const V0318_END_MARKER: &str = "<[end▁of▁sentence]>";
  14
  15pub fn marker_tag(number: usize) -> String {
  16    format!("{MARKER_TAG_PREFIX}{number}{MARKER_TAG_SUFFIX}")
  17}
  18
  /// Build the relative marker tag for a signed `delta`.
  ///
  /// Positive deltas get an explicit `+` (`<|marker+2|>`), negative deltas keep
  /// their own `-` (`<|marker-1|>`), and zero is always written `<|marker-0|>`.
  pub fn marker_tag_relative(delta: isize) -> String {
      match delta {
          0 => "<|marker-0|>".to_string(),
          ahead if ahead > 0 => format!("<|marker+{ahead}|>"),
          behind => format!("<|marker{behind}|>"),
      }
  }
  28
  /// Facts about one line of the text scanned by `collect_line_info`.
  struct LineInfo {
      /// Byte offset of the line's first byte within the scanned text.
      start: usize,
      /// True when the line is empty once surrounding whitespace is trimmed.
      is_blank: bool,
      /// True when the line is non-blank and is not a structural tail.
      is_good_start: bool,
  }
  34
  35fn collect_line_info(text: &str) -> Vec<LineInfo> {
  36    let mut lines = Vec::new();
  37    let mut offset = 0;
  38    for line in text.split('\n') {
  39        let trimmed = line.trim();
  40        let is_blank = trimmed.is_empty();
  41        let is_good_start = !is_blank && !is_structural_tail(trimmed);
  42        lines.push(LineInfo {
  43            start: offset,
  44            is_blank,
  45            is_good_start,
  46        });
  47        offset += line.len() + 1;
  48    }
  49    // split('\n') on "abc\n" yields ["abc", ""] — drop the phantom trailing
  50    // empty element when the text ends with '\n'.
  51    if text.ends_with('\n') && lines.len() > 1 {
  52        lines.pop();
  53    }
  54    lines
  55}
  56
  /// Report whether an already-trimmed line merely closes or ends a construct:
  /// it begins with a closing bracket, or it is one of the bare keywords
  /// `break`, `continue`, `return`, `throw`, `end` (trailing `;` ignored).
  fn is_structural_tail(trimmed_line: &str) -> bool {
      let opens_with_closer = matches!(trimmed_line.chars().next(), Some('}' | ']' | ')'));
      if opens_with_closer {
          return true;
      }

      let keyword = trimmed_line.trim_end_matches(';');
      ["break", "continue", "return", "throw", "end"].contains(&keyword)
  }
  66
  67/// Starting from line `from`, scan up to `MAX_NUDGE_LINES` forward to find a
  68/// line with `is_good_start`. Returns `None` if no suitable line is found.
  69fn skip_to_good_start(lines: &[LineInfo], from: usize) -> Option<usize> {
  70    (from..lines.len().min(from + MAX_NUDGE_LINES)).find(|&i| lines[i].is_good_start)
  71}
  72
  73/// Compute byte offsets within `editable_text` where marker boundaries should
  74/// be placed.
  75///
  76/// Returns a sorted `Vec<usize>` that always starts with `0` and ends with
  77/// `editable_text.len()`. Interior offsets are placed at line boundaries
  78/// (right after a `\n`), preferring blank-line boundaries when available and
  79/// respecting `min_block_lines` / `max_block_lines` constraints.
  80fn compute_marker_offsets_with_limits(
  81    editable_text: &str,
  82    min_block_lines: usize,
  83    max_block_lines: usize,
  84) -> Vec<usize> {
  85    if editable_text.is_empty() {
  86        return vec![0, 0];
  87    }
  88
  89    let lines = collect_line_info(editable_text);
  90    let mut offsets = vec![0usize];
  91    let mut last_boundary_line = 0;
  92    let mut i = 0;
  93
  94    while i < lines.len() {
  95        let gap = i - last_boundary_line;
  96
  97        // Blank-line split: non-blank line following blank line(s) with enough
  98        // accumulated lines.
  99        if gap >= min_block_lines && !lines[i].is_blank && i > 0 && lines[i - 1].is_blank {
 100            let target = if lines[i].is_good_start {
 101                i
 102            } else {
 103                skip_to_good_start(&lines, i).unwrap_or(i)
 104            };
 105            if lines.len() - target >= min_block_lines
 106                && lines[target].start > *offsets.last().unwrap_or(&0)
 107            {
 108                offsets.push(lines[target].start);
 109                last_boundary_line = target;
 110                i = target + 1;
 111                continue;
 112            }
 113        }
 114
 115        // Hard cap: too many lines without a split.
 116        if gap >= max_block_lines {
 117            let target = skip_to_good_start(&lines, i).unwrap_or(i);
 118            if lines[target].start > *offsets.last().unwrap_or(&0) {
 119                offsets.push(lines[target].start);
 120                last_boundary_line = target;
 121                i = target + 1;
 122                continue;
 123            }
 124        }
 125
 126        i += 1;
 127    }
 128
 129    let end = editable_text.len();
 130    if *offsets.last().unwrap_or(&0) != end {
 131        offsets.push(end);
 132    }
 133
 134    offsets
 135}
 136
 137/// Compute byte offsets within `editable_text` for the V0316/V0317 block sizing rules.
 138pub fn compute_marker_offsets(editable_text: &str) -> Vec<usize> {
 139    compute_marker_offsets_with_limits(editable_text, V0316_MIN_BLOCK_LINES, V0316_MAX_BLOCK_LINES)
 140}
 141
 142pub fn compute_marker_offsets_v0318(editable_text: &str) -> Vec<usize> {
 143    compute_marker_offsets_with_limits(editable_text, V0318_MIN_BLOCK_LINES, V0318_MAX_BLOCK_LINES)
 144}
 145
 146/// Write the editable region content with marker tags, inserting the cursor
 147/// marker at the given offset within the editable text.
 148pub fn write_editable_with_markers(
 149    output: &mut String,
 150    editable_text: &str,
 151    cursor_offset_in_editable: usize,
 152    cursor_marker: &str,
 153) {
 154    let marker_offsets = compute_marker_offsets(editable_text);
 155    let mut cursor_placed = false;
 156    for (i, &offset) in marker_offsets.iter().enumerate() {
 157        let marker_num = i + 1;
 158        if !output.is_empty() && !output.ends_with('\n') {
 159            output.push('\n');
 160        }
 161        output.push_str(&marker_tag(marker_num));
 162
 163        if let Some(&next_offset) = marker_offsets.get(i + 1) {
 164            output.push('\n');
 165            let block = &editable_text[offset..next_offset];
 166            if !cursor_placed
 167                && cursor_offset_in_editable >= offset
 168                && cursor_offset_in_editable <= next_offset
 169            {
 170                cursor_placed = true;
 171                let cursor_in_block = cursor_offset_in_editable - offset;
 172                output.push_str(&block[..cursor_in_block]);
 173                output.push_str(cursor_marker);
 174                output.push_str(&block[cursor_in_block..]);
 175            } else {
 176                output.push_str(block);
 177            }
 178        }
 179    }
 180}
 181
 182/// Strip any `<|marker_N|>` tags from `text`.
 183///
 184/// When a marker tag sits on its own line (followed by `\n`), the trailing
 185/// newline is also removed so the surrounding lines stay joined naturally.
 186fn strip_marker_tags(text: &str) -> String {
 187    let mut result = String::with_capacity(text.len());
 188    let mut pos = 0;
 189    let bytes = text.as_bytes();
 190    while let Some(rel) = text[pos..].find(MARKER_TAG_PREFIX) {
 191        result.push_str(&text[pos..pos + rel]);
 192        let num_start = pos + rel + MARKER_TAG_PREFIX.len();
 193        if let Some(suffix_rel) = text[num_start..].find(MARKER_TAG_SUFFIX) {
 194            let mut tag_end = num_start + suffix_rel + MARKER_TAG_SUFFIX.len();
 195            if bytes.get(tag_end) == Some(&b'\n') {
 196                tag_end += 1;
 197            }
 198            pos = tag_end;
 199        } else {
 200            result.push_str(MARKER_TAG_PREFIX);
 201            pos = num_start;
 202        }
 203    }
 204    result.push_str(&text[pos..]);
 205    result
 206}
 207
 208/// Parse model output that uses the marker format.
 209///
 210/// Returns `(start_marker_num, end_marker_num, content_between_markers)`.
 211/// The leading format-level newline after the start marker is stripped.
 212/// Trailing newlines are preserved so blank-line endings in the editable
 213/// region are not lost.
 214///
 215/// Any extra intermediate marker tags that the model may have inserted
 216/// between the first and last markers are stripped from the returned content.
 217pub fn extract_marker_span(text: &str) -> Result<(usize, usize, String)> {
 218    let first_tag_start = text
 219        .find(MARKER_TAG_PREFIX)
 220        .context("no start marker found in output")?;
 221    let first_num_start = first_tag_start + MARKER_TAG_PREFIX.len();
 222    let first_num_end = text[first_num_start..]
 223        .find(MARKER_TAG_SUFFIX)
 224        .map(|i| i + first_num_start)
 225        .context("malformed start marker tag")?;
 226    let start_num: usize = text[first_num_start..first_num_end]
 227        .parse()
 228        .context("start marker number is not a valid integer")?;
 229    let first_tag_end = first_num_end + MARKER_TAG_SUFFIX.len();
 230
 231    let last_tag_start = text
 232        .rfind(MARKER_TAG_PREFIX)
 233        .context("no end marker found in output")?;
 234    let last_num_start = last_tag_start + MARKER_TAG_PREFIX.len();
 235    let last_num_end = text[last_num_start..]
 236        .find(MARKER_TAG_SUFFIX)
 237        .map(|i| i + last_num_start)
 238        .context("malformed end marker tag")?;
 239    let end_num: usize = text[last_num_start..last_num_end]
 240        .parse()
 241        .context("end marker number is not a valid integer")?;
 242
 243    if start_num == end_num {
 244        return Err(anyhow!(
 245            "start and end markers are the same (marker {})",
 246            start_num
 247        ));
 248    }
 249
 250    let mut content_start = first_tag_end;
 251    if text.as_bytes().get(content_start) == Some(&b'\n') {
 252        content_start += 1;
 253    }
 254    let content_end = last_tag_start;
 255
 256    let content = &text[content_start..content_end.max(content_start)];
 257    let content = strip_marker_tags(content);
 258    Ok((start_num, end_num, content))
 259}
 260
 261/// Given old editable text and model output with marker span, reconstruct the
 262/// full new editable region.
 263pub fn apply_marker_span(old_editable: &str, output: &str) -> Result<String> {
 264    let (start_num, end_num, raw_new_span) = extract_marker_span(output)?;
 265    let marker_offsets = compute_marker_offsets(old_editable);
 266
 267    let start_idx = start_num
 268        .checked_sub(1)
 269        .context("marker numbers are 1-indexed")?;
 270    let end_idx = end_num
 271        .checked_sub(1)
 272        .context("marker numbers are 1-indexed")?;
 273    let start_byte = *marker_offsets
 274        .get(start_idx)
 275        .context("start marker number out of range")?;
 276    let end_byte = *marker_offsets
 277        .get(end_idx)
 278        .context("end marker number out of range")?;
 279
 280    if start_byte > end_byte {
 281        return Err(anyhow!("start marker must come before end marker"));
 282    }
 283
 284    let old_span = &old_editable[start_byte..end_byte];
 285    let mut new_span = raw_new_span;
 286    if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
 287        new_span.push('\n');
 288    }
 289    if !old_span.ends_with('\n') && new_span.ends_with('\n') {
 290        new_span.pop();
 291    }
 292
 293    let mut result = String::new();
 294    result.push_str(&old_editable[..start_byte]);
 295    result.push_str(&new_span);
 296    result.push_str(&old_editable[end_byte..]);
 297
 298    Ok(result)
 299}
 300
 301/// Compare old and new editable text, find the minimal marker span that covers
 302/// all changes, and encode the result with marker tags.
 303pub fn encode_from_old_and_new(
 304    old_editable: &str,
 305    new_editable: &str,
 306    cursor_offset_in_new: Option<usize>,
 307    cursor_marker: &str,
 308    end_marker: &str,
 309    no_edits_marker: &str,
 310) -> Result<String> {
 311    if old_editable == new_editable {
 312        return Ok(format!("{no_edits_marker}{end_marker}"));
 313    }
 314
 315    let marker_offsets = compute_marker_offsets(old_editable);
 316    let (common_prefix, common_suffix) =
 317        common_prefix_suffix(old_editable.as_bytes(), new_editable.as_bytes());
 318    let change_end_in_old = old_editable.len() - common_suffix;
 319
 320    let start_marker_idx = marker_offsets
 321        .iter()
 322        .rposition(|&offset| offset <= common_prefix)
 323        .unwrap_or(0);
 324    let end_marker_idx = marker_offsets
 325        .iter()
 326        .position(|&offset| offset >= change_end_in_old)
 327        .unwrap_or(marker_offsets.len() - 1);
 328
 329    let old_start = marker_offsets[start_marker_idx];
 330    let old_end = marker_offsets[end_marker_idx];
 331
 332    let new_start = old_start;
 333    let new_end = new_editable
 334        .len()
 335        .saturating_sub(old_editable.len().saturating_sub(old_end));
 336
 337    let new_span = &new_editable[new_start..new_end];
 338
 339    let start_marker_num = start_marker_idx + 1;
 340    let end_marker_num = end_marker_idx + 1;
 341
 342    let mut result = String::new();
 343    result.push_str(&marker_tag(start_marker_num));
 344    result.push('\n');
 345
 346    if let Some(cursor_offset) = cursor_offset_in_new {
 347        if cursor_offset >= new_start && cursor_offset <= new_end {
 348            let cursor_in_span = cursor_offset - new_start;
 349            let bounded = cursor_in_span.min(new_span.len());
 350            result.push_str(&new_span[..bounded]);
 351            result.push_str(cursor_marker);
 352            result.push_str(&new_span[bounded..]);
 353        } else {
 354            result.push_str(new_span);
 355        }
 356    } else {
 357        result.push_str(new_span);
 358    }
 359
 360    if !result.ends_with('\n') {
 361        result.push('\n');
 362    }
 363    result.push_str(&marker_tag(end_marker_num));
 364    result.push('\n');
 365    result.push_str(end_marker);
 366
 367    Ok(result)
 368}
 369
 370/// Extract the full editable region from text that uses marker tags.
 371///
 372/// Returns the concatenation of all block contents between the first and last
 373/// markers, with intermediate marker tags stripped.
 374pub fn extract_editable_region_from_markers(text: &str) -> Option<String> {
 375    let first_marker_start = text.find(MARKER_TAG_PREFIX)?;
 376
 377    let mut markers: Vec<(usize, usize)> = Vec::new();
 378    let mut search_start = first_marker_start;
 379    while let Some(rel_pos) = text[search_start..].find(MARKER_TAG_PREFIX) {
 380        let tag_start = search_start + rel_pos;
 381        let num_start = tag_start + MARKER_TAG_PREFIX.len();
 382        let num_end = text[num_start..].find(MARKER_TAG_SUFFIX)?;
 383        let tag_end = num_start + num_end + MARKER_TAG_SUFFIX.len();
 384        markers.push((tag_start, tag_end));
 385        search_start = tag_end;
 386    }
 387
 388    if markers.len() < 2 {
 389        return None;
 390    }
 391
 392    let (_, first_tag_end) = markers[0];
 393    let (last_tag_start, _) = markers[markers.len() - 1];
 394
 395    let mut content_start = first_tag_end;
 396    if text.as_bytes().get(content_start) == Some(&b'\n') {
 397        content_start += 1;
 398    }
 399    let mut content_end = last_tag_start;
 400    if content_end > content_start && text.as_bytes().get(content_end - 1) == Some(&b'\n') {
 401        content_end -= 1;
 402    }
 403
 404    let raw = &text[content_start..content_end];
 405    let result = strip_marker_tags(raw);
 406    let result = result.strip_suffix('\n').unwrap_or(&result).to_string();
 407    Some(result)
 408}
 409
 /// One marker tag located by `collect_tags`.
 struct ParsedTag {
     /// Number carried by the tag, as produced by the parse function.
     value: isize,
     /// Byte offset of the tag's first byte in the scanned text.
     tag_start: usize,
     /// Byte offset just past the tag's closing suffix.
     tag_end: usize,
 }
 415
 416fn collect_tags(text: &str, prefix: &str, parse: fn(&str) -> Option<isize>) -> Vec<ParsedTag> {
 417    let mut tags = Vec::new();
 418    let mut search_from = 0;
 419    while let Some(rel_pos) = text[search_from..].find(prefix) {
 420        let tag_start = search_from + rel_pos;
 421        let payload_start = tag_start + prefix.len();
 422        if let Some(suffix_rel) = text[payload_start..].find(MARKER_TAG_SUFFIX) {
 423            let payload_end = payload_start + suffix_rel;
 424            if let Some(value) = parse(&text[payload_start..payload_end]) {
 425                let tag_end = payload_end + MARKER_TAG_SUFFIX.len();
 426                tags.push(ParsedTag {
 427                    value,
 428                    tag_start,
 429                    tag_end,
 430                });
 431                search_from = tag_end;
 432                continue;
 433            }
 434        }
 435        search_from = tag_start + prefix.len();
 436    }
 437    tags
 438}
 439
 440fn collect_marker_tags(text: &str) -> Vec<ParsedTag> {
 441    collect_tags(text, MARKER_TAG_PREFIX, |s| {
 442        s.parse::<usize>().ok().map(|n| n as isize)
 443    })
 444}
 445
 446fn collect_relative_marker_tags(text: &str) -> Vec<ParsedTag> {
 447    collect_tags(text, RELATIVE_MARKER_TAG_PREFIX, |s| {
 448        s.parse::<isize>().ok()
 449    })
 450}
 451
 /// Return the 1-based number of the marker whose offset is closest to the
 /// cursor.
 ///
 /// A missing cursor is treated as offset `0`. When two markers are equally
 /// close the earlier one wins, and an empty `marker_offsets` yields `1`.
 ///
 /// The distance is computed with `usize::abs_diff`, so it is exact for every
 /// pair of offsets; going through `isize` casts would wrap for values above
 /// `isize::MAX` and could select the wrong marker.
 pub fn nearest_marker_number(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
     let cursor = cursor_offset.unwrap_or(0);
     marker_offsets
         .iter()
         .enumerate()
         .min_by_key(|&(_, &offset)| offset.abs_diff(cursor))
         .map_or(1, |(index, _)| index + 1)
 }
 461
 /// Return the index of the block (pair of consecutive marker offsets) whose
 /// half-open byte range contains the cursor.
 ///
 /// A missing cursor counts as offset `0`. When no block contains the cursor
 /// the index of the last block is returned (`0` if there is no block).
 fn cursor_block_index(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
     let cursor = cursor_offset.unwrap_or(0);

     for (index, bounds) in marker_offsets.windows(2).enumerate() {
         if (bounds[0]..bounds[1]).contains(&cursor) {
             return index;
         }
     }

     marker_offsets.len().saturating_sub(2)
 }
 469
 /// Return the lengths of the longest common prefix and of the longest common
 /// suffix of `a` and `b`. The suffix is measured only over the bytes that
 /// are not already part of the prefix, so the two never overlap.
 fn common_prefix_suffix(a: &[u8], b: &[u8]) -> (usize, usize) {
     let prefix = a
         .iter()
         .zip(b)
         .take_while(|(left, right)| left == right)
         .count();

     // Only what follows the shared prefix can contribute to the suffix.
     let (rest_a, rest_b) = (&a[prefix..], &b[prefix..]);
     let suffix = rest_a
         .iter()
         .rev()
         .zip(rest_b.iter().rev())
         .take_while(|(left, right)| left == right)
         .count();

     (prefix, suffix)
 }
 483
 /// Translate `old_rel`, an offset inside the old span, into the matching
 /// offset inside the new span.
 ///
 /// Offsets within the common prefix map to themselves. Offsets within the
 /// common suffix keep their distance from the end of the span. Offsets in
 /// the changed middle are scaled linearly from the old changed length to
 /// the new one.
 fn map_boundary_offset(
     old_rel: usize,
     old_span_len: usize,
     new_span_len: usize,
     span_common_prefix: usize,
     span_common_suffix: usize,
 ) -> usize {
     if old_rel <= span_common_prefix {
         return old_rel;
     }
     if old_rel >= old_span_len - span_common_suffix {
         return new_span_len - (old_span_len - old_rel);
     }

     let old_changed_len = old_span_len
         .saturating_sub(span_common_prefix)
         .saturating_sub(span_common_suffix);
     let new_changed_len = new_span_len
         .saturating_sub(span_common_prefix)
         .saturating_sub(span_common_suffix);

     if old_changed_len == 0 {
         return span_common_prefix;
     }

     let into_change = old_rel - span_common_prefix;
     span_common_prefix + into_change * new_changed_len / old_changed_len
 }
 514
 /// Move `offset` to the start of a line in `text`.
 ///
 /// The offset is first clamped to the text and moved back to a character
 /// boundary. An offset at the end of the text or already at a line start is
 /// returned as is. Otherwise the result is the start of the next line, or,
 /// when no newline follows, the start of the line the offset is in.
 fn snap_to_line_start(text: &str, offset: usize) -> usize {
     let clamped = offset.min(text.len());
     // Largest character boundary that is not past `clamped`.
     let at = (0..=clamped)
         .rev()
         .find(|&index| text.is_char_boundary(index))
         .unwrap_or(0);

     if at >= text.len() {
         return text.len();
     }

     let (before, after) = text.split_at(at);
     if before.is_empty() || before.ends_with('\n') {
         return at;
     }

     match after.find('\n') {
         Some(newline) => at + newline + 1,
         None => before.rfind('\n').map_or(0, |newline| newline + 1),
     }
 }
 535
 /// Append `editable_text` to `output` with a tag at every marker offset and
 /// nothing else added: no newline is inserted around the tags.
 ///
 /// `tag_for_index` turns a boundary index (position within
 /// `marker_offsets`) into the tag text. `cursor_marker` is inserted once, in
 /// the first block whose byte range (both ends inclusive) contains
 /// `cursor_offset_in_editable`.
 fn write_editable_with_markers_impl(
     output: &mut String,
     editable_text: &str,
     cursor_offset_in_editable: usize,
     cursor_marker: &str,
     marker_offsets: &[usize],
     tag_for_index: impl Fn(usize) -> String,
 ) {
     let mut cursor_pending = true;

     for (index, bounds) in marker_offsets.windows(2).enumerate() {
         let (block_start, block_end) = (bounds[0], bounds[1]);
         output.push_str(&tag_for_index(index));

         let block = &editable_text[block_start..block_end];
         let holds_cursor =
             cursor_pending && (block_start..=block_end).contains(&cursor_offset_in_editable);
         if holds_cursor {
             cursor_pending = false;
             let (head, tail) = block.split_at(cursor_offset_in_editable - block_start);
             output.push_str(head);
             output.push_str(cursor_marker);
             output.push_str(tail);
         } else {
             output.push_str(block);
         }
     }

     // The last boundary closes the final block and has no text after it.
     if let Some(last_index) = marker_offsets.len().checked_sub(1) {
         output.push_str(&tag_for_index(last_index));
     }
 }
 569
 570pub fn write_editable_with_markers_v0316(
 571    output: &mut String,
 572    editable_text: &str,
 573    cursor_offset_in_editable: usize,
 574    cursor_marker: &str,
 575) {
 576    let marker_offsets = compute_marker_offsets(editable_text);
 577    write_editable_with_markers_impl(
 578        output,
 579        editable_text,
 580        cursor_offset_in_editable,
 581        cursor_marker,
 582        &marker_offsets,
 583        |i| marker_tag(i + 1),
 584    );
 585}
 586
 587pub fn write_editable_with_markers_v0317(
 588    output: &mut String,
 589    editable_text: &str,
 590    cursor_offset_in_editable: usize,
 591    cursor_marker: &str,
 592) {
 593    let marker_offsets = compute_marker_offsets(editable_text);
 594    let anchor_idx = cursor_block_index(Some(cursor_offset_in_editable), &marker_offsets);
 595    write_editable_with_markers_impl(
 596        output,
 597        editable_text,
 598        cursor_offset_in_editable,
 599        cursor_marker,
 600        &marker_offsets,
 601        |i| marker_tag_relative(i as isize - anchor_idx as isize),
 602    );
 603}
 604
 605pub fn write_editable_with_markers_v0318(
 606    output: &mut String,
 607    editable_text: &str,
 608    cursor_offset_in_editable: usize,
 609    cursor_marker: &str,
 610) {
 611    let marker_offsets = compute_marker_offsets_v0318(editable_text);
 612    write_editable_with_markers_impl(
 613        output,
 614        editable_text,
 615        cursor_offset_in_editable,
 616        cursor_marker,
 617        &marker_offsets,
 618        |i| marker_tag(i + 1),
 619    );
 620}
 621
 622/// Parse byte-exact model output and reconstruct the full new editable region.
 623///
 624/// `resolve_boundary` maps a parsed tag value to an absolute byte offset in
 625/// old_editable, given the marker_offsets. Returns `(start_byte, end_byte)` or
 626/// an error.
 627fn apply_marker_span_impl(
 628    old_editable: &str,
 629    tags: &[ParsedTag],
 630    output: &str,
 631    resolve_boundaries: impl Fn(isize, isize) -> Result<(usize, usize)>,
 632) -> Result<String> {
 633    if tags.is_empty() {
 634        return Err(anyhow!("no marker tags found in output"));
 635    }
 636    if tags.len() == 1 {
 637        return Err(anyhow!(
 638            "only one marker tag found in output, expected at least two"
 639        ));
 640    }
 641
 642    let start_value = tags[0].value;
 643    let end_value = tags[tags.len() - 1].value;
 644
 645    if start_value == end_value {
 646        return Ok(old_editable.to_string());
 647    }
 648
 649    let (start_byte, end_byte) = resolve_boundaries(start_value, end_value)?;
 650
 651    if start_byte > end_byte {
 652        return Err(anyhow!("start marker must come before end marker"));
 653    }
 654
 655    let mut new_content = String::new();
 656    for i in 0..tags.len() - 1 {
 657        let content_start = tags[i].tag_end;
 658        let content_end = tags[i + 1].tag_start;
 659        if content_start <= content_end {
 660            new_content.push_str(&output[content_start..content_end]);
 661        }
 662    }
 663
 664    let mut result = String::new();
 665    result.push_str(&old_editable[..start_byte]);
 666    result.push_str(&new_content);
 667    result.push_str(&old_editable[end_byte..]);
 668
 669    Ok(result)
 670}
 671
 672pub fn apply_marker_span_v0316(old_editable: &str, output: &str) -> Result<String> {
 673    let tags = collect_marker_tags(output);
 674
 675    // Validate monotonically increasing with no gaps (best-effort warning)
 676    if tags.len() >= 2 {
 677        let start_num = tags[0].value;
 678        let end_num = tags[tags.len() - 1].value;
 679        if start_num != end_num {
 680            let expected: Vec<isize> = (start_num..=end_num).collect();
 681            let actual: Vec<isize> = tags.iter().map(|t| t.value).collect();
 682            if actual != expected {
 683                eprintln!(
 684                    "V0316 marker sequence validation failed: expected {:?}, got {:?}. Attempting best-effort parse.",
 685                    expected, actual
 686                );
 687            }
 688        }
 689    }
 690
 691    let marker_offsets = compute_marker_offsets(old_editable);
 692    apply_marker_span_impl(old_editable, &tags, output, |start_val, end_val| {
 693        let start_idx = (start_val as usize)
 694            .checked_sub(1)
 695            .context("marker numbers are 1-indexed")?;
 696        let end_idx = (end_val as usize)
 697            .checked_sub(1)
 698            .context("marker numbers are 1-indexed")?;
 699        let start_byte = *marker_offsets
 700            .get(start_idx)
 701            .context("start marker number out of range")?;
 702        let end_byte = *marker_offsets
 703            .get(end_idx)
 704            .context("end marker number out of range")?;
 705        Ok((start_byte, end_byte))
 706    })
 707}
 708
 709pub fn apply_marker_span_v0317(
 710    old_editable: &str,
 711    output: &str,
 712    cursor_offset_in_old: Option<usize>,
 713) -> Result<String> {
 714    let tags = collect_relative_marker_tags(output);
 715    let marker_offsets = compute_marker_offsets(old_editable);
 716    let anchor_idx = cursor_block_index(cursor_offset_in_old, &marker_offsets);
 717
 718    apply_marker_span_impl(old_editable, &tags, output, |start_delta, end_delta| {
 719        let start_idx_signed = anchor_idx as isize + start_delta;
 720        let end_idx_signed = anchor_idx as isize + end_delta;
 721        if start_idx_signed < 0 || end_idx_signed < 0 {
 722            return Err(anyhow!("relative marker maps before first marker"));
 723        }
 724        let start_idx = usize::try_from(start_idx_signed).context("invalid start marker index")?;
 725        let end_idx = usize::try_from(end_idx_signed).context("invalid end marker index")?;
 726        let start_byte = *marker_offsets
 727            .get(start_idx)
 728            .context("start marker number out of range")?;
 729        let end_byte = *marker_offsets
 730            .get(end_idx)
 731            .context("end marker number out of range")?;
 732        Ok((start_byte, end_byte))
 733    })
 734}
 735
 736pub fn apply_marker_span_v0318(old_editable: &str, output: &str) -> Result<String> {
 737    let tags = collect_marker_tags(output);
 738
 739    if tags.len() >= 2 {
 740        let start_num = tags[0].value;
 741        let end_num = tags[tags.len() - 1].value;
 742        if start_num != end_num {
 743            let expected: Vec<isize> = (start_num..=end_num).collect();
 744            let actual: Vec<isize> = tags.iter().map(|t| t.value).collect();
 745            if actual != expected {
 746                eprintln!(
 747                    "V0318 marker sequence validation failed: expected {:?}, got {:?}. Attempting best-effort parse.",
 748                    expected, actual
 749                );
 750            }
 751        }
 752    }
 753
 754    let marker_offsets = compute_marker_offsets_v0318(old_editable);
 755    apply_marker_span_impl(old_editable, &tags, output, |start_val, end_val| {
 756        let start_idx = (start_val as usize)
 757            .checked_sub(1)
 758            .context("marker numbers are 1-indexed")?;
 759        let end_idx = (end_val as usize)
 760            .checked_sub(1)
 761            .context("marker numbers are 1-indexed")?;
 762        let start_byte = *marker_offsets
 763            .get(start_idx)
 764            .context("start marker number out of range")?;
 765        let end_byte = *marker_offsets
 766            .get(end_idx)
 767            .context("end marker number out of range")?;
 768        Ok((start_byte, end_byte))
 769    })
 770}
 771
 772/// Encode the training target from old and new editable text.
 773///
 774/// Shared implementation for V0316, V0317, and V0318. The `tag_for_block_idx`
 775/// closure maps a block index to the appropriate marker tag string.
 776/// `no_edit_tag` is the marker tag to repeat when there are no edits.
 777fn encode_from_old_and_new_impl(
 778    old_editable: &str,
 779    new_editable: &str,
 780    cursor_offset_in_new: Option<usize>,
 781    cursor_marker: &str,
 782    end_marker: &str,
 783    no_edit_tag: &str,
 784    marker_offsets: &[usize],
 785    tag_for_block_idx: impl Fn(usize) -> String,
 786) -> Result<String> {
 787    if old_editable == new_editable {
 788        return Ok(format!("{no_edit_tag}{no_edit_tag}{end_marker}"));
 789    }
 790
 791    let (common_prefix, common_suffix) =
 792        common_prefix_suffix(old_editable.as_bytes(), new_editable.as_bytes());
 793    let change_end_in_old = old_editable.len() - common_suffix;
 794
 795    let mut start_marker_idx = marker_offsets
 796        .iter()
 797        .rposition(|&offset| offset <= common_prefix)
 798        .unwrap_or(0);
 799    let mut end_marker_idx = marker_offsets
 800        .iter()
 801        .position(|&offset| offset >= change_end_in_old)
 802        .unwrap_or(marker_offsets.len() - 1);
 803
 804    if start_marker_idx == end_marker_idx {
 805        if end_marker_idx < marker_offsets.len().saturating_sub(1) {
 806            end_marker_idx += 1;
 807        } else if start_marker_idx > 0 {
 808            start_marker_idx -= 1;
 809        }
 810    }
 811
 812    let old_start = marker_offsets[start_marker_idx];
 813    let old_end = marker_offsets[end_marker_idx];
 814
 815    let new_start = old_start;
 816    let new_end = new_editable
 817        .len()
 818        .saturating_sub(old_editable.len().saturating_sub(old_end));
 819
 820    let new_span = &new_editable[new_start..new_end];
 821    let old_span = &old_editable[old_start..old_end];
 822
 823    let (span_common_prefix, span_common_suffix) =
 824        common_prefix_suffix(old_span.as_bytes(), new_span.as_bytes());
 825
 826    let mut result = String::new();
 827    let mut prev_new_rel = 0usize;
 828    let mut cursor_placed = false;
 829
 830    for block_idx in start_marker_idx..end_marker_idx {
 831        result.push_str(&tag_for_block_idx(block_idx));
 832
 833        let new_rel_end = if block_idx + 1 == end_marker_idx {
 834            new_span.len()
 835        } else {
 836            let old_rel = marker_offsets[block_idx + 1] - old_start;
 837            let mapped = map_boundary_offset(
 838                old_rel,
 839                old_span.len(),
 840                new_span.len(),
 841                span_common_prefix,
 842                span_common_suffix,
 843            );
 844            snap_to_line_start(new_span, mapped)
 845        };
 846
 847        let new_rel_end = new_rel_end.max(prev_new_rel);
 848        let block_content = &new_span[prev_new_rel..new_rel_end];
 849
 850        if !cursor_placed {
 851            if let Some(cursor_offset) = cursor_offset_in_new {
 852                let abs_start = new_start + prev_new_rel;
 853                let abs_end = new_start + new_rel_end;
 854                if cursor_offset >= abs_start && cursor_offset <= abs_end {
 855                    cursor_placed = true;
 856                    let cursor_in_block = cursor_offset - abs_start;
 857                    let bounded = cursor_in_block.min(block_content.len());
 858                    result.push_str(&block_content[..bounded]);
 859                    result.push_str(cursor_marker);
 860                    result.push_str(&block_content[bounded..]);
 861                    prev_new_rel = new_rel_end;
 862                    continue;
 863                }
 864            }
 865        }
 866
 867        result.push_str(block_content);
 868        prev_new_rel = new_rel_end;
 869    }
 870
 871    result.push_str(&tag_for_block_idx(end_marker_idx));
 872    result.push_str(end_marker);
 873
 874    Ok(result)
 875}
 876
 877pub fn encode_from_old_and_new_v0316(
 878    old_editable: &str,
 879    new_editable: &str,
 880    cursor_offset_in_new: Option<usize>,
 881    cursor_marker: &str,
 882    end_marker: &str,
 883) -> Result<String> {
 884    let marker_offsets = compute_marker_offsets(old_editable);
 885    let no_edit_tag = marker_tag(nearest_marker_number(cursor_offset_in_new, &marker_offsets));
 886    encode_from_old_and_new_impl(
 887        old_editable,
 888        new_editable,
 889        cursor_offset_in_new,
 890        cursor_marker,
 891        end_marker,
 892        &no_edit_tag,
 893        &marker_offsets,
 894        |block_idx| marker_tag(block_idx + 1),
 895    )
 896}
 897
 898pub fn encode_from_old_and_new_v0317(
 899    old_editable: &str,
 900    new_editable: &str,
 901    cursor_offset_in_new: Option<usize>,
 902    cursor_marker: &str,
 903    end_marker: &str,
 904) -> Result<String> {
 905    let marker_offsets = compute_marker_offsets(old_editable);
 906    let anchor_idx = cursor_block_index(cursor_offset_in_new, &marker_offsets);
 907    let no_edit_tag = marker_tag_relative(0);
 908    encode_from_old_and_new_impl(
 909        old_editable,
 910        new_editable,
 911        cursor_offset_in_new,
 912        cursor_marker,
 913        end_marker,
 914        &no_edit_tag,
 915        &marker_offsets,
 916        |block_idx| marker_tag_relative(block_idx as isize - anchor_idx as isize),
 917    )
 918}
 919
 920pub fn encode_from_old_and_new_v0318(
 921    old_editable: &str,
 922    new_editable: &str,
 923    cursor_offset_in_new: Option<usize>,
 924    cursor_marker: &str,
 925    end_marker: &str,
 926) -> Result<String> {
 927    let marker_offsets = compute_marker_offsets_v0318(old_editable);
 928    let no_edit_tag = marker_tag(nearest_marker_number(cursor_offset_in_new, &marker_offsets));
 929    encode_from_old_and_new_impl(
 930        old_editable,
 931        new_editable,
 932        cursor_offset_in_new,
 933        cursor_marker,
 934        end_marker,
 935        &no_edit_tag,
 936        &marker_offsets,
 937        |block_idx| marker_tag(block_idx + 1),
 938    )
 939}
 940
 941#[cfg(test)]
 942mod tests {
 943    use super::*;
 944
 945    #[test]
 946    fn test_compute_marker_offsets_small_block() {
 947        let text = "aaa\nbbb\nccc\n";
 948        let offsets = compute_marker_offsets(text);
 949        assert_eq!(offsets, vec![0, text.len()]);
 950    }
 951
 952    #[test]
 953    fn test_compute_marker_offsets_blank_line_split() {
 954        let text = "aaa\nbbb\nccc\n\nddd\neee\nfff\n";
 955        let offsets = compute_marker_offsets(text);
 956        assert_eq!(offsets[0], 0);
 957        assert!(offsets.contains(&13), "offsets: {:?}", offsets);
 958        assert_eq!(*offsets.last().unwrap(), text.len());
 959    }
 960
 961    #[test]
 962    fn test_compute_marker_offsets_blank_line_split_overrides_pending_hard_cap_boundary() {
 963        let text = "\
 964class OCRDataframe(BaseModel):
 965    model_config = ConfigDict(arbitrary_types_allowed=True)
 966
 967    df: pl.DataFrame
 968
 969    def page(self, page_number: int = 0) -> \"OCRDataframe\":
 970        # Filter dataframe on specific page
 971        df_page = self.df.filter(pl.col(\"page\") == page_number)
 972        return OCRDataframe(df=df_page)
 973
 974    def get_text_cell(
 975        self,
 976        cell: Cell,
 977        margin: int = 0,
 978        page_number: Optional[int] = None,
 979        min_confidence: int = 50,
 980    ) -> Optional[str]:
 981        \"\"\"
 982        Get text corresponding to cell
 983";
 984        let offsets = compute_marker_offsets(text);
 985
 986        let def_start = text
 987            .find("    def get_text_cell(")
 988            .expect("def line exists");
 989        let self_start = text.find("        self,").expect("self line exists");
 990
 991        assert!(
 992            offsets.contains(&def_start),
 993            "expected boundary at def line start ({def_start}), got {offsets:?}"
 994        );
 995        assert!(
 996            !offsets.contains(&self_start),
 997            "did not expect boundary at self line start ({self_start}), got {offsets:?}"
 998        );
 999    }
1000
1001    #[test]
1002    fn test_compute_marker_offsets_blank_line_split_skips_closer_line() {
1003        let text = "\
1004impl Plugin for AhoySchedulePlugin {
1005    fn build(&self, app: &mut App) {
1006        app.configure_sets(
1007            self.schedule,
1008            (
1009                AhoySystems::MoveCharacters,
1010                AhoySystems::ApplyForcesToDynamicRigidBodies,
1011            )
1012                .chain()
1013                .before(PhysicsSystems::First),
1014        );
1015
1016    }
1017}
1018
1019/// System set used by all systems of `bevy_ahoy`.
1020#[derive(SystemSet, Debug, Clone, Copy, Hash, PartialEq, Eq)]
1021pub enum AhoySystems {
1022    MoveCharacters,
1023    ApplyForcesToDynamicRigidBodies,
1024}
1025";
1026        let offsets = compute_marker_offsets(text);
1027
1028        let closer_start = text.find("    }\n").expect("closer line exists");
1029        let doc_start = text
1030            .find("/// System set used by all systems of `bevy_ahoy`.")
1031            .expect("doc line exists");
1032
1033        assert!(
1034            !offsets.contains(&closer_start),
1035            "did not expect boundary at closer line start ({closer_start}), got {offsets:?}"
1036        );
1037        assert!(
1038            offsets.contains(&doc_start),
1039            "expected boundary at doc line start ({doc_start}), got {offsets:?}"
1040        );
1041    }
1042
1043    #[test]
1044    fn test_compute_marker_offsets_max_lines_split() {
1045        let text = "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n";
1046        let offsets = compute_marker_offsets(text);
1047        assert!(offsets.len() >= 3, "offsets: {:?}", offsets);
1048    }
1049
1050    #[test]
1051    fn test_compute_marker_offsets_hard_cap_nudges_past_closer_to_case_line() {
1052        let text = "a1\na2\na3\na4\na5\na6\na7\na8\n}\ncase 'x': {\nbody\n";
1053        let offsets = compute_marker_offsets(text);
1054
1055        let expected = text.find("case 'x': {").expect("case line exists");
1056        assert!(
1057            offsets.contains(&expected),
1058            "expected nudged boundary at case line start ({expected}), got {offsets:?}"
1059        );
1060    }
1061
1062    #[test]
1063    fn test_compute_marker_offsets_hard_cap_nudge_respects_max_forward_lines() {
1064        let text = "a1\na2\na3\na4\na5\na6\na7\na8\n}\n}\n}\n}\n}\ncase 'x': {\nbody\n";
1065        let offsets = compute_marker_offsets(text);
1066
1067        let case_start = text.find("case 'x': {").expect("case line exists");
1068        assert!(
1069            !offsets.contains(&case_start),
1070            "boundary should not nudge beyond max forward lines; offsets: {offsets:?}"
1071        );
1072    }
1073
1074    #[test]
1075    fn test_compute_marker_offsets_stay_sorted_when_hard_cap_boundary_nudges_forward() {
1076        let text = "\
1077aaaaaaaaaa = 1;
1078bbbbbbbbbb = 2;
1079cccccccccc = 3;
1080dddddddddd = 4;
1081eeeeeeeeee = 5;
1082ffffffffff = 6;
1083gggggggggg = 7;
1084hhhhhhhhhh = 8;
1085          };
1086        };
1087
1088        grafanaDashboards = {
1089          cluster-overview.spec = {
1090            inherit instanceSelector;
1091            folderRef = \"infrastructure\";
1092            json = builtins.readFile ./grafana/dashboards/cluster-overview.json;
1093          };
1094        };
1095";
1096        let offsets = compute_marker_offsets(text);
1097
1098        assert_eq!(offsets.first().copied(), Some(0), "offsets: {offsets:?}");
1099        assert_eq!(
1100            offsets.last().copied(),
1101            Some(text.len()),
1102            "offsets: {offsets:?}"
1103        );
1104        assert!(
1105            offsets.windows(2).all(|window| window[0] <= window[1]),
1106            "offsets must be sorted: {offsets:?}"
1107        );
1108    }
1109
1110    #[test]
1111    fn test_compute_marker_offsets_empty() {
1112        let offsets = compute_marker_offsets("");
1113        assert_eq!(offsets, vec![0, 0]);
1114    }
1115
1116    #[test]
1117    fn test_compute_marker_offsets_avoid_short_markdown_blocks() {
1118        let text = "\
1119# Spree Posts
1120
1121This is a Posts extension for [Spree Commerce](https://spreecommerce.org), built with Ruby on Rails.
1122
1123## Installation
1124
11251. Add this extension to your Gemfile with this line:
1126
1127    ```ruby
1128    bundle add spree_posts
1129    ```
1130
11312. Run the install generator
1132
1133    ```ruby
1134    bundle exec rails g spree_posts:install
1135    ```
1136
11373. Restart your server
1138
1139  If your server was running, restart it so that it can find the assets properly.
1140
1141## Developing
1142
11431. Create a dummy app
1144
1145    ```bash
1146    bundle update
1147    bundle exec rake test_app
1148    ```
1149
11502. Add your new code
11513. Run tests
1152
1153    ```bash
1154    bundle exec rspec
1155    ```
1156
1157When testing your applications integration with this extension you may use it's factories.
1158Simply add this require statement to your spec_helper:
1159
1160```ruby
1161require 'spree_posts/factories'
1162```
1163
1164## Releasing a new version
1165
1166```shell
1167bundle exec gem bump -p -t
1168bundle exec gem release
1169```
1170
1171For more options please see [gem-release README](https://github.com/svenfuchs/gem-release)
1172
1173## Contributing
1174
1175If you'd like to contribute, please take a look at the contributing guide.
1176";
1177        let offsets = compute_marker_offsets(text);
1178
1179        assert_eq!(offsets.first().copied(), Some(0), "offsets: {offsets:?}");
1180        assert_eq!(
1181            offsets.last().copied(),
1182            Some(text.len()),
1183            "offsets: {offsets:?}"
1184        );
1185
1186        for window in offsets.windows(2) {
1187            let block = &text[window[0]..window[1]];
1188            let line_count = block.lines().count();
1189            assert!(
1190                line_count >= V0316_MIN_BLOCK_LINES,
1191                "block too short: {line_count} lines in block {block:?} with offsets {offsets:?}"
1192            );
1193        }
1194    }
1195
1196    #[test]
1197    fn test_extract_marker_span() {
1198        let text = "<|marker_2|>\n    new content\n<|marker_3|>\n";
1199        let (start, end, content) = extract_marker_span(text).unwrap();
1200        assert_eq!(start, 2);
1201        assert_eq!(end, 3);
1202        assert_eq!(content, "    new content\n");
1203    }
1204
1205    #[test]
1206    fn test_extract_marker_span_multi_line() {
1207        let text = "<|marker_1|>\nline1\nline2\nline3\n<|marker_4|>";
1208        let (start, end, content) = extract_marker_span(text).unwrap();
1209        assert_eq!(start, 1);
1210        assert_eq!(end, 4);
1211        assert_eq!(content, "line1\nline2\nline3\n");
1212    }
1213
1214    #[test]
1215    fn test_apply_marker_span_basic() {
1216        let old = "aaa\nbbb\nccc\n";
1217        let output = "<|marker_1|>\naaa\nBBB\nccc\n<|marker_2|>";
1218        let result = apply_marker_span(old, output).unwrap();
1219        assert_eq!(result, "aaa\nBBB\nccc\n");
1220    }
1221
1222    #[test]
1223    fn test_apply_marker_span_preserves_trailing_blank_line() {
1224        let old = "/\nresult\n\n";
1225        let output = "<|marker_1|>\n//\nresult\n\n<|marker_2|>";
1226        let result = apply_marker_span(old, output).unwrap();
1227        assert_eq!(result, "//\nresult\n\n");
1228    }
1229
1230    #[test]
1231    fn test_encode_no_edits() {
1232        let old = "aaa\nbbb\nccc\n";
1233        let result = encode_from_old_and_new(
1234            old,
1235            old,
1236            None,
1237            "<|user_cursor|>",
1238            ">>>>>>> UPDATED\n",
1239            "NO_EDITS\n",
1240        )
1241        .unwrap();
1242        assert_eq!(result, "NO_EDITS\n>>>>>>> UPDATED\n");
1243    }
1244
1245    #[test]
1246    fn test_encode_with_change() {
1247        let old = "aaa\nbbb\nccc\n";
1248        let new = "aaa\nBBB\nccc\n";
1249        let result = encode_from_old_and_new(
1250            old,
1251            new,
1252            None,
1253            "<|user_cursor|>",
1254            ">>>>>>> UPDATED\n",
1255            "NO_EDITS\n",
1256        )
1257        .unwrap();
1258        assert!(result.contains("<|marker_1|>"));
1259        assert!(result.contains("<|marker_2|>"));
1260        assert!(result.contains("aaa\nBBB\nccc\n"));
1261        assert!(result.ends_with(">>>>>>> UPDATED\n"));
1262    }
1263
1264    #[test]
1265    fn test_roundtrip_encode_apply() {
1266        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
1267        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
1268        let encoded = encode_from_old_and_new(
1269            old,
1270            new,
1271            None,
1272            "<|user_cursor|>",
1273            ">>>>>>> UPDATED\n",
1274            "NO_EDITS\n",
1275        )
1276        .unwrap();
1277        let output = encoded
1278            .strip_suffix(">>>>>>> UPDATED\n")
1279            .expect("should have end marker");
1280        let reconstructed = apply_marker_span(old, output).unwrap();
1281        assert_eq!(reconstructed, new);
1282    }
1283
1284    #[test]
1285    fn test_extract_editable_region_from_markers_multi() {
1286        let text = "prefix\n<|marker_1|>\naaa\nbbb\n<|marker_2|>\nccc\nddd\n<|marker_3|>\nsuffix";
1287        let parsed = extract_editable_region_from_markers(text).unwrap();
1288        assert_eq!(parsed, "aaa\nbbb\nccc\nddd");
1289    }
1290
1291    #[test]
1292    fn test_extract_editable_region_two_markers() {
1293        let text = "<|marker_1|>\none\ntwo three\n<|marker_2|>";
1294        let parsed = extract_editable_region_from_markers(text).unwrap();
1295        assert_eq!(parsed, "one\ntwo three");
1296    }
1297
1298    #[test]
1299    fn test_encode_with_cursor() {
1300        let old = "aaa\nbbb\nccc\n";
1301        let new = "aaa\nBBB\nccc\n";
1302        let result = encode_from_old_and_new(
1303            old,
1304            new,
1305            Some(5),
1306            "<|user_cursor|>",
1307            ">>>>>>> UPDATED\n",
1308            "NO_EDITS\n",
1309        )
1310        .unwrap();
1311        assert!(result.contains("<|user_cursor|>"), "result: {result}");
1312        assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
1313    }
1314
1315    #[test]
1316    fn test_extract_marker_span_strips_intermediate_markers() {
1317        let text = "<|marker_2|>\nline1\n<|marker_3|>\nline2\n<|marker_4|>";
1318        let (start, end, content) = extract_marker_span(text).unwrap();
1319        assert_eq!(start, 2);
1320        assert_eq!(end, 4);
1321        assert_eq!(content, "line1\nline2\n");
1322    }
1323
1324    #[test]
1325    fn test_extract_marker_span_strips_multiple_intermediate_markers() {
1326        let text = "<|marker_1|>\naaa\n<|marker_2|>\nbbb\n<|marker_3|>\nccc\n<|marker_4|>";
1327        let (start, end, content) = extract_marker_span(text).unwrap();
1328        assert_eq!(start, 1);
1329        assert_eq!(end, 4);
1330        assert_eq!(content, "aaa\nbbb\nccc\n");
1331    }
1332
1333    #[test]
1334    fn test_apply_marker_span_with_extra_intermediate_marker() {
1335        let old = "aaa\nbbb\nccc\n";
1336        let output = "<|marker_1|>\naaa\n<|marker_1|>\nBBB\nccc\n<|marker_2|>";
1337        let result = apply_marker_span(old, output).unwrap();
1338        assert_eq!(result, "aaa\nBBB\nccc\n");
1339    }
1340
1341    #[test]
1342    fn test_strip_marker_tags_inline() {
1343        assert_eq!(strip_marker_tags("no markers here"), "no markers here");
1344        assert_eq!(strip_marker_tags("before<|marker_5|>after"), "beforeafter");
1345        assert_eq!(
1346            strip_marker_tags("line1\n<|marker_3|>\nline2"),
1347            "line1\nline2"
1348        );
1349    }
1350
1351    #[test]
1352    fn test_write_editable_with_markers_v0316_byte_exact() {
1353        let editable = "aaa\nbbb\nccc\n";
1354        let mut output = String::new();
1355        write_editable_with_markers_v0316(&mut output, editable, 4, "<|user_cursor|>");
1356        assert!(output.starts_with("<|marker_1|>"));
1357        assert!(output.contains("<|user_cursor|>"));
1358        let stripped = output.replace("<|user_cursor|>", "");
1359        let stripped = strip_marker_tags(&stripped);
1360        assert_eq!(stripped, editable);
1361    }
1362
1363    #[test]
1364    fn test_apply_marker_span_v0316_basic() {
1365        let old = "aaa\nbbb\nccc\n";
1366        let output = "<|marker_1|>aaa\nBBB\nccc\n<|marker_2|>";
1367        let result = apply_marker_span_v0316(old, output).unwrap();
1368        assert_eq!(result, "aaa\nBBB\nccc\n");
1369    }
1370
1371    #[test]
1372    fn test_apply_marker_span_v0316_no_edit() {
1373        let old = "aaa\nbbb\nccc\n";
1374        let output = "<|marker_1|><|marker_1|>";
1375        let result = apply_marker_span_v0316(old, output).unwrap();
1376        assert_eq!(result, old);
1377    }
1378
1379    #[test]
1380    fn test_apply_marker_span_v0316_no_edit_any_marker() {
1381        let old = "aaa\nbbb\nccc\n";
1382        let output = "<|marker_2|>ignored content<|marker_2|>";
1383        let result = apply_marker_span_v0316(old, output).unwrap();
1384        assert_eq!(result, old);
1385    }
1386
1387    #[test]
1388    fn test_apply_marker_span_v0316_multi_block() {
1389        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
1390        let marker_offsets = compute_marker_offsets(old);
1391        assert!(
1392            marker_offsets.len() >= 3,
1393            "expected at least 3 offsets, got {:?}",
1394            marker_offsets
1395        );
1396
1397        let new_content = "LINE1\nLINE2\nLINE3\n\nLINE5\nLINE6\nLINE7\nLINE8\n";
1398        let mut output = String::new();
1399        output.push_str("<|marker_1|>");
1400        for i in 0..marker_offsets.len() - 1 {
1401            if i > 0 {
1402                output.push_str(&marker_tag(i + 1));
1403            }
1404            let start = marker_offsets[i];
1405            let end = marker_offsets[i + 1];
1406            let block_len = end - start;
1407            output.push_str(&new_content[start..start + block_len]);
1408        }
1409        let last_marker_num = marker_offsets.len();
1410        output.push_str(&marker_tag(last_marker_num));
1411        let result = apply_marker_span_v0316(old, &output).unwrap();
1412        assert_eq!(result, new_content);
1413    }
1414
1415    #[test]
1416    fn test_apply_marker_span_v0316_byte_exact_no_normalization() {
1417        let old = "aaa\nbbb\nccc\n";
1418        let output = "<|marker_1|>aaa\nBBB\nccc<|marker_2|>";
1419        let result = apply_marker_span_v0316(old, output).unwrap();
1420        assert_eq!(result, "aaa\nBBB\nccc");
1421    }
1422
1423    #[test]
1424    fn test_encode_v0316_no_edits() {
1425        let old = "aaa\nbbb\nccc\n";
1426        let result =
1427            encode_from_old_and_new_v0316(old, old, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1428        assert!(result.ends_with("<|end|>"));
1429        let stripped = result.strip_suffix("<|end|>").unwrap();
1430        let result_parsed = apply_marker_span_v0316(old, stripped).unwrap();
1431        assert_eq!(result_parsed, old);
1432    }
1433
1434    #[test]
1435    fn test_encode_v0316_with_change() {
1436        let old = "aaa\nbbb\nccc\n";
1437        let new = "aaa\nBBB\nccc\n";
1438        let result =
1439            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1440        assert!(result.contains("<|marker_1|>"));
1441        assert!(result.contains("<|marker_2|>"));
1442        assert!(result.ends_with("<|end|>"));
1443    }
1444
1445    #[test]
1446    fn test_roundtrip_v0316() {
1447        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
1448        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
1449        let encoded =
1450            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1451        let stripped = encoded
1452            .strip_suffix("<|end|>")
1453            .expect("should have end marker");
1454        let reconstructed = apply_marker_span_v0316(old, stripped).unwrap();
1455        assert_eq!(reconstructed, new);
1456    }
1457
1458    #[test]
1459    fn test_roundtrip_v0316_with_cursor() {
1460        let old = "aaa\nbbb\nccc\n";
1461        let new = "aaa\nBBB\nccc\n";
1462        let result =
1463            encode_from_old_and_new_v0316(old, new, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1464        assert!(result.contains("<|user_cursor|>"), "result: {result}");
1465        assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
1466    }
1467
1468    #[test]
1469    fn test_roundtrip_v0316_multi_block_change() {
1470        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
1471        let new = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
1472        let encoded =
1473            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1474        let stripped = encoded
1475            .strip_suffix("<|end|>")
1476            .expect("should have end marker");
1477        let reconstructed = apply_marker_span_v0316(old, stripped).unwrap();
1478        assert_eq!(reconstructed, new);
1479    }
1480
1481    #[test]
1482    fn test_nearest_marker_number() {
1483        let offsets = vec![0, 10, 20, 30];
1484        assert_eq!(nearest_marker_number(Some(0), &offsets), 1);
1485        assert_eq!(nearest_marker_number(Some(9), &offsets), 2);
1486        assert_eq!(nearest_marker_number(Some(15), &offsets), 2);
1487        assert_eq!(nearest_marker_number(Some(25), &offsets), 3);
1488        assert_eq!(nearest_marker_number(Some(30), &offsets), 4);
1489        assert_eq!(nearest_marker_number(None, &offsets), 1);
1490    }
1491
1492    #[test]
1493    fn test_marker_tag_relative_formats_as_expected() {
1494        assert_eq!(marker_tag_relative(-2), "<|marker-2|>");
1495        assert_eq!(marker_tag_relative(-1), "<|marker-1|>");
1496        assert_eq!(marker_tag_relative(0), "<|marker-0|>");
1497        assert_eq!(marker_tag_relative(1), "<|marker+1|>");
1498        assert_eq!(marker_tag_relative(2), "<|marker+2|>");
1499    }
1500
1501    #[test]
1502    fn test_write_editable_with_markers_v0317_includes_relative_markers_and_cursor() {
1503        let editable = "aaa\nbbb\nccc\n";
1504        let mut output = String::new();
1505        write_editable_with_markers_v0317(&mut output, editable, 4, "<|user_cursor|>");
1506
1507        assert!(output.contains("<|marker-0|>"));
1508        assert!(output.contains("<|user_cursor|>"));
1509
1510        let stripped = output.replace("<|user_cursor|>", "");
1511        let stripped =
1512            collect_relative_marker_tags(&stripped)
1513                .iter()
1514                .fold(stripped.clone(), |acc, marker| {
1515                    let tag = &stripped[marker.tag_start..marker.tag_end];
1516                    acc.replace(tag, "")
1517                });
1518        assert_eq!(stripped, editable);
1519    }
1520
1521    #[test]
1522    fn test_apply_marker_span_v0317_basic() {
1523        let old = "aaa\nbbb\nccc\n";
1524        let output = "<|marker-0|>aaa\nBBB\nccc\n<|marker+1|>";
1525        let result = apply_marker_span_v0317(old, output, Some(0)).unwrap();
1526        assert_eq!(result, "aaa\nBBB\nccc\n");
1527    }
1528
1529    #[test]
1530    fn test_apply_marker_span_v0317_no_edit() {
1531        let old = "aaa\nbbb\nccc\n";
1532        let output = "<|marker-0|><|marker-0|>";
1533        let result = apply_marker_span_v0317(old, output, Some(0)).unwrap();
1534        assert_eq!(result, old);
1535    }
1536
1537    #[test]
1538    fn test_encode_v0317_no_edits() {
1539        let old = "aaa\nbbb\nccc\n";
1540        let result =
1541            encode_from_old_and_new_v0317(old, old, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1542        assert_eq!(result, "<|marker-0|><|marker-0|><|end|>");
1543    }
1544
1545    #[test]
1546    fn test_roundtrip_v0317() {
1547        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
1548        let new = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
1549        let cursor = Some(6);
1550
1551        let encoded =
1552            encode_from_old_and_new_v0317(old, new, cursor, "<|user_cursor|>", "<|end|>").unwrap();
1553        let stripped = encoded
1554            .strip_suffix("<|end|>")
1555            .expect("should have end marker");
1556        let stripped = stripped.replace("<|user_cursor|>", "");
1557        let reconstructed = apply_marker_span_v0317(old, &stripped, cursor).unwrap();
1558        assert_eq!(reconstructed, new);
1559    }
1560
1561    #[test]
1562    fn test_roundtrip_v0317_with_cursor_marker() {
1563        let old = "aaa\nbbb\nccc\n";
1564        let new = "aaa\nBBB\nccc\n";
1565        let result =
1566            encode_from_old_and_new_v0317(old, new, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1567        assert!(result.contains("<|user_cursor|>"), "result: {result}");
1568        assert!(result.contains("<|marker-0|>"), "result: {result}");
1569    }
1570
1571    #[test]
1572    fn test_compute_marker_offsets_v0318_uses_larger_block_sizes() {
1573        let text = "l1\nl2\nl3\n\nl5\nl6\nl7\nl8\nl9\nl10\nl11\nl12\nl13\n";
1574        let v0316_offsets = compute_marker_offsets(text);
1575        let v0318_offsets = compute_marker_offsets_v0318(text);
1576
1577        assert!(v0318_offsets.len() < v0316_offsets.len());
1578        assert_eq!(v0316_offsets.first().copied(), Some(0));
1579        assert_eq!(v0318_offsets.first().copied(), Some(0));
1580        assert_eq!(v0316_offsets.last().copied(), Some(text.len()));
1581        assert_eq!(v0318_offsets.last().copied(), Some(text.len()));
1582    }
1583
1584    #[test]
1585    fn test_roundtrip_v0318() {
1586        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
1587        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
1588        let encoded =
1589            encode_from_old_and_new_v0318(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1590        let stripped = encoded
1591            .strip_suffix("<|end|>")
1592            .expect("should have end marker");
1593        let reconstructed = apply_marker_span_v0318(old, stripped).unwrap();
1594        assert_eq!(reconstructed, new);
1595    }
1596
1597    #[test]
1598    fn test_roundtrip_v0318_append_at_end_of_editable_region() {
1599        let old = "line1\nline2\nline3\n";
1600        let new = "line1\nline2\nline3\nline4\n";
1601        let encoded =
1602            encode_from_old_and_new_v0318(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1603
1604        assert_ne!(encoded, "<|marker_2|><|end|>");
1605
1606        let stripped = encoded
1607            .strip_suffix("<|end|>")
1608            .expect("should have end marker");
1609        let reconstructed = apply_marker_span_v0318(old, stripped).unwrap();
1610        assert_eq!(reconstructed, new);
1611    }
1612
1613    #[test]
1614    fn test_roundtrip_v0318_insert_at_internal_marker_boundary() {
1615        let old = "alpha\nbeta\n\ngamma\ndelta\n";
1616        let new = "alpha\nbeta\n\ninserted\ngamma\ndelta\n";
1617        let encoded =
1618            encode_from_old_and_new_v0318(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1619
1620        let stripped = encoded
1621            .strip_suffix("<|end|>")
1622            .expect("should have end marker");
1623        let reconstructed = apply_marker_span_v0318(old, stripped).unwrap();
1624        assert_eq!(reconstructed, new);
1625    }
1626
1627    #[test]
1628    fn test_encode_v0317_markers_stay_on_line_boundaries() {
1629        let old = "\
1630\t\t\t\tcontinue outer;
1631\t\t\t}
1632\t\t}
1633\t}
1634
1635\tconst intersectionObserver = new IntersectionObserver((entries) => {
1636\t\tfor (const entry of entries) {
1637\t\t\tif (entry.isIntersecting) {
1638\t\t\t\tintersectionObserver.unobserve(entry.target);
1639\t\t\t\tanchorPreload(/** @type {HTMLAnchorElement} */ (entry.target));
1640\t\t\t}
1641\t\t}
1642\t});
1643
1644\tconst observer = new MutationObserver(() => {
1645\t\tconst links = /** @type {NodeListOf<HTMLAnchorElement>} */ (
1646\t\t\tdocument.querySelectorAll('a[data-preload]')
1647\t\t);
1648
1649\t\tfor (const link of links) {
1650\t\t\tif (linkSet.has(link)) continue;
1651\t\t\tlinkSet.add(link);
1652
1653\t\t\tswitch (link.dataset.preload) {
1654\t\t\t\tcase '':
1655\t\t\t\tcase 'true':
1656\t\t\t\tcase 'hover': {
1657\t\t\t\t\tlink.addEventListener('mouseenter', function callback() {
1658\t\t\t\t\t\tlink.removeEventListener('mouseenter', callback);
1659\t\t\t\t\t\tanchorPreload(link);
1660\t\t\t\t\t});
1661";
1662        let new = old.replacen(
1663            "\t\t\t\tcase 'true':\n",
1664            "\t\t\t\tcase 'TRUE':<|user_cursor|>\n",
1665            1,
1666        );
1667
1668        let cursor_offset = new.find("<|user_cursor|>").expect("cursor marker in new");
1669        let new_without_cursor = new.replace("<|user_cursor|>", "");
1670
1671        let encoded = encode_from_old_and_new_v0317(
1672            old,
1673            &new_without_cursor,
1674            Some(cursor_offset),
1675            "<|user_cursor|>",
1676            "<|end|>",
1677        )
1678        .unwrap();
1679
1680        let core = encoded.strip_suffix("<|end|>").unwrap_or(&encoded);
1681        for marker in collect_relative_marker_tags(core) {
1682            let tag_start = marker.tag_start;
1683            assert!(
1684                tag_start == 0 || core.as_bytes()[tag_start - 1] == b'\n',
1685                "marker not at line boundary: {} in output:\n{}",
1686                marker_tag_relative(marker.value),
1687                core
1688            );
1689        }
1690    }
1691}