multi_region.rs

   1use anyhow::{Context as _, Result, anyhow};
   2
/// Opening text of an absolute marker tag; the marker number follows it.
pub const MARKER_TAG_PREFIX: &str = "<|marker_";
/// Closing text of a marker tag (used when parsing both absolute and relative tags).
pub const MARKER_TAG_SUFFIX: &str = "|>";
/// Opening text of a relative marker tag; a signed delta follows it.
/// Note that this is also a prefix of `MARKER_TAG_PREFIX`.
pub const RELATIVE_MARKER_TAG_PREFIX: &str = "<|marker";
/// Minimum number of lines per block for the V0316/V0317 block sizing rules.
const V0316_MIN_BLOCK_LINES: usize = 3;
/// Line count at which a block is force-split under the V0316/V0317 rules.
const V0316_MAX_BLOCK_LINES: usize = 8;
/// Minimum number of lines per block for the V0318 block sizing rules.
const V0318_MIN_BLOCK_LINES: usize = 6;
/// Line count at which a block is force-split under the V0318 rules.
const V0318_MAX_BLOCK_LINES: usize = 16;
/// How many lines `skip_to_good_start` inspects when looking for a good
/// boundary line.
const MAX_NUDGE_LINES: usize = 5;
// NOTE(review): the three end markers below currently hold the same string;
// their consumers are not visible in this part of the file.
/// End marker string for the V0316 format.
pub const V0316_END_MARKER: &str = "<[end▁of▁sentence]>";
/// End marker string for the V0317 format.
pub const V0317_END_MARKER: &str = "<[end▁of▁sentence]>";
/// End marker string for the V0318 format.
pub const V0318_END_MARKER: &str = "<[end▁of▁sentence]>";
  14
  15pub fn marker_tag(number: usize) -> String {
  16    format!("{MARKER_TAG_PREFIX}{number}{MARKER_TAG_SUFFIX}")
  17}
  18
  19pub fn marker_tag_relative(delta: isize) -> String {
  20    if delta > 0 {
  21        format!("<|marker+{delta}|>")
  22    } else if delta == 0 {
  23        String::from("<|marker-0|>")
  24    } else {
  25        format!("<|marker{delta}|>")
  26    }
  27}
  28
/// Per-line facts used when choosing marker boundaries.
struct LineInfo {
    /// Byte offset of the line's first byte within the scanned text.
    start: usize,
    /// True when the line is empty after trimming whitespace.
    is_blank: bool,
    /// True when the line is non-blank and is not a structural tail
    /// (see `is_structural_tail`).
    is_good_start: bool,
}
  34
  35fn collect_line_info(text: &str) -> Vec<LineInfo> {
  36    let mut lines = Vec::new();
  37    let mut offset = 0;
  38    for line in text.split('\n') {
  39        let trimmed = line.trim();
  40        let is_blank = trimmed.is_empty();
  41        let is_good_start = !is_blank && !is_structural_tail(trimmed);
  42        lines.push(LineInfo {
  43            start: offset,
  44            is_blank,
  45            is_good_start,
  46        });
  47        offset += line.len() + 1;
  48    }
  49    // split('\n') on "abc\n" yields ["abc", ""] — drop the phantom trailing
  50    // empty element when the text ends with '\n'.
  51    if text.ends_with('\n') && lines.len() > 1 {
  52        lines.pop();
  53    }
  54    lines
  55}
  56
  57fn is_structural_tail(trimmed_line: &str) -> bool {
  58    if trimmed_line.starts_with(&['}', ']', ')']) {
  59        return true;
  60    }
  61    matches!(
  62        trimmed_line.trim_end_matches(';'),
  63        "break" | "continue" | "return" | "throw" | "end"
  64    )
  65}
  66
  67/// Starting from line `from`, scan up to `MAX_NUDGE_LINES` forward to find a
  68/// line with `is_good_start`. Returns `None` if no suitable line is found.
  69fn skip_to_good_start(lines: &[LineInfo], from: usize) -> Option<usize> {
  70    (from..lines.len().min(from + MAX_NUDGE_LINES)).find(|&i| lines[i].is_good_start)
  71}
  72
/// Compute byte offsets within `editable_text` where marker boundaries should
/// be placed.
///
/// Returns a sorted `Vec<usize>` that always starts with `0` and ends with
/// `editable_text.len()` (for empty input this is `[0, 0]`). Interior offsets
/// are the start offsets of lines, preferring the first non-blank line after
/// a run of blank lines once at least `min_block_lines` lines have
/// accumulated, and forcing a split once `max_block_lines` lines have
/// accumulated. A chosen boundary may be nudged forward by
/// `skip_to_good_start` so that it does not land on a structural-tail line,
/// which means a block can exceed `max_block_lines` by a few lines.
fn compute_marker_offsets_with_limits(
    editable_text: &str,
    min_block_lines: usize,
    max_block_lines: usize,
) -> Vec<usize> {
    // Empty text still gets an opening and a closing boundary.
    if editable_text.is_empty() {
        return vec![0, 0];
    }

    let lines = collect_line_info(editable_text);
    let mut offsets = vec![0usize];
    // Index of the line at which the most recent boundary was placed.
    let mut last_boundary_line = 0;
    let mut i = 0;

    while i < lines.len() {
        // Number of lines accumulated since the last boundary.
        let gap = i - last_boundary_line;

        // Blank-line split: non-blank line following blank line(s) with enough
        // accumulated lines.
        if gap >= min_block_lines && !lines[i].is_blank && i > 0 && lines[i - 1].is_blank {
            let target = if lines[i].is_good_start {
                i
            } else {
                // Fall back to `i` itself when no good start is nearby.
                skip_to_good_start(&lines, i).unwrap_or(i)
            };
            // Only split if at least `min_block_lines` lines remain from the
            // target onward and the boundary actually advances.
            if lines.len() - target >= min_block_lines
                && lines[target].start > *offsets.last().unwrap_or(&0)
            {
                offsets.push(lines[target].start);
                last_boundary_line = target;
                i = target + 1;
                continue;
            }
        }

        // Hard cap: too many lines without a split.
        if gap >= max_block_lines {
            let target = skip_to_good_start(&lines, i).unwrap_or(i);
            // Guard against pushing a duplicate or non-increasing offset.
            if lines[target].start > *offsets.last().unwrap_or(&0) {
                offsets.push(lines[target].start);
                last_boundary_line = target;
                i = target + 1;
                continue;
            }
        }

        i += 1;
    }

    // Close the final block at the end of the text.
    let end = editable_text.len();
    if *offsets.last().unwrap_or(&0) != end {
        offsets.push(end);
    }

    offsets
}
 136
 137/// Compute byte offsets within `editable_text` for the V0316/V0317 block sizing rules.
 138pub fn compute_marker_offsets(editable_text: &str) -> Vec<usize> {
 139    compute_marker_offsets_with_limits(editable_text, V0316_MIN_BLOCK_LINES, V0316_MAX_BLOCK_LINES)
 140}
 141
 142pub fn compute_marker_offsets_v0318(editable_text: &str) -> Vec<usize> {
 143    compute_marker_offsets_with_limits(editable_text, V0318_MIN_BLOCK_LINES, V0318_MAX_BLOCK_LINES)
 144}
 145
 146/// Write the editable region content with marker tags, inserting the cursor
 147/// marker at the given offset within the editable text.
 148pub fn write_editable_with_markers(
 149    output: &mut String,
 150    editable_text: &str,
 151    cursor_offset_in_editable: usize,
 152    cursor_marker: &str,
 153) {
 154    let marker_offsets = compute_marker_offsets(editable_text);
 155    let mut cursor_placed = false;
 156    for (i, &offset) in marker_offsets.iter().enumerate() {
 157        let marker_num = i + 1;
 158        if !output.is_empty() && !output.ends_with('\n') {
 159            output.push('\n');
 160        }
 161        output.push_str(&marker_tag(marker_num));
 162
 163        if let Some(&next_offset) = marker_offsets.get(i + 1) {
 164            output.push('\n');
 165            let block = &editable_text[offset..next_offset];
 166            if !cursor_placed
 167                && cursor_offset_in_editable >= offset
 168                && cursor_offset_in_editable <= next_offset
 169            {
 170                cursor_placed = true;
 171                let cursor_in_block = cursor_offset_in_editable - offset;
 172                output.push_str(&block[..cursor_in_block]);
 173                output.push_str(cursor_marker);
 174                output.push_str(&block[cursor_in_block..]);
 175            } else {
 176                output.push_str(block);
 177            }
 178        }
 179    }
 180}
 181
 182/// Strip any `<|marker_N|>` tags from `text`.
 183///
 184/// When a marker tag sits on its own line (followed by `\n`), the trailing
 185/// newline is also removed so the surrounding lines stay joined naturally.
 186fn strip_marker_tags(text: &str) -> String {
 187    let mut result = String::with_capacity(text.len());
 188    let mut pos = 0;
 189    let bytes = text.as_bytes();
 190    while let Some(rel) = text[pos..].find(MARKER_TAG_PREFIX) {
 191        result.push_str(&text[pos..pos + rel]);
 192        let num_start = pos + rel + MARKER_TAG_PREFIX.len();
 193        if let Some(suffix_rel) = text[num_start..].find(MARKER_TAG_SUFFIX) {
 194            let mut tag_end = num_start + suffix_rel + MARKER_TAG_SUFFIX.len();
 195            if bytes.get(tag_end) == Some(&b'\n') {
 196                tag_end += 1;
 197            }
 198            pos = tag_end;
 199        } else {
 200            result.push_str(MARKER_TAG_PREFIX);
 201            pos = num_start;
 202        }
 203    }
 204    result.push_str(&text[pos..]);
 205    result
 206}
 207
 208/// Parse model output that uses the marker format.
 209///
 210/// Returns `(start_marker_num, end_marker_num, content_between_markers)`.
 211/// The leading format-level newline after the start marker is stripped.
 212/// Trailing newlines are preserved so blank-line endings in the editable
 213/// region are not lost.
 214///
 215/// Any extra intermediate marker tags that the model may have inserted
 216/// between the first and last markers are stripped from the returned content.
 217pub fn extract_marker_span(text: &str) -> Result<(usize, usize, String)> {
 218    let first_tag_start = text
 219        .find(MARKER_TAG_PREFIX)
 220        .context("no start marker found in output")?;
 221    let first_num_start = first_tag_start + MARKER_TAG_PREFIX.len();
 222    let first_num_end = text[first_num_start..]
 223        .find(MARKER_TAG_SUFFIX)
 224        .map(|i| i + first_num_start)
 225        .context("malformed start marker tag")?;
 226    let start_num: usize = text[first_num_start..first_num_end]
 227        .parse()
 228        .context("start marker number is not a valid integer")?;
 229    let first_tag_end = first_num_end + MARKER_TAG_SUFFIX.len();
 230
 231    let last_tag_start = text
 232        .rfind(MARKER_TAG_PREFIX)
 233        .context("no end marker found in output")?;
 234    let last_num_start = last_tag_start + MARKER_TAG_PREFIX.len();
 235    let last_num_end = text[last_num_start..]
 236        .find(MARKER_TAG_SUFFIX)
 237        .map(|i| i + last_num_start)
 238        .context("malformed end marker tag")?;
 239    let end_num: usize = text[last_num_start..last_num_end]
 240        .parse()
 241        .context("end marker number is not a valid integer")?;
 242
 243    if start_num == end_num {
 244        return Err(anyhow!(
 245            "start and end markers are the same (marker {})",
 246            start_num
 247        ));
 248    }
 249
 250    let mut content_start = first_tag_end;
 251    if text.as_bytes().get(content_start) == Some(&b'\n') {
 252        content_start += 1;
 253    }
 254    let content_end = last_tag_start;
 255
 256    let content = &text[content_start..content_end.max(content_start)];
 257    let content = strip_marker_tags(content);
 258    Ok((start_num, end_num, content))
 259}
 260
 261/// Given old editable text and model output with marker span, reconstruct the
 262/// full new editable region.
 263pub fn apply_marker_span(old_editable: &str, output: &str) -> Result<String> {
 264    let (start_num, end_num, raw_new_span) = extract_marker_span(output)?;
 265    let marker_offsets = compute_marker_offsets(old_editable);
 266
 267    let start_idx = start_num
 268        .checked_sub(1)
 269        .context("marker numbers are 1-indexed")?;
 270    let end_idx = end_num
 271        .checked_sub(1)
 272        .context("marker numbers are 1-indexed")?;
 273    let start_byte = *marker_offsets
 274        .get(start_idx)
 275        .context("start marker number out of range")?;
 276    let end_byte = *marker_offsets
 277        .get(end_idx)
 278        .context("end marker number out of range")?;
 279
 280    if start_byte > end_byte {
 281        return Err(anyhow!("start marker must come before end marker"));
 282    }
 283
 284    let old_span = &old_editable[start_byte..end_byte];
 285    let mut new_span = raw_new_span;
 286    if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
 287        new_span.push('\n');
 288    }
 289    if !old_span.ends_with('\n') && new_span.ends_with('\n') {
 290        new_span.pop();
 291    }
 292
 293    let mut result = String::new();
 294    result.push_str(&old_editable[..start_byte]);
 295    result.push_str(&new_span);
 296    result.push_str(&old_editable[end_byte..]);
 297
 298    Ok(result)
 299}
 300
 301/// Compare old and new editable text, find the minimal marker span that covers
 302/// all changes, and encode the result with marker tags.
 303pub fn encode_from_old_and_new(
 304    old_editable: &str,
 305    new_editable: &str,
 306    cursor_offset_in_new: Option<usize>,
 307    cursor_marker: &str,
 308    end_marker: &str,
 309    no_edits_marker: &str,
 310) -> Result<String> {
 311    if old_editable == new_editable {
 312        return Ok(format!("{no_edits_marker}{end_marker}"));
 313    }
 314
 315    let marker_offsets = compute_marker_offsets(old_editable);
 316    let (common_prefix, common_suffix) =
 317        common_prefix_suffix(old_editable.as_bytes(), new_editable.as_bytes());
 318    let change_end_in_old = old_editable.len() - common_suffix;
 319
 320    let start_marker_idx = marker_offsets
 321        .iter()
 322        .rposition(|&offset| offset <= common_prefix)
 323        .unwrap_or(0);
 324    let end_marker_idx = marker_offsets
 325        .iter()
 326        .position(|&offset| offset >= change_end_in_old)
 327        .unwrap_or(marker_offsets.len() - 1);
 328
 329    let old_start = marker_offsets[start_marker_idx];
 330    let old_end = marker_offsets[end_marker_idx];
 331
 332    let new_start = old_start;
 333    let new_end = new_editable
 334        .len()
 335        .saturating_sub(old_editable.len().saturating_sub(old_end));
 336
 337    let new_span = &new_editable[new_start..new_end];
 338
 339    let start_marker_num = start_marker_idx + 1;
 340    let end_marker_num = end_marker_idx + 1;
 341
 342    let mut result = String::new();
 343    result.push_str(&marker_tag(start_marker_num));
 344    result.push('\n');
 345
 346    if let Some(cursor_offset) = cursor_offset_in_new {
 347        if cursor_offset >= new_start && cursor_offset <= new_end {
 348            let cursor_in_span = cursor_offset - new_start;
 349            let bounded = cursor_in_span.min(new_span.len());
 350            result.push_str(&new_span[..bounded]);
 351            result.push_str(cursor_marker);
 352            result.push_str(&new_span[bounded..]);
 353        } else {
 354            result.push_str(new_span);
 355        }
 356    } else {
 357        result.push_str(new_span);
 358    }
 359
 360    if !result.ends_with('\n') {
 361        result.push('\n');
 362    }
 363    result.push_str(&marker_tag(end_marker_num));
 364    result.push('\n');
 365    result.push_str(end_marker);
 366
 367    Ok(result)
 368}
 369
 370/// Extract the full editable region from text that uses marker tags.
 371///
 372/// Returns the concatenation of all block contents between the first and last
 373/// markers, with intermediate marker tags stripped.
 374pub fn extract_editable_region_from_markers(text: &str) -> Option<String> {
 375    let first_marker_start = text.find(MARKER_TAG_PREFIX)?;
 376
 377    let mut markers: Vec<(usize, usize)> = Vec::new();
 378    let mut search_start = first_marker_start;
 379    while let Some(rel_pos) = text[search_start..].find(MARKER_TAG_PREFIX) {
 380        let tag_start = search_start + rel_pos;
 381        let num_start = tag_start + MARKER_TAG_PREFIX.len();
 382        let num_end = text[num_start..].find(MARKER_TAG_SUFFIX)?;
 383        let tag_end = num_start + num_end + MARKER_TAG_SUFFIX.len();
 384        markers.push((tag_start, tag_end));
 385        search_start = tag_end;
 386    }
 387
 388    if markers.len() < 2 {
 389        return None;
 390    }
 391
 392    let (_, first_tag_end) = markers[0];
 393    let (last_tag_start, _) = markers[markers.len() - 1];
 394
 395    let mut content_start = first_tag_end;
 396    if text.as_bytes().get(content_start) == Some(&b'\n') {
 397        content_start += 1;
 398    }
 399    let mut content_end = last_tag_start;
 400    if content_end > content_start && text.as_bytes().get(content_end - 1) == Some(&b'\n') {
 401        content_end -= 1;
 402    }
 403
 404    let raw = &text[content_start..content_end];
 405    let result = strip_marker_tags(raw);
 406    let result = result.strip_suffix('\n').unwrap_or(&result).to_string();
 407    Some(result)
 408}
 409
/// A marker tag located in a piece of text by `collect_tags`.
struct ParsedTag {
    /// Value parsed from the text between the tag prefix and suffix: a marker
    /// number for absolute tags, a signed delta for relative tags.
    value: isize,
    /// Byte offset at which the tag's prefix begins.
    tag_start: usize,
    /// Byte offset just past the tag's closing suffix.
    tag_end: usize,
}
 415
 416fn collect_tags(text: &str, prefix: &str, parse: fn(&str) -> Option<isize>) -> Vec<ParsedTag> {
 417    let mut tags = Vec::new();
 418    let mut search_from = 0;
 419    while let Some(rel_pos) = text[search_from..].find(prefix) {
 420        let tag_start = search_from + rel_pos;
 421        let payload_start = tag_start + prefix.len();
 422        if let Some(suffix_rel) = text[payload_start..].find(MARKER_TAG_SUFFIX) {
 423            let payload_end = payload_start + suffix_rel;
 424            if let Some(value) = parse(&text[payload_start..payload_end]) {
 425                let tag_end = payload_end + MARKER_TAG_SUFFIX.len();
 426                tags.push(ParsedTag {
 427                    value,
 428                    tag_start,
 429                    tag_end,
 430                });
 431                search_from = tag_end;
 432                continue;
 433            }
 434        }
 435        search_from = tag_start + prefix.len();
 436    }
 437    tags
 438}
 439
 440fn collect_marker_tags(text: &str) -> Vec<ParsedTag> {
 441    collect_tags(text, MARKER_TAG_PREFIX, |s| {
 442        s.parse::<usize>().ok().map(|n| n as isize)
 443    })
 444}
 445
 446fn collect_relative_marker_tags(text: &str) -> Vec<ParsedTag> {
 447    collect_tags(text, RELATIVE_MARKER_TAG_PREFIX, |s| {
 448        s.parse::<isize>().ok()
 449    })
 450}
 451
 452pub fn nearest_marker_number(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
 453    let cursor = cursor_offset.unwrap_or(0);
 454    marker_offsets
 455        .iter()
 456        .enumerate()
 457        .min_by_key(|(_, offset)| (**offset as isize - cursor as isize).unsigned_abs())
 458        .map(|(idx, _)| idx + 1)
 459        .unwrap_or(1)
 460}
 461
 462fn cursor_block_index(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
 463    let cursor = cursor_offset.unwrap_or(0);
 464    marker_offsets
 465        .windows(2)
 466        .position(|window| cursor >= window[0] && cursor < window[1])
 467        .unwrap_or_else(|| marker_offsets.len().saturating_sub(2))
 468}
 469
 470fn common_prefix_suffix(a: &[u8], b: &[u8]) -> (usize, usize) {
 471    let prefix = a.iter().zip(b.iter()).take_while(|(x, y)| x == y).count();
 472    let remaining_a = a.len() - prefix;
 473    let remaining_b = b.len() - prefix;
 474    let max_suffix = remaining_a.min(remaining_b);
 475    let suffix = a[a.len() - max_suffix..]
 476        .iter()
 477        .rev()
 478        .zip(b[b.len() - max_suffix..].iter().rev())
 479        .take_while(|(x, y)| x == y)
 480        .count();
 481    (prefix, suffix)
 482}
 483
 484/// Map a byte offset from old span coordinates to new span coordinates,
 485/// using common prefix/suffix within the span for accuracy.
 486fn map_boundary_offset(
 487    old_rel: usize,
 488    old_span_len: usize,
 489    new_span_len: usize,
 490    span_common_prefix: usize,
 491    span_common_suffix: usize,
 492) -> usize {
 493    if old_rel <= span_common_prefix {
 494        old_rel
 495    } else if old_rel >= old_span_len - span_common_suffix {
 496        new_span_len - (old_span_len - old_rel)
 497    } else {
 498        let old_changed_start = span_common_prefix;
 499        let old_changed_len = old_span_len
 500            .saturating_sub(span_common_prefix)
 501            .saturating_sub(span_common_suffix);
 502        let new_changed_start = span_common_prefix;
 503        let new_changed_len = new_span_len
 504            .saturating_sub(span_common_prefix)
 505            .saturating_sub(span_common_suffix);
 506
 507        if old_changed_len == 0 {
 508            new_changed_start
 509        } else {
 510            new_changed_start + ((old_rel - old_changed_start) * new_changed_len / old_changed_len)
 511        }
 512    }
 513}
 514
 515fn snap_to_line_start(text: &str, offset: usize) -> usize {
 516    let bounded = offset.min(text.len());
 517    let bounded = text.floor_char_boundary(bounded);
 518
 519    if bounded >= text.len() {
 520        return text.len();
 521    }
 522
 523    if bounded == 0 || text.as_bytes().get(bounded - 1) == Some(&b'\n') {
 524        return bounded;
 525    }
 526
 527    if let Some(next_nl_rel) = text[bounded..].find('\n') {
 528        let next = bounded + next_nl_rel + 1;
 529        return text.floor_char_boundary(next.min(text.len()));
 530    }
 531
 532    let prev_start = text[..bounded].rfind('\n').map(|idx| idx + 1).unwrap_or(0);
 533    text.floor_char_boundary(prev_start)
 534}
 535
 536/// Write the editable region content with byte-exact marker tags, inserting the
 537/// cursor marker at the given offset within the editable text.
 538///
 539/// The `tag_for_index` closure maps a boundary index to the marker tag string.
 540fn write_editable_with_markers_impl(
 541    output: &mut String,
 542    editable_text: &str,
 543    cursor_offset_in_editable: usize,
 544    cursor_marker: &str,
 545    marker_offsets: &[usize],
 546    tag_for_index: impl Fn(usize) -> String,
 547) {
 548    let mut cursor_placed = false;
 549    for (i, &offset) in marker_offsets.iter().enumerate() {
 550        output.push_str(&tag_for_index(i));
 551
 552        if let Some(&next_offset) = marker_offsets.get(i + 1) {
 553            let block = &editable_text[offset..next_offset];
 554            if !cursor_placed
 555                && cursor_offset_in_editable >= offset
 556                && cursor_offset_in_editable <= next_offset
 557            {
 558                cursor_placed = true;
 559                let cursor_in_block = cursor_offset_in_editable - offset;
 560                output.push_str(&block[..cursor_in_block]);
 561                output.push_str(cursor_marker);
 562                output.push_str(&block[cursor_in_block..]);
 563            } else {
 564                output.push_str(block);
 565            }
 566        }
 567    }
 568}
 569
 570pub fn write_editable_with_markers_v0316(
 571    output: &mut String,
 572    editable_text: &str,
 573    cursor_offset_in_editable: usize,
 574    cursor_marker: &str,
 575) {
 576    let marker_offsets = compute_marker_offsets(editable_text);
 577    write_editable_with_markers_impl(
 578        output,
 579        editable_text,
 580        cursor_offset_in_editable,
 581        cursor_marker,
 582        &marker_offsets,
 583        |i| marker_tag(i + 1),
 584    );
 585}
 586
 587pub fn write_editable_with_markers_v0317(
 588    output: &mut String,
 589    editable_text: &str,
 590    cursor_offset_in_editable: usize,
 591    cursor_marker: &str,
 592) {
 593    let marker_offsets = compute_marker_offsets(editable_text);
 594    let anchor_idx = cursor_block_index(Some(cursor_offset_in_editable), &marker_offsets);
 595    write_editable_with_markers_impl(
 596        output,
 597        editable_text,
 598        cursor_offset_in_editable,
 599        cursor_marker,
 600        &marker_offsets,
 601        |i| marker_tag_relative(i as isize - anchor_idx as isize),
 602    );
 603}
 604
 605pub fn write_editable_with_markers_v0318(
 606    output: &mut String,
 607    editable_text: &str,
 608    cursor_offset_in_editable: usize,
 609    cursor_marker: &str,
 610) {
 611    let marker_offsets = compute_marker_offsets_v0318(editable_text);
 612    write_editable_with_markers_impl(
 613        output,
 614        editable_text,
 615        cursor_offset_in_editable,
 616        cursor_marker,
 617        &marker_offsets,
 618        |i| marker_tag(i + 1),
 619    );
 620}
 621
 622/// Parse byte-exact model output and reconstruct the full new editable region.
 623///
 624/// `resolve_boundary` maps a parsed tag value to an absolute byte offset in
 625/// old_editable, given the marker_offsets. Returns `(start_byte, end_byte)` or
 626/// an error.
 627fn apply_marker_span_impl(
 628    old_editable: &str,
 629    tags: &[ParsedTag],
 630    output: &str,
 631    resolve_boundaries: impl Fn(isize, isize) -> Result<(usize, usize)>,
 632) -> Result<String> {
 633    if tags.is_empty() {
 634        return Err(anyhow!("no marker tags found in output"));
 635    }
 636    if tags.len() == 1 {
 637        return Err(anyhow!(
 638            "only one marker tag found in output, expected at least two"
 639        ));
 640    }
 641
 642    let start_value = tags[0].value;
 643    let end_value = tags[tags.len() - 1].value;
 644
 645    if start_value == end_value {
 646        return Ok(old_editable.to_string());
 647    }
 648
 649    let (start_byte, end_byte) = resolve_boundaries(start_value, end_value)?;
 650
 651    if start_byte > end_byte {
 652        return Err(anyhow!("start marker must come before end marker"));
 653    }
 654
 655    let mut new_content = String::new();
 656    for i in 0..tags.len() - 1 {
 657        let content_start = tags[i].tag_end;
 658        let content_end = tags[i + 1].tag_start;
 659        if content_start <= content_end {
 660            new_content.push_str(&output[content_start..content_end]);
 661        }
 662    }
 663
 664    let mut result = String::new();
 665    result.push_str(&old_editable[..start_byte]);
 666    result.push_str(&new_content);
 667    result.push_str(&old_editable[end_byte..]);
 668
 669    Ok(result)
 670}
 671
 672pub fn apply_marker_span_v0316(old_editable: &str, output: &str) -> Result<String> {
 673    let tags = collect_marker_tags(output);
 674
 675    // Validate monotonically increasing with no gaps (best-effort warning)
 676    if tags.len() >= 2 {
 677        let start_num = tags[0].value;
 678        let end_num = tags[tags.len() - 1].value;
 679        if start_num != end_num {
 680            let expected: Vec<isize> = (start_num..=end_num).collect();
 681            let actual: Vec<isize> = tags.iter().map(|t| t.value).collect();
 682            if actual != expected {
 683                eprintln!(
 684                    "V0316 marker sequence validation failed: expected {:?}, got {:?}. Attempting best-effort parse.",
 685                    expected, actual
 686                );
 687            }
 688        }
 689    }
 690
 691    let marker_offsets = compute_marker_offsets(old_editable);
 692    apply_marker_span_impl(old_editable, &tags, output, |start_val, end_val| {
 693        let start_idx = (start_val as usize)
 694            .checked_sub(1)
 695            .context("marker numbers are 1-indexed")?;
 696        let end_idx = (end_val as usize)
 697            .checked_sub(1)
 698            .context("marker numbers are 1-indexed")?;
 699        let start_byte = *marker_offsets
 700            .get(start_idx)
 701            .context("start marker number out of range")?;
 702        let end_byte = *marker_offsets
 703            .get(end_idx)
 704            .context("end marker number out of range")?;
 705        Ok((start_byte, end_byte))
 706    })
 707}
 708
 709pub fn apply_marker_span_v0317(
 710    old_editable: &str,
 711    output: &str,
 712    cursor_offset_in_old: Option<usize>,
 713) -> Result<String> {
 714    let tags = collect_relative_marker_tags(output);
 715    let marker_offsets = compute_marker_offsets(old_editable);
 716    let anchor_idx = cursor_block_index(cursor_offset_in_old, &marker_offsets);
 717
 718    apply_marker_span_impl(old_editable, &tags, output, |start_delta, end_delta| {
 719        let start_idx_signed = anchor_idx as isize + start_delta;
 720        let end_idx_signed = anchor_idx as isize + end_delta;
 721        if start_idx_signed < 0 || end_idx_signed < 0 {
 722            return Err(anyhow!("relative marker maps before first marker"));
 723        }
 724        let start_idx = usize::try_from(start_idx_signed).context("invalid start marker index")?;
 725        let end_idx = usize::try_from(end_idx_signed).context("invalid end marker index")?;
 726        let start_byte = *marker_offsets
 727            .get(start_idx)
 728            .context("start marker number out of range")?;
 729        let end_byte = *marker_offsets
 730            .get(end_idx)
 731            .context("end marker number out of range")?;
 732        Ok((start_byte, end_byte))
 733    })
 734}
 735
 736pub fn apply_marker_span_v0318(old_editable: &str, output: &str) -> Result<String> {
 737    let tags = collect_marker_tags(output);
 738
 739    if tags.len() >= 2 {
 740        let start_num = tags[0].value;
 741        let end_num = tags[tags.len() - 1].value;
 742        if start_num != end_num {
 743            let expected: Vec<isize> = (start_num..=end_num).collect();
 744            let actual: Vec<isize> = tags.iter().map(|t| t.value).collect();
 745            if actual != expected {
 746                eprintln!(
 747                    "V0318 marker sequence validation failed: expected {:?}, got {:?}. Attempting best-effort parse.",
 748                    expected, actual
 749                );
 750            }
 751        }
 752    }
 753
 754    let marker_offsets = compute_marker_offsets_v0318(old_editable);
 755    apply_marker_span_impl(old_editable, &tags, output, |start_val, end_val| {
 756        let start_idx = (start_val as usize)
 757            .checked_sub(1)
 758            .context("marker numbers are 1-indexed")?;
 759        let end_idx = (end_val as usize)
 760            .checked_sub(1)
 761            .context("marker numbers are 1-indexed")?;
 762        let start_byte = *marker_offsets
 763            .get(start_idx)
 764            .context("start marker number out of range")?;
 765        let end_byte = *marker_offsets
 766            .get(end_idx)
 767            .context("end marker number out of range")?;
 768        Ok((start_byte, end_byte))
 769    })
 770}
 771
/// Encode the training target from old and new editable text.
///
/// Shared implementation for V0316, V0317, and V0318. The `tag_for_block_idx`
/// closure maps a block index to the appropriate marker tag string.
/// `no_edit_tag` is the marker tag to repeat when there are no edits.
///
/// `marker_offsets` are the marker boundaries of `old_editable`. The output
/// consists of a tag followed by the new content of each block in the
/// affected span, then the tag of the closing boundary, then `end_marker`.
/// No separators are added between tags and content. `cursor_marker` is
/// inserted at most once, when `cursor_offset_in_new` falls inside an emitted
/// block.
fn encode_from_old_and_new_impl(
    old_editable: &str,
    new_editable: &str,
    cursor_offset_in_new: Option<usize>,
    cursor_marker: &str,
    end_marker: &str,
    no_edit_tag: &str,
    marker_offsets: &[usize],
    tag_for_block_idx: impl Fn(usize) -> String,
) -> Result<String> {
    // No edits: the same tag twice (an empty span), then the end marker.
    if old_editable == new_editable {
        return Ok(format!("{no_edit_tag}{no_edit_tag}{end_marker}"));
    }

    let (common_prefix, common_suffix) =
        common_prefix_suffix(old_editable.as_bytes(), new_editable.as_bytes());
    let change_end_in_old = old_editable.len() - common_suffix;

    // Last boundary at or before the first differing byte.
    let start_marker_idx = marker_offsets
        .iter()
        .rposition(|&offset| offset <= common_prefix)
        .unwrap_or(0);
    // First boundary at or after the end of the changed region in the old text.
    let end_marker_idx = marker_offsets
        .iter()
        .position(|&offset| offset >= change_end_in_old)
        .unwrap_or(marker_offsets.len() - 1);

    let old_start = marker_offsets[start_marker_idx];
    let old_end = marker_offsets[end_marker_idx];

    // The span starts at the same offset in both texts and ends the same
    // distance from the end of each text.
    let new_start = old_start;
    let new_end = new_editable
        .len()
        .saturating_sub(old_editable.len().saturating_sub(old_end));

    let new_span = &new_editable[new_start..new_end];
    let old_span = &old_editable[old_start..old_end];

    let (span_common_prefix, span_common_suffix) =
        common_prefix_suffix(old_span.as_bytes(), new_span.as_bytes());

    let mut result = String::new();
    // End (relative to `new_span`) of the previously emitted block.
    let mut prev_new_rel = 0usize;
    let mut cursor_placed = false;

    for block_idx in start_marker_idx..end_marker_idx {
        result.push_str(&tag_for_block_idx(block_idx));

        // The last block takes whatever remains of the new span; interior
        // boundaries are mapped from old to new coordinates and then snapped
        // to a line start.
        let new_rel_end = if block_idx + 1 == end_marker_idx {
            new_span.len()
        } else {
            let old_rel = marker_offsets[block_idx + 1] - old_start;
            let mapped = map_boundary_offset(
                old_rel,
                old_span.len(),
                new_span.len(),
                span_common_prefix,
                span_common_suffix,
            );
            snap_to_line_start(new_span, mapped)
        };

        // Never let a block end before the previous one did.
        let new_rel_end = new_rel_end.max(prev_new_rel);
        let block_content = &new_span[prev_new_rel..new_rel_end];

        if !cursor_placed {
            if let Some(cursor_offset) = cursor_offset_in_new {
                let abs_start = new_start + prev_new_rel;
                let abs_end = new_start + new_rel_end;
                // Both ends inclusive, so a cursor sitting on a boundary goes
                // into the earlier block.
                if cursor_offset >= abs_start && cursor_offset <= abs_end {
                    cursor_placed = true;
                    let cursor_in_block = cursor_offset - abs_start;
                    let bounded = cursor_in_block.min(block_content.len());
                    result.push_str(&block_content[..bounded]);
                    result.push_str(cursor_marker);
                    result.push_str(&block_content[bounded..]);
                    prev_new_rel = new_rel_end;
                    continue;
                }
            }
        }

        result.push_str(block_content);
        prev_new_rel = new_rel_end;
    }

    // Closing boundary tag, then the end marker.
    result.push_str(&tag_for_block_idx(end_marker_idx));
    result.push_str(end_marker);

    Ok(result)
}
 868
 869pub fn encode_from_old_and_new_v0316(
 870    old_editable: &str,
 871    new_editable: &str,
 872    cursor_offset_in_new: Option<usize>,
 873    cursor_marker: &str,
 874    end_marker: &str,
 875) -> Result<String> {
 876    let marker_offsets = compute_marker_offsets(old_editable);
 877    let no_edit_tag = marker_tag(nearest_marker_number(cursor_offset_in_new, &marker_offsets));
 878    encode_from_old_and_new_impl(
 879        old_editable,
 880        new_editable,
 881        cursor_offset_in_new,
 882        cursor_marker,
 883        end_marker,
 884        &no_edit_tag,
 885        &marker_offsets,
 886        |block_idx| marker_tag(block_idx + 1),
 887    )
 888}
 889
 890pub fn encode_from_old_and_new_v0317(
 891    old_editable: &str,
 892    new_editable: &str,
 893    cursor_offset_in_new: Option<usize>,
 894    cursor_marker: &str,
 895    end_marker: &str,
 896) -> Result<String> {
 897    let marker_offsets = compute_marker_offsets(old_editable);
 898    let anchor_idx = cursor_block_index(cursor_offset_in_new, &marker_offsets);
 899    let no_edit_tag = marker_tag_relative(0);
 900    encode_from_old_and_new_impl(
 901        old_editable,
 902        new_editable,
 903        cursor_offset_in_new,
 904        cursor_marker,
 905        end_marker,
 906        &no_edit_tag,
 907        &marker_offsets,
 908        |block_idx| marker_tag_relative(block_idx as isize - anchor_idx as isize),
 909    )
 910}
 911
 912pub fn encode_from_old_and_new_v0318(
 913    old_editable: &str,
 914    new_editable: &str,
 915    cursor_offset_in_new: Option<usize>,
 916    cursor_marker: &str,
 917    end_marker: &str,
 918) -> Result<String> {
 919    let marker_offsets = compute_marker_offsets_v0318(old_editable);
 920    let no_edit_tag = marker_tag(nearest_marker_number(cursor_offset_in_new, &marker_offsets));
 921    encode_from_old_and_new_impl(
 922        old_editable,
 923        new_editable,
 924        cursor_offset_in_new,
 925        cursor_marker,
 926        end_marker,
 927        &no_edit_tag,
 928        &marker_offsets,
 929        |block_idx| marker_tag(block_idx + 1),
 930    )
 931}
 932
 933#[cfg(test)]
 934mod tests {
 935    use super::*;
 936
 937    #[test]
 938    fn test_compute_marker_offsets_small_block() {
 939        let text = "aaa\nbbb\nccc\n";
 940        let offsets = compute_marker_offsets(text);
 941        assert_eq!(offsets, vec![0, text.len()]);
 942    }
 943
 944    #[test]
 945    fn test_compute_marker_offsets_blank_line_split() {
 946        let text = "aaa\nbbb\nccc\n\nddd\neee\nfff\n";
 947        let offsets = compute_marker_offsets(text);
 948        assert_eq!(offsets[0], 0);
 949        assert!(offsets.contains(&13), "offsets: {:?}", offsets);
 950        assert_eq!(*offsets.last().unwrap(), text.len());
 951    }
 952
 953    #[test]
 954    fn test_compute_marker_offsets_blank_line_split_overrides_pending_hard_cap_boundary() {
 955        let text = "\
 956class OCRDataframe(BaseModel):
 957    model_config = ConfigDict(arbitrary_types_allowed=True)
 958
 959    df: pl.DataFrame
 960
 961    def page(self, page_number: int = 0) -> \"OCRDataframe\":
 962        # Filter dataframe on specific page
 963        df_page = self.df.filter(pl.col(\"page\") == page_number)
 964        return OCRDataframe(df=df_page)
 965
 966    def get_text_cell(
 967        self,
 968        cell: Cell,
 969        margin: int = 0,
 970        page_number: Optional[int] = None,
 971        min_confidence: int = 50,
 972    ) -> Optional[str]:
 973        \"\"\"
 974        Get text corresponding to cell
 975";
 976        let offsets = compute_marker_offsets(text);
 977
 978        let def_start = text
 979            .find("    def get_text_cell(")
 980            .expect("def line exists");
 981        let self_start = text.find("        self,").expect("self line exists");
 982
 983        assert!(
 984            offsets.contains(&def_start),
 985            "expected boundary at def line start ({def_start}), got {offsets:?}"
 986        );
 987        assert!(
 988            !offsets.contains(&self_start),
 989            "did not expect boundary at self line start ({self_start}), got {offsets:?}"
 990        );
 991    }
 992
 993    #[test]
 994    fn test_compute_marker_offsets_blank_line_split_skips_closer_line() {
 995        let text = "\
 996impl Plugin for AhoySchedulePlugin {
 997    fn build(&self, app: &mut App) {
 998        app.configure_sets(
 999            self.schedule,
1000            (
1001                AhoySystems::MoveCharacters,
1002                AhoySystems::ApplyForcesToDynamicRigidBodies,
1003            )
1004                .chain()
1005                .before(PhysicsSystems::First),
1006        );
1007
1008    }
1009}
1010
1011/// System set used by all systems of `bevy_ahoy`.
1012#[derive(SystemSet, Debug, Clone, Copy, Hash, PartialEq, Eq)]
1013pub enum AhoySystems {
1014    MoveCharacters,
1015    ApplyForcesToDynamicRigidBodies,
1016}
1017";
1018        let offsets = compute_marker_offsets(text);
1019
1020        let closer_start = text.find("    }\n").expect("closer line exists");
1021        let doc_start = text
1022            .find("/// System set used by all systems of `bevy_ahoy`.")
1023            .expect("doc line exists");
1024
1025        assert!(
1026            !offsets.contains(&closer_start),
1027            "did not expect boundary at closer line start ({closer_start}), got {offsets:?}"
1028        );
1029        assert!(
1030            offsets.contains(&doc_start),
1031            "expected boundary at doc line start ({doc_start}), got {offsets:?}"
1032        );
1033    }
1034
1035    #[test]
1036    fn test_compute_marker_offsets_max_lines_split() {
1037        let text = "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n";
1038        let offsets = compute_marker_offsets(text);
1039        assert!(offsets.len() >= 3, "offsets: {:?}", offsets);
1040    }
1041
1042    #[test]
1043    fn test_compute_marker_offsets_hard_cap_nudges_past_closer_to_case_line() {
1044        let text = "a1\na2\na3\na4\na5\na6\na7\na8\n}\ncase 'x': {\nbody\n";
1045        let offsets = compute_marker_offsets(text);
1046
1047        let expected = text.find("case 'x': {").expect("case line exists");
1048        assert!(
1049            offsets.contains(&expected),
1050            "expected nudged boundary at case line start ({expected}), got {offsets:?}"
1051        );
1052    }
1053
1054    #[test]
1055    fn test_compute_marker_offsets_hard_cap_nudge_respects_max_forward_lines() {
1056        let text = "a1\na2\na3\na4\na5\na6\na7\na8\n}\n}\n}\n}\n}\ncase 'x': {\nbody\n";
1057        let offsets = compute_marker_offsets(text);
1058
1059        let case_start = text.find("case 'x': {").expect("case line exists");
1060        assert!(
1061            !offsets.contains(&case_start),
1062            "boundary should not nudge beyond max forward lines; offsets: {offsets:?}"
1063        );
1064    }
1065
1066    #[test]
1067    fn test_compute_marker_offsets_stay_sorted_when_hard_cap_boundary_nudges_forward() {
1068        let text = "\
1069aaaaaaaaaa = 1;
1070bbbbbbbbbb = 2;
1071cccccccccc = 3;
1072dddddddddd = 4;
1073eeeeeeeeee = 5;
1074ffffffffff = 6;
1075gggggggggg = 7;
1076hhhhhhhhhh = 8;
1077          };
1078        };
1079
1080        grafanaDashboards = {
1081          cluster-overview.spec = {
1082            inherit instanceSelector;
1083            folderRef = \"infrastructure\";
1084            json = builtins.readFile ./grafana/dashboards/cluster-overview.json;
1085          };
1086        };
1087";
1088        let offsets = compute_marker_offsets(text);
1089
1090        assert_eq!(offsets.first().copied(), Some(0), "offsets: {offsets:?}");
1091        assert_eq!(
1092            offsets.last().copied(),
1093            Some(text.len()),
1094            "offsets: {offsets:?}"
1095        );
1096        assert!(
1097            offsets.windows(2).all(|window| window[0] <= window[1]),
1098            "offsets must be sorted: {offsets:?}"
1099        );
1100    }
1101
1102    #[test]
1103    fn test_compute_marker_offsets_empty() {
1104        let offsets = compute_marker_offsets("");
1105        assert_eq!(offsets, vec![0, 0]);
1106    }
1107
1108    #[test]
1109    fn test_compute_marker_offsets_avoid_short_markdown_blocks() {
1110        let text = "\
1111# Spree Posts
1112
1113This is a Posts extension for [Spree Commerce](https://spreecommerce.org), built with Ruby on Rails.
1114
1115## Installation
1116
11171. Add this extension to your Gemfile with this line:
1118
1119    ```ruby
1120    bundle add spree_posts
1121    ```
1122
11232. Run the install generator
1124
1125    ```ruby
1126    bundle exec rails g spree_posts:install
1127    ```
1128
11293. Restart your server
1130
1131  If your server was running, restart it so that it can find the assets properly.
1132
1133## Developing
1134
11351. Create a dummy app
1136
1137    ```bash
1138    bundle update
1139    bundle exec rake test_app
1140    ```
1141
11422. Add your new code
11433. Run tests
1144
1145    ```bash
1146    bundle exec rspec
1147    ```
1148
1149When testing your applications integration with this extension you may use it's factories.
1150Simply add this require statement to your spec_helper:
1151
1152```ruby
1153require 'spree_posts/factories'
1154```
1155
1156## Releasing a new version
1157
1158```shell
1159bundle exec gem bump -p -t
1160bundle exec gem release
1161```
1162
1163For more options please see [gem-release README](https://github.com/svenfuchs/gem-release)
1164
1165## Contributing
1166
1167If you'd like to contribute, please take a look at the contributing guide.
1168";
1169        let offsets = compute_marker_offsets(text);
1170
1171        assert_eq!(offsets.first().copied(), Some(0), "offsets: {offsets:?}");
1172        assert_eq!(
1173            offsets.last().copied(),
1174            Some(text.len()),
1175            "offsets: {offsets:?}"
1176        );
1177
1178        for window in offsets.windows(2) {
1179            let block = &text[window[0]..window[1]];
1180            let line_count = block.lines().count();
1181            assert!(
1182                line_count >= V0316_MIN_BLOCK_LINES,
1183                "block too short: {line_count} lines in block {block:?} with offsets {offsets:?}"
1184            );
1185        }
1186    }
1187
1188    #[test]
1189    fn test_extract_marker_span() {
1190        let text = "<|marker_2|>\n    new content\n<|marker_3|>\n";
1191        let (start, end, content) = extract_marker_span(text).unwrap();
1192        assert_eq!(start, 2);
1193        assert_eq!(end, 3);
1194        assert_eq!(content, "    new content\n");
1195    }
1196
1197    #[test]
1198    fn test_extract_marker_span_multi_line() {
1199        let text = "<|marker_1|>\nline1\nline2\nline3\n<|marker_4|>";
1200        let (start, end, content) = extract_marker_span(text).unwrap();
1201        assert_eq!(start, 1);
1202        assert_eq!(end, 4);
1203        assert_eq!(content, "line1\nline2\nline3\n");
1204    }
1205
1206    #[test]
1207    fn test_apply_marker_span_basic() {
1208        let old = "aaa\nbbb\nccc\n";
1209        let output = "<|marker_1|>\naaa\nBBB\nccc\n<|marker_2|>";
1210        let result = apply_marker_span(old, output).unwrap();
1211        assert_eq!(result, "aaa\nBBB\nccc\n");
1212    }
1213
1214    #[test]
1215    fn test_apply_marker_span_preserves_trailing_blank_line() {
1216        let old = "/\nresult\n\n";
1217        let output = "<|marker_1|>\n//\nresult\n\n<|marker_2|>";
1218        let result = apply_marker_span(old, output).unwrap();
1219        assert_eq!(result, "//\nresult\n\n");
1220    }
1221
1222    #[test]
1223    fn test_encode_no_edits() {
1224        let old = "aaa\nbbb\nccc\n";
1225        let result = encode_from_old_and_new(
1226            old,
1227            old,
1228            None,
1229            "<|user_cursor|>",
1230            ">>>>>>> UPDATED\n",
1231            "NO_EDITS\n",
1232        )
1233        .unwrap();
1234        assert_eq!(result, "NO_EDITS\n>>>>>>> UPDATED\n");
1235    }
1236
1237    #[test]
1238    fn test_encode_with_change() {
1239        let old = "aaa\nbbb\nccc\n";
1240        let new = "aaa\nBBB\nccc\n";
1241        let result = encode_from_old_and_new(
1242            old,
1243            new,
1244            None,
1245            "<|user_cursor|>",
1246            ">>>>>>> UPDATED\n",
1247            "NO_EDITS\n",
1248        )
1249        .unwrap();
1250        assert!(result.contains("<|marker_1|>"));
1251        assert!(result.contains("<|marker_2|>"));
1252        assert!(result.contains("aaa\nBBB\nccc\n"));
1253        assert!(result.ends_with(">>>>>>> UPDATED\n"));
1254    }
1255
1256    #[test]
1257    fn test_roundtrip_encode_apply() {
1258        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
1259        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
1260        let encoded = encode_from_old_and_new(
1261            old,
1262            new,
1263            None,
1264            "<|user_cursor|>",
1265            ">>>>>>> UPDATED\n",
1266            "NO_EDITS\n",
1267        )
1268        .unwrap();
1269        let output = encoded
1270            .strip_suffix(">>>>>>> UPDATED\n")
1271            .expect("should have end marker");
1272        let reconstructed = apply_marker_span(old, output).unwrap();
1273        assert_eq!(reconstructed, new);
1274    }
1275
1276    #[test]
1277    fn test_extract_editable_region_from_markers_multi() {
1278        let text = "prefix\n<|marker_1|>\naaa\nbbb\n<|marker_2|>\nccc\nddd\n<|marker_3|>\nsuffix";
1279        let parsed = extract_editable_region_from_markers(text).unwrap();
1280        assert_eq!(parsed, "aaa\nbbb\nccc\nddd");
1281    }
1282
1283    #[test]
1284    fn test_extract_editable_region_two_markers() {
1285        let text = "<|marker_1|>\none\ntwo three\n<|marker_2|>";
1286        let parsed = extract_editable_region_from_markers(text).unwrap();
1287        assert_eq!(parsed, "one\ntwo three");
1288    }
1289
1290    #[test]
1291    fn test_encode_with_cursor() {
1292        let old = "aaa\nbbb\nccc\n";
1293        let new = "aaa\nBBB\nccc\n";
1294        let result = encode_from_old_and_new(
1295            old,
1296            new,
1297            Some(5),
1298            "<|user_cursor|>",
1299            ">>>>>>> UPDATED\n",
1300            "NO_EDITS\n",
1301        )
1302        .unwrap();
1303        assert!(result.contains("<|user_cursor|>"), "result: {result}");
1304        assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
1305    }
1306
1307    #[test]
1308    fn test_extract_marker_span_strips_intermediate_markers() {
1309        let text = "<|marker_2|>\nline1\n<|marker_3|>\nline2\n<|marker_4|>";
1310        let (start, end, content) = extract_marker_span(text).unwrap();
1311        assert_eq!(start, 2);
1312        assert_eq!(end, 4);
1313        assert_eq!(content, "line1\nline2\n");
1314    }
1315
1316    #[test]
1317    fn test_extract_marker_span_strips_multiple_intermediate_markers() {
1318        let text = "<|marker_1|>\naaa\n<|marker_2|>\nbbb\n<|marker_3|>\nccc\n<|marker_4|>";
1319        let (start, end, content) = extract_marker_span(text).unwrap();
1320        assert_eq!(start, 1);
1321        assert_eq!(end, 4);
1322        assert_eq!(content, "aaa\nbbb\nccc\n");
1323    }
1324
1325    #[test]
1326    fn test_apply_marker_span_with_extra_intermediate_marker() {
1327        let old = "aaa\nbbb\nccc\n";
1328        let output = "<|marker_1|>\naaa\n<|marker_1|>\nBBB\nccc\n<|marker_2|>";
1329        let result = apply_marker_span(old, output).unwrap();
1330        assert_eq!(result, "aaa\nBBB\nccc\n");
1331    }
1332
1333    #[test]
1334    fn test_strip_marker_tags_inline() {
1335        assert_eq!(strip_marker_tags("no markers here"), "no markers here");
1336        assert_eq!(strip_marker_tags("before<|marker_5|>after"), "beforeafter");
1337        assert_eq!(
1338            strip_marker_tags("line1\n<|marker_3|>\nline2"),
1339            "line1\nline2"
1340        );
1341    }
1342
1343    #[test]
1344    fn test_write_editable_with_markers_v0316_byte_exact() {
1345        let editable = "aaa\nbbb\nccc\n";
1346        let mut output = String::new();
1347        write_editable_with_markers_v0316(&mut output, editable, 4, "<|user_cursor|>");
1348        assert!(output.starts_with("<|marker_1|>"));
1349        assert!(output.contains("<|user_cursor|>"));
1350        let stripped = output.replace("<|user_cursor|>", "");
1351        let stripped = strip_marker_tags(&stripped);
1352        assert_eq!(stripped, editable);
1353    }
1354
1355    #[test]
1356    fn test_apply_marker_span_v0316_basic() {
1357        let old = "aaa\nbbb\nccc\n";
1358        let output = "<|marker_1|>aaa\nBBB\nccc\n<|marker_2|>";
1359        let result = apply_marker_span_v0316(old, output).unwrap();
1360        assert_eq!(result, "aaa\nBBB\nccc\n");
1361    }
1362
1363    #[test]
1364    fn test_apply_marker_span_v0316_no_edit() {
1365        let old = "aaa\nbbb\nccc\n";
1366        let output = "<|marker_1|><|marker_1|>";
1367        let result = apply_marker_span_v0316(old, output).unwrap();
1368        assert_eq!(result, old);
1369    }
1370
1371    #[test]
1372    fn test_apply_marker_span_v0316_no_edit_any_marker() {
1373        let old = "aaa\nbbb\nccc\n";
1374        let output = "<|marker_2|>ignored content<|marker_2|>";
1375        let result = apply_marker_span_v0316(old, output).unwrap();
1376        assert_eq!(result, old);
1377    }
1378
1379    #[test]
1380    fn test_apply_marker_span_v0316_multi_block() {
1381        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
1382        let marker_offsets = compute_marker_offsets(old);
1383        assert!(
1384            marker_offsets.len() >= 3,
1385            "expected at least 3 offsets, got {:?}",
1386            marker_offsets
1387        );
1388
1389        let new_content = "LINE1\nLINE2\nLINE3\n\nLINE5\nLINE6\nLINE7\nLINE8\n";
1390        let mut output = String::new();
1391        output.push_str("<|marker_1|>");
1392        for i in 0..marker_offsets.len() - 1 {
1393            if i > 0 {
1394                output.push_str(&marker_tag(i + 1));
1395            }
1396            let start = marker_offsets[i];
1397            let end = marker_offsets[i + 1];
1398            let block_len = end - start;
1399            output.push_str(&new_content[start..start + block_len]);
1400        }
1401        let last_marker_num = marker_offsets.len();
1402        output.push_str(&marker_tag(last_marker_num));
1403        let result = apply_marker_span_v0316(old, &output).unwrap();
1404        assert_eq!(result, new_content);
1405    }
1406
1407    #[test]
1408    fn test_apply_marker_span_v0316_byte_exact_no_normalization() {
1409        let old = "aaa\nbbb\nccc\n";
1410        let output = "<|marker_1|>aaa\nBBB\nccc<|marker_2|>";
1411        let result = apply_marker_span_v0316(old, output).unwrap();
1412        assert_eq!(result, "aaa\nBBB\nccc");
1413    }
1414
1415    #[test]
1416    fn test_encode_v0316_no_edits() {
1417        let old = "aaa\nbbb\nccc\n";
1418        let result =
1419            encode_from_old_and_new_v0316(old, old, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1420        assert!(result.ends_with("<|end|>"));
1421        let stripped = result.strip_suffix("<|end|>").unwrap();
1422        let result_parsed = apply_marker_span_v0316(old, stripped).unwrap();
1423        assert_eq!(result_parsed, old);
1424    }
1425
1426    #[test]
1427    fn test_encode_v0316_with_change() {
1428        let old = "aaa\nbbb\nccc\n";
1429        let new = "aaa\nBBB\nccc\n";
1430        let result =
1431            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1432        assert!(result.contains("<|marker_1|>"));
1433        assert!(result.contains("<|marker_2|>"));
1434        assert!(result.ends_with("<|end|>"));
1435    }
1436
1437    #[test]
1438    fn test_roundtrip_v0316() {
1439        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
1440        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
1441        let encoded =
1442            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1443        let stripped = encoded
1444            .strip_suffix("<|end|>")
1445            .expect("should have end marker");
1446        let reconstructed = apply_marker_span_v0316(old, stripped).unwrap();
1447        assert_eq!(reconstructed, new);
1448    }
1449
1450    #[test]
1451    fn test_roundtrip_v0316_with_cursor() {
1452        let old = "aaa\nbbb\nccc\n";
1453        let new = "aaa\nBBB\nccc\n";
1454        let result =
1455            encode_from_old_and_new_v0316(old, new, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1456        assert!(result.contains("<|user_cursor|>"), "result: {result}");
1457        assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
1458    }
1459
1460    #[test]
1461    fn test_roundtrip_v0316_multi_block_change() {
1462        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
1463        let new = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
1464        let encoded =
1465            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1466        let stripped = encoded
1467            .strip_suffix("<|end|>")
1468            .expect("should have end marker");
1469        let reconstructed = apply_marker_span_v0316(old, stripped).unwrap();
1470        assert_eq!(reconstructed, new);
1471    }
1472
1473    #[test]
1474    fn test_nearest_marker_number() {
1475        let offsets = vec![0, 10, 20, 30];
1476        assert_eq!(nearest_marker_number(Some(0), &offsets), 1);
1477        assert_eq!(nearest_marker_number(Some(9), &offsets), 2);
1478        assert_eq!(nearest_marker_number(Some(15), &offsets), 2);
1479        assert_eq!(nearest_marker_number(Some(25), &offsets), 3);
1480        assert_eq!(nearest_marker_number(Some(30), &offsets), 4);
1481        assert_eq!(nearest_marker_number(None, &offsets), 1);
1482    }
1483
1484    #[test]
1485    fn test_marker_tag_relative_formats_as_expected() {
1486        assert_eq!(marker_tag_relative(-2), "<|marker-2|>");
1487        assert_eq!(marker_tag_relative(-1), "<|marker-1|>");
1488        assert_eq!(marker_tag_relative(0), "<|marker-0|>");
1489        assert_eq!(marker_tag_relative(1), "<|marker+1|>");
1490        assert_eq!(marker_tag_relative(2), "<|marker+2|>");
1491    }
1492
1493    #[test]
1494    fn test_write_editable_with_markers_v0317_includes_relative_markers_and_cursor() {
1495        let editable = "aaa\nbbb\nccc\n";
1496        let mut output = String::new();
1497        write_editable_with_markers_v0317(&mut output, editable, 4, "<|user_cursor|>");
1498
1499        assert!(output.contains("<|marker-0|>"));
1500        assert!(output.contains("<|user_cursor|>"));
1501
1502        let stripped = output.replace("<|user_cursor|>", "");
1503        let stripped =
1504            collect_relative_marker_tags(&stripped)
1505                .iter()
1506                .fold(stripped.clone(), |acc, marker| {
1507                    let tag = &stripped[marker.tag_start..marker.tag_end];
1508                    acc.replace(tag, "")
1509                });
1510        assert_eq!(stripped, editable);
1511    }
1512
1513    #[test]
1514    fn test_apply_marker_span_v0317_basic() {
1515        let old = "aaa\nbbb\nccc\n";
1516        let output = "<|marker-0|>aaa\nBBB\nccc\n<|marker+1|>";
1517        let result = apply_marker_span_v0317(old, output, Some(0)).unwrap();
1518        assert_eq!(result, "aaa\nBBB\nccc\n");
1519    }
1520
1521    #[test]
1522    fn test_apply_marker_span_v0317_no_edit() {
1523        let old = "aaa\nbbb\nccc\n";
1524        let output = "<|marker-0|><|marker-0|>";
1525        let result = apply_marker_span_v0317(old, output, Some(0)).unwrap();
1526        assert_eq!(result, old);
1527    }
1528
1529    #[test]
1530    fn test_encode_v0317_no_edits() {
1531        let old = "aaa\nbbb\nccc\n";
1532        let result =
1533            encode_from_old_and_new_v0317(old, old, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1534        assert_eq!(result, "<|marker-0|><|marker-0|><|end|>");
1535    }
1536
1537    #[test]
1538    fn test_roundtrip_v0317() {
1539        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
1540        let new = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
1541        let cursor = Some(6);
1542
1543        let encoded =
1544            encode_from_old_and_new_v0317(old, new, cursor, "<|user_cursor|>", "<|end|>").unwrap();
1545        let stripped = encoded
1546            .strip_suffix("<|end|>")
1547            .expect("should have end marker");
1548        let stripped = stripped.replace("<|user_cursor|>", "");
1549        let reconstructed = apply_marker_span_v0317(old, &stripped, cursor).unwrap();
1550        assert_eq!(reconstructed, new);
1551    }
1552
1553    #[test]
1554    fn test_roundtrip_v0317_with_cursor_marker() {
1555        let old = "aaa\nbbb\nccc\n";
1556        let new = "aaa\nBBB\nccc\n";
1557        let result =
1558            encode_from_old_and_new_v0317(old, new, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1559        assert!(result.contains("<|user_cursor|>"), "result: {result}");
1560        assert!(result.contains("<|marker-0|>"), "result: {result}");
1561    }
1562
1563    #[test]
1564    fn test_compute_marker_offsets_v0318_uses_larger_block_sizes() {
1565        let text = "l1\nl2\nl3\n\nl5\nl6\nl7\nl8\nl9\nl10\nl11\nl12\nl13\n";
1566        let v0316_offsets = compute_marker_offsets(text);
1567        let v0318_offsets = compute_marker_offsets_v0318(text);
1568
1569        assert!(v0318_offsets.len() < v0316_offsets.len());
1570        assert_eq!(v0316_offsets.first().copied(), Some(0));
1571        assert_eq!(v0318_offsets.first().copied(), Some(0));
1572        assert_eq!(v0316_offsets.last().copied(), Some(text.len()));
1573        assert_eq!(v0318_offsets.last().copied(), Some(text.len()));
1574    }
1575
1576    #[test]
1577    fn test_roundtrip_v0318() {
1578        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
1579        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
1580        let encoded =
1581            encode_from_old_and_new_v0318(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1582        let stripped = encoded
1583            .strip_suffix("<|end|>")
1584            .expect("should have end marker");
1585        let reconstructed = apply_marker_span_v0318(old, stripped).unwrap();
1586        assert_eq!(reconstructed, new);
1587    }
1588
1589    #[test]
1590    fn test_encode_v0317_markers_stay_on_line_boundaries() {
1591        let old = "\
1592\t\t\t\tcontinue outer;
1593\t\t\t}
1594\t\t}
1595\t}
1596
1597\tconst intersectionObserver = new IntersectionObserver((entries) => {
1598\t\tfor (const entry of entries) {
1599\t\t\tif (entry.isIntersecting) {
1600\t\t\t\tintersectionObserver.unobserve(entry.target);
1601\t\t\t\tanchorPreload(/** @type {HTMLAnchorElement} */ (entry.target));
1602\t\t\t}
1603\t\t}
1604\t});
1605
1606\tconst observer = new MutationObserver(() => {
1607\t\tconst links = /** @type {NodeListOf<HTMLAnchorElement>} */ (
1608\t\t\tdocument.querySelectorAll('a[data-preload]')
1609\t\t);
1610
1611\t\tfor (const link of links) {
1612\t\t\tif (linkSet.has(link)) continue;
1613\t\t\tlinkSet.add(link);
1614
1615\t\t\tswitch (link.dataset.preload) {
1616\t\t\t\tcase '':
1617\t\t\t\tcase 'true':
1618\t\t\t\tcase 'hover': {
1619\t\t\t\t\tlink.addEventListener('mouseenter', function callback() {
1620\t\t\t\t\t\tlink.removeEventListener('mouseenter', callback);
1621\t\t\t\t\t\tanchorPreload(link);
1622\t\t\t\t\t});
1623";
1624        let new = old.replacen(
1625            "\t\t\t\tcase 'true':\n",
1626            "\t\t\t\tcase 'TRUE':<|user_cursor|>\n",
1627            1,
1628        );
1629
1630        let cursor_offset = new.find("<|user_cursor|>").expect("cursor marker in new");
1631        let new_without_cursor = new.replace("<|user_cursor|>", "");
1632
1633        let encoded = encode_from_old_and_new_v0317(
1634            old,
1635            &new_without_cursor,
1636            Some(cursor_offset),
1637            "<|user_cursor|>",
1638            "<|end|>",
1639        )
1640        .unwrap();
1641
1642        let core = encoded.strip_suffix("<|end|>").unwrap_or(&encoded);
1643        for marker in collect_relative_marker_tags(core) {
1644            let tag_start = marker.tag_start;
1645            assert!(
1646                tag_start == 0 || core.as_bytes()[tag_start - 1] == b'\n',
1647                "marker not at line boundary: {} in output:\n{}",
1648                marker_tag_relative(marker.value),
1649                core
1650            );
1651        }
1652    }
1653}