multi_region.rs

   1use anyhow::{Context as _, Result, anyhow};
   2
/// Opening text of an absolute marker tag; `marker_tag` appends the marker
/// number and `MARKER_TAG_SUFFIX` to it.
pub const MARKER_TAG_PREFIX: &str = "<|marker_";
/// Closing text that terminates a marker tag.
pub const MARKER_TAG_SUFFIX: &str = "|>";
/// Prefix searched for when collecting relative tags such as `<|marker+1|>`.
pub const RELATIVE_MARKER_TAG_PREFIX: &str = "<|marker";
// Minimum / maximum block sizes (in lines) handed to
// `compute_marker_offsets_with_limits` by `compute_marker_offsets`.
const V0316_MIN_BLOCK_LINES: usize = 3;
const V0316_MAX_BLOCK_LINES: usize = 8;
// Minimum / maximum block sizes (in lines) used by
// `compute_marker_offsets_v0318`.
const V0318_MIN_BLOCK_LINES: usize = 6;
const V0318_MAX_BLOCK_LINES: usize = 16;
/// Width, in lines, of the forward scan window used by `skip_to_good_start`.
const MAX_NUDGE_LINES: usize = 5;
// Per-version end-of-output markers. All four currently hold the same text.
// NOTE(review): they are not referenced in this part of the file — confirm
// their consumers before changing any of them.
pub const V0316_END_MARKER: &str = "<[end▁of▁sentence]>";
pub const V0317_END_MARKER: &str = "<[end▁of▁sentence]>";
pub const V0318_END_MARKER: &str = "<[end▁of▁sentence]>";
pub const V0327_END_MARKER: &str = "<[end▁of▁sentence]>";
  15
  16pub fn marker_tag(number: usize) -> String {
  17    format!("{MARKER_TAG_PREFIX}{number}{MARKER_TAG_SUFFIX}")
  18}
  19
  20pub fn marker_tag_relative(delta: isize) -> String {
  21    if delta > 0 {
  22        format!("<|marker+{delta}|>")
  23    } else if delta == 0 {
  24        String::from("<|marker-0|>")
  25    } else {
  26        format!("<|marker{delta}|>")
  27    }
  28}
  29
/// Per-line facts recorded by `collect_line_info` for one line of text.
struct LineInfo {
    /// Byte offset of the line's first byte within the scanned text.
    start: usize,
    /// True when the line is empty after trimming whitespace.
    is_blank: bool,
    /// True when the line is non-blank and `is_structural_tail` rejects it.
    is_good_start: bool,
}
  35
  36fn collect_line_info(text: &str) -> Vec<LineInfo> {
  37    let mut lines = Vec::new();
  38    let mut offset = 0;
  39    for line in text.split('\n') {
  40        let trimmed = line.trim();
  41        let is_blank = trimmed.is_empty();
  42        let is_good_start = !is_blank && !is_structural_tail(trimmed);
  43        lines.push(LineInfo {
  44            start: offset,
  45            is_blank,
  46            is_good_start,
  47        });
  48        offset += line.len() + 1;
  49    }
  50    // split('\n') on "abc\n" yields ["abc", ""] — drop the phantom trailing
  51    // empty element when the text ends with '\n'.
  52    if text.ends_with('\n') && lines.len() > 1 {
  53        lines.pop();
  54    }
  55    lines
  56}
  57
  58fn is_structural_tail(trimmed_line: &str) -> bool {
  59    if trimmed_line.starts_with(&['}', ']', ')']) {
  60        return true;
  61    }
  62    matches!(
  63        trimmed_line.trim_end_matches(';'),
  64        "break" | "continue" | "return" | "throw" | "end"
  65    )
  66}
  67
  68/// Starting from line `from`, scan up to `MAX_NUDGE_LINES` forward to find a
  69/// line with `is_good_start`. Returns `None` if no suitable line is found.
  70fn skip_to_good_start(lines: &[LineInfo], from: usize) -> Option<usize> {
  71    (from..lines.len().min(from + MAX_NUDGE_LINES)).find(|&i| lines[i].is_good_start)
  72}
  73
  74/// Compute byte offsets within `editable_text` where marker boundaries should
  75/// be placed.
  76///
  77/// Returns a sorted `Vec<usize>` that always starts with `0` and ends with
  78/// `editable_text.len()`. Interior offsets are placed at line boundaries
  79/// (right after a `\n`), preferring blank-line boundaries when available and
  80/// respecting `min_block_lines` / `max_block_lines` constraints.
  81fn compute_marker_offsets_with_limits(
  82    editable_text: &str,
  83    min_block_lines: usize,
  84    max_block_lines: usize,
  85) -> Vec<usize> {
  86    if editable_text.is_empty() {
  87        return vec![0, 0];
  88    }
  89
  90    let lines = collect_line_info(editable_text);
  91    let mut offsets = vec![0usize];
  92    let mut last_boundary_line = 0;
  93    let mut i = 0;
  94
  95    while i < lines.len() {
  96        let gap = i - last_boundary_line;
  97
  98        // Blank-line split: non-blank line following blank line(s) with enough
  99        // accumulated lines.
 100        if gap >= min_block_lines && !lines[i].is_blank && i > 0 && lines[i - 1].is_blank {
 101            let target = if lines[i].is_good_start {
 102                i
 103            } else {
 104                skip_to_good_start(&lines, i).unwrap_or(i)
 105            };
 106            if lines.len() - target >= min_block_lines
 107                && lines[target].start > *offsets.last().unwrap_or(&0)
 108            {
 109                offsets.push(lines[target].start);
 110                last_boundary_line = target;
 111                i = target + 1;
 112                continue;
 113            }
 114        }
 115
 116        // Hard cap: too many lines without a split.
 117        if gap >= max_block_lines {
 118            let target = skip_to_good_start(&lines, i).unwrap_or(i);
 119            if lines[target].start > *offsets.last().unwrap_or(&0) {
 120                offsets.push(lines[target].start);
 121                last_boundary_line = target;
 122                i = target + 1;
 123                continue;
 124            }
 125        }
 126
 127        i += 1;
 128    }
 129
 130    let end = editable_text.len();
 131    if *offsets.last().unwrap_or(&0) != end {
 132        offsets.push(end);
 133    }
 134
 135    offsets
 136}
 137
 138/// Compute byte offsets within `editable_text` for the V0316/V0317 block sizing rules.
 139pub fn compute_marker_offsets(editable_text: &str) -> Vec<usize> {
 140    compute_marker_offsets_with_limits(editable_text, V0316_MIN_BLOCK_LINES, V0316_MAX_BLOCK_LINES)
 141}
 142
 143pub fn compute_marker_offsets_v0318(editable_text: &str) -> Vec<usize> {
 144    compute_marker_offsets_with_limits(editable_text, V0318_MIN_BLOCK_LINES, V0318_MAX_BLOCK_LINES)
 145}
 146
 147fn line_start_at_or_before(text: &str, offset: usize) -> usize {
 148    let bounded_offset = text.floor_char_boundary(offset.min(text.len()));
 149    text[..bounded_offset]
 150        .rfind('\n')
 151        .map(|index| index + 1)
 152        .unwrap_or(0)
 153}
 154
 155fn line_end_at_or_after(text: &str, offset: usize) -> usize {
 156    let bounded_offset = text.floor_char_boundary(offset.min(text.len()));
 157    if bounded_offset >= text.len() {
 158        return text.len();
 159    }
 160
 161    text[bounded_offset..]
 162        .find('\n')
 163        .map(|index| bounded_offset + index + 1)
 164        .unwrap_or(text.len())
 165}
 166
 167fn grow_v0327_candidate_range(
 168    text: &str,
 169    cursor_offset: usize,
 170    editable_token_limit: usize,
 171) -> std::ops::Range<usize> {
 172    if text.is_empty() {
 173        return 0..0;
 174    }
 175
 176    let byte_budget = editable_token_limit.saturating_mul(3).max(1);
 177    let half_budget = byte_budget / 2;
 178
 179    let mut start = cursor_offset.saturating_sub(half_budget);
 180    let mut end = start.saturating_add(byte_budget).min(text.len());
 181
 182    if end.saturating_sub(start) < byte_budget {
 183        start = end.saturating_sub(byte_budget);
 184    }
 185
 186    start = line_start_at_or_before(text, start);
 187    end = line_end_at_or_after(text, end);
 188
 189    if start < end {
 190        start..end
 191    } else {
 192        let line_start = line_start_at_or_before(text, cursor_offset);
 193        let line_end = line_end_at_or_after(text, cursor_offset);
 194        line_start..line_end.max(line_start)
 195    }
 196}
 197
 198fn trim_v0327_candidate_range_to_markers(
 199    text: &str,
 200    candidate_range: std::ops::Range<usize>,
 201    cursor_offset: usize,
 202) -> std::ops::Range<usize> {
 203    let candidate_text = &text[candidate_range.clone()];
 204    let marker_offsets = compute_marker_offsets_v0318(candidate_text);
 205
 206    if marker_offsets.len() <= 2 {
 207        return candidate_range;
 208    }
 209
 210    let candidate_cursor_offset = cursor_offset
 211        .saturating_sub(candidate_range.start)
 212        .min(candidate_text.len());
 213    let first_internal_marker_index = if candidate_cursor_offset >= marker_offsets[1] {
 214        1
 215    } else {
 216        0
 217    };
 218    let last_internal_marker_index = marker_offsets.len() - 2;
 219    let last_marker_index = marker_offsets.len() - 1;
 220    let end_marker_index = if candidate_cursor_offset <= marker_offsets[last_internal_marker_index]
 221    {
 222        last_internal_marker_index
 223    } else {
 224        last_marker_index
 225    };
 226
 227    let trimmed_start = candidate_range.start + marker_offsets[first_internal_marker_index];
 228    let trimmed_end = candidate_range.start + marker_offsets[end_marker_index];
 229
 230    if trimmed_start < trimmed_end {
 231        trimmed_start..trimmed_end
 232    } else {
 233        let block_index = cursor_block_index(Some(candidate_cursor_offset), &marker_offsets);
 234        let start = candidate_range.start + marker_offsets[block_index];
 235        let end = candidate_range.start + marker_offsets[block_index + 1];
 236        if start < end {
 237            start..end
 238        } else {
 239            candidate_range
 240        }
 241    }
 242}
 243
 244pub fn compute_v0327_editable_range(
 245    text: &str,
 246    cursor_offset: usize,
 247    editable_token_limit: usize,
 248) -> std::ops::Range<usize> {
 249    let candidate_range = grow_v0327_candidate_range(text, cursor_offset, editable_token_limit);
 250    trim_v0327_candidate_range_to_markers(text, candidate_range, cursor_offset)
 251}
 252
 253/// Write the editable region content with marker tags, inserting the cursor
 254/// marker at the given offset within the editable text.
 255pub fn write_editable_with_markers(
 256    output: &mut String,
 257    editable_text: &str,
 258    cursor_offset_in_editable: usize,
 259    cursor_marker: &str,
 260) {
 261    let marker_offsets = compute_marker_offsets(editable_text);
 262    let mut cursor_placed = false;
 263    for (i, &offset) in marker_offsets.iter().enumerate() {
 264        let marker_num = i + 1;
 265        if !output.is_empty() && !output.ends_with('\n') {
 266            output.push('\n');
 267        }
 268        output.push_str(&marker_tag(marker_num));
 269
 270        if let Some(&next_offset) = marker_offsets.get(i + 1) {
 271            output.push('\n');
 272            let block = &editable_text[offset..next_offset];
 273            if !cursor_placed
 274                && cursor_offset_in_editable >= offset
 275                && cursor_offset_in_editable <= next_offset
 276            {
 277                cursor_placed = true;
 278                let cursor_in_block = cursor_offset_in_editable - offset;
 279                output.push_str(&block[..cursor_in_block]);
 280                output.push_str(cursor_marker);
 281                output.push_str(&block[cursor_in_block..]);
 282            } else {
 283                output.push_str(block);
 284            }
 285        }
 286    }
 287}
 288
 289/// Strip any `<|marker_N|>` tags from `text`.
 290///
 291/// When a marker tag sits on its own line (followed by `\n`), the trailing
 292/// newline is also removed so the surrounding lines stay joined naturally.
 293fn strip_marker_tags(text: &str) -> String {
 294    let mut result = String::with_capacity(text.len());
 295    let mut pos = 0;
 296    let bytes = text.as_bytes();
 297    while let Some(rel) = text[pos..].find(MARKER_TAG_PREFIX) {
 298        result.push_str(&text[pos..pos + rel]);
 299        let num_start = pos + rel + MARKER_TAG_PREFIX.len();
 300        if let Some(suffix_rel) = text[num_start..].find(MARKER_TAG_SUFFIX) {
 301            let mut tag_end = num_start + suffix_rel + MARKER_TAG_SUFFIX.len();
 302            if bytes.get(tag_end) == Some(&b'\n') {
 303                tag_end += 1;
 304            }
 305            pos = tag_end;
 306        } else {
 307            result.push_str(MARKER_TAG_PREFIX);
 308            pos = num_start;
 309        }
 310    }
 311    result.push_str(&text[pos..]);
 312    result
 313}
 314
 315/// Parse model output that uses the marker format.
 316///
 317/// Returns `(start_marker_num, end_marker_num, content_between_markers)`.
 318/// The leading format-level newline after the start marker is stripped.
 319/// Trailing newlines are preserved so blank-line endings in the editable
 320/// region are not lost.
 321///
 322/// Any extra intermediate marker tags that the model may have inserted
 323/// between the first and last markers are stripped from the returned content.
 324pub fn extract_marker_span(text: &str) -> Result<(usize, usize, String)> {
 325    let first_tag_start = text
 326        .find(MARKER_TAG_PREFIX)
 327        .context("no start marker found in output")?;
 328    let first_num_start = first_tag_start + MARKER_TAG_PREFIX.len();
 329    let first_num_end = text[first_num_start..]
 330        .find(MARKER_TAG_SUFFIX)
 331        .map(|i| i + first_num_start)
 332        .context("malformed start marker tag")?;
 333    let start_num: usize = text[first_num_start..first_num_end]
 334        .parse()
 335        .context("start marker number is not a valid integer")?;
 336    let first_tag_end = first_num_end + MARKER_TAG_SUFFIX.len();
 337
 338    let last_tag_start = text
 339        .rfind(MARKER_TAG_PREFIX)
 340        .context("no end marker found in output")?;
 341    let last_num_start = last_tag_start + MARKER_TAG_PREFIX.len();
 342    let last_num_end = text[last_num_start..]
 343        .find(MARKER_TAG_SUFFIX)
 344        .map(|i| i + last_num_start)
 345        .context("malformed end marker tag")?;
 346    let end_num: usize = text[last_num_start..last_num_end]
 347        .parse()
 348        .context("end marker number is not a valid integer")?;
 349
 350    if start_num == end_num {
 351        return Err(anyhow!(
 352            "start and end markers are the same (marker {})",
 353            start_num
 354        ));
 355    }
 356
 357    let mut content_start = first_tag_end;
 358    if text.as_bytes().get(content_start) == Some(&b'\n') {
 359        content_start += 1;
 360    }
 361    let content_end = last_tag_start;
 362
 363    let content = &text[content_start..content_end.max(content_start)];
 364    let content = strip_marker_tags(content);
 365    Ok((start_num, end_num, content))
 366}
 367
 368/// Given old editable text and model output with marker span, reconstruct the
 369/// full new editable region.
 370pub fn apply_marker_span(old_editable: &str, output: &str) -> Result<String> {
 371    let (start_num, end_num, raw_new_span) = extract_marker_span(output)?;
 372    let marker_offsets = compute_marker_offsets(old_editable);
 373
 374    let start_idx = start_num
 375        .checked_sub(1)
 376        .context("marker numbers are 1-indexed")?;
 377    let end_idx = end_num
 378        .checked_sub(1)
 379        .context("marker numbers are 1-indexed")?;
 380    let start_byte = *marker_offsets
 381        .get(start_idx)
 382        .context("start marker number out of range")?;
 383    let end_byte = *marker_offsets
 384        .get(end_idx)
 385        .context("end marker number out of range")?;
 386
 387    if start_byte > end_byte {
 388        return Err(anyhow!("start marker must come before end marker"));
 389    }
 390
 391    let old_span = &old_editable[start_byte..end_byte];
 392    let mut new_span = raw_new_span;
 393    if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
 394        new_span.push('\n');
 395    }
 396    if !old_span.ends_with('\n') && new_span.ends_with('\n') {
 397        new_span.pop();
 398    }
 399
 400    let mut result = String::new();
 401    result.push_str(&old_editable[..start_byte]);
 402    result.push_str(&new_span);
 403    result.push_str(&old_editable[end_byte..]);
 404
 405    Ok(result)
 406}
 407
 408/// Compare old and new editable text, find the minimal marker span that covers
 409/// all changes, and encode the result with marker tags.
 410pub fn encode_from_old_and_new(
 411    old_editable: &str,
 412    new_editable: &str,
 413    cursor_offset_in_new: Option<usize>,
 414    cursor_marker: &str,
 415    end_marker: &str,
 416    no_edits_marker: &str,
 417) -> Result<String> {
 418    if old_editable == new_editable {
 419        return Ok(format!("{no_edits_marker}{end_marker}"));
 420    }
 421
 422    let marker_offsets = compute_marker_offsets(old_editable);
 423    let (common_prefix, common_suffix) =
 424        common_prefix_suffix(old_editable.as_bytes(), new_editable.as_bytes());
 425    let change_end_in_old = old_editable.len() - common_suffix;
 426
 427    let start_marker_idx = marker_offsets
 428        .iter()
 429        .rposition(|&offset| offset <= common_prefix)
 430        .unwrap_or(0);
 431    let end_marker_idx = marker_offsets
 432        .iter()
 433        .position(|&offset| offset >= change_end_in_old)
 434        .unwrap_or(marker_offsets.len() - 1);
 435
 436    let old_start = marker_offsets[start_marker_idx];
 437    let old_end = marker_offsets[end_marker_idx];
 438
 439    let new_start = old_start;
 440    let new_end = new_editable
 441        .len()
 442        .saturating_sub(old_editable.len().saturating_sub(old_end));
 443
 444    let new_span = &new_editable[new_start..new_end];
 445
 446    let start_marker_num = start_marker_idx + 1;
 447    let end_marker_num = end_marker_idx + 1;
 448
 449    let mut result = String::new();
 450    result.push_str(&marker_tag(start_marker_num));
 451    result.push('\n');
 452
 453    if let Some(cursor_offset) = cursor_offset_in_new {
 454        if cursor_offset >= new_start && cursor_offset <= new_end {
 455            let cursor_in_span = cursor_offset - new_start;
 456            let bounded = cursor_in_span.min(new_span.len());
 457            result.push_str(&new_span[..bounded]);
 458            result.push_str(cursor_marker);
 459            result.push_str(&new_span[bounded..]);
 460        } else {
 461            result.push_str(new_span);
 462        }
 463    } else {
 464        result.push_str(new_span);
 465    }
 466
 467    if !result.ends_with('\n') {
 468        result.push('\n');
 469    }
 470    result.push_str(&marker_tag(end_marker_num));
 471    result.push('\n');
 472    result.push_str(end_marker);
 473
 474    Ok(result)
 475}
 476
 477/// Extract the full editable region from text that uses marker tags.
 478///
 479/// Returns the concatenation of all block contents between the first and last
 480/// markers, with intermediate marker tags stripped.
 481pub fn extract_editable_region_from_markers(text: &str) -> Option<String> {
 482    let first_marker_start = text.find(MARKER_TAG_PREFIX)?;
 483
 484    let mut markers: Vec<(usize, usize)> = Vec::new();
 485    let mut search_start = first_marker_start;
 486    while let Some(rel_pos) = text[search_start..].find(MARKER_TAG_PREFIX) {
 487        let tag_start = search_start + rel_pos;
 488        let num_start = tag_start + MARKER_TAG_PREFIX.len();
 489        let num_end = text[num_start..].find(MARKER_TAG_SUFFIX)?;
 490        let tag_end = num_start + num_end + MARKER_TAG_SUFFIX.len();
 491        markers.push((tag_start, tag_end));
 492        search_start = tag_end;
 493    }
 494
 495    if markers.len() < 2 {
 496        return None;
 497    }
 498
 499    let (_, first_tag_end) = markers[0];
 500    let (last_tag_start, _) = markers[markers.len() - 1];
 501
 502    let mut content_start = first_tag_end;
 503    if text.as_bytes().get(content_start) == Some(&b'\n') {
 504        content_start += 1;
 505    }
 506    let mut content_end = last_tag_start;
 507    if content_end > content_start && text.as_bytes().get(content_end - 1) == Some(&b'\n') {
 508        content_end -= 1;
 509    }
 510
 511    let raw = &text[content_start..content_end];
 512    let result = strip_marker_tags(raw);
 513    let result = result.strip_suffix('\n').unwrap_or(&result).to_string();
 514    Some(result)
 515}
 516
/// One marker tag located by `collect_tags`.
struct ParsedTag {
    /// Value produced by the payload parser: an absolute marker number for
    /// `collect_marker_tags`, a signed delta for `collect_relative_marker_tags`.
    value: isize,
    /// Byte offset in the scanned text where the tag's prefix begins.
    tag_start: usize,
    /// Byte offset in the scanned text just past the tag's closing suffix.
    tag_end: usize,
}
 522
 523fn collect_tags(text: &str, prefix: &str, parse: fn(&str) -> Option<isize>) -> Vec<ParsedTag> {
 524    let mut tags = Vec::new();
 525    let mut search_from = 0;
 526    while let Some(rel_pos) = text[search_from..].find(prefix) {
 527        let tag_start = search_from + rel_pos;
 528        let payload_start = tag_start + prefix.len();
 529        if let Some(suffix_rel) = text[payload_start..].find(MARKER_TAG_SUFFIX) {
 530            let payload_end = payload_start + suffix_rel;
 531            if let Some(value) = parse(&text[payload_start..payload_end]) {
 532                let tag_end = payload_end + MARKER_TAG_SUFFIX.len();
 533                tags.push(ParsedTag {
 534                    value,
 535                    tag_start,
 536                    tag_end,
 537                });
 538                search_from = tag_end;
 539                continue;
 540            }
 541        }
 542        search_from = tag_start + prefix.len();
 543    }
 544    tags
 545}
 546
 547fn collect_marker_tags(text: &str) -> Vec<ParsedTag> {
 548    collect_tags(text, MARKER_TAG_PREFIX, |s| {
 549        s.parse::<usize>().ok().map(|n| n as isize)
 550    })
 551}
 552
 553fn collect_relative_marker_tags(text: &str) -> Vec<ParsedTag> {
 554    collect_tags(text, RELATIVE_MARKER_TAG_PREFIX, |s| {
 555        s.parse::<isize>().ok()
 556    })
 557}
 558
 559pub fn nearest_marker_number(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
 560    let cursor = cursor_offset.unwrap_or(0);
 561    marker_offsets
 562        .iter()
 563        .enumerate()
 564        .min_by_key(|(_, offset)| (**offset as isize - cursor as isize).unsigned_abs())
 565        .map(|(idx, _)| idx + 1)
 566        .unwrap_or(1)
 567}
 568
 569fn cursor_block_index(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
 570    let cursor = cursor_offset.unwrap_or(0);
 571    marker_offsets
 572        .windows(2)
 573        .position(|window| cursor >= window[0] && cursor < window[1])
 574        .unwrap_or_else(|| marker_offsets.len().saturating_sub(2))
 575}
 576
 577fn common_prefix_suffix(a: &[u8], b: &[u8]) -> (usize, usize) {
 578    let prefix = a.iter().zip(b.iter()).take_while(|(x, y)| x == y).count();
 579    let remaining_a = a.len() - prefix;
 580    let remaining_b = b.len() - prefix;
 581    let max_suffix = remaining_a.min(remaining_b);
 582    let suffix = a[a.len() - max_suffix..]
 583        .iter()
 584        .rev()
 585        .zip(b[b.len() - max_suffix..].iter().rev())
 586        .take_while(|(x, y)| x == y)
 587        .count();
 588    (prefix, suffix)
 589}
 590
 591/// Map a byte offset from old span coordinates to new span coordinates,
 592/// using common prefix/suffix within the span for accuracy.
 593fn map_boundary_offset(
 594    old_rel: usize,
 595    old_span_len: usize,
 596    new_span_len: usize,
 597    span_common_prefix: usize,
 598    span_common_suffix: usize,
 599) -> usize {
 600    if old_rel <= span_common_prefix {
 601        old_rel
 602    } else if old_rel >= old_span_len - span_common_suffix {
 603        new_span_len - (old_span_len - old_rel)
 604    } else {
 605        let old_changed_start = span_common_prefix;
 606        let old_changed_len = old_span_len
 607            .saturating_sub(span_common_prefix)
 608            .saturating_sub(span_common_suffix);
 609        let new_changed_start = span_common_prefix;
 610        let new_changed_len = new_span_len
 611            .saturating_sub(span_common_prefix)
 612            .saturating_sub(span_common_suffix);
 613
 614        new_changed_start
 615            + ((old_rel - old_changed_start) * new_changed_len)
 616                .checked_div(old_changed_len)
 617                .unwrap_or(new_changed_len)
 618    }
 619}
 620
 621fn snap_to_line_start(text: &str, offset: usize) -> usize {
 622    let bounded = offset.min(text.len());
 623    let bounded = text.floor_char_boundary(bounded);
 624
 625    if bounded >= text.len() {
 626        return text.len();
 627    }
 628
 629    if bounded == 0 || text.as_bytes().get(bounded - 1) == Some(&b'\n') {
 630        return bounded;
 631    }
 632
 633    if let Some(next_nl_rel) = text[bounded..].find('\n') {
 634        let next = bounded + next_nl_rel + 1;
 635        return text.floor_char_boundary(next.min(text.len()));
 636    }
 637
 638    let prev_start = text[..bounded].rfind('\n').map(|idx| idx + 1).unwrap_or(0);
 639    text.floor_char_boundary(prev_start)
 640}
 641
 642/// Write the editable region content with byte-exact marker tags, inserting the
 643/// cursor marker at the given offset within the editable text.
 644///
 645/// The `tag_for_index` closure maps a boundary index to the marker tag string.
 646fn write_editable_with_markers_impl(
 647    output: &mut String,
 648    editable_text: &str,
 649    cursor_offset_in_editable: usize,
 650    cursor_marker: &str,
 651    marker_offsets: &[usize],
 652    tag_for_index: impl Fn(usize) -> String,
 653) {
 654    let mut cursor_placed = false;
 655    for (i, &offset) in marker_offsets.iter().enumerate() {
 656        output.push_str(&tag_for_index(i));
 657
 658        if let Some(&next_offset) = marker_offsets.get(i + 1) {
 659            let block = &editable_text[offset..next_offset];
 660            if !cursor_placed
 661                && cursor_offset_in_editable >= offset
 662                && cursor_offset_in_editable <= next_offset
 663            {
 664                cursor_placed = true;
 665                let cursor_in_block = cursor_offset_in_editable - offset;
 666                output.push_str(&block[..cursor_in_block]);
 667                output.push_str(cursor_marker);
 668                output.push_str(&block[cursor_in_block..]);
 669            } else {
 670                output.push_str(block);
 671            }
 672        }
 673    }
 674}
 675
 676pub fn write_editable_with_markers_v0316(
 677    output: &mut String,
 678    editable_text: &str,
 679    cursor_offset_in_editable: usize,
 680    cursor_marker: &str,
 681) {
 682    let marker_offsets = compute_marker_offsets(editable_text);
 683    write_editable_with_markers_impl(
 684        output,
 685        editable_text,
 686        cursor_offset_in_editable,
 687        cursor_marker,
 688        &marker_offsets,
 689        |i| marker_tag(i + 1),
 690    );
 691}
 692
 693pub fn write_editable_with_markers_v0317(
 694    output: &mut String,
 695    editable_text: &str,
 696    cursor_offset_in_editable: usize,
 697    cursor_marker: &str,
 698) {
 699    let marker_offsets = compute_marker_offsets(editable_text);
 700    let anchor_idx = cursor_block_index(Some(cursor_offset_in_editable), &marker_offsets);
 701    write_editable_with_markers_impl(
 702        output,
 703        editable_text,
 704        cursor_offset_in_editable,
 705        cursor_marker,
 706        &marker_offsets,
 707        |i| marker_tag_relative(i as isize - anchor_idx as isize),
 708    );
 709}
 710
 711pub fn write_editable_with_markers_v0318(
 712    output: &mut String,
 713    editable_text: &str,
 714    cursor_offset_in_editable: usize,
 715    cursor_marker: &str,
 716) {
 717    let marker_offsets = compute_marker_offsets_v0318(editable_text);
 718    write_editable_with_markers_impl(
 719        output,
 720        editable_text,
 721        cursor_offset_in_editable,
 722        cursor_marker,
 723        &marker_offsets,
 724        |i| marker_tag(i + 1),
 725    );
 726}
 727
 728/// Parse byte-exact model output and reconstruct the full new editable region.
 729///
 730/// `resolve_boundary` maps a parsed tag value to an absolute byte offset in
 731/// old_editable, given the marker_offsets. Returns `(start_byte, end_byte)` or
 732/// an error.
 733fn apply_marker_span_impl(
 734    old_editable: &str,
 735    tags: &[ParsedTag],
 736    output: &str,
 737    resolve_boundaries: impl Fn(isize, isize) -> Result<(usize, usize)>,
 738) -> Result<String> {
 739    if tags.is_empty() {
 740        return Err(anyhow!("no marker tags found in output"));
 741    }
 742    if tags.len() == 1 {
 743        return Err(anyhow!(
 744            "only one marker tag found in output, expected at least two"
 745        ));
 746    }
 747
 748    let start_value = tags[0].value;
 749    let end_value = tags[tags.len() - 1].value;
 750
 751    if start_value == end_value {
 752        return Ok(old_editable.to_string());
 753    }
 754
 755    let (start_byte, end_byte) = resolve_boundaries(start_value, end_value)?;
 756
 757    if start_byte > end_byte {
 758        return Err(anyhow!("start marker must come before end marker"));
 759    }
 760
 761    let mut new_content = String::new();
 762    for i in 0..tags.len() - 1 {
 763        let content_start = tags[i].tag_end;
 764        let content_end = tags[i + 1].tag_start;
 765        if content_start <= content_end {
 766            new_content.push_str(&output[content_start..content_end]);
 767        }
 768    }
 769
 770    let mut result = String::new();
 771    result.push_str(&old_editable[..start_byte]);
 772    result.push_str(&new_content);
 773    result.push_str(&old_editable[end_byte..]);
 774
 775    Ok(result)
 776}
 777
 778pub fn apply_marker_span_v0316(old_editable: &str, output: &str) -> Result<String> {
 779    let tags = collect_marker_tags(output);
 780
 781    // Validate monotonically increasing with no gaps (best-effort warning)
 782    if tags.len() >= 2 {
 783        let start_num = tags[0].value;
 784        let end_num = tags[tags.len() - 1].value;
 785        if start_num != end_num {
 786            let expected: Vec<isize> = (start_num..=end_num).collect();
 787            let actual: Vec<isize> = tags.iter().map(|t| t.value).collect();
 788            if actual != expected {
 789                eprintln!(
 790                    "V0316 marker sequence validation failed: expected {:?}, got {:?}. Attempting best-effort parse.",
 791                    expected, actual
 792                );
 793            }
 794        }
 795    }
 796
 797    let marker_offsets = compute_marker_offsets(old_editable);
 798    apply_marker_span_impl(old_editable, &tags, output, |start_val, end_val| {
 799        let start_idx = (start_val as usize)
 800            .checked_sub(1)
 801            .context("marker numbers are 1-indexed")?;
 802        let end_idx = (end_val as usize)
 803            .checked_sub(1)
 804            .context("marker numbers are 1-indexed")?;
 805        let start_byte = *marker_offsets
 806            .get(start_idx)
 807            .context("start marker number out of range")?;
 808        let end_byte = *marker_offsets
 809            .get(end_idx)
 810            .context("end marker number out of range")?;
 811        Ok((start_byte, end_byte))
 812    })
 813}
 814
 815pub fn apply_marker_span_v0317(
 816    old_editable: &str,
 817    output: &str,
 818    cursor_offset_in_old: Option<usize>,
 819) -> Result<String> {
 820    let tags = collect_relative_marker_tags(output);
 821    let marker_offsets = compute_marker_offsets(old_editable);
 822    let anchor_idx = cursor_block_index(cursor_offset_in_old, &marker_offsets);
 823
 824    apply_marker_span_impl(old_editable, &tags, output, |start_delta, end_delta| {
 825        let start_idx_signed = anchor_idx as isize + start_delta;
 826        let end_idx_signed = anchor_idx as isize + end_delta;
 827        if start_idx_signed < 0 || end_idx_signed < 0 {
 828            return Err(anyhow!("relative marker maps before first marker"));
 829        }
 830        let start_idx = usize::try_from(start_idx_signed).context("invalid start marker index")?;
 831        let end_idx = usize::try_from(end_idx_signed).context("invalid end marker index")?;
 832        let start_byte = *marker_offsets
 833            .get(start_idx)
 834            .context("start marker number out of range")?;
 835        let end_byte = *marker_offsets
 836            .get(end_idx)
 837            .context("end marker number out of range")?;
 838        Ok((start_byte, end_byte))
 839    })
 840}
 841
 842pub fn apply_marker_span_v0318(old_editable: &str, output: &str) -> Result<String> {
 843    let tags = collect_marker_tags(output);
 844
 845    if tags.len() >= 2 {
 846        let start_num = tags[0].value;
 847        let end_num = tags[tags.len() - 1].value;
 848        if start_num != end_num {
 849            let expected: Vec<isize> = (start_num..=end_num).collect();
 850            let actual: Vec<isize> = tags.iter().map(|t| t.value).collect();
 851            if actual != expected {
 852                eprintln!(
 853                    "V0318 marker sequence validation failed: expected {:?}, got {:?}. Attempting best-effort parse.",
 854                    expected, actual
 855                );
 856            }
 857        }
 858    }
 859
 860    let marker_offsets = compute_marker_offsets_v0318(old_editable);
 861    apply_marker_span_impl(old_editable, &tags, output, |start_val, end_val| {
 862        let start_idx = (start_val as usize)
 863            .checked_sub(1)
 864            .context("marker numbers are 1-indexed")?;
 865        let end_idx = (end_val as usize)
 866            .checked_sub(1)
 867            .context("marker numbers are 1-indexed")?;
 868        let start_byte = *marker_offsets
 869            .get(start_idx)
 870            .context("start marker number out of range")?;
 871        let end_byte = *marker_offsets
 872            .get(end_idx)
 873            .context("end marker number out of range")?;
 874        Ok((start_byte, end_byte))
 875    })
 876}
 877
 878/// Encode the training target from old and new editable text.
 879///
 880/// Shared implementation for V0316, V0317, and V0318. The `tag_for_block_idx`
 881/// closure maps a block index to the appropriate marker tag string.
 882/// `no_edit_tag` is the marker tag to repeat when there are no edits.
 883fn encode_from_old_and_new_impl(
 884    old_editable: &str,
 885    new_editable: &str,
 886    cursor_offset_in_new: Option<usize>,
 887    cursor_marker: &str,
 888    end_marker: &str,
 889    no_edit_tag: &str,
 890    marker_offsets: &[usize],
 891    tag_for_block_idx: impl Fn(usize) -> String,
 892) -> Result<String> {
 893    if old_editable == new_editable {
 894        return Ok(format!("{no_edit_tag}{no_edit_tag}{end_marker}"));
 895    }
 896
 897    let (common_prefix, common_suffix) =
 898        common_prefix_suffix(old_editable.as_bytes(), new_editable.as_bytes());
 899    let change_end_in_old = old_editable.len() - common_suffix;
 900
 901    let mut start_marker_idx = marker_offsets
 902        .iter()
 903        .rposition(|&offset| offset <= common_prefix)
 904        .unwrap_or(0);
 905    let mut end_marker_idx = marker_offsets
 906        .iter()
 907        .position(|&offset| offset >= change_end_in_old)
 908        .unwrap_or(marker_offsets.len() - 1);
 909
 910    if start_marker_idx == end_marker_idx {
 911        if end_marker_idx < marker_offsets.len().saturating_sub(1) {
 912            end_marker_idx += 1;
 913        } else if start_marker_idx > 0 {
 914            start_marker_idx -= 1;
 915        }
 916    }
 917
 918    let old_start = marker_offsets[start_marker_idx];
 919    let old_end = marker_offsets[end_marker_idx];
 920
 921    let new_start = old_start;
 922    let new_end = new_editable
 923        .len()
 924        .saturating_sub(old_editable.len().saturating_sub(old_end));
 925
 926    let new_span = &new_editable[new_start..new_end];
 927    let old_span = &old_editable[old_start..old_end];
 928
 929    let (span_common_prefix, span_common_suffix) =
 930        common_prefix_suffix(old_span.as_bytes(), new_span.as_bytes());
 931
 932    let mut result = String::new();
 933    let mut prev_new_rel = 0usize;
 934    let mut cursor_placed = false;
 935
 936    for block_idx in start_marker_idx..end_marker_idx {
 937        result.push_str(&tag_for_block_idx(block_idx));
 938
 939        let new_rel_end = if block_idx + 1 == end_marker_idx {
 940            new_span.len()
 941        } else {
 942            let old_rel = marker_offsets[block_idx + 1] - old_start;
 943            let mapped = map_boundary_offset(
 944                old_rel,
 945                old_span.len(),
 946                new_span.len(),
 947                span_common_prefix,
 948                span_common_suffix,
 949            );
 950            snap_to_line_start(new_span, mapped)
 951        };
 952
 953        let new_rel_end = new_rel_end.max(prev_new_rel);
 954        let block_content = &new_span[prev_new_rel..new_rel_end];
 955
 956        if !cursor_placed {
 957            if let Some(cursor_offset) = cursor_offset_in_new {
 958                let abs_start = new_start + prev_new_rel;
 959                let abs_end = new_start + new_rel_end;
 960                if cursor_offset >= abs_start && cursor_offset <= abs_end {
 961                    cursor_placed = true;
 962                    let cursor_in_block = cursor_offset - abs_start;
 963                    let bounded = cursor_in_block.min(block_content.len());
 964                    result.push_str(&block_content[..bounded]);
 965                    result.push_str(cursor_marker);
 966                    result.push_str(&block_content[bounded..]);
 967                    prev_new_rel = new_rel_end;
 968                    continue;
 969                }
 970            }
 971        }
 972
 973        result.push_str(block_content);
 974        prev_new_rel = new_rel_end;
 975    }
 976
 977    result.push_str(&tag_for_block_idx(end_marker_idx));
 978    result.push_str(end_marker);
 979
 980    Ok(result)
 981}
 982
 983pub fn encode_from_old_and_new_v0316(
 984    old_editable: &str,
 985    new_editable: &str,
 986    cursor_offset_in_new: Option<usize>,
 987    cursor_marker: &str,
 988    end_marker: &str,
 989) -> Result<String> {
 990    let marker_offsets = compute_marker_offsets(old_editable);
 991    let no_edit_tag = marker_tag(nearest_marker_number(cursor_offset_in_new, &marker_offsets));
 992    encode_from_old_and_new_impl(
 993        old_editable,
 994        new_editable,
 995        cursor_offset_in_new,
 996        cursor_marker,
 997        end_marker,
 998        &no_edit_tag,
 999        &marker_offsets,
1000        |block_idx| marker_tag(block_idx + 1),
1001    )
1002}
1003
1004pub fn encode_from_old_and_new_v0317(
1005    old_editable: &str,
1006    new_editable: &str,
1007    cursor_offset_in_new: Option<usize>,
1008    cursor_marker: &str,
1009    end_marker: &str,
1010) -> Result<String> {
1011    let marker_offsets = compute_marker_offsets(old_editable);
1012    let anchor_idx = cursor_block_index(cursor_offset_in_new, &marker_offsets);
1013    let no_edit_tag = marker_tag_relative(0);
1014    encode_from_old_and_new_impl(
1015        old_editable,
1016        new_editable,
1017        cursor_offset_in_new,
1018        cursor_marker,
1019        end_marker,
1020        &no_edit_tag,
1021        &marker_offsets,
1022        |block_idx| marker_tag_relative(block_idx as isize - anchor_idx as isize),
1023    )
1024}
1025
1026pub fn encode_from_old_and_new_v0318(
1027    old_editable: &str,
1028    new_editable: &str,
1029    cursor_offset_in_new: Option<usize>,
1030    cursor_marker: &str,
1031    end_marker: &str,
1032) -> Result<String> {
1033    let marker_offsets = compute_marker_offsets_v0318(old_editable);
1034    let no_edit_tag = marker_tag(nearest_marker_number(cursor_offset_in_new, &marker_offsets));
1035    encode_from_old_and_new_impl(
1036        old_editable,
1037        new_editable,
1038        cursor_offset_in_new,
1039        cursor_marker,
1040        end_marker,
1041        &no_edit_tag,
1042        &marker_offsets,
1043        |block_idx| marker_tag(block_idx + 1),
1044    )
1045}
1046
1047#[cfg(test)]
1048mod tests {
1049    use super::*;
1050
1051    #[test]
1052    fn test_compute_marker_offsets_small_block() {
1053        let text = "aaa\nbbb\nccc\n";
1054        let offsets = compute_marker_offsets(text);
1055        assert_eq!(offsets, vec![0, text.len()]);
1056    }
1057
1058    #[test]
1059    fn test_compute_marker_offsets_blank_line_split() {
1060        let text = "aaa\nbbb\nccc\n\nddd\neee\nfff\n";
1061        let offsets = compute_marker_offsets(text);
1062        assert_eq!(offsets[0], 0);
1063        assert!(offsets.contains(&13), "offsets: {:?}", offsets);
1064        assert_eq!(*offsets.last().unwrap(), text.len());
1065    }
1066
1067    #[test]
1068    fn test_compute_marker_offsets_blank_line_split_overrides_pending_hard_cap_boundary() {
1069        let text = "\
1070class OCRDataframe(BaseModel):
1071    model_config = ConfigDict(arbitrary_types_allowed=True)
1072
1073    df: pl.DataFrame
1074
1075    def page(self, page_number: int = 0) -> \"OCRDataframe\":
1076        # Filter dataframe on specific page
1077        df_page = self.df.filter(pl.col(\"page\") == page_number)
1078        return OCRDataframe(df=df_page)
1079
1080    def get_text_cell(
1081        self,
1082        cell: Cell,
1083        margin: int = 0,
1084        page_number: Optional[int] = None,
1085        min_confidence: int = 50,
1086    ) -> Optional[str]:
1087        \"\"\"
1088        Get text corresponding to cell
1089";
1090        let offsets = compute_marker_offsets(text);
1091
1092        let def_start = text
1093            .find("    def get_text_cell(")
1094            .expect("def line exists");
1095        let self_start = text.find("        self,").expect("self line exists");
1096
1097        assert!(
1098            offsets.contains(&def_start),
1099            "expected boundary at def line start ({def_start}), got {offsets:?}"
1100        );
1101        assert!(
1102            !offsets.contains(&self_start),
1103            "did not expect boundary at self line start ({self_start}), got {offsets:?}"
1104        );
1105    }
1106
1107    #[test]
1108    fn test_compute_marker_offsets_blank_line_split_skips_closer_line() {
1109        let text = "\
1110impl Plugin for AhoySchedulePlugin {
1111    fn build(&self, app: &mut App) {
1112        app.configure_sets(
1113            self.schedule,
1114            (
1115                AhoySystems::MoveCharacters,
1116                AhoySystems::ApplyForcesToDynamicRigidBodies,
1117            )
1118                .chain()
1119                .before(PhysicsSystems::First),
1120        );
1121
1122    }
1123}
1124
1125/// System set used by all systems of `bevy_ahoy`.
1126#[derive(SystemSet, Debug, Clone, Copy, Hash, PartialEq, Eq)]
1127pub enum AhoySystems {
1128    MoveCharacters,
1129    ApplyForcesToDynamicRigidBodies,
1130}
1131";
1132        let offsets = compute_marker_offsets(text);
1133
1134        let closer_start = text.find("    }\n").expect("closer line exists");
1135        let doc_start = text
1136            .find("/// System set used by all systems of `bevy_ahoy`.")
1137            .expect("doc line exists");
1138
1139        assert!(
1140            !offsets.contains(&closer_start),
1141            "did not expect boundary at closer line start ({closer_start}), got {offsets:?}"
1142        );
1143        assert!(
1144            offsets.contains(&doc_start),
1145            "expected boundary at doc line start ({doc_start}), got {offsets:?}"
1146        );
1147    }
1148
1149    #[test]
1150    fn test_compute_marker_offsets_max_lines_split() {
1151        let text = "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n";
1152        let offsets = compute_marker_offsets(text);
1153        assert!(offsets.len() >= 3, "offsets: {:?}", offsets);
1154    }
1155
1156    #[test]
1157    fn test_compute_marker_offsets_hard_cap_nudges_past_closer_to_case_line() {
1158        let text = "a1\na2\na3\na4\na5\na6\na7\na8\n}\ncase 'x': {\nbody\n";
1159        let offsets = compute_marker_offsets(text);
1160
1161        let expected = text.find("case 'x': {").expect("case line exists");
1162        assert!(
1163            offsets.contains(&expected),
1164            "expected nudged boundary at case line start ({expected}), got {offsets:?}"
1165        );
1166    }
1167
1168    #[test]
1169    fn test_compute_marker_offsets_hard_cap_nudge_respects_max_forward_lines() {
1170        let text = "a1\na2\na3\na4\na5\na6\na7\na8\n}\n}\n}\n}\n}\ncase 'x': {\nbody\n";
1171        let offsets = compute_marker_offsets(text);
1172
1173        let case_start = text.find("case 'x': {").expect("case line exists");
1174        assert!(
1175            !offsets.contains(&case_start),
1176            "boundary should not nudge beyond max forward lines; offsets: {offsets:?}"
1177        );
1178    }
1179
1180    #[test]
1181    fn test_compute_marker_offsets_stay_sorted_when_hard_cap_boundary_nudges_forward() {
1182        let text = "\
1183aaaaaaaaaa = 1;
1184bbbbbbbbbb = 2;
1185cccccccccc = 3;
1186dddddddddd = 4;
1187eeeeeeeeee = 5;
1188ffffffffff = 6;
1189gggggggggg = 7;
1190hhhhhhhhhh = 8;
1191          };
1192        };
1193
1194        grafanaDashboards = {
1195          cluster-overview.spec = {
1196            inherit instanceSelector;
1197            folderRef = \"infrastructure\";
1198            json = builtins.readFile ./grafana/dashboards/cluster-overview.json;
1199          };
1200        };
1201";
1202        let offsets = compute_marker_offsets(text);
1203
1204        assert_eq!(offsets.first().copied(), Some(0), "offsets: {offsets:?}");
1205        assert_eq!(
1206            offsets.last().copied(),
1207            Some(text.len()),
1208            "offsets: {offsets:?}"
1209        );
1210        assert!(
1211            offsets.windows(2).all(|window| window[0] <= window[1]),
1212            "offsets must be sorted: {offsets:?}"
1213        );
1214    }
1215
1216    #[test]
1217    fn test_compute_marker_offsets_empty() {
1218        let offsets = compute_marker_offsets("");
1219        assert_eq!(offsets, vec![0, 0]);
1220    }
1221
1222    #[test]
1223    fn test_compute_v0327_editable_range_trims_to_marker_boundaries() {
1224        let text = (0..80).map(|_| "x\n").collect::<String>();
1225        let cursor_offset = text.find("x\nx\nx\nx\nx\n").expect("cursor anchor exists") + 40;
1226
1227        let candidate_range = grow_v0327_candidate_range(&text, cursor_offset, 20);
1228        let editable_range = compute_v0327_editable_range(&text, cursor_offset, 20);
1229        let marker_offsets = compute_marker_offsets_v0318(&text[candidate_range.clone()]);
1230        let relative_start = editable_range.start - candidate_range.start;
1231        let relative_end = editable_range.end - candidate_range.start;
1232
1233        assert!(
1234            marker_offsets.len() > 2,
1235            "expected interior markers: {marker_offsets:?}"
1236        );
1237        assert!(marker_offsets.contains(&relative_start));
1238        assert!(marker_offsets.contains(&relative_end));
1239        assert!(editable_range.start <= cursor_offset);
1240        assert!(editable_range.end >= cursor_offset);
1241        assert!(
1242            editable_range.start > candidate_range.start
1243                || editable_range.end < candidate_range.end,
1244            "expected at least one side to trim from {candidate_range:?} down to {editable_range:?}"
1245        );
1246    }
1247
1248    #[test]
1249    fn test_compute_marker_offsets_avoid_short_markdown_blocks() {
1250        let text = "\
1251# Spree Posts
1252
1253This is a Posts extension for [Spree Commerce](https://spreecommerce.org), built with Ruby on Rails.
1254
1255## Installation
1256
12571. Add this extension to your Gemfile with this line:
1258
1259    ```ruby
1260    bundle add spree_posts
1261    ```
1262
12632. Run the install generator
1264
1265    ```ruby
1266    bundle exec rails g spree_posts:install
1267    ```
1268
12693. Restart your server
1270
1271  If your server was running, restart it so that it can find the assets properly.
1272
1273## Developing
1274
12751. Create a dummy app
1276
1277    ```bash
1278    bundle update
1279    bundle exec rake test_app
1280    ```
1281
12822. Add your new code
12833. Run tests
1284
1285    ```bash
1286    bundle exec rspec
1287    ```
1288
1289When testing your applications integration with this extension you may use it's factories.
1290Simply add this require statement to your spec_helper:
1291
1292```ruby
1293require 'spree_posts/factories'
1294```
1295
1296## Releasing a new version
1297
1298```shell
1299bundle exec gem bump -p -t
1300bundle exec gem release
1301```
1302
1303For more options please see [gem-release README](https://github.com/svenfuchs/gem-release)
1304
1305## Contributing
1306
1307If you'd like to contribute, please take a look at the contributing guide.
1308";
1309        let offsets = compute_marker_offsets(text);
1310
1311        assert_eq!(offsets.first().copied(), Some(0), "offsets: {offsets:?}");
1312        assert_eq!(
1313            offsets.last().copied(),
1314            Some(text.len()),
1315            "offsets: {offsets:?}"
1316        );
1317
1318        for window in offsets.windows(2) {
1319            let block = &text[window[0]..window[1]];
1320            let line_count = block.lines().count();
1321            assert!(
1322                line_count >= V0316_MIN_BLOCK_LINES,
1323                "block too short: {line_count} lines in block {block:?} with offsets {offsets:?}"
1324            );
1325        }
1326    }
1327
1328    #[test]
1329    fn test_extract_marker_span() {
1330        let text = "<|marker_2|>\n    new content\n<|marker_3|>\n";
1331        let (start, end, content) = extract_marker_span(text).unwrap();
1332        assert_eq!(start, 2);
1333        assert_eq!(end, 3);
1334        assert_eq!(content, "    new content\n");
1335    }
1336
1337    #[test]
1338    fn test_extract_marker_span_multi_line() {
1339        let text = "<|marker_1|>\nline1\nline2\nline3\n<|marker_4|>";
1340        let (start, end, content) = extract_marker_span(text).unwrap();
1341        assert_eq!(start, 1);
1342        assert_eq!(end, 4);
1343        assert_eq!(content, "line1\nline2\nline3\n");
1344    }
1345
1346    #[test]
1347    fn test_apply_marker_span_basic() {
1348        let old = "aaa\nbbb\nccc\n";
1349        let output = "<|marker_1|>\naaa\nBBB\nccc\n<|marker_2|>";
1350        let result = apply_marker_span(old, output).unwrap();
1351        assert_eq!(result, "aaa\nBBB\nccc\n");
1352    }
1353
1354    #[test]
1355    fn test_apply_marker_span_preserves_trailing_blank_line() {
1356        let old = "/\nresult\n\n";
1357        let output = "<|marker_1|>\n//\nresult\n\n<|marker_2|>";
1358        let result = apply_marker_span(old, output).unwrap();
1359        assert_eq!(result, "//\nresult\n\n");
1360    }
1361
1362    #[test]
1363    fn test_encode_no_edits() {
1364        let old = "aaa\nbbb\nccc\n";
1365        let result = encode_from_old_and_new(
1366            old,
1367            old,
1368            None,
1369            "<|user_cursor|>",
1370            ">>>>>>> UPDATED\n",
1371            "NO_EDITS\n",
1372        )
1373        .unwrap();
1374        assert_eq!(result, "NO_EDITS\n>>>>>>> UPDATED\n");
1375    }
1376
1377    #[test]
1378    fn test_encode_with_change() {
1379        let old = "aaa\nbbb\nccc\n";
1380        let new = "aaa\nBBB\nccc\n";
1381        let result = encode_from_old_and_new(
1382            old,
1383            new,
1384            None,
1385            "<|user_cursor|>",
1386            ">>>>>>> UPDATED\n",
1387            "NO_EDITS\n",
1388        )
1389        .unwrap();
1390        assert!(result.contains("<|marker_1|>"));
1391        assert!(result.contains("<|marker_2|>"));
1392        assert!(result.contains("aaa\nBBB\nccc\n"));
1393        assert!(result.ends_with(">>>>>>> UPDATED\n"));
1394    }
1395
1396    #[test]
1397    fn test_roundtrip_encode_apply() {
1398        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
1399        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
1400        let encoded = encode_from_old_and_new(
1401            old,
1402            new,
1403            None,
1404            "<|user_cursor|>",
1405            ">>>>>>> UPDATED\n",
1406            "NO_EDITS\n",
1407        )
1408        .unwrap();
1409        let output = encoded
1410            .strip_suffix(">>>>>>> UPDATED\n")
1411            .expect("should have end marker");
1412        let reconstructed = apply_marker_span(old, output).unwrap();
1413        assert_eq!(reconstructed, new);
1414    }
1415
1416    #[test]
1417    fn test_extract_editable_region_from_markers_multi() {
1418        let text = "prefix\n<|marker_1|>\naaa\nbbb\n<|marker_2|>\nccc\nddd\n<|marker_3|>\nsuffix";
1419        let parsed = extract_editable_region_from_markers(text).unwrap();
1420        assert_eq!(parsed, "aaa\nbbb\nccc\nddd");
1421    }
1422
1423    #[test]
1424    fn test_extract_editable_region_two_markers() {
1425        let text = "<|marker_1|>\none\ntwo three\n<|marker_2|>";
1426        let parsed = extract_editable_region_from_markers(text).unwrap();
1427        assert_eq!(parsed, "one\ntwo three");
1428    }
1429
1430    #[test]
1431    fn test_encode_with_cursor() {
1432        let old = "aaa\nbbb\nccc\n";
1433        let new = "aaa\nBBB\nccc\n";
1434        let result = encode_from_old_and_new(
1435            old,
1436            new,
1437            Some(5),
1438            "<|user_cursor|>",
1439            ">>>>>>> UPDATED\n",
1440            "NO_EDITS\n",
1441        )
1442        .unwrap();
1443        assert!(result.contains("<|user_cursor|>"), "result: {result}");
1444        assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
1445    }
1446
1447    #[test]
1448    fn test_extract_marker_span_strips_intermediate_markers() {
1449        let text = "<|marker_2|>\nline1\n<|marker_3|>\nline2\n<|marker_4|>";
1450        let (start, end, content) = extract_marker_span(text).unwrap();
1451        assert_eq!(start, 2);
1452        assert_eq!(end, 4);
1453        assert_eq!(content, "line1\nline2\n");
1454    }
1455
1456    #[test]
1457    fn test_extract_marker_span_strips_multiple_intermediate_markers() {
1458        let text = "<|marker_1|>\naaa\n<|marker_2|>\nbbb\n<|marker_3|>\nccc\n<|marker_4|>";
1459        let (start, end, content) = extract_marker_span(text).unwrap();
1460        assert_eq!(start, 1);
1461        assert_eq!(end, 4);
1462        assert_eq!(content, "aaa\nbbb\nccc\n");
1463    }
1464
1465    #[test]
1466    fn test_apply_marker_span_with_extra_intermediate_marker() {
1467        let old = "aaa\nbbb\nccc\n";
1468        let output = "<|marker_1|>\naaa\n<|marker_1|>\nBBB\nccc\n<|marker_2|>";
1469        let result = apply_marker_span(old, output).unwrap();
1470        assert_eq!(result, "aaa\nBBB\nccc\n");
1471    }
1472
1473    #[test]
1474    fn test_strip_marker_tags_inline() {
1475        assert_eq!(strip_marker_tags("no markers here"), "no markers here");
1476        assert_eq!(strip_marker_tags("before<|marker_5|>after"), "beforeafter");
1477        assert_eq!(
1478            strip_marker_tags("line1\n<|marker_3|>\nline2"),
1479            "line1\nline2"
1480        );
1481    }
1482
1483    #[test]
1484    fn test_write_editable_with_markers_v0316_byte_exact() {
1485        let editable = "aaa\nbbb\nccc\n";
1486        let mut output = String::new();
1487        write_editable_with_markers_v0316(&mut output, editable, 4, "<|user_cursor|>");
1488        assert!(output.starts_with("<|marker_1|>"));
1489        assert!(output.contains("<|user_cursor|>"));
1490        let stripped = output.replace("<|user_cursor|>", "");
1491        let stripped = strip_marker_tags(&stripped);
1492        assert_eq!(stripped, editable);
1493    }
1494
1495    #[test]
1496    fn test_apply_marker_span_v0316_basic() {
1497        let old = "aaa\nbbb\nccc\n";
1498        let output = "<|marker_1|>aaa\nBBB\nccc\n<|marker_2|>";
1499        let result = apply_marker_span_v0316(old, output).unwrap();
1500        assert_eq!(result, "aaa\nBBB\nccc\n");
1501    }
1502
1503    #[test]
1504    fn test_apply_marker_span_v0316_no_edit() {
1505        let old = "aaa\nbbb\nccc\n";
1506        let output = "<|marker_1|><|marker_1|>";
1507        let result = apply_marker_span_v0316(old, output).unwrap();
1508        assert_eq!(result, old);
1509    }
1510
1511    #[test]
1512    fn test_apply_marker_span_v0316_no_edit_any_marker() {
1513        let old = "aaa\nbbb\nccc\n";
1514        let output = "<|marker_2|>ignored content<|marker_2|>";
1515        let result = apply_marker_span_v0316(old, output).unwrap();
1516        assert_eq!(result, old);
1517    }
1518
1519    #[test]
1520    fn test_apply_marker_span_v0316_multi_block() {
1521        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
1522        let marker_offsets = compute_marker_offsets(old);
1523        assert!(
1524            marker_offsets.len() >= 3,
1525            "expected at least 3 offsets, got {:?}",
1526            marker_offsets
1527        );
1528
1529        let new_content = "LINE1\nLINE2\nLINE3\n\nLINE5\nLINE6\nLINE7\nLINE8\n";
1530        let mut output = String::new();
1531        output.push_str("<|marker_1|>");
1532        for i in 0..marker_offsets.len() - 1 {
1533            if i > 0 {
1534                output.push_str(&marker_tag(i + 1));
1535            }
1536            let start = marker_offsets[i];
1537            let end = marker_offsets[i + 1];
1538            let block_len = end - start;
1539            output.push_str(&new_content[start..start + block_len]);
1540        }
1541        let last_marker_num = marker_offsets.len();
1542        output.push_str(&marker_tag(last_marker_num));
1543        let result = apply_marker_span_v0316(old, &output).unwrap();
1544        assert_eq!(result, new_content);
1545    }
1546
1547    #[test]
1548    fn test_apply_marker_span_v0316_byte_exact_no_normalization() {
1549        let old = "aaa\nbbb\nccc\n";
1550        let output = "<|marker_1|>aaa\nBBB\nccc<|marker_2|>";
1551        let result = apply_marker_span_v0316(old, output).unwrap();
1552        assert_eq!(result, "aaa\nBBB\nccc");
1553    }
1554
1555    #[test]
1556    fn test_encode_v0316_no_edits() {
1557        let old = "aaa\nbbb\nccc\n";
1558        let result =
1559            encode_from_old_and_new_v0316(old, old, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1560        assert!(result.ends_with("<|end|>"));
1561        let stripped = result.strip_suffix("<|end|>").unwrap();
1562        let result_parsed = apply_marker_span_v0316(old, stripped).unwrap();
1563        assert_eq!(result_parsed, old);
1564    }
1565
1566    #[test]
1567    fn test_encode_v0316_with_change() {
1568        let old = "aaa\nbbb\nccc\n";
1569        let new = "aaa\nBBB\nccc\n";
1570        let result =
1571            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1572        assert!(result.contains("<|marker_1|>"));
1573        assert!(result.contains("<|marker_2|>"));
1574        assert!(result.ends_with("<|end|>"));
1575    }
1576
1577    #[test]
1578    fn test_roundtrip_v0316() {
1579        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
1580        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
1581        let encoded =
1582            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1583        let stripped = encoded
1584            .strip_suffix("<|end|>")
1585            .expect("should have end marker");
1586        let reconstructed = apply_marker_span_v0316(old, stripped).unwrap();
1587        assert_eq!(reconstructed, new);
1588    }
1589
1590    #[test]
1591    fn test_roundtrip_v0316_with_cursor() {
1592        let old = "aaa\nbbb\nccc\n";
1593        let new = "aaa\nBBB\nccc\n";
1594        let result =
1595            encode_from_old_and_new_v0316(old, new, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1596        assert!(result.contains("<|user_cursor|>"), "result: {result}");
1597        assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
1598    }
1599
1600    #[test]
1601    fn test_roundtrip_v0316_multi_block_change() {
1602        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
1603        let new = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
1604        let encoded =
1605            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1606        let stripped = encoded
1607            .strip_suffix("<|end|>")
1608            .expect("should have end marker");
1609        let reconstructed = apply_marker_span_v0316(old, stripped).unwrap();
1610        assert_eq!(reconstructed, new);
1611    }
1612
1613    #[test]
1614    fn test_nearest_marker_number() {
1615        let offsets = vec![0, 10, 20, 30];
1616        assert_eq!(nearest_marker_number(Some(0), &offsets), 1);
1617        assert_eq!(nearest_marker_number(Some(9), &offsets), 2);
1618        assert_eq!(nearest_marker_number(Some(15), &offsets), 2);
1619        assert_eq!(nearest_marker_number(Some(25), &offsets), 3);
1620        assert_eq!(nearest_marker_number(Some(30), &offsets), 4);
1621        assert_eq!(nearest_marker_number(None, &offsets), 1);
1622    }
1623
1624    #[test]
1625    fn test_marker_tag_relative_formats_as_expected() {
1626        assert_eq!(marker_tag_relative(-2), "<|marker-2|>");
1627        assert_eq!(marker_tag_relative(-1), "<|marker-1|>");
1628        assert_eq!(marker_tag_relative(0), "<|marker-0|>");
1629        assert_eq!(marker_tag_relative(1), "<|marker+1|>");
1630        assert_eq!(marker_tag_relative(2), "<|marker+2|>");
1631    }
1632
1633    #[test]
1634    fn test_write_editable_with_markers_v0317_includes_relative_markers_and_cursor() {
1635        let editable = "aaa\nbbb\nccc\n";
1636        let mut output = String::new();
1637        write_editable_with_markers_v0317(&mut output, editable, 4, "<|user_cursor|>");
1638
1639        assert!(output.contains("<|marker-0|>"));
1640        assert!(output.contains("<|user_cursor|>"));
1641
1642        let stripped = output.replace("<|user_cursor|>", "");
1643        let stripped =
1644            collect_relative_marker_tags(&stripped)
1645                .iter()
1646                .fold(stripped.clone(), |acc, marker| {
1647                    let tag = &stripped[marker.tag_start..marker.tag_end];
1648                    acc.replace(tag, "")
1649                });
1650        assert_eq!(stripped, editable);
1651    }
1652
1653    #[test]
1654    fn test_apply_marker_span_v0317_basic() {
1655        let old = "aaa\nbbb\nccc\n";
1656        let output = "<|marker-0|>aaa\nBBB\nccc\n<|marker+1|>";
1657        let result = apply_marker_span_v0317(old, output, Some(0)).unwrap();
1658        assert_eq!(result, "aaa\nBBB\nccc\n");
1659    }
1660
1661    #[test]
1662    fn test_apply_marker_span_v0317_no_edit() {
1663        let old = "aaa\nbbb\nccc\n";
1664        let output = "<|marker-0|><|marker-0|>";
1665        let result = apply_marker_span_v0317(old, output, Some(0)).unwrap();
1666        assert_eq!(result, old);
1667    }
1668
1669    #[test]
1670    fn test_encode_v0317_no_edits() {
1671        let old = "aaa\nbbb\nccc\n";
1672        let result =
1673            encode_from_old_and_new_v0317(old, old, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1674        assert_eq!(result, "<|marker-0|><|marker-0|><|end|>");
1675    }
1676
1677    #[test]
1678    fn test_roundtrip_v0317() {
1679        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
1680        let new = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
1681        let cursor = Some(6);
1682
1683        let encoded =
1684            encode_from_old_and_new_v0317(old, new, cursor, "<|user_cursor|>", "<|end|>").unwrap();
1685        let stripped = encoded
1686            .strip_suffix("<|end|>")
1687            .expect("should have end marker");
1688        let stripped = stripped.replace("<|user_cursor|>", "");
1689        let reconstructed = apply_marker_span_v0317(old, &stripped, cursor).unwrap();
1690        assert_eq!(reconstructed, new);
1691    }
1692
1693    #[test]
1694    fn test_roundtrip_v0317_with_cursor_marker() {
1695        let old = "aaa\nbbb\nccc\n";
1696        let new = "aaa\nBBB\nccc\n";
1697        let result =
1698            encode_from_old_and_new_v0317(old, new, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1699        assert!(result.contains("<|user_cursor|>"), "result: {result}");
1700        assert!(result.contains("<|marker-0|>"), "result: {result}");
1701    }
1702
1703    #[test]
1704    fn test_compute_marker_offsets_v0318_uses_larger_block_sizes() {
1705        let text = "l1\nl2\nl3\n\nl5\nl6\nl7\nl8\nl9\nl10\nl11\nl12\nl13\n";
1706        let v0316_offsets = compute_marker_offsets(text);
1707        let v0318_offsets = compute_marker_offsets_v0318(text);
1708
1709        assert!(v0318_offsets.len() < v0316_offsets.len());
1710        assert_eq!(v0316_offsets.first().copied(), Some(0));
1711        assert_eq!(v0318_offsets.first().copied(), Some(0));
1712        assert_eq!(v0316_offsets.last().copied(), Some(text.len()));
1713        assert_eq!(v0318_offsets.last().copied(), Some(text.len()));
1714    }
1715
1716    #[test]
1717    fn test_roundtrip_v0318() {
1718        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
1719        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
1720        let encoded =
1721            encode_from_old_and_new_v0318(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1722        let stripped = encoded
1723            .strip_suffix("<|end|>")
1724            .expect("should have end marker");
1725        let reconstructed = apply_marker_span_v0318(old, stripped).unwrap();
1726        assert_eq!(reconstructed, new);
1727    }
1728
1729    #[test]
1730    fn test_roundtrip_v0318_append_at_end_of_editable_region() {
1731        let old = "line1\nline2\nline3\n";
1732        let new = "line1\nline2\nline3\nline4\n";
1733        let encoded =
1734            encode_from_old_and_new_v0318(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1735
1736        assert_ne!(encoded, "<|marker_2|><|end|>");
1737
1738        let stripped = encoded
1739            .strip_suffix("<|end|>")
1740            .expect("should have end marker");
1741        let reconstructed = apply_marker_span_v0318(old, stripped).unwrap();
1742        assert_eq!(reconstructed, new);
1743    }
1744
1745    #[test]
1746    fn test_roundtrip_v0318_insert_at_internal_marker_boundary() {
1747        let old = "alpha\nbeta\n\ngamma\ndelta\n";
1748        let new = "alpha\nbeta\n\ninserted\ngamma\ndelta\n";
1749        let encoded =
1750            encode_from_old_and_new_v0318(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1751
1752        let stripped = encoded
1753            .strip_suffix("<|end|>")
1754            .expect("should have end marker");
1755        let reconstructed = apply_marker_span_v0318(old, stripped).unwrap();
1756        assert_eq!(reconstructed, new);
1757    }
1758
1759    #[test]
1760    fn test_encode_v0317_markers_stay_on_line_boundaries() {
1761        let old = "\
1762\t\t\t\tcontinue outer;
1763\t\t\t}
1764\t\t}
1765\t}
1766
1767\tconst intersectionObserver = new IntersectionObserver((entries) => {
1768\t\tfor (const entry of entries) {
1769\t\t\tif (entry.isIntersecting) {
1770\t\t\t\tintersectionObserver.unobserve(entry.target);
1771\t\t\t\tanchorPreload(/** @type {HTMLAnchorElement} */ (entry.target));
1772\t\t\t}
1773\t\t}
1774\t});
1775
1776\tconst observer = new MutationObserver(() => {
1777\t\tconst links = /** @type {NodeListOf<HTMLAnchorElement>} */ (
1778\t\t\tdocument.querySelectorAll('a[data-preload]')
1779\t\t);
1780
1781\t\tfor (const link of links) {
1782\t\t\tif (linkSet.has(link)) continue;
1783\t\t\tlinkSet.add(link);
1784
1785\t\t\tswitch (link.dataset.preload) {
1786\t\t\t\tcase '':
1787\t\t\t\tcase 'true':
1788\t\t\t\tcase 'hover': {
1789\t\t\t\t\tlink.addEventListener('mouseenter', function callback() {
1790\t\t\t\t\t\tlink.removeEventListener('mouseenter', callback);
1791\t\t\t\t\t\tanchorPreload(link);
1792\t\t\t\t\t});
1793";
1794        let new = old.replacen(
1795            "\t\t\t\tcase 'true':\n",
1796            "\t\t\t\tcase 'TRUE':<|user_cursor|>\n",
1797            1,
1798        );
1799
1800        let cursor_offset = new.find("<|user_cursor|>").expect("cursor marker in new");
1801        let new_without_cursor = new.replace("<|user_cursor|>", "");
1802
1803        let encoded = encode_from_old_and_new_v0317(
1804            old,
1805            &new_without_cursor,
1806            Some(cursor_offset),
1807            "<|user_cursor|>",
1808            "<|end|>",
1809        )
1810        .unwrap();
1811
1812        let core = encoded.strip_suffix("<|end|>").unwrap_or(&encoded);
1813        for marker in collect_relative_marker_tags(core) {
1814            let tag_start = marker.tag_start;
1815            assert!(
1816                tag_start == 0 || core.as_bytes()[tag_start - 1] == b'\n',
1817                "marker not at line boundary: {} in output:\n{}",
1818                marker_tag_relative(marker.value),
1819                core
1820            );
1821        }
1822    }
1823}