multi_region.rs

   1use anyhow::{Context as _, Result, anyhow};
   2
/// Opening text of an absolute marker tag; [`marker_tag`] appends the marker
/// number and [`MARKER_TAG_SUFFIX`] to form `<|marker_N|>`.
pub const MARKER_TAG_PREFIX: &str = "<|marker_";
/// Closing text of a marker tag; the tag parsers in this file search for it
/// after a prefix match.
pub const MARKER_TAG_SUFFIX: &str = "|>";
/// Opening text searched for when collecting relative marker tags such as
/// `<|marker+1|>` or `<|marker-2|>` (see [`marker_tag_relative`]).
pub const RELATIVE_MARKER_TAG_PREFIX: &str = "<|marker";
// Minimum and maximum block sizes, in lines, that `compute_marker_offsets`
// passes to the shared splitting routine (V0316/V0317 sizing).
const V0316_MIN_BLOCK_LINES: usize = 3;
const V0316_MAX_BLOCK_LINES: usize = 8;
// Minimum and maximum block sizes, in lines, that
// `compute_marker_offsets_v0318` passes to the shared splitting routine.
const V0318_MIN_BLOCK_LINES: usize = 6;
const V0318_MAX_BLOCK_LINES: usize = 16;
// Number of lines, starting at the requested one, that `skip_to_good_start`
// examines when looking for a better boundary line.
const MAX_NUDGE_LINES: usize = 5;
// End-marker strings, one per format version; all four currently hold the
// same value.
// NOTE(review): none of them is referenced in the visible part of this file;
// presumably callers pass them as the `end_marker` argument — confirm.
pub const V0316_END_MARKER: &str = "<[end▁of▁sentence]>";
pub const V0317_END_MARKER: &str = "<[end▁of▁sentence]>";
pub const V0318_END_MARKER: &str = "<[end▁of▁sentence]>";
pub const V0327_END_MARKER: &str = "<[end▁of▁sentence]>";
  15
  16pub fn marker_tag(number: usize) -> String {
  17    format!("{MARKER_TAG_PREFIX}{number}{MARKER_TAG_SUFFIX}")
  18}
  19
  20pub fn marker_tag_relative(delta: isize) -> String {
  21    if delta > 0 {
  22        format!("<|marker+{delta}|>")
  23    } else if delta == 0 {
  24        String::from("<|marker-0|>")
  25    } else {
  26        format!("<|marker{delta}|>")
  27    }
  28}
  29
/// Per-line facts gathered by `collect_line_info` and consumed when choosing
/// marker boundaries.
struct LineInfo {
    // Byte offset of the line's first byte within the scanned text.
    start: usize,
    // True when the line is empty after trimming whitespace.
    is_blank: bool,
    // True when the line is non-blank and `is_structural_tail` rejects it,
    // i.e. it is an acceptable first line for a block.
    is_good_start: bool,
}
  35
  36fn collect_line_info(text: &str) -> Vec<LineInfo> {
  37    let mut lines = Vec::new();
  38    let mut offset = 0;
  39    for line in text.split('\n') {
  40        let trimmed = line.trim();
  41        let is_blank = trimmed.is_empty();
  42        let is_good_start = !is_blank && !is_structural_tail(trimmed);
  43        lines.push(LineInfo {
  44            start: offset,
  45            is_blank,
  46            is_good_start,
  47        });
  48        offset += line.len() + 1;
  49    }
  50    // split('\n') on "abc\n" yields ["abc", ""] — drop the phantom trailing
  51    // empty element when the text ends with '\n'.
  52    if text.ends_with('\n') && lines.len() > 1 {
  53        lines.pop();
  54    }
  55    lines
  56}
  57
  58fn is_structural_tail(trimmed_line: &str) -> bool {
  59    if trimmed_line.starts_with(&['}', ']', ')']) {
  60        return true;
  61    }
  62    matches!(
  63        trimmed_line.trim_end_matches(';'),
  64        "break" | "continue" | "return" | "throw" | "end"
  65    )
  66}
  67
  68/// Starting from line `from`, scan up to `MAX_NUDGE_LINES` forward to find a
  69/// line with `is_good_start`. Returns `None` if no suitable line is found.
  70fn skip_to_good_start(lines: &[LineInfo], from: usize) -> Option<usize> {
  71    (from..lines.len().min(from + MAX_NUDGE_LINES)).find(|&i| lines[i].is_good_start)
  72}
  73
  74/// Compute byte offsets within `editable_text` where marker boundaries should
  75/// be placed.
  76///
  77/// Returns a sorted `Vec<usize>` that always starts with `0` and ends with
  78/// `editable_text.len()`. Interior offsets are placed at line boundaries
  79/// (right after a `\n`), preferring blank-line boundaries when available and
  80/// respecting `min_block_lines` / `max_block_lines` constraints.
  81fn compute_marker_offsets_with_limits(
  82    editable_text: &str,
  83    min_block_lines: usize,
  84    max_block_lines: usize,
  85) -> Vec<usize> {
  86    if editable_text.is_empty() {
  87        return vec![0, 0];
  88    }
  89
  90    let lines = collect_line_info(editable_text);
  91    let mut offsets = vec![0usize];
  92    let mut last_boundary_line = 0;
  93    let mut i = 0;
  94
  95    while i < lines.len() {
  96        let gap = i - last_boundary_line;
  97
  98        // Blank-line split: non-blank line following blank line(s) with enough
  99        // accumulated lines.
 100        if gap >= min_block_lines && !lines[i].is_blank && i > 0 && lines[i - 1].is_blank {
 101            let target = if lines[i].is_good_start {
 102                i
 103            } else {
 104                skip_to_good_start(&lines, i).unwrap_or(i)
 105            };
 106            if lines.len() - target >= min_block_lines
 107                && lines[target].start > *offsets.last().unwrap_or(&0)
 108            {
 109                offsets.push(lines[target].start);
 110                last_boundary_line = target;
 111                i = target + 1;
 112                continue;
 113            }
 114        }
 115
 116        // Hard cap: too many lines without a split.
 117        if gap >= max_block_lines {
 118            let target = skip_to_good_start(&lines, i).unwrap_or(i);
 119            if lines[target].start > *offsets.last().unwrap_or(&0) {
 120                offsets.push(lines[target].start);
 121                last_boundary_line = target;
 122                i = target + 1;
 123                continue;
 124            }
 125        }
 126
 127        i += 1;
 128    }
 129
 130    let end = editable_text.len();
 131    if *offsets.last().unwrap_or(&0) != end {
 132        offsets.push(end);
 133    }
 134
 135    offsets
 136}
 137
 138/// Compute byte offsets within `editable_text` for the V0316/V0317 block sizing rules.
 139pub fn compute_marker_offsets(editable_text: &str) -> Vec<usize> {
 140    compute_marker_offsets_with_limits(editable_text, V0316_MIN_BLOCK_LINES, V0316_MAX_BLOCK_LINES)
 141}
 142
 143pub fn compute_marker_offsets_v0318(editable_text: &str) -> Vec<usize> {
 144    compute_marker_offsets_with_limits(editable_text, V0318_MIN_BLOCK_LINES, V0318_MAX_BLOCK_LINES)
 145}
 146
 147fn line_start_at_or_before(text: &str, offset: usize) -> usize {
 148    let bounded_offset = text.floor_char_boundary(offset.min(text.len()));
 149    text[..bounded_offset]
 150        .rfind('\n')
 151        .map(|index| index + 1)
 152        .unwrap_or(0)
 153}
 154
 155fn line_end_at_or_after(text: &str, offset: usize) -> usize {
 156    let bounded_offset = text.floor_char_boundary(offset.min(text.len()));
 157    if bounded_offset >= text.len() {
 158        return text.len();
 159    }
 160
 161    text[bounded_offset..]
 162        .find('\n')
 163        .map(|index| bounded_offset + index + 1)
 164        .unwrap_or(text.len())
 165}
 166
 167fn grow_v0327_candidate_range(
 168    text: &str,
 169    cursor_offset: usize,
 170    editable_token_limit: usize,
 171) -> std::ops::Range<usize> {
 172    if text.is_empty() {
 173        return 0..0;
 174    }
 175
 176    let byte_budget = editable_token_limit.saturating_mul(3).max(1);
 177    let half_budget = byte_budget / 2;
 178
 179    let mut start = cursor_offset.saturating_sub(half_budget);
 180    let mut end = start.saturating_add(byte_budget).min(text.len());
 181
 182    if end.saturating_sub(start) < byte_budget {
 183        start = end.saturating_sub(byte_budget);
 184    }
 185
 186    start = line_start_at_or_before(text, start);
 187    end = line_end_at_or_after(text, end);
 188
 189    if start < end {
 190        start..end
 191    } else {
 192        let line_start = line_start_at_or_before(text, cursor_offset);
 193        let line_end = line_end_at_or_after(text, cursor_offset);
 194        line_start..line_end.max(line_start)
 195    }
 196}
 197
 198fn trim_v0327_candidate_range_to_markers(
 199    text: &str,
 200    candidate_range: std::ops::Range<usize>,
 201    cursor_offset: usize,
 202) -> std::ops::Range<usize> {
 203    let candidate_text = &text[candidate_range.clone()];
 204    let marker_offsets = compute_marker_offsets_v0318(candidate_text);
 205
 206    if marker_offsets.len() <= 2 {
 207        return candidate_range;
 208    }
 209
 210    let candidate_cursor_offset = cursor_offset
 211        .saturating_sub(candidate_range.start)
 212        .min(candidate_text.len());
 213    let first_internal_marker_index = if candidate_cursor_offset >= marker_offsets[1] {
 214        1
 215    } else {
 216        0
 217    };
 218    let last_internal_marker_index = marker_offsets.len() - 2;
 219    let last_marker_index = marker_offsets.len() - 1;
 220    let end_marker_index = if candidate_cursor_offset <= marker_offsets[last_internal_marker_index]
 221    {
 222        last_internal_marker_index
 223    } else {
 224        last_marker_index
 225    };
 226
 227    let trimmed_start = candidate_range.start + marker_offsets[first_internal_marker_index];
 228    let trimmed_end = candidate_range.start + marker_offsets[end_marker_index];
 229
 230    if trimmed_start < trimmed_end {
 231        trimmed_start..trimmed_end
 232    } else {
 233        let block_index = cursor_block_index(Some(candidate_cursor_offset), &marker_offsets);
 234        let start = candidate_range.start + marker_offsets[block_index];
 235        let end = candidate_range.start + marker_offsets[block_index + 1];
 236        if start < end {
 237            start..end
 238        } else {
 239            candidate_range
 240        }
 241    }
 242}
 243
 244pub fn compute_v0327_editable_range(
 245    text: &str,
 246    cursor_offset: usize,
 247    editable_token_limit: usize,
 248) -> std::ops::Range<usize> {
 249    let candidate_range = grow_v0327_candidate_range(text, cursor_offset, editable_token_limit);
 250    trim_v0327_candidate_range_to_markers(text, candidate_range, cursor_offset)
 251}
 252
 253/// Write the editable region content with marker tags, inserting the cursor
 254/// marker at the given offset within the editable text.
 255pub fn write_editable_with_markers(
 256    output: &mut String,
 257    editable_text: &str,
 258    cursor_offset_in_editable: usize,
 259    cursor_marker: &str,
 260) {
 261    let marker_offsets = compute_marker_offsets(editable_text);
 262    let mut cursor_placed = false;
 263    for (i, &offset) in marker_offsets.iter().enumerate() {
 264        let marker_num = i + 1;
 265        if !output.is_empty() && !output.ends_with('\n') {
 266            output.push('\n');
 267        }
 268        output.push_str(&marker_tag(marker_num));
 269
 270        if let Some(&next_offset) = marker_offsets.get(i + 1) {
 271            output.push('\n');
 272            let block = &editable_text[offset..next_offset];
 273            if !cursor_placed
 274                && cursor_offset_in_editable >= offset
 275                && cursor_offset_in_editable <= next_offset
 276            {
 277                cursor_placed = true;
 278                let cursor_in_block = cursor_offset_in_editable - offset;
 279                output.push_str(&block[..cursor_in_block]);
 280                output.push_str(cursor_marker);
 281                output.push_str(&block[cursor_in_block..]);
 282            } else {
 283                output.push_str(block);
 284            }
 285        }
 286    }
 287}
 288
 289/// Strip any `<|marker_N|>` tags from `text`.
 290///
 291/// When a marker tag sits on its own line (followed by `\n`), the trailing
 292/// newline is also removed so the surrounding lines stay joined naturally.
 293fn strip_marker_tags(text: &str) -> String {
 294    let mut result = String::with_capacity(text.len());
 295    let mut pos = 0;
 296    let bytes = text.as_bytes();
 297    while let Some(rel) = text[pos..].find(MARKER_TAG_PREFIX) {
 298        result.push_str(&text[pos..pos + rel]);
 299        let num_start = pos + rel + MARKER_TAG_PREFIX.len();
 300        if let Some(suffix_rel) = text[num_start..].find(MARKER_TAG_SUFFIX) {
 301            let mut tag_end = num_start + suffix_rel + MARKER_TAG_SUFFIX.len();
 302            if bytes.get(tag_end) == Some(&b'\n') {
 303                tag_end += 1;
 304            }
 305            pos = tag_end;
 306        } else {
 307            result.push_str(MARKER_TAG_PREFIX);
 308            pos = num_start;
 309        }
 310    }
 311    result.push_str(&text[pos..]);
 312    result
 313}
 314
 315/// Parse model output that uses the marker format.
 316///
 317/// Returns `(start_marker_num, end_marker_num, content_between_markers)`.
 318/// The leading format-level newline after the start marker is stripped.
 319/// Trailing newlines are preserved so blank-line endings in the editable
 320/// region are not lost.
 321///
 322/// Any extra intermediate marker tags that the model may have inserted
 323/// between the first and last markers are stripped from the returned content.
 324pub fn extract_marker_span(text: &str) -> Result<(usize, usize, String)> {
 325    let first_tag_start = text
 326        .find(MARKER_TAG_PREFIX)
 327        .context("no start marker found in output")?;
 328    let first_num_start = first_tag_start + MARKER_TAG_PREFIX.len();
 329    let first_num_end = text[first_num_start..]
 330        .find(MARKER_TAG_SUFFIX)
 331        .map(|i| i + first_num_start)
 332        .context("malformed start marker tag")?;
 333    let start_num: usize = text[first_num_start..first_num_end]
 334        .parse()
 335        .context("start marker number is not a valid integer")?;
 336    let first_tag_end = first_num_end + MARKER_TAG_SUFFIX.len();
 337
 338    let last_tag_start = text
 339        .rfind(MARKER_TAG_PREFIX)
 340        .context("no end marker found in output")?;
 341    let last_num_start = last_tag_start + MARKER_TAG_PREFIX.len();
 342    let last_num_end = text[last_num_start..]
 343        .find(MARKER_TAG_SUFFIX)
 344        .map(|i| i + last_num_start)
 345        .context("malformed end marker tag")?;
 346    let end_num: usize = text[last_num_start..last_num_end]
 347        .parse()
 348        .context("end marker number is not a valid integer")?;
 349
 350    if start_num == end_num {
 351        return Err(anyhow!(
 352            "start and end markers are the same (marker {})",
 353            start_num
 354        ));
 355    }
 356
 357    let mut content_start = first_tag_end;
 358    if text.as_bytes().get(content_start) == Some(&b'\n') {
 359        content_start += 1;
 360    }
 361    let content_end = last_tag_start;
 362
 363    let content = &text[content_start..content_end.max(content_start)];
 364    let content = strip_marker_tags(content);
 365    Ok((start_num, end_num, content))
 366}
 367
 368/// Given old editable text and model output with marker span, reconstruct the
 369/// full new editable region.
 370pub fn apply_marker_span(old_editable: &str, output: &str) -> Result<String> {
 371    let (start_num, end_num, raw_new_span) = extract_marker_span(output)?;
 372    let marker_offsets = compute_marker_offsets(old_editable);
 373
 374    let start_idx = start_num
 375        .checked_sub(1)
 376        .context("marker numbers are 1-indexed")?;
 377    let end_idx = end_num
 378        .checked_sub(1)
 379        .context("marker numbers are 1-indexed")?;
 380    let start_byte = *marker_offsets
 381        .get(start_idx)
 382        .context("start marker number out of range")?;
 383    let end_byte = *marker_offsets
 384        .get(end_idx)
 385        .context("end marker number out of range")?;
 386
 387    if start_byte > end_byte {
 388        return Err(anyhow!("start marker must come before end marker"));
 389    }
 390
 391    let old_span = &old_editable[start_byte..end_byte];
 392    let mut new_span = raw_new_span;
 393    if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
 394        new_span.push('\n');
 395    }
 396    if !old_span.ends_with('\n') && new_span.ends_with('\n') {
 397        new_span.pop();
 398    }
 399
 400    let mut result = String::new();
 401    result.push_str(&old_editable[..start_byte]);
 402    result.push_str(&new_span);
 403    result.push_str(&old_editable[end_byte..]);
 404
 405    Ok(result)
 406}
 407
 408/// Compare old and new editable text, find the minimal marker span that covers
 409/// all changes, and encode the result with marker tags.
 410pub fn encode_from_old_and_new(
 411    old_editable: &str,
 412    new_editable: &str,
 413    cursor_offset_in_new: Option<usize>,
 414    cursor_marker: &str,
 415    end_marker: &str,
 416    no_edits_marker: &str,
 417) -> Result<String> {
 418    if old_editable == new_editable {
 419        return Ok(format!("{no_edits_marker}{end_marker}"));
 420    }
 421
 422    let marker_offsets = compute_marker_offsets(old_editable);
 423    let (common_prefix, common_suffix) =
 424        common_prefix_suffix(old_editable.as_bytes(), new_editable.as_bytes());
 425    let change_end_in_old = old_editable.len() - common_suffix;
 426
 427    let start_marker_idx = marker_offsets
 428        .iter()
 429        .rposition(|&offset| offset <= common_prefix)
 430        .unwrap_or(0);
 431    let end_marker_idx = marker_offsets
 432        .iter()
 433        .position(|&offset| offset >= change_end_in_old)
 434        .unwrap_or(marker_offsets.len() - 1);
 435
 436    let old_start = marker_offsets[start_marker_idx];
 437    let old_end = marker_offsets[end_marker_idx];
 438
 439    let new_start = old_start;
 440    let new_end = new_editable
 441        .len()
 442        .saturating_sub(old_editable.len().saturating_sub(old_end));
 443
 444    let new_span = &new_editable[new_start..new_end];
 445
 446    let start_marker_num = start_marker_idx + 1;
 447    let end_marker_num = end_marker_idx + 1;
 448
 449    let mut result = String::new();
 450    result.push_str(&marker_tag(start_marker_num));
 451    result.push('\n');
 452
 453    if let Some(cursor_offset) = cursor_offset_in_new {
 454        if cursor_offset >= new_start && cursor_offset <= new_end {
 455            let cursor_in_span = cursor_offset - new_start;
 456            let bounded = cursor_in_span.min(new_span.len());
 457            result.push_str(&new_span[..bounded]);
 458            result.push_str(cursor_marker);
 459            result.push_str(&new_span[bounded..]);
 460        } else {
 461            result.push_str(new_span);
 462        }
 463    } else {
 464        result.push_str(new_span);
 465    }
 466
 467    if !result.ends_with('\n') {
 468        result.push('\n');
 469    }
 470    result.push_str(&marker_tag(end_marker_num));
 471    result.push('\n');
 472    result.push_str(end_marker);
 473
 474    Ok(result)
 475}
 476
 477/// Extract the full editable region from text that uses marker tags.
 478///
 479/// Returns the concatenation of all block contents between the first and last
 480/// markers, with intermediate marker tags stripped.
 481pub fn extract_editable_region_from_markers(text: &str) -> Option<String> {
 482    let first_marker_start = text.find(MARKER_TAG_PREFIX)?;
 483
 484    let mut markers: Vec<(usize, usize)> = Vec::new();
 485    let mut search_start = first_marker_start;
 486    while let Some(rel_pos) = text[search_start..].find(MARKER_TAG_PREFIX) {
 487        let tag_start = search_start + rel_pos;
 488        let num_start = tag_start + MARKER_TAG_PREFIX.len();
 489        let num_end = text[num_start..].find(MARKER_TAG_SUFFIX)?;
 490        let tag_end = num_start + num_end + MARKER_TAG_SUFFIX.len();
 491        markers.push((tag_start, tag_end));
 492        search_start = tag_end;
 493    }
 494
 495    if markers.len() < 2 {
 496        return None;
 497    }
 498
 499    let (_, first_tag_end) = markers[0];
 500    let (last_tag_start, _) = markers[markers.len() - 1];
 501
 502    let mut content_start = first_tag_end;
 503    if text.as_bytes().get(content_start) == Some(&b'\n') {
 504        content_start += 1;
 505    }
 506    let mut content_end = last_tag_start;
 507    if content_end > content_start && text.as_bytes().get(content_end - 1) == Some(&b'\n') {
 508        content_end -= 1;
 509    }
 510
 511    let raw = &text[content_start..content_end];
 512    let result = strip_marker_tags(raw);
 513    let result = result.strip_suffix('\n').unwrap_or(&result).to_string();
 514    Some(result)
 515}
 516
/// One marker tag located by `collect_tags`.
struct ParsedTag {
    // Number parsed from the tag payload: the marker number for absolute
    // tags, the signed delta for relative tags.
    value: isize,
    // Byte offset of the tag's first byte in the scanned text.
    tag_start: usize,
    // Byte offset one past the tag's closing suffix.
    tag_end: usize,
}
 522
 523fn collect_tags(text: &str, prefix: &str, parse: fn(&str) -> Option<isize>) -> Vec<ParsedTag> {
 524    let mut tags = Vec::new();
 525    let mut search_from = 0;
 526    while let Some(rel_pos) = text[search_from..].find(prefix) {
 527        let tag_start = search_from + rel_pos;
 528        let payload_start = tag_start + prefix.len();
 529        if let Some(suffix_rel) = text[payload_start..].find(MARKER_TAG_SUFFIX) {
 530            let payload_end = payload_start + suffix_rel;
 531            if let Some(value) = parse(&text[payload_start..payload_end]) {
 532                let tag_end = payload_end + MARKER_TAG_SUFFIX.len();
 533                tags.push(ParsedTag {
 534                    value,
 535                    tag_start,
 536                    tag_end,
 537                });
 538                search_from = tag_end;
 539                continue;
 540            }
 541        }
 542        search_from = tag_start + prefix.len();
 543    }
 544    tags
 545}
 546
 547fn collect_marker_tags(text: &str) -> Vec<ParsedTag> {
 548    collect_tags(text, MARKER_TAG_PREFIX, |s| {
 549        s.parse::<usize>().ok().map(|n| n as isize)
 550    })
 551}
 552
 553fn collect_relative_marker_tags(text: &str) -> Vec<ParsedTag> {
 554    collect_tags(text, RELATIVE_MARKER_TAG_PREFIX, |s| {
 555        s.parse::<isize>().ok()
 556    })
 557}
 558
 559pub fn nearest_marker_number(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
 560    let cursor = cursor_offset.unwrap_or(0);
 561    marker_offsets
 562        .iter()
 563        .enumerate()
 564        .min_by_key(|(_, offset)| (**offset as isize - cursor as isize).unsigned_abs())
 565        .map(|(idx, _)| idx + 1)
 566        .unwrap_or(1)
 567}
 568
 569fn cursor_block_index(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
 570    let cursor = cursor_offset.unwrap_or(0);
 571    marker_offsets
 572        .windows(2)
 573        .position(|window| cursor >= window[0] && cursor < window[1])
 574        .unwrap_or_else(|| marker_offsets.len().saturating_sub(2))
 575}
 576
 577fn common_prefix_suffix(a: &[u8], b: &[u8]) -> (usize, usize) {
 578    let prefix = a.iter().zip(b.iter()).take_while(|(x, y)| x == y).count();
 579    let remaining_a = a.len() - prefix;
 580    let remaining_b = b.len() - prefix;
 581    let max_suffix = remaining_a.min(remaining_b);
 582    let suffix = a[a.len() - max_suffix..]
 583        .iter()
 584        .rev()
 585        .zip(b[b.len() - max_suffix..].iter().rev())
 586        .take_while(|(x, y)| x == y)
 587        .count();
 588    (prefix, suffix)
 589}
 590
 591/// Map a byte offset from old span coordinates to new span coordinates,
 592/// using common prefix/suffix within the span for accuracy.
 593fn map_boundary_offset(
 594    old_rel: usize,
 595    old_span_len: usize,
 596    new_span_len: usize,
 597    span_common_prefix: usize,
 598    span_common_suffix: usize,
 599) -> usize {
 600    if old_rel <= span_common_prefix {
 601        old_rel
 602    } else if old_rel >= old_span_len - span_common_suffix {
 603        new_span_len - (old_span_len - old_rel)
 604    } else {
 605        let old_changed_start = span_common_prefix;
 606        let old_changed_len = old_span_len
 607            .saturating_sub(span_common_prefix)
 608            .saturating_sub(span_common_suffix);
 609        let new_changed_start = span_common_prefix;
 610        let new_changed_len = new_span_len
 611            .saturating_sub(span_common_prefix)
 612            .saturating_sub(span_common_suffix);
 613
 614        if old_changed_len == 0 {
 615            new_changed_start
 616        } else {
 617            new_changed_start + ((old_rel - old_changed_start) * new_changed_len / old_changed_len)
 618        }
 619    }
 620}
 621
 622fn snap_to_line_start(text: &str, offset: usize) -> usize {
 623    let bounded = offset.min(text.len());
 624    let bounded = text.floor_char_boundary(bounded);
 625
 626    if bounded >= text.len() {
 627        return text.len();
 628    }
 629
 630    if bounded == 0 || text.as_bytes().get(bounded - 1) == Some(&b'\n') {
 631        return bounded;
 632    }
 633
 634    if let Some(next_nl_rel) = text[bounded..].find('\n') {
 635        let next = bounded + next_nl_rel + 1;
 636        return text.floor_char_boundary(next.min(text.len()));
 637    }
 638
 639    let prev_start = text[..bounded].rfind('\n').map(|idx| idx + 1).unwrap_or(0);
 640    text.floor_char_boundary(prev_start)
 641}
 642
 643/// Write the editable region content with byte-exact marker tags, inserting the
 644/// cursor marker at the given offset within the editable text.
 645///
 646/// The `tag_for_index` closure maps a boundary index to the marker tag string.
 647fn write_editable_with_markers_impl(
 648    output: &mut String,
 649    editable_text: &str,
 650    cursor_offset_in_editable: usize,
 651    cursor_marker: &str,
 652    marker_offsets: &[usize],
 653    tag_for_index: impl Fn(usize) -> String,
 654) {
 655    let mut cursor_placed = false;
 656    for (i, &offset) in marker_offsets.iter().enumerate() {
 657        output.push_str(&tag_for_index(i));
 658
 659        if let Some(&next_offset) = marker_offsets.get(i + 1) {
 660            let block = &editable_text[offset..next_offset];
 661            if !cursor_placed
 662                && cursor_offset_in_editable >= offset
 663                && cursor_offset_in_editable <= next_offset
 664            {
 665                cursor_placed = true;
 666                let cursor_in_block = cursor_offset_in_editable - offset;
 667                output.push_str(&block[..cursor_in_block]);
 668                output.push_str(cursor_marker);
 669                output.push_str(&block[cursor_in_block..]);
 670            } else {
 671                output.push_str(block);
 672            }
 673        }
 674    }
 675}
 676
 677pub fn write_editable_with_markers_v0316(
 678    output: &mut String,
 679    editable_text: &str,
 680    cursor_offset_in_editable: usize,
 681    cursor_marker: &str,
 682) {
 683    let marker_offsets = compute_marker_offsets(editable_text);
 684    write_editable_with_markers_impl(
 685        output,
 686        editable_text,
 687        cursor_offset_in_editable,
 688        cursor_marker,
 689        &marker_offsets,
 690        |i| marker_tag(i + 1),
 691    );
 692}
 693
 694pub fn write_editable_with_markers_v0317(
 695    output: &mut String,
 696    editable_text: &str,
 697    cursor_offset_in_editable: usize,
 698    cursor_marker: &str,
 699) {
 700    let marker_offsets = compute_marker_offsets(editable_text);
 701    let anchor_idx = cursor_block_index(Some(cursor_offset_in_editable), &marker_offsets);
 702    write_editable_with_markers_impl(
 703        output,
 704        editable_text,
 705        cursor_offset_in_editable,
 706        cursor_marker,
 707        &marker_offsets,
 708        |i| marker_tag_relative(i as isize - anchor_idx as isize),
 709    );
 710}
 711
 712pub fn write_editable_with_markers_v0318(
 713    output: &mut String,
 714    editable_text: &str,
 715    cursor_offset_in_editable: usize,
 716    cursor_marker: &str,
 717) {
 718    let marker_offsets = compute_marker_offsets_v0318(editable_text);
 719    write_editable_with_markers_impl(
 720        output,
 721        editable_text,
 722        cursor_offset_in_editable,
 723        cursor_marker,
 724        &marker_offsets,
 725        |i| marker_tag(i + 1),
 726    );
 727}
 728
 729/// Parse byte-exact model output and reconstruct the full new editable region.
 730///
 731/// `resolve_boundary` maps a parsed tag value to an absolute byte offset in
 732/// old_editable, given the marker_offsets. Returns `(start_byte, end_byte)` or
 733/// an error.
 734fn apply_marker_span_impl(
 735    old_editable: &str,
 736    tags: &[ParsedTag],
 737    output: &str,
 738    resolve_boundaries: impl Fn(isize, isize) -> Result<(usize, usize)>,
 739) -> Result<String> {
 740    if tags.is_empty() {
 741        return Err(anyhow!("no marker tags found in output"));
 742    }
 743    if tags.len() == 1 {
 744        return Err(anyhow!(
 745            "only one marker tag found in output, expected at least two"
 746        ));
 747    }
 748
 749    let start_value = tags[0].value;
 750    let end_value = tags[tags.len() - 1].value;
 751
 752    if start_value == end_value {
 753        return Ok(old_editable.to_string());
 754    }
 755
 756    let (start_byte, end_byte) = resolve_boundaries(start_value, end_value)?;
 757
 758    if start_byte > end_byte {
 759        return Err(anyhow!("start marker must come before end marker"));
 760    }
 761
 762    let mut new_content = String::new();
 763    for i in 0..tags.len() - 1 {
 764        let content_start = tags[i].tag_end;
 765        let content_end = tags[i + 1].tag_start;
 766        if content_start <= content_end {
 767            new_content.push_str(&output[content_start..content_end]);
 768        }
 769    }
 770
 771    let mut result = String::new();
 772    result.push_str(&old_editable[..start_byte]);
 773    result.push_str(&new_content);
 774    result.push_str(&old_editable[end_byte..]);
 775
 776    Ok(result)
 777}
 778
 779pub fn apply_marker_span_v0316(old_editable: &str, output: &str) -> Result<String> {
 780    let tags = collect_marker_tags(output);
 781
 782    // Validate monotonically increasing with no gaps (best-effort warning)
 783    if tags.len() >= 2 {
 784        let start_num = tags[0].value;
 785        let end_num = tags[tags.len() - 1].value;
 786        if start_num != end_num {
 787            let expected: Vec<isize> = (start_num..=end_num).collect();
 788            let actual: Vec<isize> = tags.iter().map(|t| t.value).collect();
 789            if actual != expected {
 790                eprintln!(
 791                    "V0316 marker sequence validation failed: expected {:?}, got {:?}. Attempting best-effort parse.",
 792                    expected, actual
 793                );
 794            }
 795        }
 796    }
 797
 798    let marker_offsets = compute_marker_offsets(old_editable);
 799    apply_marker_span_impl(old_editable, &tags, output, |start_val, end_val| {
 800        let start_idx = (start_val as usize)
 801            .checked_sub(1)
 802            .context("marker numbers are 1-indexed")?;
 803        let end_idx = (end_val as usize)
 804            .checked_sub(1)
 805            .context("marker numbers are 1-indexed")?;
 806        let start_byte = *marker_offsets
 807            .get(start_idx)
 808            .context("start marker number out of range")?;
 809        let end_byte = *marker_offsets
 810            .get(end_idx)
 811            .context("end marker number out of range")?;
 812        Ok((start_byte, end_byte))
 813    })
 814}
 815
 816pub fn apply_marker_span_v0317(
 817    old_editable: &str,
 818    output: &str,
 819    cursor_offset_in_old: Option<usize>,
 820) -> Result<String> {
 821    let tags = collect_relative_marker_tags(output);
 822    let marker_offsets = compute_marker_offsets(old_editable);
 823    let anchor_idx = cursor_block_index(cursor_offset_in_old, &marker_offsets);
 824
 825    apply_marker_span_impl(old_editable, &tags, output, |start_delta, end_delta| {
 826        let start_idx_signed = anchor_idx as isize + start_delta;
 827        let end_idx_signed = anchor_idx as isize + end_delta;
 828        if start_idx_signed < 0 || end_idx_signed < 0 {
 829            return Err(anyhow!("relative marker maps before first marker"));
 830        }
 831        let start_idx = usize::try_from(start_idx_signed).context("invalid start marker index")?;
 832        let end_idx = usize::try_from(end_idx_signed).context("invalid end marker index")?;
 833        let start_byte = *marker_offsets
 834            .get(start_idx)
 835            .context("start marker number out of range")?;
 836        let end_byte = *marker_offsets
 837            .get(end_idx)
 838            .context("end marker number out of range")?;
 839        Ok((start_byte, end_byte))
 840    })
 841}
 842
 843pub fn apply_marker_span_v0318(old_editable: &str, output: &str) -> Result<String> {
 844    let tags = collect_marker_tags(output);
 845
 846    if tags.len() >= 2 {
 847        let start_num = tags[0].value;
 848        let end_num = tags[tags.len() - 1].value;
 849        if start_num != end_num {
 850            let expected: Vec<isize> = (start_num..=end_num).collect();
 851            let actual: Vec<isize> = tags.iter().map(|t| t.value).collect();
 852            if actual != expected {
 853                eprintln!(
 854                    "V0318 marker sequence validation failed: expected {:?}, got {:?}. Attempting best-effort parse.",
 855                    expected, actual
 856                );
 857            }
 858        }
 859    }
 860
 861    let marker_offsets = compute_marker_offsets_v0318(old_editable);
 862    apply_marker_span_impl(old_editable, &tags, output, |start_val, end_val| {
 863        let start_idx = (start_val as usize)
 864            .checked_sub(1)
 865            .context("marker numbers are 1-indexed")?;
 866        let end_idx = (end_val as usize)
 867            .checked_sub(1)
 868            .context("marker numbers are 1-indexed")?;
 869        let start_byte = *marker_offsets
 870            .get(start_idx)
 871            .context("start marker number out of range")?;
 872        let end_byte = *marker_offsets
 873            .get(end_idx)
 874            .context("end marker number out of range")?;
 875        Ok((start_byte, end_byte))
 876    })
 877}
 878
 879/// Encode the training target from old and new editable text.
 880///
 881/// Shared implementation for V0316, V0317, and V0318. The `tag_for_block_idx`
 882/// closure maps a block index to the appropriate marker tag string.
 883/// `no_edit_tag` is the marker tag to repeat when there are no edits.
 884fn encode_from_old_and_new_impl(
 885    old_editable: &str,
 886    new_editable: &str,
 887    cursor_offset_in_new: Option<usize>,
 888    cursor_marker: &str,
 889    end_marker: &str,
 890    no_edit_tag: &str,
 891    marker_offsets: &[usize],
 892    tag_for_block_idx: impl Fn(usize) -> String,
 893) -> Result<String> {
 894    if old_editable == new_editable {
 895        return Ok(format!("{no_edit_tag}{no_edit_tag}{end_marker}"));
 896    }
 897
 898    let (common_prefix, common_suffix) =
 899        common_prefix_suffix(old_editable.as_bytes(), new_editable.as_bytes());
 900    let change_end_in_old = old_editable.len() - common_suffix;
 901
 902    let mut start_marker_idx = marker_offsets
 903        .iter()
 904        .rposition(|&offset| offset <= common_prefix)
 905        .unwrap_or(0);
 906    let mut end_marker_idx = marker_offsets
 907        .iter()
 908        .position(|&offset| offset >= change_end_in_old)
 909        .unwrap_or(marker_offsets.len() - 1);
 910
 911    if start_marker_idx == end_marker_idx {
 912        if end_marker_idx < marker_offsets.len().saturating_sub(1) {
 913            end_marker_idx += 1;
 914        } else if start_marker_idx > 0 {
 915            start_marker_idx -= 1;
 916        }
 917    }
 918
 919    let old_start = marker_offsets[start_marker_idx];
 920    let old_end = marker_offsets[end_marker_idx];
 921
 922    let new_start = old_start;
 923    let new_end = new_editable
 924        .len()
 925        .saturating_sub(old_editable.len().saturating_sub(old_end));
 926
 927    let new_span = &new_editable[new_start..new_end];
 928    let old_span = &old_editable[old_start..old_end];
 929
 930    let (span_common_prefix, span_common_suffix) =
 931        common_prefix_suffix(old_span.as_bytes(), new_span.as_bytes());
 932
 933    let mut result = String::new();
 934    let mut prev_new_rel = 0usize;
 935    let mut cursor_placed = false;
 936
 937    for block_idx in start_marker_idx..end_marker_idx {
 938        result.push_str(&tag_for_block_idx(block_idx));
 939
 940        let new_rel_end = if block_idx + 1 == end_marker_idx {
 941            new_span.len()
 942        } else {
 943            let old_rel = marker_offsets[block_idx + 1] - old_start;
 944            let mapped = map_boundary_offset(
 945                old_rel,
 946                old_span.len(),
 947                new_span.len(),
 948                span_common_prefix,
 949                span_common_suffix,
 950            );
 951            snap_to_line_start(new_span, mapped)
 952        };
 953
 954        let new_rel_end = new_rel_end.max(prev_new_rel);
 955        let block_content = &new_span[prev_new_rel..new_rel_end];
 956
 957        if !cursor_placed {
 958            if let Some(cursor_offset) = cursor_offset_in_new {
 959                let abs_start = new_start + prev_new_rel;
 960                let abs_end = new_start + new_rel_end;
 961                if cursor_offset >= abs_start && cursor_offset <= abs_end {
 962                    cursor_placed = true;
 963                    let cursor_in_block = cursor_offset - abs_start;
 964                    let bounded = cursor_in_block.min(block_content.len());
 965                    result.push_str(&block_content[..bounded]);
 966                    result.push_str(cursor_marker);
 967                    result.push_str(&block_content[bounded..]);
 968                    prev_new_rel = new_rel_end;
 969                    continue;
 970                }
 971            }
 972        }
 973
 974        result.push_str(block_content);
 975        prev_new_rel = new_rel_end;
 976    }
 977
 978    result.push_str(&tag_for_block_idx(end_marker_idx));
 979    result.push_str(end_marker);
 980
 981    Ok(result)
 982}
 983
 984pub fn encode_from_old_and_new_v0316(
 985    old_editable: &str,
 986    new_editable: &str,
 987    cursor_offset_in_new: Option<usize>,
 988    cursor_marker: &str,
 989    end_marker: &str,
 990) -> Result<String> {
 991    let marker_offsets = compute_marker_offsets(old_editable);
 992    let no_edit_tag = marker_tag(nearest_marker_number(cursor_offset_in_new, &marker_offsets));
 993    encode_from_old_and_new_impl(
 994        old_editable,
 995        new_editable,
 996        cursor_offset_in_new,
 997        cursor_marker,
 998        end_marker,
 999        &no_edit_tag,
1000        &marker_offsets,
1001        |block_idx| marker_tag(block_idx + 1),
1002    )
1003}
1004
1005pub fn encode_from_old_and_new_v0317(
1006    old_editable: &str,
1007    new_editable: &str,
1008    cursor_offset_in_new: Option<usize>,
1009    cursor_marker: &str,
1010    end_marker: &str,
1011) -> Result<String> {
1012    let marker_offsets = compute_marker_offsets(old_editable);
1013    let anchor_idx = cursor_block_index(cursor_offset_in_new, &marker_offsets);
1014    let no_edit_tag = marker_tag_relative(0);
1015    encode_from_old_and_new_impl(
1016        old_editable,
1017        new_editable,
1018        cursor_offset_in_new,
1019        cursor_marker,
1020        end_marker,
1021        &no_edit_tag,
1022        &marker_offsets,
1023        |block_idx| marker_tag_relative(block_idx as isize - anchor_idx as isize),
1024    )
1025}
1026
1027pub fn encode_from_old_and_new_v0318(
1028    old_editable: &str,
1029    new_editable: &str,
1030    cursor_offset_in_new: Option<usize>,
1031    cursor_marker: &str,
1032    end_marker: &str,
1033) -> Result<String> {
1034    let marker_offsets = compute_marker_offsets_v0318(old_editable);
1035    let no_edit_tag = marker_tag(nearest_marker_number(cursor_offset_in_new, &marker_offsets));
1036    encode_from_old_and_new_impl(
1037        old_editable,
1038        new_editable,
1039        cursor_offset_in_new,
1040        cursor_marker,
1041        end_marker,
1042        &no_edit_tag,
1043        &marker_offsets,
1044        |block_idx| marker_tag(block_idx + 1),
1045    )
1046}
1047
1048#[cfg(test)]
1049mod tests {
1050    use super::*;
1051
1052    #[test]
1053    fn test_compute_marker_offsets_small_block() {
1054        let text = "aaa\nbbb\nccc\n";
1055        let offsets = compute_marker_offsets(text);
1056        assert_eq!(offsets, vec![0, text.len()]);
1057    }
1058
1059    #[test]
1060    fn test_compute_marker_offsets_blank_line_split() {
1061        let text = "aaa\nbbb\nccc\n\nddd\neee\nfff\n";
1062        let offsets = compute_marker_offsets(text);
1063        assert_eq!(offsets[0], 0);
1064        assert!(offsets.contains(&13), "offsets: {:?}", offsets);
1065        assert_eq!(*offsets.last().unwrap(), text.len());
1066    }
1067
1068    #[test]
1069    fn test_compute_marker_offsets_blank_line_split_overrides_pending_hard_cap_boundary() {
1070        let text = "\
1071class OCRDataframe(BaseModel):
1072    model_config = ConfigDict(arbitrary_types_allowed=True)
1073
1074    df: pl.DataFrame
1075
1076    def page(self, page_number: int = 0) -> \"OCRDataframe\":
1077        # Filter dataframe on specific page
1078        df_page = self.df.filter(pl.col(\"page\") == page_number)
1079        return OCRDataframe(df=df_page)
1080
1081    def get_text_cell(
1082        self,
1083        cell: Cell,
1084        margin: int = 0,
1085        page_number: Optional[int] = None,
1086        min_confidence: int = 50,
1087    ) -> Optional[str]:
1088        \"\"\"
1089        Get text corresponding to cell
1090";
1091        let offsets = compute_marker_offsets(text);
1092
1093        let def_start = text
1094            .find("    def get_text_cell(")
1095            .expect("def line exists");
1096        let self_start = text.find("        self,").expect("self line exists");
1097
1098        assert!(
1099            offsets.contains(&def_start),
1100            "expected boundary at def line start ({def_start}), got {offsets:?}"
1101        );
1102        assert!(
1103            !offsets.contains(&self_start),
1104            "did not expect boundary at self line start ({self_start}), got {offsets:?}"
1105        );
1106    }
1107
1108    #[test]
1109    fn test_compute_marker_offsets_blank_line_split_skips_closer_line() {
1110        let text = "\
1111impl Plugin for AhoySchedulePlugin {
1112    fn build(&self, app: &mut App) {
1113        app.configure_sets(
1114            self.schedule,
1115            (
1116                AhoySystems::MoveCharacters,
1117                AhoySystems::ApplyForcesToDynamicRigidBodies,
1118            )
1119                .chain()
1120                .before(PhysicsSystems::First),
1121        );
1122
1123    }
1124}
1125
1126/// System set used by all systems of `bevy_ahoy`.
1127#[derive(SystemSet, Debug, Clone, Copy, Hash, PartialEq, Eq)]
1128pub enum AhoySystems {
1129    MoveCharacters,
1130    ApplyForcesToDynamicRigidBodies,
1131}
1132";
1133        let offsets = compute_marker_offsets(text);
1134
1135        let closer_start = text.find("    }\n").expect("closer line exists");
1136        let doc_start = text
1137            .find("/// System set used by all systems of `bevy_ahoy`.")
1138            .expect("doc line exists");
1139
1140        assert!(
1141            !offsets.contains(&closer_start),
1142            "did not expect boundary at closer line start ({closer_start}), got {offsets:?}"
1143        );
1144        assert!(
1145            offsets.contains(&doc_start),
1146            "expected boundary at doc line start ({doc_start}), got {offsets:?}"
1147        );
1148    }
1149
1150    #[test]
1151    fn test_compute_marker_offsets_max_lines_split() {
1152        let text = "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n";
1153        let offsets = compute_marker_offsets(text);
1154        assert!(offsets.len() >= 3, "offsets: {:?}", offsets);
1155    }
1156
1157    #[test]
1158    fn test_compute_marker_offsets_hard_cap_nudges_past_closer_to_case_line() {
1159        let text = "a1\na2\na3\na4\na5\na6\na7\na8\n}\ncase 'x': {\nbody\n";
1160        let offsets = compute_marker_offsets(text);
1161
1162        let expected = text.find("case 'x': {").expect("case line exists");
1163        assert!(
1164            offsets.contains(&expected),
1165            "expected nudged boundary at case line start ({expected}), got {offsets:?}"
1166        );
1167    }
1168
1169    #[test]
1170    fn test_compute_marker_offsets_hard_cap_nudge_respects_max_forward_lines() {
1171        let text = "a1\na2\na3\na4\na5\na6\na7\na8\n}\n}\n}\n}\n}\ncase 'x': {\nbody\n";
1172        let offsets = compute_marker_offsets(text);
1173
1174        let case_start = text.find("case 'x': {").expect("case line exists");
1175        assert!(
1176            !offsets.contains(&case_start),
1177            "boundary should not nudge beyond max forward lines; offsets: {offsets:?}"
1178        );
1179    }
1180
1181    #[test]
1182    fn test_compute_marker_offsets_stay_sorted_when_hard_cap_boundary_nudges_forward() {
1183        let text = "\
1184aaaaaaaaaa = 1;
1185bbbbbbbbbb = 2;
1186cccccccccc = 3;
1187dddddddddd = 4;
1188eeeeeeeeee = 5;
1189ffffffffff = 6;
1190gggggggggg = 7;
1191hhhhhhhhhh = 8;
1192          };
1193        };
1194
1195        grafanaDashboards = {
1196          cluster-overview.spec = {
1197            inherit instanceSelector;
1198            folderRef = \"infrastructure\";
1199            json = builtins.readFile ./grafana/dashboards/cluster-overview.json;
1200          };
1201        };
1202";
1203        let offsets = compute_marker_offsets(text);
1204
1205        assert_eq!(offsets.first().copied(), Some(0), "offsets: {offsets:?}");
1206        assert_eq!(
1207            offsets.last().copied(),
1208            Some(text.len()),
1209            "offsets: {offsets:?}"
1210        );
1211        assert!(
1212            offsets.windows(2).all(|window| window[0] <= window[1]),
1213            "offsets must be sorted: {offsets:?}"
1214        );
1215    }
1216
1217    #[test]
1218    fn test_compute_marker_offsets_empty() {
1219        let offsets = compute_marker_offsets("");
1220        assert_eq!(offsets, vec![0, 0]);
1221    }
1222
1223    #[test]
1224    fn test_compute_v0327_editable_range_trims_to_marker_boundaries() {
1225        let text = (0..80).map(|_| "x\n").collect::<String>();
1226        let cursor_offset = text.find("x\nx\nx\nx\nx\n").expect("cursor anchor exists") + 40;
1227
1228        let candidate_range = grow_v0327_candidate_range(&text, cursor_offset, 20);
1229        let editable_range = compute_v0327_editable_range(&text, cursor_offset, 20);
1230        let marker_offsets = compute_marker_offsets_v0318(&text[candidate_range.clone()]);
1231        let relative_start = editable_range.start - candidate_range.start;
1232        let relative_end = editable_range.end - candidate_range.start;
1233
1234        assert!(
1235            marker_offsets.len() > 2,
1236            "expected interior markers: {marker_offsets:?}"
1237        );
1238        assert!(marker_offsets.contains(&relative_start));
1239        assert!(marker_offsets.contains(&relative_end));
1240        assert!(editable_range.start <= cursor_offset);
1241        assert!(editable_range.end >= cursor_offset);
1242        assert!(
1243            editable_range.start > candidate_range.start
1244                || editable_range.end < candidate_range.end,
1245            "expected at least one side to trim from {candidate_range:?} down to {editable_range:?}"
1246        );
1247    }
1248
1249    #[test]
1250    fn test_compute_marker_offsets_avoid_short_markdown_blocks() {
1251        let text = "\
1252# Spree Posts
1253
1254This is a Posts extension for [Spree Commerce](https://spreecommerce.org), built with Ruby on Rails.
1255
1256## Installation
1257
12581. Add this extension to your Gemfile with this line:
1259
1260    ```ruby
1261    bundle add spree_posts
1262    ```
1263
12642. Run the install generator
1265
1266    ```ruby
1267    bundle exec rails g spree_posts:install
1268    ```
1269
12703. Restart your server
1271
1272  If your server was running, restart it so that it can find the assets properly.
1273
1274## Developing
1275
12761. Create a dummy app
1277
1278    ```bash
1279    bundle update
1280    bundle exec rake test_app
1281    ```
1282
12832. Add your new code
12843. Run tests
1285
1286    ```bash
1287    bundle exec rspec
1288    ```
1289
1290When testing your applications integration with this extension you may use it's factories.
1291Simply add this require statement to your spec_helper:
1292
1293```ruby
1294require 'spree_posts/factories'
1295```
1296
1297## Releasing a new version
1298
1299```shell
1300bundle exec gem bump -p -t
1301bundle exec gem release
1302```
1303
1304For more options please see [gem-release README](https://github.com/svenfuchs/gem-release)
1305
1306## Contributing
1307
1308If you'd like to contribute, please take a look at the contributing guide.
1309";
1310        let offsets = compute_marker_offsets(text);
1311
1312        assert_eq!(offsets.first().copied(), Some(0), "offsets: {offsets:?}");
1313        assert_eq!(
1314            offsets.last().copied(),
1315            Some(text.len()),
1316            "offsets: {offsets:?}"
1317        );
1318
1319        for window in offsets.windows(2) {
1320            let block = &text[window[0]..window[1]];
1321            let line_count = block.lines().count();
1322            assert!(
1323                line_count >= V0316_MIN_BLOCK_LINES,
1324                "block too short: {line_count} lines in block {block:?} with offsets {offsets:?}"
1325            );
1326        }
1327    }
1328
1329    #[test]
1330    fn test_extract_marker_span() {
1331        let text = "<|marker_2|>\n    new content\n<|marker_3|>\n";
1332        let (start, end, content) = extract_marker_span(text).unwrap();
1333        assert_eq!(start, 2);
1334        assert_eq!(end, 3);
1335        assert_eq!(content, "    new content\n");
1336    }
1337
1338    #[test]
1339    fn test_extract_marker_span_multi_line() {
1340        let text = "<|marker_1|>\nline1\nline2\nline3\n<|marker_4|>";
1341        let (start, end, content) = extract_marker_span(text).unwrap();
1342        assert_eq!(start, 1);
1343        assert_eq!(end, 4);
1344        assert_eq!(content, "line1\nline2\nline3\n");
1345    }
1346
1347    #[test]
1348    fn test_apply_marker_span_basic() {
1349        let old = "aaa\nbbb\nccc\n";
1350        let output = "<|marker_1|>\naaa\nBBB\nccc\n<|marker_2|>";
1351        let result = apply_marker_span(old, output).unwrap();
1352        assert_eq!(result, "aaa\nBBB\nccc\n");
1353    }
1354
1355    #[test]
1356    fn test_apply_marker_span_preserves_trailing_blank_line() {
1357        let old = "/\nresult\n\n";
1358        let output = "<|marker_1|>\n//\nresult\n\n<|marker_2|>";
1359        let result = apply_marker_span(old, output).unwrap();
1360        assert_eq!(result, "//\nresult\n\n");
1361    }
1362
1363    #[test]
1364    fn test_encode_no_edits() {
1365        let old = "aaa\nbbb\nccc\n";
1366        let result = encode_from_old_and_new(
1367            old,
1368            old,
1369            None,
1370            "<|user_cursor|>",
1371            ">>>>>>> UPDATED\n",
1372            "NO_EDITS\n",
1373        )
1374        .unwrap();
1375        assert_eq!(result, "NO_EDITS\n>>>>>>> UPDATED\n");
1376    }
1377
1378    #[test]
1379    fn test_encode_with_change() {
1380        let old = "aaa\nbbb\nccc\n";
1381        let new = "aaa\nBBB\nccc\n";
1382        let result = encode_from_old_and_new(
1383            old,
1384            new,
1385            None,
1386            "<|user_cursor|>",
1387            ">>>>>>> UPDATED\n",
1388            "NO_EDITS\n",
1389        )
1390        .unwrap();
1391        assert!(result.contains("<|marker_1|>"));
1392        assert!(result.contains("<|marker_2|>"));
1393        assert!(result.contains("aaa\nBBB\nccc\n"));
1394        assert!(result.ends_with(">>>>>>> UPDATED\n"));
1395    }
1396
1397    #[test]
1398    fn test_roundtrip_encode_apply() {
1399        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
1400        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
1401        let encoded = encode_from_old_and_new(
1402            old,
1403            new,
1404            None,
1405            "<|user_cursor|>",
1406            ">>>>>>> UPDATED\n",
1407            "NO_EDITS\n",
1408        )
1409        .unwrap();
1410        let output = encoded
1411            .strip_suffix(">>>>>>> UPDATED\n")
1412            .expect("should have end marker");
1413        let reconstructed = apply_marker_span(old, output).unwrap();
1414        assert_eq!(reconstructed, new);
1415    }
1416
1417    #[test]
1418    fn test_extract_editable_region_from_markers_multi() {
1419        let text = "prefix\n<|marker_1|>\naaa\nbbb\n<|marker_2|>\nccc\nddd\n<|marker_3|>\nsuffix";
1420        let parsed = extract_editable_region_from_markers(text).unwrap();
1421        assert_eq!(parsed, "aaa\nbbb\nccc\nddd");
1422    }
1423
1424    #[test]
1425    fn test_extract_editable_region_two_markers() {
1426        let text = "<|marker_1|>\none\ntwo three\n<|marker_2|>";
1427        let parsed = extract_editable_region_from_markers(text).unwrap();
1428        assert_eq!(parsed, "one\ntwo three");
1429    }
1430
1431    #[test]
1432    fn test_encode_with_cursor() {
1433        let old = "aaa\nbbb\nccc\n";
1434        let new = "aaa\nBBB\nccc\n";
1435        let result = encode_from_old_and_new(
1436            old,
1437            new,
1438            Some(5),
1439            "<|user_cursor|>",
1440            ">>>>>>> UPDATED\n",
1441            "NO_EDITS\n",
1442        )
1443        .unwrap();
1444        assert!(result.contains("<|user_cursor|>"), "result: {result}");
1445        assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
1446    }
1447
1448    #[test]
1449    fn test_extract_marker_span_strips_intermediate_markers() {
1450        let text = "<|marker_2|>\nline1\n<|marker_3|>\nline2\n<|marker_4|>";
1451        let (start, end, content) = extract_marker_span(text).unwrap();
1452        assert_eq!(start, 2);
1453        assert_eq!(end, 4);
1454        assert_eq!(content, "line1\nline2\n");
1455    }
1456
1457    #[test]
1458    fn test_extract_marker_span_strips_multiple_intermediate_markers() {
1459        let text = "<|marker_1|>\naaa\n<|marker_2|>\nbbb\n<|marker_3|>\nccc\n<|marker_4|>";
1460        let (start, end, content) = extract_marker_span(text).unwrap();
1461        assert_eq!(start, 1);
1462        assert_eq!(end, 4);
1463        assert_eq!(content, "aaa\nbbb\nccc\n");
1464    }
1465
1466    #[test]
1467    fn test_apply_marker_span_with_extra_intermediate_marker() {
1468        let old = "aaa\nbbb\nccc\n";
1469        let output = "<|marker_1|>\naaa\n<|marker_1|>\nBBB\nccc\n<|marker_2|>";
1470        let result = apply_marker_span(old, output).unwrap();
1471        assert_eq!(result, "aaa\nBBB\nccc\n");
1472    }
1473
1474    #[test]
1475    fn test_strip_marker_tags_inline() {
1476        assert_eq!(strip_marker_tags("no markers here"), "no markers here");
1477        assert_eq!(strip_marker_tags("before<|marker_5|>after"), "beforeafter");
1478        assert_eq!(
1479            strip_marker_tags("line1\n<|marker_3|>\nline2"),
1480            "line1\nline2"
1481        );
1482    }
1483
1484    #[test]
1485    fn test_write_editable_with_markers_v0316_byte_exact() {
1486        let editable = "aaa\nbbb\nccc\n";
1487        let mut output = String::new();
1488        write_editable_with_markers_v0316(&mut output, editable, 4, "<|user_cursor|>");
1489        assert!(output.starts_with("<|marker_1|>"));
1490        assert!(output.contains("<|user_cursor|>"));
1491        let stripped = output.replace("<|user_cursor|>", "");
1492        let stripped = strip_marker_tags(&stripped);
1493        assert_eq!(stripped, editable);
1494    }
1495
1496    #[test]
1497    fn test_apply_marker_span_v0316_basic() {
1498        let old = "aaa\nbbb\nccc\n";
1499        let output = "<|marker_1|>aaa\nBBB\nccc\n<|marker_2|>";
1500        let result = apply_marker_span_v0316(old, output).unwrap();
1501        assert_eq!(result, "aaa\nBBB\nccc\n");
1502    }
1503
1504    #[test]
1505    fn test_apply_marker_span_v0316_no_edit() {
1506        let old = "aaa\nbbb\nccc\n";
1507        let output = "<|marker_1|><|marker_1|>";
1508        let result = apply_marker_span_v0316(old, output).unwrap();
1509        assert_eq!(result, old);
1510    }
1511
1512    #[test]
1513    fn test_apply_marker_span_v0316_no_edit_any_marker() {
1514        let old = "aaa\nbbb\nccc\n";
1515        let output = "<|marker_2|>ignored content<|marker_2|>";
1516        let result = apply_marker_span_v0316(old, output).unwrap();
1517        assert_eq!(result, old);
1518    }
1519
1520    #[test]
1521    fn test_apply_marker_span_v0316_multi_block() {
1522        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
1523        let marker_offsets = compute_marker_offsets(old);
1524        assert!(
1525            marker_offsets.len() >= 3,
1526            "expected at least 3 offsets, got {:?}",
1527            marker_offsets
1528        );
1529
1530        let new_content = "LINE1\nLINE2\nLINE3\n\nLINE5\nLINE6\nLINE7\nLINE8\n";
1531        let mut output = String::new();
1532        output.push_str("<|marker_1|>");
1533        for i in 0..marker_offsets.len() - 1 {
1534            if i > 0 {
1535                output.push_str(&marker_tag(i + 1));
1536            }
1537            let start = marker_offsets[i];
1538            let end = marker_offsets[i + 1];
1539            let block_len = end - start;
1540            output.push_str(&new_content[start..start + block_len]);
1541        }
1542        let last_marker_num = marker_offsets.len();
1543        output.push_str(&marker_tag(last_marker_num));
1544        let result = apply_marker_span_v0316(old, &output).unwrap();
1545        assert_eq!(result, new_content);
1546    }
1547
1548    #[test]
1549    fn test_apply_marker_span_v0316_byte_exact_no_normalization() {
1550        let old = "aaa\nbbb\nccc\n";
1551        let output = "<|marker_1|>aaa\nBBB\nccc<|marker_2|>";
1552        let result = apply_marker_span_v0316(old, output).unwrap();
1553        assert_eq!(result, "aaa\nBBB\nccc");
1554    }
1555
1556    #[test]
1557    fn test_encode_v0316_no_edits() {
1558        let old = "aaa\nbbb\nccc\n";
1559        let result =
1560            encode_from_old_and_new_v0316(old, old, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1561        assert!(result.ends_with("<|end|>"));
1562        let stripped = result.strip_suffix("<|end|>").unwrap();
1563        let result_parsed = apply_marker_span_v0316(old, stripped).unwrap();
1564        assert_eq!(result_parsed, old);
1565    }
1566
1567    #[test]
1568    fn test_encode_v0316_with_change() {
1569        let old = "aaa\nbbb\nccc\n";
1570        let new = "aaa\nBBB\nccc\n";
1571        let result =
1572            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1573        assert!(result.contains("<|marker_1|>"));
1574        assert!(result.contains("<|marker_2|>"));
1575        assert!(result.ends_with("<|end|>"));
1576    }
1577
1578    #[test]
1579    fn test_roundtrip_v0316() {
1580        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
1581        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
1582        let encoded =
1583            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1584        let stripped = encoded
1585            .strip_suffix("<|end|>")
1586            .expect("should have end marker");
1587        let reconstructed = apply_marker_span_v0316(old, stripped).unwrap();
1588        assert_eq!(reconstructed, new);
1589    }
1590
1591    #[test]
1592    fn test_roundtrip_v0316_with_cursor() {
1593        let old = "aaa\nbbb\nccc\n";
1594        let new = "aaa\nBBB\nccc\n";
1595        let result =
1596            encode_from_old_and_new_v0316(old, new, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1597        assert!(result.contains("<|user_cursor|>"), "result: {result}");
1598        assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
1599    }
1600
1601    #[test]
1602    fn test_roundtrip_v0316_multi_block_change() {
1603        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
1604        let new = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
1605        let encoded =
1606            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1607        let stripped = encoded
1608            .strip_suffix("<|end|>")
1609            .expect("should have end marker");
1610        let reconstructed = apply_marker_span_v0316(old, stripped).unwrap();
1611        assert_eq!(reconstructed, new);
1612    }
1613
1614    #[test]
1615    fn test_nearest_marker_number() {
1616        let offsets = vec![0, 10, 20, 30];
1617        assert_eq!(nearest_marker_number(Some(0), &offsets), 1);
1618        assert_eq!(nearest_marker_number(Some(9), &offsets), 2);
1619        assert_eq!(nearest_marker_number(Some(15), &offsets), 2);
1620        assert_eq!(nearest_marker_number(Some(25), &offsets), 3);
1621        assert_eq!(nearest_marker_number(Some(30), &offsets), 4);
1622        assert_eq!(nearest_marker_number(None, &offsets), 1);
1623    }
1624
1625    #[test]
1626    fn test_marker_tag_relative_formats_as_expected() {
1627        assert_eq!(marker_tag_relative(-2), "<|marker-2|>");
1628        assert_eq!(marker_tag_relative(-1), "<|marker-1|>");
1629        assert_eq!(marker_tag_relative(0), "<|marker-0|>");
1630        assert_eq!(marker_tag_relative(1), "<|marker+1|>");
1631        assert_eq!(marker_tag_relative(2), "<|marker+2|>");
1632    }
1633
1634    #[test]
1635    fn test_write_editable_with_markers_v0317_includes_relative_markers_and_cursor() {
1636        let editable = "aaa\nbbb\nccc\n";
1637        let mut output = String::new();
1638        write_editable_with_markers_v0317(&mut output, editable, 4, "<|user_cursor|>");
1639
1640        assert!(output.contains("<|marker-0|>"));
1641        assert!(output.contains("<|user_cursor|>"));
1642
1643        let stripped = output.replace("<|user_cursor|>", "");
1644        let stripped =
1645            collect_relative_marker_tags(&stripped)
1646                .iter()
1647                .fold(stripped.clone(), |acc, marker| {
1648                    let tag = &stripped[marker.tag_start..marker.tag_end];
1649                    acc.replace(tag, "")
1650                });
1651        assert_eq!(stripped, editable);
1652    }
1653
1654    #[test]
1655    fn test_apply_marker_span_v0317_basic() {
1656        let old = "aaa\nbbb\nccc\n";
1657        let output = "<|marker-0|>aaa\nBBB\nccc\n<|marker+1|>";
1658        let result = apply_marker_span_v0317(old, output, Some(0)).unwrap();
1659        assert_eq!(result, "aaa\nBBB\nccc\n");
1660    }
1661
1662    #[test]
1663    fn test_apply_marker_span_v0317_no_edit() {
1664        let old = "aaa\nbbb\nccc\n";
1665        let output = "<|marker-0|><|marker-0|>";
1666        let result = apply_marker_span_v0317(old, output, Some(0)).unwrap();
1667        assert_eq!(result, old);
1668    }
1669
1670    #[test]
1671    fn test_encode_v0317_no_edits() {
1672        let old = "aaa\nbbb\nccc\n";
1673        let result =
1674            encode_from_old_and_new_v0317(old, old, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1675        assert_eq!(result, "<|marker-0|><|marker-0|><|end|>");
1676    }
1677
1678    #[test]
1679    fn test_roundtrip_v0317() {
1680        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
1681        let new = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
1682        let cursor = Some(6);
1683
1684        let encoded =
1685            encode_from_old_and_new_v0317(old, new, cursor, "<|user_cursor|>", "<|end|>").unwrap();
1686        let stripped = encoded
1687            .strip_suffix("<|end|>")
1688            .expect("should have end marker");
1689        let stripped = stripped.replace("<|user_cursor|>", "");
1690        let reconstructed = apply_marker_span_v0317(old, &stripped, cursor).unwrap();
1691        assert_eq!(reconstructed, new);
1692    }
1693
1694    #[test]
1695    fn test_roundtrip_v0317_with_cursor_marker() {
1696        let old = "aaa\nbbb\nccc\n";
1697        let new = "aaa\nBBB\nccc\n";
1698        let result =
1699            encode_from_old_and_new_v0317(old, new, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1700        assert!(result.contains("<|user_cursor|>"), "result: {result}");
1701        assert!(result.contains("<|marker-0|>"), "result: {result}");
1702    }
1703
1704    #[test]
1705    fn test_compute_marker_offsets_v0318_uses_larger_block_sizes() {
1706        let text = "l1\nl2\nl3\n\nl5\nl6\nl7\nl8\nl9\nl10\nl11\nl12\nl13\n";
1707        let v0316_offsets = compute_marker_offsets(text);
1708        let v0318_offsets = compute_marker_offsets_v0318(text);
1709
1710        assert!(v0318_offsets.len() < v0316_offsets.len());
1711        assert_eq!(v0316_offsets.first().copied(), Some(0));
1712        assert_eq!(v0318_offsets.first().copied(), Some(0));
1713        assert_eq!(v0316_offsets.last().copied(), Some(text.len()));
1714        assert_eq!(v0318_offsets.last().copied(), Some(text.len()));
1715    }
1716
1717    #[test]
1718    fn test_roundtrip_v0318() {
1719        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
1720        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
1721        let encoded =
1722            encode_from_old_and_new_v0318(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1723        let stripped = encoded
1724            .strip_suffix("<|end|>")
1725            .expect("should have end marker");
1726        let reconstructed = apply_marker_span_v0318(old, stripped).unwrap();
1727        assert_eq!(reconstructed, new);
1728    }
1729
1730    #[test]
1731    fn test_roundtrip_v0318_append_at_end_of_editable_region() {
1732        let old = "line1\nline2\nline3\n";
1733        let new = "line1\nline2\nline3\nline4\n";
1734        let encoded =
1735            encode_from_old_and_new_v0318(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1736
1737        assert_ne!(encoded, "<|marker_2|><|end|>");
1738
1739        let stripped = encoded
1740            .strip_suffix("<|end|>")
1741            .expect("should have end marker");
1742        let reconstructed = apply_marker_span_v0318(old, stripped).unwrap();
1743        assert_eq!(reconstructed, new);
1744    }
1745
1746    #[test]
1747    fn test_roundtrip_v0318_insert_at_internal_marker_boundary() {
1748        let old = "alpha\nbeta\n\ngamma\ndelta\n";
1749        let new = "alpha\nbeta\n\ninserted\ngamma\ndelta\n";
1750        let encoded =
1751            encode_from_old_and_new_v0318(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1752
1753        let stripped = encoded
1754            .strip_suffix("<|end|>")
1755            .expect("should have end marker");
1756        let reconstructed = apply_marker_span_v0318(old, stripped).unwrap();
1757        assert_eq!(reconstructed, new);
1758    }
1759
1760    #[test]
1761    fn test_encode_v0317_markers_stay_on_line_boundaries() {
1762        let old = "\
1763\t\t\t\tcontinue outer;
1764\t\t\t}
1765\t\t}
1766\t}
1767
1768\tconst intersectionObserver = new IntersectionObserver((entries) => {
1769\t\tfor (const entry of entries) {
1770\t\t\tif (entry.isIntersecting) {
1771\t\t\t\tintersectionObserver.unobserve(entry.target);
1772\t\t\t\tanchorPreload(/** @type {HTMLAnchorElement} */ (entry.target));
1773\t\t\t}
1774\t\t}
1775\t});
1776
1777\tconst observer = new MutationObserver(() => {
1778\t\tconst links = /** @type {NodeListOf<HTMLAnchorElement>} */ (
1779\t\t\tdocument.querySelectorAll('a[data-preload]')
1780\t\t);
1781
1782\t\tfor (const link of links) {
1783\t\t\tif (linkSet.has(link)) continue;
1784\t\t\tlinkSet.add(link);
1785
1786\t\t\tswitch (link.dataset.preload) {
1787\t\t\t\tcase '':
1788\t\t\t\tcase 'true':
1789\t\t\t\tcase 'hover': {
1790\t\t\t\t\tlink.addEventListener('mouseenter', function callback() {
1791\t\t\t\t\t\tlink.removeEventListener('mouseenter', callback);
1792\t\t\t\t\t\tanchorPreload(link);
1793\t\t\t\t\t});
1794";
1795        let new = old.replacen(
1796            "\t\t\t\tcase 'true':\n",
1797            "\t\t\t\tcase 'TRUE':<|user_cursor|>\n",
1798            1,
1799        );
1800
1801        let cursor_offset = new.find("<|user_cursor|>").expect("cursor marker in new");
1802        let new_without_cursor = new.replace("<|user_cursor|>", "");
1803
1804        let encoded = encode_from_old_and_new_v0317(
1805            old,
1806            &new_without_cursor,
1807            Some(cursor_offset),
1808            "<|user_cursor|>",
1809            "<|end|>",
1810        )
1811        .unwrap();
1812
1813        let core = encoded.strip_suffix("<|end|>").unwrap_or(&encoded);
1814        for marker in collect_relative_marker_tags(core) {
1815            let tag_start = marker.tag_start;
1816            assert!(
1817                tag_start == 0 || core.as_bytes()[tag_start - 1] == b'\n',
1818                "marker not at line boundary: {} in output:\n{}",
1819                marker_tag_relative(marker.value),
1820                core
1821            );
1822        }
1823    }
1824}