1use anyhow::{Context as _, Result, anyhow};
2
/// Opening text of an absolute marker tag; `marker_tag` appends the marker
/// number and `MARKER_TAG_SUFFIX` to it.
pub const MARKER_TAG_PREFIX: &str = "<|marker_";
/// Closing text of a marker tag; used when building and when scanning tags.
pub const MARKER_TAG_SUFFIX: &str = "|>";
/// Opening text searched for when collecting relative marker tags
/// (`<|marker+N|>` / `<|marker-N|>`). Note that it is also a prefix of
/// `MARKER_TAG_PREFIX`.
pub const RELATIVE_MARKER_TAG_PREFIX: &str = "<|marker";
// Minimum / maximum block size (in lines) used by `compute_marker_offsets`.
const V0316_MIN_BLOCK_LINES: usize = 3;
const V0316_MAX_BLOCK_LINES: usize = 8;
// Minimum / maximum block size (in lines) used by `compute_marker_offsets_v0318`.
const V0318_MIN_BLOCK_LINES: usize = 6;
const V0318_MAX_BLOCK_LINES: usize = 16;
// Size of the window (in lines, starting at the candidate line) that
// `skip_to_good_start` scans for a better boundary line.
const MAX_NUDGE_LINES: usize = 5;
// NOTE(review): the four end markers below are identical strings and are not
// referenced in this part of the file; presumably callers pass them as the
// `end_marker` argument of the encode functions — confirm against callers.
pub const V0316_END_MARKER: &str = "<[end▁of▁sentence]>";
pub const V0317_END_MARKER: &str = "<[end▁of▁sentence]>";
pub const V0318_END_MARKER: &str = "<[end▁of▁sentence]>";
pub const V0327_END_MARKER: &str = "<[end▁of▁sentence]>";
15
16pub fn marker_tag(number: usize) -> String {
17 format!("{MARKER_TAG_PREFIX}{number}{MARKER_TAG_SUFFIX}")
18}
19
/// Build a relative marker tag for `delta`.
///
/// Positive deltas are written with an explicit `+` sign, negative deltas keep
/// their `-` sign, and zero is always spelled `<|marker-0|>`.
pub fn marker_tag_relative(delta: isize) -> String {
    match delta {
        0 => "<|marker-0|>".to_owned(),
        forward if forward > 0 => format!("<|marker+{forward}|>"),
        backward => format!("<|marker{backward}|>"),
    }
}
29
/// Per-line facts gathered by `collect_line_info` and used when choosing
/// marker boundaries.
struct LineInfo {
    /// Byte offset of the line's first byte within the scanned text.
    start: usize,
    /// True when the line is empty after trimming whitespace.
    is_blank: bool,
    /// True when the line is non-blank and is not a structural tail (see
    /// `is_structural_tail`).
    is_good_start: bool,
}
35
36fn collect_line_info(text: &str) -> Vec<LineInfo> {
37 let mut lines = Vec::new();
38 let mut offset = 0;
39 for line in text.split('\n') {
40 let trimmed = line.trim();
41 let is_blank = trimmed.is_empty();
42 let is_good_start = !is_blank && !is_structural_tail(trimmed);
43 lines.push(LineInfo {
44 start: offset,
45 is_blank,
46 is_good_start,
47 });
48 offset += line.len() + 1;
49 }
50 // split('\n') on "abc\n" yields ["abc", ""] — drop the phantom trailing
51 // empty element when the text ends with '\n'.
52 if text.ends_with('\n') && lines.len() > 1 {
53 lines.pop();
54 }
55 lines
56}
57
/// Report whether an already-trimmed line looks like the tail of a construct:
/// it begins with a closing bracket (`}`, `]`, `)`), or — ignoring trailing
/// semicolons — consists solely of `break`, `continue`, `return`, `throw`
/// or `end`.
fn is_structural_tail(trimmed_line: &str) -> bool {
    const BARE_KEYWORDS: [&str; 5] = ["break", "continue", "return", "throw", "end"];

    let opens_with_closer = matches!(trimmed_line.chars().next(), Some('}' | ']' | ')'));
    opens_with_closer || BARE_KEYWORDS.contains(&trimmed_line.trim_end_matches(';'))
}
67
68/// Starting from line `from`, scan up to `MAX_NUDGE_LINES` forward to find a
69/// line with `is_good_start`. Returns `None` if no suitable line is found.
70fn skip_to_good_start(lines: &[LineInfo], from: usize) -> Option<usize> {
71 (from..lines.len().min(from + MAX_NUDGE_LINES)).find(|&i| lines[i].is_good_start)
72}
73
/// Compute byte offsets within `editable_text` where marker boundaries should
/// be placed.
///
/// Returns a sorted `Vec<usize>` that always starts with `0` and ends with
/// `editable_text.len()`. Interior offsets are placed at line boundaries
/// (right after a `\n`), preferring blank-line boundaries when available and
/// respecting `min_block_lines` / `max_block_lines` constraints.
///
/// For empty input the result is `[0, 0]`, i.e. the start and end boundaries
/// coincide.
fn compute_marker_offsets_with_limits(
    editable_text: &str,
    min_block_lines: usize,
    max_block_lines: usize,
) -> Vec<usize> {
    if editable_text.is_empty() {
        return vec![0, 0];
    }

    let lines = collect_line_info(editable_text);
    let mut offsets = vec![0usize];
    // Index of the line at which the most recent boundary was placed.
    let mut last_boundary_line = 0;
    let mut i = 0;

    while i < lines.len() {
        // Number of lines accumulated since the last boundary.
        let gap = i - last_boundary_line;

        // Blank-line split: non-blank line following blank line(s) with enough
        // accumulated lines.
        if gap >= min_block_lines && !lines[i].is_blank && i > 0 && lines[i - 1].is_blank {
            // Nudge forward past structural-tail lines when possible; fall
            // back to the current line if no good start is within reach.
            let target = if lines[i].is_good_start {
                i
            } else {
                skip_to_good_start(&lines, i).unwrap_or(i)
            };
            // Only split if the remaining tail is itself at least
            // `min_block_lines` long and the boundary advances strictly.
            if lines.len() - target >= min_block_lines
                && lines[target].start > *offsets.last().unwrap_or(&0)
            {
                offsets.push(lines[target].start);
                last_boundary_line = target;
                i = target + 1;
                continue;
            }
        }

        // Hard cap: too many lines without a split.
        if gap >= max_block_lines {
            let target = skip_to_good_start(&lines, i).unwrap_or(i);
            // Keep offsets strictly increasing.
            if lines[target].start > *offsets.last().unwrap_or(&0) {
                offsets.push(lines[target].start);
                last_boundary_line = target;
                i = target + 1;
                continue;
            }
        }

        i += 1;
    }

    // Always close the final block at the end of the text.
    let end = editable_text.len();
    if *offsets.last().unwrap_or(&0) != end {
        offsets.push(end);
    }

    offsets
}
137
138/// Compute byte offsets within `editable_text` for the V0316/V0317 block sizing rules.
139pub fn compute_marker_offsets(editable_text: &str) -> Vec<usize> {
140 compute_marker_offsets_with_limits(editable_text, V0316_MIN_BLOCK_LINES, V0316_MAX_BLOCK_LINES)
141}
142
143pub fn compute_marker_offsets_v0318(editable_text: &str) -> Vec<usize> {
144 compute_marker_offsets_with_limits(editable_text, V0318_MIN_BLOCK_LINES, V0318_MAX_BLOCK_LINES)
145}
146
147fn line_start_at_or_before(text: &str, offset: usize) -> usize {
148 let bounded_offset = text.floor_char_boundary(offset.min(text.len()));
149 text[..bounded_offset]
150 .rfind('\n')
151 .map(|index| index + 1)
152 .unwrap_or(0)
153}
154
155fn line_end_at_or_after(text: &str, offset: usize) -> usize {
156 let bounded_offset = text.floor_char_boundary(offset.min(text.len()));
157 if bounded_offset >= text.len() {
158 return text.len();
159 }
160
161 text[bounded_offset..]
162 .find('\n')
163 .map(|index| bounded_offset + index + 1)
164 .unwrap_or(text.len())
165}
166
167fn grow_v0327_candidate_range(
168 text: &str,
169 cursor_offset: usize,
170 editable_token_limit: usize,
171) -> std::ops::Range<usize> {
172 if text.is_empty() {
173 return 0..0;
174 }
175
176 let byte_budget = editable_token_limit.saturating_mul(3).max(1);
177 let half_budget = byte_budget / 2;
178
179 let mut start = cursor_offset.saturating_sub(half_budget);
180 let mut end = start.saturating_add(byte_budget).min(text.len());
181
182 if end.saturating_sub(start) < byte_budget {
183 start = end.saturating_sub(byte_budget);
184 }
185
186 start = line_start_at_or_before(text, start);
187 end = line_end_at_or_after(text, end);
188
189 if start < end {
190 start..end
191 } else {
192 let line_start = line_start_at_or_before(text, cursor_offset);
193 let line_end = line_end_at_or_after(text, cursor_offset);
194 line_start..line_end.max(line_start)
195 }
196}
197
/// Shrink `candidate_range` so that its edges land on V0318 marker boundaries
/// while still covering the cursor.
///
/// Marker offsets are computed over the candidate text only. If there are no
/// interior markers the candidate range is returned unchanged. Otherwise the
/// start moves to the first interior marker when the cursor is at or after it,
/// and the end moves to the last interior marker when the cursor is at or
/// before it. If that yields an empty range, the block containing the cursor
/// is used, and failing that the original candidate range.
fn trim_v0327_candidate_range_to_markers(
    text: &str,
    candidate_range: std::ops::Range<usize>,
    cursor_offset: usize,
) -> std::ops::Range<usize> {
    let candidate_text = &text[candidate_range.clone()];
    let marker_offsets = compute_marker_offsets_v0318(candidate_text);

    // Only the start and end boundaries exist: nothing to trim to.
    if marker_offsets.len() <= 2 {
        return candidate_range;
    }

    // Cursor position relative to the candidate text, clamped into it.
    let candidate_cursor_offset = cursor_offset
        .saturating_sub(candidate_range.start)
        .min(candidate_text.len());
    // Drop the first block only when the cursor is not inside it.
    let first_internal_marker_index = if candidate_cursor_offset >= marker_offsets[1] {
        1
    } else {
        0
    };
    let last_internal_marker_index = marker_offsets.len() - 2;
    let last_marker_index = marker_offsets.len() - 1;
    // Drop the last block only when the cursor is not inside it.
    let end_marker_index = if candidate_cursor_offset <= marker_offsets[last_internal_marker_index]
    {
        last_internal_marker_index
    } else {
        last_marker_index
    };

    // Translate candidate-relative marker offsets back to offsets in `text`.
    let trimmed_start = candidate_range.start + marker_offsets[first_internal_marker_index];
    let trimmed_end = candidate_range.start + marker_offsets[end_marker_index];

    if trimmed_start < trimmed_end {
        trimmed_start..trimmed_end
    } else {
        // Both edges collapsed onto the same marker: keep the cursor's block.
        let block_index = cursor_block_index(Some(candidate_cursor_offset), &marker_offsets);
        let start = candidate_range.start + marker_offsets[block_index];
        let end = candidate_range.start + marker_offsets[block_index + 1];
        if start < end {
            start..end
        } else {
            candidate_range
        }
    }
}
243
244pub fn compute_v0327_editable_range(
245 text: &str,
246 cursor_offset: usize,
247 editable_token_limit: usize,
248) -> std::ops::Range<usize> {
249 let candidate_range = grow_v0327_candidate_range(text, cursor_offset, editable_token_limit);
250 trim_v0327_candidate_range_to_markers(text, candidate_range, cursor_offset)
251}
252
253/// Write the editable region content with marker tags, inserting the cursor
254/// marker at the given offset within the editable text.
255pub fn write_editable_with_markers(
256 output: &mut String,
257 editable_text: &str,
258 cursor_offset_in_editable: usize,
259 cursor_marker: &str,
260) {
261 let marker_offsets = compute_marker_offsets(editable_text);
262 let mut cursor_placed = false;
263 for (i, &offset) in marker_offsets.iter().enumerate() {
264 let marker_num = i + 1;
265 if !output.is_empty() && !output.ends_with('\n') {
266 output.push('\n');
267 }
268 output.push_str(&marker_tag(marker_num));
269
270 if let Some(&next_offset) = marker_offsets.get(i + 1) {
271 output.push('\n');
272 let block = &editable_text[offset..next_offset];
273 if !cursor_placed
274 && cursor_offset_in_editable >= offset
275 && cursor_offset_in_editable <= next_offset
276 {
277 cursor_placed = true;
278 let cursor_in_block = cursor_offset_in_editable - offset;
279 output.push_str(&block[..cursor_in_block]);
280 output.push_str(cursor_marker);
281 output.push_str(&block[cursor_in_block..]);
282 } else {
283 output.push_str(block);
284 }
285 }
286 }
287}
288
289/// Strip any `<|marker_N|>` tags from `text`.
290///
291/// When a marker tag sits on its own line (followed by `\n`), the trailing
292/// newline is also removed so the surrounding lines stay joined naturally.
293fn strip_marker_tags(text: &str) -> String {
294 let mut result = String::with_capacity(text.len());
295 let mut pos = 0;
296 let bytes = text.as_bytes();
297 while let Some(rel) = text[pos..].find(MARKER_TAG_PREFIX) {
298 result.push_str(&text[pos..pos + rel]);
299 let num_start = pos + rel + MARKER_TAG_PREFIX.len();
300 if let Some(suffix_rel) = text[num_start..].find(MARKER_TAG_SUFFIX) {
301 let mut tag_end = num_start + suffix_rel + MARKER_TAG_SUFFIX.len();
302 if bytes.get(tag_end) == Some(&b'\n') {
303 tag_end += 1;
304 }
305 pos = tag_end;
306 } else {
307 result.push_str(MARKER_TAG_PREFIX);
308 pos = num_start;
309 }
310 }
311 result.push_str(&text[pos..]);
312 result
313}
314
315/// Parse model output that uses the marker format.
316///
317/// Returns `(start_marker_num, end_marker_num, content_between_markers)`.
318/// The leading format-level newline after the start marker is stripped.
319/// Trailing newlines are preserved so blank-line endings in the editable
320/// region are not lost.
321///
322/// Any extra intermediate marker tags that the model may have inserted
323/// between the first and last markers are stripped from the returned content.
324pub fn extract_marker_span(text: &str) -> Result<(usize, usize, String)> {
325 let first_tag_start = text
326 .find(MARKER_TAG_PREFIX)
327 .context("no start marker found in output")?;
328 let first_num_start = first_tag_start + MARKER_TAG_PREFIX.len();
329 let first_num_end = text[first_num_start..]
330 .find(MARKER_TAG_SUFFIX)
331 .map(|i| i + first_num_start)
332 .context("malformed start marker tag")?;
333 let start_num: usize = text[first_num_start..first_num_end]
334 .parse()
335 .context("start marker number is not a valid integer")?;
336 let first_tag_end = first_num_end + MARKER_TAG_SUFFIX.len();
337
338 let last_tag_start = text
339 .rfind(MARKER_TAG_PREFIX)
340 .context("no end marker found in output")?;
341 let last_num_start = last_tag_start + MARKER_TAG_PREFIX.len();
342 let last_num_end = text[last_num_start..]
343 .find(MARKER_TAG_SUFFIX)
344 .map(|i| i + last_num_start)
345 .context("malformed end marker tag")?;
346 let end_num: usize = text[last_num_start..last_num_end]
347 .parse()
348 .context("end marker number is not a valid integer")?;
349
350 if start_num == end_num {
351 return Err(anyhow!(
352 "start and end markers are the same (marker {})",
353 start_num
354 ));
355 }
356
357 let mut content_start = first_tag_end;
358 if text.as_bytes().get(content_start) == Some(&b'\n') {
359 content_start += 1;
360 }
361 let content_end = last_tag_start;
362
363 let content = &text[content_start..content_end.max(content_start)];
364 let content = strip_marker_tags(content);
365 Ok((start_num, end_num, content))
366}
367
368/// Given old editable text and model output with marker span, reconstruct the
369/// full new editable region.
370pub fn apply_marker_span(old_editable: &str, output: &str) -> Result<String> {
371 let (start_num, end_num, raw_new_span) = extract_marker_span(output)?;
372 let marker_offsets = compute_marker_offsets(old_editable);
373
374 let start_idx = start_num
375 .checked_sub(1)
376 .context("marker numbers are 1-indexed")?;
377 let end_idx = end_num
378 .checked_sub(1)
379 .context("marker numbers are 1-indexed")?;
380 let start_byte = *marker_offsets
381 .get(start_idx)
382 .context("start marker number out of range")?;
383 let end_byte = *marker_offsets
384 .get(end_idx)
385 .context("end marker number out of range")?;
386
387 if start_byte > end_byte {
388 return Err(anyhow!("start marker must come before end marker"));
389 }
390
391 let old_span = &old_editable[start_byte..end_byte];
392 let mut new_span = raw_new_span;
393 if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
394 new_span.push('\n');
395 }
396 if !old_span.ends_with('\n') && new_span.ends_with('\n') {
397 new_span.pop();
398 }
399
400 let mut result = String::new();
401 result.push_str(&old_editable[..start_byte]);
402 result.push_str(&new_span);
403 result.push_str(&old_editable[end_byte..]);
404
405 Ok(result)
406}
407
408/// Compare old and new editable text, find the minimal marker span that covers
409/// all changes, and encode the result with marker tags.
410pub fn encode_from_old_and_new(
411 old_editable: &str,
412 new_editable: &str,
413 cursor_offset_in_new: Option<usize>,
414 cursor_marker: &str,
415 end_marker: &str,
416 no_edits_marker: &str,
417) -> Result<String> {
418 if old_editable == new_editable {
419 return Ok(format!("{no_edits_marker}{end_marker}"));
420 }
421
422 let marker_offsets = compute_marker_offsets(old_editable);
423 let (common_prefix, common_suffix) =
424 common_prefix_suffix(old_editable.as_bytes(), new_editable.as_bytes());
425 let change_end_in_old = old_editable.len() - common_suffix;
426
427 let start_marker_idx = marker_offsets
428 .iter()
429 .rposition(|&offset| offset <= common_prefix)
430 .unwrap_or(0);
431 let end_marker_idx = marker_offsets
432 .iter()
433 .position(|&offset| offset >= change_end_in_old)
434 .unwrap_or(marker_offsets.len() - 1);
435
436 let old_start = marker_offsets[start_marker_idx];
437 let old_end = marker_offsets[end_marker_idx];
438
439 let new_start = old_start;
440 let new_end = new_editable
441 .len()
442 .saturating_sub(old_editable.len().saturating_sub(old_end));
443
444 let new_span = &new_editable[new_start..new_end];
445
446 let start_marker_num = start_marker_idx + 1;
447 let end_marker_num = end_marker_idx + 1;
448
449 let mut result = String::new();
450 result.push_str(&marker_tag(start_marker_num));
451 result.push('\n');
452
453 if let Some(cursor_offset) = cursor_offset_in_new {
454 if cursor_offset >= new_start && cursor_offset <= new_end {
455 let cursor_in_span = cursor_offset - new_start;
456 let bounded = cursor_in_span.min(new_span.len());
457 result.push_str(&new_span[..bounded]);
458 result.push_str(cursor_marker);
459 result.push_str(&new_span[bounded..]);
460 } else {
461 result.push_str(new_span);
462 }
463 } else {
464 result.push_str(new_span);
465 }
466
467 if !result.ends_with('\n') {
468 result.push('\n');
469 }
470 result.push_str(&marker_tag(end_marker_num));
471 result.push('\n');
472 result.push_str(end_marker);
473
474 Ok(result)
475}
476
477/// Extract the full editable region from text that uses marker tags.
478///
479/// Returns the concatenation of all block contents between the first and last
480/// markers, with intermediate marker tags stripped.
481pub fn extract_editable_region_from_markers(text: &str) -> Option<String> {
482 let first_marker_start = text.find(MARKER_TAG_PREFIX)?;
483
484 let mut markers: Vec<(usize, usize)> = Vec::new();
485 let mut search_start = first_marker_start;
486 while let Some(rel_pos) = text[search_start..].find(MARKER_TAG_PREFIX) {
487 let tag_start = search_start + rel_pos;
488 let num_start = tag_start + MARKER_TAG_PREFIX.len();
489 let num_end = text[num_start..].find(MARKER_TAG_SUFFIX)?;
490 let tag_end = num_start + num_end + MARKER_TAG_SUFFIX.len();
491 markers.push((tag_start, tag_end));
492 search_start = tag_end;
493 }
494
495 if markers.len() < 2 {
496 return None;
497 }
498
499 let (_, first_tag_end) = markers[0];
500 let (last_tag_start, _) = markers[markers.len() - 1];
501
502 let mut content_start = first_tag_end;
503 if text.as_bytes().get(content_start) == Some(&b'\n') {
504 content_start += 1;
505 }
506 let mut content_end = last_tag_start;
507 if content_end > content_start && text.as_bytes().get(content_end - 1) == Some(&b'\n') {
508 content_end -= 1;
509 }
510
511 let raw = &text[content_start..content_end];
512 let result = strip_marker_tags(raw);
513 let result = result.strip_suffix('\n').unwrap_or(&result).to_string();
514 Some(result)
515}
516
/// A marker tag located in a piece of text by `collect_tags`.
struct ParsedTag {
    /// The tag's parsed payload: a marker number for absolute tags, or a
    /// signed delta for relative tags.
    value: isize,
    /// Byte offset of the first byte of the tag's prefix.
    tag_start: usize,
    /// Byte offset just past the tag's closing suffix.
    tag_end: usize,
}
522
523fn collect_tags(text: &str, prefix: &str, parse: fn(&str) -> Option<isize>) -> Vec<ParsedTag> {
524 let mut tags = Vec::new();
525 let mut search_from = 0;
526 while let Some(rel_pos) = text[search_from..].find(prefix) {
527 let tag_start = search_from + rel_pos;
528 let payload_start = tag_start + prefix.len();
529 if let Some(suffix_rel) = text[payload_start..].find(MARKER_TAG_SUFFIX) {
530 let payload_end = payload_start + suffix_rel;
531 if let Some(value) = parse(&text[payload_start..payload_end]) {
532 let tag_end = payload_end + MARKER_TAG_SUFFIX.len();
533 tags.push(ParsedTag {
534 value,
535 tag_start,
536 tag_end,
537 });
538 search_from = tag_end;
539 continue;
540 }
541 }
542 search_from = tag_start + prefix.len();
543 }
544 tags
545}
546
547fn collect_marker_tags(text: &str) -> Vec<ParsedTag> {
548 collect_tags(text, MARKER_TAG_PREFIX, |s| {
549 s.parse::<usize>().ok().map(|n| n as isize)
550 })
551}
552
553fn collect_relative_marker_tags(text: &str) -> Vec<ParsedTag> {
554 collect_tags(text, RELATIVE_MARKER_TAG_PREFIX, |s| {
555 s.parse::<isize>().ok()
556 })
557}
558
/// Return the 1-based number of the marker whose offset is closest to the
/// cursor.
///
/// A missing cursor is treated as offset `0`. Ties go to the earlier marker,
/// and an empty `marker_offsets` yields `1`.
///
/// The distance is computed with `usize::abs_diff`, so it is exact for every
/// pair of offsets; no signed casts are involved.
pub fn nearest_marker_number(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
    let cursor = cursor_offset.unwrap_or(0);
    marker_offsets
        .iter()
        .enumerate()
        .min_by_key(|&(_, &offset)| offset.abs_diff(cursor))
        .map_or(1, |(idx, _)| idx + 1)
}
568
/// Index of the block (pair of consecutive marker offsets) whose half-open
/// range `[start, end)` contains the cursor.
///
/// A missing cursor is treated as offset `0`. When no block contains the
/// cursor, the last block's index is returned (`0` if there are fewer than
/// two offsets).
fn cursor_block_index(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
    let cursor = cursor_offset.unwrap_or(0);
    for (block, bounds) in marker_offsets.windows(2).enumerate() {
        if (bounds[0]..bounds[1]).contains(&cursor) {
            return block;
        }
    }
    marker_offsets.len().saturating_sub(2)
}
576
/// Lengths of the longest common prefix and longest common suffix of `a` and
/// `b`, in bytes. The suffix is measured only over what remains after the
/// prefix, so the two never overlap.
fn common_prefix_suffix(a: &[u8], b: &[u8]) -> (usize, usize) {
    let prefix = a
        .iter()
        .zip(b)
        .take_while(|(left, right)| left == right)
        .count();

    // Compare the leftovers back-to-front; `zip` stops at the shorter one.
    let tail_a = &a[prefix..];
    let tail_b = &b[prefix..];
    let suffix = tail_a
        .iter()
        .rev()
        .zip(tail_b.iter().rev())
        .take_while(|(left, right)| left == right)
        .count();

    (prefix, suffix)
}
590
/// Translate a byte offset inside the old span into the matching offset
/// inside the new span.
///
/// Offsets within the shared prefix map to themselves, offsets within the
/// shared suffix keep their distance from the end of the span, and offsets in
/// the changed middle are scaled proportionally to the size of the changed
/// region in the new span.
fn map_boundary_offset(
    old_rel: usize,
    old_span_len: usize,
    new_span_len: usize,
    span_common_prefix: usize,
    span_common_suffix: usize,
) -> usize {
    if old_rel <= span_common_prefix {
        return old_rel;
    }
    if old_rel >= old_span_len - span_common_suffix {
        let distance_from_end = old_span_len - old_rel;
        return new_span_len - distance_from_end;
    }

    let unchanged = |span_len: usize| {
        span_len
            .saturating_sub(span_common_prefix)
            .saturating_sub(span_common_suffix)
    };
    let old_changed_len = unchanged(old_span_len);
    let new_changed_len = unchanged(new_span_len);

    let numerator = (old_rel - span_common_prefix) * new_changed_len;
    let scaled = numerator
        .checked_div(old_changed_len)
        .unwrap_or(new_changed_len);
    span_common_prefix + scaled
}
620
621fn snap_to_line_start(text: &str, offset: usize) -> usize {
622 let bounded = offset.min(text.len());
623 let bounded = text.floor_char_boundary(bounded);
624
625 if bounded >= text.len() {
626 return text.len();
627 }
628
629 if bounded == 0 || text.as_bytes().get(bounded - 1) == Some(&b'\n') {
630 return bounded;
631 }
632
633 if let Some(next_nl_rel) = text[bounded..].find('\n') {
634 let next = bounded + next_nl_rel + 1;
635 return text.floor_char_boundary(next.min(text.len()));
636 }
637
638 let prev_start = text[..bounded].rfind('\n').map(|idx| idx + 1).unwrap_or(0);
639 text.floor_char_boundary(prev_start)
640}
641
/// Append `editable_text` to `output` with a marker tag at every offset in
/// `marker_offsets`, without adding any extra newlines.
///
/// `tag_for_index` turns a boundary index (position within `marker_offsets`)
/// into the tag text. `cursor_marker` is inserted once, in the first block
/// whose byte range (inclusive of both ends) contains
/// `cursor_offset_in_editable`.
fn write_editable_with_markers_impl(
    output: &mut String,
    editable_text: &str,
    cursor_offset_in_editable: usize,
    cursor_marker: &str,
    marker_offsets: &[usize],
    tag_for_index: impl Fn(usize) -> String,
) {
    let mut cursor_pending = true;

    for (boundary_idx, &block_start) in marker_offsets.iter().enumerate() {
        output.push_str(&tag_for_index(boundary_idx));

        // The final boundary only gets its tag; no block follows it.
        let Some(&block_end) = marker_offsets.get(boundary_idx + 1) else {
            continue;
        };

        let block = &editable_text[block_start..block_end];
        let holds_cursor =
            cursor_pending && (block_start..=block_end).contains(&cursor_offset_in_editable);
        if !holds_cursor {
            output.push_str(block);
            continue;
        }

        cursor_pending = false;
        let (before, after) = block.split_at(cursor_offset_in_editable - block_start);
        output.push_str(before);
        output.push_str(cursor_marker);
        output.push_str(after);
    }
}
675
676pub fn write_editable_with_markers_v0316(
677 output: &mut String,
678 editable_text: &str,
679 cursor_offset_in_editable: usize,
680 cursor_marker: &str,
681) {
682 let marker_offsets = compute_marker_offsets(editable_text);
683 write_editable_with_markers_impl(
684 output,
685 editable_text,
686 cursor_offset_in_editable,
687 cursor_marker,
688 &marker_offsets,
689 |i| marker_tag(i + 1),
690 );
691}
692
693pub fn write_editable_with_markers_v0317(
694 output: &mut String,
695 editable_text: &str,
696 cursor_offset_in_editable: usize,
697 cursor_marker: &str,
698) {
699 let marker_offsets = compute_marker_offsets(editable_text);
700 let anchor_idx = cursor_block_index(Some(cursor_offset_in_editable), &marker_offsets);
701 write_editable_with_markers_impl(
702 output,
703 editable_text,
704 cursor_offset_in_editable,
705 cursor_marker,
706 &marker_offsets,
707 |i| marker_tag_relative(i as isize - anchor_idx as isize),
708 );
709}
710
711pub fn write_editable_with_markers_v0318(
712 output: &mut String,
713 editable_text: &str,
714 cursor_offset_in_editable: usize,
715 cursor_marker: &str,
716) {
717 let marker_offsets = compute_marker_offsets_v0318(editable_text);
718 write_editable_with_markers_impl(
719 output,
720 editable_text,
721 cursor_offset_in_editable,
722 cursor_marker,
723 &marker_offsets,
724 |i| marker_tag(i + 1),
725 );
726}
727
728/// Parse byte-exact model output and reconstruct the full new editable region.
729///
730/// `resolve_boundary` maps a parsed tag value to an absolute byte offset in
731/// old_editable, given the marker_offsets. Returns `(start_byte, end_byte)` or
732/// an error.
733fn apply_marker_span_impl(
734 old_editable: &str,
735 tags: &[ParsedTag],
736 output: &str,
737 resolve_boundaries: impl Fn(isize, isize) -> Result<(usize, usize)>,
738) -> Result<String> {
739 if tags.is_empty() {
740 return Err(anyhow!("no marker tags found in output"));
741 }
742 if tags.len() == 1 {
743 return Err(anyhow!(
744 "only one marker tag found in output, expected at least two"
745 ));
746 }
747
748 let start_value = tags[0].value;
749 let end_value = tags[tags.len() - 1].value;
750
751 if start_value == end_value {
752 return Ok(old_editable.to_string());
753 }
754
755 let (start_byte, end_byte) = resolve_boundaries(start_value, end_value)?;
756
757 if start_byte > end_byte {
758 return Err(anyhow!("start marker must come before end marker"));
759 }
760
761 let mut new_content = String::new();
762 for i in 0..tags.len() - 1 {
763 let content_start = tags[i].tag_end;
764 let content_end = tags[i + 1].tag_start;
765 if content_start <= content_end {
766 new_content.push_str(&output[content_start..content_end]);
767 }
768 }
769
770 let mut result = String::new();
771 result.push_str(&old_editable[..start_byte]);
772 result.push_str(&new_content);
773 result.push_str(&old_editable[end_byte..]);
774
775 Ok(result)
776}
777
778pub fn apply_marker_span_v0316(old_editable: &str, output: &str) -> Result<String> {
779 let tags = collect_marker_tags(output);
780
781 // Validate monotonically increasing with no gaps (best-effort warning)
782 if tags.len() >= 2 {
783 let start_num = tags[0].value;
784 let end_num = tags[tags.len() - 1].value;
785 if start_num != end_num {
786 let expected: Vec<isize> = (start_num..=end_num).collect();
787 let actual: Vec<isize> = tags.iter().map(|t| t.value).collect();
788 if actual != expected {
789 eprintln!(
790 "V0316 marker sequence validation failed: expected {:?}, got {:?}. Attempting best-effort parse.",
791 expected, actual
792 );
793 }
794 }
795 }
796
797 let marker_offsets = compute_marker_offsets(old_editable);
798 apply_marker_span_impl(old_editable, &tags, output, |start_val, end_val| {
799 let start_idx = (start_val as usize)
800 .checked_sub(1)
801 .context("marker numbers are 1-indexed")?;
802 let end_idx = (end_val as usize)
803 .checked_sub(1)
804 .context("marker numbers are 1-indexed")?;
805 let start_byte = *marker_offsets
806 .get(start_idx)
807 .context("start marker number out of range")?;
808 let end_byte = *marker_offsets
809 .get(end_idx)
810 .context("end marker number out of range")?;
811 Ok((start_byte, end_byte))
812 })
813}
814
815pub fn apply_marker_span_v0317(
816 old_editable: &str,
817 output: &str,
818 cursor_offset_in_old: Option<usize>,
819) -> Result<String> {
820 let tags = collect_relative_marker_tags(output);
821 let marker_offsets = compute_marker_offsets(old_editable);
822 let anchor_idx = cursor_block_index(cursor_offset_in_old, &marker_offsets);
823
824 apply_marker_span_impl(old_editable, &tags, output, |start_delta, end_delta| {
825 let start_idx_signed = anchor_idx as isize + start_delta;
826 let end_idx_signed = anchor_idx as isize + end_delta;
827 if start_idx_signed < 0 || end_idx_signed < 0 {
828 return Err(anyhow!("relative marker maps before first marker"));
829 }
830 let start_idx = usize::try_from(start_idx_signed).context("invalid start marker index")?;
831 let end_idx = usize::try_from(end_idx_signed).context("invalid end marker index")?;
832 let start_byte = *marker_offsets
833 .get(start_idx)
834 .context("start marker number out of range")?;
835 let end_byte = *marker_offsets
836 .get(end_idx)
837 .context("end marker number out of range")?;
838 Ok((start_byte, end_byte))
839 })
840}
841
842pub fn apply_marker_span_v0318(old_editable: &str, output: &str) -> Result<String> {
843 let tags = collect_marker_tags(output);
844
845 if tags.len() >= 2 {
846 let start_num = tags[0].value;
847 let end_num = tags[tags.len() - 1].value;
848 if start_num != end_num {
849 let expected: Vec<isize> = (start_num..=end_num).collect();
850 let actual: Vec<isize> = tags.iter().map(|t| t.value).collect();
851 if actual != expected {
852 eprintln!(
853 "V0318 marker sequence validation failed: expected {:?}, got {:?}. Attempting best-effort parse.",
854 expected, actual
855 );
856 }
857 }
858 }
859
860 let marker_offsets = compute_marker_offsets_v0318(old_editable);
861 apply_marker_span_impl(old_editable, &tags, output, |start_val, end_val| {
862 let start_idx = (start_val as usize)
863 .checked_sub(1)
864 .context("marker numbers are 1-indexed")?;
865 let end_idx = (end_val as usize)
866 .checked_sub(1)
867 .context("marker numbers are 1-indexed")?;
868 let start_byte = *marker_offsets
869 .get(start_idx)
870 .context("start marker number out of range")?;
871 let end_byte = *marker_offsets
872 .get(end_idx)
873 .context("end marker number out of range")?;
874 Ok((start_byte, end_byte))
875 })
876}
877
878/// Encode the training target from old and new editable text.
879///
880/// Shared implementation for V0316, V0317, and V0318. The `tag_for_block_idx`
881/// closure maps a block index to the appropriate marker tag string.
882/// `no_edit_tag` is the marker tag to repeat when there are no edits.
883fn encode_from_old_and_new_impl(
884 old_editable: &str,
885 new_editable: &str,
886 cursor_offset_in_new: Option<usize>,
887 cursor_marker: &str,
888 end_marker: &str,
889 no_edit_tag: &str,
890 marker_offsets: &[usize],
891 tag_for_block_idx: impl Fn(usize) -> String,
892) -> Result<String> {
893 if old_editable == new_editable {
894 return Ok(format!("{no_edit_tag}{no_edit_tag}{end_marker}"));
895 }
896
897 let (common_prefix, common_suffix) =
898 common_prefix_suffix(old_editable.as_bytes(), new_editable.as_bytes());
899 let change_end_in_old = old_editable.len() - common_suffix;
900
901 let mut start_marker_idx = marker_offsets
902 .iter()
903 .rposition(|&offset| offset <= common_prefix)
904 .unwrap_or(0);
905 let mut end_marker_idx = marker_offsets
906 .iter()
907 .position(|&offset| offset >= change_end_in_old)
908 .unwrap_or(marker_offsets.len() - 1);
909
910 if start_marker_idx == end_marker_idx {
911 if end_marker_idx < marker_offsets.len().saturating_sub(1) {
912 end_marker_idx += 1;
913 } else if start_marker_idx > 0 {
914 start_marker_idx -= 1;
915 }
916 }
917
918 let old_start = marker_offsets[start_marker_idx];
919 let old_end = marker_offsets[end_marker_idx];
920
921 let new_start = old_start;
922 let new_end = new_editable
923 .len()
924 .saturating_sub(old_editable.len().saturating_sub(old_end));
925
926 let new_span = &new_editable[new_start..new_end];
927 let old_span = &old_editable[old_start..old_end];
928
929 let (span_common_prefix, span_common_suffix) =
930 common_prefix_suffix(old_span.as_bytes(), new_span.as_bytes());
931
932 let mut result = String::new();
933 let mut prev_new_rel = 0usize;
934 let mut cursor_placed = false;
935
936 for block_idx in start_marker_idx..end_marker_idx {
937 result.push_str(&tag_for_block_idx(block_idx));
938
939 let new_rel_end = if block_idx + 1 == end_marker_idx {
940 new_span.len()
941 } else {
942 let old_rel = marker_offsets[block_idx + 1] - old_start;
943 let mapped = map_boundary_offset(
944 old_rel,
945 old_span.len(),
946 new_span.len(),
947 span_common_prefix,
948 span_common_suffix,
949 );
950 snap_to_line_start(new_span, mapped)
951 };
952
953 let new_rel_end = new_rel_end.max(prev_new_rel);
954 let block_content = &new_span[prev_new_rel..new_rel_end];
955
956 if !cursor_placed {
957 if let Some(cursor_offset) = cursor_offset_in_new {
958 let abs_start = new_start + prev_new_rel;
959 let abs_end = new_start + new_rel_end;
960 if cursor_offset >= abs_start && cursor_offset <= abs_end {
961 cursor_placed = true;
962 let cursor_in_block = cursor_offset - abs_start;
963 let bounded = cursor_in_block.min(block_content.len());
964 result.push_str(&block_content[..bounded]);
965 result.push_str(cursor_marker);
966 result.push_str(&block_content[bounded..]);
967 prev_new_rel = new_rel_end;
968 continue;
969 }
970 }
971 }
972
973 result.push_str(block_content);
974 prev_new_rel = new_rel_end;
975 }
976
977 result.push_str(&tag_for_block_idx(end_marker_idx));
978 result.push_str(end_marker);
979
980 Ok(result)
981}
982
983pub fn encode_from_old_and_new_v0316(
984 old_editable: &str,
985 new_editable: &str,
986 cursor_offset_in_new: Option<usize>,
987 cursor_marker: &str,
988 end_marker: &str,
989) -> Result<String> {
990 let marker_offsets = compute_marker_offsets(old_editable);
991 let no_edit_tag = marker_tag(nearest_marker_number(cursor_offset_in_new, &marker_offsets));
992 encode_from_old_and_new_impl(
993 old_editable,
994 new_editable,
995 cursor_offset_in_new,
996 cursor_marker,
997 end_marker,
998 &no_edit_tag,
999 &marker_offsets,
1000 |block_idx| marker_tag(block_idx + 1),
1001 )
1002}
1003
1004pub fn encode_from_old_and_new_v0317(
1005 old_editable: &str,
1006 new_editable: &str,
1007 cursor_offset_in_new: Option<usize>,
1008 cursor_marker: &str,
1009 end_marker: &str,
1010) -> Result<String> {
1011 let marker_offsets = compute_marker_offsets(old_editable);
1012 let anchor_idx = cursor_block_index(cursor_offset_in_new, &marker_offsets);
1013 let no_edit_tag = marker_tag_relative(0);
1014 encode_from_old_and_new_impl(
1015 old_editable,
1016 new_editable,
1017 cursor_offset_in_new,
1018 cursor_marker,
1019 end_marker,
1020 &no_edit_tag,
1021 &marker_offsets,
1022 |block_idx| marker_tag_relative(block_idx as isize - anchor_idx as isize),
1023 )
1024}
1025
1026pub fn encode_from_old_and_new_v0318(
1027 old_editable: &str,
1028 new_editable: &str,
1029 cursor_offset_in_new: Option<usize>,
1030 cursor_marker: &str,
1031 end_marker: &str,
1032) -> Result<String> {
1033 let marker_offsets = compute_marker_offsets_v0318(old_editable);
1034 let no_edit_tag = marker_tag(nearest_marker_number(cursor_offset_in_new, &marker_offsets));
1035 encode_from_old_and_new_impl(
1036 old_editable,
1037 new_editable,
1038 cursor_offset_in_new,
1039 cursor_marker,
1040 end_marker,
1041 &no_edit_tag,
1042 &marker_offsets,
1043 |block_idx| marker_tag(block_idx + 1),
1044 )
1045}
1046
// Unit tests for marker offset computation, marker-span extraction and
// application, and the encode/apply round trips of each format version.
#[cfg(test)]
mod tests {
    use super::*;

    /// Three short lines produce exactly two offsets: `0` and `text.len()`.
    #[test]
    fn test_compute_marker_offsets_small_block() {
        let text = "aaa\nbbb\nccc\n";
        let offsets = compute_marker_offsets(text);
        assert_eq!(offsets, vec![0, text.len()]);
    }

    /// With a blank line in the middle, the offsets start at `0`, end at
    /// `text.len()`, and include `13` (the byte offset of the `ddd` line that
    /// follows the blank line).
    #[test]
    fn test_compute_marker_offsets_blank_line_split() {
        let text = "aaa\nbbb\nccc\n\nddd\neee\nfff\n";
        let offsets = compute_marker_offsets(text);
        assert_eq!(offsets[0], 0);
        assert!(offsets.contains(&13), "offsets: {:?}", offsets);
        assert_eq!(*offsets.last().unwrap(), text.len());
    }

    /// Expects a boundary at the `def get_text_cell(` line, which follows a
    /// blank line, and no boundary at the `self,` line right after it.
    #[test]
    fn test_compute_marker_offsets_blank_line_split_overrides_pending_hard_cap_boundary() {
        let text = "\
class OCRDataframe(BaseModel):
 model_config = ConfigDict(arbitrary_types_allowed=True)

 df: pl.DataFrame

 def page(self, page_number: int = 0) -> \"OCRDataframe\":
 # Filter dataframe on specific page
 df_page = self.df.filter(pl.col(\"page\") == page_number)
 return OCRDataframe(df=df_page)

 def get_text_cell(
 self,
 cell: Cell,
 margin: int = 0,
 page_number: Optional[int] = None,
 min_confidence: int = 50,
 ) -> Optional[str]:
 \"\"\"
 Get text corresponding to cell
";
        let offsets = compute_marker_offsets(text);

        let def_start = text
            .find(" def get_text_cell(")
            .expect("def line exists");
        let self_start = text.find(" self,").expect("self line exists");

        assert!(
            offsets.contains(&def_start),
            "expected boundary at def line start ({def_start}), got {offsets:?}"
        );
        assert!(
            !offsets.contains(&self_start),
            "did not expect boundary at self line start ({self_start}), got {offsets:?}"
        );
    }

    /// Expects no boundary at the closing-brace line that follows a blank
    /// line, and a boundary at the `/// System set ...` doc line further down.
    #[test]
    fn test_compute_marker_offsets_blank_line_split_skips_closer_line() {
        let text = "\
impl Plugin for AhoySchedulePlugin {
 fn build(&self, app: &mut App) {
 app.configure_sets(
 self.schedule,
 (
 AhoySystems::MoveCharacters,
 AhoySystems::ApplyForcesToDynamicRigidBodies,
 )
 .chain()
 .before(PhysicsSystems::First),
 );

 }
}

/// System set used by all systems of `bevy_ahoy`.
#[derive(SystemSet, Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum AhoySystems {
 MoveCharacters,
 ApplyForcesToDynamicRigidBodies,
}
";
        let offsets = compute_marker_offsets(text);

        let closer_start = text.find(" }\n").expect("closer line exists");
        let doc_start = text
            .find("/// System set used by all systems of `bevy_ahoy`.")
            .expect("doc line exists");

        assert!(
            !offsets.contains(&closer_start),
            "did not expect boundary at closer line start ({closer_start}), got {offsets:?}"
        );
        assert!(
            offsets.contains(&doc_start),
            "expected boundary at doc line start ({doc_start}), got {offsets:?}"
        );
    }

    /// Ten lines without blank lines yield at least three offsets, i.e. at
    /// least one interior boundary in addition to start and end.
    #[test]
    fn test_compute_marker_offsets_max_lines_split() {
        let text = "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n";
        let offsets = compute_marker_offsets(text);
        assert!(offsets.len() >= 3, "offsets: {:?}", offsets);
    }

    /// Eight plain lines followed by a `}` line and a `case` line: expects a
    /// boundary at the start of the `case` line.
    #[test]
    fn test_compute_marker_offsets_hard_cap_nudges_past_closer_to_case_line() {
        let text = "a1\na2\na3\na4\na5\na6\na7\na8\n}\ncase 'x': {\nbody\n";
        let offsets = compute_marker_offsets(text);

        let expected = text.find("case 'x': {").expect("case line exists");
        assert!(
            offsets.contains(&expected),
            "expected nudged boundary at case line start ({expected}), got {offsets:?}"
        );
    }

    /// Same shape as the previous test but with five `}` lines before the
    /// `case` line: expects no boundary at the `case` line.
    #[test]
    fn test_compute_marker_offsets_hard_cap_nudge_respects_max_forward_lines() {
        let text = "a1\na2\na3\na4\na5\na6\na7\na8\n}\n}\n}\n}\n}\ncase 'x': {\nbody\n";
        let offsets = compute_marker_offsets(text);

        let case_start = text.find("case 'x': {").expect("case line exists");
        assert!(
            !offsets.contains(&case_start),
            "boundary should not nudge beyond max forward lines; offsets: {offsets:?}"
        );
    }

    /// Offsets start at `0`, end at `text.len()`, and are non-decreasing for
    /// a fixture where closer lines and a blank line follow eight plain lines.
    #[test]
    fn test_compute_marker_offsets_stay_sorted_when_hard_cap_boundary_nudges_forward() {
        let text = "\
aaaaaaaaaa = 1;
bbbbbbbbbb = 2;
cccccccccc = 3;
dddddddddd = 4;
eeeeeeeeee = 5;
ffffffffff = 6;
gggggggggg = 7;
hhhhhhhhhh = 8;
 };
 };

 grafanaDashboards = {
 cluster-overview.spec = {
 inherit instanceSelector;
 folderRef = \"infrastructure\";
 json = builtins.readFile ./grafana/dashboards/cluster-overview.json;
 };
 };
";
        let offsets = compute_marker_offsets(text);

        assert_eq!(offsets.first().copied(), Some(0), "offsets: {offsets:?}");
        assert_eq!(
            offsets.last().copied(),
            Some(text.len()),
            "offsets: {offsets:?}"
        );
        assert!(
            offsets.windows(2).all(|window| window[0] <= window[1]),
            "offsets must be sorted: {offsets:?}"
        );
    }

    /// Empty input yields `[0, 0]`.
    #[test]
    fn test_compute_marker_offsets_empty() {
        let offsets = compute_marker_offsets("");
        assert_eq!(offsets, vec![0, 0]);
    }

    /// For 80 one-character lines and a cursor at byte 40, both ends of the
    /// v0327 editable range coincide with v0318 marker offsets computed over
    /// the candidate range, the editable range contains the cursor, and at
    /// least one side is trimmed relative to the candidate range.
    #[test]
    fn test_compute_v0327_editable_range_trims_to_marker_boundaries() {
        let text = (0..80).map(|_| "x\n").collect::<String>();
        let cursor_offset = text.find("x\nx\nx\nx\nx\n").expect("cursor anchor exists") + 40;

        let candidate_range = grow_v0327_candidate_range(&text, cursor_offset, 20);
        let editable_range = compute_v0327_editable_range(&text, cursor_offset, 20);
        let marker_offsets = compute_marker_offsets_v0318(&text[candidate_range.clone()]);
        // Marker offsets are relative to the candidate range, so rebase the
        // editable range before comparing.
        let relative_start = editable_range.start - candidate_range.start;
        let relative_end = editable_range.end - candidate_range.start;

        assert!(
            marker_offsets.len() > 2,
            "expected interior markers: {marker_offsets:?}"
        );
        assert!(marker_offsets.contains(&relative_start));
        assert!(marker_offsets.contains(&relative_end));
        assert!(editable_range.start <= cursor_offset);
        assert!(editable_range.end >= cursor_offset);
        assert!(
            editable_range.start > candidate_range.start
                || editable_range.end < candidate_range.end,
            "expected at least one side to trim from {candidate_range:?} down to {editable_range:?}"
        );
    }

    /// On a markdown fixture with many blank lines, every block between
    /// consecutive offsets spans at least `V0316_MIN_BLOCK_LINES` lines.
    #[test]
    fn test_compute_marker_offsets_avoid_short_markdown_blocks() {
        let text = "\
# Spree Posts

This is a Posts extension for [Spree Commerce](https://spreecommerce.org), built with Ruby on Rails.

## Installation

1. Add this extension to your Gemfile with this line:

 ```ruby
 bundle add spree_posts
 ```

2. Run the install generator

 ```ruby
 bundle exec rails g spree_posts:install
 ```

3. Restart your server

 If your server was running, restart it so that it can find the assets properly.

## Developing

1. Create a dummy app

 ```bash
 bundle update
 bundle exec rake test_app
 ```

2. Add your new code
3. Run tests

 ```bash
 bundle exec rspec
 ```

When testing your applications integration with this extension you may use it's factories.
Simply add this require statement to your spec_helper:

```ruby
require 'spree_posts/factories'
```

## Releasing a new version

```shell
bundle exec gem bump -p -t
bundle exec gem release
```

For more options please see [gem-release README](https://github.com/svenfuchs/gem-release)

## Contributing

If you'd like to contribute, please take a look at the contributing guide.
";
        let offsets = compute_marker_offsets(text);

        assert_eq!(offsets.first().copied(), Some(0), "offsets: {offsets:?}");
        assert_eq!(
            offsets.last().copied(),
            Some(text.len()),
            "offsets: {offsets:?}"
        );

        // Each adjacent pair of offsets delimits one block.
        for window in offsets.windows(2) {
            let block = &text[window[0]..window[1]];
            let line_count = block.lines().count();
            assert!(
                line_count >= V0316_MIN_BLOCK_LINES,
                "block too short: {line_count} lines in block {block:?} with offsets {offsets:?}"
            );
        }
    }

    /// `extract_marker_span` returns the start marker number, the end marker
    /// number, and the content between them without the newline that follows
    /// the opening tag.
    #[test]
    fn test_extract_marker_span() {
        let text = "<|marker_2|>\n new content\n<|marker_3|>\n";
        let (start, end, content) = extract_marker_span(text).unwrap();
        assert_eq!(start, 2);
        assert_eq!(end, 3);
        assert_eq!(content, " new content\n");
    }

    /// Multi-line content between non-adjacent markers is returned intact.
    #[test]
    fn test_extract_marker_span_multi_line() {
        let text = "<|marker_1|>\nline1\nline2\nline3\n<|marker_4|>";
        let (start, end, content) = extract_marker_span(text).unwrap();
        assert_eq!(start, 1);
        assert_eq!(end, 4);
        assert_eq!(content, "line1\nline2\nline3\n");
    }

    /// Applying a span covering markers 1..2 replaces the whole three-line
    /// text with the span content.
    #[test]
    fn test_apply_marker_span_basic() {
        let old = "aaa\nbbb\nccc\n";
        let output = "<|marker_1|>\naaa\nBBB\nccc\n<|marker_2|>";
        let result = apply_marker_span(old, output).unwrap();
        assert_eq!(result, "aaa\nBBB\nccc\n");
    }

    /// A trailing blank line in the span content survives application.
    #[test]
    fn test_apply_marker_span_preserves_trailing_blank_line() {
        let old = "/\nresult\n\n";
        let output = "<|marker_1|>\n//\nresult\n\n<|marker_2|>";
        let result = apply_marker_span(old, output).unwrap();
        assert_eq!(result, "//\nresult\n\n");
    }

    /// When old and new are identical, the encoding is the no-edits string
    /// followed by the end marker.
    #[test]
    fn test_encode_no_edits() {
        let old = "aaa\nbbb\nccc\n";
        let result = encode_from_old_and_new(
            old,
            old,
            None,
            "<|user_cursor|>",
            ">>>>>>> UPDATED\n",
            "NO_EDITS\n",
        )
        .unwrap();
        assert_eq!(result, "NO_EDITS\n>>>>>>> UPDATED\n");
    }

    /// A one-line change is encoded between `<|marker_1|>` and `<|marker_2|>`
    /// with the full new text, and the result ends with the end marker.
    #[test]
    fn test_encode_with_change() {
        let old = "aaa\nbbb\nccc\n";
        let new = "aaa\nBBB\nccc\n";
        let result = encode_from_old_and_new(
            old,
            new,
            None,
            "<|user_cursor|>",
            ">>>>>>> UPDATED\n",
            "NO_EDITS\n",
        )
        .unwrap();
        assert!(result.contains("<|marker_1|>"));
        assert!(result.contains("<|marker_2|>"));
        assert!(result.contains("aaa\nBBB\nccc\n"));
        assert!(result.ends_with(">>>>>>> UPDATED\n"));
    }

    /// Encoding a change and applying the encoded span (end marker stripped)
    /// to the old text reproduces the new text.
    #[test]
    fn test_roundtrip_encode_apply() {
        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
        let encoded = encode_from_old_and_new(
            old,
            new,
            None,
            "<|user_cursor|>",
            ">>>>>>> UPDATED\n",
            "NO_EDITS\n",
        )
        .unwrap();
        let output = encoded
            .strip_suffix(">>>>>>> UPDATED\n")
            .expect("should have end marker");
        let reconstructed = apply_marker_span(old, output).unwrap();
        assert_eq!(reconstructed, new);
    }

    /// Text outside the first and last markers is dropped, and the content of
    /// the blocks in between is joined without the marker lines.
    #[test]
    fn test_extract_editable_region_from_markers_multi() {
        let text = "prefix\n<|marker_1|>\naaa\nbbb\n<|marker_2|>\nccc\nddd\n<|marker_3|>\nsuffix";
        let parsed = extract_editable_region_from_markers(text).unwrap();
        assert_eq!(parsed, "aaa\nbbb\nccc\nddd");
    }

    /// With exactly two markers, the content between them is returned without
    /// the surrounding newlines.
    #[test]
    fn test_extract_editable_region_two_markers() {
        let text = "<|marker_1|>\none\ntwo three\n<|marker_2|>";
        let parsed = extract_editable_region_from_markers(text).unwrap();
        assert_eq!(parsed, "one\ntwo three");
    }

    /// A cursor at byte 5 of the new text is rendered as the cursor marker
    /// right after the first `B` of `BBB`.
    #[test]
    fn test_encode_with_cursor() {
        let old = "aaa\nbbb\nccc\n";
        let new = "aaa\nBBB\nccc\n";
        let result = encode_from_old_and_new(
            old,
            new,
            Some(5),
            "<|user_cursor|>",
            ">>>>>>> UPDATED\n",
            "NO_EDITS\n",
        )
        .unwrap();
        assert!(result.contains("<|user_cursor|>"), "result: {result}");
        assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
    }

    /// A marker between the first and last markers is removed from the
    /// extracted content; the span runs from the first to the last marker.
    #[test]
    fn test_extract_marker_span_strips_intermediate_markers() {
        let text = "<|marker_2|>\nline1\n<|marker_3|>\nline2\n<|marker_4|>";
        let (start, end, content) = extract_marker_span(text).unwrap();
        assert_eq!(start, 2);
        assert_eq!(end, 4);
        assert_eq!(content, "line1\nline2\n");
    }

    /// Several intermediate markers are all removed from the extracted content.
    #[test]
    fn test_extract_marker_span_strips_multiple_intermediate_markers() {
        let text = "<|marker_1|>\naaa\n<|marker_2|>\nbbb\n<|marker_3|>\nccc\n<|marker_4|>";
        let (start, end, content) = extract_marker_span(text).unwrap();
        assert_eq!(start, 1);
        assert_eq!(end, 4);
        assert_eq!(content, "aaa\nbbb\nccc\n");
    }

    /// A repeated `<|marker_1|>` in the middle of the output does not affect
    /// the applied result.
    #[test]
    fn test_apply_marker_span_with_extra_intermediate_marker() {
        let old = "aaa\nbbb\nccc\n";
        let output = "<|marker_1|>\naaa\n<|marker_1|>\nBBB\nccc\n<|marker_2|>";
        let result = apply_marker_span(old, output).unwrap();
        assert_eq!(result, "aaa\nBBB\nccc\n");
    }

    /// `strip_marker_tags` leaves marker-free text alone, removes an inline
    /// tag, and removes a tag on its own line together with one newline.
    #[test]
    fn test_strip_marker_tags_inline() {
        assert_eq!(strip_marker_tags("no markers here"), "no markers here");
        assert_eq!(strip_marker_tags("before<|marker_5|>after"), "beforeafter");
        assert_eq!(
            strip_marker_tags("line1\n<|marker_3|>\nline2"),
            "line1\nline2"
        );
    }

    /// The v0316 writer starts with `<|marker_1|>`, emits the cursor marker,
    /// and adds nothing else: removing the cursor marker and marker tags gives
    /// back the editable text unchanged.
    #[test]
    fn test_write_editable_with_markers_v0316_byte_exact() {
        let editable = "aaa\nbbb\nccc\n";
        let mut output = String::new();
        write_editable_with_markers_v0316(&mut output, editable, 4, "<|user_cursor|>");
        assert!(output.starts_with("<|marker_1|>"));
        assert!(output.contains("<|user_cursor|>"));
        let stripped = output.replace("<|user_cursor|>", "");
        let stripped = strip_marker_tags(&stripped);
        assert_eq!(stripped, editable);
    }

    /// v0316 output has content immediately after the opening tag (no
    /// newline); applying it replaces the covered text.
    #[test]
    fn test_apply_marker_span_v0316_basic() {
        let old = "aaa\nbbb\nccc\n";
        let output = "<|marker_1|>aaa\nBBB\nccc\n<|marker_2|>";
        let result = apply_marker_span_v0316(old, output).unwrap();
        assert_eq!(result, "aaa\nBBB\nccc\n");
    }

    /// The same marker twice with nothing in between leaves the old text
    /// unchanged.
    #[test]
    fn test_apply_marker_span_v0316_no_edit() {
        let old = "aaa\nbbb\nccc\n";
        let output = "<|marker_1|><|marker_1|>";
        let result = apply_marker_span_v0316(old, output).unwrap();
        assert_eq!(result, old);
    }

    /// When start and end markers are the same number, the old text is
    /// returned unchanged even if there is content between them.
    #[test]
    fn test_apply_marker_span_v0316_no_edit_any_marker() {
        let old = "aaa\nbbb\nccc\n";
        let output = "<|marker_2|>ignored content<|marker_2|>";
        let result = apply_marker_span_v0316(old, output).unwrap();
        assert_eq!(result, old);
    }

    /// Builds a v0316 output by hand with a tag at every computed marker
    /// offset and checks that applying it yields the fully replaced text.
    #[test]
    fn test_apply_marker_span_v0316_multi_block() {
        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
        let marker_offsets = compute_marker_offsets(old);
        assert!(
            marker_offsets.len() >= 3,
            "expected at least 3 offsets, got {:?}",
            marker_offsets
        );

        // `new_content` has the same byte length per line as `old`, so the old
        // marker offsets can be used to slice it.
        let new_content = "LINE1\nLINE2\nLINE3\n\nLINE5\nLINE6\nLINE7\nLINE8\n";
        let mut output = String::new();
        output.push_str("<|marker_1|>");
        for i in 0..marker_offsets.len() - 1 {
            if i > 0 {
                output.push_str(&marker_tag(i + 1));
            }
            let start = marker_offsets[i];
            let end = marker_offsets[i + 1];
            let block_len = end - start;
            output.push_str(&new_content[start..start + block_len]);
        }
        let last_marker_num = marker_offsets.len();
        output.push_str(&marker_tag(last_marker_num));
        let result = apply_marker_span_v0316(old, &output).unwrap();
        assert_eq!(result, new_content);
    }

    /// Content without a trailing newline before the closing tag is applied
    /// as-is; no newline is added.
    #[test]
    fn test_apply_marker_span_v0316_byte_exact_no_normalization() {
        let old = "aaa\nbbb\nccc\n";
        let output = "<|marker_1|>aaa\nBBB\nccc<|marker_2|>";
        let result = apply_marker_span_v0316(old, output).unwrap();
        assert_eq!(result, "aaa\nBBB\nccc");
    }

    /// Encoding identical old/new text with v0316 ends with the end marker
    /// and, once that is stripped, applies back to the old text.
    #[test]
    fn test_encode_v0316_no_edits() {
        let old = "aaa\nbbb\nccc\n";
        let result =
            encode_from_old_and_new_v0316(old, old, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
        assert!(result.ends_with("<|end|>"));
        let stripped = result.strip_suffix("<|end|>").unwrap();
        let result_parsed = apply_marker_span_v0316(old, stripped).unwrap();
        assert_eq!(result_parsed, old);
    }

    /// A v0316 encoding of a change contains markers 1 and 2 and ends with
    /// the end marker.
    #[test]
    fn test_encode_v0316_with_change() {
        let old = "aaa\nbbb\nccc\n";
        let new = "aaa\nBBB\nccc\n";
        let result =
            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
        assert!(result.contains("<|marker_1|>"));
        assert!(result.contains("<|marker_2|>"));
        assert!(result.ends_with("<|end|>"));
    }

    /// v0316 encode followed by v0316 apply reproduces the new text.
    #[test]
    fn test_roundtrip_v0316() {
        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
        let encoded =
            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
        let stripped = encoded
            .strip_suffix("<|end|>")
            .expect("should have end marker");
        let reconstructed = apply_marker_span_v0316(old, stripped).unwrap();
        assert_eq!(reconstructed, new);
    }

    /// v0316 encoding with a cursor at byte 5 places the cursor marker right
    /// after the first `B` of `BBB`. (Only the encoding is checked here.)
    #[test]
    fn test_roundtrip_v0316_with_cursor() {
        let old = "aaa\nbbb\nccc\n";
        let new = "aaa\nBBB\nccc\n";
        let result =
            encode_from_old_and_new_v0316(old, new, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
        assert!(result.contains("<|user_cursor|>"), "result: {result}");
        assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
    }

    /// v0316 round trip where lines on both sides of the blank line change.
    #[test]
    fn test_roundtrip_v0316_multi_block_change() {
        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
        let new = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
        let encoded =
            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
        let stripped = encoded
            .strip_suffix("<|end|>")
            .expect("should have end marker");
        let reconstructed = apply_marker_span_v0316(old, stripped).unwrap();
        assert_eq!(reconstructed, new);
    }

    /// Pins the marker number returned for several cursor offsets against
    /// offsets `[0, 10, 20, 30]`; numbers start at 1, and a missing cursor
    /// maps to marker 1.
    #[test]
    fn test_nearest_marker_number() {
        let offsets = vec![0, 10, 20, 30];
        assert_eq!(nearest_marker_number(Some(0), &offsets), 1);
        assert_eq!(nearest_marker_number(Some(9), &offsets), 2);
        assert_eq!(nearest_marker_number(Some(15), &offsets), 2);
        assert_eq!(nearest_marker_number(Some(25), &offsets), 3);
        assert_eq!(nearest_marker_number(Some(30), &offsets), 4);
        assert_eq!(nearest_marker_number(None, &offsets), 1);
    }

    /// Relative tags render negative deltas as `-N`, positive deltas as `+N`,
    /// and zero as `-0`.
    #[test]
    fn test_marker_tag_relative_formats_as_expected() {
        assert_eq!(marker_tag_relative(-2), "<|marker-2|>");
        assert_eq!(marker_tag_relative(-1), "<|marker-1|>");
        assert_eq!(marker_tag_relative(0), "<|marker-0|>");
        assert_eq!(marker_tag_relative(1), "<|marker+1|>");
        assert_eq!(marker_tag_relative(2), "<|marker+2|>");
    }

    /// The v0317 writer emits `<|marker-0|>` and the cursor marker, and
    /// removing the cursor marker plus every relative tag gives back the
    /// editable text unchanged.
    #[test]
    fn test_write_editable_with_markers_v0317_includes_relative_markers_and_cursor() {
        let editable = "aaa\nbbb\nccc\n";
        let mut output = String::new();
        write_editable_with_markers_v0317(&mut output, editable, 4, "<|user_cursor|>");

        assert!(output.contains("<|marker-0|>"));
        assert!(output.contains("<|user_cursor|>"));

        let stripped = output.replace("<|user_cursor|>", "");
        // Remove each relative tag by its literal text, as located in the
        // cursor-free string.
        let stripped =
            collect_relative_marker_tags(&stripped)
                .iter()
                .fold(stripped.clone(), |acc, marker| {
                    let tag = &stripped[marker.tag_start..marker.tag_end];
                    acc.replace(tag, "")
                });
        assert_eq!(stripped, editable);
    }

    /// A span from `<|marker-0|>` to `<|marker+1|>` with cursor offset 0
    /// replaces the whole three-line text.
    #[test]
    fn test_apply_marker_span_v0317_basic() {
        let old = "aaa\nbbb\nccc\n";
        let output = "<|marker-0|>aaa\nBBB\nccc\n<|marker+1|>";
        let result = apply_marker_span_v0317(old, output, Some(0)).unwrap();
        assert_eq!(result, "aaa\nBBB\nccc\n");
    }

    /// `<|marker-0|>` twice with nothing in between leaves the old text
    /// unchanged.
    #[test]
    fn test_apply_marker_span_v0317_no_edit() {
        let old = "aaa\nbbb\nccc\n";
        let output = "<|marker-0|><|marker-0|>";
        let result = apply_marker_span_v0317(old, output, Some(0)).unwrap();
        assert_eq!(result, old);
    }

    /// Encoding identical old/new text with v0317 yields exactly
    /// `<|marker-0|><|marker-0|>` followed by the end marker.
    #[test]
    fn test_encode_v0317_no_edits() {
        let old = "aaa\nbbb\nccc\n";
        let result =
            encode_from_old_and_new_v0317(old, old, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
        assert_eq!(result, "<|marker-0|><|marker-0|><|end|>");
    }

    /// v0317 encode followed by v0317 apply (with the same cursor offset and
    /// the cursor marker removed) reproduces the new text.
    #[test]
    fn test_roundtrip_v0317() {
        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
        let new = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
        let cursor = Some(6);

        let encoded =
            encode_from_old_and_new_v0317(old, new, cursor, "<|user_cursor|>", "<|end|>").unwrap();
        let stripped = encoded
            .strip_suffix("<|end|>")
            .expect("should have end marker");
        let stripped = stripped.replace("<|user_cursor|>", "");
        let reconstructed = apply_marker_span_v0317(old, &stripped, cursor).unwrap();
        assert_eq!(reconstructed, new);
    }

    /// A v0317 encoding with a cursor contains both the cursor marker and
    /// `<|marker-0|>`. (Only the encoding is checked here.)
    #[test]
    fn test_roundtrip_v0317_with_cursor_marker() {
        let old = "aaa\nbbb\nccc\n";
        let new = "aaa\nBBB\nccc\n";
        let result =
            encode_from_old_and_new_v0317(old, new, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
        assert!(result.contains("<|user_cursor|>"), "result: {result}");
        assert!(result.contains("<|marker-0|>"), "result: {result}");
    }

    /// On the same text, the v0318 offsets are fewer than the v0316 offsets,
    /// and both start at `0` and end at `text.len()`.
    #[test]
    fn test_compute_marker_offsets_v0318_uses_larger_block_sizes() {
        let text = "l1\nl2\nl3\n\nl5\nl6\nl7\nl8\nl9\nl10\nl11\nl12\nl13\n";
        let v0316_offsets = compute_marker_offsets(text);
        let v0318_offsets = compute_marker_offsets_v0318(text);

        assert!(v0318_offsets.len() < v0316_offsets.len());
        assert_eq!(v0316_offsets.first().copied(), Some(0));
        assert_eq!(v0318_offsets.first().copied(), Some(0));
        assert_eq!(v0316_offsets.last().copied(), Some(text.len()));
        assert_eq!(v0318_offsets.last().copied(), Some(text.len()));
    }

    /// v0318 encode followed by v0318 apply reproduces the new text.
    #[test]
    fn test_roundtrip_v0318() {
        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
        let encoded =
            encode_from_old_and_new_v0318(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
        let stripped = encoded
            .strip_suffix("<|end|>")
            .expect("should have end marker");
        let reconstructed = apply_marker_span_v0318(old, stripped).unwrap();
        assert_eq!(reconstructed, new);
    }

    /// Appending a line at the very end is not encoded as the bare
    /// `<|marker_2|>` no-edit form, and the round trip reproduces the new text.
    #[test]
    fn test_roundtrip_v0318_append_at_end_of_editable_region() {
        let old = "line1\nline2\nline3\n";
        let new = "line1\nline2\nline3\nline4\n";
        let encoded =
            encode_from_old_and_new_v0318(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();

        assert_ne!(encoded, "<|marker_2|><|end|>");

        let stripped = encoded
            .strip_suffix("<|end|>")
            .expect("should have end marker");
        let reconstructed = apply_marker_span_v0318(old, stripped).unwrap();
        assert_eq!(reconstructed, new);
    }

    /// v0318 round trip for a line inserted directly after a blank line.
    #[test]
    fn test_roundtrip_v0318_insert_at_internal_marker_boundary() {
        let old = "alpha\nbeta\n\ngamma\ndelta\n";
        let new = "alpha\nbeta\n\ninserted\ngamma\ndelta\n";
        let encoded =
            encode_from_old_and_new_v0318(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();

        let stripped = encoded
            .strip_suffix("<|end|>")
            .expect("should have end marker");
        let reconstructed = apply_marker_span_v0318(old, stripped).unwrap();
        assert_eq!(reconstructed, new);
    }

    /// In a v0317 encoding of a one-line change with a cursor, every relative
    /// marker tag sits either at offset 0 or immediately after a newline.
    #[test]
    fn test_encode_v0317_markers_stay_on_line_boundaries() {
        let old = "\
\t\t\t\tcontinue outer;
\t\t\t}
\t\t}
\t}

\tconst intersectionObserver = new IntersectionObserver((entries) => {
\t\tfor (const entry of entries) {
\t\t\tif (entry.isIntersecting) {
\t\t\t\tintersectionObserver.unobserve(entry.target);
\t\t\t\tanchorPreload(/** @type {HTMLAnchorElement} */ (entry.target));
\t\t\t}
\t\t}
\t});

\tconst observer = new MutationObserver(() => {
\t\tconst links = /** @type {NodeListOf<HTMLAnchorElement>} */ (
\t\t\tdocument.querySelectorAll('a[data-preload]')
\t\t);

\t\tfor (const link of links) {
\t\t\tif (linkSet.has(link)) continue;
\t\t\tlinkSet.add(link);

\t\t\tswitch (link.dataset.preload) {
\t\t\t\tcase '':
\t\t\t\tcase 'true':
\t\t\t\tcase 'hover': {
\t\t\t\t\tlink.addEventListener('mouseenter', function callback() {
\t\t\t\t\t\tlink.removeEventListener('mouseenter', callback);
\t\t\t\t\t\tanchorPreload(link);
\t\t\t\t\t});
";
        // The new text carries an inline cursor marker so its offset can be
        // located; the marker is removed again before encoding.
        let new = old.replacen(
            "\t\t\t\tcase 'true':\n",
            "\t\t\t\tcase 'TRUE':<|user_cursor|>\n",
            1,
        );

        let cursor_offset = new.find("<|user_cursor|>").expect("cursor marker in new");
        let new_without_cursor = new.replace("<|user_cursor|>", "");

        let encoded = encode_from_old_and_new_v0317(
            old,
            &new_without_cursor,
            Some(cursor_offset),
            "<|user_cursor|>",
            "<|end|>",
        )
        .unwrap();

        let core = encoded.strip_suffix("<|end|>").unwrap_or(&encoded);
        for marker in collect_relative_marker_tags(core) {
            let tag_start = marker.tag_start;
            assert!(
                tag_start == 0 || core.as_bytes()[tag_start - 1] == b'\n',
                "marker not at line boundary: {} in output:\n{}",
                marker_tag_relative(marker.value),
                core
            );
        }
    }
}