1use anyhow::{Context as _, Result, anyhow};
2
/// Opening text of an absolute marker tag; the marker number follows it.
pub const MARKER_TAG_PREFIX: &str = "<|marker_";
/// Closing text of a marker tag; used when parsing both absolute and relative tags.
pub const MARKER_TAG_SUFFIX: &str = "|>";
/// Opening text of a relative marker tag; a signed delta follows it.
pub const RELATIVE_MARKER_TAG_PREFIX: &str = "<|marker";
// Minimum / maximum block sizes, in lines, for the V0316/V0317 and V0318
// boundary computations.
const V0316_MIN_BLOCK_LINES: usize = 3;
const V0316_MAX_BLOCK_LINES: usize = 8;
const V0318_MIN_BLOCK_LINES: usize = 6;
const V0318_MAX_BLOCK_LINES: usize = 16;
/// Upper bound on how many lines `skip_to_good_start` examines.
const MAX_NUDGE_LINES: usize = 5;
// End-of-output markers, one constant per format version. All three currently
// hold the same text.
pub const V0316_END_MARKER: &str = "<[end▁of▁sentence]>";
pub const V0317_END_MARKER: &str = "<[end▁of▁sentence]>";
pub const V0318_END_MARKER: &str = "<[end▁of▁sentence]>";
14
15pub fn marker_tag(number: usize) -> String {
16 format!("{MARKER_TAG_PREFIX}{number}{MARKER_TAG_SUFFIX}")
17}
18
/// Build a relative marker tag for `delta`.
///
/// Positive deltas are written with an explicit `+`, negative deltas keep
/// their `-`, and zero is spelled `-0`.
pub fn marker_tag_relative(delta: isize) -> String {
    match delta {
        0 => "<|marker-0|>".to_owned(),
        forward if forward > 0 => format!("<|marker+{forward}|>"),
        // A negative number already formats with its leading minus sign.
        backward => format!("<|marker{backward}|>"),
    }
}
28
/// Per-line facts gathered by `collect_line_info`.
struct LineInfo {
    /// Byte offset of the line's first byte within the scanned text.
    start: usize,
    /// True when the line is empty after trimming whitespace.
    is_blank: bool,
    /// True when the line is non-blank and is not a structural tail
    /// (see `is_structural_tail`).
    is_good_start: bool,
}
34
35fn collect_line_info(text: &str) -> Vec<LineInfo> {
36 let mut lines = Vec::new();
37 let mut offset = 0;
38 for line in text.split('\n') {
39 let trimmed = line.trim();
40 let is_blank = trimmed.is_empty();
41 let is_good_start = !is_blank && !is_structural_tail(trimmed);
42 lines.push(LineInfo {
43 start: offset,
44 is_blank,
45 is_good_start,
46 });
47 offset += line.len() + 1;
48 }
49 // split('\n') on "abc\n" yields ["abc", ""] — drop the phantom trailing
50 // empty element when the text ends with '\n'.
51 if text.ends_with('\n') && lines.len() > 1 {
52 lines.pop();
53 }
54 lines
55}
56
/// Report whether an already-trimmed line only closes or terminates a
/// construct: it begins with a closing bracket, or it is a bare
/// `break`/`continue`/`return`/`throw`/`end` (trailing semicolons ignored).
fn is_structural_tail(trimmed_line: &str) -> bool {
    let opens_with_closer = matches!(trimmed_line.chars().next(), Some('}' | ']' | ')'));
    if opens_with_closer {
        return true;
    }

    let keyword = trimmed_line.trim_end_matches(';');
    ["break", "continue", "return", "throw", "end"].contains(&keyword)
}
66
67/// Starting from line `from`, scan up to `MAX_NUDGE_LINES` forward to find a
68/// line with `is_good_start`. Returns `None` if no suitable line is found.
69fn skip_to_good_start(lines: &[LineInfo], from: usize) -> Option<usize> {
70 (from..lines.len().min(from + MAX_NUDGE_LINES)).find(|&i| lines[i].is_good_start)
71}
72
/// Compute byte offsets within `editable_text` where marker boundaries should
/// be placed.
///
/// Returns a sorted `Vec<usize>` that always starts with `0` and ends with
/// `editable_text.len()`. Interior offsets are placed at line boundaries
/// (right after a `\n`), preferring blank-line boundaries when available and
/// respecting `min_block_lines` / `max_block_lines` constraints.
///
/// Empty input yields `[0, 0]`, so the result always has at least two entries.
fn compute_marker_offsets_with_limits(
    editable_text: &str,
    min_block_lines: usize,
    max_block_lines: usize,
) -> Vec<usize> {
    if editable_text.is_empty() {
        return vec![0, 0];
    }

    let lines = collect_line_info(editable_text);
    let mut offsets = vec![0usize];
    // Index of the line at which the most recent boundary was placed.
    let mut last_boundary_line = 0;
    let mut i = 0;

    while i < lines.len() {
        // Number of lines accumulated since the last boundary.
        let gap = i - last_boundary_line;

        // Blank-line split: non-blank line following blank line(s) with enough
        // accumulated lines.
        if gap >= min_block_lines && !lines[i].is_blank && i > 0 && lines[i - 1].is_blank {
            // Nudge forward past structural tails; fall back to `i` itself when
            // no good start is found within the nudge window.
            let target = if lines[i].is_good_start {
                i
            } else {
                skip_to_good_start(&lines, i).unwrap_or(i)
            };
            // Only split if the lines from `target` to the end number at least
            // `min_block_lines` and the boundary advances past the previous one.
            if lines.len() - target >= min_block_lines
                && lines[target].start > *offsets.last().unwrap_or(&0)
            {
                offsets.push(lines[target].start);
                last_boundary_line = target;
                i = target + 1;
                continue;
            }
        }

        // Hard cap: too many lines without a split.
        if gap >= max_block_lines {
            let target = skip_to_good_start(&lines, i).unwrap_or(i);
            if lines[target].start > *offsets.last().unwrap_or(&0) {
                offsets.push(lines[target].start);
                last_boundary_line = target;
                i = target + 1;
                continue;
            }
        }

        i += 1;
    }

    // Close the final block at the end of the text.
    let end = editable_text.len();
    if *offsets.last().unwrap_or(&0) != end {
        offsets.push(end);
    }

    offsets
}
136
/// Compute byte offsets within `editable_text` for the V0316/V0317 block sizing rules.
///
/// Delegates to `compute_marker_offsets_with_limits` with
/// `V0316_MIN_BLOCK_LINES` and `V0316_MAX_BLOCK_LINES`.
pub fn compute_marker_offsets(editable_text: &str) -> Vec<usize> {
    compute_marker_offsets_with_limits(editable_text, V0316_MIN_BLOCK_LINES, V0316_MAX_BLOCK_LINES)
}
141
/// Compute byte offsets within `editable_text` for the V0318 block sizing rules.
///
/// Delegates to `compute_marker_offsets_with_limits` with
/// `V0318_MIN_BLOCK_LINES` and `V0318_MAX_BLOCK_LINES`.
pub fn compute_marker_offsets_v0318(editable_text: &str) -> Vec<usize> {
    compute_marker_offsets_with_limits(editable_text, V0318_MIN_BLOCK_LINES, V0318_MAX_BLOCK_LINES)
}
145
146/// Write the editable region content with marker tags, inserting the cursor
147/// marker at the given offset within the editable text.
148pub fn write_editable_with_markers(
149 output: &mut String,
150 editable_text: &str,
151 cursor_offset_in_editable: usize,
152 cursor_marker: &str,
153) {
154 let marker_offsets = compute_marker_offsets(editable_text);
155 let mut cursor_placed = false;
156 for (i, &offset) in marker_offsets.iter().enumerate() {
157 let marker_num = i + 1;
158 if !output.is_empty() && !output.ends_with('\n') {
159 output.push('\n');
160 }
161 output.push_str(&marker_tag(marker_num));
162
163 if let Some(&next_offset) = marker_offsets.get(i + 1) {
164 output.push('\n');
165 let block = &editable_text[offset..next_offset];
166 if !cursor_placed
167 && cursor_offset_in_editable >= offset
168 && cursor_offset_in_editable <= next_offset
169 {
170 cursor_placed = true;
171 let cursor_in_block = cursor_offset_in_editable - offset;
172 output.push_str(&block[..cursor_in_block]);
173 output.push_str(cursor_marker);
174 output.push_str(&block[cursor_in_block..]);
175 } else {
176 output.push_str(block);
177 }
178 }
179 }
180}
181
182/// Strip any `<|marker_N|>` tags from `text`.
183///
184/// When a marker tag sits on its own line (followed by `\n`), the trailing
185/// newline is also removed so the surrounding lines stay joined naturally.
186fn strip_marker_tags(text: &str) -> String {
187 let mut result = String::with_capacity(text.len());
188 let mut pos = 0;
189 let bytes = text.as_bytes();
190 while let Some(rel) = text[pos..].find(MARKER_TAG_PREFIX) {
191 result.push_str(&text[pos..pos + rel]);
192 let num_start = pos + rel + MARKER_TAG_PREFIX.len();
193 if let Some(suffix_rel) = text[num_start..].find(MARKER_TAG_SUFFIX) {
194 let mut tag_end = num_start + suffix_rel + MARKER_TAG_SUFFIX.len();
195 if bytes.get(tag_end) == Some(&b'\n') {
196 tag_end += 1;
197 }
198 pos = tag_end;
199 } else {
200 result.push_str(MARKER_TAG_PREFIX);
201 pos = num_start;
202 }
203 }
204 result.push_str(&text[pos..]);
205 result
206}
207
208/// Parse model output that uses the marker format.
209///
210/// Returns `(start_marker_num, end_marker_num, content_between_markers)`.
211/// The leading format-level newline after the start marker is stripped.
212/// Trailing newlines are preserved so blank-line endings in the editable
213/// region are not lost.
214///
215/// Any extra intermediate marker tags that the model may have inserted
216/// between the first and last markers are stripped from the returned content.
217pub fn extract_marker_span(text: &str) -> Result<(usize, usize, String)> {
218 let first_tag_start = text
219 .find(MARKER_TAG_PREFIX)
220 .context("no start marker found in output")?;
221 let first_num_start = first_tag_start + MARKER_TAG_PREFIX.len();
222 let first_num_end = text[first_num_start..]
223 .find(MARKER_TAG_SUFFIX)
224 .map(|i| i + first_num_start)
225 .context("malformed start marker tag")?;
226 let start_num: usize = text[first_num_start..first_num_end]
227 .parse()
228 .context("start marker number is not a valid integer")?;
229 let first_tag_end = first_num_end + MARKER_TAG_SUFFIX.len();
230
231 let last_tag_start = text
232 .rfind(MARKER_TAG_PREFIX)
233 .context("no end marker found in output")?;
234 let last_num_start = last_tag_start + MARKER_TAG_PREFIX.len();
235 let last_num_end = text[last_num_start..]
236 .find(MARKER_TAG_SUFFIX)
237 .map(|i| i + last_num_start)
238 .context("malformed end marker tag")?;
239 let end_num: usize = text[last_num_start..last_num_end]
240 .parse()
241 .context("end marker number is not a valid integer")?;
242
243 if start_num == end_num {
244 return Err(anyhow!(
245 "start and end markers are the same (marker {})",
246 start_num
247 ));
248 }
249
250 let mut content_start = first_tag_end;
251 if text.as_bytes().get(content_start) == Some(&b'\n') {
252 content_start += 1;
253 }
254 let content_end = last_tag_start;
255
256 let content = &text[content_start..content_end.max(content_start)];
257 let content = strip_marker_tags(content);
258 Ok((start_num, end_num, content))
259}
260
261/// Given old editable text and model output with marker span, reconstruct the
262/// full new editable region.
263pub fn apply_marker_span(old_editable: &str, output: &str) -> Result<String> {
264 let (start_num, end_num, raw_new_span) = extract_marker_span(output)?;
265 let marker_offsets = compute_marker_offsets(old_editable);
266
267 let start_idx = start_num
268 .checked_sub(1)
269 .context("marker numbers are 1-indexed")?;
270 let end_idx = end_num
271 .checked_sub(1)
272 .context("marker numbers are 1-indexed")?;
273 let start_byte = *marker_offsets
274 .get(start_idx)
275 .context("start marker number out of range")?;
276 let end_byte = *marker_offsets
277 .get(end_idx)
278 .context("end marker number out of range")?;
279
280 if start_byte > end_byte {
281 return Err(anyhow!("start marker must come before end marker"));
282 }
283
284 let old_span = &old_editable[start_byte..end_byte];
285 let mut new_span = raw_new_span;
286 if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
287 new_span.push('\n');
288 }
289 if !old_span.ends_with('\n') && new_span.ends_with('\n') {
290 new_span.pop();
291 }
292
293 let mut result = String::new();
294 result.push_str(&old_editable[..start_byte]);
295 result.push_str(&new_span);
296 result.push_str(&old_editable[end_byte..]);
297
298 Ok(result)
299}
300
301/// Compare old and new editable text, find the minimal marker span that covers
302/// all changes, and encode the result with marker tags.
303pub fn encode_from_old_and_new(
304 old_editable: &str,
305 new_editable: &str,
306 cursor_offset_in_new: Option<usize>,
307 cursor_marker: &str,
308 end_marker: &str,
309 no_edits_marker: &str,
310) -> Result<String> {
311 if old_editable == new_editable {
312 return Ok(format!("{no_edits_marker}{end_marker}"));
313 }
314
315 let marker_offsets = compute_marker_offsets(old_editable);
316 let (common_prefix, common_suffix) =
317 common_prefix_suffix(old_editable.as_bytes(), new_editable.as_bytes());
318 let change_end_in_old = old_editable.len() - common_suffix;
319
320 let start_marker_idx = marker_offsets
321 .iter()
322 .rposition(|&offset| offset <= common_prefix)
323 .unwrap_or(0);
324 let end_marker_idx = marker_offsets
325 .iter()
326 .position(|&offset| offset >= change_end_in_old)
327 .unwrap_or(marker_offsets.len() - 1);
328
329 let old_start = marker_offsets[start_marker_idx];
330 let old_end = marker_offsets[end_marker_idx];
331
332 let new_start = old_start;
333 let new_end = new_editable
334 .len()
335 .saturating_sub(old_editable.len().saturating_sub(old_end));
336
337 let new_span = &new_editable[new_start..new_end];
338
339 let start_marker_num = start_marker_idx + 1;
340 let end_marker_num = end_marker_idx + 1;
341
342 let mut result = String::new();
343 result.push_str(&marker_tag(start_marker_num));
344 result.push('\n');
345
346 if let Some(cursor_offset) = cursor_offset_in_new {
347 if cursor_offset >= new_start && cursor_offset <= new_end {
348 let cursor_in_span = cursor_offset - new_start;
349 let bounded = cursor_in_span.min(new_span.len());
350 result.push_str(&new_span[..bounded]);
351 result.push_str(cursor_marker);
352 result.push_str(&new_span[bounded..]);
353 } else {
354 result.push_str(new_span);
355 }
356 } else {
357 result.push_str(new_span);
358 }
359
360 if !result.ends_with('\n') {
361 result.push('\n');
362 }
363 result.push_str(&marker_tag(end_marker_num));
364 result.push('\n');
365 result.push_str(end_marker);
366
367 Ok(result)
368}
369
370/// Extract the full editable region from text that uses marker tags.
371///
372/// Returns the concatenation of all block contents between the first and last
373/// markers, with intermediate marker tags stripped.
374pub fn extract_editable_region_from_markers(text: &str) -> Option<String> {
375 let first_marker_start = text.find(MARKER_TAG_PREFIX)?;
376
377 let mut markers: Vec<(usize, usize)> = Vec::new();
378 let mut search_start = first_marker_start;
379 while let Some(rel_pos) = text[search_start..].find(MARKER_TAG_PREFIX) {
380 let tag_start = search_start + rel_pos;
381 let num_start = tag_start + MARKER_TAG_PREFIX.len();
382 let num_end = text[num_start..].find(MARKER_TAG_SUFFIX)?;
383 let tag_end = num_start + num_end + MARKER_TAG_SUFFIX.len();
384 markers.push((tag_start, tag_end));
385 search_start = tag_end;
386 }
387
388 if markers.len() < 2 {
389 return None;
390 }
391
392 let (_, first_tag_end) = markers[0];
393 let (last_tag_start, _) = markers[markers.len() - 1];
394
395 let mut content_start = first_tag_end;
396 if text.as_bytes().get(content_start) == Some(&b'\n') {
397 content_start += 1;
398 }
399 let mut content_end = last_tag_start;
400 if content_end > content_start && text.as_bytes().get(content_end - 1) == Some(&b'\n') {
401 content_end -= 1;
402 }
403
404 let raw = &text[content_start..content_end];
405 let result = strip_marker_tags(raw);
406 let result = result.strip_suffix('\n').unwrap_or(&result).to_string();
407 Some(result)
408}
409
/// One marker tag located by `collect_tags`.
struct ParsedTag {
    /// Numeric payload of the tag, as returned by the parse callback.
    value: isize,
    /// Byte offset in the scanned text where the tag's prefix begins.
    tag_start: usize,
    /// Byte offset just past the tag's closing suffix.
    tag_end: usize,
}
415
416fn collect_tags(text: &str, prefix: &str, parse: fn(&str) -> Option<isize>) -> Vec<ParsedTag> {
417 let mut tags = Vec::new();
418 let mut search_from = 0;
419 while let Some(rel_pos) = text[search_from..].find(prefix) {
420 let tag_start = search_from + rel_pos;
421 let payload_start = tag_start + prefix.len();
422 if let Some(suffix_rel) = text[payload_start..].find(MARKER_TAG_SUFFIX) {
423 let payload_end = payload_start + suffix_rel;
424 if let Some(value) = parse(&text[payload_start..payload_end]) {
425 let tag_end = payload_end + MARKER_TAG_SUFFIX.len();
426 tags.push(ParsedTag {
427 value,
428 tag_start,
429 tag_end,
430 });
431 search_from = tag_end;
432 continue;
433 }
434 }
435 search_from = tag_start + prefix.len();
436 }
437 tags
438}
439
440fn collect_marker_tags(text: &str) -> Vec<ParsedTag> {
441 collect_tags(text, MARKER_TAG_PREFIX, |s| {
442 s.parse::<usize>().ok().map(|n| n as isize)
443 })
444}
445
446fn collect_relative_marker_tags(text: &str) -> Vec<ParsedTag> {
447 collect_tags(text, RELATIVE_MARKER_TAG_PREFIX, |s| {
448 s.parse::<isize>().ok()
449 })
450}
451
/// Return the 1-based number of the boundary in `marker_offsets` that lies
/// closest to `cursor_offset` (a missing cursor counts as offset 0).
///
/// On a tie the earlier boundary wins. With no boundaries at all the answer
/// is `1`.
pub fn nearest_marker_number(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
    let target = cursor_offset.unwrap_or(0) as isize;

    // (distance, 1-based marker number) of the best candidate so far.
    let mut best: Option<(usize, usize)> = None;
    for (index, &boundary) in marker_offsets.iter().enumerate() {
        let distance = (boundary as isize - target).unsigned_abs();
        let improves = match best {
            Some((best_distance, _)) => distance < best_distance,
            None => true,
        };
        if improves {
            best = Some((distance, index + 1));
        }
    }

    best.map_or(1, |(_, number)| number)
}
461
/// Return the index of the block whose half-open byte range
/// `[start, end)` contains `cursor_offset` (a missing cursor counts as 0).
///
/// When no block contains the cursor, the index of the last block is
/// returned (`len - 2`, saturating at 0).
fn cursor_block_index(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
    let cursor = cursor_offset.unwrap_or(0);

    for (block, bounds) in marker_offsets.windows(2).enumerate() {
        if (bounds[0]..bounds[1]).contains(&cursor) {
            return block;
        }
    }

    marker_offsets.len().saturating_sub(2)
}
469
/// Return `(prefix, suffix)`: the number of leading bytes `a` and `b` share,
/// and the number of trailing bytes they share without overlapping that
/// prefix in either slice.
fn common_prefix_suffix(a: &[u8], b: &[u8]) -> (usize, usize) {
    let shortest = a.len().min(b.len());

    let mut prefix = 0;
    while prefix < shortest && a[prefix] == b[prefix] {
        prefix += 1;
    }

    // The suffix may only use bytes the prefix has not already claimed.
    let suffix_limit = shortest - prefix;
    let mut suffix = 0;
    while suffix < suffix_limit && a[a.len() - 1 - suffix] == b[b.len() - 1 - suffix] {
        suffix += 1;
    }

    (prefix, suffix)
}
483
/// Translate `old_rel`, an offset inside the old span, into the matching
/// offset inside the new span.
///
/// Offsets within the shared prefix map to themselves, offsets within the
/// shared suffix keep their distance from the end of the span, and offsets in
/// the changed middle are scaled proportionally to the new middle's length.
fn map_boundary_offset(
    old_rel: usize,
    old_span_len: usize,
    new_span_len: usize,
    span_common_prefix: usize,
    span_common_suffix: usize,
) -> usize {
    // Inside the unchanged prefix: same position.
    if old_rel <= span_common_prefix {
        return old_rel;
    }

    // Inside the unchanged suffix: same distance from the end.
    let old_suffix_start = old_span_len - span_common_suffix;
    if old_rel >= old_suffix_start {
        return new_span_len - (old_span_len - old_rel);
    }

    // Inside the changed middle: interpolate.
    let old_changed_len = old_span_len
        .saturating_sub(span_common_prefix)
        .saturating_sub(span_common_suffix);
    let new_changed_len = new_span_len
        .saturating_sub(span_common_prefix)
        .saturating_sub(span_common_suffix);
    let into_changed = old_rel - span_common_prefix;

    match old_changed_len {
        0 => span_common_prefix,
        len => span_common_prefix + into_changed * new_changed_len / len,
    }
}
514
/// Move `offset` to the start of a line in `text`.
///
/// The offset is first clamped to the text and moved back to a character
/// boundary. If it is then at the end of the text or already at a line start
/// it is returned as is. Otherwise it snaps forward to the start of the next
/// line, or, when no newline follows, back to the start of the current line.
fn snap_to_line_start(text: &str, offset: usize) -> usize {
    // Clamp, then step back to the nearest character boundary (offset 0 is
    // always a boundary, so this terminates).
    let mut pos = offset.min(text.len());
    while !text.is_char_boundary(pos) {
        pos -= 1;
    }

    if pos == text.len() {
        return pos;
    }

    let already_at_line_start = pos == 0 || text.as_bytes()[pos - 1] == b'\n';
    if already_at_line_start {
        return pos;
    }

    match text[pos..].find('\n') {
        // The byte right after a newline is always a character boundary.
        Some(newline_rel) => pos + newline_rel + 1,
        None => text[..pos].rfind('\n').map_or(0, |newline| newline + 1),
    }
}
535
/// Append `editable_text` to `output` with a tag at every boundary, adding no
/// bytes other than the tags and the cursor marker.
///
/// `tag_for_index` turns a boundary index into the tag text written at that
/// boundary. `cursor_marker` is inserted once, in the first block whose byte
/// range (inclusive at both ends) contains `cursor_offset_in_editable`.
fn write_editable_with_markers_impl(
    output: &mut String,
    editable_text: &str,
    cursor_offset_in_editable: usize,
    cursor_marker: &str,
    marker_offsets: &[usize],
    tag_for_index: impl Fn(usize) -> String,
) {
    let mut cursor_pending = true;

    for (index, &block_start) in marker_offsets.iter().enumerate() {
        output.push_str(&tag_for_index(index));

        // The last boundary only closes the region; it has no block after it.
        let block_end = match marker_offsets.get(index + 1) {
            Some(&end) => end,
            None => continue,
        };
        let block = &editable_text[block_start..block_end];

        let cursor_here =
            cursor_pending && (block_start..=block_end).contains(&cursor_offset_in_editable);
        if !cursor_here {
            output.push_str(block);
            continue;
        }

        cursor_pending = false;
        let (before, after) = block.split_at(cursor_offset_in_editable - block_start);
        output.push_str(before);
        output.push_str(cursor_marker);
        output.push_str(after);
    }
}
569
/// Write `editable_text` into `output` in the V0316 format.
///
/// Boundaries come from `compute_marker_offsets`, and each boundary is tagged
/// with its absolute 1-based marker number. `cursor_marker` is inserted at
/// `cursor_offset_in_editable`.
pub fn write_editable_with_markers_v0316(
    output: &mut String,
    editable_text: &str,
    cursor_offset_in_editable: usize,
    cursor_marker: &str,
) {
    let marker_offsets = compute_marker_offsets(editable_text);
    write_editable_with_markers_impl(
        output,
        editable_text,
        cursor_offset_in_editable,
        cursor_marker,
        &marker_offsets,
        // Boundary indices are 0-based; marker numbers are 1-based.
        |i| marker_tag(i + 1),
    );
}
586
/// Write `editable_text` into `output` in the V0317 format.
///
/// Boundaries come from `compute_marker_offsets`. Each boundary is tagged
/// with its index relative to the block that contains the cursor, so the
/// boundary opening the cursor's block gets delta 0. `cursor_marker` is
/// inserted at `cursor_offset_in_editable`.
pub fn write_editable_with_markers_v0317(
    output: &mut String,
    editable_text: &str,
    cursor_offset_in_editable: usize,
    cursor_marker: &str,
) {
    let marker_offsets = compute_marker_offsets(editable_text);
    // Index of the block holding the cursor; all tags are expressed relative to it.
    let anchor_idx = cursor_block_index(Some(cursor_offset_in_editable), &marker_offsets);
    write_editable_with_markers_impl(
        output,
        editable_text,
        cursor_offset_in_editable,
        cursor_marker,
        &marker_offsets,
        |i| marker_tag_relative(i as isize - anchor_idx as isize),
    );
}
604
/// Write `editable_text` into `output` in the V0318 format.
///
/// Boundaries come from `compute_marker_offsets_v0318`, and each boundary is
/// tagged with its absolute 1-based marker number. `cursor_marker` is
/// inserted at `cursor_offset_in_editable`.
pub fn write_editable_with_markers_v0318(
    output: &mut String,
    editable_text: &str,
    cursor_offset_in_editable: usize,
    cursor_marker: &str,
) {
    let marker_offsets = compute_marker_offsets_v0318(editable_text);
    write_editable_with_markers_impl(
        output,
        editable_text,
        cursor_offset_in_editable,
        cursor_marker,
        &marker_offsets,
        // Boundary indices are 0-based; marker numbers are 1-based.
        |i| marker_tag(i + 1),
    );
}
621
622/// Parse byte-exact model output and reconstruct the full new editable region.
623///
624/// `resolve_boundary` maps a parsed tag value to an absolute byte offset in
625/// old_editable, given the marker_offsets. Returns `(start_byte, end_byte)` or
626/// an error.
627fn apply_marker_span_impl(
628 old_editable: &str,
629 tags: &[ParsedTag],
630 output: &str,
631 resolve_boundaries: impl Fn(isize, isize) -> Result<(usize, usize)>,
632) -> Result<String> {
633 if tags.is_empty() {
634 return Err(anyhow!("no marker tags found in output"));
635 }
636 if tags.len() == 1 {
637 return Err(anyhow!(
638 "only one marker tag found in output, expected at least two"
639 ));
640 }
641
642 let start_value = tags[0].value;
643 let end_value = tags[tags.len() - 1].value;
644
645 if start_value == end_value {
646 return Ok(old_editable.to_string());
647 }
648
649 let (start_byte, end_byte) = resolve_boundaries(start_value, end_value)?;
650
651 if start_byte > end_byte {
652 return Err(anyhow!("start marker must come before end marker"));
653 }
654
655 let mut new_content = String::new();
656 for i in 0..tags.len() - 1 {
657 let content_start = tags[i].tag_end;
658 let content_end = tags[i + 1].tag_start;
659 if content_start <= content_end {
660 new_content.push_str(&output[content_start..content_end]);
661 }
662 }
663
664 let mut result = String::new();
665 result.push_str(&old_editable[..start_byte]);
666 result.push_str(&new_content);
667 result.push_str(&old_editable[end_byte..]);
668
669 Ok(result)
670}
671
672pub fn apply_marker_span_v0316(old_editable: &str, output: &str) -> Result<String> {
673 let tags = collect_marker_tags(output);
674
675 // Validate monotonically increasing with no gaps (best-effort warning)
676 if tags.len() >= 2 {
677 let start_num = tags[0].value;
678 let end_num = tags[tags.len() - 1].value;
679 if start_num != end_num {
680 let expected: Vec<isize> = (start_num..=end_num).collect();
681 let actual: Vec<isize> = tags.iter().map(|t| t.value).collect();
682 if actual != expected {
683 eprintln!(
684 "V0316 marker sequence validation failed: expected {:?}, got {:?}. Attempting best-effort parse.",
685 expected, actual
686 );
687 }
688 }
689 }
690
691 let marker_offsets = compute_marker_offsets(old_editable);
692 apply_marker_span_impl(old_editable, &tags, output, |start_val, end_val| {
693 let start_idx = (start_val as usize)
694 .checked_sub(1)
695 .context("marker numbers are 1-indexed")?;
696 let end_idx = (end_val as usize)
697 .checked_sub(1)
698 .context("marker numbers are 1-indexed")?;
699 let start_byte = *marker_offsets
700 .get(start_idx)
701 .context("start marker number out of range")?;
702 let end_byte = *marker_offsets
703 .get(end_idx)
704 .context("end marker number out of range")?;
705 Ok((start_byte, end_byte))
706 })
707}
708
709pub fn apply_marker_span_v0317(
710 old_editable: &str,
711 output: &str,
712 cursor_offset_in_old: Option<usize>,
713) -> Result<String> {
714 let tags = collect_relative_marker_tags(output);
715 let marker_offsets = compute_marker_offsets(old_editable);
716 let anchor_idx = cursor_block_index(cursor_offset_in_old, &marker_offsets);
717
718 apply_marker_span_impl(old_editable, &tags, output, |start_delta, end_delta| {
719 let start_idx_signed = anchor_idx as isize + start_delta;
720 let end_idx_signed = anchor_idx as isize + end_delta;
721 if start_idx_signed < 0 || end_idx_signed < 0 {
722 return Err(anyhow!("relative marker maps before first marker"));
723 }
724 let start_idx = usize::try_from(start_idx_signed).context("invalid start marker index")?;
725 let end_idx = usize::try_from(end_idx_signed).context("invalid end marker index")?;
726 let start_byte = *marker_offsets
727 .get(start_idx)
728 .context("start marker number out of range")?;
729 let end_byte = *marker_offsets
730 .get(end_idx)
731 .context("end marker number out of range")?;
732 Ok((start_byte, end_byte))
733 })
734}
735
736pub fn apply_marker_span_v0318(old_editable: &str, output: &str) -> Result<String> {
737 let tags = collect_marker_tags(output);
738
739 if tags.len() >= 2 {
740 let start_num = tags[0].value;
741 let end_num = tags[tags.len() - 1].value;
742 if start_num != end_num {
743 let expected: Vec<isize> = (start_num..=end_num).collect();
744 let actual: Vec<isize> = tags.iter().map(|t| t.value).collect();
745 if actual != expected {
746 eprintln!(
747 "V0318 marker sequence validation failed: expected {:?}, got {:?}. Attempting best-effort parse.",
748 expected, actual
749 );
750 }
751 }
752 }
753
754 let marker_offsets = compute_marker_offsets_v0318(old_editable);
755 apply_marker_span_impl(old_editable, &tags, output, |start_val, end_val| {
756 let start_idx = (start_val as usize)
757 .checked_sub(1)
758 .context("marker numbers are 1-indexed")?;
759 let end_idx = (end_val as usize)
760 .checked_sub(1)
761 .context("marker numbers are 1-indexed")?;
762 let start_byte = *marker_offsets
763 .get(start_idx)
764 .context("start marker number out of range")?;
765 let end_byte = *marker_offsets
766 .get(end_idx)
767 .context("end marker number out of range")?;
768 Ok((start_byte, end_byte))
769 })
770}
771
772/// Encode the training target from old and new editable text.
773///
774/// Shared implementation for V0316, V0317, and V0318. The `tag_for_block_idx`
775/// closure maps a block index to the appropriate marker tag string.
776/// `no_edit_tag` is the marker tag to repeat when there are no edits.
777fn encode_from_old_and_new_impl(
778 old_editable: &str,
779 new_editable: &str,
780 cursor_offset_in_new: Option<usize>,
781 cursor_marker: &str,
782 end_marker: &str,
783 no_edit_tag: &str,
784 marker_offsets: &[usize],
785 tag_for_block_idx: impl Fn(usize) -> String,
786) -> Result<String> {
787 if old_editable == new_editable {
788 return Ok(format!("{no_edit_tag}{no_edit_tag}{end_marker}"));
789 }
790
791 let (common_prefix, common_suffix) =
792 common_prefix_suffix(old_editable.as_bytes(), new_editable.as_bytes());
793 let change_end_in_old = old_editable.len() - common_suffix;
794
795 let mut start_marker_idx = marker_offsets
796 .iter()
797 .rposition(|&offset| offset <= common_prefix)
798 .unwrap_or(0);
799 let mut end_marker_idx = marker_offsets
800 .iter()
801 .position(|&offset| offset >= change_end_in_old)
802 .unwrap_or(marker_offsets.len() - 1);
803
804 if start_marker_idx == end_marker_idx {
805 if end_marker_idx < marker_offsets.len().saturating_sub(1) {
806 end_marker_idx += 1;
807 } else if start_marker_idx > 0 {
808 start_marker_idx -= 1;
809 }
810 }
811
812 let old_start = marker_offsets[start_marker_idx];
813 let old_end = marker_offsets[end_marker_idx];
814
815 let new_start = old_start;
816 let new_end = new_editable
817 .len()
818 .saturating_sub(old_editable.len().saturating_sub(old_end));
819
820 let new_span = &new_editable[new_start..new_end];
821 let old_span = &old_editable[old_start..old_end];
822
823 let (span_common_prefix, span_common_suffix) =
824 common_prefix_suffix(old_span.as_bytes(), new_span.as_bytes());
825
826 let mut result = String::new();
827 let mut prev_new_rel = 0usize;
828 let mut cursor_placed = false;
829
830 for block_idx in start_marker_idx..end_marker_idx {
831 result.push_str(&tag_for_block_idx(block_idx));
832
833 let new_rel_end = if block_idx + 1 == end_marker_idx {
834 new_span.len()
835 } else {
836 let old_rel = marker_offsets[block_idx + 1] - old_start;
837 let mapped = map_boundary_offset(
838 old_rel,
839 old_span.len(),
840 new_span.len(),
841 span_common_prefix,
842 span_common_suffix,
843 );
844 snap_to_line_start(new_span, mapped)
845 };
846
847 let new_rel_end = new_rel_end.max(prev_new_rel);
848 let block_content = &new_span[prev_new_rel..new_rel_end];
849
850 if !cursor_placed {
851 if let Some(cursor_offset) = cursor_offset_in_new {
852 let abs_start = new_start + prev_new_rel;
853 let abs_end = new_start + new_rel_end;
854 if cursor_offset >= abs_start && cursor_offset <= abs_end {
855 cursor_placed = true;
856 let cursor_in_block = cursor_offset - abs_start;
857 let bounded = cursor_in_block.min(block_content.len());
858 result.push_str(&block_content[..bounded]);
859 result.push_str(cursor_marker);
860 result.push_str(&block_content[bounded..]);
861 prev_new_rel = new_rel_end;
862 continue;
863 }
864 }
865 }
866
867 result.push_str(block_content);
868 prev_new_rel = new_rel_end;
869 }
870
871 result.push_str(&tag_for_block_idx(end_marker_idx));
872 result.push_str(end_marker);
873
874 Ok(result)
875}
876
/// Encode the training target in the V0316 format.
///
/// Boundaries come from `compute_marker_offsets(old_editable)` and tags are
/// absolute 1-based marker numbers. The tag passed as the "no edits" tag is
/// the one for the boundary nearest to `cursor_offset_in_new`.
pub fn encode_from_old_and_new_v0316(
    old_editable: &str,
    new_editable: &str,
    cursor_offset_in_new: Option<usize>,
    cursor_marker: &str,
    end_marker: &str,
) -> Result<String> {
    let marker_offsets = compute_marker_offsets(old_editable);
    let no_edit_tag = marker_tag(nearest_marker_number(cursor_offset_in_new, &marker_offsets));
    encode_from_old_and_new_impl(
        old_editable,
        new_editable,
        cursor_offset_in_new,
        cursor_marker,
        end_marker,
        &no_edit_tag,
        &marker_offsets,
        // Block indices are 0-based; marker numbers are 1-based.
        |block_idx| marker_tag(block_idx + 1),
    )
}
897
/// Encode the training target in the V0317 format.
///
/// Boundaries come from `compute_marker_offsets(old_editable)` and tags are
/// relative to an anchor block. The anchor is found by looking up
/// `cursor_offset_in_new` in those boundaries. The "no edits" tag is the
/// relative tag for delta 0.
pub fn encode_from_old_and_new_v0317(
    old_editable: &str,
    new_editable: &str,
    cursor_offset_in_new: Option<usize>,
    cursor_marker: &str,
    end_marker: &str,
) -> Result<String> {
    let marker_offsets = compute_marker_offsets(old_editable);
    // Index of the block used as the origin for relative tags.
    let anchor_idx = cursor_block_index(cursor_offset_in_new, &marker_offsets);
    let no_edit_tag = marker_tag_relative(0);
    encode_from_old_and_new_impl(
        old_editable,
        new_editable,
        cursor_offset_in_new,
        cursor_marker,
        end_marker,
        &no_edit_tag,
        &marker_offsets,
        |block_idx| marker_tag_relative(block_idx as isize - anchor_idx as isize),
    )
}
919
/// Encode the training target in the V0318 format.
///
/// Boundaries come from `compute_marker_offsets_v0318(old_editable)` and tags
/// are absolute 1-based marker numbers. The tag passed as the "no edits" tag
/// is the one for the boundary nearest to `cursor_offset_in_new`.
pub fn encode_from_old_and_new_v0318(
    old_editable: &str,
    new_editable: &str,
    cursor_offset_in_new: Option<usize>,
    cursor_marker: &str,
    end_marker: &str,
) -> Result<String> {
    let marker_offsets = compute_marker_offsets_v0318(old_editable);
    let no_edit_tag = marker_tag(nearest_marker_number(cursor_offset_in_new, &marker_offsets));
    encode_from_old_and_new_impl(
        old_editable,
        new_editable,
        cursor_offset_in_new,
        cursor_marker,
        end_marker,
        &no_edit_tag,
        &marker_offsets,
        // Block indices are 0-based; marker numbers are 1-based.
        |block_idx| marker_tag(block_idx + 1),
    )
}
940
#[cfg(test)]
mod tests {
    // Unit tests for marker-offset computation, marker-span extraction and
    // application, and the encode/apply round trips of each format version.
    use super::*;

    /// For a three-line input the only offsets returned are the start and the
    /// end of the text.
    #[test]
    fn test_compute_marker_offsets_small_block() {
        let input = "aaa\nbbb\nccc\n";
        assert_eq!(compute_marker_offsets(input), vec![0, input.len()]);
    }

    /// Two three-line groups separated by a blank line: expects an offset at
    /// byte 13, which is where the `ddd` line begins.
    #[test]
    fn test_compute_marker_offsets_blank_line_split() {
        let input = "aaa\nbbb\nccc\n\nddd\neee\nfff\n";
        let boundaries = compute_marker_offsets(input);
        assert_eq!(boundaries[0], 0);
        assert!(boundaries.contains(&13), "offsets: {boundaries:?}");
        assert_eq!(*boundaries.last().unwrap(), input.len());
    }

    /// Expects an offset at the start of the `def get_text_cell(` line and no
    /// offset at the start of the `self,` line that follows it.
    #[test]
    fn test_compute_marker_offsets_blank_line_split_overrides_pending_hard_cap_boundary() {
        let text = "\
class OCRDataframe(BaseModel):
 model_config = ConfigDict(arbitrary_types_allowed=True)

 df: pl.DataFrame

 def page(self, page_number: int = 0) -> \"OCRDataframe\":
 # Filter dataframe on specific page
 df_page = self.df.filter(pl.col(\"page\") == page_number)
 return OCRDataframe(df=df_page)

 def get_text_cell(
 self,
 cell: Cell,
 margin: int = 0,
 page_number: Optional[int] = None,
 min_confidence: int = 50,
 ) -> Optional[str]:
 \"\"\"
 Get text corresponding to cell
";
        let def_start = text
            .find(" def get_text_cell(")
            .expect("def line exists");
        let self_start = text.find(" self,").expect("self line exists");

        let offsets = compute_marker_offsets(text);

        assert!(
            offsets.contains(&def_start),
            "expected boundary at def line start ({def_start}), got {offsets:?}"
        );
        assert!(
            !offsets.contains(&self_start),
            "did not expect boundary at self line start ({self_start}), got {offsets:?}"
        );
    }

    /// Expects no offset at the closing-brace line that follows a blank line,
    /// and an offset at the start of the doc-comment line further down.
    #[test]
    fn test_compute_marker_offsets_blank_line_split_skips_closer_line() {
        let text = "\
impl Plugin for AhoySchedulePlugin {
 fn build(&self, app: &mut App) {
 app.configure_sets(
 self.schedule,
 (
 AhoySystems::MoveCharacters,
 AhoySystems::ApplyForcesToDynamicRigidBodies,
 )
 .chain()
 .before(PhysicsSystems::First),
 );

 }
}

/// System set used by all systems of `bevy_ahoy`.
#[derive(SystemSet, Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum AhoySystems {
 MoveCharacters,
 ApplyForcesToDynamicRigidBodies,
}
";
        let closer_start = text.find(" }\n").expect("closer line exists");
        let doc_start = text
            .find("/// System set used by all systems of `bevy_ahoy`.")
            .expect("doc line exists");

        let offsets = compute_marker_offsets(text);

        assert!(
            !offsets.contains(&closer_start),
            "did not expect boundary at closer line start ({closer_start}), got {offsets:?}"
        );
        assert!(
            offsets.contains(&doc_start),
            "expected boundary at doc line start ({doc_start}), got {offsets:?}"
        );
    }

    /// Ten one-token lines with no blank line must yield at least three
    /// offsets, i.e. at least one interior boundary.
    #[test]
    fn test_compute_marker_offsets_max_lines_split() {
        let boundaries = compute_marker_offsets("1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n");
        assert!(boundaries.len() >= 3, "offsets: {boundaries:?}");
    }

    /// Eight plain lines followed by a `}` line and a `case` line: expects an
    /// offset at the start of the `case` line.
    #[test]
    fn test_compute_marker_offsets_hard_cap_nudges_past_closer_to_case_line() {
        let text = "a1\na2\na3\na4\na5\na6\na7\na8\n}\ncase 'x': {\nbody\n";
        let expected = text.find("case 'x': {").expect("case line exists");
        let offsets = compute_marker_offsets(text);

        assert!(
            offsets.contains(&expected),
            "expected nudged boundary at case line start ({expected}), got {offsets:?}"
        );
    }

    /// Same shape as the previous test but with five `}` lines before the
    /// `case` line: expects no offset at the `case` line.
    #[test]
    fn test_compute_marker_offsets_hard_cap_nudge_respects_max_forward_lines() {
        let text = "a1\na2\na3\na4\na5\na6\na7\na8\n}\n}\n}\n}\n}\ncase 'x': {\nbody\n";
        let case_start = text.find("case 'x': {").expect("case line exists");
        let offsets = compute_marker_offsets(text);

        assert!(
            !offsets.contains(&case_start),
            "boundary should not nudge beyond max forward lines; offsets: {offsets:?}"
        );
    }

    /// The returned offsets must begin at 0, end at the text length, and be in
    /// non-decreasing order for this fixture.
    #[test]
    fn test_compute_marker_offsets_stay_sorted_when_hard_cap_boundary_nudges_forward() {
        let text = "\
aaaaaaaaaa = 1;
bbbbbbbbbb = 2;
cccccccccc = 3;
dddddddddd = 4;
eeeeeeeeee = 5;
ffffffffff = 6;
gggggggggg = 7;
hhhhhhhhhh = 8;
 };
 };

 grafanaDashboards = {
 cluster-overview.spec = {
 inherit instanceSelector;
 folderRef = \"infrastructure\";
 json = builtins.readFile ./grafana/dashboards/cluster-overview.json;
 };
 };
";
        let offsets = compute_marker_offsets(text);

        assert_eq!(offsets.first().copied(), Some(0), "offsets: {offsets:?}");
        assert_eq!(
            offsets.last().copied(),
            Some(text.len()),
            "offsets: {offsets:?}"
        );
        let non_decreasing = offsets.windows(2).all(|pair| pair[0] <= pair[1]);
        assert!(non_decreasing, "offsets must be sorted: {offsets:?}");
    }

    /// Empty input produces the two offsets `[0, 0]`.
    #[test]
    fn test_compute_marker_offsets_empty() {
        assert_eq!(compute_marker_offsets(""), vec![0, 0]);
    }

    /// Every block delimited by consecutive offsets in this markdown fixture
    /// must contain at least `V0316_MIN_BLOCK_LINES` lines.
    #[test]
    fn test_compute_marker_offsets_avoid_short_markdown_blocks() {
        let text = "\
# Spree Posts

This is a Posts extension for [Spree Commerce](https://spreecommerce.org), built with Ruby on Rails.

## Installation

1. Add this extension to your Gemfile with this line:

 ```ruby
 bundle add spree_posts
 ```

2. Run the install generator

 ```ruby
 bundle exec rails g spree_posts:install
 ```

3. Restart your server

 If your server was running, restart it so that it can find the assets properly.

## Developing

1. Create a dummy app

 ```bash
 bundle update
 bundle exec rake test_app
 ```

2. Add your new code
3. Run tests

 ```bash
 bundle exec rspec
 ```

When testing your applications integration with this extension you may use it's factories.
Simply add this require statement to your spec_helper:

```ruby
require 'spree_posts/factories'
```

## Releasing a new version

```shell
bundle exec gem bump -p -t
bundle exec gem release
```

For more options please see [gem-release README](https://github.com/svenfuchs/gem-release)

## Contributing

If you'd like to contribute, please take a look at the contributing guide.
";
        let offsets = compute_marker_offsets(text);

        assert_eq!(offsets.first().copied(), Some(0), "offsets: {offsets:?}");
        assert_eq!(
            offsets.last().copied(),
            Some(text.len()),
            "offsets: {offsets:?}"
        );

        for bounds in offsets.windows(2) {
            let (block_start, block_end) = (bounds[0], bounds[1]);
            let block = &text[block_start..block_end];
            let line_count = block.lines().count();
            assert!(
                line_count >= V0316_MIN_BLOCK_LINES,
                "block too short: {line_count} lines in block {block:?} with offsets {offsets:?}"
            );
        }
    }

    /// Parses a span delimited by markers 2 and 3 and checks the returned
    /// marker numbers and content.
    #[test]
    fn test_extract_marker_span() {
        let (first, last, body) =
            extract_marker_span("<|marker_2|>\n new content\n<|marker_3|>\n").unwrap();
        assert_eq!(first, 2);
        assert_eq!(last, 3);
        assert_eq!(body, " new content\n");
    }

    /// Parses a three-line span delimited by markers 1 and 4.
    #[test]
    fn test_extract_marker_span_multi_line() {
        let (first, last, body) =
            extract_marker_span("<|marker_1|>\nline1\nline2\nline3\n<|marker_4|>").unwrap();
        assert_eq!(first, 1);
        assert_eq!(last, 4);
        assert_eq!(body, "line1\nline2\nline3\n");
    }

    /// Applying a span covering markers 1..2 replaces the whole three-line
    /// text with the span content.
    #[test]
    fn test_apply_marker_span_basic() {
        let before = "aaa\nbbb\nccc\n";
        let model_output = "<|marker_1|>\naaa\nBBB\nccc\n<|marker_2|>";
        let after = apply_marker_span(before, model_output).unwrap();
        assert_eq!(after, "aaa\nBBB\nccc\n");
    }

    /// The blank line at the end of the span content must survive in the
    /// applied result.
    #[test]
    fn test_apply_marker_span_preserves_trailing_blank_line() {
        let before = "/\nresult\n\n";
        let model_output = "<|marker_1|>\n//\nresult\n\n<|marker_2|>";
        let after = apply_marker_span(before, model_output).unwrap();
        assert_eq!(after, "//\nresult\n\n");
    }

    /// Encoding identical old and new text yields the no-edits marker followed
    /// by the end marker.
    #[test]
    fn test_encode_no_edits() {
        let unchanged = "aaa\nbbb\nccc\n";
        let encoded = encode_from_old_and_new(
            unchanged,
            unchanged,
            None,
            "<|user_cursor|>",
            ">>>>>>> UPDATED\n",
            "NO_EDITS\n",
        )
        .unwrap();
        assert_eq!(encoded, "NO_EDITS\n>>>>>>> UPDATED\n");
    }

    /// Encoding a one-line change emits markers 1 and 2 around the new text
    /// and terminates with the end marker.
    #[test]
    fn test_encode_with_change() {
        let encoded = encode_from_old_and_new(
            "aaa\nbbb\nccc\n",
            "aaa\nBBB\nccc\n",
            None,
            "<|user_cursor|>",
            ">>>>>>> UPDATED\n",
            "NO_EDITS\n",
        )
        .unwrap();
        assert!(encoded.contains("<|marker_1|>"));
        assert!(encoded.contains("<|marker_2|>"));
        assert!(encoded.contains("aaa\nBBB\nccc\n"));
        assert!(encoded.ends_with(">>>>>>> UPDATED\n"));
    }

    /// Encoding an edit and applying the encoded output (minus the end marker)
    /// to the old text reproduces the new text.
    #[test]
    fn test_roundtrip_encode_apply() {
        let before = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
        let after = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
        let encoded = encode_from_old_and_new(
            before,
            after,
            None,
            "<|user_cursor|>",
            ">>>>>>> UPDATED\n",
            "NO_EDITS\n",
        )
        .unwrap();
        let payload = encoded
            .strip_suffix(">>>>>>> UPDATED\n")
            .expect("should have end marker");
        assert_eq!(apply_marker_span(before, payload).unwrap(), after);
    }

    /// Three markers with surrounding prefix/suffix text: the extracted region
    /// is the content between the first and last marker with tags removed.
    #[test]
    fn test_extract_editable_region_from_markers_multi() {
        let input = "prefix\n<|marker_1|>\naaa\nbbb\n<|marker_2|>\nccc\nddd\n<|marker_3|>\nsuffix";
        let region = extract_editable_region_from_markers(input).unwrap();
        assert_eq!(region, "aaa\nbbb\nccc\nddd");
    }

    /// Two markers only: the extracted region is the content between them.
    #[test]
    fn test_extract_editable_region_two_markers() {
        let input = "<|marker_1|>\none\ntwo three\n<|marker_2|>";
        let region = extract_editable_region_from_markers(input).unwrap();
        assert_eq!(region, "one\ntwo three");
    }

    /// With a cursor offset of 5 in the new text, the cursor marker is emitted
    /// between the first and second `B`.
    #[test]
    fn test_encode_with_cursor() {
        let result = encode_from_old_and_new(
            "aaa\nbbb\nccc\n",
            "aaa\nBBB\nccc\n",
            Some(5),
            "<|user_cursor|>",
            ">>>>>>> UPDATED\n",
            "NO_EDITS\n",
        )
        .unwrap();
        assert!(result.contains("<|user_cursor|>"), "result: {result}");
        assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
    }

    /// A marker appearing between the first and last marker is dropped from
    /// the extracted content.
    #[test]
    fn test_extract_marker_span_strips_intermediate_markers() {
        let (first, last, body) =
            extract_marker_span("<|marker_2|>\nline1\n<|marker_3|>\nline2\n<|marker_4|>").unwrap();
        assert_eq!(first, 2);
        assert_eq!(last, 4);
        assert_eq!(body, "line1\nline2\n");
    }

    /// Two intermediate markers are both dropped from the extracted content.
    #[test]
    fn test_extract_marker_span_strips_multiple_intermediate_markers() {
        let input = "<|marker_1|>\naaa\n<|marker_2|>\nbbb\n<|marker_3|>\nccc\n<|marker_4|>";
        let (first, last, body) = extract_marker_span(input).unwrap();
        assert_eq!(first, 1);
        assert_eq!(last, 4);
        assert_eq!(body, "aaa\nbbb\nccc\n");
    }

    /// A repeated `<|marker_1|>` inside the span does not end up in the
    /// applied result.
    #[test]
    fn test_apply_marker_span_with_extra_intermediate_marker() {
        let before = "aaa\nbbb\nccc\n";
        let model_output = "<|marker_1|>\naaa\n<|marker_1|>\nBBB\nccc\n<|marker_2|>";
        let after = apply_marker_span(before, model_output).unwrap();
        assert_eq!(after, "aaa\nBBB\nccc\n");
    }

    /// Covers three inputs: no marker, an inline marker, and a marker on its
    /// own line (whose line break is removed together with the tag).
    #[test]
    fn test_strip_marker_tags_inline() {
        assert_eq!(strip_marker_tags("no markers here"), "no markers here");
        assert_eq!(strip_marker_tags("before<|marker_5|>after"), "beforeafter");
        assert_eq!(
            strip_marker_tags("line1\n<|marker_3|>\nline2"),
            "line1\nline2"
        );
    }

    /// The v0316 writer output starts with marker 1, contains the cursor
    /// marker, and reduces to the input once cursor and marker tags are
    /// removed.
    #[test]
    fn test_write_editable_with_markers_v0316_byte_exact() {
        let editable = "aaa\nbbb\nccc\n";
        let mut written = String::new();
        write_editable_with_markers_v0316(&mut written, editable, 4, "<|user_cursor|>");
        assert!(written.starts_with("<|marker_1|>"));
        assert!(written.contains("<|user_cursor|>"));

        let without_cursor = written.replace("<|user_cursor|>", "");
        let plain = strip_marker_tags(&without_cursor);
        assert_eq!(plain, editable);
    }

    /// v0316: a span from marker 1 to marker 2 replaces the whole text.
    #[test]
    fn test_apply_marker_span_v0316_basic() {
        let before = "aaa\nbbb\nccc\n";
        let model_output = "<|marker_1|>aaa\nBBB\nccc\n<|marker_2|>";
        let after = apply_marker_span_v0316(before, model_output).unwrap();
        assert_eq!(after, "aaa\nBBB\nccc\n");
    }

    /// v0316: the same marker repeated back to back leaves the text unchanged.
    #[test]
    fn test_apply_marker_span_v0316_no_edit() {
        let before = "aaa\nbbb\nccc\n";
        let after = apply_marker_span_v0316(before, "<|marker_1|><|marker_1|>").unwrap();
        assert_eq!(after, before);
    }

    /// v0316: identical start and end markers leave the text unchanged even
    /// when there is content between them.
    #[test]
    fn test_apply_marker_span_v0316_no_edit_any_marker() {
        let before = "aaa\nbbb\nccc\n";
        let after =
            apply_marker_span_v0316(before, "<|marker_2|>ignored content<|marker_2|>").unwrap();
        assert_eq!(after, before);
    }

    /// v0316: builds an output that rewrites every block (with intermediate
    /// markers at the computed offsets) and checks that applying it yields the
    /// fully upper-cased text.
    #[test]
    fn test_apply_marker_span_v0316_multi_block() {
        let before = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
        let boundaries = compute_marker_offsets(before);
        assert!(
            boundaries.len() >= 3,
            "expected at least 3 offsets, got {boundaries:?}"
        );

        // Same byte length as `before`, so the old offsets slice it cleanly.
        let new_content = "LINE1\nLINE2\nLINE3\n\nLINE5\nLINE6\nLINE7\nLINE8\n";
        let mut model_output = String::from("<|marker_1|>");
        for (block_idx, bounds) in boundaries.windows(2).enumerate() {
            if block_idx > 0 {
                model_output.push_str(&marker_tag(block_idx + 1));
            }
            model_output.push_str(&new_content[bounds[0]..bounds[1]]);
        }
        model_output.push_str(&marker_tag(boundaries.len()));

        let after = apply_marker_span_v0316(before, &model_output).unwrap();
        assert_eq!(after, new_content);
    }

    /// v0316: a missing trailing newline in the span content is not added
    /// back.
    #[test]
    fn test_apply_marker_span_v0316_byte_exact_no_normalization() {
        let before = "aaa\nbbb\nccc\n";
        let model_output = "<|marker_1|>aaa\nBBB\nccc<|marker_2|>";
        let after = apply_marker_span_v0316(before, model_output).unwrap();
        assert_eq!(after, "aaa\nBBB\nccc");
    }

    /// v0316: encoding identical texts ends with the end marker, and applying
    /// the remainder gives back the original text.
    #[test]
    fn test_encode_v0316_no_edits() {
        let unchanged = "aaa\nbbb\nccc\n";
        let encoded = encode_from_old_and_new_v0316(
            unchanged,
            unchanged,
            Some(5),
            "<|user_cursor|>",
            "<|end|>",
        )
        .unwrap();
        assert!(encoded.ends_with("<|end|>"));
        let payload = encoded.strip_suffix("<|end|>").unwrap();
        assert_eq!(
            apply_marker_span_v0316(unchanged, payload).unwrap(),
            unchanged
        );
    }

    /// v0316: encoding a one-line change emits markers 1 and 2 and the end
    /// marker.
    #[test]
    fn test_encode_v0316_with_change() {
        let encoded = encode_from_old_and_new_v0316(
            "aaa\nbbb\nccc\n",
            "aaa\nBBB\nccc\n",
            None,
            "<|user_cursor|>",
            "<|end|>",
        )
        .unwrap();
        assert!(encoded.contains("<|marker_1|>"));
        assert!(encoded.contains("<|marker_2|>"));
        assert!(encoded.ends_with("<|end|>"));
    }

    /// v0316: encode followed by apply reproduces the new text.
    #[test]
    fn test_roundtrip_v0316() {
        let before = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
        let after = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
        let encoded =
            encode_from_old_and_new_v0316(before, after, None, "<|user_cursor|>", "<|end|>")
                .unwrap();
        let payload = encoded
            .strip_suffix("<|end|>")
            .expect("should have end marker");
        assert_eq!(apply_marker_span_v0316(before, payload).unwrap(), after);
    }

    /// v0316: with a cursor offset of 5 the cursor marker lands between the
    /// first and second `B`.
    #[test]
    fn test_roundtrip_v0316_with_cursor() {
        let result = encode_from_old_and_new_v0316(
            "aaa\nbbb\nccc\n",
            "aaa\nBBB\nccc\n",
            Some(5),
            "<|user_cursor|>",
            "<|end|>",
        )
        .unwrap();
        assert!(result.contains("<|user_cursor|>"), "result: {result}");
        assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
    }

    /// v0316: round trip with changes on both sides of the blank line.
    #[test]
    fn test_roundtrip_v0316_multi_block_change() {
        let before = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
        let after = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
        let encoded =
            encode_from_old_and_new_v0316(before, after, None, "<|user_cursor|>", "<|end|>")
                .unwrap();
        let payload = encoded
            .strip_suffix("<|end|>")
            .expect("should have end marker");
        assert_eq!(apply_marker_span_v0316(before, payload).unwrap(), after);
    }

    /// Pins the 1-based marker number chosen for several cursor positions,
    /// including the `None` cursor, against offsets `[0, 10, 20, 30]`.
    #[test]
    fn test_nearest_marker_number() {
        let boundaries = vec![0, 10, 20, 30];
        assert_eq!(nearest_marker_number(Some(0), &boundaries), 1);
        assert_eq!(nearest_marker_number(Some(9), &boundaries), 2);
        assert_eq!(nearest_marker_number(Some(15), &boundaries), 2);
        assert_eq!(nearest_marker_number(Some(25), &boundaries), 3);
        assert_eq!(nearest_marker_number(Some(30), &boundaries), 4);
        assert_eq!(nearest_marker_number(None, &boundaries), 1);
    }

    /// Negative deltas render with `-`, positive deltas with `+`, and zero
    /// renders as `-0`.
    #[test]
    fn test_marker_tag_relative_formats_as_expected() {
        assert_eq!(marker_tag_relative(-2), "<|marker-2|>");
        assert_eq!(marker_tag_relative(-1), "<|marker-1|>");
        assert_eq!(marker_tag_relative(0), "<|marker-0|>");
        assert_eq!(marker_tag_relative(1), "<|marker+1|>");
        assert_eq!(marker_tag_relative(2), "<|marker+2|>");
    }

    /// The v0317 writer output contains `<|marker-0|>` and the cursor marker,
    /// and reduces to the input once the cursor and every relative marker tag
    /// are removed.
    #[test]
    fn test_write_editable_with_markers_v0317_includes_relative_markers_and_cursor() {
        let editable = "aaa\nbbb\nccc\n";
        let mut written = String::new();
        write_editable_with_markers_v0317(&mut written, editable, 4, "<|user_cursor|>");

        assert!(written.contains("<|marker-0|>"));
        assert!(written.contains("<|user_cursor|>"));

        let without_cursor = written.replace("<|user_cursor|>", "");
        let mut plain = without_cursor.clone();
        for marker in collect_relative_marker_tags(&without_cursor).iter() {
            // Tag positions refer to the cursor-free text, not to `plain`.
            let tag = &without_cursor[marker.tag_start..marker.tag_end];
            plain = plain.replace(tag, "");
        }
        assert_eq!(plain, editable);
    }

    /// v0317: a span from `marker-0` to `marker+1` replaces the whole text.
    #[test]
    fn test_apply_marker_span_v0317_basic() {
        let before = "aaa\nbbb\nccc\n";
        let model_output = "<|marker-0|>aaa\nBBB\nccc\n<|marker+1|>";
        let after = apply_marker_span_v0317(before, model_output, Some(0)).unwrap();
        assert_eq!(after, "aaa\nBBB\nccc\n");
    }

    /// v0317: `marker-0` repeated back to back leaves the text unchanged.
    #[test]
    fn test_apply_marker_span_v0317_no_edit() {
        let before = "aaa\nbbb\nccc\n";
        let after = apply_marker_span_v0317(before, "<|marker-0|><|marker-0|>", Some(0)).unwrap();
        assert_eq!(after, before);
    }

    /// v0317: encoding identical texts yields exactly two `marker-0` tags
    /// followed by the end marker.
    #[test]
    fn test_encode_v0317_no_edits() {
        let unchanged = "aaa\nbbb\nccc\n";
        let encoded = encode_from_old_and_new_v0317(
            unchanged,
            unchanged,
            Some(5),
            "<|user_cursor|>",
            "<|end|>",
        )
        .unwrap();
        assert_eq!(encoded, "<|marker-0|><|marker-0|><|end|>");
    }

    /// v0317: encode, drop the end and cursor markers, then apply with the
    /// same cursor offset; the result must equal the new text.
    #[test]
    fn test_roundtrip_v0317() {
        let before = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
        let after = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
        let cursor = Some(6);

        let encoded =
            encode_from_old_and_new_v0317(before, after, cursor, "<|user_cursor|>", "<|end|>")
                .unwrap();
        let payload = encoded
            .strip_suffix("<|end|>")
            .expect("should have end marker")
            .replace("<|user_cursor|>", "");
        let reconstructed = apply_marker_span_v0317(before, &payload, cursor).unwrap();
        assert_eq!(reconstructed, after);
    }

    /// v0317: the encoded change contains both the cursor marker and a
    /// `marker-0` tag.
    #[test]
    fn test_roundtrip_v0317_with_cursor_marker() {
        let result = encode_from_old_and_new_v0317(
            "aaa\nbbb\nccc\n",
            "aaa\nBBB\nccc\n",
            Some(5),
            "<|user_cursor|>",
            "<|end|>",
        )
        .unwrap();
        assert!(result.contains("<|user_cursor|>"), "result: {result}");
        assert!(result.contains("<|marker-0|>"), "result: {result}");
    }

    /// For the same text, the v0318 offsets are fewer than the v0316 offsets,
    /// and both lists start at 0 and end at the text length.
    #[test]
    fn test_compute_marker_offsets_v0318_uses_larger_block_sizes() {
        let input = "l1\nl2\nl3\n\nl5\nl6\nl7\nl8\nl9\nl10\nl11\nl12\nl13\n";
        let small_blocks = compute_marker_offsets(input);
        let large_blocks = compute_marker_offsets_v0318(input);

        assert!(large_blocks.len() < small_blocks.len());
        assert_eq!(small_blocks.first().copied(), Some(0));
        assert_eq!(large_blocks.first().copied(), Some(0));
        assert_eq!(small_blocks.last().copied(), Some(input.len()));
        assert_eq!(large_blocks.last().copied(), Some(input.len()));
    }

    /// v0318: encode followed by apply reproduces the new text.
    #[test]
    fn test_roundtrip_v0318() {
        let before = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
        let after = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
        let encoded =
            encode_from_old_and_new_v0318(before, after, None, "<|user_cursor|>", "<|end|>")
                .unwrap();
        let payload = encoded
            .strip_suffix("<|end|>")
            .expect("should have end marker");
        assert_eq!(apply_marker_span_v0318(before, payload).unwrap(), after);
    }

    /// v0318: appending a line at the very end must not encode as a bare
    /// `<|marker_2|>` and must still round-trip.
    #[test]
    fn test_roundtrip_v0318_append_at_end_of_editable_region() {
        let before = "line1\nline2\nline3\n";
        let after = "line1\nline2\nline3\nline4\n";
        let encoded =
            encode_from_old_and_new_v0318(before, after, None, "<|user_cursor|>", "<|end|>")
                .unwrap();

        assert_ne!(encoded, "<|marker_2|><|end|>");

        let payload = encoded
            .strip_suffix("<|end|>")
            .expect("should have end marker");
        assert_eq!(apply_marker_span_v0318(before, payload).unwrap(), after);
    }

    /// v0318: inserting a line directly after the blank line must round-trip.
    #[test]
    fn test_roundtrip_v0318_insert_at_internal_marker_boundary() {
        let before = "alpha\nbeta\n\ngamma\ndelta\n";
        let after = "alpha\nbeta\n\ninserted\ngamma\ndelta\n";
        let encoded =
            encode_from_old_and_new_v0318(before, after, None, "<|user_cursor|>", "<|end|>")
                .unwrap();

        let payload = encoded
            .strip_suffix("<|end|>")
            .expect("should have end marker");
        assert_eq!(apply_marker_span_v0318(before, payload).unwrap(), after);
    }

    /// v0317: every relative marker tag in the encoded output must sit at the
    /// very start of the output or immediately after a newline.
    #[test]
    fn test_encode_v0317_markers_stay_on_line_boundaries() {
        let old = "\
\t\t\t\tcontinue outer;
\t\t\t}
\t\t}
\t}

\tconst intersectionObserver = new IntersectionObserver((entries) => {
\t\tfor (const entry of entries) {
\t\t\tif (entry.isIntersecting) {
\t\t\t\tintersectionObserver.unobserve(entry.target);
\t\t\t\tanchorPreload(/** @type {HTMLAnchorElement} */ (entry.target));
\t\t\t}
\t\t}
\t});

\tconst observer = new MutationObserver(() => {
\t\tconst links = /** @type {NodeListOf<HTMLAnchorElement>} */ (
\t\t\tdocument.querySelectorAll('a[data-preload]')
\t\t);

\t\tfor (const link of links) {
\t\t\tif (linkSet.has(link)) continue;
\t\t\tlinkSet.add(link);

\t\t\tswitch (link.dataset.preload) {
\t\t\t\tcase '':
\t\t\t\tcase 'true':
\t\t\t\tcase 'hover': {
\t\t\t\t\tlink.addEventListener('mouseenter', function callback() {
\t\t\t\t\t\tlink.removeEventListener('mouseenter', callback);
\t\t\t\t\t\tanchorPreload(link);
\t\t\t\t\t});
";
        // Build the edited text with an inline cursor marker, record where the
        // marker sits, then remove it again before encoding.
        let new_with_cursor = old.replacen(
            "\t\t\t\tcase 'true':\n",
            "\t\t\t\tcase 'TRUE':<|user_cursor|>\n",
            1,
        );
        let cursor_offset = new_with_cursor
            .find("<|user_cursor|>")
            .expect("cursor marker in new");
        let new_without_cursor = new_with_cursor.replace("<|user_cursor|>", "");

        let encoded = encode_from_old_and_new_v0317(
            old,
            &new_without_cursor,
            Some(cursor_offset),
            "<|user_cursor|>",
            "<|end|>",
        )
        .unwrap();

        let core = encoded.strip_suffix("<|end|>").unwrap_or(&encoded);
        let core_bytes = core.as_bytes();
        for marker in collect_relative_marker_tags(core) {
            let tag_start = marker.tag_start;
            let at_line_start = tag_start == 0 || core_bytes[tag_start - 1] == b'\n';
            assert!(
                at_line_start,
                "marker not at line boundary: {} in output:\n{}",
                marker_tag_relative(marker.value),
                core
            );
        }
    }
}