1use anyhow::{Context as _, Result, anyhow};
2
/// Opening text of an absolute marker tag; the block number and
/// [`MARKER_TAG_SUFFIX`] follow it (see [`marker_tag`]).
pub const MARKER_TAG_PREFIX: &str = "<|marker_";
/// Closing text shared by absolute and relative marker tags.
pub const MARKER_TAG_SUFFIX: &str = "|>";
/// Opening text of a relative marker tag; a signed delta and
/// [`MARKER_TAG_SUFFIX`] follow it. Note that this is also a prefix of
/// [`MARKER_TAG_PREFIX`].
pub const RELATIVE_MARKER_TAG_PREFIX: &str = "<|marker";
/// Minimum block size (in lines) passed to the splitter for V0316/V0317.
const V0316_MIN_BLOCK_LINES: usize = 3;
/// Maximum block size (in lines) before a forced split for V0316/V0317.
const V0316_MAX_BLOCK_LINES: usize = 8;
/// Minimum block size (in lines) passed to the splitter for V0318.
const V0318_MIN_BLOCK_LINES: usize = 6;
/// Maximum block size (in lines) before a forced split for V0318.
const V0318_MAX_BLOCK_LINES: usize = 16;
/// How many lines `skip_to_good_start` inspects when nudging a boundary.
const MAX_NUDGE_LINES: usize = 5;
/// End-of-output marker for the V0316 format.
pub const V0316_END_MARKER: &str = "<[end▁of▁sentence]>";
/// End-of-output marker for the V0317 format.
pub const V0317_END_MARKER: &str = "<[end▁of▁sentence]>";
/// End-of-output marker for the V0318 format.
pub const V0318_END_MARKER: &str = "<[end▁of▁sentence]>";
14
15pub fn marker_tag(number: usize) -> String {
16 format!("{MARKER_TAG_PREFIX}{number}{MARKER_TAG_SUFFIX}")
17}
18
/// Build a relative marker tag for a signed block `delta`.
///
/// Positive deltas get an explicit `+`, negative deltas keep their `-`, and
/// zero is spelled `<|marker-0|>`.
pub fn marker_tag_relative(delta: isize) -> String {
    match delta {
        0 => "<|marker-0|>".to_owned(),
        ahead if ahead > 0 => format!("<|marker+{ahead}|>"),
        behind => format!("<|marker{behind}|>"),
    }
}
28
/// Per-line facts gathered by `collect_line_info` for boundary placement.
struct LineInfo {
    /// Byte offset of the first byte of the line within the scanned text.
    start: usize,
    /// True when the line is empty after trimming whitespace.
    is_blank: bool,
    /// True when the line is non-blank and not a structural tail (a closer
    /// or a bare control-flow keyword), so a block may begin on it.
    is_good_start: bool,
}
34
35fn collect_line_info(text: &str) -> Vec<LineInfo> {
36 let mut lines = Vec::new();
37 let mut offset = 0;
38 for line in text.split('\n') {
39 let trimmed = line.trim();
40 let is_blank = trimmed.is_empty();
41 let is_good_start = !is_blank && !is_structural_tail(trimmed);
42 lines.push(LineInfo {
43 start: offset,
44 is_blank,
45 is_good_start,
46 });
47 offset += line.len() + 1;
48 }
49 // split('\n') on "abc\n" yields ["abc", ""] — drop the phantom trailing
50 // empty element when the text ends with '\n'.
51 if text.ends_with('\n') && lines.len() > 1 {
52 lines.pop();
53 }
54 lines
55}
56
/// Report whether an already-trimmed line is a poor place to start a block:
/// it begins with a closing `}`, `]` or `)`, or it consists solely of one of
/// `break`, `continue`, `return`, `throw`, `end` (trailing `;` ignored).
fn is_structural_tail(trimmed_line: &str) -> bool {
    const BARE_KEYWORDS: [&str; 5] = ["break", "continue", "return", "throw", "end"];

    let opens_with_closer = matches!(trimmed_line.chars().next(), Some('}' | ']' | ')'));
    if opens_with_closer {
        return true;
    }

    let without_semicolons = trimmed_line.trim_end_matches(';');
    BARE_KEYWORDS.contains(&without_semicolons)
}
66
67/// Starting from line `from`, scan up to `MAX_NUDGE_LINES` forward to find a
68/// line with `is_good_start`. Returns `None` if no suitable line is found.
69fn skip_to_good_start(lines: &[LineInfo], from: usize) -> Option<usize> {
70 (from..lines.len().min(from + MAX_NUDGE_LINES)).find(|&i| lines[i].is_good_start)
71}
72
73/// Compute byte offsets within `editable_text` where marker boundaries should
74/// be placed.
75///
76/// Returns a sorted `Vec<usize>` that always starts with `0` and ends with
77/// `editable_text.len()`. Interior offsets are placed at line boundaries
78/// (right after a `\n`), preferring blank-line boundaries when available and
79/// respecting `min_block_lines` / `max_block_lines` constraints.
80fn compute_marker_offsets_with_limits(
81 editable_text: &str,
82 min_block_lines: usize,
83 max_block_lines: usize,
84) -> Vec<usize> {
85 if editable_text.is_empty() {
86 return vec![0, 0];
87 }
88
89 let lines = collect_line_info(editable_text);
90 let mut offsets = vec![0usize];
91 let mut last_boundary_line = 0;
92 let mut i = 0;
93
94 while i < lines.len() {
95 let gap = i - last_boundary_line;
96
97 // Blank-line split: non-blank line following blank line(s) with enough
98 // accumulated lines.
99 if gap >= min_block_lines && !lines[i].is_blank && i > 0 && lines[i - 1].is_blank {
100 let target = if lines[i].is_good_start {
101 i
102 } else {
103 skip_to_good_start(&lines, i).unwrap_or(i)
104 };
105 if lines.len() - target >= min_block_lines
106 && lines[target].start > *offsets.last().unwrap_or(&0)
107 {
108 offsets.push(lines[target].start);
109 last_boundary_line = target;
110 i = target + 1;
111 continue;
112 }
113 }
114
115 // Hard cap: too many lines without a split.
116 if gap >= max_block_lines {
117 let target = skip_to_good_start(&lines, i).unwrap_or(i);
118 if lines[target].start > *offsets.last().unwrap_or(&0) {
119 offsets.push(lines[target].start);
120 last_boundary_line = target;
121 i = target + 1;
122 continue;
123 }
124 }
125
126 i += 1;
127 }
128
129 let end = editable_text.len();
130 if *offsets.last().unwrap_or(&0) != end {
131 offsets.push(end);
132 }
133
134 offsets
135}
136
137/// Compute byte offsets within `editable_text` for the V0316/V0317 block sizing rules.
138pub fn compute_marker_offsets(editable_text: &str) -> Vec<usize> {
139 compute_marker_offsets_with_limits(editable_text, V0316_MIN_BLOCK_LINES, V0316_MAX_BLOCK_LINES)
140}
141
142pub fn compute_marker_offsets_v0318(editable_text: &str) -> Vec<usize> {
143 compute_marker_offsets_with_limits(editable_text, V0318_MIN_BLOCK_LINES, V0318_MAX_BLOCK_LINES)
144}
145
146/// Write the editable region content with marker tags, inserting the cursor
147/// marker at the given offset within the editable text.
148pub fn write_editable_with_markers(
149 output: &mut String,
150 editable_text: &str,
151 cursor_offset_in_editable: usize,
152 cursor_marker: &str,
153) {
154 let marker_offsets = compute_marker_offsets(editable_text);
155 let mut cursor_placed = false;
156 for (i, &offset) in marker_offsets.iter().enumerate() {
157 let marker_num = i + 1;
158 if !output.is_empty() && !output.ends_with('\n') {
159 output.push('\n');
160 }
161 output.push_str(&marker_tag(marker_num));
162
163 if let Some(&next_offset) = marker_offsets.get(i + 1) {
164 output.push('\n');
165 let block = &editable_text[offset..next_offset];
166 if !cursor_placed
167 && cursor_offset_in_editable >= offset
168 && cursor_offset_in_editable <= next_offset
169 {
170 cursor_placed = true;
171 let cursor_in_block = cursor_offset_in_editable - offset;
172 output.push_str(&block[..cursor_in_block]);
173 output.push_str(cursor_marker);
174 output.push_str(&block[cursor_in_block..]);
175 } else {
176 output.push_str(block);
177 }
178 }
179 }
180}
181
182/// Strip any `<|marker_N|>` tags from `text`.
183///
184/// When a marker tag sits on its own line (followed by `\n`), the trailing
185/// newline is also removed so the surrounding lines stay joined naturally.
186fn strip_marker_tags(text: &str) -> String {
187 let mut result = String::with_capacity(text.len());
188 let mut pos = 0;
189 let bytes = text.as_bytes();
190 while let Some(rel) = text[pos..].find(MARKER_TAG_PREFIX) {
191 result.push_str(&text[pos..pos + rel]);
192 let num_start = pos + rel + MARKER_TAG_PREFIX.len();
193 if let Some(suffix_rel) = text[num_start..].find(MARKER_TAG_SUFFIX) {
194 let mut tag_end = num_start + suffix_rel + MARKER_TAG_SUFFIX.len();
195 if bytes.get(tag_end) == Some(&b'\n') {
196 tag_end += 1;
197 }
198 pos = tag_end;
199 } else {
200 result.push_str(MARKER_TAG_PREFIX);
201 pos = num_start;
202 }
203 }
204 result.push_str(&text[pos..]);
205 result
206}
207
208/// Parse model output that uses the marker format.
209///
210/// Returns `(start_marker_num, end_marker_num, content_between_markers)`.
211/// The leading format-level newline after the start marker is stripped.
212/// Trailing newlines are preserved so blank-line endings in the editable
213/// region are not lost.
214///
215/// Any extra intermediate marker tags that the model may have inserted
216/// between the first and last markers are stripped from the returned content.
217pub fn extract_marker_span(text: &str) -> Result<(usize, usize, String)> {
218 let first_tag_start = text
219 .find(MARKER_TAG_PREFIX)
220 .context("no start marker found in output")?;
221 let first_num_start = first_tag_start + MARKER_TAG_PREFIX.len();
222 let first_num_end = text[first_num_start..]
223 .find(MARKER_TAG_SUFFIX)
224 .map(|i| i + first_num_start)
225 .context("malformed start marker tag")?;
226 let start_num: usize = text[first_num_start..first_num_end]
227 .parse()
228 .context("start marker number is not a valid integer")?;
229 let first_tag_end = first_num_end + MARKER_TAG_SUFFIX.len();
230
231 let last_tag_start = text
232 .rfind(MARKER_TAG_PREFIX)
233 .context("no end marker found in output")?;
234 let last_num_start = last_tag_start + MARKER_TAG_PREFIX.len();
235 let last_num_end = text[last_num_start..]
236 .find(MARKER_TAG_SUFFIX)
237 .map(|i| i + last_num_start)
238 .context("malformed end marker tag")?;
239 let end_num: usize = text[last_num_start..last_num_end]
240 .parse()
241 .context("end marker number is not a valid integer")?;
242
243 if start_num == end_num {
244 return Err(anyhow!(
245 "start and end markers are the same (marker {})",
246 start_num
247 ));
248 }
249
250 let mut content_start = first_tag_end;
251 if text.as_bytes().get(content_start) == Some(&b'\n') {
252 content_start += 1;
253 }
254 let content_end = last_tag_start;
255
256 let content = &text[content_start..content_end.max(content_start)];
257 let content = strip_marker_tags(content);
258 Ok((start_num, end_num, content))
259}
260
261/// Given old editable text and model output with marker span, reconstruct the
262/// full new editable region.
263pub fn apply_marker_span(old_editable: &str, output: &str) -> Result<String> {
264 let (start_num, end_num, raw_new_span) = extract_marker_span(output)?;
265 let marker_offsets = compute_marker_offsets(old_editable);
266
267 let start_idx = start_num
268 .checked_sub(1)
269 .context("marker numbers are 1-indexed")?;
270 let end_idx = end_num
271 .checked_sub(1)
272 .context("marker numbers are 1-indexed")?;
273 let start_byte = *marker_offsets
274 .get(start_idx)
275 .context("start marker number out of range")?;
276 let end_byte = *marker_offsets
277 .get(end_idx)
278 .context("end marker number out of range")?;
279
280 if start_byte > end_byte {
281 return Err(anyhow!("start marker must come before end marker"));
282 }
283
284 let old_span = &old_editable[start_byte..end_byte];
285 let mut new_span = raw_new_span;
286 if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
287 new_span.push('\n');
288 }
289 if !old_span.ends_with('\n') && new_span.ends_with('\n') {
290 new_span.pop();
291 }
292
293 let mut result = String::new();
294 result.push_str(&old_editable[..start_byte]);
295 result.push_str(&new_span);
296 result.push_str(&old_editable[end_byte..]);
297
298 Ok(result)
299}
300
301/// Compare old and new editable text, find the minimal marker span that covers
302/// all changes, and encode the result with marker tags.
303pub fn encode_from_old_and_new(
304 old_editable: &str,
305 new_editable: &str,
306 cursor_offset_in_new: Option<usize>,
307 cursor_marker: &str,
308 end_marker: &str,
309 no_edits_marker: &str,
310) -> Result<String> {
311 if old_editable == new_editable {
312 return Ok(format!("{no_edits_marker}{end_marker}"));
313 }
314
315 let marker_offsets = compute_marker_offsets(old_editable);
316 let (common_prefix, common_suffix) =
317 common_prefix_suffix(old_editable.as_bytes(), new_editable.as_bytes());
318 let change_end_in_old = old_editable.len() - common_suffix;
319
320 let start_marker_idx = marker_offsets
321 .iter()
322 .rposition(|&offset| offset <= common_prefix)
323 .unwrap_or(0);
324 let end_marker_idx = marker_offsets
325 .iter()
326 .position(|&offset| offset >= change_end_in_old)
327 .unwrap_or(marker_offsets.len() - 1);
328
329 let old_start = marker_offsets[start_marker_idx];
330 let old_end = marker_offsets[end_marker_idx];
331
332 let new_start = old_start;
333 let new_end = new_editable
334 .len()
335 .saturating_sub(old_editable.len().saturating_sub(old_end));
336
337 let new_span = &new_editable[new_start..new_end];
338
339 let start_marker_num = start_marker_idx + 1;
340 let end_marker_num = end_marker_idx + 1;
341
342 let mut result = String::new();
343 result.push_str(&marker_tag(start_marker_num));
344 result.push('\n');
345
346 if let Some(cursor_offset) = cursor_offset_in_new {
347 if cursor_offset >= new_start && cursor_offset <= new_end {
348 let cursor_in_span = cursor_offset - new_start;
349 let bounded = cursor_in_span.min(new_span.len());
350 result.push_str(&new_span[..bounded]);
351 result.push_str(cursor_marker);
352 result.push_str(&new_span[bounded..]);
353 } else {
354 result.push_str(new_span);
355 }
356 } else {
357 result.push_str(new_span);
358 }
359
360 if !result.ends_with('\n') {
361 result.push('\n');
362 }
363 result.push_str(&marker_tag(end_marker_num));
364 result.push('\n');
365 result.push_str(end_marker);
366
367 Ok(result)
368}
369
370/// Extract the full editable region from text that uses marker tags.
371///
372/// Returns the concatenation of all block contents between the first and last
373/// markers, with intermediate marker tags stripped.
374pub fn extract_editable_region_from_markers(text: &str) -> Option<String> {
375 let first_marker_start = text.find(MARKER_TAG_PREFIX)?;
376
377 let mut markers: Vec<(usize, usize)> = Vec::new();
378 let mut search_start = first_marker_start;
379 while let Some(rel_pos) = text[search_start..].find(MARKER_TAG_PREFIX) {
380 let tag_start = search_start + rel_pos;
381 let num_start = tag_start + MARKER_TAG_PREFIX.len();
382 let num_end = text[num_start..].find(MARKER_TAG_SUFFIX)?;
383 let tag_end = num_start + num_end + MARKER_TAG_SUFFIX.len();
384 markers.push((tag_start, tag_end));
385 search_start = tag_end;
386 }
387
388 if markers.len() < 2 {
389 return None;
390 }
391
392 let (_, first_tag_end) = markers[0];
393 let (last_tag_start, _) = markers[markers.len() - 1];
394
395 let mut content_start = first_tag_end;
396 if text.as_bytes().get(content_start) == Some(&b'\n') {
397 content_start += 1;
398 }
399 let mut content_end = last_tag_start;
400 if content_end > content_start && text.as_bytes().get(content_end - 1) == Some(&b'\n') {
401 content_end -= 1;
402 }
403
404 let raw = &text[content_start..content_end];
405 let result = strip_marker_tags(raw);
406 let result = result.strip_suffix('\n').unwrap_or(&result).to_string();
407 Some(result)
408}
409
/// A marker tag located in model output by `collect_tags`.
struct ParsedTag {
    /// Parsed payload: a marker number for absolute tags, a signed delta for
    /// relative tags.
    value: isize,
    /// Byte offset of the first byte of the tag.
    tag_start: usize,
    /// Byte offset just past the tag's closing suffix.
    tag_end: usize,
}
415
416fn collect_tags(text: &str, prefix: &str, parse: fn(&str) -> Option<isize>) -> Vec<ParsedTag> {
417 let mut tags = Vec::new();
418 let mut search_from = 0;
419 while let Some(rel_pos) = text[search_from..].find(prefix) {
420 let tag_start = search_from + rel_pos;
421 let payload_start = tag_start + prefix.len();
422 if let Some(suffix_rel) = text[payload_start..].find(MARKER_TAG_SUFFIX) {
423 let payload_end = payload_start + suffix_rel;
424 if let Some(value) = parse(&text[payload_start..payload_end]) {
425 let tag_end = payload_end + MARKER_TAG_SUFFIX.len();
426 tags.push(ParsedTag {
427 value,
428 tag_start,
429 tag_end,
430 });
431 search_from = tag_end;
432 continue;
433 }
434 }
435 search_from = tag_start + prefix.len();
436 }
437 tags
438}
439
440fn collect_marker_tags(text: &str) -> Vec<ParsedTag> {
441 collect_tags(text, MARKER_TAG_PREFIX, |s| {
442 s.parse::<usize>().ok().map(|n| n as isize)
443 })
444}
445
446fn collect_relative_marker_tags(text: &str) -> Vec<ParsedTag> {
447 collect_tags(text, RELATIVE_MARKER_TAG_PREFIX, |s| {
448 s.parse::<isize>().ok()
449 })
450}
451
/// Return the 1-based number of the marker whose offset is closest to the
/// cursor.
///
/// A missing cursor is treated as offset `0`. When two markers are equally
/// close the earlier one wins, and `1` is returned for an empty
/// `marker_offsets`.
pub fn nearest_marker_number(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
    let cursor = cursor_offset.unwrap_or(0);
    marker_offsets
        .iter()
        .enumerate()
        // `abs_diff` measures the distance directly on `usize`, avoiding the
        // wrap-around and overflow of casting both sides to `isize`.
        .min_by_key(|&(_, &offset)| offset.abs_diff(cursor))
        .map_or(1, |(idx, _)| idx + 1)
}
461
/// Return the 0-based index of the block `[offsets[i], offsets[i + 1])` that
/// contains the cursor (a missing cursor counts as offset `0`).
///
/// When no block contains it, the index of the last block is returned
/// (`0` when there are fewer than two offsets).
fn cursor_block_index(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
    let cursor = cursor_offset.unwrap_or_default();
    for (block, bounds) in marker_offsets.windows(2).enumerate() {
        if (bounds[0]..bounds[1]).contains(&cursor) {
            return block;
        }
    }
    marker_offsets.len().saturating_sub(2)
}
469
/// Lengths of the longest common prefix and the longest common suffix of
/// `a` and `b`. The suffix is measured only over the bytes left after the
/// prefix, so the two never overlap in either input.
fn common_prefix_suffix(a: &[u8], b: &[u8]) -> (usize, usize) {
    let shared_head = a
        .iter()
        .zip(b)
        .take_while(|(left, right)| left == right)
        .count();

    // Zipping the reversed remainders stops at the shorter one, which caps
    // the suffix at the bytes not already claimed by the prefix.
    let (tail_a, tail_b) = (&a[shared_head..], &b[shared_head..]);
    let shared_tail = tail_a
        .iter()
        .rev()
        .zip(tail_b.iter().rev())
        .take_while(|(left, right)| left == right)
        .count();

    (shared_head, shared_tail)
}
483
/// Translate `old_rel`, a byte offset inside the old span, into the
/// corresponding offset inside the new span.
///
/// Offsets inside the common prefix map to themselves, offsets inside the
/// common suffix keep their distance from the end of the span, and offsets
/// in the changed middle are scaled linearly by the ratio of the new and old
/// changed lengths.
fn map_boundary_offset(
    old_rel: usize,
    old_span_len: usize,
    new_span_len: usize,
    span_common_prefix: usize,
    span_common_suffix: usize,
) -> usize {
    if old_rel <= span_common_prefix {
        return old_rel;
    }

    if old_rel >= old_span_len - span_common_suffix {
        let distance_from_end = old_span_len - old_rel;
        return new_span_len - distance_from_end;
    }

    // Length of the part of a span that is neither shared prefix nor suffix.
    let changed_len = |span_len: usize| {
        span_len
            .saturating_sub(span_common_prefix)
            .saturating_sub(span_common_suffix)
    };
    let old_changed = changed_len(old_span_len);
    let new_changed = changed_len(new_span_len);

    match old_changed {
        0 => span_common_prefix,
        _ => span_common_prefix + (old_rel - span_common_prefix) * new_changed / old_changed,
    }
}
514
515fn snap_to_line_start(text: &str, offset: usize) -> usize {
516 let bounded = offset.min(text.len());
517 let bounded = text.floor_char_boundary(bounded);
518
519 if bounded >= text.len() {
520 return text.len();
521 }
522
523 if bounded == 0 || text.as_bytes().get(bounded - 1) == Some(&b'\n') {
524 return bounded;
525 }
526
527 if let Some(next_nl_rel) = text[bounded..].find('\n') {
528 let next = bounded + next_nl_rel + 1;
529 return text.floor_char_boundary(next.min(text.len()));
530 }
531
532 let prev_start = text[..bounded].rfind('\n').map(|idx| idx + 1).unwrap_or(0);
533 text.floor_char_boundary(prev_start)
534}
535
/// Append `editable_text` to `output` with a tag at every boundary in
/// `marker_offsets`, adding no bytes other than the tags and the cursor
/// marker.
///
/// `tag_for_index` receives the 0-based boundary index and returns the tag
/// text to emit there. `cursor_marker` is inserted once, in the first block
/// whose byte range (inclusive on both ends) contains
/// `cursor_offset_in_editable`.
fn write_editable_with_markers_impl(
    output: &mut String,
    editable_text: &str,
    cursor_offset_in_editable: usize,
    cursor_marker: &str,
    marker_offsets: &[usize],
    tag_for_index: impl Fn(usize) -> String,
) {
    let mut cursor_pending = true;

    for (idx, &block_start) in marker_offsets.iter().enumerate() {
        output.push_str(&tag_for_index(idx));

        // The final boundary only closes the last block.
        let Some(&block_end) = marker_offsets.get(idx + 1) else {
            continue;
        };

        let block = &editable_text[block_start..block_end];
        let holds_cursor =
            cursor_pending && (block_start..=block_end).contains(&cursor_offset_in_editable);
        if holds_cursor {
            cursor_pending = false;
            let (before, after) = block.split_at(cursor_offset_in_editable - block_start);
            output.push_str(before);
            output.push_str(cursor_marker);
            output.push_str(after);
        } else {
            output.push_str(block);
        }
    }
}
569
570pub fn write_editable_with_markers_v0316(
571 output: &mut String,
572 editable_text: &str,
573 cursor_offset_in_editable: usize,
574 cursor_marker: &str,
575) {
576 let marker_offsets = compute_marker_offsets(editable_text);
577 write_editable_with_markers_impl(
578 output,
579 editable_text,
580 cursor_offset_in_editable,
581 cursor_marker,
582 &marker_offsets,
583 |i| marker_tag(i + 1),
584 );
585}
586
587pub fn write_editable_with_markers_v0317(
588 output: &mut String,
589 editable_text: &str,
590 cursor_offset_in_editable: usize,
591 cursor_marker: &str,
592) {
593 let marker_offsets = compute_marker_offsets(editable_text);
594 let anchor_idx = cursor_block_index(Some(cursor_offset_in_editable), &marker_offsets);
595 write_editable_with_markers_impl(
596 output,
597 editable_text,
598 cursor_offset_in_editable,
599 cursor_marker,
600 &marker_offsets,
601 |i| marker_tag_relative(i as isize - anchor_idx as isize),
602 );
603}
604
605pub fn write_editable_with_markers_v0318(
606 output: &mut String,
607 editable_text: &str,
608 cursor_offset_in_editable: usize,
609 cursor_marker: &str,
610) {
611 let marker_offsets = compute_marker_offsets_v0318(editable_text);
612 write_editable_with_markers_impl(
613 output,
614 editable_text,
615 cursor_offset_in_editable,
616 cursor_marker,
617 &marker_offsets,
618 |i| marker_tag(i + 1),
619 );
620}
621
622/// Parse byte-exact model output and reconstruct the full new editable region.
623///
624/// `resolve_boundary` maps a parsed tag value to an absolute byte offset in
625/// old_editable, given the marker_offsets. Returns `(start_byte, end_byte)` or
626/// an error.
627fn apply_marker_span_impl(
628 old_editable: &str,
629 tags: &[ParsedTag],
630 output: &str,
631 resolve_boundaries: impl Fn(isize, isize) -> Result<(usize, usize)>,
632) -> Result<String> {
633 if tags.is_empty() {
634 return Err(anyhow!("no marker tags found in output"));
635 }
636 if tags.len() == 1 {
637 return Err(anyhow!(
638 "only one marker tag found in output, expected at least two"
639 ));
640 }
641
642 let start_value = tags[0].value;
643 let end_value = tags[tags.len() - 1].value;
644
645 if start_value == end_value {
646 return Ok(old_editable.to_string());
647 }
648
649 let (start_byte, end_byte) = resolve_boundaries(start_value, end_value)?;
650
651 if start_byte > end_byte {
652 return Err(anyhow!("start marker must come before end marker"));
653 }
654
655 let mut new_content = String::new();
656 for i in 0..tags.len() - 1 {
657 let content_start = tags[i].tag_end;
658 let content_end = tags[i + 1].tag_start;
659 if content_start <= content_end {
660 new_content.push_str(&output[content_start..content_end]);
661 }
662 }
663
664 let mut result = String::new();
665 result.push_str(&old_editable[..start_byte]);
666 result.push_str(&new_content);
667 result.push_str(&old_editable[end_byte..]);
668
669 Ok(result)
670}
671
672pub fn apply_marker_span_v0316(old_editable: &str, output: &str) -> Result<String> {
673 let tags = collect_marker_tags(output);
674
675 // Validate monotonically increasing with no gaps (best-effort warning)
676 if tags.len() >= 2 {
677 let start_num = tags[0].value;
678 let end_num = tags[tags.len() - 1].value;
679 if start_num != end_num {
680 let expected: Vec<isize> = (start_num..=end_num).collect();
681 let actual: Vec<isize> = tags.iter().map(|t| t.value).collect();
682 if actual != expected {
683 eprintln!(
684 "V0316 marker sequence validation failed: expected {:?}, got {:?}. Attempting best-effort parse.",
685 expected, actual
686 );
687 }
688 }
689 }
690
691 let marker_offsets = compute_marker_offsets(old_editable);
692 apply_marker_span_impl(old_editable, &tags, output, |start_val, end_val| {
693 let start_idx = (start_val as usize)
694 .checked_sub(1)
695 .context("marker numbers are 1-indexed")?;
696 let end_idx = (end_val as usize)
697 .checked_sub(1)
698 .context("marker numbers are 1-indexed")?;
699 let start_byte = *marker_offsets
700 .get(start_idx)
701 .context("start marker number out of range")?;
702 let end_byte = *marker_offsets
703 .get(end_idx)
704 .context("end marker number out of range")?;
705 Ok((start_byte, end_byte))
706 })
707}
708
709pub fn apply_marker_span_v0317(
710 old_editable: &str,
711 output: &str,
712 cursor_offset_in_old: Option<usize>,
713) -> Result<String> {
714 let tags = collect_relative_marker_tags(output);
715 let marker_offsets = compute_marker_offsets(old_editable);
716 let anchor_idx = cursor_block_index(cursor_offset_in_old, &marker_offsets);
717
718 apply_marker_span_impl(old_editable, &tags, output, |start_delta, end_delta| {
719 let start_idx_signed = anchor_idx as isize + start_delta;
720 let end_idx_signed = anchor_idx as isize + end_delta;
721 if start_idx_signed < 0 || end_idx_signed < 0 {
722 return Err(anyhow!("relative marker maps before first marker"));
723 }
724 let start_idx = usize::try_from(start_idx_signed).context("invalid start marker index")?;
725 let end_idx = usize::try_from(end_idx_signed).context("invalid end marker index")?;
726 let start_byte = *marker_offsets
727 .get(start_idx)
728 .context("start marker number out of range")?;
729 let end_byte = *marker_offsets
730 .get(end_idx)
731 .context("end marker number out of range")?;
732 Ok((start_byte, end_byte))
733 })
734}
735
736pub fn apply_marker_span_v0318(old_editable: &str, output: &str) -> Result<String> {
737 let tags = collect_marker_tags(output);
738
739 if tags.len() >= 2 {
740 let start_num = tags[0].value;
741 let end_num = tags[tags.len() - 1].value;
742 if start_num != end_num {
743 let expected: Vec<isize> = (start_num..=end_num).collect();
744 let actual: Vec<isize> = tags.iter().map(|t| t.value).collect();
745 if actual != expected {
746 eprintln!(
747 "V0318 marker sequence validation failed: expected {:?}, got {:?}. Attempting best-effort parse.",
748 expected, actual
749 );
750 }
751 }
752 }
753
754 let marker_offsets = compute_marker_offsets_v0318(old_editable);
755 apply_marker_span_impl(old_editable, &tags, output, |start_val, end_val| {
756 let start_idx = (start_val as usize)
757 .checked_sub(1)
758 .context("marker numbers are 1-indexed")?;
759 let end_idx = (end_val as usize)
760 .checked_sub(1)
761 .context("marker numbers are 1-indexed")?;
762 let start_byte = *marker_offsets
763 .get(start_idx)
764 .context("start marker number out of range")?;
765 let end_byte = *marker_offsets
766 .get(end_idx)
767 .context("end marker number out of range")?;
768 Ok((start_byte, end_byte))
769 })
770}
771
772/// Encode the training target from old and new editable text.
773///
774/// Shared implementation for V0316, V0317, and V0318. The `tag_for_block_idx`
775/// closure maps a block index to the appropriate marker tag string.
776/// `no_edit_tag` is the marker tag to repeat when there are no edits.
777fn encode_from_old_and_new_impl(
778 old_editable: &str,
779 new_editable: &str,
780 cursor_offset_in_new: Option<usize>,
781 cursor_marker: &str,
782 end_marker: &str,
783 no_edit_tag: &str,
784 marker_offsets: &[usize],
785 tag_for_block_idx: impl Fn(usize) -> String,
786) -> Result<String> {
787 if old_editable == new_editable {
788 return Ok(format!("{no_edit_tag}{no_edit_tag}{end_marker}"));
789 }
790
791 let (common_prefix, common_suffix) =
792 common_prefix_suffix(old_editable.as_bytes(), new_editable.as_bytes());
793 let change_end_in_old = old_editable.len() - common_suffix;
794
795 let start_marker_idx = marker_offsets
796 .iter()
797 .rposition(|&offset| offset <= common_prefix)
798 .unwrap_or(0);
799 let end_marker_idx = marker_offsets
800 .iter()
801 .position(|&offset| offset >= change_end_in_old)
802 .unwrap_or(marker_offsets.len() - 1);
803
804 let old_start = marker_offsets[start_marker_idx];
805 let old_end = marker_offsets[end_marker_idx];
806
807 let new_start = old_start;
808 let new_end = new_editable
809 .len()
810 .saturating_sub(old_editable.len().saturating_sub(old_end));
811
812 let new_span = &new_editable[new_start..new_end];
813 let old_span = &old_editable[old_start..old_end];
814
815 let (span_common_prefix, span_common_suffix) =
816 common_prefix_suffix(old_span.as_bytes(), new_span.as_bytes());
817
818 let mut result = String::new();
819 let mut prev_new_rel = 0usize;
820 let mut cursor_placed = false;
821
822 for block_idx in start_marker_idx..end_marker_idx {
823 result.push_str(&tag_for_block_idx(block_idx));
824
825 let new_rel_end = if block_idx + 1 == end_marker_idx {
826 new_span.len()
827 } else {
828 let old_rel = marker_offsets[block_idx + 1] - old_start;
829 let mapped = map_boundary_offset(
830 old_rel,
831 old_span.len(),
832 new_span.len(),
833 span_common_prefix,
834 span_common_suffix,
835 );
836 snap_to_line_start(new_span, mapped)
837 };
838
839 let new_rel_end = new_rel_end.max(prev_new_rel);
840 let block_content = &new_span[prev_new_rel..new_rel_end];
841
842 if !cursor_placed {
843 if let Some(cursor_offset) = cursor_offset_in_new {
844 let abs_start = new_start + prev_new_rel;
845 let abs_end = new_start + new_rel_end;
846 if cursor_offset >= abs_start && cursor_offset <= abs_end {
847 cursor_placed = true;
848 let cursor_in_block = cursor_offset - abs_start;
849 let bounded = cursor_in_block.min(block_content.len());
850 result.push_str(&block_content[..bounded]);
851 result.push_str(cursor_marker);
852 result.push_str(&block_content[bounded..]);
853 prev_new_rel = new_rel_end;
854 continue;
855 }
856 }
857 }
858
859 result.push_str(block_content);
860 prev_new_rel = new_rel_end;
861 }
862
863 result.push_str(&tag_for_block_idx(end_marker_idx));
864 result.push_str(end_marker);
865
866 Ok(result)
867}
868
869pub fn encode_from_old_and_new_v0316(
870 old_editable: &str,
871 new_editable: &str,
872 cursor_offset_in_new: Option<usize>,
873 cursor_marker: &str,
874 end_marker: &str,
875) -> Result<String> {
876 let marker_offsets = compute_marker_offsets(old_editable);
877 let no_edit_tag = marker_tag(nearest_marker_number(cursor_offset_in_new, &marker_offsets));
878 encode_from_old_and_new_impl(
879 old_editable,
880 new_editable,
881 cursor_offset_in_new,
882 cursor_marker,
883 end_marker,
884 &no_edit_tag,
885 &marker_offsets,
886 |block_idx| marker_tag(block_idx + 1),
887 )
888}
889
890pub fn encode_from_old_and_new_v0317(
891 old_editable: &str,
892 new_editable: &str,
893 cursor_offset_in_new: Option<usize>,
894 cursor_marker: &str,
895 end_marker: &str,
896) -> Result<String> {
897 let marker_offsets = compute_marker_offsets(old_editable);
898 let anchor_idx = cursor_block_index(cursor_offset_in_new, &marker_offsets);
899 let no_edit_tag = marker_tag_relative(0);
900 encode_from_old_and_new_impl(
901 old_editable,
902 new_editable,
903 cursor_offset_in_new,
904 cursor_marker,
905 end_marker,
906 &no_edit_tag,
907 &marker_offsets,
908 |block_idx| marker_tag_relative(block_idx as isize - anchor_idx as isize),
909 )
910}
911
912pub fn encode_from_old_and_new_v0318(
913 old_editable: &str,
914 new_editable: &str,
915 cursor_offset_in_new: Option<usize>,
916 cursor_marker: &str,
917 end_marker: &str,
918) -> Result<String> {
919 let marker_offsets = compute_marker_offsets_v0318(old_editable);
920 let no_edit_tag = marker_tag(nearest_marker_number(cursor_offset_in_new, &marker_offsets));
921 encode_from_old_and_new_impl(
922 old_editable,
923 new_editable,
924 cursor_offset_in_new,
925 cursor_marker,
926 end_marker,
927 &no_edit_tag,
928 &marker_offsets,
929 |block_idx| marker_tag(block_idx + 1),
930 )
931}
932
933#[cfg(test)]
934mod tests {
935 use super::*;
936
937 #[test]
938 fn test_compute_marker_offsets_small_block() {
939 let text = "aaa\nbbb\nccc\n";
940 let offsets = compute_marker_offsets(text);
941 assert_eq!(offsets, vec![0, text.len()]);
942 }
943
944 #[test]
945 fn test_compute_marker_offsets_blank_line_split() {
946 let text = "aaa\nbbb\nccc\n\nddd\neee\nfff\n";
947 let offsets = compute_marker_offsets(text);
948 assert_eq!(offsets[0], 0);
949 assert!(offsets.contains(&13), "offsets: {:?}", offsets);
950 assert_eq!(*offsets.last().unwrap(), text.len());
951 }
952
953 #[test]
954 fn test_compute_marker_offsets_blank_line_split_overrides_pending_hard_cap_boundary() {
955 let text = "\
956class OCRDataframe(BaseModel):
957 model_config = ConfigDict(arbitrary_types_allowed=True)
958
959 df: pl.DataFrame
960
961 def page(self, page_number: int = 0) -> \"OCRDataframe\":
962 # Filter dataframe on specific page
963 df_page = self.df.filter(pl.col(\"page\") == page_number)
964 return OCRDataframe(df=df_page)
965
966 def get_text_cell(
967 self,
968 cell: Cell,
969 margin: int = 0,
970 page_number: Optional[int] = None,
971 min_confidence: int = 50,
972 ) -> Optional[str]:
973 \"\"\"
974 Get text corresponding to cell
975";
976 let offsets = compute_marker_offsets(text);
977
978 let def_start = text
979 .find(" def get_text_cell(")
980 .expect("def line exists");
981 let self_start = text.find(" self,").expect("self line exists");
982
983 assert!(
984 offsets.contains(&def_start),
985 "expected boundary at def line start ({def_start}), got {offsets:?}"
986 );
987 assert!(
988 !offsets.contains(&self_start),
989 "did not expect boundary at self line start ({self_start}), got {offsets:?}"
990 );
991 }
992
993 #[test]
994 fn test_compute_marker_offsets_blank_line_split_skips_closer_line() {
995 let text = "\
996impl Plugin for AhoySchedulePlugin {
997 fn build(&self, app: &mut App) {
998 app.configure_sets(
999 self.schedule,
1000 (
1001 AhoySystems::MoveCharacters,
1002 AhoySystems::ApplyForcesToDynamicRigidBodies,
1003 )
1004 .chain()
1005 .before(PhysicsSystems::First),
1006 );
1007
1008 }
1009}
1010
1011/// System set used by all systems of `bevy_ahoy`.
1012#[derive(SystemSet, Debug, Clone, Copy, Hash, PartialEq, Eq)]
1013pub enum AhoySystems {
1014 MoveCharacters,
1015 ApplyForcesToDynamicRigidBodies,
1016}
1017";
1018 let offsets = compute_marker_offsets(text);
1019
1020 let closer_start = text.find(" }\n").expect("closer line exists");
1021 let doc_start = text
1022 .find("/// System set used by all systems of `bevy_ahoy`.")
1023 .expect("doc line exists");
1024
1025 assert!(
1026 !offsets.contains(&closer_start),
1027 "did not expect boundary at closer line start ({closer_start}), got {offsets:?}"
1028 );
1029 assert!(
1030 offsets.contains(&doc_start),
1031 "expected boundary at doc line start ({doc_start}), got {offsets:?}"
1032 );
1033 }
1034
1035 #[test]
1036 fn test_compute_marker_offsets_max_lines_split() {
1037 let text = "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n";
1038 let offsets = compute_marker_offsets(text);
1039 assert!(offsets.len() >= 3, "offsets: {:?}", offsets);
1040 }
1041
1042 #[test]
1043 fn test_compute_marker_offsets_hard_cap_nudges_past_closer_to_case_line() {
1044 let text = "a1\na2\na3\na4\na5\na6\na7\na8\n}\ncase 'x': {\nbody\n";
1045 let offsets = compute_marker_offsets(text);
1046
1047 let expected = text.find("case 'x': {").expect("case line exists");
1048 assert!(
1049 offsets.contains(&expected),
1050 "expected nudged boundary at case line start ({expected}), got {offsets:?}"
1051 );
1052 }
1053
1054 #[test]
1055 fn test_compute_marker_offsets_hard_cap_nudge_respects_max_forward_lines() {
1056 let text = "a1\na2\na3\na4\na5\na6\na7\na8\n}\n}\n}\n}\n}\ncase 'x': {\nbody\n";
1057 let offsets = compute_marker_offsets(text);
1058
1059 let case_start = text.find("case 'x': {").expect("case line exists");
1060 assert!(
1061 !offsets.contains(&case_start),
1062 "boundary should not nudge beyond max forward lines; offsets: {offsets:?}"
1063 );
1064 }
1065
1066 #[test]
1067 fn test_compute_marker_offsets_stay_sorted_when_hard_cap_boundary_nudges_forward() {
1068 let text = "\
1069aaaaaaaaaa = 1;
1070bbbbbbbbbb = 2;
1071cccccccccc = 3;
1072dddddddddd = 4;
1073eeeeeeeeee = 5;
1074ffffffffff = 6;
1075gggggggggg = 7;
1076hhhhhhhhhh = 8;
1077 };
1078 };
1079
1080 grafanaDashboards = {
1081 cluster-overview.spec = {
1082 inherit instanceSelector;
1083 folderRef = \"infrastructure\";
1084 json = builtins.readFile ./grafana/dashboards/cluster-overview.json;
1085 };
1086 };
1087";
1088 let offsets = compute_marker_offsets(text);
1089
1090 assert_eq!(offsets.first().copied(), Some(0), "offsets: {offsets:?}");
1091 assert_eq!(
1092 offsets.last().copied(),
1093 Some(text.len()),
1094 "offsets: {offsets:?}"
1095 );
1096 assert!(
1097 offsets.windows(2).all(|window| window[0] <= window[1]),
1098 "offsets must be sorted: {offsets:?}"
1099 );
1100 }
1101
1102 #[test]
1103 fn test_compute_marker_offsets_empty() {
1104 let offsets = compute_marker_offsets("");
1105 assert_eq!(offsets, vec![0, 0]);
1106 }
1107
1108 #[test]
1109 fn test_compute_marker_offsets_avoid_short_markdown_blocks() {
1110 let text = "\
1111# Spree Posts
1112
1113This is a Posts extension for [Spree Commerce](https://spreecommerce.org), built with Ruby on Rails.
1114
1115## Installation
1116
11171. Add this extension to your Gemfile with this line:
1118
1119 ```ruby
1120 bundle add spree_posts
1121 ```
1122
11232. Run the install generator
1124
1125 ```ruby
1126 bundle exec rails g spree_posts:install
1127 ```
1128
11293. Restart your server
1130
1131 If your server was running, restart it so that it can find the assets properly.
1132
1133## Developing
1134
11351. Create a dummy app
1136
1137 ```bash
1138 bundle update
1139 bundle exec rake test_app
1140 ```
1141
11422. Add your new code
11433. Run tests
1144
1145 ```bash
1146 bundle exec rspec
1147 ```
1148
1149When testing your applications integration with this extension you may use it's factories.
1150Simply add this require statement to your spec_helper:
1151
1152```ruby
1153require 'spree_posts/factories'
1154```
1155
1156## Releasing a new version
1157
1158```shell
1159bundle exec gem bump -p -t
1160bundle exec gem release
1161```
1162
1163For more options please see [gem-release README](https://github.com/svenfuchs/gem-release)
1164
1165## Contributing
1166
1167If you'd like to contribute, please take a look at the contributing guide.
1168";
1169 let offsets = compute_marker_offsets(text);
1170
1171 assert_eq!(offsets.first().copied(), Some(0), "offsets: {offsets:?}");
1172 assert_eq!(
1173 offsets.last().copied(),
1174 Some(text.len()),
1175 "offsets: {offsets:?}"
1176 );
1177
1178 for window in offsets.windows(2) {
1179 let block = &text[window[0]..window[1]];
1180 let line_count = block.lines().count();
1181 assert!(
1182 line_count >= V0316_MIN_BLOCK_LINES,
1183 "block too short: {line_count} lines in block {block:?} with offsets {offsets:?}"
1184 );
1185 }
1186 }
1187
1188 #[test]
1189 fn test_extract_marker_span() {
1190 let text = "<|marker_2|>\n new content\n<|marker_3|>\n";
1191 let (start, end, content) = extract_marker_span(text).unwrap();
1192 assert_eq!(start, 2);
1193 assert_eq!(end, 3);
1194 assert_eq!(content, " new content\n");
1195 }
1196
1197 #[test]
1198 fn test_extract_marker_span_multi_line() {
1199 let text = "<|marker_1|>\nline1\nline2\nline3\n<|marker_4|>";
1200 let (start, end, content) = extract_marker_span(text).unwrap();
1201 assert_eq!(start, 1);
1202 assert_eq!(end, 4);
1203 assert_eq!(content, "line1\nline2\nline3\n");
1204 }
1205
1206 #[test]
1207 fn test_apply_marker_span_basic() {
1208 let old = "aaa\nbbb\nccc\n";
1209 let output = "<|marker_1|>\naaa\nBBB\nccc\n<|marker_2|>";
1210 let result = apply_marker_span(old, output).unwrap();
1211 assert_eq!(result, "aaa\nBBB\nccc\n");
1212 }
1213
1214 #[test]
1215 fn test_apply_marker_span_preserves_trailing_blank_line() {
1216 let old = "/\nresult\n\n";
1217 let output = "<|marker_1|>\n//\nresult\n\n<|marker_2|>";
1218 let result = apply_marker_span(old, output).unwrap();
1219 assert_eq!(result, "//\nresult\n\n");
1220 }
1221
1222 #[test]
1223 fn test_encode_no_edits() {
1224 let old = "aaa\nbbb\nccc\n";
1225 let result = encode_from_old_and_new(
1226 old,
1227 old,
1228 None,
1229 "<|user_cursor|>",
1230 ">>>>>>> UPDATED\n",
1231 "NO_EDITS\n",
1232 )
1233 .unwrap();
1234 assert_eq!(result, "NO_EDITS\n>>>>>>> UPDATED\n");
1235 }
1236
1237 #[test]
1238 fn test_encode_with_change() {
1239 let old = "aaa\nbbb\nccc\n";
1240 let new = "aaa\nBBB\nccc\n";
1241 let result = encode_from_old_and_new(
1242 old,
1243 new,
1244 None,
1245 "<|user_cursor|>",
1246 ">>>>>>> UPDATED\n",
1247 "NO_EDITS\n",
1248 )
1249 .unwrap();
1250 assert!(result.contains("<|marker_1|>"));
1251 assert!(result.contains("<|marker_2|>"));
1252 assert!(result.contains("aaa\nBBB\nccc\n"));
1253 assert!(result.ends_with(">>>>>>> UPDATED\n"));
1254 }
1255
1256 #[test]
1257 fn test_roundtrip_encode_apply() {
1258 let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
1259 let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
1260 let encoded = encode_from_old_and_new(
1261 old,
1262 new,
1263 None,
1264 "<|user_cursor|>",
1265 ">>>>>>> UPDATED\n",
1266 "NO_EDITS\n",
1267 )
1268 .unwrap();
1269 let output = encoded
1270 .strip_suffix(">>>>>>> UPDATED\n")
1271 .expect("should have end marker");
1272 let reconstructed = apply_marker_span(old, output).unwrap();
1273 assert_eq!(reconstructed, new);
1274 }
1275
1276 #[test]
1277 fn test_extract_editable_region_from_markers_multi() {
1278 let text = "prefix\n<|marker_1|>\naaa\nbbb\n<|marker_2|>\nccc\nddd\n<|marker_3|>\nsuffix";
1279 let parsed = extract_editable_region_from_markers(text).unwrap();
1280 assert_eq!(parsed, "aaa\nbbb\nccc\nddd");
1281 }
1282
1283 #[test]
1284 fn test_extract_editable_region_two_markers() {
1285 let text = "<|marker_1|>\none\ntwo three\n<|marker_2|>";
1286 let parsed = extract_editable_region_from_markers(text).unwrap();
1287 assert_eq!(parsed, "one\ntwo three");
1288 }
1289
1290 #[test]
1291 fn test_encode_with_cursor() {
1292 let old = "aaa\nbbb\nccc\n";
1293 let new = "aaa\nBBB\nccc\n";
1294 let result = encode_from_old_and_new(
1295 old,
1296 new,
1297 Some(5),
1298 "<|user_cursor|>",
1299 ">>>>>>> UPDATED\n",
1300 "NO_EDITS\n",
1301 )
1302 .unwrap();
1303 assert!(result.contains("<|user_cursor|>"), "result: {result}");
1304 assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
1305 }
1306
1307 #[test]
1308 fn test_extract_marker_span_strips_intermediate_markers() {
1309 let text = "<|marker_2|>\nline1\n<|marker_3|>\nline2\n<|marker_4|>";
1310 let (start, end, content) = extract_marker_span(text).unwrap();
1311 assert_eq!(start, 2);
1312 assert_eq!(end, 4);
1313 assert_eq!(content, "line1\nline2\n");
1314 }
1315
1316 #[test]
1317 fn test_extract_marker_span_strips_multiple_intermediate_markers() {
1318 let text = "<|marker_1|>\naaa\n<|marker_2|>\nbbb\n<|marker_3|>\nccc\n<|marker_4|>";
1319 let (start, end, content) = extract_marker_span(text).unwrap();
1320 assert_eq!(start, 1);
1321 assert_eq!(end, 4);
1322 assert_eq!(content, "aaa\nbbb\nccc\n");
1323 }
1324
1325 #[test]
1326 fn test_apply_marker_span_with_extra_intermediate_marker() {
1327 let old = "aaa\nbbb\nccc\n";
1328 let output = "<|marker_1|>\naaa\n<|marker_1|>\nBBB\nccc\n<|marker_2|>";
1329 let result = apply_marker_span(old, output).unwrap();
1330 assert_eq!(result, "aaa\nBBB\nccc\n");
1331 }
1332
1333 #[test]
1334 fn test_strip_marker_tags_inline() {
1335 assert_eq!(strip_marker_tags("no markers here"), "no markers here");
1336 assert_eq!(strip_marker_tags("before<|marker_5|>after"), "beforeafter");
1337 assert_eq!(
1338 strip_marker_tags("line1\n<|marker_3|>\nline2"),
1339 "line1\nline2"
1340 );
1341 }
1342
1343 #[test]
1344 fn test_write_editable_with_markers_v0316_byte_exact() {
1345 let editable = "aaa\nbbb\nccc\n";
1346 let mut output = String::new();
1347 write_editable_with_markers_v0316(&mut output, editable, 4, "<|user_cursor|>");
1348 assert!(output.starts_with("<|marker_1|>"));
1349 assert!(output.contains("<|user_cursor|>"));
1350 let stripped = output.replace("<|user_cursor|>", "");
1351 let stripped = strip_marker_tags(&stripped);
1352 assert_eq!(stripped, editable);
1353 }
1354
1355 #[test]
1356 fn test_apply_marker_span_v0316_basic() {
1357 let old = "aaa\nbbb\nccc\n";
1358 let output = "<|marker_1|>aaa\nBBB\nccc\n<|marker_2|>";
1359 let result = apply_marker_span_v0316(old, output).unwrap();
1360 assert_eq!(result, "aaa\nBBB\nccc\n");
1361 }
1362
1363 #[test]
1364 fn test_apply_marker_span_v0316_no_edit() {
1365 let old = "aaa\nbbb\nccc\n";
1366 let output = "<|marker_1|><|marker_1|>";
1367 let result = apply_marker_span_v0316(old, output).unwrap();
1368 assert_eq!(result, old);
1369 }
1370
1371 #[test]
1372 fn test_apply_marker_span_v0316_no_edit_any_marker() {
1373 let old = "aaa\nbbb\nccc\n";
1374 let output = "<|marker_2|>ignored content<|marker_2|>";
1375 let result = apply_marker_span_v0316(old, output).unwrap();
1376 assert_eq!(result, old);
1377 }
1378
1379 #[test]
1380 fn test_apply_marker_span_v0316_multi_block() {
1381 let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
1382 let marker_offsets = compute_marker_offsets(old);
1383 assert!(
1384 marker_offsets.len() >= 3,
1385 "expected at least 3 offsets, got {:?}",
1386 marker_offsets
1387 );
1388
1389 let new_content = "LINE1\nLINE2\nLINE3\n\nLINE5\nLINE6\nLINE7\nLINE8\n";
1390 let mut output = String::new();
1391 output.push_str("<|marker_1|>");
1392 for i in 0..marker_offsets.len() - 1 {
1393 if i > 0 {
1394 output.push_str(&marker_tag(i + 1));
1395 }
1396 let start = marker_offsets[i];
1397 let end = marker_offsets[i + 1];
1398 let block_len = end - start;
1399 output.push_str(&new_content[start..start + block_len]);
1400 }
1401 let last_marker_num = marker_offsets.len();
1402 output.push_str(&marker_tag(last_marker_num));
1403 let result = apply_marker_span_v0316(old, &output).unwrap();
1404 assert_eq!(result, new_content);
1405 }
1406
1407 #[test]
1408 fn test_apply_marker_span_v0316_byte_exact_no_normalization() {
1409 let old = "aaa\nbbb\nccc\n";
1410 let output = "<|marker_1|>aaa\nBBB\nccc<|marker_2|>";
1411 let result = apply_marker_span_v0316(old, output).unwrap();
1412 assert_eq!(result, "aaa\nBBB\nccc");
1413 }
1414
1415 #[test]
1416 fn test_encode_v0316_no_edits() {
1417 let old = "aaa\nbbb\nccc\n";
1418 let result =
1419 encode_from_old_and_new_v0316(old, old, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1420 assert!(result.ends_with("<|end|>"));
1421 let stripped = result.strip_suffix("<|end|>").unwrap();
1422 let result_parsed = apply_marker_span_v0316(old, stripped).unwrap();
1423 assert_eq!(result_parsed, old);
1424 }
1425
1426 #[test]
1427 fn test_encode_v0316_with_change() {
1428 let old = "aaa\nbbb\nccc\n";
1429 let new = "aaa\nBBB\nccc\n";
1430 let result =
1431 encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1432 assert!(result.contains("<|marker_1|>"));
1433 assert!(result.contains("<|marker_2|>"));
1434 assert!(result.ends_with("<|end|>"));
1435 }
1436
1437 #[test]
1438 fn test_roundtrip_v0316() {
1439 let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
1440 let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
1441 let encoded =
1442 encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1443 let stripped = encoded
1444 .strip_suffix("<|end|>")
1445 .expect("should have end marker");
1446 let reconstructed = apply_marker_span_v0316(old, stripped).unwrap();
1447 assert_eq!(reconstructed, new);
1448 }
1449
1450 #[test]
1451 fn test_roundtrip_v0316_with_cursor() {
1452 let old = "aaa\nbbb\nccc\n";
1453 let new = "aaa\nBBB\nccc\n";
1454 let result =
1455 encode_from_old_and_new_v0316(old, new, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1456 assert!(result.contains("<|user_cursor|>"), "result: {result}");
1457 assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
1458 }
1459
1460 #[test]
1461 fn test_roundtrip_v0316_multi_block_change() {
1462 let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
1463 let new = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
1464 let encoded =
1465 encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1466 let stripped = encoded
1467 .strip_suffix("<|end|>")
1468 .expect("should have end marker");
1469 let reconstructed = apply_marker_span_v0316(old, stripped).unwrap();
1470 assert_eq!(reconstructed, new);
1471 }
1472
1473 #[test]
1474 fn test_nearest_marker_number() {
1475 let offsets = vec![0, 10, 20, 30];
1476 assert_eq!(nearest_marker_number(Some(0), &offsets), 1);
1477 assert_eq!(nearest_marker_number(Some(9), &offsets), 2);
1478 assert_eq!(nearest_marker_number(Some(15), &offsets), 2);
1479 assert_eq!(nearest_marker_number(Some(25), &offsets), 3);
1480 assert_eq!(nearest_marker_number(Some(30), &offsets), 4);
1481 assert_eq!(nearest_marker_number(None, &offsets), 1);
1482 }
1483
1484 #[test]
1485 fn test_marker_tag_relative_formats_as_expected() {
1486 assert_eq!(marker_tag_relative(-2), "<|marker-2|>");
1487 assert_eq!(marker_tag_relative(-1), "<|marker-1|>");
1488 assert_eq!(marker_tag_relative(0), "<|marker-0|>");
1489 assert_eq!(marker_tag_relative(1), "<|marker+1|>");
1490 assert_eq!(marker_tag_relative(2), "<|marker+2|>");
1491 }
1492
1493 #[test]
1494 fn test_write_editable_with_markers_v0317_includes_relative_markers_and_cursor() {
1495 let editable = "aaa\nbbb\nccc\n";
1496 let mut output = String::new();
1497 write_editable_with_markers_v0317(&mut output, editable, 4, "<|user_cursor|>");
1498
1499 assert!(output.contains("<|marker-0|>"));
1500 assert!(output.contains("<|user_cursor|>"));
1501
1502 let stripped = output.replace("<|user_cursor|>", "");
1503 let stripped =
1504 collect_relative_marker_tags(&stripped)
1505 .iter()
1506 .fold(stripped.clone(), |acc, marker| {
1507 let tag = &stripped[marker.tag_start..marker.tag_end];
1508 acc.replace(tag, "")
1509 });
1510 assert_eq!(stripped, editable);
1511 }
1512
1513 #[test]
1514 fn test_apply_marker_span_v0317_basic() {
1515 let old = "aaa\nbbb\nccc\n";
1516 let output = "<|marker-0|>aaa\nBBB\nccc\n<|marker+1|>";
1517 let result = apply_marker_span_v0317(old, output, Some(0)).unwrap();
1518 assert_eq!(result, "aaa\nBBB\nccc\n");
1519 }
1520
1521 #[test]
1522 fn test_apply_marker_span_v0317_no_edit() {
1523 let old = "aaa\nbbb\nccc\n";
1524 let output = "<|marker-0|><|marker-0|>";
1525 let result = apply_marker_span_v0317(old, output, Some(0)).unwrap();
1526 assert_eq!(result, old);
1527 }
1528
1529 #[test]
1530 fn test_encode_v0317_no_edits() {
1531 let old = "aaa\nbbb\nccc\n";
1532 let result =
1533 encode_from_old_and_new_v0317(old, old, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1534 assert_eq!(result, "<|marker-0|><|marker-0|><|end|>");
1535 }
1536
1537 #[test]
1538 fn test_roundtrip_v0317() {
1539 let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
1540 let new = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
1541 let cursor = Some(6);
1542
1543 let encoded =
1544 encode_from_old_and_new_v0317(old, new, cursor, "<|user_cursor|>", "<|end|>").unwrap();
1545 let stripped = encoded
1546 .strip_suffix("<|end|>")
1547 .expect("should have end marker");
1548 let stripped = stripped.replace("<|user_cursor|>", "");
1549 let reconstructed = apply_marker_span_v0317(old, &stripped, cursor).unwrap();
1550 assert_eq!(reconstructed, new);
1551 }
1552
1553 #[test]
1554 fn test_roundtrip_v0317_with_cursor_marker() {
1555 let old = "aaa\nbbb\nccc\n";
1556 let new = "aaa\nBBB\nccc\n";
1557 let result =
1558 encode_from_old_and_new_v0317(old, new, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
1559 assert!(result.contains("<|user_cursor|>"), "result: {result}");
1560 assert!(result.contains("<|marker-0|>"), "result: {result}");
1561 }
1562
1563 #[test]
1564 fn test_compute_marker_offsets_v0318_uses_larger_block_sizes() {
1565 let text = "l1\nl2\nl3\n\nl5\nl6\nl7\nl8\nl9\nl10\nl11\nl12\nl13\n";
1566 let v0316_offsets = compute_marker_offsets(text);
1567 let v0318_offsets = compute_marker_offsets_v0318(text);
1568
1569 assert!(v0318_offsets.len() < v0316_offsets.len());
1570 assert_eq!(v0316_offsets.first().copied(), Some(0));
1571 assert_eq!(v0318_offsets.first().copied(), Some(0));
1572 assert_eq!(v0316_offsets.last().copied(), Some(text.len()));
1573 assert_eq!(v0318_offsets.last().copied(), Some(text.len()));
1574 }
1575
1576 #[test]
1577 fn test_roundtrip_v0318() {
1578 let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
1579 let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
1580 let encoded =
1581 encode_from_old_and_new_v0318(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
1582 let stripped = encoded
1583 .strip_suffix("<|end|>")
1584 .expect("should have end marker");
1585 let reconstructed = apply_marker_span_v0318(old, stripped).unwrap();
1586 assert_eq!(reconstructed, new);
1587 }
1588
1589 #[test]
1590 fn test_encode_v0317_markers_stay_on_line_boundaries() {
1591 let old = "\
1592\t\t\t\tcontinue outer;
1593\t\t\t}
1594\t\t}
1595\t}
1596
1597\tconst intersectionObserver = new IntersectionObserver((entries) => {
1598\t\tfor (const entry of entries) {
1599\t\t\tif (entry.isIntersecting) {
1600\t\t\t\tintersectionObserver.unobserve(entry.target);
1601\t\t\t\tanchorPreload(/** @type {HTMLAnchorElement} */ (entry.target));
1602\t\t\t}
1603\t\t}
1604\t});
1605
1606\tconst observer = new MutationObserver(() => {
1607\t\tconst links = /** @type {NodeListOf<HTMLAnchorElement>} */ (
1608\t\t\tdocument.querySelectorAll('a[data-preload]')
1609\t\t);
1610
1611\t\tfor (const link of links) {
1612\t\t\tif (linkSet.has(link)) continue;
1613\t\t\tlinkSet.add(link);
1614
1615\t\t\tswitch (link.dataset.preload) {
1616\t\t\t\tcase '':
1617\t\t\t\tcase 'true':
1618\t\t\t\tcase 'hover': {
1619\t\t\t\t\tlink.addEventListener('mouseenter', function callback() {
1620\t\t\t\t\t\tlink.removeEventListener('mouseenter', callback);
1621\t\t\t\t\t\tanchorPreload(link);
1622\t\t\t\t\t});
1623";
1624 let new = old.replacen(
1625 "\t\t\t\tcase 'true':\n",
1626 "\t\t\t\tcase 'TRUE':<|user_cursor|>\n",
1627 1,
1628 );
1629
1630 let cursor_offset = new.find("<|user_cursor|>").expect("cursor marker in new");
1631 let new_without_cursor = new.replace("<|user_cursor|>", "");
1632
1633 let encoded = encode_from_old_and_new_v0317(
1634 old,
1635 &new_without_cursor,
1636 Some(cursor_offset),
1637 "<|user_cursor|>",
1638 "<|end|>",
1639 )
1640 .unwrap();
1641
1642 let core = encoded.strip_suffix("<|end|>").unwrap_or(&encoded);
1643 for marker in collect_relative_marker_tags(core) {
1644 let tag_start = marker.tag_start;
1645 assert!(
1646 tag_start == 0 || core.as_bytes()[tag_start - 1] == b'\n',
1647 "marker not at line boundary: {} in output:\n{}",
1648 marker_tag_relative(marker.value),
1649 core
1650 );
1651 }
1652 }
1653}