1use anyhow::{Context as _, Result, anyhow};
2
/// Opening of an absolute marker tag. It is followed by the marker number and
/// [`MARKER_TAG_SUFFIX`], e.g. `<|marker_3|>`.
pub const MARKER_TAG_PREFIX: &str = "<|marker_";
/// Closing delimiter shared by absolute and relative marker tags.
pub const MARKER_TAG_SUFFIX: &str = "|>";
/// Opening of a cursor-relative marker tag. It is followed by a signed delta
/// and [`MARKER_TAG_SUFFIX`], e.g. `<|marker+2|>`.
pub const RELATIVE_MARKER_TAG_PREFIX: &str = "<|marker";
/// Number of lines a block must have accumulated before a blank-line split is
/// allowed.
const MIN_BLOCK_LINES: usize = 3;
/// Number of lines at which a block is split even without a blank line.
const MAX_BLOCK_LINES: usize = 8;
/// End marker literal for the V0316 format (presumably what callers pass as
/// `end_marker` — confirm against call sites).
pub const V0316_END_MARKER: &str = "<[end▁of▁sentence]>";
/// End marker literal for the V0317 format. It is the same text as
/// [`V0316_END_MARKER`], so it is defined as an alias to keep the two from
/// drifting apart by accident.
pub const V0317_END_MARKER: &str = V0316_END_MARKER;
10
11pub fn marker_tag(number: usize) -> String {
12 format!("{MARKER_TAG_PREFIX}{number}{MARKER_TAG_SUFFIX}")
13}
14
/// Build a cursor-relative marker tag for `delta`.
///
/// Positive deltas are written with an explicit `+`, negative deltas keep
/// their `-`, and zero is always spelled `<|marker-0|>`.
pub fn marker_tag_relative(delta: isize) -> String {
    use std::cmp::Ordering;

    match delta.cmp(&0) {
        Ordering::Greater => format!("<|marker+{delta}|>"),
        Ordering::Equal => String::from("<|marker-0|>"),
        Ordering::Less => format!("<|marker{delta}|>"),
    }
}
24
25/// Compute byte offsets within `editable_text` where marker boundaries should
26/// be placed.
27///
28/// Returns a sorted `Vec<usize>` that always starts with `0` and ends with
29/// `editable_text.len()`. Interior offsets are placed at line boundaries
30/// (right after a `\n`), preferring blank-line boundaries when available and
31/// respecting `MIN_BLOCK_LINES` / `MAX_BLOCK_LINES` constraints.
32pub fn compute_marker_offsets(editable_text: &str) -> Vec<usize> {
33 if editable_text.is_empty() {
34 return vec![0, 0];
35 }
36
37 let mut offsets = vec![0usize];
38 let mut lines_since_last_marker = 0usize;
39 let mut byte_offset = 0usize;
40
41 for line in editable_text.split('\n') {
42 let line_end = byte_offset + line.len() + 1;
43 let is_past_end = line_end > editable_text.len();
44 let actual_line_end = line_end.min(editable_text.len());
45 lines_since_last_marker += 1;
46
47 let is_blank = line.trim().is_empty();
48
49 if !is_past_end && lines_since_last_marker >= MIN_BLOCK_LINES {
50 if is_blank {
51 // Blank-line boundary found. We'll place the marker when we
52 // find the next non-blank line (handled below).
53 } else if lines_since_last_marker >= MAX_BLOCK_LINES {
54 offsets.push(actual_line_end);
55 lines_since_last_marker = 0;
56 }
57 }
58
59 // Non-blank line immediately following blank line(s): split here so
60 // the new block starts with this line.
61 if !is_blank && byte_offset > 0 && lines_since_last_marker >= MIN_BLOCK_LINES {
62 let before = &editable_text[..byte_offset];
63 let has_preceding_blank_line = before
64 .strip_suffix('\n')
65 .map(|stripped| {
66 let last_line = match stripped.rfind('\n') {
67 Some(pos) => &stripped[pos + 1..],
68 None => stripped,
69 };
70 last_line.trim().is_empty()
71 })
72 .unwrap_or(false);
73
74 if has_preceding_blank_line {
75 offsets.push(byte_offset);
76 lines_since_last_marker = 1;
77 }
78 }
79
80 byte_offset = actual_line_end;
81
82 // Re-check after blank-line logic since lines_since_last_marker may
83 // have been reset.
84 if !is_past_end && lines_since_last_marker >= MAX_BLOCK_LINES {
85 if *offsets.last().unwrap_or(&0) != actual_line_end {
86 offsets.push(actual_line_end);
87 lines_since_last_marker = 0;
88 }
89 }
90 }
91
92 let end = editable_text.len();
93 if *offsets.last().unwrap_or(&0) != end {
94 offsets.push(end);
95 }
96
97 offsets
98}
99
100/// Write the editable region content with marker tags, inserting the cursor
101/// marker at the given offset within the editable text.
102pub fn write_editable_with_markers(
103 output: &mut String,
104 editable_text: &str,
105 cursor_offset_in_editable: usize,
106 cursor_marker: &str,
107) {
108 let marker_offsets = compute_marker_offsets(editable_text);
109 let mut cursor_placed = false;
110 for (i, &offset) in marker_offsets.iter().enumerate() {
111 let marker_num = i + 1;
112 if !output.is_empty() && !output.ends_with('\n') {
113 output.push('\n');
114 }
115 output.push_str(&marker_tag(marker_num));
116
117 if let Some(&next_offset) = marker_offsets.get(i + 1) {
118 output.push('\n');
119 let block = &editable_text[offset..next_offset];
120 if !cursor_placed
121 && cursor_offset_in_editable >= offset
122 && cursor_offset_in_editable <= next_offset
123 {
124 cursor_placed = true;
125 let cursor_in_block = cursor_offset_in_editable - offset;
126 output.push_str(&block[..cursor_in_block]);
127 output.push_str(cursor_marker);
128 output.push_str(&block[cursor_in_block..]);
129 } else {
130 output.push_str(block);
131 }
132 }
133 }
134}
135
136/// Strip any `<|marker_N|>` tags from `text`.
137///
138/// When a marker tag sits on its own line (followed by `\n`), the trailing
139/// newline is also removed so the surrounding lines stay joined naturally.
140fn strip_marker_tags(text: &str) -> String {
141 let mut result = String::with_capacity(text.len());
142 let mut pos = 0;
143 let bytes = text.as_bytes();
144 while let Some(rel) = text[pos..].find(MARKER_TAG_PREFIX) {
145 result.push_str(&text[pos..pos + rel]);
146 let num_start = pos + rel + MARKER_TAG_PREFIX.len();
147 if let Some(suffix_rel) = text[num_start..].find(MARKER_TAG_SUFFIX) {
148 let mut tag_end = num_start + suffix_rel + MARKER_TAG_SUFFIX.len();
149 if bytes.get(tag_end) == Some(&b'\n') {
150 tag_end += 1;
151 }
152 pos = tag_end;
153 } else {
154 result.push_str(MARKER_TAG_PREFIX);
155 pos = num_start;
156 }
157 }
158 result.push_str(&text[pos..]);
159 result
160}
161
162/// Parse model output that uses the marker format.
163///
164/// Returns `(start_marker_num, end_marker_num, content_between_markers)`.
165/// The leading format-level newline after the start marker is stripped.
166/// Trailing newlines are preserved so blank-line endings in the editable
167/// region are not lost.
168///
169/// Any extra intermediate marker tags that the model may have inserted
170/// between the first and last markers are stripped from the returned content.
171pub fn extract_marker_span(text: &str) -> Result<(usize, usize, String)> {
172 let first_tag_start = text
173 .find(MARKER_TAG_PREFIX)
174 .context("no start marker found in output")?;
175 let first_num_start = first_tag_start + MARKER_TAG_PREFIX.len();
176 let first_num_end = text[first_num_start..]
177 .find(MARKER_TAG_SUFFIX)
178 .map(|i| i + first_num_start)
179 .context("malformed start marker tag")?;
180 let start_num: usize = text[first_num_start..first_num_end]
181 .parse()
182 .context("start marker number is not a valid integer")?;
183 let first_tag_end = first_num_end + MARKER_TAG_SUFFIX.len();
184
185 let last_tag_start = text
186 .rfind(MARKER_TAG_PREFIX)
187 .context("no end marker found in output")?;
188 let last_num_start = last_tag_start + MARKER_TAG_PREFIX.len();
189 let last_num_end = text[last_num_start..]
190 .find(MARKER_TAG_SUFFIX)
191 .map(|i| i + last_num_start)
192 .context("malformed end marker tag")?;
193 let end_num: usize = text[last_num_start..last_num_end]
194 .parse()
195 .context("end marker number is not a valid integer")?;
196
197 if start_num == end_num {
198 return Err(anyhow!(
199 "start and end markers are the same (marker {})",
200 start_num
201 ));
202 }
203
204 let mut content_start = first_tag_end;
205 if text.as_bytes().get(content_start) == Some(&b'\n') {
206 content_start += 1;
207 }
208 let content_end = last_tag_start;
209
210 let content = &text[content_start..content_end.max(content_start)];
211 let content = strip_marker_tags(content);
212 Ok((start_num, end_num, content))
213}
214
215/// Given old editable text and model output with marker span, reconstruct the
216/// full new editable region.
217pub fn apply_marker_span(old_editable: &str, output: &str) -> Result<String> {
218 let (start_num, end_num, raw_new_span) = extract_marker_span(output)?;
219 let marker_offsets = compute_marker_offsets(old_editable);
220
221 let start_idx = start_num
222 .checked_sub(1)
223 .context("marker numbers are 1-indexed")?;
224 let end_idx = end_num
225 .checked_sub(1)
226 .context("marker numbers are 1-indexed")?;
227 let start_byte = *marker_offsets
228 .get(start_idx)
229 .context("start marker number out of range")?;
230 let end_byte = *marker_offsets
231 .get(end_idx)
232 .context("end marker number out of range")?;
233
234 if start_byte > end_byte {
235 return Err(anyhow!("start marker must come before end marker"));
236 }
237
238 let old_span = &old_editable[start_byte..end_byte];
239 let mut new_span = raw_new_span;
240 if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
241 new_span.push('\n');
242 }
243 if !old_span.ends_with('\n') && new_span.ends_with('\n') {
244 new_span.pop();
245 }
246
247 let mut result = String::new();
248 result.push_str(&old_editable[..start_byte]);
249 result.push_str(&new_span);
250 result.push_str(&old_editable[end_byte..]);
251
252 Ok(result)
253}
254
255/// Compare old and new editable text, find the minimal marker span that covers
256/// all changes, and encode the result with marker tags.
257pub fn encode_from_old_and_new(
258 old_editable: &str,
259 new_editable: &str,
260 cursor_offset_in_new: Option<usize>,
261 cursor_marker: &str,
262 end_marker: &str,
263 no_edits_marker: &str,
264) -> Result<String> {
265 if old_editable == new_editable {
266 return Ok(format!("{no_edits_marker}{end_marker}"));
267 }
268
269 let marker_offsets = compute_marker_offsets(old_editable);
270
271 let common_prefix = old_editable
272 .bytes()
273 .zip(new_editable.bytes())
274 .take_while(|(a, b)| a == b)
275 .count();
276
277 let old_remaining = old_editable.len() - common_prefix;
278 let new_remaining = new_editable.len() - common_prefix;
279 let max_suffix = old_remaining.min(new_remaining);
280 let common_suffix = old_editable.as_bytes()[old_editable.len() - max_suffix..]
281 .iter()
282 .rev()
283 .zip(
284 new_editable.as_bytes()[new_editable.len() - max_suffix..]
285 .iter()
286 .rev(),
287 )
288 .take_while(|(a, b)| a == b)
289 .count();
290
291 let change_end_in_old = old_editable.len() - common_suffix;
292
293 let start_marker_idx = marker_offsets
294 .iter()
295 .rposition(|&offset| offset <= common_prefix)
296 .unwrap_or(0);
297 let end_marker_idx = marker_offsets
298 .iter()
299 .position(|&offset| offset >= change_end_in_old)
300 .unwrap_or(marker_offsets.len() - 1);
301
302 let old_start = marker_offsets[start_marker_idx];
303 let old_end = marker_offsets[end_marker_idx];
304
305 let new_start = old_start;
306 let new_end = new_editable
307 .len()
308 .saturating_sub(old_editable.len().saturating_sub(old_end));
309
310 let new_span = &new_editable[new_start..new_end];
311
312 let start_marker_num = start_marker_idx + 1;
313 let end_marker_num = end_marker_idx + 1;
314
315 let mut result = String::new();
316 result.push_str(&marker_tag(start_marker_num));
317 result.push('\n');
318
319 if let Some(cursor_offset) = cursor_offset_in_new {
320 if cursor_offset >= new_start && cursor_offset <= new_end {
321 let cursor_in_span = cursor_offset - new_start;
322 let bounded = cursor_in_span.min(new_span.len());
323 result.push_str(&new_span[..bounded]);
324 result.push_str(cursor_marker);
325 result.push_str(&new_span[bounded..]);
326 } else {
327 result.push_str(new_span);
328 }
329 } else {
330 result.push_str(new_span);
331 }
332
333 if !result.ends_with('\n') {
334 result.push('\n');
335 }
336 result.push_str(&marker_tag(end_marker_num));
337 result.push('\n');
338 result.push_str(end_marker);
339
340 Ok(result)
341}
342
343/// Extract the full editable region from text that uses marker tags.
344///
345/// Returns the concatenation of all block contents between the first and last
346/// markers, with intermediate marker tags stripped.
347pub fn extract_editable_region_from_markers(text: &str) -> Option<String> {
348 let first_marker_start = text.find(MARKER_TAG_PREFIX)?;
349
350 let mut markers: Vec<(usize, usize)> = Vec::new();
351 let mut search_start = first_marker_start;
352 while let Some(rel_pos) = text[search_start..].find(MARKER_TAG_PREFIX) {
353 let tag_start = search_start + rel_pos;
354 let num_start = tag_start + MARKER_TAG_PREFIX.len();
355 let num_end = text[num_start..].find(MARKER_TAG_SUFFIX)?;
356 let tag_end = num_start + num_end + MARKER_TAG_SUFFIX.len();
357 markers.push((tag_start, tag_end));
358 search_start = tag_end;
359 }
360
361 if markers.len() < 2 {
362 return None;
363 }
364
365 let (_, first_tag_end) = markers[0];
366 let (last_tag_start, _) = markers[markers.len() - 1];
367
368 let mut content_start = first_tag_end;
369 if text.as_bytes().get(content_start) == Some(&b'\n') {
370 content_start += 1;
371 }
372 let mut content_end = last_tag_start;
373 if content_end > content_start && text.as_bytes().get(content_end - 1) == Some(&b'\n') {
374 content_end -= 1;
375 }
376
377 let raw = &text[content_start..content_end];
378 let result = strip_marker_tags(raw);
379 let result = result.strip_suffix('\n').unwrap_or(&result).to_string();
380 Some(result)
381}
382
/// An absolute marker tag (`<|marker_N|>`) found while scanning a text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct MarkerTag {
    /// The number parsed from the tag.
    number: usize,
    /// Byte offset at which the tag begins in the scanned text.
    tag_start: usize,
    /// Byte offset just past the tag's closing suffix.
    tag_end: usize,
}
388
/// A cursor-relative marker tag (e.g. `<|marker+2|>`) found while scanning a
/// text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct RelativeMarkerTag {
    /// The signed delta parsed from the tag.
    delta: isize,
    /// Byte offset at which the tag begins in the scanned text.
    tag_start: usize,
    /// Byte offset just past the tag's closing suffix.
    tag_end: usize,
}
394
395fn collect_marker_tags(text: &str) -> Vec<MarkerTag> {
396 let mut markers = Vec::new();
397 let mut search_from = 0;
398 while let Some(rel_pos) = text[search_from..].find(MARKER_TAG_PREFIX) {
399 let tag_start = search_from + rel_pos;
400 let num_start = tag_start + MARKER_TAG_PREFIX.len();
401 if let Some(suffix_rel) = text[num_start..].find(MARKER_TAG_SUFFIX) {
402 let num_end = num_start + suffix_rel;
403 if let Ok(number) = text[num_start..num_end].parse::<usize>() {
404 let tag_end = num_end + MARKER_TAG_SUFFIX.len();
405 markers.push(MarkerTag {
406 number,
407 tag_start,
408 tag_end,
409 });
410 search_from = tag_end;
411 continue;
412 }
413 }
414 search_from = tag_start + MARKER_TAG_PREFIX.len();
415 }
416 markers
417}
418
419fn collect_relative_marker_tags(text: &str) -> Vec<RelativeMarkerTag> {
420 let mut markers = Vec::new();
421 let mut search_from = 0;
422 while let Some(rel_pos) = text[search_from..].find(RELATIVE_MARKER_TAG_PREFIX) {
423 let tag_start = search_from + rel_pos;
424 let payload_start = tag_start + RELATIVE_MARKER_TAG_PREFIX.len();
425 if let Some(suffix_rel) = text[payload_start..].find(MARKER_TAG_SUFFIX) {
426 let payload_end = payload_start + suffix_rel;
427 let payload = &text[payload_start..payload_end];
428 if let Ok(delta) = payload.parse::<isize>() {
429 let tag_end = payload_end + MARKER_TAG_SUFFIX.len();
430 markers.push(RelativeMarkerTag {
431 delta,
432 tag_start,
433 tag_end,
434 });
435 search_from = tag_end;
436 continue;
437 }
438 }
439 search_from = tag_start + RELATIVE_MARKER_TAG_PREFIX.len();
440 }
441 markers
442}
443
/// Return the 1-based number of the marker whose offset is closest to the
/// cursor.
///
/// A missing cursor is treated as offset `0`. On a tie the earlier marker
/// wins, and an empty `marker_offsets` yields `1`. Distances are computed
/// with `usize::abs_diff`, so no signed casts are involved and arbitrarily
/// large offsets cannot wrap around.
pub fn nearest_marker_number(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
    let cursor = cursor_offset.unwrap_or(0);
    marker_offsets
        .iter()
        .enumerate()
        .min_by_key(|&(_, &offset)| offset.abs_diff(cursor))
        .map_or(1, |(idx, _)| idx + 1)
}
453
/// Return the 0-based index of the block whose half-open range
/// `[start, end)` contains the cursor.
///
/// A missing cursor is treated as offset `0`. When no block contains the
/// cursor the result is `marker_offsets.len() - 2`, saturating at `0` —
/// i.e. the last block.
fn cursor_block_index(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
    let cursor = cursor_offset.unwrap_or(0);

    for (block, bounds) in marker_offsets.windows(2).enumerate() {
        if (bounds[0]..bounds[1]).contains(&cursor) {
            return block;
        }
    }

    marker_offsets.len().saturating_sub(2)
}
461
462/// Write the editable region content with V0317 byte-exact marker tags, where
463/// marker numbers are relative to the cursor block.
464pub fn write_editable_with_markers_v0317(
465 output: &mut String,
466 editable_text: &str,
467 cursor_offset_in_editable: usize,
468 cursor_marker: &str,
469) {
470 let marker_offsets = compute_marker_offsets(editable_text);
471 let anchor_idx = cursor_block_index(Some(cursor_offset_in_editable), &marker_offsets);
472 let mut cursor_placed = false;
473
474 for (i, &offset) in marker_offsets.iter().enumerate() {
475 let marker_delta = i as isize - anchor_idx as isize;
476 output.push_str(&marker_tag_relative(marker_delta));
477
478 if let Some(&next_offset) = marker_offsets.get(i + 1) {
479 let block = &editable_text[offset..next_offset];
480 if !cursor_placed
481 && cursor_offset_in_editable >= offset
482 && cursor_offset_in_editable <= next_offset
483 {
484 cursor_placed = true;
485 let cursor_in_block = cursor_offset_in_editable - offset;
486 output.push_str(&block[..cursor_in_block]);
487 output.push_str(cursor_marker);
488 output.push_str(&block[cursor_in_block..]);
489 } else {
490 output.push_str(block);
491 }
492 }
493 }
494}
495
496/// Write the editable region content with V0316 byte-exact marker tags.
497///
498/// Unlike the V0306 version, markers are pure delimiters with no newline
499/// padding. The content between markers is the exact bytes from the editable
500/// text.
501pub fn write_editable_with_markers_v0316(
502 output: &mut String,
503 editable_text: &str,
504 cursor_offset_in_editable: usize,
505 cursor_marker: &str,
506) {
507 let marker_offsets = compute_marker_offsets(editable_text);
508 let mut cursor_placed = false;
509 for (i, &offset) in marker_offsets.iter().enumerate() {
510 let marker_num = i + 1;
511 output.push_str(&marker_tag(marker_num));
512
513 if let Some(&next_offset) = marker_offsets.get(i + 1) {
514 let block = &editable_text[offset..next_offset];
515 if !cursor_placed
516 && cursor_offset_in_editable >= offset
517 && cursor_offset_in_editable <= next_offset
518 {
519 cursor_placed = true;
520 let cursor_in_block = cursor_offset_in_editable - offset;
521 output.push_str(&block[..cursor_in_block]);
522 output.push_str(cursor_marker);
523 output.push_str(&block[cursor_in_block..]);
524 } else {
525 output.push_str(block);
526 }
527 }
528 }
529}
530
531/// Parse V0316 model output and reconstruct the full new editable region.
532///
533/// V0316 differences from V0306:
534/// - No newline stripping or normalization (byte-exact content).
535/// - The no-edit signal is `start_num == end_num` (any repeated marker).
536/// - Intermediate marker tags are used for block-level extraction.
537pub fn apply_marker_span_v0316(old_editable: &str, output: &str) -> Result<String> {
538 let markers = collect_marker_tags(output);
539
540 if markers.is_empty() {
541 return Err(anyhow!("no marker tags found in output"));
542 }
543
544 if markers.len() == 1 {
545 return Err(anyhow!(
546 "only one marker tag found in output, expected at least two"
547 ));
548 }
549
550 let start_num = markers
551 .first()
552 .map(|marker| marker.number)
553 .context("missing first marker")?;
554 let end_num = markers
555 .last()
556 .map(|marker| marker.number)
557 .context("missing last marker")?;
558
559 // No-edit signal: start_num == end_num
560 if start_num == end_num {
561 return Ok(old_editable.to_string());
562 }
563
564 // Validate monotonically increasing with no gaps
565 let expected_nums: Vec<usize> = (start_num..=end_num).collect();
566 let actual_nums: Vec<usize> = markers.iter().map(|m| m.number).collect();
567 if actual_nums != expected_nums {
568 eprintln!(
569 "V0316 marker sequence validation failed: expected {:?}, got {:?}. Attempting best-effort parse.",
570 expected_nums, actual_nums
571 );
572 }
573
574 let marker_offsets = compute_marker_offsets(old_editable);
575
576 let start_idx = start_num
577 .checked_sub(1)
578 .context("marker numbers are 1-indexed")?;
579 let end_idx = end_num
580 .checked_sub(1)
581 .context("marker numbers are 1-indexed")?;
582
583 let start_byte = *marker_offsets
584 .get(start_idx)
585 .context("start marker number out of range")?;
586 let end_byte = *marker_offsets
587 .get(end_idx)
588 .context("end marker number out of range")?;
589
590 if start_byte > end_byte {
591 return Err(anyhow!("start marker must come before end marker"));
592 }
593
594 // Extract byte-exact content between consecutive markers
595 let mut new_content = String::new();
596 for i in 0..markers.len() - 1 {
597 let content_start = markers[i].tag_end;
598 let content_end = markers[i + 1].tag_start;
599 if content_start <= content_end {
600 new_content.push_str(&output[content_start..content_end]);
601 }
602 }
603
604 // Splice into old_editable
605 let mut result = String::new();
606 result.push_str(&old_editable[..start_byte]);
607 result.push_str(&new_content);
608 result.push_str(&old_editable[end_byte..]);
609
610 Ok(result)
611}
612
613/// Parse V0317 model output and reconstruct the full new editable region.
614///
615/// V0317 differences from V0316:
616/// - Marker ids are relative to the cursor block (e.g. -2, -1, 0, +1, +2).
617/// - No-edit signal is any repeated relative marker tag.
618pub fn apply_marker_span_v0317(
619 old_editable: &str,
620 output: &str,
621 cursor_offset_in_old: Option<usize>,
622) -> Result<String> {
623 let markers = collect_relative_marker_tags(output);
624
625 if markers.is_empty() {
626 return Err(anyhow!("no marker tags found in output"));
627 }
628
629 if markers.len() == 1 {
630 return Err(anyhow!(
631 "only one marker tag found in output, expected at least two"
632 ));
633 }
634
635 let marker_offsets = compute_marker_offsets(old_editable);
636 let anchor_idx = cursor_block_index(cursor_offset_in_old, &marker_offsets);
637
638 let start_delta = markers
639 .first()
640 .map(|marker| marker.delta)
641 .context("missing first marker")?;
642 let end_delta = markers
643 .last()
644 .map(|marker| marker.delta)
645 .context("missing last marker")?;
646
647 if start_delta == end_delta {
648 return Ok(old_editable.to_string());
649 }
650
651 let start_idx_isize = anchor_idx as isize + start_delta;
652 let end_idx_isize = anchor_idx as isize + end_delta;
653 if start_idx_isize < 0 || end_idx_isize < 0 {
654 return Err(anyhow!("relative marker maps before first marker"));
655 }
656
657 let start_idx = usize::try_from(start_idx_isize).context("invalid start marker index")?;
658 let end_idx = usize::try_from(end_idx_isize).context("invalid end marker index")?;
659
660 let start_byte = *marker_offsets
661 .get(start_idx)
662 .context("start marker number out of range")?;
663 let end_byte = *marker_offsets
664 .get(end_idx)
665 .context("end marker number out of range")?;
666
667 if start_byte > end_byte {
668 return Err(anyhow!("start marker must come before end marker"));
669 }
670
671 let mut new_content = String::new();
672 for i in 0..markers.len() - 1 {
673 let content_start = markers[i].tag_end;
674 let content_end = markers[i + 1].tag_start;
675 if content_start <= content_end {
676 new_content.push_str(&output[content_start..content_end]);
677 }
678 }
679
680 let mut result = String::new();
681 result.push_str(&old_editable[..start_byte]);
682 result.push_str(&new_content);
683 result.push_str(&old_editable[end_byte..]);
684
685 Ok(result)
686}
687
688/// Encode the V0316 training target from old and new editable text.
689///
690/// V0316 differences from V0306:
691/// - No-edit signal: `<|marker_C|><|marker_C|>{end_marker}` where C is nearest
692/// to cursor.
693/// - All intermediate markers are emitted with byte-exact content.
694/// - No newline padding around marker tags.
695pub fn encode_from_old_and_new_v0316(
696 old_editable: &str,
697 new_editable: &str,
698 cursor_offset_in_new: Option<usize>,
699 cursor_marker: &str,
700 end_marker: &str,
701) -> Result<String> {
702 let marker_offsets = compute_marker_offsets(old_editable);
703
704 if old_editable == new_editable {
705 let marker_num = nearest_marker_number(cursor_offset_in_new, &marker_offsets);
706 let tag = marker_tag(marker_num);
707 return Ok(format!("{tag}{tag}{end_marker}"));
708 }
709
710 let common_prefix = old_editable
711 .bytes()
712 .zip(new_editable.bytes())
713 .take_while(|(a, b)| a == b)
714 .count();
715
716 let old_remaining = old_editable.len() - common_prefix;
717 let new_remaining = new_editable.len() - common_prefix;
718 let max_suffix = old_remaining.min(new_remaining);
719 let common_suffix = old_editable.as_bytes()[old_editable.len() - max_suffix..]
720 .iter()
721 .rev()
722 .zip(
723 new_editable.as_bytes()[new_editable.len() - max_suffix..]
724 .iter()
725 .rev(),
726 )
727 .take_while(|(a, b)| a == b)
728 .count();
729
730 let change_end_in_old = old_editable.len() - common_suffix;
731
732 let start_marker_idx = marker_offsets
733 .iter()
734 .rposition(|&offset| offset <= common_prefix)
735 .unwrap_or(0);
736 let end_marker_idx = marker_offsets
737 .iter()
738 .position(|&offset| offset >= change_end_in_old)
739 .unwrap_or(marker_offsets.len() - 1);
740
741 let old_start = marker_offsets[start_marker_idx];
742 let old_end = marker_offsets[end_marker_idx];
743
744 let new_start = old_start;
745 let new_end = new_editable
746 .len()
747 .saturating_sub(old_editable.len().saturating_sub(old_end));
748
749 let new_span = &new_editable[new_start..new_end];
750 let old_span = &old_editable[old_start..old_end];
751
752 // Compute common prefix/suffix within the span for accurate boundary mapping
753 let span_common_prefix = old_span
754 .bytes()
755 .zip(new_span.bytes())
756 .take_while(|(a, b)| a == b)
757 .count();
758
759 let span_old_remaining = old_span.len() - span_common_prefix;
760 let span_new_remaining = new_span.len() - span_common_prefix;
761 let span_max_suffix = span_old_remaining.min(span_new_remaining);
762 let span_common_suffix = old_span.as_bytes()[old_span.len() - span_max_suffix..]
763 .iter()
764 .rev()
765 .zip(
766 new_span.as_bytes()[new_span.len() - span_max_suffix..]
767 .iter()
768 .rev(),
769 )
770 .take_while(|(a, b)| a == b)
771 .count();
772
773 let mut result = String::new();
774 let mut prev_new_rel = 0usize;
775 let mut cursor_placed = false;
776
777 for block_idx in start_marker_idx..end_marker_idx {
778 let marker_num = block_idx + 1;
779 result.push_str(&marker_tag(marker_num));
780
781 let new_rel_end = if block_idx + 1 == end_marker_idx {
782 // Last block: extends to end of new span
783 new_span.len()
784 } else {
785 // Map the intermediate boundary from old to new coordinates
786 let old_rel = marker_offsets[block_idx + 1] - old_start;
787 let mapped = map_boundary_offset(
788 old_rel,
789 old_span.len(),
790 new_span.len(),
791 span_common_prefix,
792 span_common_suffix,
793 );
794 // Ensure char boundary safety and monotonicity
795 new_span.floor_char_boundary(mapped)
796 };
797
798 // Ensure monotonicity (each block gets at least zero content)
799 let new_rel_end = new_rel_end.max(prev_new_rel);
800
801 let block_content = &new_span[prev_new_rel..new_rel_end];
802
803 if !cursor_placed {
804 if let Some(cursor_offset) = cursor_offset_in_new {
805 let abs_start = new_start + prev_new_rel;
806 let abs_end = new_start + new_rel_end;
807 if cursor_offset >= abs_start && cursor_offset <= abs_end {
808 cursor_placed = true;
809 let cursor_in_block = cursor_offset - abs_start;
810 let bounded = cursor_in_block.min(block_content.len());
811 result.push_str(&block_content[..bounded]);
812 result.push_str(cursor_marker);
813 result.push_str(&block_content[bounded..]);
814 prev_new_rel = new_rel_end;
815 continue;
816 }
817 }
818 }
819
820 result.push_str(block_content);
821 prev_new_rel = new_rel_end;
822 }
823
824 // Final closing marker
825 let end_marker_num = end_marker_idx + 1;
826 result.push_str(&marker_tag(end_marker_num));
827 result.push_str(end_marker);
828
829 Ok(result)
830}
831
832/// Encode the V0317 training target from old and new editable text.
833///
834/// V0317 differences from V0316:
835/// - Marker ids are relative to cursor block (..., -2, -1, 0, +1, +2, ...).
836/// - No-edit signal: repeated cursor-relative marker.
837pub fn encode_from_old_and_new_v0317(
838 old_editable: &str,
839 new_editable: &str,
840 cursor_offset_in_new: Option<usize>,
841 cursor_marker: &str,
842 end_marker: &str,
843) -> Result<String> {
844 let marker_offsets = compute_marker_offsets(old_editable);
845 let anchor_idx = cursor_block_index(cursor_offset_in_new, &marker_offsets);
846
847 if old_editable == new_editable {
848 let tag = marker_tag_relative(0);
849 return Ok(format!("{tag}{tag}{end_marker}"));
850 }
851
852 let common_prefix = old_editable
853 .bytes()
854 .zip(new_editable.bytes())
855 .take_while(|(a, b)| a == b)
856 .count();
857
858 let old_remaining = old_editable.len() - common_prefix;
859 let new_remaining = new_editable.len() - common_prefix;
860 let max_suffix = old_remaining.min(new_remaining);
861 let common_suffix = old_editable.as_bytes()[old_editable.len() - max_suffix..]
862 .iter()
863 .rev()
864 .zip(
865 new_editable.as_bytes()[new_editable.len() - max_suffix..]
866 .iter()
867 .rev(),
868 )
869 .take_while(|(a, b)| a == b)
870 .count();
871
872 let change_end_in_old = old_editable.len() - common_suffix;
873
874 let start_marker_idx = marker_offsets
875 .iter()
876 .rposition(|&offset| offset <= common_prefix)
877 .unwrap_or(0);
878 let end_marker_idx = marker_offsets
879 .iter()
880 .position(|&offset| offset >= change_end_in_old)
881 .unwrap_or(marker_offsets.len() - 1);
882
883 let old_start = marker_offsets[start_marker_idx];
884 let old_end = marker_offsets[end_marker_idx];
885
886 let new_start = old_start;
887 let new_end = new_editable
888 .len()
889 .saturating_sub(old_editable.len().saturating_sub(old_end));
890
891 let new_span = &new_editable[new_start..new_end];
892 let old_span = &old_editable[old_start..old_end];
893
894 let span_common_prefix = old_span
895 .bytes()
896 .zip(new_span.bytes())
897 .take_while(|(a, b)| a == b)
898 .count();
899
900 let span_old_remaining = old_span.len() - span_common_prefix;
901 let span_new_remaining = new_span.len() - span_common_prefix;
902 let span_max_suffix = span_old_remaining.min(span_new_remaining);
903 let span_common_suffix = old_span.as_bytes()[old_span.len() - span_max_suffix..]
904 .iter()
905 .rev()
906 .zip(
907 new_span.as_bytes()[new_span.len() - span_max_suffix..]
908 .iter()
909 .rev(),
910 )
911 .take_while(|(a, b)| a == b)
912 .count();
913
914 let mut result = String::new();
915 let mut prev_new_rel = 0usize;
916 let mut cursor_placed = false;
917
918 for block_idx in start_marker_idx..end_marker_idx {
919 let marker_delta = block_idx as isize - anchor_idx as isize;
920 result.push_str(&marker_tag_relative(marker_delta));
921
922 let new_rel_end = if block_idx + 1 == end_marker_idx {
923 new_span.len()
924 } else {
925 let old_rel = marker_offsets[block_idx + 1] - old_start;
926 let mapped = map_boundary_offset(
927 old_rel,
928 old_span.len(),
929 new_span.len(),
930 span_common_prefix,
931 span_common_suffix,
932 );
933 new_span.floor_char_boundary(mapped)
934 };
935
936 let new_rel_end = new_rel_end.max(prev_new_rel);
937 let block_content = &new_span[prev_new_rel..new_rel_end];
938
939 if !cursor_placed {
940 if let Some(cursor_offset) = cursor_offset_in_new {
941 let abs_start = new_start + prev_new_rel;
942 let abs_end = new_start + new_rel_end;
943 if cursor_offset >= abs_start && cursor_offset <= abs_end {
944 cursor_placed = true;
945 let cursor_in_block = cursor_offset - abs_start;
946 let bounded = cursor_in_block.min(block_content.len());
947 result.push_str(&block_content[..bounded]);
948 result.push_str(cursor_marker);
949 result.push_str(&block_content[bounded..]);
950 prev_new_rel = new_rel_end;
951 continue;
952 }
953 }
954 }
955
956 result.push_str(block_content);
957 prev_new_rel = new_rel_end;
958 }
959
960 let end_marker_delta = end_marker_idx as isize - anchor_idx as isize;
961 result.push_str(&marker_tag_relative(end_marker_delta));
962 result.push_str(end_marker);
963
964 Ok(result)
965}
966
/// Translate a byte offset inside the old span into the matching offset
/// inside the new span.
///
/// Three regions are distinguished by the span's common prefix and suffix:
/// - inside the common prefix the offset is unchanged,
/// - inside the common suffix the distance to the span end is preserved,
/// - inside the changed middle the offset is scaled proportionally to the
///   lengths of the old and new changed regions (integer division).
fn map_boundary_offset(
    old_rel: usize,
    old_span_len: usize,
    new_span_len: usize,
    span_common_prefix: usize,
    span_common_suffix: usize,
) -> usize {
    // Shared prefix: same position in both spans.
    if old_rel <= span_common_prefix {
        return old_rel;
    }

    // Shared suffix: same distance from the end in both spans.
    let suffix_start = old_span_len - span_common_suffix;
    if old_rel >= suffix_start {
        return new_span_len - (old_span_len - old_rel);
    }

    // Changed middle: both changed regions start right after the prefix.
    let old_changed_len = old_span_len
        .saturating_sub(span_common_prefix)
        .saturating_sub(span_common_suffix);
    let new_changed_len = new_span_len
        .saturating_sub(span_common_prefix)
        .saturating_sub(span_common_suffix);

    if old_changed_len == 0 {
        return span_common_prefix;
    }
    let into_changed = old_rel - span_common_prefix;
    span_common_prefix + into_changed * new_changed_len / old_changed_len
}
998
#[cfg(test)]
mod tests {
    use super::*;

    // Fixtures shared by several tests below.
    const CURSOR: &str = "<|user_cursor|>";
    const UPDATED_END: &str = ">>>>>>> UPDATED\n";
    const NO_EDITS: &str = "NO_EDITS\n";
    const END: &str = "<|end|>";
    const ABC: &str = "aaa\nbbb\nccc\n";
    const ABC_EDITED: &str = "aaa\nBBB\nccc\n";
    const EIGHT_LINES: &str = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
    const EIGHT_LINES_TWO_EDITS: &str = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
    const TEN_LINES: &str = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
    const TEN_LINES_EDITED: &str =
        "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";

    /// A three-line input yields only the start and end offsets.
    #[test]
    fn test_compute_marker_offsets_small_block() {
        let boundaries = compute_marker_offsets(ABC);
        assert_eq!(boundaries, vec![0, ABC.len()]);
    }

    /// A boundary is expected right after the blank line (byte 13).
    #[test]
    fn test_compute_marker_offsets_blank_line_split() {
        let input = "aaa\nbbb\nccc\n\nddd\neee\nfff\n";
        let boundaries = compute_marker_offsets(input);
        assert_eq!(boundaries[0], 0);
        assert!(boundaries.contains(&13), "offsets: {:?}", boundaries);
        assert_eq!(*boundaries.last().unwrap(), input.len());
    }

    /// Ten lines without blank lines must produce at least one interior boundary.
    #[test]
    fn test_compute_marker_offsets_max_lines_split() {
        let input = "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n";
        let boundaries = compute_marker_offsets(input);
        assert!(boundaries.len() >= 3, "offsets: {:?}", boundaries);
    }

    /// Empty input yields the degenerate `[0, 0]` pair.
    #[test]
    fn test_compute_marker_offsets_empty() {
        assert_eq!(compute_marker_offsets(""), vec![0, 0]);
    }

    /// Start/end marker numbers and the enclosed content are extracted.
    #[test]
    fn test_extract_marker_span() {
        let model_output = "<|marker_2|>\n new content\n<|marker_3|>\n";
        let (first, last, body) = extract_marker_span(model_output).unwrap();
        assert_eq!(first, 2);
        assert_eq!(last, 3);
        assert_eq!(body, " new content\n");
    }

    /// Same as above, with several lines between the markers.
    #[test]
    fn test_extract_marker_span_multi_line() {
        let model_output = "<|marker_1|>\nline1\nline2\nline3\n<|marker_4|>";
        let (first, last, body) = extract_marker_span(model_output).unwrap();
        assert_eq!(first, 1);
        assert_eq!(last, 4);
        assert_eq!(body, "line1\nline2\nline3\n");
    }

    /// Replacing the whole (single-block) region applies the edit.
    #[test]
    fn test_apply_marker_span_basic() {
        let model_output = "<|marker_1|>\naaa\nBBB\nccc\n<|marker_2|>";
        let applied = apply_marker_span(ABC, model_output).unwrap();
        assert_eq!(applied, "aaa\nBBB\nccc\n");
    }

    /// A trailing blank line inside the span survives application.
    #[test]
    fn test_apply_marker_span_preserves_trailing_blank_line() {
        let original = "/\nresult\n\n";
        let model_output = "<|marker_1|>\n//\nresult\n\n<|marker_2|>";
        let applied = apply_marker_span(original, model_output).unwrap();
        assert_eq!(applied, "//\nresult\n\n");
    }

    /// Identical old/new text encodes as the no-edits marker plus end marker.
    #[test]
    fn test_encode_no_edits() {
        let encoded =
            encode_from_old_and_new(ABC, ABC, None, CURSOR, UPDATED_END, NO_EDITS).unwrap();
        assert_eq!(encoded, "NO_EDITS\n>>>>>>> UPDATED\n");
    }

    /// A changed line encodes as a marker-delimited span ending in the end marker.
    #[test]
    fn test_encode_with_change() {
        let encoded =
            encode_from_old_and_new(ABC, ABC_EDITED, None, CURSOR, UPDATED_END, NO_EDITS).unwrap();
        assert!(encoded.contains("<|marker_1|>"));
        assert!(encoded.contains("<|marker_2|>"));
        assert!(encoded.contains("aaa\nBBB\nccc\n"));
        assert!(encoded.ends_with(">>>>>>> UPDATED\n"));
    }

    /// Encoding and then applying reproduces the new text.
    #[test]
    fn test_roundtrip_encode_apply() {
        let encoded = encode_from_old_and_new(
            TEN_LINES,
            TEN_LINES_EDITED,
            None,
            CURSOR,
            UPDATED_END,
            NO_EDITS,
        )
        .unwrap();
        let span_only = encoded
            .strip_suffix(UPDATED_END)
            .expect("should have end marker");
        let rebuilt = apply_marker_span(TEN_LINES, span_only).unwrap();
        assert_eq!(rebuilt, TEN_LINES_EDITED);
    }

    /// Text outside the outermost markers is dropped; inner markers are removed.
    #[test]
    fn test_extract_editable_region_from_markers_multi() {
        let prompt =
            "prefix\n<|marker_1|>\naaa\nbbb\n<|marker_2|>\nccc\nddd\n<|marker_3|>\nsuffix";
        let region = extract_editable_region_from_markers(prompt).unwrap();
        assert_eq!(region, "aaa\nbbb\nccc\nddd");
    }

    /// The simplest case: exactly two markers.
    #[test]
    fn test_extract_editable_region_two_markers() {
        let prompt = "<|marker_1|>\none\ntwo three\n<|marker_2|>";
        let region = extract_editable_region_from_markers(prompt).unwrap();
        assert_eq!(region, "one\ntwo three");
    }

    /// The cursor marker is inserted at the requested byte offset of the new text.
    #[test]
    fn test_encode_with_cursor() {
        let result =
            encode_from_old_and_new(ABC, ABC_EDITED, Some(5), CURSOR, UPDATED_END, NO_EDITS)
                .unwrap();
        assert!(result.contains("<|user_cursor|>"), "result: {result}");
        assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
    }

    /// A marker between the first and last one is removed from the content.
    #[test]
    fn test_extract_marker_span_strips_intermediate_markers() {
        let model_output = "<|marker_2|>\nline1\n<|marker_3|>\nline2\n<|marker_4|>";
        let (first, last, body) = extract_marker_span(model_output).unwrap();
        assert_eq!(first, 2);
        assert_eq!(last, 4);
        assert_eq!(body, "line1\nline2\n");
    }

    /// Several interior markers are all removed from the content.
    #[test]
    fn test_extract_marker_span_strips_multiple_intermediate_markers() {
        let model_output = "<|marker_1|>\naaa\n<|marker_2|>\nbbb\n<|marker_3|>\nccc\n<|marker_4|>";
        let (first, last, body) = extract_marker_span(model_output).unwrap();
        assert_eq!(first, 1);
        assert_eq!(last, 4);
        assert_eq!(body, "aaa\nbbb\nccc\n");
    }

    /// A duplicated interior marker does not corrupt the applied result.
    #[test]
    fn test_apply_marker_span_with_extra_intermediate_marker() {
        let model_output = "<|marker_1|>\naaa\n<|marker_1|>\nBBB\nccc\n<|marker_2|>";
        let applied = apply_marker_span(ABC, model_output).unwrap();
        assert_eq!(applied, "aaa\nBBB\nccc\n");
    }

    /// Marker tags are removed whether inline or on their own line.
    #[test]
    fn test_strip_marker_tags_inline() {
        let cases = [
            ("no markers here", "no markers here"),
            ("before<|marker_5|>after", "beforeafter"),
            ("line1\n<|marker_3|>\nline2", "line1\nline2"),
        ];
        for (input, expected) in cases {
            assert_eq!(strip_marker_tags(input), expected);
        }
    }

    /// Writing markers must not add or remove any byte of the editable text.
    #[test]
    fn test_write_editable_with_markers_v0316_byte_exact() {
        let mut rendered = String::new();
        write_editable_with_markers_v0316(&mut rendered, ABC, 4, CURSOR);
        // The output opens with the first marker and carries the cursor.
        assert!(rendered.starts_with("<|marker_1|>"));
        assert!(rendered.contains("<|user_cursor|>"));
        // Removing cursor and marker tags must give back the input exactly.
        let without_cursor = rendered.replace(CURSOR, "");
        let bare = strip_marker_tags(&without_cursor);
        assert_eq!(bare, ABC);
    }

    /// V0316: markers hug the content directly (no newline after the tag).
    #[test]
    fn test_apply_marker_span_v0316_basic() {
        let model_output = "<|marker_1|>aaa\nBBB\nccc\n<|marker_2|>";
        let applied = apply_marker_span_v0316(ABC, model_output).unwrap();
        assert_eq!(applied, "aaa\nBBB\nccc\n");
    }

    /// V0316: the same marker twice in a row means "no edit".
    #[test]
    fn test_apply_marker_span_v0316_no_edit() {
        let model_output = "<|marker_1|><|marker_1|>";
        let applied = apply_marker_span_v0316(ABC, model_output).unwrap();
        assert_eq!(applied, ABC);
    }

    /// V0316: a repeated marker is a no-edit even with content in between.
    #[test]
    fn test_apply_marker_span_v0316_no_edit_any_marker() {
        let model_output = "<|marker_2|>ignored content<|marker_2|>";
        let applied = apply_marker_span_v0316(ABC, model_output).unwrap();
        assert_eq!(applied, ABC);
    }

    /// V0316: an output spanning every block replaces the whole text.
    #[test]
    fn test_apply_marker_span_v0316_multi_block() {
        let marker_offsets = compute_marker_offsets(EIGHT_LINES);
        assert!(
            marker_offsets.len() >= 3,
            "expected at least 3 offsets, got {:?}",
            marker_offsets
        );

        // The replacement has the same byte layout as the old text, so it can
        // be cut at the old block boundaries.
        let replacement = "LINE1\nLINE2\nLINE3\n\nLINE5\nLINE6\nLINE7\nLINE8\n";
        let mut model_output = String::from("<|marker_1|>");
        for (block, bounds) in marker_offsets.windows(2).enumerate() {
            if block > 0 {
                model_output.push_str(&marker_tag(block + 1));
            }
            model_output.push_str(&replacement[bounds[0]..bounds[1]]);
        }
        model_output.push_str(&marker_tag(marker_offsets.len()));

        let applied = apply_marker_span_v0316(EIGHT_LINES, &model_output).unwrap();
        assert_eq!(applied, replacement);
    }

    /// V0316 is byte-exact: a missing trailing newline is not added back.
    #[test]
    fn test_apply_marker_span_v0316_byte_exact_no_normalization() {
        // The content deliberately lacks the final "\n".
        let model_output = "<|marker_1|>aaa\nBBB\nccc<|marker_2|>";
        let applied = apply_marker_span_v0316(ABC, model_output).unwrap();
        assert_eq!(applied, "aaa\nBBB\nccc");
    }

    /// V0316: encoding identical text yields something that applies as a no-edit.
    #[test]
    fn test_encode_v0316_no_edits() {
        let encoded = encode_from_old_and_new_v0316(ABC, ABC, Some(5), CURSOR, END).unwrap();
        assert!(encoded.ends_with("<|end|>"));
        // Feed the encoded span back through the decoder.
        let span_only = encoded.strip_suffix(END).unwrap();
        let applied = apply_marker_span_v0316(ABC, span_only).unwrap();
        assert_eq!(applied, ABC);
    }

    /// V0316: a change is wrapped in numbered markers and the end marker.
    #[test]
    fn test_encode_v0316_with_change() {
        let encoded = encode_from_old_and_new_v0316(ABC, ABC_EDITED, None, CURSOR, END).unwrap();
        assert!(encoded.contains("<|marker_1|>"));
        assert!(encoded.contains("<|marker_2|>"));
        assert!(encoded.ends_with("<|end|>"));
    }

    /// V0316: encode followed by apply reproduces the new text.
    #[test]
    fn test_roundtrip_v0316() {
        let encoded =
            encode_from_old_and_new_v0316(TEN_LINES, TEN_LINES_EDITED, None, CURSOR, END).unwrap();
        let span_only = encoded.strip_suffix(END).expect("should have end marker");
        let rebuilt = apply_marker_span_v0316(TEN_LINES, span_only).unwrap();
        assert_eq!(rebuilt, TEN_LINES_EDITED);
    }

    /// V0316: the cursor marker lands at the requested offset in the new text.
    #[test]
    fn test_roundtrip_v0316_with_cursor() {
        let result = encode_from_old_and_new_v0316(ABC, ABC_EDITED, Some(5), CURSOR, END).unwrap();
        assert!(result.contains("<|user_cursor|>"), "result: {result}");
        assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
    }

    /// V0316: edits in two different blocks still round-trip.
    #[test]
    fn test_roundtrip_v0316_multi_block_change() {
        let encoded =
            encode_from_old_and_new_v0316(EIGHT_LINES, EIGHT_LINES_TWO_EDITS, None, CURSOR, END)
                .unwrap();
        let span_only = encoded.strip_suffix(END).expect("should have end marker");
        let rebuilt = apply_marker_span_v0316(EIGHT_LINES, span_only).unwrap();
        assert_eq!(rebuilt, EIGHT_LINES_TWO_EDITS);
    }

    /// Expected 1-based marker number for a range of cursor positions.
    #[test]
    fn test_nearest_marker_number() {
        let offsets = vec![0, 10, 20, 30];
        let cases = [
            (Some(0), 1),
            (Some(9), 2),
            (Some(15), 2),
            (Some(25), 3),
            (Some(30), 4),
            (None, 1),
        ];
        for (cursor, expected) in cases {
            assert_eq!(nearest_marker_number(cursor, &offsets), expected);
        }
    }

    /// Relative tags carry an explicit sign; zero is rendered as `-0`.
    #[test]
    fn test_marker_tag_relative_formats_as_expected() {
        let cases = [
            (-2, "<|marker-2|>"),
            (-1, "<|marker-1|>"),
            (0, "<|marker-0|>"),
            (1, "<|marker+1|>"),
            (2, "<|marker+2|>"),
        ];
        for (delta, expected) in cases {
            assert_eq!(marker_tag_relative(delta), expected);
        }
    }

    /// V0317: output has the anchor marker and cursor, and is otherwise byte-exact.
    #[test]
    fn test_write_editable_with_markers_v0317_includes_relative_markers_and_cursor() {
        let mut rendered = String::new();
        write_editable_with_markers_v0317(&mut rendered, ABC, 4, CURSOR);

        assert!(rendered.contains("<|marker-0|>"));
        assert!(rendered.contains("<|user_cursor|>"));

        // Remove the cursor, then every relative marker tag found in the text.
        let without_cursor = rendered.replace(CURSOR, "");
        let mut bare = without_cursor.clone();
        for marker in collect_relative_marker_tags(&without_cursor).iter() {
            let tag = &without_cursor[marker.tag_start..marker.tag_end];
            bare = bare.replace(tag, "");
        }
        assert_eq!(bare, ABC);
    }

    /// V0317: a span from the anchor to the next marker replaces the block.
    #[test]
    fn test_apply_marker_span_v0317_basic() {
        let model_output = "<|marker-0|>aaa\nBBB\nccc\n<|marker+1|>";
        let applied = apply_marker_span_v0317(ABC, model_output, Some(0)).unwrap();
        assert_eq!(applied, "aaa\nBBB\nccc\n");
    }

    /// V0317: the anchor marker twice in a row means "no edit".
    #[test]
    fn test_apply_marker_span_v0317_no_edit() {
        let model_output = "<|marker-0|><|marker-0|>";
        let applied = apply_marker_span_v0317(ABC, model_output, Some(0)).unwrap();
        assert_eq!(applied, ABC);
    }

    /// V0317: identical text encodes as two anchor markers plus the end marker.
    #[test]
    fn test_encode_v0317_no_edits() {
        let encoded = encode_from_old_and_new_v0317(ABC, ABC, Some(5), CURSOR, END).unwrap();
        assert_eq!(encoded, "<|marker-0|><|marker-0|><|end|>");
    }

    /// V0317: encode followed by apply (cursor marker removed) round-trips.
    #[test]
    fn test_roundtrip_v0317() {
        let cursor = Some(6);

        let encoded =
            encode_from_old_and_new_v0317(EIGHT_LINES, EIGHT_LINES_TWO_EDITS, cursor, CURSOR, END)
                .unwrap();
        let span_only = encoded.strip_suffix(END).expect("should have end marker");
        let span_only = span_only.replace(CURSOR, "");
        let rebuilt = apply_marker_span_v0317(EIGHT_LINES, &span_only, cursor).unwrap();
        assert_eq!(rebuilt, EIGHT_LINES_TWO_EDITS);
    }

    /// V0317: the encoded output contains both the cursor and the anchor marker.
    #[test]
    fn test_roundtrip_v0317_with_cursor_marker() {
        let result = encode_from_old_and_new_v0317(ABC, ABC_EDITED, Some(5), CURSOR, END).unwrap();
        assert!(result.contains("<|user_cursor|>"), "result: {result}");
        assert!(result.contains("<|marker-0|>"), "result: {result}");
    }
}