1use anyhow::{Context as _, Result, anyhow};
2
/// Opening text of an absolute marker tag; the marker number follows it.
pub const MARKER_TAG_PREFIX: &str = "<|marker_";
/// Closing text of a marker tag.
pub const MARKER_TAG_SUFFIX: &str = "|>";
/// Opening text of a relative marker tag; a signed delta follows it.
pub const RELATIVE_MARKER_TAG_PREFIX: &str = "<|marker";

// Block size limits (in lines) used by the V0316/V0317 marker layout.
const V0316_MIN_BLOCK_LINES: usize = 3;
const V0316_MAX_BLOCK_LINES: usize = 8;

// Block size limits (in lines) used by the V0318 marker layout.
const V0318_MIN_BLOCK_LINES: usize = 6;
const V0318_MAX_BLOCK_LINES: usize = 16;

/// Maximum number of lines scanned forward when looking for a good block start.
const MAX_NUDGE_LINES: usize = 5;

/// End-of-output marker for the V0316 format.
pub const V0316_END_MARKER: &str = "<[end▁of▁sentence]>";
/// End-of-output marker for the V0317 format (same text as V0316).
pub const V0317_END_MARKER: &str = V0316_END_MARKER;
/// End-of-output marker for the V0318 format (same text as V0316).
pub const V0318_END_MARKER: &str = V0316_END_MARKER;
/// End-of-output marker for the V0327 format (same text as V0316).
pub const V0327_END_MARKER: &str = V0316_END_MARKER;
15
16pub fn marker_tag(number: usize) -> String {
17 format!("{MARKER_TAG_PREFIX}{number}{MARKER_TAG_SUFFIX}")
18}
19
/// Build a relative marker tag for `delta`.
///
/// Positive deltas are written with an explicit `+`, negative deltas carry
/// their own `-`, and zero is always spelled `<|marker-0|>`.
pub fn marker_tag_relative(delta: isize) -> String {
    match delta {
        0 => "<|marker-0|>".to_owned(),
        ahead if ahead > 0 => format!("<|marker+{ahead}|>"),
        behind => format!("<|marker{behind}|>"),
    }
}
29
/// Per-line facts used when choosing marker boundaries.
struct LineInfo {
    /// Byte offset of the first byte of the line within the scanned text.
    start: usize,
    /// Whether the line is empty after trimming whitespace.
    is_blank: bool,
    /// Whether the line is non-blank and not a structural tail, so a block
    /// may begin on it.
    is_good_start: bool,
}
35
36fn collect_line_info(text: &str) -> Vec<LineInfo> {
37 let mut lines = Vec::new();
38 let mut offset = 0;
39 for line in text.split('\n') {
40 let trimmed = line.trim();
41 let is_blank = trimmed.is_empty();
42 let is_good_start = !is_blank && !is_structural_tail(trimmed);
43 lines.push(LineInfo {
44 start: offset,
45 is_blank,
46 is_good_start,
47 });
48 offset += line.len() + 1;
49 }
50 // split('\n') on "abc\n" yields ["abc", ""] — drop the phantom trailing
51 // empty element when the text ends with '\n'.
52 if text.ends_with('\n') && lines.len() > 1 {
53 lines.pop();
54 }
55 lines
56}
57
/// Report whether an already-trimmed line is a "structural tail": it either
/// begins with a closing bracket (`}`, `]`, `)`) or consists solely of one of
/// the keywords `break`, `continue`, `return`, `throw`, `end`, optionally
/// followed by semicolons.
fn is_structural_tail(trimmed_line: &str) -> bool {
    const BARE_KEYWORDS: [&str; 5] = ["break", "continue", "return", "throw", "end"];

    let opens_with_closer = matches!(trimmed_line.chars().next(), Some('}' | ']' | ')'));
    opens_with_closer || BARE_KEYWORDS.contains(&trimmed_line.trim_end_matches(';'))
}
67
68/// Starting from line `from`, scan up to `MAX_NUDGE_LINES` forward to find a
69/// line with `is_good_start`. Returns `None` if no suitable line is found.
70fn skip_to_good_start(lines: &[LineInfo], from: usize) -> Option<usize> {
71 (from..lines.len().min(from + MAX_NUDGE_LINES)).find(|&i| lines[i].is_good_start)
72}
73
74/// Compute byte offsets within `editable_text` where marker boundaries should
75/// be placed.
76///
77/// Returns a sorted `Vec<usize>` that always starts with `0` and ends with
78/// `editable_text.len()`. Interior offsets are placed at line boundaries
79/// (right after a `\n`), preferring blank-line boundaries when available and
80/// respecting `min_block_lines` / `max_block_lines` constraints.
81fn compute_marker_offsets_with_limits(
82 editable_text: &str,
83 min_block_lines: usize,
84 max_block_lines: usize,
85) -> Vec<usize> {
86 if editable_text.is_empty() {
87 return vec![0, 0];
88 }
89
90 let lines = collect_line_info(editable_text);
91 let mut offsets = vec![0usize];
92 let mut last_boundary_line = 0;
93 let mut i = 0;
94
95 while i < lines.len() {
96 let gap = i - last_boundary_line;
97
98 // Blank-line split: non-blank line following blank line(s) with enough
99 // accumulated lines.
100 if gap >= min_block_lines && !lines[i].is_blank && i > 0 && lines[i - 1].is_blank {
101 let target = if lines[i].is_good_start {
102 i
103 } else {
104 skip_to_good_start(&lines, i).unwrap_or(i)
105 };
106 if lines.len() - target >= min_block_lines
107 && lines[target].start > *offsets.last().unwrap_or(&0)
108 {
109 offsets.push(lines[target].start);
110 last_boundary_line = target;
111 i = target + 1;
112 continue;
113 }
114 }
115
116 // Hard cap: too many lines without a split.
117 if gap >= max_block_lines {
118 let target = skip_to_good_start(&lines, i).unwrap_or(i);
119 if lines[target].start > *offsets.last().unwrap_or(&0) {
120 offsets.push(lines[target].start);
121 last_boundary_line = target;
122 i = target + 1;
123 continue;
124 }
125 }
126
127 i += 1;
128 }
129
130 let end = editable_text.len();
131 if *offsets.last().unwrap_or(&0) != end {
132 offsets.push(end);
133 }
134
135 offsets
136}
137
138/// Compute byte offsets within `editable_text` for the V0316/V0317 block sizing rules.
139pub fn compute_marker_offsets(editable_text: &str) -> Vec<usize> {
140 compute_marker_offsets_with_limits(editable_text, V0316_MIN_BLOCK_LINES, V0316_MAX_BLOCK_LINES)
141}
142
143pub fn compute_marker_offsets_v0318(editable_text: &str) -> Vec<usize> {
144 compute_marker_offsets_with_limits(editable_text, V0318_MIN_BLOCK_LINES, V0318_MAX_BLOCK_LINES)
145}
146
147fn line_start_at_or_before(text: &str, offset: usize) -> usize {
148 let bounded_offset = text.floor_char_boundary(offset.min(text.len()));
149 text[..bounded_offset]
150 .rfind('\n')
151 .map(|index| index + 1)
152 .unwrap_or(0)
153}
154
155fn line_end_at_or_after(text: &str, offset: usize) -> usize {
156 let bounded_offset = text.floor_char_boundary(offset.min(text.len()));
157 if bounded_offset >= text.len() {
158 return text.len();
159 }
160
161 text[bounded_offset..]
162 .find('\n')
163 .map(|index| bounded_offset + index + 1)
164 .unwrap_or(text.len())
165}
166
167fn grow_v0327_candidate_range(
168 text: &str,
169 cursor_offset: usize,
170 editable_token_limit: usize,
171) -> std::ops::Range<usize> {
172 if text.is_empty() {
173 return 0..0;
174 }
175
176 let byte_budget = editable_token_limit.saturating_mul(3).max(1);
177 let half_budget = byte_budget / 2;
178
179 let mut start = cursor_offset.saturating_sub(half_budget);
180 let mut end = start.saturating_add(byte_budget).min(text.len());
181
182 if end.saturating_sub(start) < byte_budget {
183 start = end.saturating_sub(byte_budget);
184 }
185
186 start = line_start_at_or_before(text, start);
187 end = line_end_at_or_after(text, end);
188
189 if start < end {
190 start..end
191 } else {
192 let line_start = line_start_at_or_before(text, cursor_offset);
193 let line_end = line_end_at_or_after(text, cursor_offset);
194 line_start..line_end.max(line_start)
195 }
196}
197
198fn trim_v0327_candidate_range_to_markers(
199 text: &str,
200 candidate_range: std::ops::Range<usize>,
201 cursor_offset: usize,
202) -> std::ops::Range<usize> {
203 let candidate_text = &text[candidate_range.clone()];
204 let marker_offsets = compute_marker_offsets_v0318(candidate_text);
205
206 if marker_offsets.len() <= 2 {
207 return candidate_range;
208 }
209
210 let candidate_cursor_offset = cursor_offset
211 .saturating_sub(candidate_range.start)
212 .min(candidate_text.len());
213 let first_internal_marker_index = if candidate_cursor_offset >= marker_offsets[1] {
214 1
215 } else {
216 0
217 };
218 let last_internal_marker_index = marker_offsets.len() - 2;
219 let last_marker_index = marker_offsets.len() - 1;
220 let end_marker_index = if candidate_cursor_offset <= marker_offsets[last_internal_marker_index]
221 {
222 last_internal_marker_index
223 } else {
224 last_marker_index
225 };
226
227 let trimmed_start = candidate_range.start + marker_offsets[first_internal_marker_index];
228 let trimmed_end = candidate_range.start + marker_offsets[end_marker_index];
229
230 if trimmed_start < trimmed_end {
231 trimmed_start..trimmed_end
232 } else {
233 let block_index = cursor_block_index(Some(candidate_cursor_offset), &marker_offsets);
234 let start = candidate_range.start + marker_offsets[block_index];
235 let end = candidate_range.start + marker_offsets[block_index + 1];
236 if start < end {
237 start..end
238 } else {
239 candidate_range
240 }
241 }
242}
243
244pub fn compute_v0327_editable_range(
245 text: &str,
246 cursor_offset: usize,
247 editable_token_limit: usize,
248) -> std::ops::Range<usize> {
249 let candidate_range = grow_v0327_candidate_range(text, cursor_offset, editable_token_limit);
250 trim_v0327_candidate_range_to_markers(text, candidate_range, cursor_offset)
251}
252
253/// Write the editable region content with marker tags, inserting the cursor
254/// marker at the given offset within the editable text.
255pub fn write_editable_with_markers(
256 output: &mut String,
257 editable_text: &str,
258 cursor_offset_in_editable: usize,
259 cursor_marker: &str,
260) {
261 let marker_offsets = compute_marker_offsets(editable_text);
262 let mut cursor_placed = false;
263 for (i, &offset) in marker_offsets.iter().enumerate() {
264 let marker_num = i + 1;
265 if !output.is_empty() && !output.ends_with('\n') {
266 output.push('\n');
267 }
268 output.push_str(&marker_tag(marker_num));
269
270 if let Some(&next_offset) = marker_offsets.get(i + 1) {
271 output.push('\n');
272 let block = &editable_text[offset..next_offset];
273 if !cursor_placed
274 && cursor_offset_in_editable >= offset
275 && cursor_offset_in_editable <= next_offset
276 {
277 cursor_placed = true;
278 let cursor_in_block = cursor_offset_in_editable - offset;
279 output.push_str(&block[..cursor_in_block]);
280 output.push_str(cursor_marker);
281 output.push_str(&block[cursor_in_block..]);
282 } else {
283 output.push_str(block);
284 }
285 }
286 }
287}
288
289/// Strip any `<|marker_N|>` tags from `text`.
290///
291/// When a marker tag sits on its own line (followed by `\n`), the trailing
292/// newline is also removed so the surrounding lines stay joined naturally.
293fn strip_marker_tags(text: &str) -> String {
294 let mut result = String::with_capacity(text.len());
295 let mut pos = 0;
296 let bytes = text.as_bytes();
297 while let Some(rel) = text[pos..].find(MARKER_TAG_PREFIX) {
298 result.push_str(&text[pos..pos + rel]);
299 let num_start = pos + rel + MARKER_TAG_PREFIX.len();
300 if let Some(suffix_rel) = text[num_start..].find(MARKER_TAG_SUFFIX) {
301 let mut tag_end = num_start + suffix_rel + MARKER_TAG_SUFFIX.len();
302 if bytes.get(tag_end) == Some(&b'\n') {
303 tag_end += 1;
304 }
305 pos = tag_end;
306 } else {
307 result.push_str(MARKER_TAG_PREFIX);
308 pos = num_start;
309 }
310 }
311 result.push_str(&text[pos..]);
312 result
313}
314
315/// Parse model output that uses the marker format.
316///
317/// Returns `(start_marker_num, end_marker_num, content_between_markers)`.
318/// The leading format-level newline after the start marker is stripped.
319/// Trailing newlines are preserved so blank-line endings in the editable
320/// region are not lost.
321///
322/// Any extra intermediate marker tags that the model may have inserted
323/// between the first and last markers are stripped from the returned content.
324pub fn extract_marker_span(text: &str) -> Result<(usize, usize, String)> {
325 let first_tag_start = text
326 .find(MARKER_TAG_PREFIX)
327 .context("no start marker found in output")?;
328 let first_num_start = first_tag_start + MARKER_TAG_PREFIX.len();
329 let first_num_end = text[first_num_start..]
330 .find(MARKER_TAG_SUFFIX)
331 .map(|i| i + first_num_start)
332 .context("malformed start marker tag")?;
333 let start_num: usize = text[first_num_start..first_num_end]
334 .parse()
335 .context("start marker number is not a valid integer")?;
336 let first_tag_end = first_num_end + MARKER_TAG_SUFFIX.len();
337
338 let last_tag_start = text
339 .rfind(MARKER_TAG_PREFIX)
340 .context("no end marker found in output")?;
341 let last_num_start = last_tag_start + MARKER_TAG_PREFIX.len();
342 let last_num_end = text[last_num_start..]
343 .find(MARKER_TAG_SUFFIX)
344 .map(|i| i + last_num_start)
345 .context("malformed end marker tag")?;
346 let end_num: usize = text[last_num_start..last_num_end]
347 .parse()
348 .context("end marker number is not a valid integer")?;
349
350 if start_num == end_num {
351 return Err(anyhow!(
352 "start and end markers are the same (marker {})",
353 start_num
354 ));
355 }
356
357 let mut content_start = first_tag_end;
358 if text.as_bytes().get(content_start) == Some(&b'\n') {
359 content_start += 1;
360 }
361 let content_end = last_tag_start;
362
363 let content = &text[content_start..content_end.max(content_start)];
364 let content = strip_marker_tags(content);
365 Ok((start_num, end_num, content))
366}
367
368/// Given old editable text and model output with marker span, reconstruct the
369/// full new editable region.
370pub fn apply_marker_span(old_editable: &str, output: &str) -> Result<String> {
371 let (start_num, end_num, raw_new_span) = extract_marker_span(output)?;
372 let marker_offsets = compute_marker_offsets(old_editable);
373
374 let start_idx = start_num
375 .checked_sub(1)
376 .context("marker numbers are 1-indexed")?;
377 let end_idx = end_num
378 .checked_sub(1)
379 .context("marker numbers are 1-indexed")?;
380 let start_byte = *marker_offsets
381 .get(start_idx)
382 .context("start marker number out of range")?;
383 let end_byte = *marker_offsets
384 .get(end_idx)
385 .context("end marker number out of range")?;
386
387 if start_byte > end_byte {
388 return Err(anyhow!("start marker must come before end marker"));
389 }
390
391 let old_span = &old_editable[start_byte..end_byte];
392 let mut new_span = raw_new_span;
393 if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
394 new_span.push('\n');
395 }
396 if !old_span.ends_with('\n') && new_span.ends_with('\n') {
397 new_span.pop();
398 }
399
400 let mut result = String::new();
401 result.push_str(&old_editable[..start_byte]);
402 result.push_str(&new_span);
403 result.push_str(&old_editable[end_byte..]);
404
405 Ok(result)
406}
407
408/// Compare old and new editable text, find the minimal marker span that covers
409/// all changes, and encode the result with marker tags.
410pub fn encode_from_old_and_new(
411 old_editable: &str,
412 new_editable: &str,
413 cursor_offset_in_new: Option<usize>,
414 cursor_marker: &str,
415 end_marker: &str,
416 no_edits_marker: &str,
417) -> Result<String> {
418 if old_editable == new_editable {
419 return Ok(format!("{no_edits_marker}{end_marker}"));
420 }
421
422 let marker_offsets = compute_marker_offsets(old_editable);
423 let (common_prefix, common_suffix) =
424 common_prefix_suffix(old_editable.as_bytes(), new_editable.as_bytes());
425 let change_end_in_old = old_editable.len() - common_suffix;
426
427 let start_marker_idx = marker_offsets
428 .iter()
429 .rposition(|&offset| offset <= common_prefix)
430 .unwrap_or(0);
431 let end_marker_idx = marker_offsets
432 .iter()
433 .position(|&offset| offset >= change_end_in_old)
434 .unwrap_or(marker_offsets.len() - 1);
435
436 let old_start = marker_offsets[start_marker_idx];
437 let old_end = marker_offsets[end_marker_idx];
438
439 let new_start = old_start;
440 let new_end = new_editable
441 .len()
442 .saturating_sub(old_editable.len().saturating_sub(old_end));
443
444 let new_span = &new_editable[new_start..new_end];
445
446 let start_marker_num = start_marker_idx + 1;
447 let end_marker_num = end_marker_idx + 1;
448
449 let mut result = String::new();
450 result.push_str(&marker_tag(start_marker_num));
451 result.push('\n');
452
453 if let Some(cursor_offset) = cursor_offset_in_new {
454 if cursor_offset >= new_start && cursor_offset <= new_end {
455 let cursor_in_span = cursor_offset - new_start;
456 let bounded = cursor_in_span.min(new_span.len());
457 result.push_str(&new_span[..bounded]);
458 result.push_str(cursor_marker);
459 result.push_str(&new_span[bounded..]);
460 } else {
461 result.push_str(new_span);
462 }
463 } else {
464 result.push_str(new_span);
465 }
466
467 if !result.ends_with('\n') {
468 result.push('\n');
469 }
470 result.push_str(&marker_tag(end_marker_num));
471 result.push('\n');
472 result.push_str(end_marker);
473
474 Ok(result)
475}
476
477/// Extract the full editable region from text that uses marker tags.
478///
479/// Returns the concatenation of all block contents between the first and last
480/// markers, with intermediate marker tags stripped.
481pub fn extract_editable_region_from_markers(text: &str) -> Option<String> {
482 let first_marker_start = text.find(MARKER_TAG_PREFIX)?;
483
484 let mut markers: Vec<(usize, usize)> = Vec::new();
485 let mut search_start = first_marker_start;
486 while let Some(rel_pos) = text[search_start..].find(MARKER_TAG_PREFIX) {
487 let tag_start = search_start + rel_pos;
488 let num_start = tag_start + MARKER_TAG_PREFIX.len();
489 let num_end = text[num_start..].find(MARKER_TAG_SUFFIX)?;
490 let tag_end = num_start + num_end + MARKER_TAG_SUFFIX.len();
491 markers.push((tag_start, tag_end));
492 search_start = tag_end;
493 }
494
495 if markers.len() < 2 {
496 return None;
497 }
498
499 let (_, first_tag_end) = markers[0];
500 let (last_tag_start, _) = markers[markers.len() - 1];
501
502 let mut content_start = first_tag_end;
503 if text.as_bytes().get(content_start) == Some(&b'\n') {
504 content_start += 1;
505 }
506 let mut content_end = last_tag_start;
507 if content_end > content_start && text.as_bytes().get(content_end - 1) == Some(&b'\n') {
508 content_end -= 1;
509 }
510
511 let raw = &text[content_start..content_end];
512 let result = strip_marker_tags(raw);
513 let result = result.strip_suffix('\n').unwrap_or(&result).to_string();
514 Some(result)
515}
516
/// A marker tag located in model output.
struct ParsedTag {
    /// Number parsed from the tag payload (an absolute marker number or a
    /// relative delta, depending on which collector produced the tag).
    value: isize,
    /// Byte offset of the first byte of the tag.
    tag_start: usize,
    /// Byte offset just past the tag's closing suffix.
    tag_end: usize,
}
522
523fn collect_tags(text: &str, prefix: &str, parse: fn(&str) -> Option<isize>) -> Vec<ParsedTag> {
524 let mut tags = Vec::new();
525 let mut search_from = 0;
526 while let Some(rel_pos) = text[search_from..].find(prefix) {
527 let tag_start = search_from + rel_pos;
528 let payload_start = tag_start + prefix.len();
529 if let Some(suffix_rel) = text[payload_start..].find(MARKER_TAG_SUFFIX) {
530 let payload_end = payload_start + suffix_rel;
531 if let Some(value) = parse(&text[payload_start..payload_end]) {
532 let tag_end = payload_end + MARKER_TAG_SUFFIX.len();
533 tags.push(ParsedTag {
534 value,
535 tag_start,
536 tag_end,
537 });
538 search_from = tag_end;
539 continue;
540 }
541 }
542 search_from = tag_start + prefix.len();
543 }
544 tags
545}
546
547fn collect_marker_tags(text: &str) -> Vec<ParsedTag> {
548 collect_tags(text, MARKER_TAG_PREFIX, |s| {
549 s.parse::<usize>().ok().map(|n| n as isize)
550 })
551}
552
553fn collect_relative_marker_tags(text: &str) -> Vec<ParsedTag> {
554 collect_tags(text, RELATIVE_MARKER_TAG_PREFIX, |s| {
555 s.parse::<isize>().ok()
556 })
557}
558
/// Return the 1-based number of the marker whose offset is closest to the
/// cursor.
///
/// A missing cursor is treated as offset `0`. On a tie the earliest marker
/// wins, and `1` is returned when `marker_offsets` is empty.
pub fn nearest_marker_number(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
    let cursor = cursor_offset.unwrap_or(0);
    marker_offsets
        .iter()
        .enumerate()
        // `abs_diff` stays in unsigned arithmetic, so it cannot overflow the
        // way a subtraction of `as isize` casts can for very large offsets.
        .min_by_key(|&(_, &offset)| offset.abs_diff(cursor))
        .map_or(1, |(index, _)| index + 1)
}
568
/// Index of the block `[marker_offsets[i], marker_offsets[i + 1])` that
/// contains the cursor (a missing cursor counts as offset `0`).
///
/// When no block contains it, the index of the last block is returned, or
/// `0` if there are fewer than two offsets.
fn cursor_block_index(cursor_offset: Option<usize>, marker_offsets: &[usize]) -> usize {
    let cursor = cursor_offset.unwrap_or_default();
    for (block, bounds) in marker_offsets.windows(2).enumerate() {
        if (bounds[0]..bounds[1]).contains(&cursor) {
            return block;
        }
    }
    marker_offsets.len().saturating_sub(2)
}
576
/// Lengths of the longest common prefix and of the longest common suffix of
/// `a` and `b`, where the suffix is only measured over the bytes that are not
/// already part of the prefix (so the two never overlap).
fn common_prefix_suffix(a: &[u8], b: &[u8]) -> (usize, usize) {
    let prefix = a
        .iter()
        .zip(b)
        .position(|(left, right)| left != right)
        .unwrap_or_else(|| a.len().min(b.len()));

    // Walking both remainders backwards stops at the shorter one.
    let (tail_a, tail_b) = (&a[prefix..], &b[prefix..]);
    let suffix = tail_a
        .iter()
        .rev()
        .zip(tail_b.iter().rev())
        .take_while(|(left, right)| left == right)
        .count();

    (prefix, suffix)
}
590
/// Translate an offset inside the old span into the matching offset inside
/// the new span.
///
/// Offsets within the common prefix map to themselves, offsets within the
/// common suffix keep their distance from the end, and offsets inside the
/// changed middle are scaled proportionally to the new middle's length.
fn map_boundary_offset(
    old_rel: usize,
    old_span_len: usize,
    new_span_len: usize,
    span_common_prefix: usize,
    span_common_suffix: usize,
) -> usize {
    if old_rel <= span_common_prefix {
        return old_rel;
    }
    if old_rel >= old_span_len - span_common_suffix {
        return new_span_len - (old_span_len - old_rel);
    }

    // Length of the part of a span that is neither common prefix nor suffix.
    let changed_len = |span_len: usize| {
        span_len
            .saturating_sub(span_common_prefix)
            .saturating_sub(span_common_suffix)
    };
    let old_changed = changed_len(old_span_len);
    let new_changed = changed_len(new_span_len);

    if old_changed == 0 {
        return span_common_prefix;
    }
    span_common_prefix + (old_rel - span_common_prefix) * new_changed / old_changed
}
621
622fn snap_to_line_start(text: &str, offset: usize) -> usize {
623 let bounded = offset.min(text.len());
624 let bounded = text.floor_char_boundary(bounded);
625
626 if bounded >= text.len() {
627 return text.len();
628 }
629
630 if bounded == 0 || text.as_bytes().get(bounded - 1) == Some(&b'\n') {
631 return bounded;
632 }
633
634 if let Some(next_nl_rel) = text[bounded..].find('\n') {
635 let next = bounded + next_nl_rel + 1;
636 return text.floor_char_boundary(next.min(text.len()));
637 }
638
639 let prev_start = text[..bounded].rfind('\n').map(|idx| idx + 1).unwrap_or(0);
640 text.floor_char_boundary(prev_start)
641}
642
/// Append `editable_text` to `output` with a tag before every block and one
/// after the last block, adding no bytes other than the tags and the cursor
/// marker.
///
/// `marker_offsets` are the block boundaries, and `tag_for_index` produces
/// the tag text for a boundary index. `cursor_marker` is inserted once, at
/// `cursor_offset_in_editable`, in the first block whose byte range
/// (inclusive of both ends) contains that offset.
fn write_editable_with_markers_impl(
    output: &mut String,
    editable_text: &str,
    cursor_offset_in_editable: usize,
    cursor_marker: &str,
    marker_offsets: &[usize],
    tag_for_index: impl Fn(usize) -> String,
) {
    let mut cursor_pending = true;

    for (index, &block_start) in marker_offsets.iter().enumerate() {
        output.push_str(&tag_for_index(index));

        // The final boundary only gets its tag; no block follows it.
        let Some(&block_end) = marker_offsets.get(index + 1) else {
            continue;
        };

        let block = &editable_text[block_start..block_end];
        let holds_cursor = (block_start..=block_end).contains(&cursor_offset_in_editable);
        if cursor_pending && holds_cursor {
            cursor_pending = false;
            let (head, tail) = block.split_at(cursor_offset_in_editable - block_start);
            output.push_str(head);
            output.push_str(cursor_marker);
            output.push_str(tail);
        } else {
            output.push_str(block);
        }
    }
}
676
677pub fn write_editable_with_markers_v0316(
678 output: &mut String,
679 editable_text: &str,
680 cursor_offset_in_editable: usize,
681 cursor_marker: &str,
682) {
683 let marker_offsets = compute_marker_offsets(editable_text);
684 write_editable_with_markers_impl(
685 output,
686 editable_text,
687 cursor_offset_in_editable,
688 cursor_marker,
689 &marker_offsets,
690 |i| marker_tag(i + 1),
691 );
692}
693
694pub fn write_editable_with_markers_v0317(
695 output: &mut String,
696 editable_text: &str,
697 cursor_offset_in_editable: usize,
698 cursor_marker: &str,
699) {
700 let marker_offsets = compute_marker_offsets(editable_text);
701 let anchor_idx = cursor_block_index(Some(cursor_offset_in_editable), &marker_offsets);
702 write_editable_with_markers_impl(
703 output,
704 editable_text,
705 cursor_offset_in_editable,
706 cursor_marker,
707 &marker_offsets,
708 |i| marker_tag_relative(i as isize - anchor_idx as isize),
709 );
710}
711
712pub fn write_editable_with_markers_v0318(
713 output: &mut String,
714 editable_text: &str,
715 cursor_offset_in_editable: usize,
716 cursor_marker: &str,
717) {
718 let marker_offsets = compute_marker_offsets_v0318(editable_text);
719 write_editable_with_markers_impl(
720 output,
721 editable_text,
722 cursor_offset_in_editable,
723 cursor_marker,
724 &marker_offsets,
725 |i| marker_tag(i + 1),
726 );
727}
728
729/// Parse byte-exact model output and reconstruct the full new editable region.
730///
731/// `resolve_boundary` maps a parsed tag value to an absolute byte offset in
732/// old_editable, given the marker_offsets. Returns `(start_byte, end_byte)` or
733/// an error.
734fn apply_marker_span_impl(
735 old_editable: &str,
736 tags: &[ParsedTag],
737 output: &str,
738 resolve_boundaries: impl Fn(isize, isize) -> Result<(usize, usize)>,
739) -> Result<String> {
740 if tags.is_empty() {
741 return Err(anyhow!("no marker tags found in output"));
742 }
743 if tags.len() == 1 {
744 return Err(anyhow!(
745 "only one marker tag found in output, expected at least two"
746 ));
747 }
748
749 let start_value = tags[0].value;
750 let end_value = tags[tags.len() - 1].value;
751
752 if start_value == end_value {
753 return Ok(old_editable.to_string());
754 }
755
756 let (start_byte, end_byte) = resolve_boundaries(start_value, end_value)?;
757
758 if start_byte > end_byte {
759 return Err(anyhow!("start marker must come before end marker"));
760 }
761
762 let mut new_content = String::new();
763 for i in 0..tags.len() - 1 {
764 let content_start = tags[i].tag_end;
765 let content_end = tags[i + 1].tag_start;
766 if content_start <= content_end {
767 new_content.push_str(&output[content_start..content_end]);
768 }
769 }
770
771 let mut result = String::new();
772 result.push_str(&old_editable[..start_byte]);
773 result.push_str(&new_content);
774 result.push_str(&old_editable[end_byte..]);
775
776 Ok(result)
777}
778
779pub fn apply_marker_span_v0316(old_editable: &str, output: &str) -> Result<String> {
780 let tags = collect_marker_tags(output);
781
782 // Validate monotonically increasing with no gaps (best-effort warning)
783 if tags.len() >= 2 {
784 let start_num = tags[0].value;
785 let end_num = tags[tags.len() - 1].value;
786 if start_num != end_num {
787 let expected: Vec<isize> = (start_num..=end_num).collect();
788 let actual: Vec<isize> = tags.iter().map(|t| t.value).collect();
789 if actual != expected {
790 eprintln!(
791 "V0316 marker sequence validation failed: expected {:?}, got {:?}. Attempting best-effort parse.",
792 expected, actual
793 );
794 }
795 }
796 }
797
798 let marker_offsets = compute_marker_offsets(old_editable);
799 apply_marker_span_impl(old_editable, &tags, output, |start_val, end_val| {
800 let start_idx = (start_val as usize)
801 .checked_sub(1)
802 .context("marker numbers are 1-indexed")?;
803 let end_idx = (end_val as usize)
804 .checked_sub(1)
805 .context("marker numbers are 1-indexed")?;
806 let start_byte = *marker_offsets
807 .get(start_idx)
808 .context("start marker number out of range")?;
809 let end_byte = *marker_offsets
810 .get(end_idx)
811 .context("end marker number out of range")?;
812 Ok((start_byte, end_byte))
813 })
814}
815
816pub fn apply_marker_span_v0317(
817 old_editable: &str,
818 output: &str,
819 cursor_offset_in_old: Option<usize>,
820) -> Result<String> {
821 let tags = collect_relative_marker_tags(output);
822 let marker_offsets = compute_marker_offsets(old_editable);
823 let anchor_idx = cursor_block_index(cursor_offset_in_old, &marker_offsets);
824
825 apply_marker_span_impl(old_editable, &tags, output, |start_delta, end_delta| {
826 let start_idx_signed = anchor_idx as isize + start_delta;
827 let end_idx_signed = anchor_idx as isize + end_delta;
828 if start_idx_signed < 0 || end_idx_signed < 0 {
829 return Err(anyhow!("relative marker maps before first marker"));
830 }
831 let start_idx = usize::try_from(start_idx_signed).context("invalid start marker index")?;
832 let end_idx = usize::try_from(end_idx_signed).context("invalid end marker index")?;
833 let start_byte = *marker_offsets
834 .get(start_idx)
835 .context("start marker number out of range")?;
836 let end_byte = *marker_offsets
837 .get(end_idx)
838 .context("end marker number out of range")?;
839 Ok((start_byte, end_byte))
840 })
841}
842
843pub fn apply_marker_span_v0318(old_editable: &str, output: &str) -> Result<String> {
844 let tags = collect_marker_tags(output);
845
846 if tags.len() >= 2 {
847 let start_num = tags[0].value;
848 let end_num = tags[tags.len() - 1].value;
849 if start_num != end_num {
850 let expected: Vec<isize> = (start_num..=end_num).collect();
851 let actual: Vec<isize> = tags.iter().map(|t| t.value).collect();
852 if actual != expected {
853 eprintln!(
854 "V0318 marker sequence validation failed: expected {:?}, got {:?}. Attempting best-effort parse.",
855 expected, actual
856 );
857 }
858 }
859 }
860
861 let marker_offsets = compute_marker_offsets_v0318(old_editable);
862 apply_marker_span_impl(old_editable, &tags, output, |start_val, end_val| {
863 let start_idx = (start_val as usize)
864 .checked_sub(1)
865 .context("marker numbers are 1-indexed")?;
866 let end_idx = (end_val as usize)
867 .checked_sub(1)
868 .context("marker numbers are 1-indexed")?;
869 let start_byte = *marker_offsets
870 .get(start_idx)
871 .context("start marker number out of range")?;
872 let end_byte = *marker_offsets
873 .get(end_idx)
874 .context("end marker number out of range")?;
875 Ok((start_byte, end_byte))
876 })
877}
878
879/// Encode the training target from old and new editable text.
880///
881/// Shared implementation for V0316, V0317, and V0318. The `tag_for_block_idx`
882/// closure maps a block index to the appropriate marker tag string.
883/// `no_edit_tag` is the marker tag to repeat when there are no edits.
884fn encode_from_old_and_new_impl(
885 old_editable: &str,
886 new_editable: &str,
887 cursor_offset_in_new: Option<usize>,
888 cursor_marker: &str,
889 end_marker: &str,
890 no_edit_tag: &str,
891 marker_offsets: &[usize],
892 tag_for_block_idx: impl Fn(usize) -> String,
893) -> Result<String> {
894 if old_editable == new_editable {
895 return Ok(format!("{no_edit_tag}{no_edit_tag}{end_marker}"));
896 }
897
898 let (common_prefix, common_suffix) =
899 common_prefix_suffix(old_editable.as_bytes(), new_editable.as_bytes());
900 let change_end_in_old = old_editable.len() - common_suffix;
901
902 let mut start_marker_idx = marker_offsets
903 .iter()
904 .rposition(|&offset| offset <= common_prefix)
905 .unwrap_or(0);
906 let mut end_marker_idx = marker_offsets
907 .iter()
908 .position(|&offset| offset >= change_end_in_old)
909 .unwrap_or(marker_offsets.len() - 1);
910
911 if start_marker_idx == end_marker_idx {
912 if end_marker_idx < marker_offsets.len().saturating_sub(1) {
913 end_marker_idx += 1;
914 } else if start_marker_idx > 0 {
915 start_marker_idx -= 1;
916 }
917 }
918
919 let old_start = marker_offsets[start_marker_idx];
920 let old_end = marker_offsets[end_marker_idx];
921
922 let new_start = old_start;
923 let new_end = new_editable
924 .len()
925 .saturating_sub(old_editable.len().saturating_sub(old_end));
926
927 let new_span = &new_editable[new_start..new_end];
928 let old_span = &old_editable[old_start..old_end];
929
930 let (span_common_prefix, span_common_suffix) =
931 common_prefix_suffix(old_span.as_bytes(), new_span.as_bytes());
932
933 let mut result = String::new();
934 let mut prev_new_rel = 0usize;
935 let mut cursor_placed = false;
936
937 for block_idx in start_marker_idx..end_marker_idx {
938 result.push_str(&tag_for_block_idx(block_idx));
939
940 let new_rel_end = if block_idx + 1 == end_marker_idx {
941 new_span.len()
942 } else {
943 let old_rel = marker_offsets[block_idx + 1] - old_start;
944 let mapped = map_boundary_offset(
945 old_rel,
946 old_span.len(),
947 new_span.len(),
948 span_common_prefix,
949 span_common_suffix,
950 );
951 snap_to_line_start(new_span, mapped)
952 };
953
954 let new_rel_end = new_rel_end.max(prev_new_rel);
955 let block_content = &new_span[prev_new_rel..new_rel_end];
956
957 if !cursor_placed {
958 if let Some(cursor_offset) = cursor_offset_in_new {
959 let abs_start = new_start + prev_new_rel;
960 let abs_end = new_start + new_rel_end;
961 if cursor_offset >= abs_start && cursor_offset <= abs_end {
962 cursor_placed = true;
963 let cursor_in_block = cursor_offset - abs_start;
964 let bounded = cursor_in_block.min(block_content.len());
965 result.push_str(&block_content[..bounded]);
966 result.push_str(cursor_marker);
967 result.push_str(&block_content[bounded..]);
968 prev_new_rel = new_rel_end;
969 continue;
970 }
971 }
972 }
973
974 result.push_str(block_content);
975 prev_new_rel = new_rel_end;
976 }
977
978 result.push_str(&tag_for_block_idx(end_marker_idx));
979 result.push_str(end_marker);
980
981 Ok(result)
982}
983
984pub fn encode_from_old_and_new_v0316(
985 old_editable: &str,
986 new_editable: &str,
987 cursor_offset_in_new: Option<usize>,
988 cursor_marker: &str,
989 end_marker: &str,
990) -> Result<String> {
991 let marker_offsets = compute_marker_offsets(old_editable);
992 let no_edit_tag = marker_tag(nearest_marker_number(cursor_offset_in_new, &marker_offsets));
993 encode_from_old_and_new_impl(
994 old_editable,
995 new_editable,
996 cursor_offset_in_new,
997 cursor_marker,
998 end_marker,
999 &no_edit_tag,
1000 &marker_offsets,
1001 |block_idx| marker_tag(block_idx + 1),
1002 )
1003}
1004
1005pub fn encode_from_old_and_new_v0317(
1006 old_editable: &str,
1007 new_editable: &str,
1008 cursor_offset_in_new: Option<usize>,
1009 cursor_marker: &str,
1010 end_marker: &str,
1011) -> Result<String> {
1012 let marker_offsets = compute_marker_offsets(old_editable);
1013 let anchor_idx = cursor_block_index(cursor_offset_in_new, &marker_offsets);
1014 let no_edit_tag = marker_tag_relative(0);
1015 encode_from_old_and_new_impl(
1016 old_editable,
1017 new_editable,
1018 cursor_offset_in_new,
1019 cursor_marker,
1020 end_marker,
1021 &no_edit_tag,
1022 &marker_offsets,
1023 |block_idx| marker_tag_relative(block_idx as isize - anchor_idx as isize),
1024 )
1025}
1026
1027pub fn encode_from_old_and_new_v0318(
1028 old_editable: &str,
1029 new_editable: &str,
1030 cursor_offset_in_new: Option<usize>,
1031 cursor_marker: &str,
1032 end_marker: &str,
1033) -> Result<String> {
1034 let marker_offsets = compute_marker_offsets_v0318(old_editable);
1035 let no_edit_tag = marker_tag(nearest_marker_number(cursor_offset_in_new, &marker_offsets));
1036 encode_from_old_and_new_impl(
1037 old_editable,
1038 new_editable,
1039 cursor_offset_in_new,
1040 cursor_marker,
1041 end_marker,
1042 &no_edit_tag,
1043 &marker_offsets,
1044 |block_idx| marker_tag(block_idx + 1),
1045 )
1046}
1047
/// Unit tests for marker-offset computation, marker-span extraction and
/// application, and the old/new encoders in their versioned variants.
#[cfg(test)]
mod tests {
    use super::*;

    /// A three-line text yields only the start and end offsets.
    #[test]
    fn test_compute_marker_offsets_small_block() {
        let text = "aaa\nbbb\nccc\n";
        let offsets = compute_marker_offsets(text);
        assert_eq!(offsets, vec![0, text.len()]);
    }

    /// A blank line produces a boundary at byte 13, the start of the line
    /// that follows it.
    #[test]
    fn test_compute_marker_offsets_blank_line_split() {
        let text = "aaa\nbbb\nccc\n\nddd\neee\nfff\n";
        let offsets = compute_marker_offsets(text);
        assert_eq!(offsets[0], 0);
        assert!(offsets.contains(&13), "offsets: {offsets:?}");
        assert_eq!(*offsets.last().unwrap(), text.len());
    }

    /// The boundary must land on the `def get_text_cell(` line that follows a
    /// blank line, not on the `self,` line after it.
    #[test]
    fn test_compute_marker_offsets_blank_line_split_overrides_pending_hard_cap_boundary() {
        let text = "\
class OCRDataframe(BaseModel):
 model_config = ConfigDict(arbitrary_types_allowed=True)

 df: pl.DataFrame

 def page(self, page_number: int = 0) -> \"OCRDataframe\":
 # Filter dataframe on specific page
 df_page = self.df.filter(pl.col(\"page\") == page_number)
 return OCRDataframe(df=df_page)

 def get_text_cell(
 self,
 cell: Cell,
 margin: int = 0,
 page_number: Optional[int] = None,
 min_confidence: int = 50,
 ) -> Optional[str]:
 \"\"\"
 Get text corresponding to cell
";
        let offsets = compute_marker_offsets(text);

        let def_start = text
            .find(" def get_text_cell(")
            .expect("def line exists");
        let self_start = text.find(" self,").expect("self line exists");

        assert!(
            offsets.contains(&def_start),
            "expected boundary at def line start ({def_start}), got {offsets:?}"
        );
        assert!(
            !offsets.contains(&self_start),
            "did not expect boundary at self line start ({self_start}), got {offsets:?}"
        );
    }

    /// A closing-brace line after a blank line must not become a boundary;
    /// the doc-comment line further down must.
    #[test]
    fn test_compute_marker_offsets_blank_line_split_skips_closer_line() {
        let text = "\
impl Plugin for AhoySchedulePlugin {
 fn build(&self, app: &mut App) {
 app.configure_sets(
 self.schedule,
 (
 AhoySystems::MoveCharacters,
 AhoySystems::ApplyForcesToDynamicRigidBodies,
 )
 .chain()
 .before(PhysicsSystems::First),
 );

 }
}

/// System set used by all systems of `bevy_ahoy`.
#[derive(SystemSet, Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum AhoySystems {
 MoveCharacters,
 ApplyForcesToDynamicRigidBodies,
}
";
        let offsets = compute_marker_offsets(text);

        let closer_start = text.find(" }\n").expect("closer line exists");
        let doc_start = text
            .find("/// System set used by all systems of `bevy_ahoy`.")
            .expect("doc line exists");

        assert!(
            !offsets.contains(&closer_start),
            "did not expect boundary at closer line start ({closer_start}), got {offsets:?}"
        );
        assert!(
            offsets.contains(&doc_start),
            "expected boundary at doc line start ({doc_start}), got {offsets:?}"
        );
    }

    /// Ten consecutive non-blank lines must be split into at least two blocks.
    #[test]
    fn test_compute_marker_offsets_max_lines_split() {
        let text = "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n";
        let offsets = compute_marker_offsets(text);
        assert!(offsets.len() >= 3, "offsets: {offsets:?}");
    }

    /// After eight lines and a lone `}` line, the boundary is expected at the
    /// `case` line.
    #[test]
    fn test_compute_marker_offsets_hard_cap_nudges_past_closer_to_case_line() {
        let text = "a1\na2\na3\na4\na5\na6\na7\na8\n}\ncase 'x': {\nbody\n";
        let offsets = compute_marker_offsets(text);

        let expected = text.find("case 'x': {").expect("case line exists");
        assert!(
            offsets.contains(&expected),
            "expected nudged boundary at case line start ({expected}), got {offsets:?}"
        );
    }

    /// With five `}` lines in between, the boundary must not reach the
    /// `case` line.
    #[test]
    fn test_compute_marker_offsets_hard_cap_nudge_respects_max_forward_lines() {
        let text = "a1\na2\na3\na4\na5\na6\na7\na8\n}\n}\n}\n}\n}\ncase 'x': {\nbody\n";
        let offsets = compute_marker_offsets(text);

        let case_start = text.find("case 'x': {").expect("case line exists");
        assert!(
            !offsets.contains(&case_start),
            "boundary should not nudge beyond max forward lines; offsets: {offsets:?}"
        );
    }

    /// Offsets start at 0, end at the text length and are non-decreasing.
    #[test]
    fn test_compute_marker_offsets_stay_sorted_when_hard_cap_boundary_nudges_forward() {
        let text = "\
aaaaaaaaaa = 1;
bbbbbbbbbb = 2;
cccccccccc = 3;
dddddddddd = 4;
eeeeeeeeee = 5;
ffffffffff = 6;
gggggggggg = 7;
hhhhhhhhhh = 8;
 };
 };

 grafanaDashboards = {
 cluster-overview.spec = {
 inherit instanceSelector;
 folderRef = \"infrastructure\";
 json = builtins.readFile ./grafana/dashboards/cluster-overview.json;
 };
 };
";
        let offsets = compute_marker_offsets(text);

        assert_eq!(offsets.first().copied(), Some(0), "offsets: {offsets:?}");
        assert_eq!(
            offsets.last().copied(),
            Some(text.len()),
            "offsets: {offsets:?}"
        );
        assert!(
            offsets.windows(2).all(|window| window[0] <= window[1]),
            "offsets must be sorted: {offsets:?}"
        );
    }

    /// Empty input yields the two offsets `[0, 0]`.
    #[test]
    fn test_compute_marker_offsets_empty() {
        let offsets = compute_marker_offsets("");
        assert_eq!(offsets, vec![0, 0]);
    }

    /// The v0327 editable range starts and ends on v0318 marker offsets of the
    /// candidate range, contains the cursor, and is trimmed on at least one side.
    #[test]
    fn test_compute_v0327_editable_range_trims_to_marker_boundaries() {
        let text = (0..80).map(|_| "x\n").collect::<String>();
        let cursor_offset = text.find("x\nx\nx\nx\nx\n").expect("cursor anchor exists") + 40;

        let candidate_range = grow_v0327_candidate_range(&text, cursor_offset, 20);
        let editable_range = compute_v0327_editable_range(&text, cursor_offset, 20);
        let marker_offsets = compute_marker_offsets_v0318(&text[candidate_range.clone()]);
        let relative_start = editable_range.start - candidate_range.start;
        let relative_end = editable_range.end - candidate_range.start;

        assert!(
            marker_offsets.len() > 2,
            "expected interior markers: {marker_offsets:?}"
        );
        assert!(marker_offsets.contains(&relative_start));
        assert!(marker_offsets.contains(&relative_end));
        assert!(editable_range.start <= cursor_offset);
        assert!(editable_range.end >= cursor_offset);
        assert!(
            editable_range.start > candidate_range.start
                || editable_range.end < candidate_range.end,
            "expected at least one side to trim from {candidate_range:?} down to {editable_range:?}"
        );
    }

    /// Every block of a blank-line-heavy markdown document must contain at
    /// least `V0316_MIN_BLOCK_LINES` lines.
    #[test]
    fn test_compute_marker_offsets_avoid_short_markdown_blocks() {
        let text = "\
# Spree Posts

This is a Posts extension for [Spree Commerce](https://spreecommerce.org), built with Ruby on Rails.

## Installation

1. Add this extension to your Gemfile with this line:

 ```ruby
 bundle add spree_posts
 ```

2. Run the install generator

 ```ruby
 bundle exec rails g spree_posts:install
 ```

3. Restart your server

 If your server was running, restart it so that it can find the assets properly.

## Developing

1. Create a dummy app

 ```bash
 bundle update
 bundle exec rake test_app
 ```

2. Add your new code
3. Run tests

 ```bash
 bundle exec rspec
 ```

When testing your applications integration with this extension you may use it's factories.
Simply add this require statement to your spec_helper:

```ruby
require 'spree_posts/factories'
```

## Releasing a new version

```shell
bundle exec gem bump -p -t
bundle exec gem release
```

For more options please see [gem-release README](https://github.com/svenfuchs/gem-release)

## Contributing

If you'd like to contribute, please take a look at the contributing guide.
";
        let offsets = compute_marker_offsets(text);

        assert_eq!(offsets.first().copied(), Some(0), "offsets: {offsets:?}");
        assert_eq!(
            offsets.last().copied(),
            Some(text.len()),
            "offsets: {offsets:?}"
        );

        for window in offsets.windows(2) {
            let block = &text[window[0]..window[1]];
            let line_count = block.lines().count();
            assert!(
                line_count >= V0316_MIN_BLOCK_LINES,
                "block too short: {line_count} lines in block {block:?} with offsets {offsets:?}"
            );
        }
    }

    /// Extracts the start number, end number and content between two tags.
    #[test]
    fn test_extract_marker_span() {
        let text = "<|marker_2|>\n new content\n<|marker_3|>\n";
        let (start, end, content) = extract_marker_span(text).unwrap();
        assert_eq!(start, 2);
        assert_eq!(end, 3);
        assert_eq!(content, " new content\n");
    }

    /// Multi-line content between two tags is returned intact.
    #[test]
    fn test_extract_marker_span_multi_line() {
        let text = "<|marker_1|>\nline1\nline2\nline3\n<|marker_4|>";
        let (start, end, content) = extract_marker_span(text).unwrap();
        assert_eq!(start, 1);
        assert_eq!(end, 4);
        assert_eq!(content, "line1\nline2\nline3\n");
    }

    /// Replacing the only block applies the new content.
    #[test]
    fn test_apply_marker_span_basic() {
        let old = "aaa\nbbb\nccc\n";
        let output = "<|marker_1|>\naaa\nBBB\nccc\n<|marker_2|>";
        let result = apply_marker_span(old, output).unwrap();
        assert_eq!(result, "aaa\nBBB\nccc\n");
    }

    /// A trailing blank line in the replacement content is kept.
    #[test]
    fn test_apply_marker_span_preserves_trailing_blank_line() {
        let old = "/\nresult\n\n";
        let output = "<|marker_1|>\n//\nresult\n\n<|marker_2|>";
        let result = apply_marker_span(old, output).unwrap();
        assert_eq!(result, "//\nresult\n\n");
    }

    /// Identical old and new text encodes as the no-edits marker followed by
    /// the end marker.
    #[test]
    fn test_encode_no_edits() {
        let old = "aaa\nbbb\nccc\n";
        let result = encode_from_old_and_new(
            old,
            old,
            None,
            "<|user_cursor|>",
            ">>>>>>> UPDATED\n",
            "NO_EDITS\n",
        )
        .unwrap();
        assert_eq!(result, "NO_EDITS\n>>>>>>> UPDATED\n");
    }

    /// A changed line is emitted between marker tags and followed by the end
    /// marker.
    #[test]
    fn test_encode_with_change() {
        let old = "aaa\nbbb\nccc\n";
        let new = "aaa\nBBB\nccc\n";
        let result = encode_from_old_and_new(
            old,
            new,
            None,
            "<|user_cursor|>",
            ">>>>>>> UPDATED\n",
            "NO_EDITS\n",
        )
        .unwrap();
        assert!(result.contains("<|marker_1|>"));
        assert!(result.contains("<|marker_2|>"));
        assert!(result.contains("aaa\nBBB\nccc\n"));
        assert!(result.ends_with(">>>>>>> UPDATED\n"));
    }

    /// Encoding and then applying the result reconstructs the new text.
    #[test]
    fn test_roundtrip_encode_apply() {
        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
        let encoded = encode_from_old_and_new(
            old,
            new,
            None,
            "<|user_cursor|>",
            ">>>>>>> UPDATED\n",
            "NO_EDITS\n",
        )
        .unwrap();
        let output = encoded
            .strip_suffix(">>>>>>> UPDATED\n")
            .expect("should have end marker");
        let reconstructed = apply_marker_span(old, output).unwrap();
        assert_eq!(reconstructed, new);
    }

    /// Text outside the first and last tag is dropped and interior tags are
    /// removed.
    #[test]
    fn test_extract_editable_region_from_markers_multi() {
        let text = "prefix\n<|marker_1|>\naaa\nbbb\n<|marker_2|>\nccc\nddd\n<|marker_3|>\nsuffix";
        let parsed = extract_editable_region_from_markers(text).unwrap();
        assert_eq!(parsed, "aaa\nbbb\nccc\nddd");
    }

    /// With exactly two tags, the content between them is returned.
    #[test]
    fn test_extract_editable_region_two_markers() {
        let text = "<|marker_1|>\none\ntwo three\n<|marker_2|>";
        let parsed = extract_editable_region_from_markers(text).unwrap();
        assert_eq!(parsed, "one\ntwo three");
    }

    /// A cursor at byte 5 of the new text is rendered inside the changed line.
    #[test]
    fn test_encode_with_cursor() {
        let old = "aaa\nbbb\nccc\n";
        let new = "aaa\nBBB\nccc\n";
        let result = encode_from_old_and_new(
            old,
            new,
            Some(5),
            "<|user_cursor|>",
            ">>>>>>> UPDATED\n",
            "NO_EDITS\n",
        )
        .unwrap();
        assert!(result.contains("<|user_cursor|>"), "result: {result}");
        assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
    }

    /// An intermediate tag is removed from the extracted content.
    #[test]
    fn test_extract_marker_span_strips_intermediate_markers() {
        let text = "<|marker_2|>\nline1\n<|marker_3|>\nline2\n<|marker_4|>";
        let (start, end, content) = extract_marker_span(text).unwrap();
        assert_eq!(start, 2);
        assert_eq!(end, 4);
        assert_eq!(content, "line1\nline2\n");
    }

    /// Several intermediate tags are all removed from the extracted content.
    #[test]
    fn test_extract_marker_span_strips_multiple_intermediate_markers() {
        let text = "<|marker_1|>\naaa\n<|marker_2|>\nbbb\n<|marker_3|>\nccc\n<|marker_4|>";
        let (start, end, content) = extract_marker_span(text).unwrap();
        assert_eq!(start, 1);
        assert_eq!(end, 4);
        assert_eq!(content, "aaa\nbbb\nccc\n");
    }

    /// A repeated start tag inside the content does not end up in the result.
    #[test]
    fn test_apply_marker_span_with_extra_intermediate_marker() {
        let old = "aaa\nbbb\nccc\n";
        let output = "<|marker_1|>\naaa\n<|marker_1|>\nBBB\nccc\n<|marker_2|>";
        let result = apply_marker_span(old, output).unwrap();
        assert_eq!(result, "aaa\nBBB\nccc\n");
    }

    /// Tags are stripped whether inline or on a line of their own.
    #[test]
    fn test_strip_marker_tags_inline() {
        assert_eq!(strip_marker_tags("no markers here"), "no markers here");
        assert_eq!(strip_marker_tags("before<|marker_5|>after"), "beforeafter");
        assert_eq!(
            strip_marker_tags("line1\n<|marker_3|>\nline2"),
            "line1\nline2"
        );
    }

    /// Removing the cursor marker and all tags from the v0316 output gives
    /// back the editable text byte for byte.
    #[test]
    fn test_write_editable_with_markers_v0316_byte_exact() {
        let editable = "aaa\nbbb\nccc\n";
        let mut output = String::new();
        write_editable_with_markers_v0316(&mut output, editable, 4, "<|user_cursor|>");
        assert!(output.starts_with("<|marker_1|>"));
        assert!(output.contains("<|user_cursor|>"));
        let stripped = output.replace("<|user_cursor|>", "");
        let stripped = strip_marker_tags(&stripped);
        assert_eq!(stripped, editable);
    }

    /// v0316: content directly between two tags replaces the block.
    #[test]
    fn test_apply_marker_span_v0316_basic() {
        let old = "aaa\nbbb\nccc\n";
        let output = "<|marker_1|>aaa\nBBB\nccc\n<|marker_2|>";
        let result = apply_marker_span_v0316(old, output).unwrap();
        assert_eq!(result, "aaa\nBBB\nccc\n");
    }

    /// v0316: the same tag twice in a row leaves the text unchanged.
    #[test]
    fn test_apply_marker_span_v0316_no_edit() {
        let old = "aaa\nbbb\nccc\n";
        let output = "<|marker_1|><|marker_1|>";
        let result = apply_marker_span_v0316(old, output).unwrap();
        assert_eq!(result, old);
    }

    /// v0316: identical start and end tags leave the text unchanged even when
    /// content sits between them.
    #[test]
    fn test_apply_marker_span_v0316_no_edit_any_marker() {
        let old = "aaa\nbbb\nccc\n";
        let output = "<|marker_2|>ignored content<|marker_2|>";
        let result = apply_marker_span_v0316(old, output).unwrap();
        assert_eq!(result, old);
    }

    /// v0316: an output spanning every block, with a tag at each boundary,
    /// replaces the whole text.
    #[test]
    fn test_apply_marker_span_v0316_multi_block() {
        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
        let marker_offsets = compute_marker_offsets(old);
        assert!(
            marker_offsets.len() >= 3,
            "expected at least 3 offsets, got {marker_offsets:?}"
        );

        // Same byte length as `old`, so the old offsets can slice it directly.
        let new_content = "LINE1\nLINE2\nLINE3\n\nLINE5\nLINE6\nLINE7\nLINE8\n";
        let mut output = String::new();
        output.push_str("<|marker_1|>");
        for i in 0..marker_offsets.len() - 1 {
            if i > 0 {
                output.push_str(&marker_tag(i + 1));
            }
            let start = marker_offsets[i];
            let end = marker_offsets[i + 1];
            output.push_str(&new_content[start..end]);
        }
        let last_marker_num = marker_offsets.len();
        output.push_str(&marker_tag(last_marker_num));
        let result = apply_marker_span_v0316(old, &output).unwrap();
        assert_eq!(result, new_content);
    }

    /// v0316: a missing trailing newline in the content is not added back.
    #[test]
    fn test_apply_marker_span_v0316_byte_exact_no_normalization() {
        let old = "aaa\nbbb\nccc\n";
        let output = "<|marker_1|>aaa\nBBB\nccc<|marker_2|>";
        let result = apply_marker_span_v0316(old, output).unwrap();
        assert_eq!(result, "aaa\nBBB\nccc");
    }

    /// v0316: encoding identical text and applying it returns the old text.
    #[test]
    fn test_encode_v0316_no_edits() {
        let old = "aaa\nbbb\nccc\n";
        let result =
            encode_from_old_and_new_v0316(old, old, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
        assert!(result.ends_with("<|end|>"));
        let stripped = result.strip_suffix("<|end|>").unwrap();
        let result_parsed = apply_marker_span_v0316(old, stripped).unwrap();
        assert_eq!(result_parsed, old);
    }

    /// v0316: a change is emitted between tags 1 and 2 and ends with the end
    /// marker.
    #[test]
    fn test_encode_v0316_with_change() {
        let old = "aaa\nbbb\nccc\n";
        let new = "aaa\nBBB\nccc\n";
        let result =
            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
        assert!(result.contains("<|marker_1|>"));
        assert!(result.contains("<|marker_2|>"));
        assert!(result.ends_with("<|end|>"));
    }

    /// v0316: encode followed by apply reconstructs the new text.
    #[test]
    fn test_roundtrip_v0316() {
        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
        let encoded =
            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
        let stripped = encoded
            .strip_suffix("<|end|>")
            .expect("should have end marker");
        let reconstructed = apply_marker_span_v0316(old, stripped).unwrap();
        assert_eq!(reconstructed, new);
    }

    /// v0316: a cursor at byte 5 of the new text is rendered inside the
    /// changed line.
    #[test]
    fn test_roundtrip_v0316_with_cursor() {
        let old = "aaa\nbbb\nccc\n";
        let new = "aaa\nBBB\nccc\n";
        let result =
            encode_from_old_and_new_v0316(old, new, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
        assert!(result.contains("<|user_cursor|>"), "result: {result}");
        assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
    }

    /// v0316: changes on both sides of a blank line round-trip.
    #[test]
    fn test_roundtrip_v0316_multi_block_change() {
        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
        let new = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
        let encoded =
            encode_from_old_and_new_v0316(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
        let stripped = encoded
            .strip_suffix("<|end|>")
            .expect("should have end marker");
        let reconstructed = apply_marker_span_v0316(old, stripped).unwrap();
        assert_eq!(reconstructed, new);
    }

    /// Pins the 1-based marker number chosen for several cursor offsets and
    /// for a missing cursor.
    #[test]
    fn test_nearest_marker_number() {
        let offsets = vec![0, 10, 20, 30];
        assert_eq!(nearest_marker_number(Some(0), &offsets), 1);
        assert_eq!(nearest_marker_number(Some(9), &offsets), 2);
        assert_eq!(nearest_marker_number(Some(15), &offsets), 2);
        assert_eq!(nearest_marker_number(Some(25), &offsets), 3);
        assert_eq!(nearest_marker_number(Some(30), &offsets), 4);
        assert_eq!(nearest_marker_number(None, &offsets), 1);
    }

    /// Relative tags carry an explicit sign; zero is rendered as `-0`.
    #[test]
    fn test_marker_tag_relative_formats_as_expected() {
        assert_eq!(marker_tag_relative(-2), "<|marker-2|>");
        assert_eq!(marker_tag_relative(-1), "<|marker-1|>");
        assert_eq!(marker_tag_relative(0), "<|marker-0|>");
        assert_eq!(marker_tag_relative(1), "<|marker+1|>");
        assert_eq!(marker_tag_relative(2), "<|marker+2|>");
    }

    /// v0317 output contains the zero tag and the cursor marker; removing the
    /// cursor marker and every relative tag gives back the editable text.
    #[test]
    fn test_write_editable_with_markers_v0317_includes_relative_markers_and_cursor() {
        let editable = "aaa\nbbb\nccc\n";
        let mut output = String::new();
        write_editable_with_markers_v0317(&mut output, editable, 4, "<|user_cursor|>");

        assert!(output.contains("<|marker-0|>"));
        assert!(output.contains("<|user_cursor|>"));

        let stripped = output.replace("<|user_cursor|>", "");
        let stripped =
            collect_relative_marker_tags(&stripped)
                .iter()
                .fold(stripped.clone(), |acc, marker| {
                    let tag = &stripped[marker.tag_start..marker.tag_end];
                    acc.replace(tag, "")
                });
        assert_eq!(stripped, editable);
    }

    /// v0317: content between relative tags `-0` and `+1` replaces the block.
    #[test]
    fn test_apply_marker_span_v0317_basic() {
        let old = "aaa\nbbb\nccc\n";
        let output = "<|marker-0|>aaa\nBBB\nccc\n<|marker+1|>";
        let result = apply_marker_span_v0317(old, output, Some(0)).unwrap();
        assert_eq!(result, "aaa\nBBB\nccc\n");
    }

    /// v0317: the zero tag twice in a row leaves the text unchanged.
    #[test]
    fn test_apply_marker_span_v0317_no_edit() {
        let old = "aaa\nbbb\nccc\n";
        let output = "<|marker-0|><|marker-0|>";
        let result = apply_marker_span_v0317(old, output, Some(0)).unwrap();
        assert_eq!(result, old);
    }

    /// v0317: identical text encodes as two zero tags plus the end marker.
    #[test]
    fn test_encode_v0317_no_edits() {
        let old = "aaa\nbbb\nccc\n";
        let result =
            encode_from_old_and_new_v0317(old, old, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
        assert_eq!(result, "<|marker-0|><|marker-0|><|end|>");
    }

    /// v0317: encode followed by apply reconstructs the new text once the
    /// cursor marker is removed.
    #[test]
    fn test_roundtrip_v0317() {
        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\n";
        let new = "line1\nLINE2\nline3\n\nline5\nLINE6\nline7\nline8\n";
        let cursor = Some(6);

        let encoded =
            encode_from_old_and_new_v0317(old, new, cursor, "<|user_cursor|>", "<|end|>").unwrap();
        let stripped = encoded
            .strip_suffix("<|end|>")
            .expect("should have end marker");
        let stripped = stripped.replace("<|user_cursor|>", "");
        let reconstructed = apply_marker_span_v0317(old, &stripped, cursor).unwrap();
        assert_eq!(reconstructed, new);
    }

    /// v0317: the encoded change contains both the cursor marker and the
    /// zero tag.
    #[test]
    fn test_roundtrip_v0317_with_cursor_marker() {
        let old = "aaa\nbbb\nccc\n";
        let new = "aaa\nBBB\nccc\n";
        let result =
            encode_from_old_and_new_v0317(old, new, Some(5), "<|user_cursor|>", "<|end|>").unwrap();
        assert!(result.contains("<|user_cursor|>"), "result: {result}");
        assert!(result.contains("<|marker-0|>"), "result: {result}");
    }

    /// The v0318 splitter yields fewer offsets than the v0316 splitter on the
    /// same text; both start at 0 and end at the text length.
    #[test]
    fn test_compute_marker_offsets_v0318_uses_larger_block_sizes() {
        let text = "l1\nl2\nl3\n\nl5\nl6\nl7\nl8\nl9\nl10\nl11\nl12\nl13\n";
        let v0316_offsets = compute_marker_offsets(text);
        let v0318_offsets = compute_marker_offsets_v0318(text);

        assert!(v0318_offsets.len() < v0316_offsets.len());
        assert_eq!(v0316_offsets.first().copied(), Some(0));
        assert_eq!(v0318_offsets.first().copied(), Some(0));
        assert_eq!(v0316_offsets.last().copied(), Some(text.len()));
        assert_eq!(v0318_offsets.last().copied(), Some(text.len()));
    }

    /// v0318: encode followed by apply reconstructs the new text.
    #[test]
    fn test_roundtrip_v0318() {
        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
        let encoded =
            encode_from_old_and_new_v0318(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();
        let stripped = encoded
            .strip_suffix("<|end|>")
            .expect("should have end marker");
        let reconstructed = apply_marker_span_v0318(old, stripped).unwrap();
        assert_eq!(reconstructed, new);
    }

    /// v0318: a line appended at the very end must not encode as the bare
    /// `<|marker_2|><|end|>` string, and must round-trip.
    #[test]
    fn test_roundtrip_v0318_append_at_end_of_editable_region() {
        let old = "line1\nline2\nline3\n";
        let new = "line1\nline2\nline3\nline4\n";
        let encoded =
            encode_from_old_and_new_v0318(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();

        assert_ne!(encoded, "<|marker_2|><|end|>");

        let stripped = encoded
            .strip_suffix("<|end|>")
            .expect("should have end marker");
        let reconstructed = apply_marker_span_v0318(old, stripped).unwrap();
        assert_eq!(reconstructed, new);
    }

    /// v0318: a line inserted right after a blank line round-trips.
    #[test]
    fn test_roundtrip_v0318_insert_at_internal_marker_boundary() {
        let old = "alpha\nbeta\n\ngamma\ndelta\n";
        let new = "alpha\nbeta\n\ninserted\ngamma\ndelta\n";
        let encoded =
            encode_from_old_and_new_v0318(old, new, None, "<|user_cursor|>", "<|end|>").unwrap();

        let stripped = encoded
            .strip_suffix("<|end|>")
            .expect("should have end marker");
        let reconstructed = apply_marker_span_v0318(old, stripped).unwrap();
        assert_eq!(reconstructed, new);
    }

    /// v0317: every relative tag in the encoded output sits at offset 0 or
    /// directly after a newline.
    #[test]
    fn test_encode_v0317_markers_stay_on_line_boundaries() {
        let old = "\
\t\t\t\tcontinue outer;
\t\t\t}
\t\t}
\t}

\tconst intersectionObserver = new IntersectionObserver((entries) => {
\t\tfor (const entry of entries) {
\t\t\tif (entry.isIntersecting) {
\t\t\t\tintersectionObserver.unobserve(entry.target);
\t\t\t\tanchorPreload(/** @type {HTMLAnchorElement} */ (entry.target));
\t\t\t}
\t\t}
\t});

\tconst observer = new MutationObserver(() => {
\t\tconst links = /** @type {NodeListOf<HTMLAnchorElement>} */ (
\t\t\tdocument.querySelectorAll('a[data-preload]')
\t\t);

\t\tfor (const link of links) {
\t\t\tif (linkSet.has(link)) continue;
\t\t\tlinkSet.add(link);

\t\t\tswitch (link.dataset.preload) {
\t\t\t\tcase '':
\t\t\t\tcase 'true':
\t\t\t\tcase 'hover': {
\t\t\t\t\tlink.addEventListener('mouseenter', function callback() {
\t\t\t\t\t\tlink.removeEventListener('mouseenter', callback);
\t\t\t\t\t\tanchorPreload(link);
\t\t\t\t\t});
";
        let new = old.replacen(
            "\t\t\t\tcase 'true':\n",
            "\t\t\t\tcase 'TRUE':<|user_cursor|>\n",
            1,
        );

        // The cursor marker's position in `new` is the cursor offset in the
        // marker-free text, because everything before it is unchanged.
        let cursor_offset = new.find("<|user_cursor|>").expect("cursor marker in new");
        let new_without_cursor = new.replace("<|user_cursor|>", "");

        let encoded = encode_from_old_and_new_v0317(
            old,
            &new_without_cursor,
            Some(cursor_offset),
            "<|user_cursor|>",
            "<|end|>",
        )
        .unwrap();

        let core = encoded.strip_suffix("<|end|>").unwrap_or(&encoded);
        for marker in collect_relative_marker_tags(core) {
            let tag_start = marker.tag_start;
            assert!(
                tag_start == 0 || core.as_bytes()[tag_start - 1] == b'\n',
                "marker not at line boundary: {} in output:\n{}",
                marker_tag_relative(marker.value),
                core
            );
        }
    }
}