1use anyhow::{Context as _, Result, anyhow};
2
/// Opening delimiter of a marker tag; the marker number is written right
/// after it.
pub const MARKER_TAG_PREFIX: &str = "<|marker_";
/// Closing delimiter of a marker tag.
pub const MARKER_TAG_SUFFIX: &str = "|>";
/// Number of lines a block must contain before `compute_marker_offsets`
/// considers ending it.
const MIN_BLOCK_LINES: usize = 3;
/// Number of lines at which `compute_marker_offsets` forces a block boundary.
const MAX_BLOCK_LINES: usize = 8;
7
8pub fn marker_tag(number: usize) -> String {
9 format!("{MARKER_TAG_PREFIX}{number}{MARKER_TAG_SUFFIX}")
10}
11
12/// Compute byte offsets within `editable_text` where marker boundaries should
13/// be placed.
14///
15/// Returns a sorted `Vec<usize>` that always starts with `0` and ends with
16/// `editable_text.len()`. Interior offsets are placed at line boundaries
17/// (right after a `\n`), preferring blank-line boundaries when available and
18/// respecting `MIN_BLOCK_LINES` / `MAX_BLOCK_LINES` constraints.
19pub fn compute_marker_offsets(editable_text: &str) -> Vec<usize> {
20 if editable_text.is_empty() {
21 return vec![0, 0];
22 }
23
24 let mut offsets = vec![0usize];
25 let mut lines_since_last_marker = 0usize;
26 let mut byte_offset = 0usize;
27
28 for line in editable_text.split('\n') {
29 let line_end = byte_offset + line.len() + 1;
30 let is_past_end = line_end > editable_text.len();
31 let actual_line_end = line_end.min(editable_text.len());
32 lines_since_last_marker += 1;
33
34 let is_blank = line.trim().is_empty();
35
36 if !is_past_end && lines_since_last_marker >= MIN_BLOCK_LINES {
37 if is_blank {
38 // Blank-line boundary found. We'll place the marker when we
39 // find the next non-blank line (handled below).
40 } else if lines_since_last_marker >= MAX_BLOCK_LINES {
41 offsets.push(actual_line_end);
42 lines_since_last_marker = 0;
43 }
44 }
45
46 // Non-blank line immediately following blank line(s): split here so
47 // the new block starts with this line.
48 if !is_blank && byte_offset > 0 && lines_since_last_marker >= MIN_BLOCK_LINES {
49 let before = &editable_text[..byte_offset];
50 let has_preceding_blank_line = before
51 .strip_suffix('\n')
52 .map(|stripped| {
53 let last_line = match stripped.rfind('\n') {
54 Some(pos) => &stripped[pos + 1..],
55 None => stripped,
56 };
57 last_line.trim().is_empty()
58 })
59 .unwrap_or(false);
60
61 if has_preceding_blank_line {
62 offsets.push(byte_offset);
63 lines_since_last_marker = 1;
64 }
65 }
66
67 byte_offset = actual_line_end;
68
69 // Re-check after blank-line logic since lines_since_last_marker may
70 // have been reset.
71 if !is_past_end && lines_since_last_marker >= MAX_BLOCK_LINES {
72 if *offsets.last().unwrap_or(&0) != actual_line_end {
73 offsets.push(actual_line_end);
74 lines_since_last_marker = 0;
75 }
76 }
77 }
78
79 let end = editable_text.len();
80 if *offsets.last().unwrap_or(&0) != end {
81 offsets.push(end);
82 }
83
84 offsets
85}
86
87/// Write the editable region content with marker tags, inserting the cursor
88/// marker at the given offset within the editable text.
89pub fn write_editable_with_markers(
90 output: &mut String,
91 editable_text: &str,
92 cursor_offset_in_editable: usize,
93 cursor_marker: &str,
94) {
95 let marker_offsets = compute_marker_offsets(editable_text);
96 let mut cursor_placed = false;
97 for (i, &offset) in marker_offsets.iter().enumerate() {
98 let marker_num = i + 1;
99 if !output.is_empty() && !output.ends_with('\n') {
100 output.push('\n');
101 }
102 output.push_str(&marker_tag(marker_num));
103
104 if let Some(&next_offset) = marker_offsets.get(i + 1) {
105 output.push('\n');
106 let block = &editable_text[offset..next_offset];
107 if !cursor_placed
108 && cursor_offset_in_editable >= offset
109 && cursor_offset_in_editable <= next_offset
110 {
111 cursor_placed = true;
112 let cursor_in_block = cursor_offset_in_editable - offset;
113 output.push_str(&block[..cursor_in_block]);
114 output.push_str(cursor_marker);
115 output.push_str(&block[cursor_in_block..]);
116 } else {
117 output.push_str(block);
118 }
119 }
120 }
121}
122
123/// Check if the output represents a "no edits" signal for V0316:
124/// the same marker tag appears twice in succession with no meaningful
125/// content between them (e.g. `<|marker_N|>\n<|marker_N|>`).
126pub fn is_repeated_final_marker(output: &str) -> bool {
127 let trimmed = output.trim();
128 let Some(prefix_end) = trimmed.find(MARKER_TAG_SUFFIX) else {
129 return false;
130 };
131 let first_tag_end = prefix_end + MARKER_TAG_SUFFIX.len();
132 let first_tag = &trimmed[..first_tag_end];
133
134 if !first_tag.starts_with(MARKER_TAG_PREFIX) {
135 return false;
136 }
137
138 let rest = &trimmed[first_tag_end..];
139 let rest = rest.strip_prefix('\n').unwrap_or(rest);
140 rest.trim() == first_tag
141}
142
143/// Strip any `<|marker_N|>` tags from `text`.
144///
145/// When a marker tag sits on its own line (followed by `\n`), the trailing
146/// newline is also removed so the surrounding lines stay joined naturally.
147fn strip_marker_tags(text: &str) -> String {
148 let mut result = String::with_capacity(text.len());
149 let mut pos = 0;
150 let bytes = text.as_bytes();
151 while let Some(rel) = text[pos..].find(MARKER_TAG_PREFIX) {
152 result.push_str(&text[pos..pos + rel]);
153 let num_start = pos + rel + MARKER_TAG_PREFIX.len();
154 if let Some(suffix_rel) = text[num_start..].find(MARKER_TAG_SUFFIX) {
155 let mut tag_end = num_start + suffix_rel + MARKER_TAG_SUFFIX.len();
156 if bytes.get(tag_end) == Some(&b'\n') {
157 tag_end += 1;
158 }
159 pos = tag_end;
160 } else {
161 result.push_str(MARKER_TAG_PREFIX);
162 pos = num_start;
163 }
164 }
165 result.push_str(&text[pos..]);
166 result
167}
168
169/// Parse model output that uses the marker format.
170///
171/// Returns `(start_marker_num, end_marker_num, content_between_markers)`.
172/// The leading format-level newline after the start marker is stripped.
173/// Trailing newlines are preserved so blank-line endings in the editable
174/// region are not lost.
175///
176/// Any extra intermediate marker tags that the model may have inserted
177/// between the first and last markers are stripped from the returned content.
178pub fn extract_marker_span(text: &str) -> Result<(usize, usize, String)> {
179 let first_tag_start = text
180 .find(MARKER_TAG_PREFIX)
181 .context("no start marker found in output")?;
182 let first_num_start = first_tag_start + MARKER_TAG_PREFIX.len();
183 let first_num_end = text[first_num_start..]
184 .find(MARKER_TAG_SUFFIX)
185 .map(|i| i + first_num_start)
186 .context("malformed start marker tag")?;
187 let start_num: usize = text[first_num_start..first_num_end]
188 .parse()
189 .context("start marker number is not a valid integer")?;
190 let first_tag_end = first_num_end + MARKER_TAG_SUFFIX.len();
191
192 let last_tag_start = text
193 .rfind(MARKER_TAG_PREFIX)
194 .context("no end marker found in output")?;
195 let last_num_start = last_tag_start + MARKER_TAG_PREFIX.len();
196 let last_num_end = text[last_num_start..]
197 .find(MARKER_TAG_SUFFIX)
198 .map(|i| i + last_num_start)
199 .context("malformed end marker tag")?;
200 let end_num: usize = text[last_num_start..last_num_end]
201 .parse()
202 .context("end marker number is not a valid integer")?;
203
204 if start_num == end_num {
205 return Err(anyhow!(
206 "start and end markers are the same (marker {})",
207 start_num
208 ));
209 }
210
211 let mut content_start = first_tag_end;
212 if text.as_bytes().get(content_start) == Some(&b'\n') {
213 content_start += 1;
214 }
215 let content_end = last_tag_start;
216
217 let content = &text[content_start..content_end.max(content_start)];
218 let content = strip_marker_tags(content);
219 Ok((start_num, end_num, content))
220}
221
222/// Given old editable text and model output with marker span, reconstruct the
223/// full new editable region.
224pub fn apply_marker_span(old_editable: &str, output: &str) -> Result<String> {
225 let (start_num, end_num, raw_new_span) = extract_marker_span(output)?;
226 let marker_offsets = compute_marker_offsets(old_editable);
227
228 let start_idx = start_num
229 .checked_sub(1)
230 .context("marker numbers are 1-indexed")?;
231 let end_idx = end_num
232 .checked_sub(1)
233 .context("marker numbers are 1-indexed")?;
234 let start_byte = *marker_offsets
235 .get(start_idx)
236 .context("start marker number out of range")?;
237 let end_byte = *marker_offsets
238 .get(end_idx)
239 .context("end marker number out of range")?;
240
241 if start_byte > end_byte {
242 return Err(anyhow!("start marker must come before end marker"));
243 }
244
245 let old_span = &old_editable[start_byte..end_byte];
246 let mut new_span = raw_new_span;
247 if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
248 new_span.push('\n');
249 }
250 if !old_span.ends_with('\n') && new_span.ends_with('\n') {
251 new_span.pop();
252 }
253
254 let mut result = String::new();
255 result.push_str(&old_editable[..start_byte]);
256 result.push_str(&new_span);
257 result.push_str(&old_editable[end_byte..]);
258
259 Ok(result)
260}
261
262/// Compare old and new editable text, find the minimal marker span that covers
263/// all changes, and encode the result with marker tags.
264pub fn encode_from_old_and_new(
265 old_editable: &str,
266 new_editable: &str,
267 cursor_offset_in_new: Option<usize>,
268 cursor_marker: &str,
269 end_marker: &str,
270 no_edits_marker: &str,
271) -> Result<String> {
272 if old_editable == new_editable {
273 return Ok(format!("{no_edits_marker}{end_marker}"));
274 }
275
276 let marker_offsets = compute_marker_offsets(old_editable);
277
278 let common_prefix = old_editable
279 .bytes()
280 .zip(new_editable.bytes())
281 .take_while(|(a, b)| a == b)
282 .count();
283
284 let old_remaining = old_editable.len() - common_prefix;
285 let new_remaining = new_editable.len() - common_prefix;
286 let max_suffix = old_remaining.min(new_remaining);
287 let common_suffix = old_editable.as_bytes()[old_editable.len() - max_suffix..]
288 .iter()
289 .rev()
290 .zip(
291 new_editable.as_bytes()[new_editable.len() - max_suffix..]
292 .iter()
293 .rev(),
294 )
295 .take_while(|(a, b)| a == b)
296 .count();
297
298 let change_end_in_old = old_editable.len() - common_suffix;
299
300 let start_marker_idx = marker_offsets
301 .iter()
302 .rposition(|&offset| offset <= common_prefix)
303 .unwrap_or(0);
304 let end_marker_idx = marker_offsets
305 .iter()
306 .position(|&offset| offset >= change_end_in_old)
307 .unwrap_or(marker_offsets.len() - 1);
308
309 let old_start = marker_offsets[start_marker_idx];
310 let old_end = marker_offsets[end_marker_idx];
311
312 let new_start = old_start;
313 let new_end = new_editable
314 .len()
315 .saturating_sub(old_editable.len().saturating_sub(old_end));
316
317 let new_span = &new_editable[new_start..new_end];
318
319 let start_marker_num = start_marker_idx + 1;
320 let end_marker_num = end_marker_idx + 1;
321
322 let mut result = String::new();
323 result.push_str(&marker_tag(start_marker_num));
324 result.push('\n');
325
326 if let Some(cursor_offset) = cursor_offset_in_new {
327 if cursor_offset >= new_start && cursor_offset <= new_end {
328 let cursor_in_span = cursor_offset - new_start;
329 let bounded = cursor_in_span.min(new_span.len());
330 result.push_str(&new_span[..bounded]);
331 result.push_str(cursor_marker);
332 result.push_str(&new_span[bounded..]);
333 } else {
334 result.push_str(new_span);
335 }
336 } else {
337 result.push_str(new_span);
338 }
339
340 if !result.ends_with('\n') {
341 result.push('\n');
342 }
343 result.push_str(&marker_tag(end_marker_num));
344 result.push('\n');
345 result.push_str(end_marker);
346
347 Ok(result)
348}
349
350/// Extract the full editable region from text that uses marker tags.
351///
352/// Returns the concatenation of all block contents between the first and last
353/// markers, with intermediate marker tags stripped.
354pub fn extract_editable_region_from_markers(text: &str) -> Option<String> {
355 let first_marker_start = text.find(MARKER_TAG_PREFIX)?;
356
357 let mut markers: Vec<(usize, usize)> = Vec::new();
358 let mut search_start = first_marker_start;
359 while let Some(rel_pos) = text[search_start..].find(MARKER_TAG_PREFIX) {
360 let tag_start = search_start + rel_pos;
361 let num_start = tag_start + MARKER_TAG_PREFIX.len();
362 let num_end = text[num_start..].find(MARKER_TAG_SUFFIX)?;
363 let tag_end = num_start + num_end + MARKER_TAG_SUFFIX.len();
364 markers.push((tag_start, tag_end));
365 search_start = tag_end;
366 }
367
368 if markers.len() < 2 {
369 return None;
370 }
371
372 let (_, first_tag_end) = markers[0];
373 let (last_tag_start, _) = markers[markers.len() - 1];
374
375 let mut content_start = first_tag_end;
376 if text.as_bytes().get(content_start) == Some(&b'\n') {
377 content_start += 1;
378 }
379 let mut content_end = last_tag_start;
380 if content_end > content_start && text.as_bytes().get(content_end - 1) == Some(&b'\n') {
381 content_end -= 1;
382 }
383
384 let raw = &text[content_start..content_end];
385 let result = strip_marker_tags(raw);
386 let result = result.strip_suffix('\n').unwrap_or(&result).to_string();
387 Some(result)
388}
389
#[cfg(test)]
mod tests {
    use super::*;

    // Fixture markers shared by the encoder tests.
    const CURSOR: &str = "<|user_cursor|>";
    const END_MARKER: &str = ">>>>>>> UPDATED\n";
    const NO_EDITS: &str = "NO_EDITS\n";

    /// Run the encoder with the shared fixture markers and unwrap the result.
    fn encode(old: &str, new: &str, cursor: Option<usize>) -> String {
        encode_from_old_and_new(old, new, cursor, CURSOR, END_MARKER, NO_EDITS).unwrap()
    }

    #[test]
    fn test_compute_marker_offsets_small_block() {
        let input = "aaa\nbbb\nccc\n";
        assert_eq!(compute_marker_offsets(input), vec![0, input.len()]);
    }

    #[test]
    fn test_compute_marker_offsets_blank_line_split() {
        let input = "aaa\nbbb\nccc\n\nddd\neee\nfff\n";
        let offsets = compute_marker_offsets(input);
        assert_eq!(offsets.first(), Some(&0));
        assert!(offsets.contains(&13), "offsets: {:?}", offsets);
        assert_eq!(offsets.last(), Some(&input.len()));
    }

    #[test]
    fn test_compute_marker_offsets_max_lines_split() {
        let offsets = compute_marker_offsets("1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n");
        assert!(offsets.len() >= 3, "offsets: {:?}", offsets);
    }

    #[test]
    fn test_compute_marker_offsets_empty() {
        assert_eq!(compute_marker_offsets(""), vec![0, 0]);
    }

    #[test]
    fn test_extract_marker_span() {
        let parsed = extract_marker_span("<|marker_2|>\n new content\n<|marker_3|>\n").unwrap();
        assert_eq!(parsed, (2, 3, " new content\n".to_string()));
    }

    #[test]
    fn test_extract_marker_span_multi_line() {
        let parsed =
            extract_marker_span("<|marker_1|>\nline1\nline2\nline3\n<|marker_4|>").unwrap();
        assert_eq!(parsed, (1, 4, "line1\nline2\nline3\n".to_string()));
    }

    #[test]
    fn test_apply_marker_span_basic() {
        let applied = apply_marker_span(
            "aaa\nbbb\nccc\n",
            "<|marker_1|>\naaa\nBBB\nccc\n<|marker_2|>",
        )
        .unwrap();
        assert_eq!(applied, "aaa\nBBB\nccc\n");
    }

    #[test]
    fn test_apply_marker_span_preserves_trailing_blank_line() {
        let applied = apply_marker_span(
            "/\nresult\n\n",
            "<|marker_1|>\n//\nresult\n\n<|marker_2|>",
        )
        .unwrap();
        assert_eq!(applied, "//\nresult\n\n");
    }

    #[test]
    fn test_encode_no_edits() {
        let unchanged = "aaa\nbbb\nccc\n";
        assert_eq!(
            encode(unchanged, unchanged, None),
            "NO_EDITS\n>>>>>>> UPDATED\n"
        );
    }

    #[test]
    fn test_encode_with_change() {
        let encoded = encode("aaa\nbbb\nccc\n", "aaa\nBBB\nccc\n", None);
        for expected in ["<|marker_1|>", "<|marker_2|>", "aaa\nBBB\nccc\n"] {
            assert!(encoded.contains(expected));
        }
        assert!(encoded.ends_with(">>>>>>> UPDATED\n"));
    }

    #[test]
    fn test_roundtrip_encode_apply() {
        let before = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
        let after = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
        let encoded = encode(before, after, None);
        let model_output = encoded
            .strip_suffix(">>>>>>> UPDATED\n")
            .expect("should have end marker");
        assert_eq!(apply_marker_span(before, model_output).unwrap(), after);
    }

    #[test]
    fn test_extract_editable_region_from_markers_multi() {
        let annotated =
            "prefix\n<|marker_1|>\naaa\nbbb\n<|marker_2|>\nccc\nddd\n<|marker_3|>\nsuffix";
        assert_eq!(
            extract_editable_region_from_markers(annotated).unwrap(),
            "aaa\nbbb\nccc\nddd"
        );
    }

    #[test]
    fn test_extract_editable_region_two_markers() {
        let annotated = "<|marker_1|>\none\ntwo three\n<|marker_2|>";
        assert_eq!(
            extract_editable_region_from_markers(annotated).unwrap(),
            "one\ntwo three"
        );
    }

    #[test]
    fn test_encode_with_cursor() {
        let result = encode("aaa\nbbb\nccc\n", "aaa\nBBB\nccc\n", Some(5));
        assert!(result.contains("<|user_cursor|>"), "result: {result}");
        assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
    }

    #[test]
    fn test_extract_marker_span_strips_intermediate_markers() {
        let parsed =
            extract_marker_span("<|marker_2|>\nline1\n<|marker_3|>\nline2\n<|marker_4|>").unwrap();
        assert_eq!(parsed, (2, 4, "line1\nline2\n".to_string()));
    }

    #[test]
    fn test_extract_marker_span_strips_multiple_intermediate_markers() {
        let parsed = extract_marker_span(
            "<|marker_1|>\naaa\n<|marker_2|>\nbbb\n<|marker_3|>\nccc\n<|marker_4|>",
        )
        .unwrap();
        assert_eq!(parsed, (1, 4, "aaa\nbbb\nccc\n".to_string()));
    }

    #[test]
    fn test_apply_marker_span_with_extra_intermediate_marker() {
        let applied = apply_marker_span(
            "aaa\nbbb\nccc\n",
            "<|marker_1|>\naaa\n<|marker_1|>\nBBB\nccc\n<|marker_2|>",
        )
        .unwrap();
        assert_eq!(applied, "aaa\nBBB\nccc\n");
    }

    #[test]
    fn test_is_repeated_final_marker() {
        let repeated = [
            "<|marker_5|>\n<|marker_5|>",
            "<|marker_5|>\n<|marker_5|>\n",
            " <|marker_3|>\n<|marker_3|> ",
        ];
        for output in repeated {
            assert!(is_repeated_final_marker(output));
        }

        let not_repeated = [
            "<|marker_2|>\nnew content\n<|marker_3|>",
            "<|marker_2|>\n<|marker_3|>",
            "no markers here",
            "",
        ];
        for output in not_repeated {
            assert!(!is_repeated_final_marker(output));
        }
    }

    #[test]
    fn test_strip_marker_tags_inline() {
        let cases = [
            ("no markers here", "no markers here"),
            ("before<|marker_5|>after", "beforeafter"),
            ("line1\n<|marker_3|>\nline2", "line1\nline2"),
        ];
        for (input, expected) in cases {
            assert_eq!(strip_marker_tags(input), expected);
        }
    }
}