1use anyhow::{Context as _, Result, anyhow};
2
/// Opening part of a marker tag. A full tag is this prefix, a marker number,
/// and then [`MARKER_TAG_SUFFIX`] (see `marker_tag`).
pub const MARKER_TAG_PREFIX: &str = "<|marker_";
/// Closing part of a marker tag.
pub const MARKER_TAG_SUFFIX: &str = "|>";
/// Number of lines that must have been counted since the last marker before
/// `compute_marker_offsets` will consider placing another one.
const MIN_BLOCK_LINES: usize = 3;
/// Line count since the last marker at which `compute_marker_offsets` forces
/// a new marker even without a blank-line boundary.
const MAX_BLOCK_LINES: usize = 8;
7
8pub fn marker_tag(number: usize) -> String {
9 format!("{MARKER_TAG_PREFIX}{number}{MARKER_TAG_SUFFIX}")
10}
11
12/// Compute byte offsets within `editable_text` where marker boundaries should
13/// be placed.
14///
15/// Returns a sorted `Vec<usize>` that always starts with `0` and ends with
16/// `editable_text.len()`. Interior offsets are placed at line boundaries
17/// (right after a `\n`), preferring blank-line boundaries when available and
18/// respecting `MIN_BLOCK_LINES` / `MAX_BLOCK_LINES` constraints.
19pub fn compute_marker_offsets(editable_text: &str) -> Vec<usize> {
20 if editable_text.is_empty() {
21 return vec![0, 0];
22 }
23
24 let mut offsets = vec![0usize];
25 let mut lines_since_last_marker = 0usize;
26 let mut byte_offset = 0usize;
27
28 for line in editable_text.split('\n') {
29 let line_end = byte_offset + line.len() + 1;
30 let is_past_end = line_end > editable_text.len();
31 let actual_line_end = line_end.min(editable_text.len());
32 lines_since_last_marker += 1;
33
34 let is_blank = line.trim().is_empty();
35
36 if !is_past_end && lines_since_last_marker >= MIN_BLOCK_LINES {
37 if is_blank {
38 // Blank-line boundary found. We'll place the marker when we
39 // find the next non-blank line (handled below).
40 } else if lines_since_last_marker >= MAX_BLOCK_LINES {
41 offsets.push(actual_line_end);
42 lines_since_last_marker = 0;
43 }
44 }
45
46 // Non-blank line immediately following blank line(s): split here so
47 // the new block starts with this line.
48 if !is_blank && byte_offset > 0 && lines_since_last_marker >= MIN_BLOCK_LINES {
49 let before = &editable_text[..byte_offset];
50 let has_preceding_blank_line = before
51 .strip_suffix('\n')
52 .map(|stripped| {
53 let last_line = match stripped.rfind('\n') {
54 Some(pos) => &stripped[pos + 1..],
55 None => stripped,
56 };
57 last_line.trim().is_empty()
58 })
59 .unwrap_or(false);
60
61 if has_preceding_blank_line {
62 offsets.push(byte_offset);
63 lines_since_last_marker = 1;
64 }
65 }
66
67 byte_offset = actual_line_end;
68
69 // Re-check after blank-line logic since lines_since_last_marker may
70 // have been reset.
71 if !is_past_end && lines_since_last_marker >= MAX_BLOCK_LINES {
72 if *offsets.last().unwrap_or(&0) != actual_line_end {
73 offsets.push(actual_line_end);
74 lines_since_last_marker = 0;
75 }
76 }
77 }
78
79 let end = editable_text.len();
80 if *offsets.last().unwrap_or(&0) != end {
81 offsets.push(end);
82 }
83
84 offsets
85}
86
87/// Write the editable region content with marker tags, inserting the cursor
88/// marker at the given offset within the editable text.
89pub fn write_editable_with_markers(
90 output: &mut String,
91 editable_text: &str,
92 cursor_offset_in_editable: usize,
93 cursor_marker: &str,
94) {
95 let marker_offsets = compute_marker_offsets(editable_text);
96 let mut cursor_placed = false;
97 for (i, &offset) in marker_offsets.iter().enumerate() {
98 let marker_num = i + 1;
99 if !output.is_empty() && !output.ends_with('\n') {
100 output.push('\n');
101 }
102 output.push_str(&marker_tag(marker_num));
103
104 if let Some(&next_offset) = marker_offsets.get(i + 1) {
105 output.push('\n');
106 let block = &editable_text[offset..next_offset];
107 if !cursor_placed
108 && cursor_offset_in_editable >= offset
109 && cursor_offset_in_editable <= next_offset
110 {
111 cursor_placed = true;
112 let cursor_in_block = cursor_offset_in_editable - offset;
113 output.push_str(&block[..cursor_in_block]);
114 output.push_str(cursor_marker);
115 output.push_str(&block[cursor_in_block..]);
116 } else {
117 output.push_str(block);
118 }
119 }
120 }
121}
122
123/// Strip any `<|marker_N|>` tags from `text`.
124///
125/// When a marker tag sits on its own line (followed by `\n`), the trailing
126/// newline is also removed so the surrounding lines stay joined naturally.
127fn strip_marker_tags(text: &str) -> String {
128 let mut result = String::with_capacity(text.len());
129 let mut pos = 0;
130 let bytes = text.as_bytes();
131 while let Some(rel) = text[pos..].find(MARKER_TAG_PREFIX) {
132 result.push_str(&text[pos..pos + rel]);
133 let num_start = pos + rel + MARKER_TAG_PREFIX.len();
134 if let Some(suffix_rel) = text[num_start..].find(MARKER_TAG_SUFFIX) {
135 let mut tag_end = num_start + suffix_rel + MARKER_TAG_SUFFIX.len();
136 if bytes.get(tag_end) == Some(&b'\n') {
137 tag_end += 1;
138 }
139 pos = tag_end;
140 } else {
141 result.push_str(MARKER_TAG_PREFIX);
142 pos = num_start;
143 }
144 }
145 result.push_str(&text[pos..]);
146 result
147}
148
149/// Parse model output that uses the marker format.
150///
151/// Returns `(start_marker_num, end_marker_num, content_between_markers)`.
152/// The leading format-level newline after the start marker is stripped.
153/// Trailing newlines are preserved so blank-line endings in the editable
154/// region are not lost.
155///
156/// Any extra intermediate marker tags that the model may have inserted
157/// between the first and last markers are stripped from the returned content.
158pub fn extract_marker_span(text: &str) -> Result<(usize, usize, String)> {
159 let first_tag_start = text
160 .find(MARKER_TAG_PREFIX)
161 .context("no start marker found in output")?;
162 let first_num_start = first_tag_start + MARKER_TAG_PREFIX.len();
163 let first_num_end = text[first_num_start..]
164 .find(MARKER_TAG_SUFFIX)
165 .map(|i| i + first_num_start)
166 .context("malformed start marker tag")?;
167 let start_num: usize = text[first_num_start..first_num_end]
168 .parse()
169 .context("start marker number is not a valid integer")?;
170 let first_tag_end = first_num_end + MARKER_TAG_SUFFIX.len();
171
172 let last_tag_start = text
173 .rfind(MARKER_TAG_PREFIX)
174 .context("no end marker found in output")?;
175 let last_num_start = last_tag_start + MARKER_TAG_PREFIX.len();
176 let last_num_end = text[last_num_start..]
177 .find(MARKER_TAG_SUFFIX)
178 .map(|i| i + last_num_start)
179 .context("malformed end marker tag")?;
180 let end_num: usize = text[last_num_start..last_num_end]
181 .parse()
182 .context("end marker number is not a valid integer")?;
183
184 if start_num == end_num {
185 return Err(anyhow!(
186 "start and end markers are the same (marker {})",
187 start_num
188 ));
189 }
190
191 let mut content_start = first_tag_end;
192 if text.as_bytes().get(content_start) == Some(&b'\n') {
193 content_start += 1;
194 }
195 let content_end = last_tag_start;
196
197 let content = &text[content_start..content_end.max(content_start)];
198 let content = strip_marker_tags(content);
199 Ok((start_num, end_num, content))
200}
201
202/// Given old editable text and model output with marker span, reconstruct the
203/// full new editable region.
204pub fn apply_marker_span(old_editable: &str, output: &str) -> Result<String> {
205 let (start_num, end_num, raw_new_span) = extract_marker_span(output)?;
206 let marker_offsets = compute_marker_offsets(old_editable);
207
208 let start_idx = start_num
209 .checked_sub(1)
210 .context("marker numbers are 1-indexed")?;
211 let end_idx = end_num
212 .checked_sub(1)
213 .context("marker numbers are 1-indexed")?;
214 let start_byte = *marker_offsets
215 .get(start_idx)
216 .context("start marker number out of range")?;
217 let end_byte = *marker_offsets
218 .get(end_idx)
219 .context("end marker number out of range")?;
220
221 if start_byte > end_byte {
222 return Err(anyhow!("start marker must come before end marker"));
223 }
224
225 let old_span = &old_editable[start_byte..end_byte];
226 let mut new_span = raw_new_span;
227 if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
228 new_span.push('\n');
229 }
230 if !old_span.ends_with('\n') && new_span.ends_with('\n') {
231 new_span.pop();
232 }
233
234 let mut result = String::new();
235 result.push_str(&old_editable[..start_byte]);
236 result.push_str(&new_span);
237 result.push_str(&old_editable[end_byte..]);
238
239 Ok(result)
240}
241
242/// Compare old and new editable text, find the minimal marker span that covers
243/// all changes, and encode the result with marker tags.
244pub fn encode_from_old_and_new(
245 old_editable: &str,
246 new_editable: &str,
247 cursor_offset_in_new: Option<usize>,
248 cursor_marker: &str,
249 end_marker: &str,
250 no_edits_marker: &str,
251) -> Result<String> {
252 if old_editable == new_editable {
253 return Ok(format!("{no_edits_marker}{end_marker}"));
254 }
255
256 let marker_offsets = compute_marker_offsets(old_editable);
257
258 let common_prefix = old_editable
259 .bytes()
260 .zip(new_editable.bytes())
261 .take_while(|(a, b)| a == b)
262 .count();
263
264 let old_remaining = old_editable.len() - common_prefix;
265 let new_remaining = new_editable.len() - common_prefix;
266 let max_suffix = old_remaining.min(new_remaining);
267 let common_suffix = old_editable.as_bytes()[old_editable.len() - max_suffix..]
268 .iter()
269 .rev()
270 .zip(
271 new_editable.as_bytes()[new_editable.len() - max_suffix..]
272 .iter()
273 .rev(),
274 )
275 .take_while(|(a, b)| a == b)
276 .count();
277
278 let change_end_in_old = old_editable.len() - common_suffix;
279
280 let start_marker_idx = marker_offsets
281 .iter()
282 .rposition(|&offset| offset <= common_prefix)
283 .unwrap_or(0);
284 let end_marker_idx = marker_offsets
285 .iter()
286 .position(|&offset| offset >= change_end_in_old)
287 .unwrap_or(marker_offsets.len() - 1);
288
289 let old_start = marker_offsets[start_marker_idx];
290 let old_end = marker_offsets[end_marker_idx];
291
292 let new_start = old_start;
293 let new_end = new_editable
294 .len()
295 .saturating_sub(old_editable.len().saturating_sub(old_end));
296
297 let new_span = &new_editable[new_start..new_end];
298
299 let start_marker_num = start_marker_idx + 1;
300 let end_marker_num = end_marker_idx + 1;
301
302 let mut result = String::new();
303 result.push_str(&marker_tag(start_marker_num));
304 result.push('\n');
305
306 if let Some(cursor_offset) = cursor_offset_in_new {
307 if cursor_offset >= new_start && cursor_offset <= new_end {
308 let cursor_in_span = cursor_offset - new_start;
309 let bounded = cursor_in_span.min(new_span.len());
310 result.push_str(&new_span[..bounded]);
311 result.push_str(cursor_marker);
312 result.push_str(&new_span[bounded..]);
313 } else {
314 result.push_str(new_span);
315 }
316 } else {
317 result.push_str(new_span);
318 }
319
320 if !result.ends_with('\n') {
321 result.push('\n');
322 }
323 result.push_str(&marker_tag(end_marker_num));
324 result.push('\n');
325 result.push_str(end_marker);
326
327 Ok(result)
328}
329
330/// Extract the full editable region from text that uses marker tags.
331///
332/// Returns the concatenation of all block contents between the first and last
333/// markers, with intermediate marker tags stripped.
334pub fn extract_editable_region_from_markers(text: &str) -> Option<String> {
335 let first_marker_start = text.find(MARKER_TAG_PREFIX)?;
336
337 let mut markers: Vec<(usize, usize)> = Vec::new();
338 let mut search_start = first_marker_start;
339 while let Some(rel_pos) = text[search_start..].find(MARKER_TAG_PREFIX) {
340 let tag_start = search_start + rel_pos;
341 let num_start = tag_start + MARKER_TAG_PREFIX.len();
342 let num_end = text[num_start..].find(MARKER_TAG_SUFFIX)?;
343 let tag_end = num_start + num_end + MARKER_TAG_SUFFIX.len();
344 markers.push((tag_start, tag_end));
345 search_start = tag_end;
346 }
347
348 if markers.len() < 2 {
349 return None;
350 }
351
352 let (_, first_tag_end) = markers[0];
353 let (last_tag_start, _) = markers[markers.len() - 1];
354
355 let mut content_start = first_tag_end;
356 if text.as_bytes().get(content_start) == Some(&b'\n') {
357 content_start += 1;
358 }
359 let mut content_end = last_tag_start;
360 if content_end > content_start && text.as_bytes().get(content_end - 1) == Some(&b'\n') {
361 content_end -= 1;
362 }
363
364 let raw = &text[content_start..content_end];
365 let result = strip_marker_tags(raw);
366 let result = result.strip_suffix('\n').unwrap_or(&result).to_string();
367 Some(result)
368}
369
// Unit tests for the marker helpers: offset computation, span extraction,
// span application, encoding, and tag stripping.
#[cfg(test)]
mod tests {
    use super::*;

    // Three lines never accumulate enough to split, so only the outer
    // boundaries are returned.
    #[test]
    fn test_compute_marker_offsets_small_block() {
        let text = "aaa\nbbb\nccc\n";
        let offsets = compute_marker_offsets(text);
        assert_eq!(offsets, vec![0, text.len()]);
    }

    // A non-blank line after a blank line starts a new block; byte 13 is the
    // start of "ddd".
    #[test]
    fn test_compute_marker_offsets_blank_line_split() {
        let text = "aaa\nbbb\nccc\n\nddd\neee\nfff\n";
        let offsets = compute_marker_offsets(text);
        assert_eq!(offsets[0], 0);
        assert!(offsets.contains(&13), "offsets: {:?}", offsets);
        assert_eq!(*offsets.last().unwrap(), text.len());
    }

    // Ten lines without blank lines must be split by the maximum block size.
    #[test]
    fn test_compute_marker_offsets_max_lines_split() {
        let text = "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n";
        let offsets = compute_marker_offsets(text);
        assert!(offsets.len() >= 3, "offsets: {:?}", offsets);
    }

    // Empty input still yields a start and an end boundary.
    #[test]
    fn test_compute_marker_offsets_empty() {
        let offsets = compute_marker_offsets("");
        assert_eq!(offsets, vec![0, 0]);
    }

    // The newline after the start tag is dropped; the trailing newline of the
    // content is kept.
    #[test]
    fn test_extract_marker_span() {
        let text = "<|marker_2|>\n new content\n<|marker_3|>\n";
        let (start, end, content) = extract_marker_span(text).unwrap();
        assert_eq!(start, 2);
        assert_eq!(end, 3);
        assert_eq!(content, " new content\n");
    }

    #[test]
    fn test_extract_marker_span_multi_line() {
        let text = "<|marker_1|>\nline1\nline2\nline3\n<|marker_4|>";
        let (start, end, content) = extract_marker_span(text).unwrap();
        assert_eq!(start, 1);
        assert_eq!(end, 4);
        assert_eq!(content, "line1\nline2\nline3\n");
    }

    // Replacing the whole (single-block) region.
    #[test]
    fn test_apply_marker_span_basic() {
        let old = "aaa\nbbb\nccc\n";
        let output = "<|marker_1|>\naaa\nBBB\nccc\n<|marker_2|>";
        let result = apply_marker_span(old, output).unwrap();
        assert_eq!(result, "aaa\nBBB\nccc\n");
    }

    // A blank line at the end of the region must survive the round trip.
    #[test]
    fn test_apply_marker_span_preserves_trailing_blank_line() {
        let old = "/\nresult\n\n";
        let output = "<|marker_1|>\n//\nresult\n\n<|marker_2|>";
        let result = apply_marker_span(old, output).unwrap();
        assert_eq!(result, "//\nresult\n\n");
    }

    // Identical old and new text encodes as the no-edits marker followed by
    // the end marker.
    #[test]
    fn test_encode_no_edits() {
        let old = "aaa\nbbb\nccc\n";
        let result = encode_from_old_and_new(
            old,
            old,
            None,
            "<|user_cursor|>",
            ">>>>>>> UPDATED\n",
            "NO_EDITS\n",
        )
        .unwrap();
        assert_eq!(result, "NO_EDITS\n>>>>>>> UPDATED\n");
    }

    #[test]
    fn test_encode_with_change() {
        let old = "aaa\nbbb\nccc\n";
        let new = "aaa\nBBB\nccc\n";
        let result = encode_from_old_and_new(
            old,
            new,
            None,
            "<|user_cursor|>",
            ">>>>>>> UPDATED\n",
            "NO_EDITS\n",
        )
        .unwrap();
        assert!(result.contains("<|marker_1|>"));
        assert!(result.contains("<|marker_2|>"));
        assert!(result.contains("aaa\nBBB\nccc\n"));
        assert!(result.ends_with(">>>>>>> UPDATED\n"));
    }

    // Encoding a change and applying the encoded span to the old text must
    // reproduce the new text.
    #[test]
    fn test_roundtrip_encode_apply() {
        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
        let encoded = encode_from_old_and_new(
            old,
            new,
            None,
            "<|user_cursor|>",
            ">>>>>>> UPDATED\n",
            "NO_EDITS\n",
        )
        .unwrap();
        let output = encoded
            .strip_suffix(">>>>>>> UPDATED\n")
            .expect("should have end marker");
        let reconstructed = apply_marker_span(old, output).unwrap();
        assert_eq!(reconstructed, new);
    }

    // Text outside the first/last markers is ignored and the intermediate
    // marker is removed.
    #[test]
    fn test_extract_editable_region_from_markers_multi() {
        let text = "prefix\n<|marker_1|>\naaa\nbbb\n<|marker_2|>\nccc\nddd\n<|marker_3|>\nsuffix";
        let parsed = extract_editable_region_from_markers(text).unwrap();
        assert_eq!(parsed, "aaa\nbbb\nccc\nddd");
    }

    #[test]
    fn test_extract_editable_region_two_markers() {
        let text = "<|marker_1|>\none\ntwo three\n<|marker_2|>";
        let parsed = extract_editable_region_from_markers(text).unwrap();
        assert_eq!(parsed, "one\ntwo three");
    }

    // Cursor offset 5 lies just after the first changed byte in the new text.
    #[test]
    fn test_encode_with_cursor() {
        let old = "aaa\nbbb\nccc\n";
        let new = "aaa\nBBB\nccc\n";
        let result = encode_from_old_and_new(
            old,
            new,
            Some(5),
            "<|user_cursor|>",
            ">>>>>>> UPDATED\n",
            "NO_EDITS\n",
        )
        .unwrap();
        assert!(result.contains("<|user_cursor|>"), "result: {result}");
        assert!(result.contains("B<|user_cursor|>BB"), "result: {result}");
    }

    // Markers emitted between the first and last one are not part of the
    // content.
    #[test]
    fn test_extract_marker_span_strips_intermediate_markers() {
        let text = "<|marker_2|>\nline1\n<|marker_3|>\nline2\n<|marker_4|>";
        let (start, end, content) = extract_marker_span(text).unwrap();
        assert_eq!(start, 2);
        assert_eq!(end, 4);
        assert_eq!(content, "line1\nline2\n");
    }

    #[test]
    fn test_extract_marker_span_strips_multiple_intermediate_markers() {
        let text = "<|marker_1|>\naaa\n<|marker_2|>\nbbb\n<|marker_3|>\nccc\n<|marker_4|>";
        let (start, end, content) = extract_marker_span(text).unwrap();
        assert_eq!(start, 1);
        assert_eq!(end, 4);
        assert_eq!(content, "aaa\nbbb\nccc\n");
    }

    // A repeated start marker inside the span is treated as an intermediate
    // marker and stripped.
    #[test]
    fn test_apply_marker_span_with_extra_intermediate_marker() {
        let old = "aaa\nbbb\nccc\n";
        let output = "<|marker_1|>\naaa\n<|marker_1|>\nBBB\nccc\n<|marker_2|>";
        let result = apply_marker_span(old, output).unwrap();
        assert_eq!(result, "aaa\nBBB\nccc\n");
    }

    // Tags are removed both inline and on their own line (together with the
    // newline that follows them).
    #[test]
    fn test_strip_marker_tags_inline() {
        assert_eq!(strip_marker_tags("no markers here"), "no markers here");
        assert_eq!(strip_marker_tags("before<|marker_5|>after"), "beforeafter");
        assert_eq!(
            strip_marker_tags("line1\n<|marker_3|>\nline2"),
            "line1\nline2"
        );
    }
}