multi_region.rs

  1use anyhow::{Context as _, Result, anyhow};
  2
  /// Text that opens every marker tag; the marker number follows it.
  pub const MARKER_TAG_PREFIX: &str = "<|marker_";
  /// Text that closes every marker tag.
  pub const MARKER_TAG_SUFFIX: &str = "|>";
  /// Fewest lines a block must hold before a blank-line split is considered.
  const MIN_BLOCK_LINES: usize = 3;
  /// Line count at which a block is split even without a blank-line boundary.
  const MAX_BLOCK_LINES: usize = 8;

  /// Builds the tag for marker `number`, e.g. `<|marker_3|>` for `3`.
  pub fn marker_tag(number: usize) -> String {
      [
          MARKER_TAG_PREFIX,
          number.to_string().as_str(),
          MARKER_TAG_SUFFIX,
      ]
      .concat()
  }
 11
 12/// Compute byte offsets within `editable_text` where marker boundaries should
 13/// be placed.
 14///
 15/// Returns a sorted `Vec<usize>` that always starts with `0` and ends with
 16/// `editable_text.len()`. Interior offsets are placed at line boundaries
 17/// (right after a `\n`), preferring blank-line boundaries when available and
 18/// respecting `MIN_BLOCK_LINES` / `MAX_BLOCK_LINES` constraints.
 19pub fn compute_marker_offsets(editable_text: &str) -> Vec<usize> {
 20    if editable_text.is_empty() {
 21        return vec![0, 0];
 22    }
 23
 24    let mut offsets = vec![0usize];
 25    let mut lines_since_last_marker = 0usize;
 26    let mut byte_offset = 0usize;
 27
 28    for line in editable_text.split('\n') {
 29        let line_end = byte_offset + line.len() + 1;
 30        let is_past_end = line_end > editable_text.len();
 31        let actual_line_end = line_end.min(editable_text.len());
 32        lines_since_last_marker += 1;
 33
 34        let is_blank = line.trim().is_empty();
 35
 36        if !is_past_end && lines_since_last_marker >= MIN_BLOCK_LINES {
 37            if is_blank {
 38                // Blank-line boundary found. We'll place the marker when we
 39                // find the next non-blank line (handled below).
 40            } else if lines_since_last_marker >= MAX_BLOCK_LINES {
 41                offsets.push(actual_line_end);
 42                lines_since_last_marker = 0;
 43            }
 44        }
 45
 46        // Non-blank line immediately following blank line(s): split here so
 47        // the new block starts with this line.
 48        if !is_blank && byte_offset > 0 && lines_since_last_marker >= MIN_BLOCK_LINES {
 49            let before = &editable_text[..byte_offset];
 50            let has_preceding_blank_line = before
 51                .strip_suffix('\n')
 52                .map(|stripped| {
 53                    let last_line = match stripped.rfind('\n') {
 54                        Some(pos) => &stripped[pos + 1..],
 55                        None => stripped,
 56                    };
 57                    last_line.trim().is_empty()
 58                })
 59                .unwrap_or(false);
 60
 61            if has_preceding_blank_line {
 62                offsets.push(byte_offset);
 63                lines_since_last_marker = 1;
 64            }
 65        }
 66
 67        byte_offset = actual_line_end;
 68
 69        // Re-check after blank-line logic since lines_since_last_marker may
 70        // have been reset.
 71        if !is_past_end && lines_since_last_marker >= MAX_BLOCK_LINES {
 72            if *offsets.last().unwrap_or(&0) != actual_line_end {
 73                offsets.push(actual_line_end);
 74                lines_since_last_marker = 0;
 75            }
 76        }
 77    }
 78
 79    let end = editable_text.len();
 80    if *offsets.last().unwrap_or(&0) != end {
 81        offsets.push(end);
 82    }
 83
 84    offsets
 85}
 86
 87/// Write the editable region content with marker tags, inserting the cursor
 88/// marker at the given offset within the editable text.
 89pub fn write_editable_with_markers(
 90    output: &mut String,
 91    editable_text: &str,
 92    cursor_offset_in_editable: usize,
 93    cursor_marker: &str,
 94) {
 95    let marker_offsets = compute_marker_offsets(editable_text);
 96    let mut cursor_placed = false;
 97    for (i, &offset) in marker_offsets.iter().enumerate() {
 98        let marker_num = i + 1;
 99        if !output.is_empty() && !output.ends_with('\n') {
100            output.push('\n');
101        }
102        output.push_str(&marker_tag(marker_num));
103
104        if let Some(&next_offset) = marker_offsets.get(i + 1) {
105            output.push('\n');
106            let block = &editable_text[offset..next_offset];
107            if !cursor_placed
108                && cursor_offset_in_editable >= offset
109                && cursor_offset_in_editable <= next_offset
110            {
111                cursor_placed = true;
112                let cursor_in_block = cursor_offset_in_editable - offset;
113                output.push_str(&block[..cursor_in_block]);
114                output.push_str(cursor_marker);
115                output.push_str(&block[cursor_in_block..]);
116            } else {
117                output.push_str(block);
118            }
119        }
120    }
121}
122
123/// Strip any `<|marker_N|>` tags from `text`.
124///
125/// When a marker tag sits on its own line (followed by `\n`), the trailing
126/// newline is also removed so the surrounding lines stay joined naturally.
127fn strip_marker_tags(text: &str) -> String {
128    let mut result = String::with_capacity(text.len());
129    let mut pos = 0;
130    let bytes = text.as_bytes();
131    while let Some(rel) = text[pos..].find(MARKER_TAG_PREFIX) {
132        result.push_str(&text[pos..pos + rel]);
133        let num_start = pos + rel + MARKER_TAG_PREFIX.len();
134        if let Some(suffix_rel) = text[num_start..].find(MARKER_TAG_SUFFIX) {
135            let mut tag_end = num_start + suffix_rel + MARKER_TAG_SUFFIX.len();
136            if bytes.get(tag_end) == Some(&b'\n') {
137                tag_end += 1;
138            }
139            pos = tag_end;
140        } else {
141            result.push_str(MARKER_TAG_PREFIX);
142            pos = num_start;
143        }
144    }
145    result.push_str(&text[pos..]);
146    result
147}
148
149/// Parse model output that uses the marker format.
150///
151/// Returns `(start_marker_num, end_marker_num, content_between_markers)`.
152/// The leading format-level newline after the start marker is stripped.
153/// Trailing newlines are preserved so blank-line endings in the editable
154/// region are not lost.
155///
156/// Any extra intermediate marker tags that the model may have inserted
157/// between the first and last markers are stripped from the returned content.
158pub fn extract_marker_span(text: &str) -> Result<(usize, usize, String)> {
159    let first_tag_start = text
160        .find(MARKER_TAG_PREFIX)
161        .context("no start marker found in output")?;
162    let first_num_start = first_tag_start + MARKER_TAG_PREFIX.len();
163    let first_num_end = text[first_num_start..]
164        .find(MARKER_TAG_SUFFIX)
165        .map(|i| i + first_num_start)
166        .context("malformed start marker tag")?;
167    let start_num: usize = text[first_num_start..first_num_end]
168        .parse()
169        .context("start marker number is not a valid integer")?;
170    let first_tag_end = first_num_end + MARKER_TAG_SUFFIX.len();
171
172    let last_tag_start = text
173        .rfind(MARKER_TAG_PREFIX)
174        .context("no end marker found in output")?;
175    let last_num_start = last_tag_start + MARKER_TAG_PREFIX.len();
176    let last_num_end = text[last_num_start..]
177        .find(MARKER_TAG_SUFFIX)
178        .map(|i| i + last_num_start)
179        .context("malformed end marker tag")?;
180    let end_num: usize = text[last_num_start..last_num_end]
181        .parse()
182        .context("end marker number is not a valid integer")?;
183
184    if start_num == end_num {
185        return Err(anyhow!(
186            "start and end markers are the same (marker {})",
187            start_num
188        ));
189    }
190
191    let mut content_start = first_tag_end;
192    if text.as_bytes().get(content_start) == Some(&b'\n') {
193        content_start += 1;
194    }
195    let content_end = last_tag_start;
196
197    let content = &text[content_start..content_end.max(content_start)];
198    let content = strip_marker_tags(content);
199    Ok((start_num, end_num, content))
200}
201
202/// Given old editable text and model output with marker span, reconstruct the
203/// full new editable region.
204pub fn apply_marker_span(old_editable: &str, output: &str) -> Result<String> {
205    let (start_num, end_num, raw_new_span) = extract_marker_span(output)?;
206    let marker_offsets = compute_marker_offsets(old_editable);
207
208    let start_idx = start_num
209        .checked_sub(1)
210        .context("marker numbers are 1-indexed")?;
211    let end_idx = end_num
212        .checked_sub(1)
213        .context("marker numbers are 1-indexed")?;
214    let start_byte = *marker_offsets
215        .get(start_idx)
216        .context("start marker number out of range")?;
217    let end_byte = *marker_offsets
218        .get(end_idx)
219        .context("end marker number out of range")?;
220
221    if start_byte > end_byte {
222        return Err(anyhow!("start marker must come before end marker"));
223    }
224
225    let old_span = &old_editable[start_byte..end_byte];
226    let mut new_span = raw_new_span;
227    if old_span.ends_with('\n') && !new_span.ends_with('\n') && !new_span.is_empty() {
228        new_span.push('\n');
229    }
230    if !old_span.ends_with('\n') && new_span.ends_with('\n') {
231        new_span.pop();
232    }
233
234    let mut result = String::new();
235    result.push_str(&old_editable[..start_byte]);
236    result.push_str(&new_span);
237    result.push_str(&old_editable[end_byte..]);
238
239    Ok(result)
240}
241
242/// Compare old and new editable text, find the minimal marker span that covers
243/// all changes, and encode the result with marker tags.
244pub fn encode_from_old_and_new(
245    old_editable: &str,
246    new_editable: &str,
247    cursor_offset_in_new: Option<usize>,
248    cursor_marker: &str,
249    end_marker: &str,
250    no_edits_marker: &str,
251) -> Result<String> {
252    if old_editable == new_editable {
253        return Ok(format!("{no_edits_marker}{end_marker}"));
254    }
255
256    let marker_offsets = compute_marker_offsets(old_editable);
257
258    let common_prefix = old_editable
259        .bytes()
260        .zip(new_editable.bytes())
261        .take_while(|(a, b)| a == b)
262        .count();
263
264    let old_remaining = old_editable.len() - common_prefix;
265    let new_remaining = new_editable.len() - common_prefix;
266    let max_suffix = old_remaining.min(new_remaining);
267    let common_suffix = old_editable.as_bytes()[old_editable.len() - max_suffix..]
268        .iter()
269        .rev()
270        .zip(
271            new_editable.as_bytes()[new_editable.len() - max_suffix..]
272                .iter()
273                .rev(),
274        )
275        .take_while(|(a, b)| a == b)
276        .count();
277
278    let change_end_in_old = old_editable.len() - common_suffix;
279
280    let start_marker_idx = marker_offsets
281        .iter()
282        .rposition(|&offset| offset <= common_prefix)
283        .unwrap_or(0);
284    let end_marker_idx = marker_offsets
285        .iter()
286        .position(|&offset| offset >= change_end_in_old)
287        .unwrap_or(marker_offsets.len() - 1);
288
289    let old_start = marker_offsets[start_marker_idx];
290    let old_end = marker_offsets[end_marker_idx];
291
292    let new_start = old_start;
293    let new_end = new_editable
294        .len()
295        .saturating_sub(old_editable.len().saturating_sub(old_end));
296
297    let new_span = &new_editable[new_start..new_end];
298
299    let start_marker_num = start_marker_idx + 1;
300    let end_marker_num = end_marker_idx + 1;
301
302    let mut result = String::new();
303    result.push_str(&marker_tag(start_marker_num));
304    result.push('\n');
305
306    if let Some(cursor_offset) = cursor_offset_in_new {
307        if cursor_offset >= new_start && cursor_offset <= new_end {
308            let cursor_in_span = cursor_offset - new_start;
309            let bounded = cursor_in_span.min(new_span.len());
310            result.push_str(&new_span[..bounded]);
311            result.push_str(cursor_marker);
312            result.push_str(&new_span[bounded..]);
313        } else {
314            result.push_str(new_span);
315        }
316    } else {
317        result.push_str(new_span);
318    }
319
320    if !result.ends_with('\n') {
321        result.push('\n');
322    }
323    result.push_str(&marker_tag(end_marker_num));
324    result.push('\n');
325    result.push_str(end_marker);
326
327    Ok(result)
328}
329
330/// Extract the full editable region from text that uses marker tags.
331///
332/// Returns the concatenation of all block contents between the first and last
333/// markers, with intermediate marker tags stripped.
334pub fn extract_editable_region_from_markers(text: &str) -> Option<String> {
335    let first_marker_start = text.find(MARKER_TAG_PREFIX)?;
336
337    let mut markers: Vec<(usize, usize)> = Vec::new();
338    let mut search_start = first_marker_start;
339    while let Some(rel_pos) = text[search_start..].find(MARKER_TAG_PREFIX) {
340        let tag_start = search_start + rel_pos;
341        let num_start = tag_start + MARKER_TAG_PREFIX.len();
342        let num_end = text[num_start..].find(MARKER_TAG_SUFFIX)?;
343        let tag_end = num_start + num_end + MARKER_TAG_SUFFIX.len();
344        markers.push((tag_start, tag_end));
345        search_start = tag_end;
346    }
347
348    if markers.len() < 2 {
349        return None;
350    }
351
352    let (_, first_tag_end) = markers[0];
353    let (last_tag_start, _) = markers[markers.len() - 1];
354
355    let mut content_start = first_tag_end;
356    if text.as_bytes().get(content_start) == Some(&b'\n') {
357        content_start += 1;
358    }
359    let mut content_end = last_tag_start;
360    if content_end > content_start && text.as_bytes().get(content_end - 1) == Some(&b'\n') {
361        content_end -= 1;
362    }
363
364    let raw = &text[content_start..content_end];
365    let result = strip_marker_tags(raw);
366    let result = result.strip_suffix('\n').unwrap_or(&result).to_string();
367    Some(result)
368}
369
#[cfg(test)]
mod tests {
    //! Unit tests for the marker format: boundary computation, parsing of
    //! model output, applying a span, encoding a diff, and tag stripping.

    use super::*;

    const CURSOR: &str = "<|user_cursor|>";
    const END: &str = ">>>>>>> UPDATED\n";
    const NO_EDITS: &str = "NO_EDITS\n";

    /// Runs `encode_from_old_and_new` with the marker strings shared by the
    /// encode tests.
    fn encode(old: &str, new: &str, cursor: Option<usize>) -> String {
        encode_from_old_and_new(old, new, cursor, CURSOR, END, NO_EDITS)
            .expect("encoding should not fail")
    }

    #[test]
    fn test_compute_marker_offsets_small_block() {
        let text = "aaa\nbbb\nccc\n";
        assert_eq!(compute_marker_offsets(text), vec![0, text.len()]);
    }

    #[test]
    fn test_compute_marker_offsets_blank_line_split() {
        // The split lands in front of "ddd", the first line after the blank.
        let text = "aaa\nbbb\nccc\n\nddd\neee\nfff\n";
        assert_eq!(compute_marker_offsets(text), vec![0, 13, text.len()]);
    }

    #[test]
    fn test_compute_marker_offsets_max_lines_split() {
        // Eight lines fill the first block; the split follows line "8".
        let text = "1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n";
        assert_eq!(compute_marker_offsets(text), vec![0, 16, text.len()]);
    }

    #[test]
    fn test_compute_marker_offsets_empty() {
        assert_eq!(compute_marker_offsets(""), vec![0, 0]);
    }

    #[test]
    fn test_compute_marker_offsets_without_trailing_newline() {
        let text = "aaa\nbbb\nccc\n\nddd";
        assert_eq!(compute_marker_offsets(text), vec![0, 13, text.len()]);
    }

    #[test]
    fn test_write_editable_with_markers_cursor_inside_block() {
        let mut output = String::new();
        write_editable_with_markers(&mut output, "aaa\nbbb\nccc\n", 5, CURSOR);
        assert_eq!(
            output,
            "<|marker_1|>\naaa\nb<|user_cursor|>bb\nccc\n<|marker_2|>"
        );
    }

    #[test]
    fn test_extract_marker_span() {
        let text = "<|marker_2|>\n    new content\n<|marker_3|>\n";
        let (start, end, content) = extract_marker_span(text).unwrap();
        assert_eq!((start, end), (2, 3));
        assert_eq!(content, "    new content\n");
    }

    #[test]
    fn test_extract_marker_span_multi_line() {
        let text = "<|marker_1|>\nline1\nline2\nline3\n<|marker_4|>";
        let (start, end, content) = extract_marker_span(text).unwrap();
        assert_eq!((start, end), (1, 4));
        assert_eq!(content, "line1\nline2\nline3\n");
    }

    #[test]
    fn test_extract_marker_span_rejects_bad_input() {
        assert!(extract_marker_span("no markers at all").is_err());
        assert!(extract_marker_span("<|marker_1|>\nfoo\n<|marker_1|>").is_err());
        assert!(extract_marker_span("<|marker_1|>\nfoo\n<|marker_x|>").is_err());
    }

    #[test]
    fn test_apply_marker_span_basic() {
        let old = "aaa\nbbb\nccc\n";
        let output = "<|marker_1|>\naaa\nBBB\nccc\n<|marker_2|>";
        assert_eq!(apply_marker_span(old, output).unwrap(), "aaa\nBBB\nccc\n");
    }

    #[test]
    fn test_apply_marker_span_preserves_trailing_blank_line() {
        let old = "/\nresult\n\n";
        let output = "<|marker_1|>\n//\nresult\n\n<|marker_2|>";
        assert_eq!(apply_marker_span(old, output).unwrap(), "//\nresult\n\n");
    }

    #[test]
    fn test_apply_marker_span_rejects_unknown_marker() {
        let old = "aaa\n";
        assert!(apply_marker_span(old, "<|marker_1|>\nx\n<|marker_5|>").is_err());
        assert!(apply_marker_span(old, "<|marker_0|>\nx\n<|marker_2|>").is_err());
    }

    #[test]
    fn test_encode_no_edits() {
        let old = "aaa\nbbb\nccc\n";
        assert_eq!(encode(old, old, None), "NO_EDITS\n>>>>>>> UPDATED\n");
    }

    #[test]
    fn test_encode_with_change() {
        let result = encode("aaa\nbbb\nccc\n", "aaa\nBBB\nccc\n", None);
        assert_eq!(
            result,
            "<|marker_1|>\naaa\nBBB\nccc\n<|marker_2|>\n>>>>>>> UPDATED\n"
        );
    }

    #[test]
    fn test_encode_with_cursor() {
        let result = encode("aaa\nbbb\nccc\n", "aaa\nBBB\nccc\n", Some(5));
        assert_eq!(
            result,
            "<|marker_1|>\naaa\nB<|user_cursor|>BB\nccc\n<|marker_2|>\n>>>>>>> UPDATED\n"
        );
    }

    #[test]
    fn test_roundtrip_encode_apply() {
        let old = "line1\nline2\nline3\n\nline5\nline6\nline7\nline8\nline9\nline10\n";
        let new = "line1\nline2\nline3\n\nline5\nLINE6\nline7\nline8\nline9\nline10\n";
        let encoded = encode(old, new, None);
        let output = encoded
            .strip_suffix(END)
            .expect("should have end marker");
        assert_eq!(apply_marker_span(old, output).unwrap(), new);
    }

    #[test]
    fn test_extract_editable_region_from_markers_multi() {
        let text = "prefix\n<|marker_1|>\naaa\nbbb\n<|marker_2|>\nccc\nddd\n<|marker_3|>\nsuffix";
        assert_eq!(
            extract_editable_region_from_markers(text).as_deref(),
            Some("aaa\nbbb\nccc\nddd")
        );
    }

    #[test]
    fn test_extract_editable_region_two_markers() {
        let text = "<|marker_1|>\none\ntwo three\n<|marker_2|>";
        assert_eq!(
            extract_editable_region_from_markers(text).as_deref(),
            Some("one\ntwo three")
        );
    }

    #[test]
    fn test_extract_editable_region_needs_two_markers() {
        assert_eq!(extract_editable_region_from_markers("plain text"), None);
        assert_eq!(
            extract_editable_region_from_markers("<|marker_1|>\nonly one"),
            None
        );
    }

    #[test]
    fn test_extract_marker_span_strips_intermediate_markers() {
        let text = "<|marker_2|>\nline1\n<|marker_3|>\nline2\n<|marker_4|>";
        let (start, end, content) = extract_marker_span(text).unwrap();
        assert_eq!((start, end), (2, 4));
        assert_eq!(content, "line1\nline2\n");
    }

    #[test]
    fn test_extract_marker_span_strips_multiple_intermediate_markers() {
        let text = "<|marker_1|>\naaa\n<|marker_2|>\nbbb\n<|marker_3|>\nccc\n<|marker_4|>";
        let (start, end, content) = extract_marker_span(text).unwrap();
        assert_eq!((start, end), (1, 4));
        assert_eq!(content, "aaa\nbbb\nccc\n");
    }

    #[test]
    fn test_apply_marker_span_with_extra_intermediate_marker() {
        let old = "aaa\nbbb\nccc\n";
        let output = "<|marker_1|>\naaa\n<|marker_1|>\nBBB\nccc\n<|marker_2|>";
        assert_eq!(apply_marker_span(old, output).unwrap(), "aaa\nBBB\nccc\n");
    }

    #[test]
    fn test_strip_marker_tags_inline() {
        assert_eq!(strip_marker_tags("no markers here"), "no markers here");
        assert_eq!(strip_marker_tags("before<|marker_5|>after"), "beforeafter");
        assert_eq!(
            strip_marker_tags("line1\n<|marker_3|>\nline2"),
            "line1\nline2"
        );
    }
}