1pub mod excerpt_ranges;
2
3use anyhow::{Result, anyhow};
4use serde::{Deserialize, Serialize};
5use std::fmt::Write;
6use std::ops::Range;
7use std::path::Path;
8use std::sync::Arc;
9use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
10
11pub use crate::excerpt_ranges::{
12 ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
13};
14
/// Marker string written into prompts at the user's cursor position.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that [`format_zeta_prompt`] passes to the budgeted formatter.
pub const MAX_PROMPT_TOKENS: usize = 4096;
17
18/// Use up to this amount of the editable region for prefill.
19/// Larger values may result in more robust generation, but
20/// this region becomes non-editable.
21pub const PREFILL_RATIO: f64 = 0.1; // 10%
22
/// Rough token estimate for `bytes` bytes of text: one token per three bytes,
/// rounded down.
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
26
/// Everything needed to build a prompt for one edit prediction request.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written into the cursor
    /// section's file header.
    pub cursor_path: Arc<Path>,
    /// Text surrounding the cursor. The editable and context regions are
    /// byte ranges within this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// When present, added to rows computed within `cursor_excerpt` so that
    /// related-file excerpts already covered by the cursor context can be
    /// filtered out.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events rendered into the prompt's edit history section.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other files to include in the prompt; `None` is treated
    /// as an empty list.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    /// Diagnostics for the active buffer.
    // NOTE(review): not consumed by the formatting code visible in this part
    // of the file — confirm where it is used.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    /// Whether the cursor file is in an open source repository
    /// (presumably; the flag is not interpreted in the visible code).
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// Whether data may be collected for this request
    /// (presumably; the flag is not interpreted in the visible code).
    #[serde(default)]
    pub can_collect_data: bool,
    /// URL of the repository, when known.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
57
/// Identifies which prompt/output format a request uses.
///
/// The variant name doubles as the format's string name: both `Display` and
/// [`ZetaFormat::parse`] go through the `IntoStaticStr` derive. The default
/// is [`ZetaFormat::V0131GitMergeMarkersPrefix`].
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// `v0226Hashline` starts with a lowercase letter, hence the lint allowance.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    /// Cursor section written by `v0112_middle_at_end`.
    V0112MiddleAtEnd,
    /// Cursor section written by `v0113_ordered`.
    V0113Ordered,
    /// Same cursor section as `V0113Ordered`, with a 180-token editable limit.
    V0114180EditableRegion,
    /// Cursor section written by `v0120_git_merge_markers`.
    V0120GitMergeMarkers,
    /// Cursor section written by `v0131_git_merge_markers_prefix`.
    #[default]
    V0131GitMergeMarkersPrefix,
    /// Same cursor section as `V0131GitMergeMarkersPrefix`, plus a prefill
    /// computed by `v0211_prefill::get_prefill`.
    V0211Prefill,
    /// Prompt built by the `seed_coder` module.
    V0211SeedCoder,
    /// Cursor section written by the `hashline` module.
    v0226Hashline,
    /// Cursor section written by the `v0304_variable_edit` module.
    V0304VariableEdit,
    /// Prompt built by the `seed_coder` module; output may start with
    /// `seed_coder::NO_EDITS`.
    V0304SeedNoEdits,
}
85
86impl std::fmt::Display for ZetaFormat {
87 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
88 write!(f, "{}", <&'static str>::from(self))
89 }
90}
91
92impl ZetaFormat {
93 pub fn parse(format_name: &str) -> Result<Self> {
94 let mut results = ZetaFormat::iter().filter(|version| {
95 <&'static str>::from(version)
96 .to_lowercase()
97 .contains(&format_name.to_lowercase())
98 });
99 let Some(result) = results.next() else {
100 anyhow::bail!(
101 "`{format_name}` did not match any of:\n{}",
102 Self::options_as_string()
103 );
104 };
105 if results.next().is_some() {
106 anyhow::bail!(
107 "`{format_name}` matched more than one of:\n{}",
108 Self::options_as_string()
109 );
110 }
111 Ok(result)
112 }
113
114 pub fn options_as_string() -> String {
115 ZetaFormat::iter()
116 .map(|format| format!("- {}\n", <&'static str>::from(format)))
117 .collect::<Vec<_>>()
118 .concat()
119 }
120}
121
/// An edit-history event that can be rendered into a prompt via
/// [`write_event`].
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, expressed as a diff.
    BufferChange {
        /// Path of the buffer after the change (rendered on the `+++ b` line).
        path: Arc<Path>,
        /// Path of the buffer before the change (rendered on the `--- a` line).
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the two path lines.
        diff: String,
        /// When true, the rendered event is prefixed with
        /// `// User accepted prediction:`.
        predicted: bool,
        /// Flag exposed through [`Event::in_open_source_repo`]; it is not
        /// written into the prompt.
        in_open_source_repo: bool,
    },
}
133
134impl Event {
135 pub fn in_open_source_repo(&self) -> bool {
136 match self {
137 Event::BufferChange {
138 in_open_source_repo,
139 ..
140 } => *in_open_source_repo,
141 }
142 }
143}
144
145pub fn write_event(prompt: &mut String, event: &Event) {
146 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
147 for component in path.components() {
148 prompt.push('/');
149 write!(prompt, "{}", component.as_os_str().display()).ok();
150 }
151 }
152 match event {
153 Event::BufferChange {
154 path,
155 old_path,
156 diff,
157 predicted,
158 in_open_source_repo: _,
159 } => {
160 if *predicted {
161 prompt.push_str("// User accepted prediction:\n");
162 }
163 prompt.push_str("--- a");
164 write_path_as_unix_str(prompt, old_path.as_ref());
165 prompt.push_str("\n+++ b");
166 write_path_as_unix_str(prompt, path.as_ref());
167 prompt.push('\n');
168 prompt.push_str(diff);
169 }
170 }
171}
172
/// A diagnostic reported for the active buffer, together with a snippet of
/// the surrounding text.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    /// Diagnostic severity, if any.
    // NOTE(review): the numeric encoding is not defined in this file —
    // confirm against the producer.
    pub severity: Option<i32>,
    /// The diagnostic message.
    pub message: String,
    /// Snippet of buffer text associated with the diagnostic.
    pub snippet: String,
    /// Rows of the buffer that `snippet` covers.
    pub snippet_buffer_row_range: Range<u32>,
    /// Range of the diagnostic within `snippet`
    /// (presumably byte offsets — confirm against the producer).
    pub diagnostic_range_in_snippet: Range<usize>,
}
181
/// A file related to the cursor location, with the excerpts of it that may be
/// included in the prompt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written into the file's header line in the prompt.
    pub path: Arc<Path>,
    /// Row bound for the file: an excerpt whose `row_range.end` is below this
    /// value is followed by a `...` line when rendered.
    pub max_row: u32,
    /// Excerpts of this file. Files left with no excerpts are dropped by
    /// [`filter_redundant_excerpts`].
    pub excerpts: Vec<RelatedExcerpt>,
    /// Whether this file is in an open source repository
    /// (presumably; the flag is not interpreted in the visible code).
    #[serde(default)]
    pub in_open_source_repo: bool,
}
190
/// A contiguous piece of a [`RelatedFile`].
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file that this excerpt covers.
    pub row_range: Range<u32>,
    /// The excerpt's text.
    pub text: Arc<str>,
    /// Priority used when fitting excerpts into a token budget: excerpts with
    /// a lower `order` are considered first.
    #[serde(default)]
    pub order: usize,
}
198
199pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
200 special_tokens_for_format(format)
201 .iter()
202 .any(|token| input.cursor_excerpt.contains(token))
203}
204
/// Formats the prompt for `input` in the given `format`, using
/// [`MAX_PROMPT_TOKENS`] as the token budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
208
209pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
210 match format {
211 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
212 ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
213 ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
214 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
215 ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
216 ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
217 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
218 ZetaFormat::v0226Hashline => hashline::special_tokens(),
219 ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
220 ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
221 }
222}
223
224/// Returns the (editable_token_limit, context_token_limit) for a given format.
225pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
226 match format {
227 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
228 ZetaFormat::V0114180EditableRegion => (180, 350),
229 ZetaFormat::V0120GitMergeMarkers
230 | ZetaFormat::V0131GitMergeMarkersPrefix
231 | ZetaFormat::V0211Prefill
232 | ZetaFormat::V0211SeedCoder
233 | ZetaFormat::v0226Hashline
234 | ZetaFormat::V0304SeedNoEdits => (350, 150),
235 ZetaFormat::V0304VariableEdit => (1024, 0),
236 }
237}
238
239pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
240 match format {
241 ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
242 ZetaFormat::V0112MiddleAtEnd
243 | ZetaFormat::V0113Ordered
244 | ZetaFormat::V0114180EditableRegion
245 | ZetaFormat::V0120GitMergeMarkers
246 | ZetaFormat::V0131GitMergeMarkersPrefix
247 | ZetaFormat::V0211Prefill
248 | ZetaFormat::V0211SeedCoder
249 | ZetaFormat::V0304VariableEdit
250 | ZetaFormat::V0304SeedNoEdits => &[],
251 }
252}
253
254pub fn excerpt_ranges_for_format(
255 format: ZetaFormat,
256 ranges: &ExcerptRanges,
257) -> (Range<usize>, Range<usize>) {
258 match format {
259 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
260 ranges.editable_150.clone(),
261 ranges.editable_150_context_350.clone(),
262 ),
263 ZetaFormat::V0114180EditableRegion => (
264 ranges.editable_180.clone(),
265 ranges.editable_180_context_350.clone(),
266 ),
267 ZetaFormat::V0120GitMergeMarkers
268 | ZetaFormat::V0131GitMergeMarkersPrefix
269 | ZetaFormat::V0211Prefill
270 | ZetaFormat::V0211SeedCoder
271 | ZetaFormat::v0226Hashline
272 | ZetaFormat::V0304SeedNoEdits => (
273 ranges.editable_350.clone(),
274 ranges.editable_350_context_150.clone(),
275 ),
276 ZetaFormat::V0304VariableEdit => {
277 let context = ranges
278 .editable_350_context_1024
279 .clone()
280 .or(ranges.editable_350_context_512.clone())
281 .unwrap_or_else(|| ranges.editable_350_context_150.clone());
282 (context.clone(), context)
283 }
284 }
285}
286
/// Appends the cursor-excerpt section of the prompt for `format` to `prompt`,
/// by delegating to the module that implements that format.
///
/// `editable_range` and `cursor_offset` are passed through to the format's
/// writer together with `context`. `V0304VariableEdit` is the exception: its
/// writer is not given `editable_range`.
pub fn write_cursor_excerpt_section_for_format(
    format: ZetaFormat,
    prompt: &mut String,
    path: &Path,
    context: &str,
    editable_range: &Range<usize>,
    cursor_offset: usize,
) {
    match format {
        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // V0114180EditableRegion shares the V0113 layout.
        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
            v0113_ordered::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // V0211Prefill shares the V0131 layout.
        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
            seed_coder::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // The variable-edit writer takes no editable range.
        ZetaFormat::V0304VariableEdit => {
            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
        }
    }
}
349
/// Converts a byte range within `text` into the range of rows it touches.
///
/// The start row is the number of newlines before `range.start`. The end row
/// is exclusive: it advances by one for each newline inside the range, plus
/// one more when the text up to `range.end` does not end with a newline (the
/// range stops partway through a row).
///
/// # Panics
///
/// Panics if `range` is out of bounds for `text`, is reversed, or does not
/// fall on UTF-8 character boundaries.
fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
    let newline_count = |s: &str| s.matches('\n').count() as u32;

    let start_row = newline_count(&text[..range.start]);
    let mut end_row = start_row + newline_count(&text[range.start..range.end]);
    if !text[..range.end].ends_with('\n') {
        end_row += 1;
    }
    start_row..end_row
}
358
359pub fn format_prompt_with_budget_for_format(
360 input: &ZetaPromptInput,
361 format: ZetaFormat,
362 max_tokens: usize,
363) -> String {
364 let (context, editable_range, context_range, cursor_offset) =
365 resolve_cursor_region(input, format);
366 let path = &*input.cursor_path;
367
368 let empty_files = Vec::new();
369 let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
370 let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
371 let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
372 let row_range = relative_row_range.start + cursor_excerpt_start_row
373 ..relative_row_range.end + cursor_excerpt_start_row;
374 &filter_redundant_excerpts(
375 input_related_files.to_vec(),
376 input.cursor_path.as_ref(),
377 row_range,
378 )
379 } else {
380 input_related_files
381 };
382
383 match format {
384 ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
385 seed_coder::format_prompt_with_budget(
386 path,
387 context,
388 &editable_range,
389 cursor_offset,
390 &input.events,
391 related_files,
392 max_tokens,
393 )
394 }
395 _ => {
396 let mut cursor_section = String::new();
397 write_cursor_excerpt_section_for_format(
398 format,
399 &mut cursor_section,
400 path,
401 context,
402 &editable_range,
403 cursor_offset,
404 );
405
406 let cursor_tokens = estimate_tokens(cursor_section.len());
407 let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
408
409 let edit_history_section = format_edit_history_within_budget(
410 &input.events,
411 "<|file_sep|>",
412 "edit history",
413 budget_after_cursor,
414 );
415 let edit_history_tokens = estimate_tokens(edit_history_section.len());
416 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
417
418 let related_files_section = format_related_files_within_budget(
419 &related_files,
420 "<|file_sep|>",
421 "",
422 budget_after_edit_history,
423 );
424
425 let mut prompt = String::new();
426 prompt.push_str(&related_files_section);
427 prompt.push_str(&edit_history_section);
428 prompt.push_str(&cursor_section);
429 prompt
430 }
431 }
432}
433
434pub fn filter_redundant_excerpts(
435 mut related_files: Vec<RelatedFile>,
436 cursor_path: &Path,
437 cursor_row_range: Range<u32>,
438) -> Vec<RelatedFile> {
439 for file in &mut related_files {
440 if file.path.as_ref() == cursor_path {
441 file.excerpts.retain(|excerpt| {
442 excerpt.row_range.start < cursor_row_range.start
443 || excerpt.row_range.end > cursor_row_range.end
444 });
445 }
446 }
447 related_files.retain(|file| !file.excerpts.is_empty());
448 related_files
449}
450
451pub fn get_prefill_for_format(
452 format: ZetaFormat,
453 context: &str,
454 editable_range: &Range<usize>,
455) -> String {
456 match format {
457 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
458 ZetaFormat::V0112MiddleAtEnd
459 | ZetaFormat::V0113Ordered
460 | ZetaFormat::V0114180EditableRegion
461 | ZetaFormat::V0120GitMergeMarkers
462 | ZetaFormat::V0131GitMergeMarkersPrefix
463 | ZetaFormat::V0211SeedCoder
464 | ZetaFormat::v0226Hashline
465 | ZetaFormat::V0304VariableEdit => String::new(),
466 ZetaFormat::V0304SeedNoEdits => String::new(),
467 }
468}
469
470pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
471 match format {
472 ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
473 ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
474 ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
475 ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => Some(seed_coder::END_MARKER),
476 ZetaFormat::V0112MiddleAtEnd
477 | ZetaFormat::V0113Ordered
478 | ZetaFormat::V0114180EditableRegion
479 | ZetaFormat::v0226Hashline
480 | ZetaFormat::V0304VariableEdit => None,
481 }
482}
483
484pub fn encode_patch_as_output_for_format(
485 format: ZetaFormat,
486 old_editable_region: &str,
487 patch: &str,
488 cursor_offset: Option<usize>,
489) -> Result<Option<String>> {
490 match format {
491 ZetaFormat::v0226Hashline => {
492 hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
493 }
494 ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
495 old_editable_region,
496 patch,
497 cursor_offset,
498 )
499 .map(Some),
500 ZetaFormat::V0304SeedNoEdits => Ok(seed_coder::no_edits(patch)),
501 _ => Ok(None),
502 }
503}
504
/// The result of parsing model output with [`parse_zeta2_model_output`]: a
/// replacement text and the part of the cursor excerpt it replaces.
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
}
511
512/// Parse model output for the given zeta format
513pub fn parse_zeta2_model_output(
514 output: &str,
515 format: ZetaFormat,
516 prompt_inputs: &ZetaPromptInput,
517) -> Result<ParsedOutput> {
518 let output = match output_end_marker_for_format(format) {
519 Some(marker) => output.strip_suffix(marker).unwrap_or(output),
520 None => output,
521 };
522
523 let (context, editable_range_in_context, context_range, _) =
524 resolve_cursor_region(prompt_inputs, format);
525 let context_start = context_range.start;
526 let old_editable_region = &context[editable_range_in_context.clone()];
527
528 let (range_in_context, output) = match format {
529 ZetaFormat::v0226Hashline => (
530 editable_range_in_context,
531 if hashline::output_has_edit_commands(output) {
532 hashline::apply_edit_commands(old_editable_region, output)
533 } else {
534 output.to_string()
535 },
536 ),
537 ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
538 ZetaFormat::V0304SeedNoEdits => (
539 editable_range_in_context,
540 if output.starts_with(seed_coder::NO_EDITS) {
541 old_editable_region.to_string()
542 } else {
543 output.to_string()
544 },
545 ),
546 _ => (editable_range_in_context, output.to_string()),
547 };
548
549 let range_in_excerpt =
550 range_in_context.start + context_start..range_in_context.end + context_start;
551
552 Ok(ParsedOutput {
553 new_editable_region: output,
554 range_in_excerpt,
555 })
556}
557
/// Alias for [`excerpt_ranges_for_format`]: returns the
/// `(editable_range, context_range)` pair of `ranges` that `format` uses.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
564
565pub fn resolve_cursor_region(
566 input: &ZetaPromptInput,
567 format: ZetaFormat,
568) -> (&str, Range<usize>, Range<usize>, usize) {
569 let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges {
570 let (editable_tokens, context_tokens) = token_limits_for_format(format);
571 compute_editable_and_context_ranges(
572 &input.cursor_excerpt,
573 input.cursor_offset_in_excerpt,
574 syntax_ranges,
575 editable_tokens,
576 context_tokens,
577 )
578 } else {
579 excerpt_range_for_format(format, &input.excerpt_ranges)
580 };
581 let context_start = context_range.start;
582 let context_text = &input.cursor_excerpt[context_range.clone()];
583 let adjusted_editable =
584 (editable_range.start - context_start)..(editable_range.end - context_start);
585 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
586
587 (
588 context_text,
589 adjusted_editable,
590 context_range,
591 adjusted_cursor,
592 )
593}
594
595pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
596 let (context, editable_range, _, _) = resolve_cursor_region(input, format);
597 get_prefill_for_format(format, context, &editable_range)
598}
599
600fn format_edit_history_within_budget(
601 events: &[Arc<Event>],
602 file_marker: &str,
603 edit_history_name: &str,
604 max_tokens: usize,
605) -> String {
606 let header = format!("{}{}\n", file_marker, edit_history_name);
607 let header_tokens = estimate_tokens(header.len());
608 if header_tokens >= max_tokens {
609 return String::new();
610 }
611
612 let mut event_strings: Vec<String> = Vec::new();
613 let mut total_tokens = header_tokens;
614
615 for event in events.iter().rev() {
616 let mut event_str = String::new();
617 write_event(&mut event_str, event);
618 let event_tokens = estimate_tokens(event_str.len());
619
620 if total_tokens + event_tokens > max_tokens {
621 break;
622 }
623 total_tokens += event_tokens;
624 event_strings.push(event_str);
625 }
626
627 if event_strings.is_empty() {
628 return String::new();
629 }
630
631 let mut result = header;
632 for event_str in event_strings.iter().rev() {
633 result.push_str(event_str);
634 }
635 result
636}
637
638fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
639 let needs_newline = !excerpt.text.ends_with('\n');
640 let needs_ellipsis = excerpt.row_range.end < file_max_row;
641 let len = excerpt.text.len()
642 + if needs_newline { "\n".len() } else { 0 }
643 + if needs_ellipsis { "...\n".len() } else { 0 };
644 estimate_tokens(len)
645}
646
/// Renders as many related-file excerpts as fit within `max_tokens`.
///
/// Excerpts are admitted in ascending `order` (ties broken by file index,
/// then excerpt index) until the next one would exceed the budget. The
/// admitted excerpts are then rendered grouped by file, in the files'
/// original order. Each file starts with a `{file_prefix}{path}` header
/// line; `file_suffix` is written between consecutive files (not after the
/// last one). An excerpt that ends before its file's `max_row` is followed by
/// a `...` line.
pub fn format_related_files_within_budget(
    related_files: &[RelatedFile],
    file_prefix: &str,
    file_suffix: &str,
    max_tokens: usize,
) -> String {
    // Identifies one excerpt by position, together with its priority.
    struct ExcerptCandidate {
        file_ix: usize,
        excerpt_ix: usize,
        order: usize,
    }

    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
        .iter()
        .enumerate()
        .flat_map(|(file_ix, file)| {
            file.excerpts
                .iter()
                .enumerate()
                .map(move |(excerpt_ix, e)| ExcerptCandidate {
                    file_ix,
                    excerpt_ix,
                    order: e.order,
                })
        })
        .collect();

    // Pre-compute file header strings and their token costs.
    let file_headers: Vec<String> = related_files
        .iter()
        .map(|file| {
            let path_str = file.path.to_string_lossy();
            format!("{}{}\n", file_prefix, path_str)
        })
        .collect();

    // Sort the excerpts by their order and determine how many fit within the budget.
    let mut total_tokens = 0;
    let mut included_excerpt_count = 0_usize;
    let mut included_file_indices = vec![false; related_files.len()];
    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
    for candidate in &excerpt_candidates {
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        let file_already_included = included_file_indices[candidate.file_ix];
        // The header (plus suffix) is charged only for a file's first
        // admitted excerpt.
        let header_cost = if file_already_included {
            0
        } else {
            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
        };
        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
        // Stop at the first excerpt that does not fit; lower-priority
        // excerpts are not considered even if they are smaller.
        if total_tokens + header_cost + excerpt_cost > max_tokens {
            break;
        }
        total_tokens += header_cost + excerpt_cost;
        if !file_already_included {
            included_file_indices[candidate.file_ix] = true;
        }
        included_excerpt_count += 1;
    }

    // Keep only the admitted prefix, then restore file/excerpt order.
    excerpt_candidates.truncate(included_excerpt_count);
    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));

    // Render all of the files that fit within the token budget, in the original order.
    let mut result = String::new();
    let mut last_file_ix = None;
    for candidate in &excerpt_candidates {
        if last_file_ix != Some(candidate.file_ix) {
            // Starting a new file: close the previous one, then write the header.
            if last_file_ix.is_some() {
                result.push_str(file_suffix);
            }
            result.push_str(&file_headers[candidate.file_ix]);
            last_file_ix = Some(candidate.file_ix);
        }
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        result.push_str(&excerpt.text);
        if !result.ends_with('\n') {
            result.push('\n');
        }
        if excerpt.row_range.end < file.max_row {
            result.push_str("...\n");
        }
    }

    result
}
735
736pub fn write_related_files(
737 prompt: &mut String,
738 related_files: &[RelatedFile],
739) -> Vec<Range<usize>> {
740 let mut ranges = Vec::new();
741 for file in related_files {
742 let start = prompt.len();
743 let path_str = file.path.to_string_lossy();
744 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
745 for excerpt in &file.excerpts {
746 prompt.push_str(&excerpt.text);
747 if !prompt.ends_with('\n') {
748 prompt.push('\n');
749 }
750 if excerpt.row_range.end < file.max_row {
751 prompt.push_str("...\n");
752 }
753 }
754 let end = prompt.len();
755 ranges.push(start..end);
756 }
757 ranges
758}
759
760mod v0112_middle_at_end {
761 use super::*;
762
763 pub fn special_tokens() -> &'static [&'static str] {
764 &[
765 "<|fim_prefix|>",
766 "<|fim_suffix|>",
767 "<|fim_middle|>",
768 "<|file_sep|>",
769 CURSOR_MARKER,
770 ]
771 }
772
773 pub fn write_cursor_excerpt_section(
774 prompt: &mut String,
775 path: &Path,
776 context: &str,
777 editable_range: &Range<usize>,
778 cursor_offset: usize,
779 ) {
780 let path_str = path.to_string_lossy();
781 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
782
783 prompt.push_str("<|fim_prefix|>\n");
784 prompt.push_str(&context[..editable_range.start]);
785
786 prompt.push_str("<|fim_suffix|>\n");
787 prompt.push_str(&context[editable_range.end..]);
788 if !prompt.ends_with('\n') {
789 prompt.push('\n');
790 }
791
792 prompt.push_str("<|fim_middle|>current\n");
793 prompt.push_str(&context[editable_range.start..cursor_offset]);
794 prompt.push_str(CURSOR_MARKER);
795 prompt.push_str(&context[cursor_offset..editable_range.end]);
796 if !prompt.ends_with('\n') {
797 prompt.push('\n');
798 }
799
800 prompt.push_str("<|fim_middle|>updated\n");
801 }
802}
803
804mod v0113_ordered {
805 use super::*;
806
807 pub fn special_tokens() -> &'static [&'static str] {
808 &[
809 "<|fim_prefix|>",
810 "<|fim_suffix|>",
811 "<|fim_middle|>",
812 "<|file_sep|>",
813 CURSOR_MARKER,
814 ]
815 }
816
817 pub fn write_cursor_excerpt_section(
818 prompt: &mut String,
819 path: &Path,
820 context: &str,
821 editable_range: &Range<usize>,
822 cursor_offset: usize,
823 ) {
824 let path_str = path.to_string_lossy();
825 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
826
827 prompt.push_str("<|fim_prefix|>\n");
828 prompt.push_str(&context[..editable_range.start]);
829 if !prompt.ends_with('\n') {
830 prompt.push('\n');
831 }
832
833 prompt.push_str("<|fim_middle|>current\n");
834 prompt.push_str(&context[editable_range.start..cursor_offset]);
835 prompt.push_str(CURSOR_MARKER);
836 prompt.push_str(&context[cursor_offset..editable_range.end]);
837 if !prompt.ends_with('\n') {
838 prompt.push('\n');
839 }
840
841 prompt.push_str("<|fim_suffix|>\n");
842 prompt.push_str(&context[editable_range.end..]);
843 if !prompt.ends_with('\n') {
844 prompt.push('\n');
845 }
846
847 prompt.push_str("<|fim_middle|>updated\n");
848 }
849}
850
mod v0114180_editable_region {
    use super::*;

    /// Marker strings reserved by this format: the same set as
    /// `v0113_ordered`, to which this delegates.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
858
859pub mod v0120_git_merge_markers {
860 //! A prompt that uses git-style merge conflict markers to represent the editable region.
861 //!
862 //! Example prompt:
863 //!
864 //! <|file_sep|>path/to/target_file.py
865 //! <|fim_prefix|>
866 //! code before editable region
867 //! <|fim_suffix|>
868 //! code after editable region
869 //! <|fim_middle|>
870 //! <<<<<<< CURRENT
871 //! code that
872 //! needs to<|user_cursor|>
873 //! be rewritten
874 //! =======
875 //!
876 //! Expected output (should be generated by the model):
877 //!
878 //! updated
879 //! code with
880 //! changes applied
881 //! >>>>>>> UPDATED
882
883 use super::*;
884
885 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
886 pub const SEPARATOR: &str = "=======\n";
887 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
888
889 pub fn special_tokens() -> &'static [&'static str] {
890 &[
891 "<|fim_prefix|>",
892 "<|fim_suffix|>",
893 "<|fim_middle|>",
894 "<|file_sep|>",
895 START_MARKER,
896 SEPARATOR,
897 END_MARKER,
898 CURSOR_MARKER,
899 ]
900 }
901
902 pub fn write_cursor_excerpt_section(
903 prompt: &mut String,
904 path: &Path,
905 context: &str,
906 editable_range: &Range<usize>,
907 cursor_offset: usize,
908 ) {
909 let path_str = path.to_string_lossy();
910 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
911
912 prompt.push_str("<|fim_prefix|>");
913 prompt.push_str(&context[..editable_range.start]);
914
915 prompt.push_str("<|fim_suffix|>");
916 prompt.push_str(&context[editable_range.end..]);
917 if !prompt.ends_with('\n') {
918 prompt.push('\n');
919 }
920
921 prompt.push_str("<|fim_middle|>");
922 prompt.push_str(START_MARKER);
923 prompt.push_str(&context[editable_range.start..cursor_offset]);
924 prompt.push_str(CURSOR_MARKER);
925 prompt.push_str(&context[cursor_offset..editable_range.end]);
926 if !prompt.ends_with('\n') {
927 prompt.push('\n');
928 }
929 prompt.push_str(SEPARATOR);
930 }
931}
932
933pub mod v0131_git_merge_markers_prefix {
934 //! A prompt that uses git-style merge conflict markers to represent the editable region.
935 //!
936 //! Example prompt:
937 //!
938 //! <|file_sep|>path/to/target_file.py
939 //! <|fim_prefix|>
940 //! code before editable region
941 //! <<<<<<< CURRENT
942 //! code that
943 //! needs to<|user_cursor|>
944 //! be rewritten
945 //! =======
946 //! <|fim_suffix|>
947 //! code after editable region
948 //! <|fim_middle|>
949 //!
950 //! Expected output (should be generated by the model):
951 //!
952 //! updated
953 //! code with
954 //! changes applied
955 //! >>>>>>> UPDATED
956
957 use super::*;
958
959 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
960 pub const SEPARATOR: &str = "=======\n";
961 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
962
963 pub fn special_tokens() -> &'static [&'static str] {
964 &[
965 "<|fim_prefix|>",
966 "<|fim_suffix|>",
967 "<|fim_middle|>",
968 "<|file_sep|>",
969 START_MARKER,
970 SEPARATOR,
971 END_MARKER,
972 CURSOR_MARKER,
973 ]
974 }
975
976 pub fn write_cursor_excerpt_section(
977 prompt: &mut String,
978 path: &Path,
979 context: &str,
980 editable_range: &Range<usize>,
981 cursor_offset: usize,
982 ) {
983 let path_str = path.to_string_lossy();
984 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
985
986 prompt.push_str("<|fim_prefix|>");
987 prompt.push_str(&context[..editable_range.start]);
988 prompt.push_str(START_MARKER);
989 prompt.push_str(&context[editable_range.start..cursor_offset]);
990 prompt.push_str(CURSOR_MARKER);
991 prompt.push_str(&context[cursor_offset..editable_range.end]);
992 if !prompt.ends_with('\n') {
993 prompt.push('\n');
994 }
995 prompt.push_str(SEPARATOR);
996
997 prompt.push_str("<|fim_suffix|>");
998 prompt.push_str(&context[editable_range.end..]);
999 if !prompt.ends_with('\n') {
1000 prompt.push('\n');
1001 }
1002
1003 prompt.push_str("<|fim_middle|>");
1004 }
1005}
1006
1007pub mod v0211_prefill {
1008 use super::*;
1009
1010 pub fn special_tokens() -> &'static [&'static str] {
1011 v0131_git_merge_markers_prefix::special_tokens()
1012 }
1013
1014 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1015 let editable_region = &context[editable_range.start..editable_range.end];
1016
1017 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1018 let prefill_len = editable_region.floor_char_boundary(prefill_len);
1019
1020 // Find a token boundary to avoid splitting tokens in the prefill.
1021 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1022 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1023 // the \n and consume any consecutive \n characters after it.
1024 let prefill = &editable_region[..prefill_len];
1025 match prefill.rfind('\n') {
1026 Some(pos) => {
1027 let mut end = pos + 1;
1028 while end < editable_region.len()
1029 && editable_region.as_bytes().get(end) == Some(&b'\n')
1030 {
1031 end += 1;
1032 }
1033 editable_region[..end].to_string()
1034 }
1035 // No newline found. Fall back to splitting before the last space
1036 // (word-level boundary)
1037 None => match prefill.rfind(' ') {
1038 Some(pos) => prefill[..pos].to_string(),
1039 None => prefill.to_string(),
1040 },
1041 }
1042 }
1043}
1044
1045pub mod hashline {
1046
1047 use std::fmt::Display;
1048
    /// Marker line (written with a trailing newline) that ends the hashline
    /// cursor section.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Marker written directly before the hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";
1051
1052 use super::*;
1053
    /// Marker for the "set" edit command in model output.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Marker for the "insert" edit command in model output.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
    /// Marker for the "no edits" command; also the hashline format's stop token.
    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1057
1058 pub fn special_tokens() -> &'static [&'static str] {
1059 return &[
1060 SET_COMMAND_MARKER,
1061 "<|set_range|>",
1062 INSERT_COMMAND_MARKER,
1063 NO_EDITS_COMMAND_MARKER,
1064 CURSOR_MARKER,
1065 "<|file_sep|>",
1066 "<|fim_prefix|>",
1067 "<|fim_suffix|>",
1068 "<|fim_middle|>",
1069 ];
1070 }
1071
1072 /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1073 #[derive(Debug, Clone, PartialEq, Eq)]
1074 struct LineRef {
1075 index: usize,
1076 hash: u8,
1077 }
1078
1079 impl Display for LineRef {
1080 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1081 write!(f, "{}:{:02x}", self.index, self.hash)
1082 }
1083 }
1084
1085 pub fn hash_line(line: &[u8]) -> u8 {
1086 let mut h: u8 = 0;
1087 for &byte in line {
1088 h = h.wrapping_add(byte);
1089 }
1090 return h;
1091 }
1092
1093 /// Write the hashline-encoded editable region into `out`. Each line of
1094 /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1095 /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1096 /// to the start of `editable_text`).
1097 pub fn write_hashline_editable_region(
1098 out: &mut String,
1099 editable_text: &str,
1100 cursor_offset_in_editable: usize,
1101 ) {
1102 let mut offset = 0;
1103 for (i, line) in editable_text.lines().enumerate() {
1104 let (head, cursor, tail) = if cursor_offset_in_editable > offset
1105 && cursor_offset_in_editable < offset + line.len()
1106 {
1107 (
1108 &line[..cursor_offset_in_editable - offset],
1109 CURSOR_MARKER,
1110 &line[cursor_offset_in_editable - offset..],
1111 )
1112 } else {
1113 (line, "", "")
1114 };
1115 write!(
1116 out,
1117 "\n{}|{head}{cursor}{tail}",
1118 LineRef {
1119 index: i,
1120 hash: hash_line(line.as_bytes())
1121 }
1122 )
1123 .unwrap();
1124 offset += line.len() + 1;
1125 }
1126 }
1127
1128 pub fn write_cursor_excerpt_section(
1129 prompt: &mut String,
1130 path: &Path,
1131 context: &str,
1132 editable_range: &Range<usize>,
1133 cursor_offset: usize,
1134 ) {
1135 let path_str = path.to_string_lossy();
1136 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1137
1138 prompt.push_str("<|fim_prefix|>\n");
1139 prompt.push_str(&context[..editable_range.start]);
1140 prompt.push_str(START_MARKER);
1141
1142 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1143 let editable_region = &context[editable_range.clone()];
1144 write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1145
1146 if !prompt.ends_with('\n') {
1147 prompt.push('\n');
1148 }
1149
1150 prompt.push_str("<|fim_suffix|>\n");
1151 prompt.push_str(&context[editable_range.end..]);
1152 if !prompt.ends_with('\n') {
1153 prompt.push('\n');
1154 }
1155
1156 prompt.push_str(END_MARKER);
1157 prompt.push('\n');
1158 }
1159
    /// A single edit command parsed from the model output.
    ///
    /// `content` borrows from the model output and covers every line after the
    /// command header up to the next command header or the end of the output.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            // First line of the replaced range.
            start: LineRef,
            // Last line of the replaced range (inclusive).
            end: LineRef,
            // Replacement text; may span several lines or be empty.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            // Line to insert after; `None` when the header carried no parsable
            // line reference.
            after: Option<LineRef>,
            // Text to insert.
            content: &'a str,
        },
    }
1177
1178 /// Parse a line reference like `3:c3` into a `LineRef`.
1179 fn parse_line_ref(s: &str) -> Option<LineRef> {
1180 let (idx_str, hash_str) = s.split_once(':')?;
1181 let index = idx_str.parse::<usize>().ok()?;
1182 let hash = u8::from_str_radix(hash_str, 16).ok()?;
1183 Some(LineRef { index, hash })
1184 }
1185
1186 /// Parse the model output into a list of `EditCommand`s.
1187 fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1188 let mut commands = Vec::new();
1189 let mut offset = 0usize;
1190
1191 while offset < model_output.len() {
1192 let next_nl = model_output[offset..]
1193 .find('\n')
1194 .map(|i| offset + i)
1195 .unwrap_or(model_output.len());
1196 let line = &model_output[offset..next_nl];
1197 let line_end = if next_nl < model_output.len() {
1198 next_nl + 1
1199 } else {
1200 next_nl
1201 };
1202
1203 let trimmed = line.trim();
1204 let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1205 (true, spec)
1206 } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1207 (false, spec)
1208 } else {
1209 offset = line_end;
1210 continue;
1211 };
1212
1213 let mut content_end = line_end;
1214 let mut scan = line_end;
1215
1216 while scan < model_output.len() {
1217 let body_nl = model_output[scan..]
1218 .find('\n')
1219 .map(|i| scan + i)
1220 .unwrap_or(model_output.len());
1221 let body_line = &model_output[scan..body_nl];
1222 if body_line.trim().starts_with(SET_COMMAND_MARKER)
1223 || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1224 {
1225 break;
1226 }
1227 scan = if body_nl < model_output.len() {
1228 body_nl + 1
1229 } else {
1230 body_nl
1231 };
1232 content_end = scan;
1233 }
1234
1235 let content = &model_output[line_end..content_end];
1236
1237 if is_set {
1238 if let Some((start_str, end_str)) = specifier.split_once('-') {
1239 if let (Some(start), Some(end)) =
1240 (parse_line_ref(start_str), parse_line_ref(end_str))
1241 {
1242 commands.push(EditCommand::Set {
1243 start,
1244 end,
1245 content,
1246 });
1247 }
1248 } else if let Some(target) = parse_line_ref(specifier) {
1249 commands.push(EditCommand::Set {
1250 start: target.clone(),
1251 end: target,
1252 content,
1253 });
1254 }
1255 } else {
1256 let after = parse_line_ref(specifier);
1257 commands.push(EditCommand::Insert { after, content });
1258 }
1259
1260 offset = scan;
1261 }
1262
1263 commands
1264 }
1265
1266 /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1267 /// (as opposed to being a plain full-replacement output).
1268 /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1269 /// editable region, returning the plain text content.
1270 pub fn strip_hashline_prefixes(region: &str) -> String {
1271 let mut decoded: String = region
1272 .lines()
1273 .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1274 .collect::<Vec<_>>()
1275 .join("\n");
1276 if region.ends_with('\n') {
1277 decoded.push('\n');
1278 }
1279 decoded
1280 }
1281
1282 pub fn output_has_edit_commands(model_output: &str) -> bool {
1283 model_output.contains(SET_COMMAND_MARKER)
1284 || model_output.contains(INSERT_COMMAND_MARKER)
1285 || model_output.contains(NO_EDITS_COMMAND_MARKER)
1286 }
1287
1288 /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1289 /// original editable region text.
1290 ///
1291 /// `editable_region` is the original text of the editable region (without hash
1292 /// prefixes). `model_output` is the raw model response containing edit commands.
1293 ///
1294 /// Returns the full replacement text for the editable region.
1295 pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1296 if model_output
1297 .trim_start()
1298 .starts_with(NO_EDITS_COMMAND_MARKER)
1299 {
1300 return editable_region.to_string();
1301 }
1302
1303 let original_lines: Vec<&str> = editable_region.lines().collect();
1304 let old_hashes: Vec<u8> = original_lines
1305 .iter()
1306 .map(|line| hash_line(line.as_bytes()))
1307 .collect();
1308
1309 let commands = parse_edit_commands(model_output);
1310
1311 // For set operations: indexed by start line → Some((end line index, content))
1312 // For insert operations: indexed by line index → vec of content to insert after
1313 // Insert-before-first is tracked separately.
1314 let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1315 let mut insert_before_first: Vec<&str> = Vec::new();
1316 let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1317
1318 for command in &commands {
1319 match command {
1320 EditCommand::Set {
1321 start,
1322 end,
1323 content,
1324 } => {
1325 if start.index < old_hashes.len()
1326 && end.index < old_hashes.len()
1327 && start.index <= end.index
1328 && old_hashes[start.index] == start.hash
1329 && old_hashes[end.index] == end.hash
1330 {
1331 set_ops[start.index] = Some((end.index, *content));
1332 }
1333 }
1334 EditCommand::Insert { after, content } => match after {
1335 None => insert_before_first.push(*content),
1336 Some(line_ref) => {
1337 if line_ref.index < old_hashes.len()
1338 && old_hashes[line_ref.index] == line_ref.hash
1339 {
1340 insert_after[line_ref.index].push(*content);
1341 }
1342 }
1343 },
1344 }
1345 }
1346
1347 let mut result = String::new();
1348
1349 // Emit any insertions before the first line
1350 for content in &insert_before_first {
1351 result.push_str(content);
1352 if !content.ends_with('\n') {
1353 result.push('\n');
1354 }
1355 }
1356
1357 let mut i = 0;
1358 while i < original_lines.len() {
1359 if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1360 // Replace lines i..=end_index with the replacement content
1361 result.push_str(replacement);
1362 if !replacement.is_empty() && !replacement.ends_with('\n') {
1363 result.push('\n');
1364 }
1365 // Emit any insertions after the end of this set range
1366 if *end_index < insert_after.len() {
1367 for content in &insert_after[*end_index] {
1368 result.push_str(content);
1369 if !content.ends_with('\n') {
1370 result.push('\n');
1371 }
1372 }
1373 }
1374 i = end_index + 1;
1375 } else {
1376 // Keep the original line
1377 result.push_str(original_lines[i]);
1378 result.push('\n');
1379 // Emit any insertions after this line
1380 for content in &insert_after[i] {
1381 result.push_str(content);
1382 if !content.ends_with('\n') {
1383 result.push('\n');
1384 }
1385 }
1386 i += 1;
1387 }
1388 }
1389
1390 // Preserve trailing newline behavior: if the original ended with a
1391 // newline the result already has one; if it didn't, trim the extra one
1392 // we added.
1393 if !editable_region.ends_with('\n') && result.ends_with('\n') {
1394 result.pop();
1395 }
1396
1397 result
1398 }
1399
1400 /// Convert a unified diff patch into hashline edit commands.
1401 ///
1402 /// Parses the unified diff `patch` directly to determine which lines of
1403 /// `old_text` are deleted/replaced and what new lines are added, then emits
1404 /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1405 /// `{index}:{hash}` identifiers.
1406 ///
1407 /// `cursor_offset` is an optional byte offset into the first hunk's new
1408 /// text (context + additions) where the cursor marker should be placed.
1409 pub fn patch_to_edit_commands(
1410 old_text: &str,
1411 patch: &str,
1412 cursor_offset: Option<usize>,
1413 ) -> Result<String> {
1414 let old_lines: Vec<&str> = old_text.lines().collect();
1415 let old_hashes: Vec<u8> = old_lines
1416 .iter()
1417 .map(|line| hash_line(line.as_bytes()))
1418 .collect();
1419
1420 let mut result = String::new();
1421 let mut first_hunk = true;
1422
1423 struct Hunk<'a> {
1424 line_range: Range<usize>,
1425 new_text_lines: Vec<&'a str>,
1426 cursor_line_offset_in_new_text: Option<(usize, usize)>,
1427 }
1428
1429 // Parse the patch line by line. We only care about hunk headers,
1430 // context, deletions, and additions.
1431 let mut old_line_index: usize = 0;
1432 let mut current_hunk: Option<Hunk> = None;
1433 // Byte offset tracking within the hunk's new text for cursor placement.
1434 let mut new_text_byte_offset: usize = 0;
1435 // The line index of the last old line seen before/in the current hunk
1436 // (used for insert-after reference).
1437 let mut last_old_line_before_hunk: Option<usize> = None;
1438
1439 fn flush_hunk(
1440 hunk: Hunk,
1441 last_old_line: Option<usize>,
1442 result: &mut String,
1443 old_hashes: &[u8],
1444 ) {
1445 if hunk.line_range.is_empty() {
1446 // Pure insertion — reference the old line to insert after when in bounds.
1447 if let Some(after) = last_old_line
1448 && let Some(&hash) = old_hashes.get(after)
1449 {
1450 write!(
1451 result,
1452 "{INSERT_COMMAND_MARKER}{}\n",
1453 LineRef { index: after, hash }
1454 )
1455 .unwrap();
1456 } else {
1457 result.push_str(INSERT_COMMAND_MARKER);
1458 result.push('\n');
1459 }
1460 } else {
1461 let start = hunk.line_range.start;
1462 let end_exclusive = hunk.line_range.end;
1463 let deleted_line_count = end_exclusive.saturating_sub(start);
1464
1465 if deleted_line_count == 1 {
1466 if let Some(&hash) = old_hashes.get(start) {
1467 write!(
1468 result,
1469 "{SET_COMMAND_MARKER}{}\n",
1470 LineRef { index: start, hash }
1471 )
1472 .unwrap();
1473 } else {
1474 result.push_str(SET_COMMAND_MARKER);
1475 result.push('\n');
1476 }
1477 } else {
1478 let end_inclusive = end_exclusive - 1;
1479 match (
1480 old_hashes.get(start).copied(),
1481 old_hashes.get(end_inclusive).copied(),
1482 ) {
1483 (Some(start_hash), Some(end_hash)) => {
1484 write!(
1485 result,
1486 "{SET_COMMAND_MARKER}{}-{}\n",
1487 LineRef {
1488 index: start,
1489 hash: start_hash
1490 },
1491 LineRef {
1492 index: end_inclusive,
1493 hash: end_hash
1494 }
1495 )
1496 .unwrap();
1497 }
1498 _ => {
1499 result.push_str(SET_COMMAND_MARKER);
1500 result.push('\n');
1501 }
1502 }
1503 }
1504 }
1505 for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1506 if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1507 && line_offset == cursor_line_offset
1508 {
1509 result.push_str(&line[..char_offset]);
1510 result.push_str(CURSOR_MARKER);
1511 result.push_str(&line[char_offset..]);
1512 continue;
1513 }
1514
1515 result.push_str(line);
1516 }
1517 }
1518
1519 for raw_line in patch.split_inclusive('\n') {
1520 if raw_line.starts_with("@@") {
1521 // Flush any pending change hunk from a previous patch hunk.
1522 if let Some(hunk) = current_hunk.take() {
1523 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1524 }
1525
1526 // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1527 // We intentionally do not trust old_start as a direct local index into `old_text`,
1528 // because some patches are produced against a larger file region and carry
1529 // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1530 if first_hunk {
1531 new_text_byte_offset = 0;
1532 first_hunk = false;
1533 }
1534 continue;
1535 }
1536
1537 if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1538 continue;
1539 }
1540 if raw_line.starts_with("\\ No newline") {
1541 continue;
1542 }
1543
1544 if raw_line.starts_with('-') {
1545 // Extend or start a change hunk with this deleted old line.
1546 match &mut current_hunk {
1547 Some(Hunk {
1548 line_range: range, ..
1549 }) => range.end = old_line_index + 1,
1550 None => {
1551 current_hunk = Some(Hunk {
1552 line_range: old_line_index..old_line_index + 1,
1553 new_text_lines: Vec::new(),
1554 cursor_line_offset_in_new_text: None,
1555 });
1556 }
1557 }
1558 old_line_index += 1;
1559 } else if let Some(added_content) = raw_line.strip_prefix('+') {
1560 // Place cursor marker if cursor_offset falls within this line.
1561 let mut cursor_line_offset = None;
1562 if let Some(cursor_off) = cursor_offset
1563 && (first_hunk
1564 || cursor_off >= new_text_byte_offset
1565 && cursor_off <= new_text_byte_offset + added_content.len())
1566 {
1567 let line_offset = added_content.floor_char_boundary(
1568 cursor_off
1569 .saturating_sub(new_text_byte_offset)
1570 .min(added_content.len()),
1571 );
1572 cursor_line_offset = Some(line_offset);
1573 }
1574
1575 new_text_byte_offset += added_content.len();
1576
1577 let hunk = current_hunk.get_or_insert(Hunk {
1578 line_range: old_line_index..old_line_index,
1579 new_text_lines: vec![],
1580 cursor_line_offset_in_new_text: None,
1581 });
1582 hunk.new_text_lines.push(added_content);
1583 hunk.cursor_line_offset_in_new_text = cursor_line_offset
1584 .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1585 } else {
1586 // Context line (starts with ' ' or is empty).
1587 if let Some(hunk) = current_hunk.take() {
1588 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1589 }
1590 last_old_line_before_hunk = Some(old_line_index);
1591 old_line_index += 1;
1592 let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1593 new_text_byte_offset += content.len();
1594 }
1595 }
1596
1597 // Flush final group.
1598 if let Some(hunk) = current_hunk.take() {
1599 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1600 }
1601
1602 // Trim a single trailing newline.
1603 if result.ends_with('\n') {
1604 result.pop();
1605 }
1606
1607 if result.is_empty() {
1608 return Ok(NO_EDITS_COMMAND_MARKER.to_string());
1609 }
1610
1611 Ok(result)
1612 }
1613
1614 #[cfg(test)]
1615 mod tests {
1616 use super::*;
1617 use indoc::indoc;
1618
1619 #[test]
1620 fn test_format_cursor_region() {
1621 struct Case {
1622 name: &'static str,
1623 context: &'static str,
1624 editable_range: Range<usize>,
1625 cursor_offset: usize,
1626 expected: &'static str,
1627 }
1628
1629 let cases = [
1630 Case {
1631 name: "basic_cursor_placement",
1632 context: "hello world\n",
1633 editable_range: 0..12,
1634 cursor_offset: 5,
1635 expected: indoc! {"
1636 <|file_sep|>test.rs
1637 <|fim_prefix|>
1638 <|fim_middle|>current
1639 0:5c|hello<|user_cursor|> world
1640 <|fim_suffix|>
1641 <|fim_middle|>updated
1642 "},
1643 },
1644 Case {
1645 name: "multiline_cursor_on_second_line",
1646 context: "aaa\nbbb\nccc\n",
1647 editable_range: 0..12,
1648 cursor_offset: 5, // byte 5 → 1 byte into "bbb"
1649 expected: indoc! {"
1650 <|file_sep|>test.rs
1651 <|fim_prefix|>
1652 <|fim_middle|>current
1653 0:23|aaa
1654 1:26|b<|user_cursor|>bb
1655 2:29|ccc
1656 <|fim_suffix|>
1657 <|fim_middle|>updated
1658 "},
1659 },
1660 Case {
1661 name: "no_trailing_newline_in_context",
1662 context: "line1\nline2",
1663 editable_range: 0..11,
1664 cursor_offset: 3,
1665 expected: indoc! {"
1666 <|file_sep|>test.rs
1667 <|fim_prefix|>
1668 <|fim_middle|>current
1669 0:d9|lin<|user_cursor|>e1
1670 1:da|line2
1671 <|fim_suffix|>
1672 <|fim_middle|>updated
1673 "},
1674 },
1675 Case {
1676 name: "leading_newline_in_editable_region",
1677 context: "\nabc\n",
1678 editable_range: 0..5,
1679 cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
1680 expected: indoc! {"
1681 <|file_sep|>test.rs
1682 <|fim_prefix|>
1683 <|fim_middle|>current
1684 0:00|
1685 1:26|a<|user_cursor|>bc
1686 <|fim_suffix|>
1687 <|fim_middle|>updated
1688 "},
1689 },
1690 Case {
1691 name: "with_suffix",
1692 context: "abc\ndef",
1693 editable_range: 0..4, // editable region = "abc\n", suffix = "def"
1694 cursor_offset: 2,
1695 expected: indoc! {"
1696 <|file_sep|>test.rs
1697 <|fim_prefix|>
1698 <|fim_middle|>current
1699 0:26|ab<|user_cursor|>c
1700 <|fim_suffix|>
1701 def
1702 <|fim_middle|>updated
1703 "},
1704 },
1705 Case {
1706 name: "unicode_two_byte_chars",
1707 context: "héllo\n",
1708 editable_range: 0..7,
1709 cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
1710 expected: indoc! {"
1711 <|file_sep|>test.rs
1712 <|fim_prefix|>
1713 <|fim_middle|>current
1714 0:1b|hé<|user_cursor|>llo
1715 <|fim_suffix|>
1716 <|fim_middle|>updated
1717 "},
1718 },
1719 Case {
1720 name: "unicode_three_byte_chars",
1721 context: "日本語\n",
1722 editable_range: 0..10,
1723 cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
1724 expected: indoc! {"
1725 <|file_sep|>test.rs
1726 <|fim_prefix|>
1727 <|fim_middle|>current
1728 0:80|日本<|user_cursor|>語
1729 <|fim_suffix|>
1730 <|fim_middle|>updated
1731 "},
1732 },
1733 Case {
1734 name: "unicode_four_byte_chars",
1735 context: "a🌍b\n",
1736 editable_range: 0..7,
1737 cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
1738 expected: indoc! {"
1739 <|file_sep|>test.rs
1740 <|fim_prefix|>
1741 <|fim_middle|>current
1742 0:6b|a🌍<|user_cursor|>b
1743 <|fim_suffix|>
1744 <|fim_middle|>updated
1745 "},
1746 },
1747 Case {
1748 name: "cursor_at_start_of_region_not_placed",
1749 context: "abc\n",
1750 editable_range: 0..4,
1751 cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
1752 expected: indoc! {"
1753 <|file_sep|>test.rs
1754 <|fim_prefix|>
1755 <|fim_middle|>current
1756 0:26|abc
1757 <|fim_suffix|>
1758 <|fim_middle|>updated
1759 "},
1760 },
1761 Case {
1762 name: "cursor_at_end_of_line_not_placed",
1763 context: "abc\ndef\n",
1764 editable_range: 0..8,
1765 cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
1766 expected: indoc! {"
1767 <|file_sep|>test.rs
1768 <|fim_prefix|>
1769 <|fim_middle|>current
1770 0:26|abc
1771 1:2f|def
1772 <|fim_suffix|>
1773 <|fim_middle|>updated
1774 "},
1775 },
1776 Case {
1777 name: "cursor_offset_relative_to_context_not_editable_region",
1778 // cursor_offset is relative to `context`, so when editable_range.start > 0,
1779 // write_cursor_excerpt_section must subtract it before comparing against
1780 // per-line offsets within the editable region.
1781 context: "pre\naaa\nbbb\nsuf\n",
1782 editable_range: 4..12, // editable region = "aaa\nbbb\n"
1783 cursor_offset: 9, // byte 9 in context = second 'b' in "bbb"
1784 expected: indoc! {"
1785 <|file_sep|>test.rs
1786 <|fim_prefix|>
1787 pre
1788 <|fim_middle|>current
1789 0:23|aaa
1790 1:26|b<|user_cursor|>bb
1791 <|fim_suffix|>
1792 suf
1793 <|fim_middle|>updated
1794 "},
1795 },
1796 ];
1797
1798 for case in &cases {
1799 let mut prompt = String::new();
1800 hashline::write_cursor_excerpt_section(
1801 &mut prompt,
1802 Path::new("test.rs"),
1803 case.context,
1804 &case.editable_range,
1805 case.cursor_offset,
1806 );
1807 assert_eq!(prompt, case.expected, "failed case: {}", case.name);
1808 }
1809 }
1810
1811 #[test]
1812 fn test_apply_edit_commands() {
1813 struct Case {
1814 name: &'static str,
1815 original: &'static str,
1816 model_output: &'static str,
1817 expected: &'static str,
1818 }
1819
1820 let cases = vec![
1821 Case {
1822 name: "set_single_line",
1823 original: indoc! {"
1824 let mut total = 0;
1825 for product in products {
1826 total += ;
1827 }
1828 total
1829 "},
1830 model_output: indoc! {"
1831 <|set|>2:87
1832 total += product.price;
1833 "},
1834 expected: indoc! {"
1835 let mut total = 0;
1836 for product in products {
1837 total += product.price;
1838 }
1839 total
1840 "},
1841 },
1842 Case {
1843 name: "set_range",
1844 original: indoc! {"
1845 fn foo() {
1846 let x = 1;
1847 let y = 2;
1848 let z = 3;
1849 }
1850 "},
1851 model_output: indoc! {"
1852 <|set|>1:46-3:4a
1853 let sum = 6;
1854 "},
1855 expected: indoc! {"
1856 fn foo() {
1857 let sum = 6;
1858 }
1859 "},
1860 },
1861 Case {
1862 name: "insert_after_line",
1863 original: indoc! {"
1864 fn main() {
1865 let x = 1;
1866 }
1867 "},
1868 model_output: indoc! {"
1869 <|insert|>1:46
1870 let y = 2;
1871 "},
1872 expected: indoc! {"
1873 fn main() {
1874 let x = 1;
1875 let y = 2;
1876 }
1877 "},
1878 },
1879 Case {
1880 name: "insert_before_first",
1881 original: indoc! {"
1882 let x = 1;
1883 let y = 2;
1884 "},
1885 model_output: indoc! {"
1886 <|insert|>
1887 use std::io;
1888 "},
1889 expected: indoc! {"
1890 use std::io;
1891 let x = 1;
1892 let y = 2;
1893 "},
1894 },
1895 Case {
1896 name: "set_with_cursor_marker",
1897 original: indoc! {"
1898 fn main() {
1899 println!();
1900 }
1901 "},
1902 model_output: indoc! {"
1903 <|set|>1:34
1904 eprintln!(\"<|user_cursor|>\");
1905 "},
1906 expected: indoc! {"
1907 fn main() {
1908 eprintln!(\"<|user_cursor|>\");
1909 }
1910 "},
1911 },
1912 Case {
1913 name: "multiple_set_commands",
1914 original: indoc! {"
1915 aaa
1916 bbb
1917 ccc
1918 ddd
1919 "},
1920 model_output: indoc! {"
1921 <|set|>0:23
1922 AAA
1923 <|set|>2:29
1924 CCC
1925 "},
1926 expected: indoc! {"
1927 AAA
1928 bbb
1929 CCC
1930 ddd
1931 "},
1932 },
1933 Case {
1934 name: "set_range_multiline_replacement",
1935 original: indoc! {"
1936 fn handle_submit() {
1937 }
1938
1939 fn handle_keystroke() {
1940 "},
1941 model_output: indoc! {"
1942 <|set|>0:3f-1:7d
1943 fn handle_submit(modal_state: &mut ModalState) {
1944 <|user_cursor|>
1945 }
1946 "},
1947 expected: indoc! {"
1948 fn handle_submit(modal_state: &mut ModalState) {
1949 <|user_cursor|>
1950 }
1951
1952 fn handle_keystroke() {
1953 "},
1954 },
1955 Case {
1956 name: "no_edit_commands_returns_original",
1957 original: indoc! {"
1958 hello
1959 world
1960 "},
1961 model_output: "some random text with no commands",
1962 expected: indoc! {"
1963 hello
1964 world
1965 "},
1966 },
1967 Case {
1968 name: "no_edits_command_returns_original",
1969 original: indoc! {"
1970 hello
1971 world
1972 "},
1973 model_output: "<|no_edits|>",
1974 expected: indoc! {"
1975 hello
1976 world
1977 "},
1978 },
1979 Case {
1980 name: "wrong_hash_set_ignored",
1981 original: indoc! {"
1982 aaa
1983 bbb
1984 "},
1985 model_output: indoc! {"
1986 <|set|>0:ff
1987 ZZZ
1988 "},
1989 expected: indoc! {"
1990 aaa
1991 bbb
1992 "},
1993 },
1994 Case {
1995 name: "insert_and_set_combined",
1996 original: indoc! {"
1997 alpha
1998 beta
1999 gamma
2000 "},
2001 model_output: indoc! {"
2002 <|set|>0:06
2003 ALPHA
2004 <|insert|>1:9c
2005 beta_extra
2006 "},
2007 expected: indoc! {"
2008 ALPHA
2009 beta
2010 beta_extra
2011 gamma
2012 "},
2013 },
2014 Case {
2015 name: "no_trailing_newline_preserved",
2016 original: "hello\nworld",
2017 model_output: indoc! {"
2018 <|set|>0:14
2019 HELLO
2020 "},
2021 expected: "HELLO\nworld",
2022 },
2023 Case {
2024 name: "set_range_hash_mismatch_in_end_bound",
2025 original: indoc! {"
2026 one
2027 two
2028 three
2029 "},
2030 model_output: indoc! {"
2031 <|set|>0:42-2:ff
2032 ONE_TWO_THREE
2033 "},
2034 expected: indoc! {"
2035 one
2036 two
2037 three
2038 "},
2039 },
2040 Case {
2041 name: "set_range_start_greater_than_end_ignored",
2042 original: indoc! {"
2043 a
2044 b
2045 c
2046 "},
2047 model_output: indoc! {"
2048 <|set|>2:63-1:62
2049 X
2050 "},
2051 expected: indoc! {"
2052 a
2053 b
2054 c
2055 "},
2056 },
2057 Case {
2058 name: "insert_out_of_bounds_ignored",
2059 original: indoc! {"
2060 x
2061 y
2062 "},
2063 model_output: indoc! {"
2064 <|insert|>99:aa
2065 z
2066 "},
2067 expected: indoc! {"
2068 x
2069 y
2070 "},
2071 },
2072 Case {
2073 name: "set_out_of_bounds_ignored",
2074 original: indoc! {"
2075 x
2076 y
2077 "},
2078 model_output: indoc! {"
2079 <|set|>99:aa
2080 z
2081 "},
2082 expected: indoc! {"
2083 x
2084 y
2085 "},
2086 },
2087 Case {
2088 name: "malformed_set_command_ignored",
2089 original: indoc! {"
2090 alpha
2091 beta
2092 "},
2093 model_output: indoc! {"
2094 <|set|>not-a-line-ref
2095 UPDATED
2096 "},
2097 expected: indoc! {"
2098 alpha
2099 beta
2100 "},
2101 },
2102 Case {
2103 name: "malformed_insert_hash_treated_as_before_first",
2104 original: indoc! {"
2105 alpha
2106 beta
2107 "},
2108 model_output: indoc! {"
2109 <|insert|>1:nothex
2110 preamble
2111 "},
2112 expected: indoc! {"
2113 preamble
2114 alpha
2115 beta
2116 "},
2117 },
2118 Case {
2119 name: "set_then_insert_same_target_orders_insert_after_replacement",
2120 original: indoc! {"
2121 cat
2122 dog
2123 "},
2124 model_output: indoc! {"
2125 <|set|>0:38
2126 CAT
2127 <|insert|>0:38
2128 TAIL
2129 "},
2130 expected: indoc! {"
2131 CAT
2132 TAIL
2133 dog
2134 "},
2135 },
2136 Case {
2137 name: "overlapping_set_ranges_last_wins",
2138 original: indoc! {"
2139 a
2140 b
2141 c
2142 d
2143 "},
2144 model_output: indoc! {"
2145 <|set|>0:61-2:63
2146 FIRST
2147 <|set|>1:62-3:64
2148 SECOND
2149 "},
2150 expected: indoc! {"
2151 FIRST
2152 d
2153 "},
2154 },
2155 Case {
2156 name: "insert_before_first_and_after_line",
2157 original: indoc! {"
2158 a
2159 b
2160 "},
2161 model_output: indoc! {"
2162 <|insert|>
2163 HEAD
2164 <|insert|>0:61
2165 MID
2166 "},
2167 expected: indoc! {"
2168 HEAD
2169 a
2170 MID
2171 b
2172 "},
2173 },
2174 ];
2175
2176 for case in &cases {
2177 let result = hashline::apply_edit_commands(case.original, &case.model_output);
2178 assert_eq!(result, case.expected, "failed case: {}", case.name);
2179 }
2180 }
2181
2182 #[test]
2183 fn test_output_has_edit_commands() {
2184 assert!(hashline::output_has_edit_commands(&format!(
2185 "{}0:ab\nnew",
2186 SET_COMMAND_MARKER
2187 )));
2188 assert!(hashline::output_has_edit_commands(&format!(
2189 "{}0:ab\nnew",
2190 INSERT_COMMAND_MARKER
2191 )));
2192 assert!(hashline::output_has_edit_commands(&format!(
2193 "some text\n{}1:cd\nstuff",
2194 SET_COMMAND_MARKER
2195 )));
2196 assert!(!hashline::output_has_edit_commands("just plain text"));
2197 assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2198 assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2199 }
2200
2201 // ---- hashline::patch_to_edit_commands round-trip tests ----
2202
2203 #[test]
2204 fn test_patch_to_edit_commands() {
2205 struct Case {
2206 name: &'static str,
2207 old: &'static str,
2208 patch: &'static str,
2209 expected_new: &'static str,
2210 }
2211
2212 let cases = [
2213 Case {
2214 name: "single_line_replacement",
2215 old: indoc! {"
2216 let mut total = 0;
2217 for product in products {
2218 total += ;
2219 }
2220 total
2221 "},
2222 patch: indoc! {"
2223 @@ -1,5 +1,5 @@
2224 let mut total = 0;
2225 for product in products {
2226 - total += ;
2227 + total += product.price;
2228 }
2229 total
2230 "},
2231 expected_new: indoc! {"
2232 let mut total = 0;
2233 for product in products {
2234 total += product.price;
2235 }
2236 total
2237 "},
2238 },
2239 Case {
2240 name: "multiline_replacement",
2241 old: indoc! {"
2242 fn foo() {
2243 let x = 1;
2244 let y = 2;
2245 let z = 3;
2246 }
2247 "},
2248 patch: indoc! {"
2249 @@ -1,5 +1,3 @@
2250 fn foo() {
2251 - let x = 1;
2252 - let y = 2;
2253 - let z = 3;
2254 + let sum = 1 + 2 + 3;
2255 }
2256 "},
2257 expected_new: indoc! {"
2258 fn foo() {
2259 let sum = 1 + 2 + 3;
2260 }
2261 "},
2262 },
2263 Case {
2264 name: "insertion",
2265 old: indoc! {"
2266 fn main() {
2267 let x = 1;
2268 }
2269 "},
2270 patch: indoc! {"
2271 @@ -1,3 +1,4 @@
2272 fn main() {
2273 let x = 1;
2274 + let y = 2;
2275 }
2276 "},
2277 expected_new: indoc! {"
2278 fn main() {
2279 let x = 1;
2280 let y = 2;
2281 }
2282 "},
2283 },
2284 Case {
2285 name: "insertion_before_first",
2286 old: indoc! {"
2287 let x = 1;
2288 let y = 2;
2289 "},
2290 patch: indoc! {"
2291 @@ -1,2 +1,3 @@
2292 +use std::io;
2293 let x = 1;
2294 let y = 2;
2295 "},
2296 expected_new: indoc! {"
2297 use std::io;
2298 let x = 1;
2299 let y = 2;
2300 "},
2301 },
2302 Case {
2303 name: "deletion",
2304 old: indoc! {"
2305 aaa
2306 bbb
2307 ccc
2308 ddd
2309 "},
2310 patch: indoc! {"
2311 @@ -1,4 +1,2 @@
2312 aaa
2313 -bbb
2314 -ccc
2315 ddd
2316 "},
2317 expected_new: indoc! {"
2318 aaa
2319 ddd
2320 "},
2321 },
2322 Case {
2323 name: "multiple_changes",
2324 old: indoc! {"
2325 alpha
2326 beta
2327 gamma
2328 delta
2329 epsilon
2330 "},
2331 patch: indoc! {"
2332 @@ -1,5 +1,5 @@
2333 -alpha
2334 +ALPHA
2335 beta
2336 gamma
2337 -delta
2338 +DELTA
2339 epsilon
2340 "},
2341 expected_new: indoc! {"
2342 ALPHA
2343 beta
2344 gamma
2345 DELTA
2346 epsilon
2347 "},
2348 },
2349 Case {
2350 name: "replace_with_insertion",
2351 old: indoc! {r#"
2352 fn handle() {
2353 modal_state.close();
2354 modal_state.dismiss();
2355 "#},
2356 patch: indoc! {r#"
2357 @@ -1,3 +1,4 @@
2358 fn handle() {
2359 modal_state.close();
2360 + eprintln!("");
2361 modal_state.dismiss();
2362 "#},
2363 expected_new: indoc! {r#"
2364 fn handle() {
2365 modal_state.close();
2366 eprintln!("");
2367 modal_state.dismiss();
2368 "#},
2369 },
2370 Case {
2371 name: "complete_replacement",
2372 old: indoc! {"
2373 aaa
2374 bbb
2375 ccc
2376 "},
2377 patch: indoc! {"
2378 @@ -1,3 +1,3 @@
2379 -aaa
2380 -bbb
2381 -ccc
2382 +xxx
2383 +yyy
2384 +zzz
2385 "},
2386 expected_new: indoc! {"
2387 xxx
2388 yyy
2389 zzz
2390 "},
2391 },
2392 Case {
2393 name: "add_function_body",
2394 old: indoc! {"
2395 fn foo() {
2396 modal_state.dismiss();
2397 }
2398
2399 fn
2400
2401 fn handle_keystroke() {
2402 "},
2403 patch: indoc! {"
2404 @@ -1,6 +1,8 @@
2405 fn foo() {
2406 modal_state.dismiss();
2407 }
2408
2409 -fn
2410 +fn handle_submit() {
2411 + todo()
2412 +}
2413
2414 fn handle_keystroke() {
2415 "},
2416 expected_new: indoc! {"
2417 fn foo() {
2418 modal_state.dismiss();
2419 }
2420
2421 fn handle_submit() {
2422 todo()
2423 }
2424
2425 fn handle_keystroke() {
2426 "},
2427 },
2428 Case {
2429 name: "with_cursor_offset",
2430 old: indoc! {r#"
2431 fn main() {
2432 println!();
2433 }
2434 "#},
2435 patch: indoc! {r#"
2436 @@ -1,3 +1,3 @@
2437 fn main() {
2438 - println!();
2439 + eprintln!("");
2440 }
2441 "#},
2442 expected_new: indoc! {r#"
2443 fn main() {
2444 eprintln!("<|user_cursor|>");
2445 }
2446 "#},
2447 },
2448 Case {
2449 name: "non_local_hunk_header_pure_insertion_repro",
2450 old: indoc! {"
2451 aaa
2452 bbb
2453 "},
2454 patch: indoc! {"
2455 @@ -20,2 +20,3 @@
2456 aaa
2457 +xxx
2458 bbb
2459 "},
2460 expected_new: indoc! {"
2461 aaa
2462 xxx
2463 bbb
2464 "},
2465 },
2466 Case {
2467 name: "empty_patch_produces_no_edits_marker",
2468 old: indoc! {"
2469 aaa
2470 bbb
2471 "},
2472 patch: "@@ -20,2 +20,3 @@\n",
2473 expected_new: indoc! {"
2474 aaa
2475 bbb
2476 "},
2477 },
2478 ];
2479
2480 for case in &cases {
2481 // The cursor_offset for patch_to_edit_commands is relative to
2482 // the first hunk's new text (context + additions). We compute
2483 // it by finding where the marker sits in the expected output
2484 // (which mirrors the new text of the hunk).
2485 let cursor_offset = case.expected_new.find(CURSOR_MARKER);
2486
2487 let commands =
2488 hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
2489 .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
2490
2491 assert!(
2492 hashline::output_has_edit_commands(&commands),
2493 "case {}: expected edit commands, got: {commands:?}",
2494 case.name,
2495 );
2496
2497 let applied = hashline::apply_edit_commands(case.old, &commands);
2498 assert_eq!(applied, case.expected_new, "case {}", case.name);
2499 }
2500 }
2501 }
2502}
2503
2504pub mod seed_coder {
2505 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2506 //!
2507 //! Seed-Coder uses different FIM tokens and order than Qwen:
2508 //! - SPM order: suffix comes FIRST, then prefix, then middle
2509 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2510 //! - File markers: StarCoder-style `<filename>path` (single token + path)
2511 //!
2512 //! All context (related files, edit history) goes in the PREFIX section.
2513 //! The suffix contains only code after the editable region.
2514 //!
2515 //! Example prompt:
2516 //!
2517 //! <[fim-suffix]>
2518 //! code after editable region
2519 //! <[fim-prefix]><filename>related/file.py
2520 //! related file content
2521 //!
2522 //! <filename>edit_history
2523 //! --- a/some_file.py
2524 //! +++ b/some_file.py
2525 //! -old
2526 //! +new
2527 //!
2528 //! <filename>path/to/target_file.py
2529 //! code before editable region
2530 //! <<<<<<< CURRENT
2531 //! code that
2532 //! needs to<|user_cursor|>
2533 //! be rewritten
2534 //! =======
2535 //! <[fim-middle]>
2536 //!
2537 //! Expected output (model generates):
2538 //!
2539 //! updated
2540 //! code with
2541 //! changes applied
2542 //! >>>>>>> UPDATED
2543
2544 use super::*;
2545
/// Token that opens the suffix section; it is the first thing written to the prompt.
pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
/// Token written right after the suffix section, opening the prefix section.
pub const FIM_PREFIX: &str = "<[fim-prefix]>";
/// Token that terminates the prompt; the model's completion follows it.
pub const FIM_MIDDLE: &str = "<[fim-middle]>";
/// Marker written in front of the cursor file's path; it is also handed to the
/// edit-history and related-files formatters as their file marker.
pub const FILE_MARKER: &str = "<filename>";

/// Line that opens the editable region inside the cursor excerpt.
pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
/// Line written after the editable region, just before `FIM_MIDDLE`.
pub const SEPARATOR: &str = "=======\n";
/// Line that closes the model's output (see the module-level example).
pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

/// Output body used, followed by `END_MARKER`, when a patch contains no changes.
pub const NO_EDITS: &str = "NO_EDITS\n";
2556
2557 pub fn special_tokens() -> &'static [&'static str] {
2558 &[
2559 FIM_SUFFIX,
2560 FIM_PREFIX,
2561 FIM_MIDDLE,
2562 FILE_MARKER,
2563 START_MARKER,
2564 SEPARATOR,
2565 END_MARKER,
2566 CURSOR_MARKER,
2567 ]
2568 }
2569
2570 pub fn write_cursor_excerpt_section(
2571 prompt: &mut String,
2572 path: &Path,
2573 context: &str,
2574 editable_range: &Range<usize>,
2575 cursor_offset: usize,
2576 ) {
2577 let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2578 prompt.push_str(§ion);
2579 }
2580
2581 pub fn format_prompt_with_budget(
2582 path: &Path,
2583 context: &str,
2584 editable_range: &Range<usize>,
2585 cursor_offset: usize,
2586 events: &[Arc<Event>],
2587 related_files: &[RelatedFile],
2588 max_tokens: usize,
2589 ) -> String {
2590 let suffix_section = build_suffix_section(context, editable_range);
2591 let cursor_prefix_section =
2592 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2593
2594 let suffix_tokens = estimate_tokens(suffix_section.len());
2595 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
2596 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2597
2598 let edit_history_section = super::format_edit_history_within_budget(
2599 events,
2600 FILE_MARKER,
2601 "edit_history",
2602 budget_after_cursor,
2603 );
2604 let edit_history_tokens = estimate_tokens(edit_history_section.len());
2605 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
2606
2607 let related_files_section = super::format_related_files_within_budget(
2608 related_files,
2609 FILE_MARKER,
2610 "",
2611 budget_after_edit_history,
2612 );
2613
2614 let mut prompt = String::new();
2615 prompt.push_str(&suffix_section);
2616 prompt.push_str(FIM_PREFIX);
2617 prompt.push_str(&related_files_section);
2618 if !related_files_section.is_empty() {
2619 prompt.push('\n');
2620 }
2621 prompt.push_str(&edit_history_section);
2622 if !edit_history_section.is_empty() {
2623 prompt.push('\n');
2624 }
2625 prompt.push_str(&cursor_prefix_section);
2626 prompt.push_str(FIM_MIDDLE);
2627 prompt
2628 }
2629
2630 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2631 let mut section = String::new();
2632 section.push_str(FIM_SUFFIX);
2633 section.push_str(&context[editable_range.end..]);
2634 if !section.ends_with('\n') {
2635 section.push('\n');
2636 }
2637 section
2638 }
2639
2640 fn build_cursor_prefix_section(
2641 path: &Path,
2642 context: &str,
2643 editable_range: &Range<usize>,
2644 cursor_offset: usize,
2645 ) -> String {
2646 let mut section = String::new();
2647 let path_str = path.to_string_lossy();
2648 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2649
2650 section.push_str(&context[..editable_range.start]);
2651 section.push_str(START_MARKER);
2652 section.push_str(&context[editable_range.start..cursor_offset]);
2653 section.push_str(CURSOR_MARKER);
2654 section.push_str(&context[cursor_offset..editable_range.end]);
2655 if !section.ends_with('\n') {
2656 section.push('\n');
2657 }
2658 section.push_str(SEPARATOR);
2659 section
2660 }
2661
2662 /// Format patch as containing no changes if it's empty; otherwise return None.
2663 pub(crate) fn no_edits(patch: &str) -> Option<String> {
2664 // Count lines in the patch
2665 let empty_patch = patch.lines().count() <= 3;
2666 if empty_patch {
2667 Some(format!("{NO_EDITS}{END_MARKER}"))
2668 } else {
2669 None
2670 }
2671 }
2672}
2673
2674pub mod v0304_variable_edit {
2675 //! A prompt format with no fixed editable region. The entire context is shown
2676 //! to the model, and it chooses which text to replace by outputting surrounding
2677 //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
2678 //! text.
2679 //!
2680 //! Example prompt:
2681 //!
2682 //! <|file_sep|>path/to/file.py
2683 //! zero
2684 //! one
2685 //! two
2686 //! three<|user_cursor|>
2687 //! four
2688 //! five
2689 //! <|fim_prefix|>
//!
2691 //! Expected output (model generates):
2692 //!
2693 //! two
2694 //! <|fim_middle|>
2695 //! THREE
2696 //! <|fim_suffix|>
2697 //! four
2698 //!
2699 //! The output means: find "two\n...\nfour" in the context, and replace
2700 //! everything between "two\n" and "four" with "THREE\n".
2701
2702 use super::*;
2703
2704 pub fn special_tokens() -> &'static [&'static str] {
2705 &[
2706 "<|fim_prefix|>",
2707 "<|fim_suffix|>",
2708 "<|fim_middle|>",
2709 "<|file_sep|>",
2710 CURSOR_MARKER,
2711 ]
2712 }
2713
2714 pub fn write_cursor_excerpt_section(
2715 prompt: &mut String,
2716 path: &Path,
2717 context: &str,
2718 cursor_offset: usize,
2719 ) {
2720 let path_str = path.to_string_lossy();
2721 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
2722
2723 prompt.push_str(&context[..cursor_offset]);
2724 prompt.push_str(CURSOR_MARKER);
2725 prompt.push_str(&context[cursor_offset..]);
2726 if !prompt.ends_with('\n') {
2727 prompt.push('\n');
2728 }
2729 prompt.push_str("<|fim_prefix|>\n")
2730 }
2731
2732 /// Apply a variable-edit model output to the original context text.
2733 ///
2734 /// The model output has the form:
2735 ///
2736 /// - prefix context lines
2737 /// - `<|fim_middle|>`
2738 /// - new text
2739 /// - `<|fim_suffix|>`
2740 /// - suffix context lines
2741 ///
2742 /// We locate the prefix/suffix context lines in the original text and replace
2743 /// everything between them with the new text.
2744 pub fn apply_variable_edit(
2745 context: &str,
2746 model_output: &str,
2747 ) -> Result<(Range<usize>, String)> {
2748 let (prefix_context, rest) = model_output
2749 .split_once("<|fim_middle|>\n")
2750 .or_else(|| model_output.split_once("<|fim_middle|>"))
2751 .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
2752
2753 let (new_text, suffix_context) = rest
2754 .split_once("<|fim_suffix|>\n")
2755 .or_else(|| rest.split_once("<|fim_suffix|>"))
2756 .unwrap_or((rest, ""));
2757
2758 let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
2759 suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
2760 } else {
2761 suffix_context
2762 };
2763
2764 let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
2765 .ok_or_else(|| anyhow!("could not locate prefix lines"))?
2766 + prefix_context.len();
2767 let suffix_offset = if suffix_context.is_empty() {
2768 context.len()
2769 } else {
2770 find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
2771 .ok_or_else(|| anyhow!("could not locate suffix lines"))?
2772 + prefix_offset
2773 };
2774
2775 let edit_range = prefix_offset..suffix_offset;
2776 return Ok((edit_range, new_text.to_string()));
2777 }
2778
/// Finds the first occurrence of `needle` in `haystack` that begins at the
/// start of a line (offset 0 or directly after a `'\n'`).
///
/// Returns the byte offset of that occurrence, `Some(0)` for an empty
/// `needle`, or `None` when no line-start occurrence exists.
///
/// Candidates are the line starts themselves rather than the results of
/// `match_indices`: that iterator yields non-overlapping matches, so a
/// mid-line match could swallow the beginning of a later, valid line-start
/// match (e.g. `"}\n}\n"` inside `"  }\n}\n}\n"`).
fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }

    // Offset 0 plus the byte after every newline. A trailing line start at
    // `haystack.len()` is harmless: an empty tail never matches a non-empty needle.
    std::iter::once(0)
        .chain(haystack.match_indices('\n').map(|(newline, _)| newline + 1))
        .find(|&line_start| haystack[line_start..].starts_with(needle))
}
2789
2790 /// Convert a unified diff patch into the variable-edit output format.
2791 ///
2792 /// Parses `patch` as a unified diff against `old_text` and produces model
2793 /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
2794 /// delimiters. The diff is resolved by content matching rather than line
2795 /// numbers.
2796 pub fn patch_to_variable_edit_output(
2797 old_text: &str,
2798 patch: &str,
2799 cursor_offset: Option<usize>,
2800 ) -> Result<String> {
2801 // Parse the unified diff into hunks. Each hunk has an `old_context`
2802 // string (context + deleted lines interleaved in order) and a list of
2803 // edits expressed as byte ranges within that context plus replacement
2804 // text.
2805 let hunks = parse_hunks(patch);
2806 if hunks.is_empty() {
2807 return Ok(String::new());
2808 }
2809
2810 // Apply each hunk by finding its old_context in the text and
2811 // performing the edits. We search forward from where the previous
2812 // hunk ended so that hunks are applied in order.
2813 let mut new_text = old_text.to_string();
2814 let mut search_from: usize = 0;
2815 let mut first_hunk_pos: Option<usize> = None;
2816
2817 for hunk in &hunks {
2818 let context_pos = new_text[search_from..]
2819 .find(&hunk.old_context)
2820 .map(|pos| pos + search_from)
2821 .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
2822
2823 if first_hunk_pos.is_none() {
2824 first_hunk_pos = Some(context_pos);
2825 }
2826
2827 // Apply edits in reverse order so byte offsets remain valid.
2828 for edit in hunk.edits.iter().rev() {
2829 let abs_start = context_pos + edit.range.start;
2830 let abs_end = context_pos + edit.range.end;
2831 new_text.replace_range(abs_start..abs_end, &edit.text);
2832 }
2833
2834 // Advance past this hunk's region in the (now modified) text.
2835 let new_region_len: usize =
2836 hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
2837 len + edit.text.len() - (edit.range.end - edit.range.start)
2838 });
2839 search_from = context_pos + new_region_len;
2840 }
2841
2842 // Now we have old_text and new_text. Find the changed line range by
2843 // comparing them.
2844 let old_lines: Vec<&str> = old_text.lines().collect();
2845 let new_lines: Vec<&str> = new_text.lines().collect();
2846
2847 // Find first differing line.
2848 let first_changed_row = old_lines
2849 .iter()
2850 .zip(new_lines.iter())
2851 .position(|(a, b)| a != b)
2852 .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
2853
2854 // Find last differing line (from the end).
2855 let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
2856 let common_suffix = old_lines
2857 .iter()
2858 .rev()
2859 .zip(new_lines.iter().rev())
2860 .take(max_suffix)
2861 .take_while(|(a, b)| a == b)
2862 .count();
2863
2864 let old_end = old_lines.len() - common_suffix;
2865 let new_end = new_lines.len() - common_suffix;
2866
2867 if first_changed_row == old_end && first_changed_row == new_end {
2868 return Ok(String::new());
2869 }
2870
2871 // Build the replacement text from new_lines[first_diff..new_end].
2872 let mut merged_new_text = String::new();
2873 for line in &new_lines[first_changed_row..new_end] {
2874 merged_new_text.push_str(line);
2875 merged_new_text.push('\n');
2876 }
2877
2878 // cursor_offset is relative to the first hunk's new content in
2879 // new_text. Translate it to an offset within merged_new_text, which
2880 // only contains lines first_diff..new_end of new_text.
2881 if let Some(hunk_offset) = cursor_offset {
2882 let hunk_start = first_hunk_pos.unwrap_or(0);
2883 let absolute_pos = hunk_start + hunk_offset;
2884
2885 // Byte offset where first_diff starts in new_text.
2886 let merged_start: usize = new_lines[..first_changed_row]
2887 .iter()
2888 .map(|line| line.len() + 1)
2889 .sum();
2890
2891 if absolute_pos >= merged_start {
2892 let relative_offset = absolute_pos - merged_start;
2893 if relative_offset <= merged_new_text.len() {
2894 merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
2895 }
2896 }
2897 }
2898
2899 // Build output with 2 lines of context above and below.
2900 let context_lines_count = 2;
2901 let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
2902 let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
2903
2904 fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
2905 let pattern = &lines[line_range];
2906 let pattern_len = pattern.len();
2907
2908 let mut count = 0;
2909 for offset in 0..=lines.len() - pattern_len {
2910 if &lines[offset..offset + pattern_len] == pattern {
2911 count += 1;
2912 }
2913 }
2914 count
2915 }
2916
2917 // Expand prefix and suffix until they are unique
2918 while prefix_start > 0 {
2919 if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
2920 prefix_start -= 1;
2921 } else {
2922 break;
2923 }
2924 }
2925 while suffix_end < old_lines.len() {
2926 if count_matches(old_end..suffix_end, &old_lines) > 1 {
2927 suffix_end += 1;
2928 } else {
2929 break;
2930 }
2931 }
2932
2933 let mut output = String::new();
2934 for line in &old_lines[prefix_start..first_changed_row] {
2935 output.push_str(line);
2936 output.push('\n');
2937 }
2938 output.push_str("<|fim_middle|>\n");
2939 output.push_str(&merged_new_text);
2940 output.push_str("<|fim_suffix|>\n");
2941 for line in &old_lines[old_end..suffix_end] {
2942 output.push_str(line);
2943 output.push('\n');
2944 }
2945
2946 Ok(output)
2947 }
2948
/// One hunk of a unified diff, expressed by content instead of line numbers.
struct ParsedHunk {
    /// Context lines and removed lines in patch order, each terminated by a
    /// newline: the text that is expected to exist in the original.
    old_context: String,
    /// Replacements to perform, as byte ranges into `old_context`.
    edits: Vec<ParsedEdit>,
}

/// A single replacement inside a hunk.
struct ParsedEdit {
    /// Byte range of `old_context` being replaced (empty for a pure insertion).
    range: Range<usize>,
    /// Replacement text (empty for a pure deletion).
    text: String,
}

/// Splits a unified diff into content-based hunks.
///
/// A line starting with `@@` opens a new hunk; its line numbers are ignored.
/// Lines starting with `---` or `+++` are always skipped as file headers,
/// which means a removed line whose content starts with `--` (or an added
/// line starting with `++`) is skipped too. Lines seen before the first `@@`
/// are ignored. Within a hunk, `+` lines become inserted text, `-` lines are
/// appended to the old context and marked for removal, and every other line
/// is context (one leading space, if present, is stripped). Adjacent
/// additions and removals are merged into a single edit.
///
/// Hunks that end up with neither context nor edits are dropped.
fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
    /// Stores a finished hunk unless it is completely empty.
    fn finish(pending: Option<ParsedHunk>, parsed: &mut Vec<ParsedHunk>) {
        if let Some(hunk) = pending {
            let is_blank = hunk.old_context.is_empty() && hunk.edits.is_empty();
            if !is_blank {
                parsed.push(hunk);
            }
        }
    }

    let mut parsed = Vec::new();
    let mut open_hunk: Option<ParsedHunk> = None;

    for raw_line in patch.lines() {
        if raw_line.starts_with("@@") {
            finish(open_hunk.take(), &mut parsed);
            open_hunk = Some(ParsedHunk {
                old_context: String::new(),
                edits: Vec::new(),
            });
            continue;
        }
        if raw_line.starts_with("---") || raw_line.starts_with("+++") {
            continue;
        }
        let Some(hunk) = open_hunk.as_mut() else {
            continue;
        };

        if let Some(inserted) = raw_line.strip_prefix('+') {
            let at = hunk.old_context.len();
            // Extend the previous edit when it ends exactly where we insert.
            let merged = match hunk.edits.last_mut() {
                Some(previous) if previous.range.end == at => {
                    previous.text.push_str(inserted);
                    previous.text.push('\n');
                    true
                }
                _ => false,
            };
            if !merged {
                hunk.edits.push(ParsedEdit {
                    range: at..at,
                    text: format!("{inserted}\n"),
                });
            }
        } else if let Some(deleted) = raw_line.strip_prefix('-') {
            let from = hunk.old_context.len();
            hunk.old_context.push_str(deleted);
            hunk.old_context.push('\n');
            let to = hunk.old_context.len();
            // Grow the previous edit when this removal directly follows it.
            let merged = match hunk.edits.last_mut() {
                Some(previous) if previous.range.end == from => {
                    previous.range.end = to;
                    true
                }
                _ => false,
            };
            if !merged {
                hunk.edits.push(ParsedEdit {
                    range: from..to,
                    text: String::new(),
                });
            }
        } else {
            let unchanged = raw_line.strip_prefix(' ').unwrap_or(raw_line);
            hunk.old_context.push_str(unchanged);
            hunk.old_context.push('\n');
        }
    }

    finish(open_hunk, &mut parsed);
    parsed
}
3022
3023 #[cfg(test)]
3024 mod tests {
3025 use super::*;
3026 use indoc::indoc;
3027
// Table-driven test for `apply_variable_edit`: each case feeds a model output
// to the function, applies the returned range/replacement to the original
// text, and compares the result with `expected`.
#[test]
fn test_apply_variable_edit() {
    struct Case {
        // Used only in the assertion message.
        name: &'static str,
        // Text the model output is resolved against.
        original: &'static str,
        // Variable-edit output as the model would produce it.
        model_output: &'static str,
        // `original` after the edit has been applied.
        expected: &'static str,
    }

    let cases = [
        Case {
            name: "simple_single_line_replacement",
            original: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            model_output: indoc! {"
                two
                <|fim_middle|>
                THREE
                <|fim_suffix|>
                four
            "},
            expected: indoc! {"
                zero
                one
                two
                THREE
                four
                five
            "},
        },
        Case {
            name: "multi_line_replacement",
            original: indoc! {"
                a
                b
                c
                d
                e
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                B
                C
                D
                <|fim_suffix|>
                e
            "},
            expected: indoc! {"
                a
                B
                C
                D
                e
            "},
        },
        Case {
            name: "insertion_between_existing_lines",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                X
                <|fim_suffix|>
                b
            "},
            expected: indoc! {"
                a
                X
                b
                c
            "},
        },
        Case {
            name: "deletion",
            original: indoc! {"
                a
                b
                c
                d
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                <|fim_suffix|>
                c
            "},
            expected: indoc! {"
                a
                c
                d
            "},
        },
        Case {
            name: "replacement_at_start_no_prefix_context",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                <|fim_middle|>
                X
                <|fim_suffix|>
                b
            "},
            expected: indoc! {"
                X
                b
                c
            "},
        },
        Case {
            name: "replacement_at_end_no_suffix_context",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                b
                <|fim_middle|>
                Z
                <|fim_suffix|>
            "},
            expected: indoc! {"
                a
                b
                Z
            "},
        },
        Case {
            name: "context_with_trailing_newline_is_preserved",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                B
                <|fim_suffix|>
                c
            "},
            expected: indoc! {"
                a
                B
                c
            "},
        },
        Case {
            name: "cursor_marker_passes_through_untouched",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                B<|user_cursor|>B
                <|fim_suffix|>
                c
            "},
            expected: indoc! {"
                a
                B<|user_cursor|>B
                c
            "},
        },
        Case {
            name: "multiple_prefix_context_lines",
            original: indoc! {"
                a
                b
                c
                d
                e
            "},
            model_output: indoc! {"
                b
                c
                <|fim_middle|>
                D
                <|fim_suffix|>
                e
            "},
            expected: indoc! {"
                a
                b
                c
                D
                e
            "},
        },
    ];

    for case in cases {
        // `apply_variable_edit` only returns the edit; apply it here to check
        // the resulting text.
        let (edit_range, replacement) =
            apply_variable_edit(case.original, case.model_output).unwrap();
        let mut edited = case.original.to_string();
        edited.replace_range(edit_range, &replacement);
        assert_eq!(edited, case.expected, "{}", case.name);
    }
}
3244
// Table-driven test for `patch_to_variable_edit_output`. Every case checks
// three things: the generated variable-edit text, the result of applying that
// text back onto `old`, and the result of applying the hand-written expected
// text (which guards against mistakes in the expectation itself).
//
// The patches below carry no leading space on their context lines;
// `parse_hunks` accepts that because it strips the space only when present.
#[test]
fn test_patch_to_variable_edit() {
    struct Case {
        // Used only in failure messages, so names should be unique.
        name: &'static str,
        // Text the patch applies to.
        old: &'static str,
        // Unified diff to convert.
        patch: &'static str,
        // Cursor position relative to the start of the first hunk's content.
        cursor_offset: Option<usize>,
        // Exact variable-edit output expected from the conversion.
        expected_variable_edit: &'static str,
        // `old` after the variable edit has been applied.
        expected_after_apply: &'static str,
    }

    let cases = [
        Case {
            name: "simple_replacement",
            old: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            patch: indoc! {"
                @@ -3,3 +3,3 @@
                two
                -three
                +THREE
                four
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                one
                two
                <|fim_middle|>
                THREE
                <|fim_suffix|>
                four
                five
            "},
            expected_after_apply: indoc! {"
                zero
                one
                two
                THREE
                four
                five
            "},
        },
        Case {
            name: "insertion",
            old: indoc! {"
                a
                b
                c
                d
                e
            "},
            patch: indoc! {"
                @@ -2,0 +3,1 @@
                b
                +X
                c
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                a
                b
                <|fim_middle|>
                X
                <|fim_suffix|>
                c
                d
            "},
            expected_after_apply: indoc! {"
                a
                b
                X
                c
                d
                e
            "},
        },
        Case {
            name: "deletion",
            old: indoc! {"
                a
                b
                c
                d
                e
            "},
            patch: indoc! {"
                @@ -2,3 +2,2 @@
                b
                -c
                d
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                a
                b
                <|fim_middle|>
                <|fim_suffix|>
                d
                e
            "},
            expected_after_apply: indoc! {"
                a
                b
                d
                e
            "},
        },
        Case {
            name: "edit_near_start",
            old: indoc! {"
                first
                second
                third
                fourth
            "},
            patch: indoc! {"
                @@ -1,1 +1,1 @@
                -first
                +FIRST
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                <|fim_middle|>
                FIRST
                <|fim_suffix|>
                second
                third
            "},
            expected_after_apply: indoc! {"
                FIRST
                second
                third
                fourth
            "},
        },
        Case {
            name: "edit_near_end",
            old: indoc! {"
                first
                second
                third
                fourth
            "},
            patch: indoc! {"
                @@ -4,1 +4,1 @@
                -fourth
                +FOURTH
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                second
                third
                <|fim_middle|>
                FOURTH
                <|fim_suffix|>
            "},
            expected_after_apply: indoc! {"
                first
                second
                third
                FOURTH
            "},
        },
        Case {
            name: "cursor_at_start_of_replacement",
            old: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            patch: indoc! {"
                @@ -3,3 +3,3 @@
                two
                -three
                +THREE
                four
            "},
            // Offset 4 is just past the hunk's first line ("two\n").
            cursor_offset: Some(4),
            expected_variable_edit: indoc! {"
                one
                two
                <|fim_middle|>
                <|user_cursor|>THREE
                <|fim_suffix|>
                four
                five
            "},
            expected_after_apply: indoc! {"
                zero
                one
                two
                <|user_cursor|>THREE
                four
                five
            "},
        },
        Case {
            name: "cursor_in_middle_of_replacement",
            old: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            patch: indoc! {"
                @@ -3,3 +3,3 @@
                two
                -three
                +THREE
                four
            "},
            // Offset 6 is two bytes into the replacement line.
            cursor_offset: Some(6),
            expected_variable_edit: indoc! {"
                one
                two
                <|fim_middle|>
                TH<|user_cursor|>REE
                <|fim_suffix|>
                four
                five
            "},
            expected_after_apply: indoc! {"
                zero
                one
                two
                TH<|user_cursor|>REE
                four
                five
            "},
        },
        Case {
            name: "expands_context_when_two_lines_not_unique_before_and_after",
            old: indoc! {"
                one
                a
                b
                c
                d
                two
                a
                b
                c
                d
                three
                a
                b
                c
                d
                four
            "},
            patch: indoc! {"
                @@ -4,5 +4,5 @@
                two
                a
                b
                -c
                +C
                d
                three
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                two
                a
                b
                <|fim_middle|>
                C
                <|fim_suffix|>
                d
                three
            "},
            expected_after_apply: indoc! {"
                one
                a
                b
                c
                d
                two
                a
                b
                C
                d
                three
                a
                b
                c
                d
                four
            "},
        },
        Case {
            // Previously shared its name with the case above, which made
            // failure messages ambiguous.
            name: "expands_context_when_brace_lines_not_unique_before_and_after",
            old: indoc! {"
                {
                {
                one();
                }
                }
                {
                {
                two();
                }
                }
                {
                {
                three();
                }
                }
                {
                {
                four();
                }
                }
            "},
            patch: indoc! {"
                @@ -4,5 +4,5 @@
                {
                -two();
                +TWO();
                }
            "},
            cursor_offset: None,
            expected_variable_edit: indoc! {"
                one();
                }
                }
                {
                {
                <|fim_middle|>
                TWO();
                <|fim_suffix|>
                }
                }
                {
                {
                three();
            "},
            expected_after_apply: indoc! {"
                {
                {
                one();
                }
                }
                {
                {
                TWO();
                }
                }
                {
                {
                three();
                }
                }
                {
                {
                four();
                }
                }
            "},
        },
    ];

    for case in cases {
        // 1. The conversion must produce exactly the expected text.
        let output =
            patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
                .unwrap_or_else(|error| {
                    panic!("failed converting patch for {}: {error}", case.name)
                });
        assert_eq!(
            output, case.expected_variable_edit,
            "patch->variable_edit mismatch for {}",
            case.name
        );

        // 2. Applying the generated output must yield the expected text.
        let (edit_range, replacement) = apply_variable_edit(case.old, &output)
            .unwrap_or_else(|error| {
                panic!("failed applying variable_edit for {}: {error}", case.name)
            });
        let mut edited_by_variable_edit = case.old.to_string();
        edited_by_variable_edit.replace_range(edit_range, &replacement);
        assert_eq!(
            edited_by_variable_edit, case.expected_after_apply,
            "variable_edit apply mismatch for {}",
            case.name
        );

        // 3. The hand-written expectation must round-trip as well.
        let (expected_edit_range, expected_replacement) =
            apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
                |error| {
                    panic!(
                        "failed applying expected variable_edit for {}: {error}",
                        case.name
                    )
                },
            );
        let mut edited_by_expected_variable_edit = case.old.to_string();
        edited_by_expected_variable_edit
            .replace_range(expected_edit_range, &expected_replacement);
        assert_eq!(
            edited_by_expected_variable_edit, case.expected_after_apply,
            "expected variable_edit apply mismatch for {}",
            case.name
        );
    }
}
3661
// Checks the exact text `write_cursor_excerpt_section` appends for a small
// excerpt with the cursor in the middle of a line.
#[test]
fn test_write_cursor_excerpt_section() {
    let path = Path::new("test.rs");
    // The second line is indented by four spaces: "fn main() {\n" is 12 bytes,
    // so offset 17 sits right after the `h` of `hello`.
    let context = "fn main() {\n    hello();\n}\n";
    let cursor_offset = 17;
    let mut prompt = String::new();
    write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
    assert_eq!(
        prompt,
        "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
    );
}
3674 }
3675}
3676
3677/// The zeta1 prompt format
3678pub mod zeta1 {
3679 use super::*;
3680 use std::fmt::Write;
3681
/// Cursor marker used by the zeta1 format. It is written at the cursor
/// position when formatting the excerpt, and replaced by the crate-level
/// `CURSOR_MARKER` when model output is cleaned.
pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
/// Line written at the top of the excerpt when it starts at the beginning of the file.
pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
/// Line written immediately before the editable region of the excerpt.
pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
/// Marker written on its own line immediately after the editable region.
pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
3686
/// Fixed preamble of every zeta1 prompt, ending with the "User Edits" heading.
const INSTRUCTION_HEADER: &str = concat!(
    "### Instruction:\n",
    "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
    "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
    "into account the cursor location.\n\n",
    "### User Edits:\n\n"
);
/// Heading placed between the edit events and the excerpt.
const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
/// Heading that ends the prompt; the model's response follows it.
const RESPONSE_HEADER: &str = "\n\n### Response:\n";

/// Assembles a complete zeta1 prompt.
///
/// The result is the instruction header, `input_events`, the excerpt heading,
/// `input_excerpt` and the response heading, concatenated without any extra
/// separators.
pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
    let sections = [
        INSTRUCTION_HEADER,
        input_events,
        EXCERPT_HEADER,
        input_excerpt,
        RESPONSE_HEADER,
    ];

    // Size the buffer once; every section is appended verbatim.
    let total_len: usize = sections.iter().map(|section| section.len()).sum();
    let mut prompt = String::with_capacity(total_len);
    for section in sections {
        prompt.push_str(section);
    }
    prompt
}
3713
3714 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
3715 /// editable and context byte-offset ranges within `cursor_excerpt`.
3716 pub fn format_zeta1_from_input(
3717 input: &ZetaPromptInput,
3718 editable_range: Range<usize>,
3719 context_range: Range<usize>,
3720 ) -> String {
3721 let events = format_zeta1_events(&input.events);
3722 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
3723 format_zeta1_prompt(&events, &excerpt)
3724 }
3725
3726 /// Formats events in zeta1 style (oldest first).
3727 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
3728 let mut result = String::new();
3729 for event in events {
3730 let event_string = format_zeta1_event(event);
3731 if event_string.is_empty() {
3732 continue;
3733 }
3734 if !result.is_empty() {
3735 result.push_str("\n\n");
3736 }
3737 result.push_str(&event_string);
3738 }
3739 result
3740 }
3741
3742 fn format_zeta1_event(event: &Event) -> String {
3743 match event {
3744 Event::BufferChange {
3745 path,
3746 old_path,
3747 diff,
3748 ..
3749 } => {
3750 let mut prompt = String::new();
3751 if old_path != path {
3752 writeln!(
3753 prompt,
3754 "User renamed {} to {}\n",
3755 old_path.display(),
3756 path.display()
3757 )
3758 .ok();
3759 }
3760 if !diff.is_empty() {
3761 write!(
3762 prompt,
3763 "User edited {}:\n```diff\n{}\n```",
3764 path.display(),
3765 diff
3766 )
3767 .ok();
3768 }
3769 prompt
3770 }
3771 }
3772 }
3773
3774 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
3775 /// within `cursor_excerpt`.
3776 fn format_zeta1_excerpt(
3777 input: &ZetaPromptInput,
3778 editable_range: Range<usize>,
3779 context_range: Range<usize>,
3780 ) -> String {
3781 let path_str = input.cursor_path.to_string_lossy();
3782 let excerpt = &*input.cursor_excerpt;
3783 let cursor_offset = input.cursor_offset_in_excerpt;
3784
3785 let mut prompt = String::new();
3786 writeln!(&mut prompt, "```{path_str}").ok();
3787
3788 let starts_at_file_beginning =
3789 input.excerpt_start_row == Some(0) && context_range.start == 0;
3790 if starts_at_file_beginning {
3791 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
3792 }
3793
3794 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
3795
3796 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
3797 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
3798 prompt.push_str(CURSOR_MARKER);
3799 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
3800 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
3801
3802 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
3803 write!(prompt, "\n```").ok();
3804
3805 prompt
3806 }
3807
3808 /// Cleans zeta1 model output by extracting content between editable region
3809 /// markers and converting the zeta1 cursor marker to the universal one.
3810 /// Returns `None` if the output doesn't contain the expected markers.
3811 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
3812 let content = output.replace(CURSOR_MARKER, "");
3813
3814 let content_start = content
3815 .find(EDITABLE_REGION_START_MARKER)
3816 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
3817 .map(|pos| {
3818 if content.as_bytes().get(pos) == Some(&b'\n') {
3819 pos + 1
3820 } else {
3821 pos
3822 }
3823 })
3824 .unwrap_or(0);
3825
3826 let content_end = content
3827 .find(EDITABLE_REGION_END_MARKER)
3828 .map(|pos| {
3829 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
3830 pos - 1
3831 } else {
3832 pos
3833 }
3834 })
3835 .unwrap_or(content.len());
3836
3837 if content_start > content_end {
3838 return Some(String::new());
3839 }
3840
3841 let extracted = &content[content_start..content_end];
3842
3843 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
3844 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
3845 let text_before_cursor = text_before_cursor
3846 .find(EDITABLE_REGION_START_MARKER)
3847 .map(|pos| {
3848 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
3849 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
3850 after_marker + 1
3851 } else {
3852 after_marker
3853 }
3854 })
3855 .unwrap_or(0);
3856 let offset_in_extracted = zeta1_cursor_pos
3857 .saturating_sub(text_before_cursor)
3858 .min(extracted.len());
3859 offset_in_extracted
3860 });
3861
3862 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
3863 if let Some(offset) = cursor_offset {
3864 result.push_str(&extracted[..offset]);
3865 result.push_str(super::CURSOR_MARKER);
3866 result.push_str(&extracted[offset..]);
3867 } else {
3868 result.push_str(extracted);
3869 }
3870
3871 Some(result)
3872 }
3873}
3874
3875#[cfg(test)]
3876mod tests {
3877 use super::*;
3878 use indoc::indoc;
3879
3880 fn make_input(
3881 cursor_excerpt: &str,
3882 editable_range: Range<usize>,
3883 cursor_offset: usize,
3884 events: Vec<Event>,
3885 related_files: Vec<RelatedFile>,
3886 ) -> ZetaPromptInput {
3887 let context_range = 0..cursor_excerpt.len();
3888 ZetaPromptInput {
3889 cursor_path: Path::new("test.rs").into(),
3890 cursor_excerpt: cursor_excerpt.into(),
3891 cursor_offset_in_excerpt: cursor_offset,
3892 excerpt_start_row: None,
3893 events: events.into_iter().map(Arc::new).collect(),
3894 related_files: Some(related_files),
3895 active_buffer_diagnostics: vec![],
3896 excerpt_ranges: ExcerptRanges {
3897 editable_150: editable_range.clone(),
3898 editable_180: editable_range.clone(),
3899 editable_350: editable_range,
3900 editable_150_context_350: context_range.clone(),
3901 editable_180_context_350: context_range.clone(),
3902 editable_350_context_150: context_range,
3903 ..Default::default()
3904 },
3905 syntax_ranges: None,
3906 experiment: None,
3907 in_open_source_repo: false,
3908 can_collect_data: false,
3909 repo_url: None,
3910 }
3911 }
3912
3913 fn make_input_with_context_range(
3914 excerpt: &str,
3915 editable_range: Range<usize>,
3916 context_range: Range<usize>,
3917 cursor_offset: usize,
3918 ) -> ZetaPromptInput {
3919 ZetaPromptInput {
3920 cursor_path: Path::new("test.rs").into(),
3921 cursor_excerpt: excerpt.into(),
3922 cursor_offset_in_excerpt: cursor_offset,
3923 excerpt_start_row: None,
3924 events: vec![],
3925 related_files: Some(vec![]),
3926 active_buffer_diagnostics: vec![],
3927 excerpt_ranges: ExcerptRanges {
3928 editable_150: editable_range.clone(),
3929 editable_180: editable_range.clone(),
3930 editable_350: editable_range,
3931 editable_150_context_350: context_range.clone(),
3932 editable_180_context_350: context_range.clone(),
3933 editable_350_context_150: context_range,
3934 ..Default::default()
3935 },
3936 syntax_ranges: None,
3937 experiment: None,
3938 in_open_source_repo: false,
3939 can_collect_data: false,
3940 repo_url: None,
3941 }
3942 }
3943
3944 fn make_event(path: &str, diff: &str) -> Event {
3945 Event::BufferChange {
3946 path: Path::new(path).into(),
3947 old_path: Path::new(path).into(),
3948 diff: diff.to_string(),
3949 predicted: false,
3950 in_open_source_repo: false,
3951 }
3952 }
3953
3954 fn make_related_file(path: &str, content: &str) -> RelatedFile {
3955 RelatedFile {
3956 path: Path::new(path).into(),
3957 max_row: content.lines().count() as u32,
3958 excerpts: vec![RelatedExcerpt {
3959 row_range: 0..content.lines().count() as u32,
3960 text: content.into(),
3961 order: 0,
3962 }],
3963 in_open_source_repo: false,
3964 }
3965 }
3966
3967 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
3968 format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
3969 }
3970
3971 #[test]
3972 fn test_no_truncation_when_within_budget() {
3973 let input = make_input(
3974 "prefix\neditable\nsuffix",
3975 7..15,
3976 10,
3977 vec![make_event("a.rs", "-old\n+new\n")],
3978 vec![make_related_file("related.rs", "fn helper() {}\n")],
3979 );
3980
3981 assert_eq!(
3982 format_with_budget(&input, 10000),
3983 indoc! {r#"
3984 <|file_sep|>related.rs
3985 fn helper() {}
3986 <|file_sep|>edit history
3987 --- a/a.rs
3988 +++ b/a.rs
3989 -old
3990 +new
3991 <|file_sep|>test.rs
3992 <|fim_prefix|>
3993 prefix
3994 <|fim_middle|>current
3995 edi<|user_cursor|>table
3996 <|fim_suffix|>
3997
3998 suffix
3999 <|fim_middle|>updated
4000 "#}
4001 );
4002 }
4003
4004 #[test]
4005 fn test_truncation_drops_edit_history_when_budget_tight() {
4006 let input = make_input(
4007 "code",
4008 0..4,
4009 2,
4010 vec![make_event("a.rs", "-x\n+y\n")],
4011 vec![
4012 make_related_file("r1.rs", "a\n"),
4013 make_related_file("r2.rs", "b\n"),
4014 ],
4015 );
4016
4017 assert_eq!(
4018 format_with_budget(&input, 10000),
4019 indoc! {r#"
4020 <|file_sep|>r1.rs
4021 a
4022 <|file_sep|>r2.rs
4023 b
4024 <|file_sep|>edit history
4025 --- a/a.rs
4026 +++ b/a.rs
4027 -x
4028 +y
4029 <|file_sep|>test.rs
4030 <|fim_prefix|>
4031 <|fim_middle|>current
4032 co<|user_cursor|>de
4033 <|fim_suffix|>
4034 <|fim_middle|>updated
4035 "#}
4036 );
4037
4038 assert_eq!(
4039 format_with_budget(&input, 50),
4040 indoc! {r#"
4041 <|file_sep|>r1.rs
4042 a
4043 <|file_sep|>r2.rs
4044 b
4045 <|file_sep|>test.rs
4046 <|fim_prefix|>
4047 <|fim_middle|>current
4048 co<|user_cursor|>de
4049 <|fim_suffix|>
4050 <|fim_middle|>updated
4051 "#}
4052 );
4053 }
4054
4055 #[test]
4056 fn test_truncation_includes_partial_excerpts() {
4057 let input = make_input(
4058 "x",
4059 0..1,
4060 0,
4061 vec![],
4062 vec![RelatedFile {
4063 path: Path::new("big.rs").into(),
4064 max_row: 30,
4065 in_open_source_repo: false,
4066 excerpts: vec![
4067 RelatedExcerpt {
4068 row_range: 0..10,
4069 text: "first excerpt\n".into(),
4070 order: 0,
4071 },
4072 RelatedExcerpt {
4073 row_range: 10..20,
4074 text: "second excerpt\n".into(),
4075 order: 0,
4076 },
4077 RelatedExcerpt {
4078 row_range: 20..30,
4079 text: "third excerpt\n".into(),
4080 order: 0,
4081 },
4082 ],
4083 }],
4084 );
4085
4086 assert_eq!(
4087 format_with_budget(&input, 10000),
4088 indoc! {r#"
4089 <|file_sep|>big.rs
4090 first excerpt
4091 ...
4092 second excerpt
4093 ...
4094 third excerpt
4095 <|file_sep|>test.rs
4096 <|fim_prefix|>
4097 <|fim_middle|>current
4098 <|user_cursor|>x
4099 <|fim_suffix|>
4100 <|fim_middle|>updated
4101 "#}
4102 );
4103
4104 assert_eq!(
4105 format_with_budget(&input, 50),
4106 indoc! {r#"
4107 <|file_sep|>big.rs
4108 first excerpt
4109 ...
4110 <|file_sep|>test.rs
4111 <|fim_prefix|>
4112 <|fim_middle|>current
4113 <|user_cursor|>x
4114 <|fim_suffix|>
4115 <|fim_middle|>updated
4116 "#}
4117 );
4118 }
4119
4120 #[test]
4121 fn test_truncation_prioritizes_lower_order_excerpts() {
4122 // Two files: file_a has a high-order excerpt, file_b has a low-order one.
4123 // With tight budget, only the lower-order excerpt from file_b should be included.
4124 let input = make_input(
4125 "x",
4126 0..1,
4127 0,
4128 vec![],
4129 vec![
4130 RelatedFile {
4131 path: Path::new("file_a.rs").into(),
4132 max_row: 10,
4133 in_open_source_repo: false,
4134 excerpts: vec![RelatedExcerpt {
4135 row_range: 0..10,
4136 text: "low priority content\n".into(),
4137 order: 5,
4138 }],
4139 },
4140 RelatedFile {
4141 path: Path::new("file_b.rs").into(),
4142 max_row: 10,
4143 in_open_source_repo: false,
4144 excerpts: vec![RelatedExcerpt {
4145 row_range: 0..10,
4146 text: "high priority content\n".into(),
4147 order: 1,
4148 }],
4149 },
4150 ],
4151 );
4152
4153 // With large budget, both files included; rendered in stable lexicographic order.
4154 assert_eq!(
4155 format_with_budget(&input, 10000),
4156 indoc! {r#"
4157 <|file_sep|>file_a.rs
4158 low priority content
4159 <|file_sep|>file_b.rs
4160 high priority content
4161 <|file_sep|>test.rs
4162 <|fim_prefix|>
4163 <|fim_middle|>current
4164 <|user_cursor|>x
4165 <|fim_suffix|>
4166 <|fim_middle|>updated
4167 "#}
4168 );
4169
4170 // With tight budget, only file_b (lower order) fits.
4171 // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
4172 // file_b header (7) + excerpt (7) = 14 tokens, which fits.
4173 // file_a would need another 14 tokens, which doesn't fit.
4174 assert_eq!(
4175 format_with_budget(&input, 52),
4176 indoc! {r#"
4177 <|file_sep|>file_b.rs
4178 high priority content
4179 <|file_sep|>test.rs
4180 <|fim_prefix|>
4181 <|fim_middle|>current
4182 <|user_cursor|>x
4183 <|fim_suffix|>
4184 <|fim_middle|>updated
4185 "#}
4186 );
4187 }
4188
4189 #[test]
4190 fn test_truncation_drops_high_order_excerpts_within_file() {
4191 // A single file has excerpts at order 1 and order 3. With a tight budget,
4192 // only the order-1 excerpts are included while the order-3 excerpt is
4193 // dropped — even though they belong to the same file. This also preserves
4194 // the parent invariant: parent outline items have order ≤ their best
4195 // child, so they're always included when any child is.
4196 let input = make_input(
4197 "x",
4198 0..1,
4199 0,
4200 vec![],
4201 vec![RelatedFile {
4202 path: Path::new("mod.rs").into(),
4203 max_row: 30,
4204 in_open_source_repo: false,
4205 excerpts: vec![
4206 RelatedExcerpt {
4207 row_range: 0..5,
4208 text: "mod header\n".into(),
4209 order: 1,
4210 },
4211 RelatedExcerpt {
4212 row_range: 5..15,
4213 text: "important fn\n".into(),
4214 order: 1,
4215 },
4216 RelatedExcerpt {
4217 row_range: 15..30,
4218 text: "less important fn\n".into(),
4219 order: 3,
4220 },
4221 ],
4222 }],
4223 );
4224
4225 // With large budget, all three excerpts included.
4226 assert_eq!(
4227 format_with_budget(&input, 10000),
4228 indoc! {r#"
4229 <|file_sep|>mod.rs
4230 mod header
4231 ...
4232 important fn
4233 ...
4234 less important fn
4235 <|file_sep|>test.rs
4236 <|fim_prefix|>
4237 <|fim_middle|>current
4238 <|user_cursor|>x
4239 <|fim_suffix|>
4240 <|fim_middle|>updated
4241 "#}
4242 );
4243
4244 // With tight budget, only order<=1 excerpts included (header + important fn).
4245 assert_eq!(
4246 format_with_budget(&input, 55),
4247 indoc! {r#"
4248 <|file_sep|>mod.rs
4249 mod header
4250 ...
4251 important fn
4252 ...
4253 <|file_sep|>test.rs
4254 <|fim_prefix|>
4255 <|fim_middle|>current
4256 <|user_cursor|>x
4257 <|fim_suffix|>
4258 <|fim_middle|>updated
4259 "#}
4260 );
4261 }
4262
4263 #[test]
4264 fn test_truncation_drops_older_events_first() {
4265 let input = make_input(
4266 "x",
4267 0..1,
4268 0,
4269 vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
4270 vec![],
4271 );
4272
4273 assert_eq!(
4274 format_with_budget(&input, 10000),
4275 indoc! {r#"
4276 <|file_sep|>edit history
4277 --- a/old.rs
4278 +++ b/old.rs
4279 -1
4280 --- a/new.rs
4281 +++ b/new.rs
4282 -2
4283 <|file_sep|>test.rs
4284 <|fim_prefix|>
4285 <|fim_middle|>current
4286 <|user_cursor|>x
4287 <|fim_suffix|>
4288 <|fim_middle|>updated
4289 "#}
4290 );
4291
4292 assert_eq!(
4293 format_with_budget(&input, 55),
4294 indoc! {r#"
4295 <|file_sep|>edit history
4296 --- a/new.rs
4297 +++ b/new.rs
4298 -2
4299 <|file_sep|>test.rs
4300 <|fim_prefix|>
4301 <|fim_middle|>current
4302 <|user_cursor|>x
4303 <|fim_suffix|>
4304 <|fim_middle|>updated
4305 "#}
4306 );
4307 }
4308
4309 #[test]
4310 fn test_cursor_excerpt_always_included_with_minimal_budget() {
4311 let input = make_input(
4312 "fn main() {}",
4313 0..12,
4314 3,
4315 vec![make_event("a.rs", "-old\n+new\n")],
4316 vec![make_related_file("related.rs", "helper\n")],
4317 );
4318
4319 assert_eq!(
4320 format_with_budget(&input, 30),
4321 indoc! {r#"
4322 <|file_sep|>test.rs
4323 <|fim_prefix|>
4324 <|fim_middle|>current
4325 fn <|user_cursor|>main() {}
4326 <|fim_suffix|>
4327 <|fim_middle|>updated
4328 "#}
4329 );
4330 }
4331
4332 fn format_seed_coder(input: &ZetaPromptInput) -> String {
4333 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4334 }
4335
4336 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4337 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4338 }
4339
4340 #[test]
4341 fn test_seed_coder_basic_format() {
4342 let input = make_input(
4343 "prefix\neditable\nsuffix",
4344 7..15,
4345 10,
4346 vec![make_event("a.rs", "-old\n+new\n")],
4347 vec![make_related_file("related.rs", "fn helper() {}\n")],
4348 );
4349
4350 assert_eq!(
4351 format_seed_coder(&input),
4352 indoc! {r#"
4353 <[fim-suffix]>
4354 suffix
4355 <[fim-prefix]><filename>related.rs
4356 fn helper() {}
4357
4358 <filename>edit_history
4359 --- a/a.rs
4360 +++ b/a.rs
4361 -old
4362 +new
4363
4364 <filename>test.rs
4365 prefix
4366 <<<<<<< CURRENT
4367 edi<|user_cursor|>table
4368 =======
4369 <[fim-middle]>"#}
4370 );
4371 }
4372
4373 #[test]
4374 fn test_seed_coder_no_context() {
4375 let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
4376
4377 assert_eq!(
4378 format_seed_coder(&input),
4379 indoc! {r#"
4380 <[fim-suffix]>
4381 after
4382 <[fim-prefix]><filename>test.rs
4383 before
4384 <<<<<<< CURRENT
4385 mid<|user_cursor|>dle
4386 =======
4387 <[fim-middle]>"#}
4388 );
4389 }
4390
4391 #[test]
4392 fn test_seed_coder_truncation_drops_context() {
4393 let input = make_input(
4394 "code",
4395 0..4,
4396 2,
4397 vec![make_event("a.rs", "-x\n+y\n")],
4398 vec![make_related_file("r1.rs", "content\n")],
4399 );
4400
4401 // With large budget, everything is included
4402 assert_eq!(
4403 format_seed_coder(&input),
4404 indoc! {r#"
4405 <[fim-suffix]>
4406 <[fim-prefix]><filename>r1.rs
4407 content
4408
4409 <filename>edit_history
4410 --- a/a.rs
4411 +++ b/a.rs
4412 -x
4413 +y
4414
4415 <filename>test.rs
4416 <<<<<<< CURRENT
4417 co<|user_cursor|>de
4418 =======
4419 <[fim-middle]>"#}
4420 );
4421
4422 // With tight budget, context is dropped but cursor section remains
4423 assert_eq!(
4424 format_seed_coder_with_budget(&input, 30),
4425 indoc! {r#"
4426 <[fim-suffix]>
4427 <[fim-prefix]><filename>test.rs
4428 <<<<<<< CURRENT
4429 co<|user_cursor|>de
4430 =======
4431 <[fim-middle]>"#}
4432 );
4433 }
4434
4435 #[test]
4436 fn test_seed_coder_truncation_prioritizes_lower_order() {
4437 let input = make_input(
4438 "code",
4439 0..4,
4440 2,
4441 vec![],
4442 vec![
4443 RelatedFile {
4444 path: Path::new("low_prio.rs").into(),
4445 max_row: 5,
4446 in_open_source_repo: false,
4447 excerpts: vec![RelatedExcerpt {
4448 row_range: 0..5,
4449 text: "low prio\n".into(),
4450 order: 10,
4451 }],
4452 },
4453 RelatedFile {
4454 path: Path::new("high_prio.rs").into(),
4455 max_row: 5,
4456 in_open_source_repo: false,
4457 excerpts: vec![RelatedExcerpt {
4458 row_range: 0..5,
4459 text: "high prio\n".into(),
4460 order: 1,
4461 }],
4462 },
4463 ],
4464 );
4465
4466 // With large budget, both included; rendered in stable lexicographic order.
4467 assert_eq!(
4468 format_seed_coder(&input),
4469 indoc! {r#"
4470 <[fim-suffix]>
4471 <[fim-prefix]><filename>low_prio.rs
4472 low prio
4473 <filename>high_prio.rs
4474 high prio
4475
4476 <filename>test.rs
4477 <<<<<<< CURRENT
4478 co<|user_cursor|>de
4479 =======
4480 <[fim-middle]>"#}
4481 );
4482
4483 // With tight budget, only high_prio included.
4484 // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
4485 // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
4486 assert_eq!(
4487 format_seed_coder_with_budget(&input, 44),
4488 indoc! {r#"
4489 <[fim-suffix]>
4490 <[fim-prefix]><filename>high_prio.rs
4491 high prio
4492
4493 <filename>test.rs
4494 <<<<<<< CURRENT
4495 co<|user_cursor|>de
4496 =======
4497 <[fim-middle]>"#}
4498 );
4499 }
4500
4501 #[test]
4502 fn test_format_zeta1_from_input_basic() {
4503 let excerpt = "fn before() {}\nfn foo() {\n let x = 1;\n}\nfn after() {}\n";
4504 let input = ZetaPromptInput {
4505 cursor_path: Path::new("src/main.rs").into(),
4506 cursor_excerpt: excerpt.into(),
4507 cursor_offset_in_excerpt: 30,
4508 excerpt_start_row: Some(0),
4509 events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
4510 related_files: Some(vec![]),
4511 active_buffer_diagnostics: vec![],
4512 excerpt_ranges: ExcerptRanges {
4513 editable_150: 15..41,
4514 editable_180: 15..41,
4515 editable_350: 15..41,
4516 editable_150_context_350: 0..excerpt.len(),
4517 editable_180_context_350: 0..excerpt.len(),
4518 editable_350_context_150: 0..excerpt.len(),
4519 ..Default::default()
4520 },
4521 syntax_ranges: None,
4522 experiment: None,
4523 in_open_source_repo: false,
4524 can_collect_data: false,
4525 repo_url: None,
4526 };
4527
4528 let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
4529
4530 assert_eq!(
4531 prompt,
4532 concat!(
4533 "### Instruction:\n",
4534 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4535 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4536 "into account the cursor location.\n",
4537 "\n",
4538 "### User Edits:\n",
4539 "\n",
4540 "User edited other.rs:\n",
4541 "```diff\n",
4542 "-old\n",
4543 "+new\n",
4544 "\n",
4545 "```\n",
4546 "\n",
4547 "### User Excerpt:\n",
4548 "\n",
4549 "```src/main.rs\n",
4550 "<|start_of_file|>\n",
4551 "fn before() {}\n",
4552 "<|editable_region_start|>\n",
4553 "fn foo() {\n",
4554 " <|user_cursor_is_here|>let x = 1;\n",
4555 "\n",
4556 "<|editable_region_end|>}\n",
4557 "fn after() {}\n",
4558 "\n",
4559 "```\n",
4560 "\n",
4561 "### Response:\n",
4562 ),
4563 );
4564 }
4565
4566 #[test]
4567 fn test_format_zeta1_from_input_no_start_of_file() {
4568 let excerpt = "fn foo() {\n let x = 1;\n}\n";
4569 let input = ZetaPromptInput {
4570 cursor_path: Path::new("src/main.rs").into(),
4571 cursor_excerpt: excerpt.into(),
4572 cursor_offset_in_excerpt: 15,
4573 excerpt_start_row: Some(10),
4574 events: vec![],
4575 related_files: Some(vec![]),
4576 active_buffer_diagnostics: vec![],
4577 excerpt_ranges: ExcerptRanges {
4578 editable_150: 0..28,
4579 editable_180: 0..28,
4580 editable_350: 0..28,
4581 editable_150_context_350: 0..28,
4582 editable_180_context_350: 0..28,
4583 editable_350_context_150: 0..28,
4584 ..Default::default()
4585 },
4586 syntax_ranges: None,
4587 experiment: None,
4588 in_open_source_repo: false,
4589 can_collect_data: false,
4590 repo_url: None,
4591 };
4592
4593 let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
4594
4595 assert_eq!(
4596 prompt,
4597 concat!(
4598 "### Instruction:\n",
4599 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4600 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4601 "into account the cursor location.\n",
4602 "\n",
4603 "### User Edits:\n",
4604 "\n",
4605 "\n",
4606 "\n",
4607 "### User Excerpt:\n",
4608 "\n",
4609 "```src/main.rs\n",
4610 "<|editable_region_start|>\n",
4611 "fn foo() {\n",
4612 " <|user_cursor_is_here|>let x = 1;\n",
4613 "}\n",
4614 "\n",
4615 "<|editable_region_end|>\n",
4616 "```\n",
4617 "\n",
4618 "### Response:\n",
4619 ),
4620 );
4621 }
4622
4623 #[test]
4624 fn test_format_zeta1_from_input_with_sub_ranges() {
4625 let excerpt = "// prefix\nfn foo() {\n let x = 1;\n}\n// suffix\n";
4626 let editable_range = 10..37;
4627 let context_range = 0..excerpt.len();
4628
4629 let input = ZetaPromptInput {
4630 cursor_path: Path::new("test.rs").into(),
4631 cursor_excerpt: excerpt.into(),
4632 cursor_offset_in_excerpt: 25,
4633 excerpt_start_row: Some(0),
4634 events: vec![],
4635 related_files: Some(vec![]),
4636 active_buffer_diagnostics: vec![],
4637 excerpt_ranges: ExcerptRanges {
4638 editable_150: editable_range.clone(),
4639 editable_180: editable_range.clone(),
4640 editable_350: editable_range.clone(),
4641 editable_150_context_350: context_range.clone(),
4642 editable_180_context_350: context_range.clone(),
4643 editable_350_context_150: context_range.clone(),
4644 ..Default::default()
4645 },
4646 syntax_ranges: None,
4647 experiment: None,
4648 in_open_source_repo: false,
4649 can_collect_data: false,
4650 repo_url: None,
4651 };
4652
4653 let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
4654
4655 assert_eq!(
4656 prompt,
4657 concat!(
4658 "### Instruction:\n",
4659 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4660 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4661 "into account the cursor location.\n",
4662 "\n",
4663 "### User Edits:\n",
4664 "\n",
4665 "\n",
4666 "\n",
4667 "### User Excerpt:\n",
4668 "\n",
4669 "```test.rs\n",
4670 "<|start_of_file|>\n",
4671 "// prefix\n",
4672 "<|editable_region_start|>\n",
4673 "fn foo() {\n",
4674 " <|user_cursor_is_here|>let x = 1;\n",
4675 "}\n",
4676 "<|editable_region_end|>\n",
4677 "// suffix\n",
4678 "\n",
4679 "```\n",
4680 "\n",
4681 "### Response:\n",
4682 ),
4683 );
4684 }
4685
4686 #[test]
4687 fn test_clean_zeta1_model_output_basic() {
4688 let output = indoc! {"
4689 <|editable_region_start|>
4690 fn main() {
4691 println!(\"hello\");
4692 }
4693 <|editable_region_end|>
4694 "};
4695
4696 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4697 assert_eq!(cleaned, "fn main() {\n println!(\"hello\");\n}");
4698 }
4699
4700 #[test]
4701 fn test_clean_zeta1_model_output_with_cursor() {
4702 let output = indoc! {"
4703 <|editable_region_start|>
4704 fn main() {
4705 <|user_cursor_is_here|>println!(\"hello\");
4706 }
4707 <|editable_region_end|>
4708 "};
4709
4710 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4711 assert_eq!(
4712 cleaned,
4713 "fn main() {\n <|user_cursor|>println!(\"hello\");\n}"
4714 );
4715 }
4716
4717 #[test]
4718 fn test_clean_zeta1_model_output_no_markers() {
4719 let output = "fn main() {}\n";
4720 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4721 assert_eq!(cleaned, "fn main() {}\n");
4722 }
4723
4724 #[test]
4725 fn test_clean_zeta1_model_output_empty_region() {
4726 let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
4727 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4728 assert_eq!(cleaned, "");
4729 }
4730
4731 fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
4732 let mut result = excerpt.to_string();
4733 result.replace_range(
4734 parsed_output.range_in_excerpt.clone(),
4735 &parsed_output.new_editable_region,
4736 );
4737 result
4738 }
4739
4740 #[test]
4741 fn test_parse_zeta2_model_output() {
4742 let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
4743 let context_start = excerpt.find("ctx start").unwrap();
4744 let context_end = excerpt.find("after ctx").unwrap();
4745 let editable_start = excerpt.find("editable old").unwrap();
4746 let editable_end = editable_start + "editable old\n".len();
4747 let input = make_input_with_context_range(
4748 excerpt,
4749 editable_start..editable_end,
4750 context_start..context_end,
4751 editable_start,
4752 );
4753
4754 let output = parse_zeta2_model_output(
4755 "editable new\n>>>>>>> UPDATED\n",
4756 ZetaFormat::V0131GitMergeMarkersPrefix,
4757 &input,
4758 )
4759 .unwrap();
4760
4761 assert_eq!(
4762 apply_edit(excerpt, &output),
4763 "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
4764 );
4765 }
4766
4767 #[test]
4768 fn test_parse_zeta2_model_output_identity() {
4769 let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
4770 let editable_start = excerpt.find("bbb").unwrap();
4771 let editable_end = excerpt.find("ddd").unwrap();
4772 let input = make_input_with_context_range(
4773 excerpt,
4774 editable_start..editable_end,
4775 0..excerpt.len(),
4776 editable_start,
4777 );
4778
4779 let format = ZetaFormat::V0131GitMergeMarkersPrefix;
4780 let output =
4781 parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
4782
4783 assert_eq!(apply_edit(excerpt, &output), excerpt);
4784 }
4785
4786 #[test]
4787 fn test_parse_zeta2_model_output_strips_end_marker() {
4788 let excerpt = "hello\nworld\n";
4789 let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
4790
4791 let format = ZetaFormat::V0131GitMergeMarkersPrefix;
4792 let output1 =
4793 parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
4794 let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();
4795
4796 assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
4797 assert_eq!(apply_edit(excerpt, &output1), "new content\n");
4798 }
4799}