1use anyhow::{Result, anyhow};
2use serde::{Deserialize, Serialize};
3use std::fmt::Write;
4use std::ops::Range;
5use std::path::Path;
6use std::sync::Arc;
7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
8
/// Marker string written into the prompt at the user's cursor position.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes when building a prompt.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this fraction of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
16
/// Rough token estimate for a text that is `bytes` bytes long.
///
/// Assumes three bytes per token and rounds down (integer division).
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
20
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
#[derive(Clone, Debug, Default, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Editable region computed with a 512-token budget (optional).
    pub editable_512: Option<Range<usize>>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
    /// NOTE(review): presumably the context boundary when using editable_350
    /// with 512 tokens of additional context (inferred from the naming
    /// pattern above) — confirm with the producer of these ranges.
    pub editable_350_context_512: Option<Range<usize>>,
    /// NOTE(review): presumably the context boundary when using editable_350
    /// with 1024 tokens of additional context — confirm with the producer.
    pub editable_350_context_1024: Option<Range<usize>>,
    /// NOTE(review): presumably a context range computed with a 4096-token
    /// budget — confirm with the producer.
    pub context_4096: Option<Range<usize>>,
    /// NOTE(review): presumably a context range computed with an 8192-token
    /// budget — confirm with the producer. When present,
    /// `excerpt_ranges_for_format` uses it as both the editable and the
    /// context range for `ZetaFormat::V0304VariableEdit`.
    pub context_8192: Option<Range<usize>>,
}
45
/// Everything needed to build an edit-prediction prompt for one cursor
/// position.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path written into the prompt's cursor section; also compared against
    /// related-file paths when filtering redundant excerpts.
    pub cursor_path: Arc<Path>,
    /// Text surrounding the cursor. All ranges in `excerpt_ranges` and
    /// `cursor_offset_in_excerpt` are byte offsets into this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Row at which `cursor_excerpt` starts. When present, it is added to the
    /// context's row range so that related excerpts of the same file that are
    /// fully contained in the context can be dropped.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events rendered into the prompt's edit-history section.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other locations rendered into the related-files section.
    pub related_files: Vec<RelatedFile>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    /// Defaults to `false` when missing from the serialized input.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// Defaults to `false` when missing from the serialized input.
    #[serde(default)]
    pub can_collect_data: bool,
    /// Omitted from the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
67
/// Identifies a prompt/output format version.
///
/// The variant selects which excerpt ranges, special tokens, cursor-section
/// layout, prefill and output end marker are used. The default is
/// `V0131GitMergeMarkersPrefix`.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Needed because `v0226Hashline` starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
}
95
96impl std::fmt::Display for ZetaFormat {
97 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
98 write!(f, "{}", <&'static str>::from(self))
99 }
100}
101
102impl ZetaFormat {
103 pub fn parse(format_name: &str) -> Result<Self> {
104 let mut results = ZetaFormat::iter().filter(|version| {
105 <&'static str>::from(version)
106 .to_lowercase()
107 .contains(&format_name.to_lowercase())
108 });
109 let Some(result) = results.next() else {
110 anyhow::bail!(
111 "`{format_name}` did not match any of:\n{}",
112 Self::options_as_string()
113 );
114 };
115 if results.next().is_some() {
116 anyhow::bail!(
117 "`{format_name}` matched more than one of:\n{}",
118 Self::options_as_string()
119 );
120 }
121 Ok(result)
122 }
123
124 pub fn options_as_string() -> String {
125 ZetaFormat::iter()
126 .map(|format| format!("- {}\n", <&'static str>::from(format)))
127 .collect::<Vec<_>>()
128 .concat()
129 }
130}
131
/// A user-activity event that can be rendered into the prompt's edit history.
///
/// Serialized with an `"event"` tag naming the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, rendered by `write_event` in a diff-like form.
    BufferChange {
        /// Path written on the `+++ b` header line.
        path: Arc<Path>,
        /// Path written on the `--- a` header line.
        old_path: Arc<Path>,
        /// Diff body, appended verbatim after the two header lines.
        diff: String,
        /// When true, `write_event` prefixes the event with a
        /// `// User accepted prediction:` line.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; not rendered into
        /// the prompt by `write_event`.
        in_open_source_repo: bool,
    },
}
143
144impl Event {
145 pub fn in_open_source_repo(&self) -> bool {
146 match self {
147 Event::BufferChange {
148 in_open_source_repo,
149 ..
150 } => *in_open_source_repo,
151 }
152 }
153}
154
155pub fn write_event(prompt: &mut String, event: &Event) {
156 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
157 for component in path.components() {
158 prompt.push('/');
159 write!(prompt, "{}", component.as_os_str().display()).ok();
160 }
161 }
162 match event {
163 Event::BufferChange {
164 path,
165 old_path,
166 diff,
167 predicted,
168 in_open_source_repo: _,
169 } => {
170 if *predicted {
171 prompt.push_str("// User accepted prediction:\n");
172 }
173 prompt.push_str("--- a");
174 write_path_as_unix_str(prompt, old_path.as_ref());
175 prompt.push_str("\n+++ b");
176 write_path_as_unix_str(prompt, path.as_ref());
177 prompt.push('\n');
178 prompt.push_str(diff);
179 }
180 }
181}
182
/// A file with excerpts that may be included in the prompt as extra context.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written in the file's header line in the prompt.
    pub path: Arc<Path>,
    /// Compared against each excerpt's `row_range.end`: an excerpt ending
    /// before this row is followed by a `...` line when rendered.
    pub max_row: u32,
    /// Excerpts of this file; rendered in this order.
    pub excerpts: Vec<RelatedExcerpt>,
    /// Defaults to `false` when missing from the serialized input.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
191
/// A contiguous piece of a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file covered by `text`.
    pub row_range: Range<u32>,
    /// The excerpt's text, rendered verbatim into the prompt.
    pub text: Arc<str>,
    /// Priority used when fitting excerpts into a token budget: excerpts with
    /// a lower `order` are considered first. Defaults to 0 when missing.
    #[serde(default)]
    pub order: usize,
}
199
200pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
201 special_tokens_for_format(format)
202 .iter()
203 .any(|token| input.cursor_excerpt.contains(token))
204}
205
/// Builds the prompt for `input` in the given `format`, using
/// `MAX_PROMPT_TOKENS` as the token budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
209
210pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
211 match format {
212 ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
213 ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
214 ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
215 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
216 ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
217 ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
218 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
219 ZetaFormat::v0226Hashline => hashline::special_tokens(),
220 ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
221 ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
222 }
223}
224
225pub fn excerpt_ranges_for_format(
226 format: ZetaFormat,
227 ranges: &ExcerptRanges,
228) -> (Range<usize>, Range<usize>) {
229 match format {
230 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
231 ranges.editable_150.clone(),
232 ranges.editable_150_context_350.clone(),
233 ),
234 ZetaFormat::V0114180EditableRegion => (
235 ranges.editable_180.clone(),
236 ranges.editable_180_context_350.clone(),
237 ),
238 ZetaFormat::V0120GitMergeMarkers
239 | ZetaFormat::V0131GitMergeMarkersPrefix
240 | ZetaFormat::V0211Prefill
241 | ZetaFormat::V0211SeedCoder
242 | ZetaFormat::v0226Hashline
243 | ZetaFormat::V0304SeedNoEdits => (
244 ranges.editable_350.clone(),
245 ranges.editable_350_context_150.clone(),
246 ),
247 ZetaFormat::V0304VariableEdit => {
248 let context = ranges
249 .context_8192
250 .clone()
251 .unwrap_or_else(|| ranges.editable_350_context_150.clone());
252 (context.clone(), context)
253 }
254 }
255}
256
/// Appends the cursor-excerpt section for `format` to `prompt` by delegating
/// to the matching format module.
///
/// `editable_range` and `cursor_offset` are passed through to the format
/// writers, which use them as byte offsets into `context`. Several formats
/// share a writer: `V0114180EditableRegion` uses the `v0113_ordered` writer,
/// `V0211Prefill` uses the `v0131_git_merge_markers_prefix` writer, and both
/// seed formats use the `seed_coder` writer. `V0304VariableEdit` does not
/// take an editable range.
pub fn write_cursor_excerpt_section_for_format(
    format: ZetaFormat,
    prompt: &mut String,
    path: &Path,
    context: &str,
    editable_range: &Range<usize>,
    cursor_offset: usize,
) {
    match format {
        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
            v0113_ordered::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
            seed_coder::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // The variable-edit writer is called without `editable_range`.
        ZetaFormat::V0304VariableEdit => {
            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
        }
    }
}
319
/// Converts a byte-offset range within `text` into a range of rows.
///
/// The start row is the number of newlines before `range.start`. The end row
/// is exclusive: it adds the newlines inside the range, plus one more when
/// the text up to `range.end` does not end with a newline (so a partially
/// covered last line is included).
///
/// # Panics
///
/// Panics if the range is out of bounds for `text` or does not fall on
/// character boundaries.
fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
    let count_newlines = |slice: &str| slice.bytes().filter(|&b| b == b'\n').count() as u32;

    let first_row = count_newlines(&text[..range.start]);
    let rows_spanned = count_newlines(&text[range.start..range.end]);
    let ends_mid_line = !text[..range.end].ends_with('\n');

    first_row..first_row + rows_spanned + u32::from(ends_mid_line)
}
328
/// Builds the complete prompt for `input` in the given `format`, trimming
/// optional sections to fit an estimated budget of `max_tokens`.
///
/// The seed formats delegate the whole job to
/// `seed_coder::format_prompt_with_budget`. For every other format the cursor
/// section is always written in full; edit history gets whatever budget
/// remains after it, and related files get whatever remains after the edit
/// history. The final prompt is ordered: related files, edit history, cursor
/// section.
pub fn format_prompt_with_budget_for_format(
    input: &ZetaPromptInput,
    format: ZetaFormat,
    max_tokens: usize,
) -> String {
    let (context, editable_range, context_range, cursor_offset) =
        resolve_cursor_region(input, format);
    let path = &*input.cursor_path;

    // When the excerpt's starting row is known, translate the context range
    // into file rows and drop related excerpts of the cursor file that lie
    // entirely inside it. Otherwise the related files are used unchanged.
    let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
        let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
        let row_range = relative_row_range.start + cursor_excerpt_start_row
            ..relative_row_range.end + cursor_excerpt_start_row;
        &filter_redundant_excerpts(
            input.related_files.clone(),
            input.cursor_path.as_ref(),
            row_range,
        )
    } else {
        &input.related_files
    };

    match format {
        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
            seed_coder::format_prompt_with_budget(
                path,
                context,
                &editable_range,
                cursor_offset,
                &input.events,
                related_files,
                max_tokens,
            )
        }
        _ => {
            // The cursor section is not subject to the budget; it only
            // reduces what is left for the other sections.
            let mut cursor_section = String::new();
            write_cursor_excerpt_section_for_format(
                format,
                &mut cursor_section,
                path,
                context,
                &editable_range,
                cursor_offset,
            );

            let cursor_tokens = estimate_tokens(cursor_section.len());
            let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);

            let edit_history_section = format_edit_history_within_budget(
                &input.events,
                "<|file_sep|>",
                "edit history",
                budget_after_cursor,
            );
            let edit_history_tokens = estimate_tokens(edit_history_section.len());
            let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);

            let related_files_section = format_related_files_within_budget(
                &related_files,
                "<|file_sep|>",
                "",
                budget_after_edit_history,
            );

            // Assemble in prompt order, which is the reverse of the order in
            // which the budget was allocated.
            let mut prompt = String::new();
            prompt.push_str(&related_files_section);
            prompt.push_str(&edit_history_section);
            prompt.push_str(&cursor_section);
            prompt
        }
    }
}
401
402pub fn filter_redundant_excerpts(
403 mut related_files: Vec<RelatedFile>,
404 cursor_path: &Path,
405 cursor_row_range: Range<u32>,
406) -> Vec<RelatedFile> {
407 for file in &mut related_files {
408 if file.path.as_ref() == cursor_path {
409 file.excerpts.retain(|excerpt| {
410 excerpt.row_range.start < cursor_row_range.start
411 || excerpt.row_range.end > cursor_row_range.end
412 });
413 }
414 }
415 related_files.retain(|file| !file.excerpts.is_empty());
416 related_files
417}
418
419pub fn get_prefill_for_format(
420 format: ZetaFormat,
421 context: &str,
422 editable_range: &Range<usize>,
423) -> String {
424 match format {
425 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
426 ZetaFormat::V0112MiddleAtEnd
427 | ZetaFormat::V0113Ordered
428 | ZetaFormat::V0114180EditableRegion
429 | ZetaFormat::V0120GitMergeMarkers
430 | ZetaFormat::V0131GitMergeMarkersPrefix
431 | ZetaFormat::V0211SeedCoder
432 | ZetaFormat::v0226Hashline
433 | ZetaFormat::V0304VariableEdit => String::new(),
434 ZetaFormat::V0304SeedNoEdits => String::new(),
435 }
436}
437
438pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
439 match format {
440 ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
441 ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
442 ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
443 ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => Some(seed_coder::END_MARKER),
444 ZetaFormat::V0112MiddleAtEnd
445 | ZetaFormat::V0113Ordered
446 | ZetaFormat::V0114180EditableRegion
447 | ZetaFormat::v0226Hashline
448 | ZetaFormat::V0304VariableEdit => None,
449 }
450}
451
452pub fn encode_patch_as_output_for_format(
453 format: ZetaFormat,
454 old_editable_region: &str,
455 patch: &str,
456 cursor_offset: Option<usize>,
457) -> Result<Option<String>> {
458 match format {
459 ZetaFormat::v0226Hashline => {
460 hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
461 }
462 ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
463 old_editable_region,
464 patch,
465 cursor_offset,
466 )
467 .map(Some),
468 ZetaFormat::V0304SeedNoEdits => Ok(seed_coder::no_edits(patch)),
469 _ => Ok(None),
470 }
471}
472
/// Parse model output for the given zeta format.
///
/// Returns the byte range within `prompt_inputs.cursor_excerpt` that should
/// be replaced, together with the replacement text.
///
/// # Errors
///
/// Propagates the error from `v0304_variable_edit::apply_variable_edit` for
/// `V0304VariableEdit`; the other formats do not fail here.
pub fn parse_zeta2_model_output(
    output: &str,
    format: ZetaFormat,
    prompt_inputs: &ZetaPromptInput,
) -> Result<(Range<usize>, String)> {
    // Drop the format's end marker when the output ends with it; output
    // without the marker is used unchanged.
    let output = match output_end_marker_for_format(format) {
        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
        None => output,
    };

    let (context, editable_range_in_context, context_range, _) =
        resolve_cursor_region(prompt_inputs, format);
    let context_start = context_range.start;
    let old_editable_region = &context[editable_range_in_context.clone()];

    let (range_in_context, output) = match format {
        // Hashline output is either a list of edit commands applied to the
        // old editable region, or plain replacement text.
        ZetaFormat::v0226Hashline => (
            editable_range_in_context,
            if hashline::output_has_edit_commands(output) {
                hashline::apply_edit_commands(old_editable_region, output)
            } else {
                output.to_string()
            },
        ),
        // The variable-edit format determines its own range within the context.
        ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
        // Output starting with the "no edits" marker keeps the old text.
        ZetaFormat::V0304SeedNoEdits => (
            editable_range_in_context,
            if output.starts_with(seed_coder::NO_EDITS) {
                old_editable_region.to_string()
            } else {
                output.to_string()
            },
        ),
        _ => (editable_range_in_context, output.to_string()),
    };

    // Shift from context-relative offsets back to excerpt-relative offsets.
    let range_in_excerpt =
        range_in_context.start + context_start..range_in_context.end + context_start;
    Ok((range_in_excerpt, output))
}
514
/// Returns the `(editable, context)` ranges for `format`.
///
/// This forwards directly to `excerpt_ranges_for_format` and returns the same
/// result.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
521
522pub fn resolve_cursor_region(
523 input: &ZetaPromptInput,
524 format: ZetaFormat,
525) -> (&str, Range<usize>, Range<usize>, usize) {
526 let (editable_range, context_range) = excerpt_range_for_format(format, &input.excerpt_ranges);
527 let context_start = context_range.start;
528 let context_text = &input.cursor_excerpt[context_range.clone()];
529 let adjusted_editable =
530 (editable_range.start - context_start)..(editable_range.end - context_start);
531 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
532
533 (
534 context_text,
535 adjusted_editable,
536 context_range,
537 adjusted_cursor,
538 )
539}
540
/// Computes the output prefill for `input` in the given `format`, using the
/// context and editable range resolved by `resolve_cursor_region`.
pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
    get_prefill_for_format(format, context, &editable_range)
}
545
546fn format_edit_history_within_budget(
547 events: &[Arc<Event>],
548 file_marker: &str,
549 edit_history_name: &str,
550 max_tokens: usize,
551) -> String {
552 let header = format!("{}{}\n", file_marker, edit_history_name);
553 let header_tokens = estimate_tokens(header.len());
554 if header_tokens >= max_tokens {
555 return String::new();
556 }
557
558 let mut event_strings: Vec<String> = Vec::new();
559 let mut total_tokens = header_tokens;
560
561 for event in events.iter().rev() {
562 let mut event_str = String::new();
563 write_event(&mut event_str, event);
564 let event_tokens = estimate_tokens(event_str.len());
565
566 if total_tokens + event_tokens > max_tokens {
567 break;
568 }
569 total_tokens += event_tokens;
570 event_strings.push(event_str);
571 }
572
573 if event_strings.is_empty() {
574 return String::new();
575 }
576
577 let mut result = header;
578 for event_str in event_strings.iter().rev() {
579 result.push_str(event_str);
580 }
581 result
582}
583
584fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
585 let needs_newline = !excerpt.text.ends_with('\n');
586 let needs_ellipsis = excerpt.row_range.end < file_max_row;
587 let len = excerpt.text.len()
588 + if needs_newline { "\n".len() } else { 0 }
589 + if needs_ellipsis { "...\n".len() } else { 0 };
590 estimate_tokens(len)
591}
592
/// Renders as many related-file excerpts as fit into `max_tokens`
/// (estimated), preferring excerpts with a lower `order`.
///
/// Each included file is written as `{file_prefix}{path}\n` followed by its
/// included excerpts; `file_suffix` is written between consecutive files.
/// Excerpts are selected in priority order but rendered in their original
/// file and excerpt order. Selection stops at the first excerpt that does not
/// fit, so later, smaller excerpts are not considered.
pub fn format_related_files_within_budget(
    related_files: &[RelatedFile],
    file_prefix: &str,
    file_suffix: &str,
    max_tokens: usize,
) -> String {
    // Identifies one excerpt by position, together with its priority.
    struct ExcerptCandidate {
        file_ix: usize,
        excerpt_ix: usize,
        order: usize,
    }

    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
        .iter()
        .enumerate()
        .flat_map(|(file_ix, file)| {
            file.excerpts
                .iter()
                .enumerate()
                .map(move |(excerpt_ix, e)| ExcerptCandidate {
                    file_ix,
                    excerpt_ix,
                    order: e.order,
                })
        })
        .collect();

    // Pre-compute file header strings and their token costs.
    let file_headers: Vec<String> = related_files
        .iter()
        .map(|file| {
            let path_str = file.path.to_string_lossy();
            format!("{}{}\n", file_prefix, path_str)
        })
        .collect();

    // Sort the excerpts by their order and determine how many fit within the budget.
    let mut total_tokens = 0;
    let mut included_excerpt_count = 0_usize;
    let mut included_file_indices = vec![false; related_files.len()];
    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
    for candidate in &excerpt_candidates {
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        let file_already_included = included_file_indices[candidate.file_ix];
        // The header (plus suffix) is charged only for the first excerpt
        // taken from each file.
        let header_cost = if file_already_included {
            0
        } else {
            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
        };
        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
        if total_tokens + header_cost + excerpt_cost > max_tokens {
            break;
        }
        total_tokens += header_cost + excerpt_cost;
        if !file_already_included {
            included_file_indices[candidate.file_ix] = true;
        }
        included_excerpt_count += 1;
    }

    // Keep only the excerpts that fit, then restore the original order.
    excerpt_candidates.truncate(included_excerpt_count);
    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));

    // Render all of the files that fit within the token budget, in the original order.
    let mut result = String::new();
    let mut last_file_ix = None;
    for candidate in &excerpt_candidates {
        if last_file_ix != Some(candidate.file_ix) {
            // The suffix closes the previous file; none is written after the
            // last file.
            if last_file_ix.is_some() {
                result.push_str(file_suffix);
            }
            result.push_str(&file_headers[candidate.file_ix]);
            last_file_ix = Some(candidate.file_ix);
        }
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        result.push_str(&excerpt.text);
        if !result.ends_with('\n') {
            result.push('\n');
        }
        // Mark that the file continues past this excerpt.
        if excerpt.row_range.end < file.max_row {
            result.push_str("...\n");
        }
    }

    result
}
681
682pub fn write_related_files(
683 prompt: &mut String,
684 related_files: &[RelatedFile],
685) -> Vec<Range<usize>> {
686 let mut ranges = Vec::new();
687 for file in related_files {
688 let start = prompt.len();
689 let path_str = file.path.to_string_lossy();
690 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
691 for excerpt in &file.excerpts {
692 prompt.push_str(&excerpt.text);
693 if !prompt.ends_with('\n') {
694 prompt.push('\n');
695 }
696 if excerpt.row_range.end < file.max_row {
697 prompt.push_str("...\n");
698 }
699 }
700 let end = prompt.len();
701 ranges.push(start..end);
702 }
703 ranges
704}
705
706mod v0112_middle_at_end {
707 use super::*;
708
709 pub fn special_tokens() -> &'static [&'static str] {
710 &[
711 "<|fim_prefix|>",
712 "<|fim_suffix|>",
713 "<|fim_middle|>",
714 "<|file_sep|>",
715 CURSOR_MARKER,
716 ]
717 }
718
719 pub fn write_cursor_excerpt_section(
720 prompt: &mut String,
721 path: &Path,
722 context: &str,
723 editable_range: &Range<usize>,
724 cursor_offset: usize,
725 ) {
726 let path_str = path.to_string_lossy();
727 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
728
729 prompt.push_str("<|fim_prefix|>\n");
730 prompt.push_str(&context[..editable_range.start]);
731
732 prompt.push_str("<|fim_suffix|>\n");
733 prompt.push_str(&context[editable_range.end..]);
734 if !prompt.ends_with('\n') {
735 prompt.push('\n');
736 }
737
738 prompt.push_str("<|fim_middle|>current\n");
739 prompt.push_str(&context[editable_range.start..cursor_offset]);
740 prompt.push_str(CURSOR_MARKER);
741 prompt.push_str(&context[cursor_offset..editable_range.end]);
742 if !prompt.ends_with('\n') {
743 prompt.push('\n');
744 }
745
746 prompt.push_str("<|fim_middle|>updated\n");
747 }
748}
749
750mod v0113_ordered {
751 use super::*;
752
753 pub fn special_tokens() -> &'static [&'static str] {
754 &[
755 "<|fim_prefix|>",
756 "<|fim_suffix|>",
757 "<|fim_middle|>",
758 "<|file_sep|>",
759 CURSOR_MARKER,
760 ]
761 }
762
763 pub fn write_cursor_excerpt_section(
764 prompt: &mut String,
765 path: &Path,
766 context: &str,
767 editable_range: &Range<usize>,
768 cursor_offset: usize,
769 ) {
770 let path_str = path.to_string_lossy();
771 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
772
773 prompt.push_str("<|fim_prefix|>\n");
774 prompt.push_str(&context[..editable_range.start]);
775 if !prompt.ends_with('\n') {
776 prompt.push('\n');
777 }
778
779 prompt.push_str("<|fim_middle|>current\n");
780 prompt.push_str(&context[editable_range.start..cursor_offset]);
781 prompt.push_str(CURSOR_MARKER);
782 prompt.push_str(&context[cursor_offset..editable_range.end]);
783 if !prompt.ends_with('\n') {
784 prompt.push('\n');
785 }
786
787 prompt.push_str("<|fim_suffix|>\n");
788 prompt.push_str(&context[editable_range.end..]);
789 if !prompt.ends_with('\n') {
790 prompt.push('\n');
791 }
792
793 prompt.push_str("<|fim_middle|>updated\n");
794 }
795}
796
mod v0114180_editable_region {
    //! Format that reuses the special tokens of `v0113_ordered`.

    use super::*;

    /// Special tokens used by this format; identical to those of
    /// `v0113_ordered`.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
804
805pub mod v0120_git_merge_markers {
806 //! A prompt that uses git-style merge conflict markers to represent the editable region.
807 //!
808 //! Example prompt:
809 //!
810 //! <|file_sep|>path/to/target_file.py
811 //! <|fim_prefix|>
812 //! code before editable region
813 //! <|fim_suffix|>
814 //! code after editable region
815 //! <|fim_middle|>
816 //! <<<<<<< CURRENT
817 //! code that
818 //! needs to<|user_cursor|>
819 //! be rewritten
820 //! =======
821 //!
822 //! Expected output (should be generated by the model):
823 //!
824 //! updated
825 //! code with
826 //! changes applied
827 //! >>>>>>> UPDATED
828
829 use super::*;
830
831 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
832 pub const SEPARATOR: &str = "=======\n";
833 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
834
835 pub fn special_tokens() -> &'static [&'static str] {
836 &[
837 "<|fim_prefix|>",
838 "<|fim_suffix|>",
839 "<|fim_middle|>",
840 "<|file_sep|>",
841 START_MARKER,
842 SEPARATOR,
843 END_MARKER,
844 CURSOR_MARKER,
845 ]
846 }
847
848 pub fn write_cursor_excerpt_section(
849 prompt: &mut String,
850 path: &Path,
851 context: &str,
852 editable_range: &Range<usize>,
853 cursor_offset: usize,
854 ) {
855 let path_str = path.to_string_lossy();
856 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
857
858 prompt.push_str("<|fim_prefix|>");
859 prompt.push_str(&context[..editable_range.start]);
860
861 prompt.push_str("<|fim_suffix|>");
862 prompt.push_str(&context[editable_range.end..]);
863 if !prompt.ends_with('\n') {
864 prompt.push('\n');
865 }
866
867 prompt.push_str("<|fim_middle|>");
868 prompt.push_str(START_MARKER);
869 prompt.push_str(&context[editable_range.start..cursor_offset]);
870 prompt.push_str(CURSOR_MARKER);
871 prompt.push_str(&context[cursor_offset..editable_range.end]);
872 if !prompt.ends_with('\n') {
873 prompt.push('\n');
874 }
875 prompt.push_str(SEPARATOR);
876 }
877}
878
879pub mod v0131_git_merge_markers_prefix {
880 //! A prompt that uses git-style merge conflict markers to represent the editable region.
881 //!
882 //! Example prompt:
883 //!
884 //! <|file_sep|>path/to/target_file.py
885 //! <|fim_prefix|>
886 //! code before editable region
887 //! <<<<<<< CURRENT
888 //! code that
889 //! needs to<|user_cursor|>
890 //! be rewritten
891 //! =======
892 //! <|fim_suffix|>
893 //! code after editable region
894 //! <|fim_middle|>
895 //!
896 //! Expected output (should be generated by the model):
897 //!
898 //! updated
899 //! code with
900 //! changes applied
901 //! >>>>>>> UPDATED
902
903 use super::*;
904
905 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
906 pub const SEPARATOR: &str = "=======\n";
907 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
908
909 pub fn special_tokens() -> &'static [&'static str] {
910 &[
911 "<|fim_prefix|>",
912 "<|fim_suffix|>",
913 "<|fim_middle|>",
914 "<|file_sep|>",
915 START_MARKER,
916 SEPARATOR,
917 END_MARKER,
918 CURSOR_MARKER,
919 ]
920 }
921
922 pub fn write_cursor_excerpt_section(
923 prompt: &mut String,
924 path: &Path,
925 context: &str,
926 editable_range: &Range<usize>,
927 cursor_offset: usize,
928 ) {
929 let path_str = path.to_string_lossy();
930 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
931
932 prompt.push_str("<|fim_prefix|>");
933 prompt.push_str(&context[..editable_range.start]);
934 prompt.push_str(START_MARKER);
935 prompt.push_str(&context[editable_range.start..cursor_offset]);
936 prompt.push_str(CURSOR_MARKER);
937 prompt.push_str(&context[cursor_offset..editable_range.end]);
938 if !prompt.ends_with('\n') {
939 prompt.push('\n');
940 }
941 prompt.push_str(SEPARATOR);
942
943 prompt.push_str("<|fim_suffix|>");
944 prompt.push_str(&context[editable_range.end..]);
945 if !prompt.ends_with('\n') {
946 prompt.push('\n');
947 }
948
949 prompt.push_str("<|fim_middle|>");
950 }
951}
952
953pub mod v0211_prefill {
954 use super::*;
955
956 pub fn special_tokens() -> &'static [&'static str] {
957 v0131_git_merge_markers_prefix::special_tokens()
958 }
959
960 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
961 let editable_region = &context[editable_range.start..editable_range.end];
962
963 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
964 let prefill_len = editable_region.floor_char_boundary(prefill_len);
965
966 // Find a token boundary to avoid splitting tokens in the prefill.
967 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
968 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
969 // the \n and consume any consecutive \n characters after it.
970 let prefill = &editable_region[..prefill_len];
971 match prefill.rfind('\n') {
972 Some(pos) => {
973 let mut end = pos + 1;
974 while end < editable_region.len()
975 && editable_region.as_bytes().get(end) == Some(&b'\n')
976 {
977 end += 1;
978 }
979 editable_region[..end].to_string()
980 }
981 // No newline found. Fall back to splitting before the last space
982 // (word-level boundary)
983 None => match prefill.rfind(' ') {
984 Some(pos) => prefill[..pos].to_string(),
985 None => prefill.to_string(),
986 },
987 }
988 }
989}
990
991pub mod hashline {
992
993 use std::fmt::Display;
994
    /// Marker written at the very end of the cursor section.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Marker written immediately before the hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";

    use super::*;

    /// Prefix of a model-output line that starts a "set" edit command.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a model-output line that starts an "insert" edit command.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
1002
1003 pub fn special_tokens() -> &'static [&'static str] {
1004 return &[
1005 SET_COMMAND_MARKER,
1006 "<|set_range|>",
1007 INSERT_COMMAND_MARKER,
1008 CURSOR_MARKER,
1009 "<|file_sep|>",
1010 "<|fim_prefix|>",
1011 "<|fim_suffix|>",
1012 "<|fim_middle|>",
1013 ];
1014 }
1015
    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
    #[derive(Debug, Clone, PartialEq, Eq)]
    struct LineRef {
        /// Index of the referenced line.
        index: usize,
        /// One-byte hash of the referenced line's content (see `hash_line`).
        hash: u8,
    }

    impl Display for LineRef {
        /// Formats the reference as `{index}:{hash}`, with the hash written
        /// as two lowercase hexadecimal digits.
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            write!(f, "{}:{:02x}", self.index, self.hash)
        }
    }
1028
1029 pub fn hash_line(line: &[u8]) -> u8 {
1030 let mut h: u8 = 0;
1031 for &byte in line {
1032 h = h.wrapping_add(byte);
1033 }
1034 return h;
1035 }
1036
1037 /// Write the hashline-encoded editable region into `out`. Each line of
1038 /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1039 /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1040 /// to the start of `editable_text`).
1041 pub fn write_hashline_editable_region(
1042 out: &mut String,
1043 editable_text: &str,
1044 cursor_offset_in_editable: usize,
1045 ) {
1046 let mut offset = 0;
1047 for (i, line) in editable_text.lines().enumerate() {
1048 let (head, cursor, tail) = if cursor_offset_in_editable > offset
1049 && cursor_offset_in_editable < offset + line.len()
1050 {
1051 (
1052 &line[..cursor_offset_in_editable - offset],
1053 CURSOR_MARKER,
1054 &line[cursor_offset_in_editable - offset..],
1055 )
1056 } else {
1057 (line, "", "")
1058 };
1059 write!(
1060 out,
1061 "\n{}|{head}{cursor}{tail}",
1062 LineRef {
1063 index: i,
1064 hash: hash_line(line.as_bytes())
1065 }
1066 )
1067 .unwrap();
1068 offset += line.len() + 1;
1069 }
1070 }
1071
1072 pub fn write_cursor_excerpt_section(
1073 prompt: &mut String,
1074 path: &Path,
1075 context: &str,
1076 editable_range: &Range<usize>,
1077 cursor_offset: usize,
1078 ) {
1079 let path_str = path.to_string_lossy();
1080 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1081
1082 prompt.push_str("<|fim_prefix|>\n");
1083 prompt.push_str(&context[..editable_range.start]);
1084 prompt.push_str(START_MARKER);
1085
1086 let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1087 let editable_region = &context[editable_range.clone()];
1088 write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1089
1090 if !prompt.ends_with('\n') {
1091 prompt.push('\n');
1092 }
1093
1094 prompt.push_str("<|fim_suffix|>\n");
1095 prompt.push_str(&context[editable_range.end..]);
1096 if !prompt.ends_with('\n') {
1097 prompt.push('\n');
1098 }
1099
1100 prompt.push_str(END_MARKER);
1101 }
1102
1103 /// A single edit command parsed from the model output.
1104 #[derive(Debug)]
1105 enum EditCommand<'a> {
1106 /// Replace a range of lines (inclusive on both ends). Single-line set is
1107 /// represented by `start == end`.
1108 Set {
1109 start: LineRef,
1110 end: LineRef,
1111 content: &'a str,
1112 },
1113 /// Insert new lines after the given line, or before the first line if
1114 /// `after` is `None`.
1115 Insert {
1116 after: Option<LineRef>,
1117 content: &'a str,
1118 },
1119 }
1120
1121 /// Parse a line reference like `3:c3` into a `LineRef`.
1122 fn parse_line_ref(s: &str) -> Option<LineRef> {
1123 let (idx_str, hash_str) = s.split_once(':')?;
1124 let index = idx_str.parse::<usize>().ok()?;
1125 let hash = u8::from_str_radix(hash_str, 16).ok()?;
1126 Some(LineRef { index, hash })
1127 }
1128
1129 /// Parse the model output into a list of `EditCommand`s.
1130 fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1131 let mut commands = Vec::new();
1132 let mut offset = 0usize;
1133
1134 while offset < model_output.len() {
1135 let next_nl = model_output[offset..]
1136 .find('\n')
1137 .map(|i| offset + i)
1138 .unwrap_or(model_output.len());
1139 let line = &model_output[offset..next_nl];
1140 let line_end = if next_nl < model_output.len() {
1141 next_nl + 1
1142 } else {
1143 next_nl
1144 };
1145
1146 let trimmed = line.trim();
1147 let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1148 (true, spec)
1149 } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1150 (false, spec)
1151 } else {
1152 offset = line_end;
1153 continue;
1154 };
1155
1156 let mut content_end = line_end;
1157 let mut scan = line_end;
1158
1159 while scan < model_output.len() {
1160 let body_nl = model_output[scan..]
1161 .find('\n')
1162 .map(|i| scan + i)
1163 .unwrap_or(model_output.len());
1164 let body_line = &model_output[scan..body_nl];
1165 if body_line.trim().starts_with(SET_COMMAND_MARKER)
1166 || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1167 {
1168 break;
1169 }
1170 scan = if body_nl < model_output.len() {
1171 body_nl + 1
1172 } else {
1173 body_nl
1174 };
1175 content_end = scan;
1176 }
1177
1178 let content = &model_output[line_end..content_end];
1179
1180 if is_set {
1181 if let Some((start_str, end_str)) = specifier.split_once('-') {
1182 if let (Some(start), Some(end)) =
1183 (parse_line_ref(start_str), parse_line_ref(end_str))
1184 {
1185 commands.push(EditCommand::Set {
1186 start,
1187 end,
1188 content,
1189 });
1190 }
1191 } else if let Some(target) = parse_line_ref(specifier) {
1192 commands.push(EditCommand::Set {
1193 start: target.clone(),
1194 end: target,
1195 content,
1196 });
1197 }
1198 } else {
1199 let after = parse_line_ref(specifier);
1200 commands.push(EditCommand::Insert { after, content });
1201 }
1202
1203 offset = scan;
1204 }
1205
1206 commands
1207 }
1208
/// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
/// editable region, returning the plain text content.
///
/// Each line is cut after its first `|`; a line without any `|` is kept
/// unchanged. Lines are re-joined with `\n`, and the result ends with a
/// newline exactly when `region` does.
pub fn strip_hashline_prefixes(region: &str) -> String {
    // The decoded text is never longer than the encoded input.
    let mut decoded = String::with_capacity(region.len());
    for (index, line) in region.lines().enumerate() {
        if index > 0 {
            decoded.push('\n');
        }
        decoded.push_str(line.split_once('|').map_or(line, |(_, text)| text));
    }
    if region.ends_with('\n') {
        decoded.push('\n');
    }
    decoded
}
1224
1225 pub fn output_has_edit_commands(model_output: &str) -> bool {
1226 model_output.contains(SET_COMMAND_MARKER) || model_output.contains(INSERT_COMMAND_MARKER)
1227 }
1228
1229 /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1230 /// original editable region text.
1231 ///
1232 /// `editable_region` is the original text of the editable region (without hash
1233 /// prefixes). `model_output` is the raw model response containing edit commands.
1234 ///
1235 /// Returns the full replacement text for the editable region.
1236 pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1237 let original_lines: Vec<&str> = editable_region.lines().collect();
1238 let old_hashes: Vec<u8> = original_lines
1239 .iter()
1240 .map(|line| hash_line(line.as_bytes()))
1241 .collect();
1242
1243 let commands = parse_edit_commands(model_output);
1244
1245 // For set operations: indexed by start line → Some((end line index, content))
1246 // For insert operations: indexed by line index → vec of content to insert after
1247 // Insert-before-first is tracked separately.
1248 let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1249 let mut insert_before_first: Vec<&str> = Vec::new();
1250 let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1251
1252 for command in &commands {
1253 match command {
1254 EditCommand::Set {
1255 start,
1256 end,
1257 content,
1258 } => {
1259 if start.index < old_hashes.len()
1260 && end.index < old_hashes.len()
1261 && start.index <= end.index
1262 && old_hashes[start.index] == start.hash
1263 && old_hashes[end.index] == end.hash
1264 {
1265 set_ops[start.index] = Some((end.index, *content));
1266 }
1267 }
1268 EditCommand::Insert { after, content } => match after {
1269 None => insert_before_first.push(*content),
1270 Some(line_ref) => {
1271 if line_ref.index < old_hashes.len()
1272 && old_hashes[line_ref.index] == line_ref.hash
1273 {
1274 insert_after[line_ref.index].push(*content);
1275 }
1276 }
1277 },
1278 }
1279 }
1280
1281 let mut result = String::new();
1282
1283 // Emit any insertions before the first line
1284 for content in &insert_before_first {
1285 result.push_str(content);
1286 if !content.ends_with('\n') {
1287 result.push('\n');
1288 }
1289 }
1290
1291 let mut i = 0;
1292 while i < original_lines.len() {
1293 if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1294 // Replace lines i..=end_index with the replacement content
1295 result.push_str(replacement);
1296 if !replacement.is_empty() && !replacement.ends_with('\n') {
1297 result.push('\n');
1298 }
1299 // Emit any insertions after the end of this set range
1300 if *end_index < insert_after.len() {
1301 for content in &insert_after[*end_index] {
1302 result.push_str(content);
1303 if !content.ends_with('\n') {
1304 result.push('\n');
1305 }
1306 }
1307 }
1308 i = end_index + 1;
1309 } else {
1310 // Keep the original line
1311 result.push_str(original_lines[i]);
1312 result.push('\n');
1313 // Emit any insertions after this line
1314 for content in &insert_after[i] {
1315 result.push_str(content);
1316 if !content.ends_with('\n') {
1317 result.push('\n');
1318 }
1319 }
1320 i += 1;
1321 }
1322 }
1323
1324 // Preserve trailing newline behavior: if the original ended with a
1325 // newline the result already has one; if it didn't, trim the extra one
1326 // we added.
1327 if !editable_region.ends_with('\n') && result.ends_with('\n') {
1328 result.pop();
1329 }
1330
1331 result
1332 }
1333
1334 /// Convert a unified diff patch into hashline edit commands.
1335 ///
1336 /// Parses the unified diff `patch` directly to determine which lines of
1337 /// `old_text` are deleted/replaced and what new lines are added, then emits
1338 /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1339 /// `{index}:{hash}` identifiers.
1340 ///
1341 /// `cursor_offset` is an optional byte offset into the first hunk's new
1342 /// text (context + additions) where the cursor marker should be placed.
1343 pub fn patch_to_edit_commands(
1344 old_text: &str,
1345 patch: &str,
1346 cursor_offset: Option<usize>,
1347 ) -> Result<String> {
1348 let old_lines: Vec<&str> = old_text.lines().collect();
1349 let old_hashes: Vec<u8> = old_lines
1350 .iter()
1351 .map(|line| hash_line(line.as_bytes()))
1352 .collect();
1353
1354 let mut result = String::new();
1355 let mut first_hunk = true;
1356
1357 struct Hunk<'a> {
1358 line_range: Range<usize>,
1359 new_text_lines: Vec<&'a str>,
1360 cursor_line_offset_in_new_text: Option<(usize, usize)>,
1361 }
1362
1363 // Parse the patch line by line. We only care about hunk headers,
1364 // context, deletions, and additions.
1365 let mut old_line_index: usize = 0;
1366 let mut current_hunk: Option<Hunk> = None;
1367 // Byte offset tracking within the hunk's new text for cursor placement.
1368 let mut new_text_byte_offset: usize = 0;
1369 // The line index of the last old line seen before/in the current hunk
1370 // (used for insert-after reference).
1371 let mut last_old_line_before_hunk: Option<usize> = None;
1372
1373 fn flush_hunk(
1374 hunk: Hunk,
1375 last_old_line: Option<usize>,
1376 result: &mut String,
1377 old_hashes: &[u8],
1378 ) {
1379 if hunk.line_range.is_empty() {
1380 // Pure insertion — reference the old line to insert after when in bounds.
1381 if let Some(after) = last_old_line
1382 && let Some(&hash) = old_hashes.get(after)
1383 {
1384 write!(
1385 result,
1386 "{INSERT_COMMAND_MARKER}{}\n",
1387 LineRef { index: after, hash }
1388 )
1389 .unwrap();
1390 } else {
1391 result.push_str(INSERT_COMMAND_MARKER);
1392 result.push('\n');
1393 }
1394 } else {
1395 let start = hunk.line_range.start;
1396 let end_exclusive = hunk.line_range.end;
1397 let deleted_line_count = end_exclusive.saturating_sub(start);
1398
1399 if deleted_line_count == 1 {
1400 if let Some(&hash) = old_hashes.get(start) {
1401 write!(
1402 result,
1403 "{SET_COMMAND_MARKER}{}\n",
1404 LineRef { index: start, hash }
1405 )
1406 .unwrap();
1407 } else {
1408 result.push_str(SET_COMMAND_MARKER);
1409 result.push('\n');
1410 }
1411 } else {
1412 let end_inclusive = end_exclusive - 1;
1413 match (
1414 old_hashes.get(start).copied(),
1415 old_hashes.get(end_inclusive).copied(),
1416 ) {
1417 (Some(start_hash), Some(end_hash)) => {
1418 write!(
1419 result,
1420 "{SET_COMMAND_MARKER}{}-{}\n",
1421 LineRef {
1422 index: start,
1423 hash: start_hash
1424 },
1425 LineRef {
1426 index: end_inclusive,
1427 hash: end_hash
1428 }
1429 )
1430 .unwrap();
1431 }
1432 _ => {
1433 result.push_str(SET_COMMAND_MARKER);
1434 result.push('\n');
1435 }
1436 }
1437 }
1438 }
1439 for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1440 if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1441 && line_offset == cursor_line_offset
1442 {
1443 result.push_str(&line[..char_offset]);
1444 result.push_str(CURSOR_MARKER);
1445 result.push_str(&line[char_offset..]);
1446 continue;
1447 }
1448
1449 result.push_str(line);
1450 }
1451 }
1452
1453 for raw_line in patch.split_inclusive('\n') {
1454 if raw_line.starts_with("@@") {
1455 // Flush any pending change hunk from a previous patch hunk.
1456 if let Some(hunk) = current_hunk.take() {
1457 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1458 }
1459
1460 // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1461 // We intentionally do not trust old_start as a direct local index into `old_text`,
1462 // because some patches are produced against a larger file region and carry
1463 // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1464 if first_hunk {
1465 new_text_byte_offset = 0;
1466 first_hunk = false;
1467 }
1468 continue;
1469 }
1470
1471 if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1472 continue;
1473 }
1474 if raw_line.starts_with("\\ No newline") {
1475 continue;
1476 }
1477
1478 if raw_line.starts_with('-') {
1479 // Extend or start a change hunk with this deleted old line.
1480 match &mut current_hunk {
1481 Some(Hunk {
1482 line_range: range, ..
1483 }) => range.end = old_line_index + 1,
1484 None => {
1485 current_hunk = Some(Hunk {
1486 line_range: old_line_index..old_line_index + 1,
1487 new_text_lines: Vec::new(),
1488 cursor_line_offset_in_new_text: None,
1489 });
1490 }
1491 }
1492 old_line_index += 1;
1493 } else if let Some(added_content) = raw_line.strip_prefix('+') {
1494 // Place cursor marker if cursor_offset falls within this line.
1495 let mut cursor_line_offset = None;
1496 if let Some(cursor_off) = cursor_offset
1497 && (first_hunk
1498 || cursor_off >= new_text_byte_offset
1499 && cursor_off <= new_text_byte_offset + added_content.len())
1500 {
1501 let line_offset = added_content.floor_char_boundary(
1502 cursor_off
1503 .saturating_sub(new_text_byte_offset)
1504 .min(added_content.len()),
1505 );
1506 cursor_line_offset = Some(line_offset);
1507 }
1508
1509 new_text_byte_offset += added_content.len();
1510
1511 let hunk = current_hunk.get_or_insert(Hunk {
1512 line_range: old_line_index..old_line_index,
1513 new_text_lines: vec![],
1514 cursor_line_offset_in_new_text: None,
1515 });
1516 hunk.new_text_lines.push(added_content);
1517 hunk.cursor_line_offset_in_new_text = cursor_line_offset
1518 .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1519 } else {
1520 // Context line (starts with ' ' or is empty).
1521 if let Some(hunk) = current_hunk.take() {
1522 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1523 }
1524 last_old_line_before_hunk = Some(old_line_index);
1525 old_line_index += 1;
1526 let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1527 new_text_byte_offset += content.len();
1528 }
1529 }
1530
1531 // Flush final group.
1532 if let Some(hunk) = current_hunk.take() {
1533 flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1534 }
1535
1536 // Trim a single trailing newline.
1537 if result.ends_with('\n') {
1538 result.pop();
1539 }
1540
1541 Ok(result)
1542 }
1543
1544 #[cfg(test)]
1545 mod tests {
1546 use super::*;
1547 use indoc::indoc;
1548
1549 #[test]
1550 fn test_format_cursor_region() {
1551 struct Case {
1552 name: &'static str,
1553 context: &'static str,
1554 editable_range: Range<usize>,
1555 cursor_offset: usize,
1556 expected: &'static str,
1557 }
1558
1559 let cases = [
1560 Case {
1561 name: "basic_cursor_placement",
1562 context: "hello world\n",
1563 editable_range: 0..12,
1564 cursor_offset: 5,
1565 expected: indoc! {"
1566 <|file_sep|>test.rs
1567 <|fim_prefix|>
1568 <|fim_middle|>current
1569 0:5c|hello<|user_cursor|> world
1570 <|fim_suffix|>
1571 <|fim_middle|>updated"},
1572 },
1573 Case {
1574 name: "multiline_cursor_on_second_line",
1575 context: "aaa\nbbb\nccc\n",
1576 editable_range: 0..12,
1577 cursor_offset: 5, // byte 5 → 1 byte into "bbb"
1578 expected: indoc! {"
1579 <|file_sep|>test.rs
1580 <|fim_prefix|>
1581 <|fim_middle|>current
1582 0:23|aaa
1583 1:26|b<|user_cursor|>bb
1584 2:29|ccc
1585 <|fim_suffix|>
1586 <|fim_middle|>updated"},
1587 },
1588 Case {
1589 name: "no_trailing_newline_in_context",
1590 context: "line1\nline2",
1591 editable_range: 0..11,
1592 cursor_offset: 3,
1593 expected: indoc! {"
1594 <|file_sep|>test.rs
1595 <|fim_prefix|>
1596 <|fim_middle|>current
1597 0:d9|lin<|user_cursor|>e1
1598 1:da|line2
1599 <|fim_suffix|>
1600 <|fim_middle|>updated"},
1601 },
1602 Case {
1603 name: "leading_newline_in_editable_region",
1604 context: "\nabc\n",
1605 editable_range: 0..5,
1606 cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
1607 expected: indoc! {"
1608 <|file_sep|>test.rs
1609 <|fim_prefix|>
1610 <|fim_middle|>current
1611 0:00|
1612 1:26|a<|user_cursor|>bc
1613 <|fim_suffix|>
1614 <|fim_middle|>updated"},
1615 },
1616 Case {
1617 name: "with_suffix",
1618 context: "abc\ndef",
1619 editable_range: 0..4, // editable region = "abc\n", suffix = "def"
1620 cursor_offset: 2,
1621 expected: indoc! {"
1622 <|file_sep|>test.rs
1623 <|fim_prefix|>
1624 <|fim_middle|>current
1625 0:26|ab<|user_cursor|>c
1626 <|fim_suffix|>
1627 def
1628 <|fim_middle|>updated"},
1629 },
1630 Case {
1631 name: "unicode_two_byte_chars",
1632 context: "héllo\n",
1633 editable_range: 0..7,
1634 cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
1635 expected: indoc! {"
1636 <|file_sep|>test.rs
1637 <|fim_prefix|>
1638 <|fim_middle|>current
1639 0:1b|hé<|user_cursor|>llo
1640 <|fim_suffix|>
1641 <|fim_middle|>updated"},
1642 },
1643 Case {
1644 name: "unicode_three_byte_chars",
1645 context: "日本語\n",
1646 editable_range: 0..10,
1647 cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
1648 expected: indoc! {"
1649 <|file_sep|>test.rs
1650 <|fim_prefix|>
1651 <|fim_middle|>current
1652 0:80|日本<|user_cursor|>語
1653 <|fim_suffix|>
1654 <|fim_middle|>updated"},
1655 },
1656 Case {
1657 name: "unicode_four_byte_chars",
1658 context: "a🌍b\n",
1659 editable_range: 0..7,
1660 cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
1661 expected: indoc! {"
1662 <|file_sep|>test.rs
1663 <|fim_prefix|>
1664 <|fim_middle|>current
1665 0:6b|a🌍<|user_cursor|>b
1666 <|fim_suffix|>
1667 <|fim_middle|>updated"},
1668 },
1669 Case {
1670 name: "cursor_at_start_of_region_not_placed",
1671 context: "abc\n",
1672 editable_range: 0..4,
1673 cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
1674 expected: indoc! {"
1675 <|file_sep|>test.rs
1676 <|fim_prefix|>
1677 <|fim_middle|>current
1678 0:26|abc
1679 <|fim_suffix|>
1680 <|fim_middle|>updated"},
1681 },
1682 Case {
1683 name: "cursor_at_end_of_line_not_placed",
1684 context: "abc\ndef\n",
1685 editable_range: 0..8,
1686 cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
1687 expected: indoc! {"
1688 <|file_sep|>test.rs
1689 <|fim_prefix|>
1690 <|fim_middle|>current
1691 0:26|abc
1692 1:2f|def
1693 <|fim_suffix|>
1694 <|fim_middle|>updated"},
1695 },
1696 Case {
1697 name: "cursor_offset_relative_to_context_not_editable_region",
1698 // cursor_offset is relative to `context`, so when editable_range.start > 0,
1699 // write_cursor_excerpt_section must subtract it before comparing against
1700 // per-line offsets within the editable region.
1701 context: "pre\naaa\nbbb\nsuf\n",
1702 editable_range: 4..12, // editable region = "aaa\nbbb\n"
1703 cursor_offset: 9, // byte 9 in context = second 'b' in "bbb"
1704 expected: indoc! {"
1705 <|file_sep|>test.rs
1706 <|fim_prefix|>
1707 pre
1708 <|fim_middle|>current
1709 0:23|aaa
1710 1:26|b<|user_cursor|>bb
1711 <|fim_suffix|>
1712 suf
1713 <|fim_middle|>updated"},
1714 },
1715 ];
1716
1717 for case in &cases {
1718 let mut prompt = String::new();
1719 hashline::write_cursor_excerpt_section(
1720 &mut prompt,
1721 Path::new("test.rs"),
1722 case.context,
1723 &case.editable_range,
1724 case.cursor_offset,
1725 );
1726 assert_eq!(prompt, case.expected, "failed case: {}", case.name);
1727 }
1728 }
1729
1730 #[test]
1731 fn test_apply_edit_commands() {
1732 struct Case {
1733 name: &'static str,
1734 original: &'static str,
1735 model_output: &'static str,
1736 expected: &'static str,
1737 }
1738
1739 let cases = vec![
1740 Case {
1741 name: "set_single_line",
1742 original: indoc! {"
1743 let mut total = 0;
1744 for product in products {
1745 total += ;
1746 }
1747 total
1748 "},
1749 model_output: indoc! {"
1750 <|set|>2:87
1751 total += product.price;
1752 "},
1753 expected: indoc! {"
1754 let mut total = 0;
1755 for product in products {
1756 total += product.price;
1757 }
1758 total
1759 "},
1760 },
1761 Case {
1762 name: "set_range",
1763 original: indoc! {"
1764 fn foo() {
1765 let x = 1;
1766 let y = 2;
1767 let z = 3;
1768 }
1769 "},
1770 model_output: indoc! {"
1771 <|set|>1:46-3:4a
1772 let sum = 6;
1773 "},
1774 expected: indoc! {"
1775 fn foo() {
1776 let sum = 6;
1777 }
1778 "},
1779 },
1780 Case {
1781 name: "insert_after_line",
1782 original: indoc! {"
1783 fn main() {
1784 let x = 1;
1785 }
1786 "},
1787 model_output: indoc! {"
1788 <|insert|>1:46
1789 let y = 2;
1790 "},
1791 expected: indoc! {"
1792 fn main() {
1793 let x = 1;
1794 let y = 2;
1795 }
1796 "},
1797 },
1798 Case {
1799 name: "insert_before_first",
1800 original: indoc! {"
1801 let x = 1;
1802 let y = 2;
1803 "},
1804 model_output: indoc! {"
1805 <|insert|>
1806 use std::io;
1807 "},
1808 expected: indoc! {"
1809 use std::io;
1810 let x = 1;
1811 let y = 2;
1812 "},
1813 },
1814 Case {
1815 name: "set_with_cursor_marker",
1816 original: indoc! {"
1817 fn main() {
1818 println!();
1819 }
1820 "},
1821 model_output: indoc! {"
1822 <|set|>1:34
1823 eprintln!(\"<|user_cursor|>\");
1824 "},
1825 expected: indoc! {"
1826 fn main() {
1827 eprintln!(\"<|user_cursor|>\");
1828 }
1829 "},
1830 },
1831 Case {
1832 name: "multiple_set_commands",
1833 original: indoc! {"
1834 aaa
1835 bbb
1836 ccc
1837 ddd
1838 "},
1839 model_output: indoc! {"
1840 <|set|>0:23
1841 AAA
1842 <|set|>2:29
1843 CCC
1844 "},
1845 expected: indoc! {"
1846 AAA
1847 bbb
1848 CCC
1849 ddd
1850 "},
1851 },
1852 Case {
1853 name: "set_range_multiline_replacement",
1854 original: indoc! {"
1855 fn handle_submit() {
1856 }
1857
1858 fn handle_keystroke() {
1859 "},
1860 model_output: indoc! {"
1861 <|set|>0:3f-1:7d
1862 fn handle_submit(modal_state: &mut ModalState) {
1863 <|user_cursor|>
1864 }
1865 "},
1866 expected: indoc! {"
1867 fn handle_submit(modal_state: &mut ModalState) {
1868 <|user_cursor|>
1869 }
1870
1871 fn handle_keystroke() {
1872 "},
1873 },
1874 Case {
1875 name: "no_edit_commands_returns_original",
1876 original: indoc! {"
1877 hello
1878 world
1879 "},
1880 model_output: "some random text with no commands",
1881 expected: indoc! {"
1882 hello
1883 world
1884 "},
1885 },
1886 Case {
1887 name: "wrong_hash_set_ignored",
1888 original: indoc! {"
1889 aaa
1890 bbb
1891 "},
1892 model_output: indoc! {"
1893 <|set|>0:ff
1894 ZZZ
1895 "},
1896 expected: indoc! {"
1897 aaa
1898 bbb
1899 "},
1900 },
1901 Case {
1902 name: "insert_and_set_combined",
1903 original: indoc! {"
1904 alpha
1905 beta
1906 gamma
1907 "},
1908 model_output: indoc! {"
1909 <|set|>0:06
1910 ALPHA
1911 <|insert|>1:9c
1912 beta_extra
1913 "},
1914 expected: indoc! {"
1915 ALPHA
1916 beta
1917 beta_extra
1918 gamma
1919 "},
1920 },
1921 Case {
1922 name: "no_trailing_newline_preserved",
1923 original: "hello\nworld",
1924 model_output: indoc! {"
1925 <|set|>0:14
1926 HELLO
1927 "},
1928 expected: "HELLO\nworld",
1929 },
1930 Case {
1931 name: "set_range_hash_mismatch_in_end_bound",
1932 original: indoc! {"
1933 one
1934 two
1935 three
1936 "},
1937 model_output: indoc! {"
1938 <|set|>0:42-2:ff
1939 ONE_TWO_THREE
1940 "},
1941 expected: indoc! {"
1942 one
1943 two
1944 three
1945 "},
1946 },
1947 Case {
1948 name: "set_range_start_greater_than_end_ignored",
1949 original: indoc! {"
1950 a
1951 b
1952 c
1953 "},
1954 model_output: indoc! {"
1955 <|set|>2:63-1:62
1956 X
1957 "},
1958 expected: indoc! {"
1959 a
1960 b
1961 c
1962 "},
1963 },
1964 Case {
1965 name: "insert_out_of_bounds_ignored",
1966 original: indoc! {"
1967 x
1968 y
1969 "},
1970 model_output: indoc! {"
1971 <|insert|>99:aa
1972 z
1973 "},
1974 expected: indoc! {"
1975 x
1976 y
1977 "},
1978 },
1979 Case {
1980 name: "set_out_of_bounds_ignored",
1981 original: indoc! {"
1982 x
1983 y
1984 "},
1985 model_output: indoc! {"
1986 <|set|>99:aa
1987 z
1988 "},
1989 expected: indoc! {"
1990 x
1991 y
1992 "},
1993 },
1994 Case {
1995 name: "malformed_set_command_ignored",
1996 original: indoc! {"
1997 alpha
1998 beta
1999 "},
2000 model_output: indoc! {"
2001 <|set|>not-a-line-ref
2002 UPDATED
2003 "},
2004 expected: indoc! {"
2005 alpha
2006 beta
2007 "},
2008 },
2009 Case {
2010 name: "malformed_insert_hash_treated_as_before_first",
2011 original: indoc! {"
2012 alpha
2013 beta
2014 "},
2015 model_output: indoc! {"
2016 <|insert|>1:nothex
2017 preamble
2018 "},
2019 expected: indoc! {"
2020 preamble
2021 alpha
2022 beta
2023 "},
2024 },
2025 Case {
2026 name: "set_then_insert_same_target_orders_insert_after_replacement",
2027 original: indoc! {"
2028 cat
2029 dog
2030 "},
2031 model_output: indoc! {"
2032 <|set|>0:38
2033 CAT
2034 <|insert|>0:38
2035 TAIL
2036 "},
2037 expected: indoc! {"
2038 CAT
2039 TAIL
2040 dog
2041 "},
2042 },
2043 Case {
2044 name: "overlapping_set_ranges_last_wins",
2045 original: indoc! {"
2046 a
2047 b
2048 c
2049 d
2050 "},
2051 model_output: indoc! {"
2052 <|set|>0:61-2:63
2053 FIRST
2054 <|set|>1:62-3:64
2055 SECOND
2056 "},
2057 expected: indoc! {"
2058 FIRST
2059 d
2060 "},
2061 },
2062 Case {
2063 name: "insert_before_first_and_after_line",
2064 original: indoc! {"
2065 a
2066 b
2067 "},
2068 model_output: indoc! {"
2069 <|insert|>
2070 HEAD
2071 <|insert|>0:61
2072 MID
2073 "},
2074 expected: indoc! {"
2075 HEAD
2076 a
2077 MID
2078 b
2079 "},
2080 },
2081 ];
2082
2083 for case in &cases {
2084 let result = hashline::apply_edit_commands(case.original, &case.model_output);
2085 assert_eq!(result, case.expected, "failed case: {}", case.name);
2086 }
2087 }
2088
#[test]
fn test_output_has_edit_commands() {
    // Outputs carrying a set or insert marker must be detected.
    let with_commands = [
        format!("{}0:ab\nnew", SET_COMMAND_MARKER),
        format!("{}0:ab\nnew", INSERT_COMMAND_MARKER),
        format!("some text\n{}1:cd\nstuff", SET_COMMAND_MARKER),
    ];
    for output in &with_commands {
        assert!(hashline::output_has_edit_commands(output));
    }

    // Plain text without any marker must not be detected.
    for output in ["just plain text", "NO_EDITS"] {
        assert!(!hashline::output_has_edit_commands(output));
    }
}
2106
2107 // ---- hashline::patch_to_edit_commands round-trip tests ----
2108
2109 #[test]
2110 fn test_patch_to_edit_commands() {
2111 struct Case {
2112 name: &'static str,
2113 old: &'static str,
2114 patch: &'static str,
2115 expected_new: &'static str,
2116 }
2117
2118 let cases = [
2119 Case {
2120 name: "single_line_replacement",
2121 old: indoc! {"
2122 let mut total = 0;
2123 for product in products {
2124 total += ;
2125 }
2126 total
2127 "},
2128 patch: indoc! {"
2129 @@ -1,5 +1,5 @@
2130 let mut total = 0;
2131 for product in products {
2132 - total += ;
2133 + total += product.price;
2134 }
2135 total
2136 "},
2137 expected_new: indoc! {"
2138 let mut total = 0;
2139 for product in products {
2140 total += product.price;
2141 }
2142 total
2143 "},
2144 },
2145 Case {
2146 name: "multiline_replacement",
2147 old: indoc! {"
2148 fn foo() {
2149 let x = 1;
2150 let y = 2;
2151 let z = 3;
2152 }
2153 "},
2154 patch: indoc! {"
2155 @@ -1,5 +1,3 @@
2156 fn foo() {
2157 - let x = 1;
2158 - let y = 2;
2159 - let z = 3;
2160 + let sum = 1 + 2 + 3;
2161 }
2162 "},
2163 expected_new: indoc! {"
2164 fn foo() {
2165 let sum = 1 + 2 + 3;
2166 }
2167 "},
2168 },
2169 Case {
2170 name: "insertion",
2171 old: indoc! {"
2172 fn main() {
2173 let x = 1;
2174 }
2175 "},
2176 patch: indoc! {"
2177 @@ -1,3 +1,4 @@
2178 fn main() {
2179 let x = 1;
2180 + let y = 2;
2181 }
2182 "},
2183 expected_new: indoc! {"
2184 fn main() {
2185 let x = 1;
2186 let y = 2;
2187 }
2188 "},
2189 },
2190 Case {
2191 name: "insertion_before_first",
2192 old: indoc! {"
2193 let x = 1;
2194 let y = 2;
2195 "},
2196 patch: indoc! {"
2197 @@ -1,2 +1,3 @@
2198 +use std::io;
2199 let x = 1;
2200 let y = 2;
2201 "},
2202 expected_new: indoc! {"
2203 use std::io;
2204 let x = 1;
2205 let y = 2;
2206 "},
2207 },
2208 Case {
2209 name: "deletion",
2210 old: indoc! {"
2211 aaa
2212 bbb
2213 ccc
2214 ddd
2215 "},
2216 patch: indoc! {"
2217 @@ -1,4 +1,2 @@
2218 aaa
2219 -bbb
2220 -ccc
2221 ddd
2222 "},
2223 expected_new: indoc! {"
2224 aaa
2225 ddd
2226 "},
2227 },
2228 Case {
2229 name: "multiple_changes",
2230 old: indoc! {"
2231 alpha
2232 beta
2233 gamma
2234 delta
2235 epsilon
2236 "},
2237 patch: indoc! {"
2238 @@ -1,5 +1,5 @@
2239 -alpha
2240 +ALPHA
2241 beta
2242 gamma
2243 -delta
2244 +DELTA
2245 epsilon
2246 "},
2247 expected_new: indoc! {"
2248 ALPHA
2249 beta
2250 gamma
2251 DELTA
2252 epsilon
2253 "},
2254 },
2255 Case {
2256 name: "replace_with_insertion",
2257 old: indoc! {r#"
2258 fn handle() {
2259 modal_state.close();
2260 modal_state.dismiss();
2261 "#},
2262 patch: indoc! {r#"
2263 @@ -1,3 +1,4 @@
2264 fn handle() {
2265 modal_state.close();
2266 + eprintln!("");
2267 modal_state.dismiss();
2268 "#},
2269 expected_new: indoc! {r#"
2270 fn handle() {
2271 modal_state.close();
2272 eprintln!("");
2273 modal_state.dismiss();
2274 "#},
2275 },
2276 Case {
2277 name: "complete_replacement",
2278 old: indoc! {"
2279 aaa
2280 bbb
2281 ccc
2282 "},
2283 patch: indoc! {"
2284 @@ -1,3 +1,3 @@
2285 -aaa
2286 -bbb
2287 -ccc
2288 +xxx
2289 +yyy
2290 +zzz
2291 "},
2292 expected_new: indoc! {"
2293 xxx
2294 yyy
2295 zzz
2296 "},
2297 },
2298 Case {
2299 name: "add_function_body",
2300 old: indoc! {"
2301 fn foo() {
2302 modal_state.dismiss();
2303 }
2304
2305 fn
2306
2307 fn handle_keystroke() {
2308 "},
2309 patch: indoc! {"
2310 @@ -1,6 +1,8 @@
2311 fn foo() {
2312 modal_state.dismiss();
2313 }
2314
2315 -fn
2316 +fn handle_submit() {
2317 + todo()
2318 +}
2319
2320 fn handle_keystroke() {
2321 "},
2322 expected_new: indoc! {"
2323 fn foo() {
2324 modal_state.dismiss();
2325 }
2326
2327 fn handle_submit() {
2328 todo()
2329 }
2330
2331 fn handle_keystroke() {
2332 "},
2333 },
2334 Case {
2335 name: "with_cursor_offset",
2336 old: indoc! {r#"
2337 fn main() {
2338 println!();
2339 }
2340 "#},
2341 patch: indoc! {r#"
2342 @@ -1,3 +1,3 @@
2343 fn main() {
2344 - println!();
2345 + eprintln!("");
2346 }
2347 "#},
2348 expected_new: indoc! {r#"
2349 fn main() {
2350 eprintln!("<|user_cursor|>");
2351 }
2352 "#},
2353 },
2354 Case {
2355 name: "non_local_hunk_header_pure_insertion_repro",
2356 old: indoc! {"
2357 aaa
2358 bbb
2359 "},
2360 patch: indoc! {"
2361 @@ -20,2 +20,3 @@
2362 aaa
2363 +xxx
2364 bbb
2365 "},
2366 expected_new: indoc! {"
2367 aaa
2368 xxx
2369 bbb
2370 "},
2371 },
2372 ];
2373
2374 for case in &cases {
2375 // The cursor_offset for patch_to_edit_commands is relative to
2376 // the first hunk's new text (context + additions). We compute
2377 // it by finding where the marker sits in the expected output
2378 // (which mirrors the new text of the hunk).
2379 let cursor_offset = case.expected_new.find(CURSOR_MARKER);
2380
2381 let commands =
2382 hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
2383 .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
2384
2385 assert!(
2386 hashline::output_has_edit_commands(&commands),
2387 "case {}: expected edit commands, got: {commands:?}",
2388 case.name,
2389 );
2390
2391 let applied = hashline::apply_edit_commands(case.old, &commands);
2392 assert_eq!(applied, case.expected_new, "case {}", case.name);
2393 }
2394 }
2395 }
2396}
2397
2398pub mod seed_coder {
2399 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2400 //!
2401 //! Seed-Coder uses different FIM tokens and order than Qwen:
2402 //! - SPM order: suffix comes FIRST, then prefix, then middle
2403 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2404 //! - File markers: StarCoder-style `<filename>path` (single token + path)
2405 //!
2406 //! All context (related files, edit history) goes in the PREFIX section.
2407 //! The suffix contains only code after the editable region.
2408 //!
2409 //! Example prompt:
2410 //!
2411 //! <[fim-suffix]>
2412 //! code after editable region
2413 //! <[fim-prefix]><filename>related/file.py
2414 //! related file content
2415 //!
2416 //! <filename>edit_history
2417 //! --- a/some_file.py
2418 //! +++ b/some_file.py
2419 //! -old
2420 //! +new
2421 //!
2422 //! <filename>path/to/target_file.py
2423 //! code before editable region
2424 //! <<<<<<< CURRENT
2425 //! code that
2426 //! needs to<|user_cursor|>
2427 //! be rewritten
2428 //! =======
2429 //! <[fim-middle]>
2430 //!
2431 //! Expected output (model generates):
2432 //!
2433 //! updated
2434 //! code with
2435 //! changes applied
2436 //! >>>>>>> UPDATED
2437
2438 use super::*;
2439
/// Token that opens the suffix section (emitted first in SPM order).
pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
/// Token that opens the prefix section.
pub const FIM_PREFIX: &str = "<[fim-prefix]>";
/// Token after which the model generates the middle.
pub const FIM_MIDDLE: &str = "<[fim-middle]>";
/// StarCoder-style file marker; it is followed directly by the file path.
pub const FILE_MARKER: &str = "<filename>";

/// Line that opens the current (to-be-rewritten) editable region.
pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
/// Line that closes the current editable region in the prompt.
pub const SEPARATOR: &str = "=======\n";
/// Line that terminates the updated text generated by the model.
pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

/// Literal `NO_EDITS` line.
/// NOTE(review): presumably the model output meaning "no change needed" —
/// confirm against the callers.
pub const NO_EDITS: &str = "NO_EDITS\n";
2450
2451 pub fn special_tokens() -> &'static [&'static str] {
2452 &[
2453 FIM_SUFFIX,
2454 FIM_PREFIX,
2455 FIM_MIDDLE,
2456 FILE_MARKER,
2457 START_MARKER,
2458 SEPARATOR,
2459 END_MARKER,
2460 CURSOR_MARKER,
2461 ]
2462 }
2463
2464 pub fn write_cursor_excerpt_section(
2465 prompt: &mut String,
2466 path: &Path,
2467 context: &str,
2468 editable_range: &Range<usize>,
2469 cursor_offset: usize,
2470 ) {
2471 let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2472 prompt.push_str(§ion);
2473 }
2474
2475 pub fn format_prompt_with_budget(
2476 path: &Path,
2477 context: &str,
2478 editable_range: &Range<usize>,
2479 cursor_offset: usize,
2480 events: &[Arc<Event>],
2481 related_files: &[RelatedFile],
2482 max_tokens: usize,
2483 ) -> String {
2484 let suffix_section = build_suffix_section(context, editable_range);
2485 let cursor_prefix_section =
2486 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2487
2488 let suffix_tokens = estimate_tokens(suffix_section.len());
2489 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
2490 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2491
2492 let edit_history_section = super::format_edit_history_within_budget(
2493 events,
2494 FILE_MARKER,
2495 "edit_history",
2496 budget_after_cursor,
2497 );
2498 let edit_history_tokens = estimate_tokens(edit_history_section.len());
2499 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
2500
2501 let related_files_section = super::format_related_files_within_budget(
2502 related_files,
2503 FILE_MARKER,
2504 "",
2505 budget_after_edit_history,
2506 );
2507
2508 let mut prompt = String::new();
2509 prompt.push_str(&suffix_section);
2510 prompt.push_str(FIM_PREFIX);
2511 prompt.push_str(&related_files_section);
2512 if !related_files_section.is_empty() {
2513 prompt.push('\n');
2514 }
2515 prompt.push_str(&edit_history_section);
2516 if !edit_history_section.is_empty() {
2517 prompt.push('\n');
2518 }
2519 prompt.push_str(&cursor_prefix_section);
2520 prompt.push_str(FIM_MIDDLE);
2521 prompt
2522 }
2523
2524 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2525 let mut section = String::new();
2526 section.push_str(FIM_SUFFIX);
2527 section.push_str(&context[editable_range.end..]);
2528 if !section.ends_with('\n') {
2529 section.push('\n');
2530 }
2531 section
2532 }
2533
2534 fn build_cursor_prefix_section(
2535 path: &Path,
2536 context: &str,
2537 editable_range: &Range<usize>,
2538 cursor_offset: usize,
2539 ) -> String {
2540 let mut section = String::new();
2541 let path_str = path.to_string_lossy();
2542 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2543
2544 section.push_str(&context[..editable_range.start]);
2545 section.push_str(START_MARKER);
2546 section.push_str(&context[editable_range.start..cursor_offset]);
2547 section.push_str(CURSOR_MARKER);
2548 section.push_str(&context[cursor_offset..editable_range.end]);
2549 if !section.ends_with('\n') {
2550 section.push('\n');
2551 }
2552 section.push_str(SEPARATOR);
2553 section
2554 }
2555
2556 /// Format patch as containing no changes if it's empty; otherwise return None.
2557 pub(crate) fn no_edits(patch: &str) -> Option<String> {
2558 // Count lines in the patch
2559 let empty_patch = patch.lines().count() <= 3;
2560 if empty_patch {
2561 Some(format!("{NO_EDITS}{END_MARKER}"))
2562 } else {
2563 None
2564 }
2565 }
2566}
2567
2568pub mod v0304_variable_edit {
2569 //! A prompt format with no fixed editable region. The entire context is shown
2570 //! to the model, and it chooses which text to replace by outputting surrounding
2571 //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
2572 //! text.
2573 //!
2574 //! Example prompt:
2575 //!
2576 //! <|file_sep|>path/to/file.py
2577 //! zero
2578 //! one
2579 //! two
2580 //! three<|user_cursor|>
2581 //! four
2582 //! five
2583 //! <|fim_prefix|>
2584 //
2585 //! Expected output (model generates):
2586 //!
2587 //! two
2588 //! <|fim_middle|>
2589 //! THREE
2590 //! <|fim_suffix|>
2591 //! four
2592 //!
2593 //! The output means: find "two\n...\nfour" in the context, and replace
2594 //! everything between "two\n" and "four" with "THREE\n".
2595
2596 use super::*;
2597
2598 pub fn special_tokens() -> &'static [&'static str] {
2599 &[
2600 "<|fim_prefix|>",
2601 "<|fim_suffix|>",
2602 "<|fim_middle|>",
2603 "<|file_sep|>",
2604 CURSOR_MARKER,
2605 ]
2606 }
2607
2608 pub fn write_cursor_excerpt_section(
2609 prompt: &mut String,
2610 path: &Path,
2611 context: &str,
2612 cursor_offset: usize,
2613 ) {
2614 let path_str = path.to_string_lossy();
2615 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
2616
2617 prompt.push_str(&context[..cursor_offset]);
2618 prompt.push_str(CURSOR_MARKER);
2619 prompt.push_str(&context[cursor_offset..]);
2620 if !prompt.ends_with('\n') {
2621 prompt.push('\n');
2622 }
2623 prompt.push_str("<|fim_prefix|>\n")
2624 }
2625
2626 /// Apply a variable-edit model output to the original context text.
2627 ///
2628 /// The model output has the form:
2629 ///
2630 /// - prefix context lines
2631 /// - `<|fim_middle|>`
2632 /// - new text
2633 /// - `<|fim_suffix|>`
2634 /// - suffix context lines
2635 ///
2636 /// We locate the prefix/suffix context lines in the original text and replace
2637 /// everything between them with the new text.
2638 pub fn apply_variable_edit(
2639 context: &str,
2640 model_output: &str,
2641 ) -> Result<(Range<usize>, String)> {
2642 let (prefix_context, rest) = model_output
2643 .split_once("<|fim_middle|>\n")
2644 .or_else(|| model_output.split_once("<|fim_middle|>"))
2645 .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
2646
2647 let (new_text, suffix_context) = rest
2648 .split_once("<|fim_suffix|>\n")
2649 .or_else(|| rest.split_once("<|fim_suffix|>"))
2650 .unwrap_or((rest, ""));
2651
2652 let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
2653 suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
2654 } else {
2655 suffix_context
2656 };
2657
2658 let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
2659 .ok_or_else(|| anyhow!("could not locate prefix lines"))?
2660 + prefix_context.len();
2661 let suffix_offset = if suffix_context.is_empty() {
2662 context.len()
2663 } else {
2664 find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
2665 .ok_or_else(|| anyhow!("could not locate suffix lines"))?
2666 + prefix_offset
2667 };
2668
2669 let edit_range = prefix_offset..suffix_offset;
2670 return Ok((edit_range, new_text.to_string()));
2671 }
2672
2673 fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
2674 if needle.is_empty() {
2675 return Some(0);
2676 }
2677
2678 haystack.match_indices(needle).find_map(|(offset, _)| {
2679 let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
2680 matched_line_start.then_some(offset)
2681 })
2682 }
2683
2684 /// Convert a unified diff patch into the variable-edit output format.
2685 ///
2686 /// Parses `patch` as a unified diff against `old_text` and produces model
2687 /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
2688 /// delimiters. The diff is resolved by content matching rather than line
2689 /// numbers.
2690 pub fn patch_to_variable_edit_output(
2691 old_text: &str,
2692 patch: &str,
2693 cursor_offset: Option<usize>,
2694 ) -> Result<String> {
2695 // Parse the unified diff into hunks. Each hunk has an `old_context`
2696 // string (context + deleted lines interleaved in order) and a list of
2697 // edits expressed as byte ranges within that context plus replacement
2698 // text.
2699 let hunks = parse_hunks(patch);
2700 if hunks.is_empty() {
2701 return Ok(String::new());
2702 }
2703
2704 // Apply each hunk by finding its old_context in the text and
2705 // performing the edits. We search forward from where the previous
2706 // hunk ended so that hunks are applied in order.
2707 let mut new_text = old_text.to_string();
2708 let mut search_from: usize = 0;
2709 let mut first_hunk_pos: Option<usize> = None;
2710
2711 for hunk in &hunks {
2712 let context_pos = new_text[search_from..]
2713 .find(&hunk.old_context)
2714 .map(|pos| pos + search_from)
2715 .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
2716
2717 if first_hunk_pos.is_none() {
2718 first_hunk_pos = Some(context_pos);
2719 }
2720
2721 // Apply edits in reverse order so byte offsets remain valid.
2722 for edit in hunk.edits.iter().rev() {
2723 let abs_start = context_pos + edit.range.start;
2724 let abs_end = context_pos + edit.range.end;
2725 new_text.replace_range(abs_start..abs_end, &edit.text);
2726 }
2727
2728 // Advance past this hunk's region in the (now modified) text.
2729 let new_region_len: usize =
2730 hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
2731 len + edit.text.len() - (edit.range.end - edit.range.start)
2732 });
2733 search_from = context_pos + new_region_len;
2734 }
2735
2736 // Now we have old_text and new_text. Find the changed line range by
2737 // comparing them.
2738 let old_lines: Vec<&str> = old_text.lines().collect();
2739 let new_lines: Vec<&str> = new_text.lines().collect();
2740
2741 // Find first differing line.
2742 let first_changed_row = old_lines
2743 .iter()
2744 .zip(new_lines.iter())
2745 .position(|(a, b)| a != b)
2746 .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
2747
2748 // Find last differing line (from the end).
2749 let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
2750 let common_suffix = old_lines
2751 .iter()
2752 .rev()
2753 .zip(new_lines.iter().rev())
2754 .take(max_suffix)
2755 .take_while(|(a, b)| a == b)
2756 .count();
2757
2758 let old_end = old_lines.len() - common_suffix;
2759 let new_end = new_lines.len() - common_suffix;
2760
2761 if first_changed_row == old_end && first_changed_row == new_end {
2762 return Ok(String::new());
2763 }
2764
2765 // Build the replacement text from new_lines[first_diff..new_end].
2766 let mut merged_new_text = String::new();
2767 for line in &new_lines[first_changed_row..new_end] {
2768 merged_new_text.push_str(line);
2769 merged_new_text.push('\n');
2770 }
2771
2772 // cursor_offset is relative to the first hunk's new content in
2773 // new_text. Translate it to an offset within merged_new_text, which
2774 // only contains lines first_diff..new_end of new_text.
2775 if let Some(hunk_offset) = cursor_offset {
2776 let hunk_start = first_hunk_pos.unwrap_or(0);
2777 let absolute_pos = hunk_start + hunk_offset;
2778
2779 // Byte offset where first_diff starts in new_text.
2780 let merged_start: usize = new_lines[..first_changed_row]
2781 .iter()
2782 .map(|line| line.len() + 1)
2783 .sum();
2784
2785 if absolute_pos >= merged_start {
2786 let relative_offset = absolute_pos - merged_start;
2787 if relative_offset <= merged_new_text.len() {
2788 merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
2789 }
2790 }
2791 }
2792
2793 // Build output with 2 lines of context above and below.
2794 let context_lines_count = 2;
2795 let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
2796 let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
2797
2798 fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
2799 let pattern = &lines[line_range];
2800 let pattern_len = pattern.len();
2801
2802 let mut count = 0;
2803 for offset in 0..=lines.len() - pattern_len {
2804 if &lines[offset..offset + pattern_len] == pattern {
2805 count += 1;
2806 }
2807 }
2808 count
2809 }
2810
2811 // Expand prefix and suffix until they are unique
2812 while prefix_start > 0 {
2813 if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
2814 prefix_start -= 1;
2815 } else {
2816 break;
2817 }
2818 }
2819 while suffix_end < old_lines.len() {
2820 if count_matches(old_end..suffix_end, &old_lines) > 1 {
2821 suffix_end += 1;
2822 } else {
2823 break;
2824 }
2825 }
2826
2827 let mut output = String::new();
2828 for line in &old_lines[prefix_start..first_changed_row] {
2829 output.push_str(line);
2830 output.push('\n');
2831 }
2832 output.push_str("<|fim_middle|>\n");
2833 output.push_str(&merged_new_text);
2834 output.push_str("<|fim_suffix|>\n");
2835 for line in &old_lines[old_end..suffix_end] {
2836 output.push_str(line);
2837 output.push('\n');
2838 }
2839
2840 Ok(output)
2841 }
2842
2843 struct ParsedHunk {
2844 old_context: String,
2845 edits: Vec<ParsedEdit>,
2846 }
2847
2848 struct ParsedEdit {
2849 range: Range<usize>,
2850 text: String,
2851 }
2852
2853 /// Parse a unified diff into content-based hunks. Each hunk contains an
2854 /// `old_context` string (context lines + deleted lines, which together
2855 /// form the text that should be found in the original) and a list of edits
2856 /// expressed as byte ranges within that context.
2857 fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
2858 let mut hunks = Vec::new();
2859 let mut current: Option<ParsedHunk> = None;
2860
2861 for line in patch.lines() {
2862 if line.starts_with("@@") {
2863 if let Some(hunk) = current.take() {
2864 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
2865 hunks.push(hunk);
2866 }
2867 }
2868 current = Some(ParsedHunk {
2869 old_context: String::new(),
2870 edits: Vec::new(),
2871 });
2872 } else if line.starts_with("---") || line.starts_with("+++") {
2873 continue;
2874 } else if let Some(hunk) = &mut current {
2875 if let Some(added) = line.strip_prefix('+') {
2876 let pos = hunk.old_context.len();
2877 if let Some(last_edit) = hunk.edits.last_mut() {
2878 if last_edit.range.end == pos {
2879 writeln!(&mut last_edit.text, "{added}").ok();
2880 continue;
2881 }
2882 }
2883 hunk.edits.push(ParsedEdit {
2884 range: pos..pos,
2885 text: format!("{added}\n"),
2886 });
2887 } else if let Some(removed) = line.strip_prefix('-') {
2888 let start = hunk.old_context.len();
2889 writeln!(&mut hunk.old_context, "{removed}").ok();
2890 let end = hunk.old_context.len();
2891 if let Some(last_edit) = hunk.edits.last_mut() {
2892 if last_edit.range.end == start {
2893 last_edit.range.end = end;
2894 continue;
2895 }
2896 }
2897 hunk.edits.push(ParsedEdit {
2898 range: start..end,
2899 text: String::new(),
2900 });
2901 } else {
2902 let ctx = line.strip_prefix(' ').unwrap_or(line);
2903 writeln!(&mut hunk.old_context, "{ctx}").ok();
2904 }
2905 }
2906 }
2907
2908 if let Some(hunk) = current {
2909 if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
2910 hunks.push(hunk);
2911 }
2912 }
2913
2914 hunks
2915 }
2916
2917 #[cfg(test)]
2918 mod tests {
2919 use super::*;
2920 use indoc::indoc;
2921
2922 #[test]
2923 fn test_apply_variable_edit() {
2924 struct Case {
2925 name: &'static str,
2926 original: &'static str,
2927 model_output: &'static str,
2928 expected: &'static str,
2929 }
2930
2931 let cases = [
2932 Case {
2933 name: "simple_single_line_replacement",
2934 original: indoc! {"
2935 zero
2936 one
2937 two
2938 three
2939 four
2940 five
2941 "},
2942 model_output: indoc! {"
2943 two
2944 <|fim_middle|>
2945 THREE
2946 <|fim_suffix|>
2947 four
2948 "},
2949 expected: indoc! {"
2950 zero
2951 one
2952 two
2953 THREE
2954 four
2955 five
2956 "},
2957 },
2958 Case {
2959 name: "multi_line_replacement",
2960 original: indoc! {"
2961 a
2962 b
2963 c
2964 d
2965 e
2966 "},
2967 model_output: indoc! {"
2968 a
2969 <|fim_middle|>
2970 B
2971 C
2972 D
2973 <|fim_suffix|>
2974 e
2975 "},
2976 expected: indoc! {"
2977 a
2978 B
2979 C
2980 D
2981 e
2982 "},
2983 },
2984 Case {
2985 name: "insertion_between_existing_lines",
2986 original: indoc! {"
2987 a
2988 b
2989 c
2990 "},
2991 model_output: indoc! {"
2992 a
2993 <|fim_middle|>
2994 X
2995 <|fim_suffix|>
2996 b
2997 "},
2998 expected: indoc! {"
2999 a
3000 X
3001 b
3002 c
3003 "},
3004 },
3005 Case {
3006 name: "deletion",
3007 original: indoc! {"
3008 a
3009 b
3010 c
3011 d
3012 "},
3013 model_output: indoc! {"
3014 a
3015 <|fim_middle|>
3016 <|fim_suffix|>
3017 c
3018 "},
3019 expected: indoc! {"
3020 a
3021 c
3022 d
3023 "},
3024 },
3025 Case {
3026 name: "replacement_at_start_no_prefix_context",
3027 original: indoc! {"
3028 a
3029 b
3030 c
3031 "},
3032 model_output: indoc! {"
3033 <|fim_middle|>
3034 X
3035 <|fim_suffix|>
3036 b
3037 "},
3038 expected: indoc! {"
3039 X
3040 b
3041 c
3042 "},
3043 },
3044 Case {
3045 name: "replacement_at_end_no_suffix_context",
3046 original: indoc! {"
3047 a
3048 b
3049 c
3050 "},
3051 model_output: indoc! {"
3052 b
3053 <|fim_middle|>
3054 Z
3055 <|fim_suffix|>
3056 "},
3057 expected: indoc! {"
3058 a
3059 b
3060 Z
3061 "},
3062 },
3063 Case {
3064 name: "context_with_trailing_newline_is_preserved",
3065 original: indoc! {"
3066 a
3067 b
3068 c
3069 "},
3070 model_output: indoc! {"
3071 a
3072 <|fim_middle|>
3073 B
3074 <|fim_suffix|>
3075 c
3076 "},
3077 expected: indoc! {"
3078 a
3079 B
3080 c
3081 "},
3082 },
3083 Case {
3084 name: "cursor_marker_passes_through_untouched",
3085 original: indoc! {"
3086 a
3087 b
3088 c
3089 "},
3090 model_output: indoc! {"
3091 a
3092 <|fim_middle|>
3093 B<|user_cursor|>B
3094 <|fim_suffix|>
3095 c
3096 "},
3097 expected: indoc! {"
3098 a
3099 B<|user_cursor|>B
3100 c
3101 "},
3102 },
3103 Case {
3104 name: "multiple_prefix_context_lines",
3105 original: indoc! {"
3106 a
3107 b
3108 c
3109 d
3110 e
3111 "},
3112 model_output: indoc! {"
3113 b
3114 c
3115 <|fim_middle|>
3116 D
3117 <|fim_suffix|>
3118 e
3119 "},
3120 expected: indoc! {"
3121 a
3122 b
3123 c
3124 D
3125 e
3126 "},
3127 },
3128 ];
3129
3130 for case in cases {
3131 let (edit_range, replacement) =
3132 apply_variable_edit(case.original, case.model_output).unwrap();
3133 let mut edited = case.original.to_string();
3134 edited.replace_range(edit_range, &replacement);
3135 assert_eq!(edited, case.expected, "{}", case.name);
3136 }
3137 }
3138
3139 #[test]
3140 fn test_patch_to_variable_edit() {
3141 struct Case {
3142 name: &'static str,
3143 old: &'static str,
3144 patch: &'static str,
3145 cursor_offset: Option<usize>,
3146 expected_variable_edit: &'static str,
3147 expected_after_apply: &'static str,
3148 }
3149
3150 let cases = [
3151 Case {
3152 name: "simple_replacement",
3153 old: indoc! {"
3154 zero
3155 one
3156 two
3157 three
3158 four
3159 five
3160 "},
3161 patch: indoc! {"
3162 @@ -3,3 +3,3 @@
3163 two
3164 -three
3165 +THREE
3166 four
3167 "},
3168 cursor_offset: None,
3169 expected_variable_edit: indoc! {"
3170 one
3171 two
3172 <|fim_middle|>
3173 THREE
3174 <|fim_suffix|>
3175 four
3176 five
3177 "},
3178 expected_after_apply: indoc! {"
3179 zero
3180 one
3181 two
3182 THREE
3183 four
3184 five
3185 "},
3186 },
3187 Case {
3188 name: "insertion",
3189 old: indoc! {"
3190 a
3191 b
3192 c
3193 d
3194 e
3195 "},
3196 patch: indoc! {"
3197 @@ -2,0 +3,1 @@
3198 b
3199 +X
3200 c
3201 "},
3202 cursor_offset: None,
3203 expected_variable_edit: indoc! {"
3204 a
3205 b
3206 <|fim_middle|>
3207 X
3208 <|fim_suffix|>
3209 c
3210 d
3211 "},
3212 expected_after_apply: indoc! {"
3213 a
3214 b
3215 X
3216 c
3217 d
3218 e
3219 "},
3220 },
3221 Case {
3222 name: "deletion",
3223 old: indoc! {"
3224 a
3225 b
3226 c
3227 d
3228 e
3229 "},
3230 patch: indoc! {"
3231 @@ -2,3 +2,2 @@
3232 b
3233 -c
3234 d
3235 "},
3236 cursor_offset: None,
3237 expected_variable_edit: indoc! {"
3238 a
3239 b
3240 <|fim_middle|>
3241 <|fim_suffix|>
3242 d
3243 e
3244 "},
3245 expected_after_apply: indoc! {"
3246 a
3247 b
3248 d
3249 e
3250 "},
3251 },
3252 Case {
3253 name: "edit_near_start",
3254 old: indoc! {"
3255 first
3256 second
3257 third
3258 fourth
3259 "},
3260 patch: indoc! {"
3261 @@ -1,1 +1,1 @@
3262 -first
3263 +FIRST
3264 "},
3265 cursor_offset: None,
3266 expected_variable_edit: indoc! {"
3267 <|fim_middle|>
3268 FIRST
3269 <|fim_suffix|>
3270 second
3271 third
3272 "},
3273 expected_after_apply: indoc! {"
3274 FIRST
3275 second
3276 third
3277 fourth
3278 "},
3279 },
3280 Case {
3281 name: "edit_near_end",
3282 old: indoc! {"
3283 first
3284 second
3285 third
3286 fourth
3287 "},
3288 patch: indoc! {"
3289 @@ -4,1 +4,1 @@
3290 -fourth
3291 +FOURTH
3292 "},
3293 cursor_offset: None,
3294 expected_variable_edit: indoc! {"
3295 second
3296 third
3297 <|fim_middle|>
3298 FOURTH
3299 <|fim_suffix|>
3300 "},
3301 expected_after_apply: indoc! {"
3302 first
3303 second
3304 third
3305 FOURTH
3306 "},
3307 },
3308 Case {
3309 name: "cursor_at_start_of_replacement",
3310 old: indoc! {"
3311 zero
3312 one
3313 two
3314 three
3315 four
3316 five
3317 "},
3318 patch: indoc! {"
3319 @@ -3,3 +3,3 @@
3320 two
3321 -three
3322 +THREE
3323 four
3324 "},
3325 cursor_offset: Some(4),
3326 expected_variable_edit: indoc! {"
3327 one
3328 two
3329 <|fim_middle|>
3330 <|user_cursor|>THREE
3331 <|fim_suffix|>
3332 four
3333 five
3334 "},
3335 expected_after_apply: indoc! {"
3336 zero
3337 one
3338 two
3339 <|user_cursor|>THREE
3340 four
3341 five
3342 "},
3343 },
3344 Case {
3345 name: "cursor_in_middle_of_replacement",
3346 old: indoc! {"
3347 zero
3348 one
3349 two
3350 three
3351 four
3352 five
3353 "},
3354 patch: indoc! {"
3355 @@ -3,3 +3,3 @@
3356 two
3357 -three
3358 +THREE
3359 four
3360 "},
3361 cursor_offset: Some(6),
3362 expected_variable_edit: indoc! {"
3363 one
3364 two
3365 <|fim_middle|>
3366 TH<|user_cursor|>REE
3367 <|fim_suffix|>
3368 four
3369 five
3370 "},
3371 expected_after_apply: indoc! {"
3372 zero
3373 one
3374 two
3375 TH<|user_cursor|>REE
3376 four
3377 five
3378 "},
3379 },
3380 Case {
3381 name: "expands_context_when_two_lines_not_unique_before_and_after",
3382 old: indoc! {"
3383 one
3384 a
3385 b
3386 c
3387 d
3388 two
3389 a
3390 b
3391 c
3392 d
3393 three
3394 a
3395 b
3396 c
3397 d
3398 four
3399 "},
3400 patch: indoc! {"
3401 @@ -4,5 +4,5 @@
3402 two
3403 a
3404 b
3405 -c
3406 +C
3407 d
3408 three
3409 "},
3410 cursor_offset: None,
3411 expected_variable_edit: indoc! {"
3412 two
3413 a
3414 b
3415 <|fim_middle|>
3416 C
3417 <|fim_suffix|>
3418 d
3419 three
3420 "},
3421 expected_after_apply: indoc! {"
3422 one
3423 a
3424 b
3425 c
3426 d
3427 two
3428 a
3429 b
3430 C
3431 d
3432 three
3433 a
3434 b
3435 c
3436 d
3437 four
3438 "},
3439 },
3440 Case {
3441 name: "expands_context_when_two_lines_not_unique_before_and_after",
3442 old: indoc! {"
3443 {
3444 {
3445 one();
3446 }
3447 }
3448 {
3449 {
3450 two();
3451 }
3452 }
3453 {
3454 {
3455 three();
3456 }
3457 }
3458 {
3459 {
3460 four();
3461 }
3462 }
3463 "},
3464 patch: indoc! {"
3465 @@ -4,5 +4,5 @@
3466 {
3467 - two();
3468 + TWO();
3469 }
3470 "},
3471 cursor_offset: None,
3472 expected_variable_edit: indoc! {"
3473 one();
3474 }
3475 }
3476 {
3477 {
3478 <|fim_middle|>
3479 TWO();
3480 <|fim_suffix|>
3481 }
3482 }
3483 {
3484 {
3485 three();
3486 "},
3487 expected_after_apply: indoc! {"
3488 {
3489 {
3490 one();
3491 }
3492 }
3493 {
3494 {
3495 TWO();
3496 }
3497 }
3498 {
3499 {
3500 three();
3501 }
3502 }
3503 {
3504 {
3505 four();
3506 }
3507 }
3508 "},
3509 },
3510 ];
3511
3512 for case in cases {
3513 let output =
3514 patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
3515 .unwrap_or_else(|error| {
3516 panic!("failed converting patch for {}: {error}", case.name)
3517 });
3518 assert_eq!(
3519 output, case.expected_variable_edit,
3520 "patch->variable_edit mismatch for {}",
3521 case.name
3522 );
3523
3524 let (edit_range, replacement) = apply_variable_edit(case.old, &output)
3525 .unwrap_or_else(|error| {
3526 panic!("failed applying variable_edit for {}: {error}", case.name)
3527 });
3528 let mut edited_by_variable_edit = case.old.to_string();
3529 edited_by_variable_edit.replace_range(edit_range, &replacement);
3530 assert_eq!(
3531 edited_by_variable_edit, case.expected_after_apply,
3532 "variable_edit apply mismatch for {}",
3533 case.name
3534 );
3535
3536 let (expected_edit_range, expected_replacement) =
3537 apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
3538 |error| {
3539 panic!(
3540 "failed applying expected variable_edit for {}: {error}",
3541 case.name
3542 )
3543 },
3544 );
3545 let mut edited_by_expected_variable_edit = case.old.to_string();
3546 edited_by_expected_variable_edit
3547 .replace_range(expected_edit_range, &expected_replacement);
3548 assert_eq!(
3549 edited_by_expected_variable_edit, case.expected_after_apply,
3550 "expected variable_edit apply mismatch for {}",
3551 case.name
3552 );
3553 }
3554 }
3555
3556 #[test]
3557 fn test_write_cursor_excerpt_section() {
3558 let path = Path::new("test.rs");
3559 let context = "fn main() {\n hello();\n}\n";
3560 let cursor_offset = 17;
3561 let mut prompt = String::new();
3562 write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
3563 assert_eq!(
3564 prompt,
3565 "<|file_sep|>test.rs\nfn main() {\n h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
3566 );
3567 }
3568 }
3569}
3570
3571/// The zeta1 prompt format
3572pub mod zeta1 {
3573 use super::*;
3574 use std::fmt::Write;
3575
    /// Cursor marker used by the zeta1 format. Inside this module it takes
    /// precedence over the crate-level `CURSOR_MARKER`, which is referred to
    /// as `super::CURSOR_MARKER`.
    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
    /// Written on its own line when the excerpt begins at the start of the file.
    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
    /// Opens the editable region of the excerpt.
    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
    /// Closes the editable region of the excerpt.
    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
3580
3581 const INSTRUCTION_HEADER: &str = concat!(
3582 "### Instruction:\n",
3583 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3584 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3585 "into account the cursor location.\n\n",
3586 "### User Edits:\n\n"
3587 );
3588 const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
3589 const RESPONSE_HEADER: &str = "\n\n### Response:\n";
3590
3591 /// Formats a complete zeta1 prompt from the input events and excerpt.
3592 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
3593 let mut prompt = String::with_capacity(
3594 INSTRUCTION_HEADER.len()
3595 + input_events.len()
3596 + EXCERPT_HEADER.len()
3597 + input_excerpt.len()
3598 + RESPONSE_HEADER.len(),
3599 );
3600 prompt.push_str(INSTRUCTION_HEADER);
3601 prompt.push_str(input_events);
3602 prompt.push_str(EXCERPT_HEADER);
3603 prompt.push_str(input_excerpt);
3604 prompt.push_str(RESPONSE_HEADER);
3605 prompt
3606 }
3607
3608 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
3609 /// editable and context byte-offset ranges within `cursor_excerpt`.
3610 pub fn format_zeta1_from_input(
3611 input: &ZetaPromptInput,
3612 editable_range: Range<usize>,
3613 context_range: Range<usize>,
3614 ) -> String {
3615 let events = format_zeta1_events(&input.events);
3616 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
3617 format_zeta1_prompt(&events, &excerpt)
3618 }
3619
3620 /// Formats events in zeta1 style (oldest first).
3621 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
3622 let mut result = String::new();
3623 for event in events {
3624 let event_string = format_zeta1_event(event);
3625 if event_string.is_empty() {
3626 continue;
3627 }
3628 if !result.is_empty() {
3629 result.push_str("\n\n");
3630 }
3631 result.push_str(&event_string);
3632 }
3633 result
3634 }
3635
3636 fn format_zeta1_event(event: &Event) -> String {
3637 match event {
3638 Event::BufferChange {
3639 path,
3640 old_path,
3641 diff,
3642 ..
3643 } => {
3644 let mut prompt = String::new();
3645 if old_path != path {
3646 writeln!(
3647 prompt,
3648 "User renamed {} to {}\n",
3649 old_path.display(),
3650 path.display()
3651 )
3652 .ok();
3653 }
3654 if !diff.is_empty() {
3655 write!(
3656 prompt,
3657 "User edited {}:\n```diff\n{}\n```",
3658 path.display(),
3659 diff
3660 )
3661 .ok();
3662 }
3663 prompt
3664 }
3665 }
3666 }
3667
3668 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
3669 /// within `cursor_excerpt`.
3670 fn format_zeta1_excerpt(
3671 input: &ZetaPromptInput,
3672 editable_range: Range<usize>,
3673 context_range: Range<usize>,
3674 ) -> String {
3675 let path_str = input.cursor_path.to_string_lossy();
3676 let excerpt = &*input.cursor_excerpt;
3677 let cursor_offset = input.cursor_offset_in_excerpt;
3678
3679 let mut prompt = String::new();
3680 writeln!(&mut prompt, "```{path_str}").ok();
3681
3682 let starts_at_file_beginning =
3683 input.excerpt_start_row == Some(0) && context_range.start == 0;
3684 if starts_at_file_beginning {
3685 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
3686 }
3687
3688 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
3689
3690 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
3691 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
3692 prompt.push_str(CURSOR_MARKER);
3693 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
3694 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
3695
3696 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
3697 write!(prompt, "\n```").ok();
3698
3699 prompt
3700 }
3701
3702 /// Cleans zeta1 model output by extracting content between editable region
3703 /// markers and converting the zeta1 cursor marker to the universal one.
3704 /// Returns `None` if the output doesn't contain the expected markers.
3705 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
3706 let content = output.replace(CURSOR_MARKER, "");
3707
3708 let content_start = content
3709 .find(EDITABLE_REGION_START_MARKER)
3710 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
3711 .map(|pos| {
3712 if content.as_bytes().get(pos) == Some(&b'\n') {
3713 pos + 1
3714 } else {
3715 pos
3716 }
3717 })
3718 .unwrap_or(0);
3719
3720 let content_end = content
3721 .find(EDITABLE_REGION_END_MARKER)
3722 .map(|pos| {
3723 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
3724 pos - 1
3725 } else {
3726 pos
3727 }
3728 })
3729 .unwrap_or(content.len());
3730
3731 if content_start > content_end {
3732 return Some(String::new());
3733 }
3734
3735 let extracted = &content[content_start..content_end];
3736
3737 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
3738 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
3739 let text_before_cursor = text_before_cursor
3740 .find(EDITABLE_REGION_START_MARKER)
3741 .map(|pos| {
3742 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
3743 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
3744 after_marker + 1
3745 } else {
3746 after_marker
3747 }
3748 })
3749 .unwrap_or(0);
3750 let offset_in_extracted = zeta1_cursor_pos
3751 .saturating_sub(text_before_cursor)
3752 .min(extracted.len());
3753 offset_in_extracted
3754 });
3755
3756 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
3757 if let Some(offset) = cursor_offset {
3758 result.push_str(&extracted[..offset]);
3759 result.push_str(super::CURSOR_MARKER);
3760 result.push_str(&extracted[offset..]);
3761 } else {
3762 result.push_str(extracted);
3763 }
3764
3765 Some(result)
3766 }
3767}
3768
3769#[cfg(test)]
3770mod tests {
3771 use super::*;
3772 use indoc::indoc;
3773
3774 fn make_input(
3775 cursor_excerpt: &str,
3776 editable_range: Range<usize>,
3777 cursor_offset: usize,
3778 events: Vec<Event>,
3779 related_files: Vec<RelatedFile>,
3780 ) -> ZetaPromptInput {
3781 let context_range = 0..cursor_excerpt.len();
3782 ZetaPromptInput {
3783 cursor_path: Path::new("test.rs").into(),
3784 cursor_excerpt: cursor_excerpt.into(),
3785 cursor_offset_in_excerpt: cursor_offset,
3786 excerpt_start_row: None,
3787 events: events.into_iter().map(Arc::new).collect(),
3788 related_files,
3789 excerpt_ranges: ExcerptRanges {
3790 editable_150: editable_range.clone(),
3791 editable_180: editable_range.clone(),
3792 editable_350: editable_range,
3793 editable_150_context_350: context_range.clone(),
3794 editable_180_context_350: context_range.clone(),
3795 editable_350_context_150: context_range,
3796 ..Default::default()
3797 },
3798 experiment: None,
3799 in_open_source_repo: false,
3800 can_collect_data: false,
3801 repo_url: None,
3802 }
3803 }
3804
3805 fn make_input_with_context_range(
3806 excerpt: &str,
3807 editable_range: Range<usize>,
3808 context_range: Range<usize>,
3809 cursor_offset: usize,
3810 ) -> ZetaPromptInput {
3811 ZetaPromptInput {
3812 cursor_path: Path::new("test.rs").into(),
3813 cursor_excerpt: excerpt.into(),
3814 cursor_offset_in_excerpt: cursor_offset,
3815 excerpt_start_row: None,
3816 events: vec![],
3817 related_files: vec![],
3818 excerpt_ranges: ExcerptRanges {
3819 editable_150: editable_range.clone(),
3820 editable_180: editable_range.clone(),
3821 editable_350: editable_range,
3822 editable_150_context_350: context_range.clone(),
3823 editable_180_context_350: context_range.clone(),
3824 editable_350_context_150: context_range,
3825 ..Default::default()
3826 },
3827 experiment: None,
3828 in_open_source_repo: false,
3829 can_collect_data: false,
3830 repo_url: None,
3831 }
3832 }
3833
3834 fn make_event(path: &str, diff: &str) -> Event {
3835 Event::BufferChange {
3836 path: Path::new(path).into(),
3837 old_path: Path::new(path).into(),
3838 diff: diff.to_string(),
3839 predicted: false,
3840 in_open_source_repo: false,
3841 }
3842 }
3843
3844 fn make_related_file(path: &str, content: &str) -> RelatedFile {
3845 RelatedFile {
3846 path: Path::new(path).into(),
3847 max_row: content.lines().count() as u32,
3848 excerpts: vec![RelatedExcerpt {
3849 row_range: 0..content.lines().count() as u32,
3850 text: content.into(),
3851 order: 0,
3852 }],
3853 in_open_source_repo: false,
3854 }
3855 }
3856
3857 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
3858 format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
3859 }
3860
#[test]
fn test_no_truncation_when_within_budget() {
    // Bytes 7..15 select "editable"; the cursor at offset 10 sits after "edi".
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    let expected = indoc! {r#"
        <|file_sep|>related.rs
        fn helper() {}
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new
        <|file_sep|>test.rs
        <|fim_prefix|>
        prefix
        <|fim_middle|>current
        edi<|user_cursor|>table
        <|fim_suffix|>

        suffix
        <|fim_middle|>updated
    "#};

    // With a generous budget the related file, edit history and cursor
    // section are all expected in the prompt.
    assert_eq!(format_with_budget(&input, 10000), expected);
}
3893
#[test]
fn test_truncation_drops_edit_history_when_budget_tight() {
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let related_files = vec![
        make_related_file("r1.rs", "a\n"),
        make_related_file("r2.rs", "b\n"),
    ];
    let input = make_input("code", 0..4, 2, events, related_files);

    // Generous budget: related files, edit history and cursor section.
    let untruncated = indoc! {r#"
        <|file_sep|>r1.rs
        a
        <|file_sep|>r2.rs
        b
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), untruncated);

    // Budget of 50: the edit history section is expected to disappear while
    // both related files remain.
    let without_history = indoc! {r#"
        <|file_sep|>r1.rs
        a
        <|file_sep|>r2.rs
        b
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 50), without_history);
}
3944
#[test]
fn test_truncation_includes_partial_excerpts() {
    // One related file made of three consecutive 10-row excerpts, all order 0.
    let excerpt = |row_range, text: &str| RelatedExcerpt {
        row_range,
        text: text.into(),
        order: 0,
    };
    let big_file = RelatedFile {
        path: Path::new("big.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![
            excerpt(0..10, "first excerpt\n"),
            excerpt(10..20, "second excerpt\n"),
            excerpt(20..30, "third excerpt\n"),
        ],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![big_file]);

    // Generous budget: all three excerpts, separated by "..." lines.
    let all_excerpts = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        second excerpt
        ...
        third excerpt
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), all_excerpts);

    // Budget of 50: only the first excerpt is expected, followed by "...".
    let first_excerpt_only = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 50), first_excerpt_only);
}
4009
#[test]
fn test_truncation_prioritizes_lower_order_excerpts() {
    // Two files: file_a has a high-order excerpt, file_b has a low-order one.
    // With a tight budget, only the lower-order excerpt from file_b should be
    // included.
    let file_a = RelatedFile {
        path: Path::new("file_a.rs").into(),
        max_row: 10,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..10,
            text: "low priority content\n".into(),
            order: 5,
        }],
    };
    let file_b = RelatedFile {
        path: Path::new("file_b.rs").into(),
        max_row: 10,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..10,
            text: "high priority content\n".into(),
            order: 1,
        }],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![file_a, file_b]);

    // With a large budget both files are included, file_a.rs first.
    let both_files = indoc! {r#"
        <|file_sep|>file_a.rs
        low priority content
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), both_files);

    // With a tight budget, only file_b (lower order) fits.
    // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
    // file_b header (7) + excerpt (7) = 14 tokens, which fits.
    // file_a would need another 14 tokens, which doesn't fit.
    let file_b_only = indoc! {r#"
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 52), file_b_only);
}
4078
#[test]
fn test_truncation_drops_high_order_excerpts_within_file() {
    // A single file has excerpts at order 1 and order 3. With a tight budget,
    // only the order-1 excerpts are included while the order-3 excerpt is
    // dropped, even though they belong to the same file. This also preserves
    // the parent invariant: parent outline items have order <= their best
    // child, so they're always included when any child is.
    let excerpt = |row_range, text: &str, order| RelatedExcerpt {
        row_range,
        text: text.into(),
        order,
    };
    let module_file = RelatedFile {
        path: Path::new("mod.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![
            excerpt(0..5, "mod header\n", 1),
            excerpt(5..15, "important fn\n", 1),
            excerpt(15..30, "less important fn\n", 3),
        ],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![module_file]);

    // With a large budget, all three excerpts are included.
    let all_excerpts = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        less important fn
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), all_excerpts);

    // With a tight budget, only the order <= 1 excerpts are included
    // (header + important fn).
    let low_order_only = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 55), low_order_only);
}
4152
#[test]
fn test_truncation_drops_older_events_first() {
    // Events are supplied oldest first: old.rs, then new.rs.
    let events = vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")];
    let input = make_input("x", 0..1, 0, events, vec![]);

    // Generous budget: both events appear in the edit history.
    let both_events = indoc! {r#"
        <|file_sep|>edit history
        --- a/old.rs
        +++ b/old.rs
        -1
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), both_events);

    // Budget of 55: only the later event (new.rs) is expected to survive.
    let newest_event_only = indoc! {r#"
        <|file_sep|>edit history
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 55), newest_event_only);
}
4198
#[test]
fn test_cursor_excerpt_always_included_with_minimal_budget() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "helper\n")];
    let input = make_input("fn main() {}", 0..12, 3, events, related_files);

    // Budget of 30: neither the related file nor the edit history is
    // expected, but the cursor section is still rendered in full.
    let cursor_section_only = indoc! {r#"
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        fn <|user_cursor|>main() {}
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 30), cursor_section_only);
}
4221
4222 fn format_seed_coder(input: &ZetaPromptInput) -> String {
4223 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4224 }
4225
4226 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4227 format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4228 }
4229
#[test]
fn test_seed_coder_basic_format() {
    // Bytes 7..15 select "editable"; the cursor at offset 10 sits after "edi".
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    // The expected prompt has no trailing newline after the final marker.
    let expected = indoc! {r#"
        <[fim-suffix]>
        suffix
        <[fim-prefix]><filename>related.rs
        fn helper() {}

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new

        <filename>test.rs
        prefix
        <<<<<<< CURRENT
        edi<|user_cursor|>table
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&input), expected);
}
4262
#[test]
fn test_seed_coder_no_context() {
    // No events and no related files: only the cursor file is rendered.
    let input = make_input("before\nmiddle\nafter", 7..13, 10, Vec::new(), Vec::new());

    let expected = indoc! {r#"
        <[fim-suffix]>
        after
        <[fim-prefix]><filename>test.rs
        before
        <<<<<<< CURRENT
        mid<|user_cursor|>dle
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&input), expected);
}
4280
#[test]
fn test_seed_coder_truncation_drops_context() {
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let related_files = vec![make_related_file("r1.rs", "content\n")];
    let input = make_input("code", 0..4, 2, events, related_files);

    // With a large budget, everything is included.
    let with_context = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>r1.rs
        content

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&input), with_context);

    // With a tight budget, context is dropped but the cursor section remains.
    let cursor_section_only = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder_with_budget(&input, 30), cursor_section_only);
}
4324
#[test]
fn test_seed_coder_truncation_prioritizes_lower_order() {
    // low_prio.rs carries order 10, high_prio.rs carries order 1.
    let low_prio = RelatedFile {
        path: Path::new("low_prio.rs").into(),
        max_row: 5,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..5,
            text: "low prio\n".into(),
            order: 10,
        }],
    };
    let high_prio = RelatedFile {
        path: Path::new("high_prio.rs").into(),
        max_row: 5,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..5,
            text: "high prio\n".into(),
            order: 1,
        }],
    };
    let input = make_input("code", 0..4, 2, vec![], vec![low_prio, high_prio]);

    // With a large budget both files are included. The expected output lists
    // low_prio.rs before high_prio.rs, i.e. in the order they were passed in
    // rather than sorted by name or by priority.
    let both_files = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>low_prio.rs
        low prio
        <filename>high_prio.rs
        high prio

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&input), both_files);

    // With a tight budget, only high_prio is included.
    // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
    // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10
    // more = 20 > 19.
    let high_prio_only = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>high_prio.rs
        high prio

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder_with_budget(&input, 44), high_prio_only);
}
4390
#[test]
fn test_format_zeta1_from_input_basic() {
    // The byte offsets below depend on the four-space indent of `let x = 1;`:
    //   "fn before() {}\n"    -> bytes  0..15
    //   "fn foo() {\n"        -> bytes 15..26
    //   "    let x = 1;\n"    -> bytes 26..41
    // so the editable range 15..41 ends exactly at a line boundary and the
    // cursor offset 30 sits immediately before `let`.
    let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
    let editable_range = 15..41;
    let context_range = 0..excerpt.len();

    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 30,
        excerpt_start_row: Some(0),
        events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
        related_files: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: editable_range.clone(),
            editable_180: editable_range.clone(),
            editable_350: editable_range.clone(),
            editable_150_context_350: context_range.clone(),
            editable_180_context_350: context_range.clone(),
            editable_350_context_150: context_range.clone(),
            ..Default::default()
        },
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "User edited other.rs:\n",
            "```diff\n",
            "-old\n",
            "+new\n",
            "\n",
            "```\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|start_of_file|>\n",
            "fn before() {}\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "\n",
            "<|editable_region_end|>}\n",
            "fn after() {}\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
4453
#[test]
fn test_format_zeta1_from_input_no_start_of_file() {
    // With the four-space indent the excerpt is exactly 28 bytes long, which
    // is what the hard-coded 0..28 ranges require:
    //   "fn foo() {\n"        -> bytes  0..11
    //   "    let x = 1;\n"    -> bytes 11..26
    //   "}\n"                 -> bytes 26..28
    // Cursor offset 15 therefore sits immediately before `let`.
    let excerpt = "fn foo() {\n    let x = 1;\n}\n";
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 15,
        // The excerpt starts at row 10; the expected prompt below contains no
        // `<|start_of_file|>` marker.
        excerpt_start_row: Some(10),
        events: vec![],
        related_files: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: 0..28,
            editable_180: 0..28,
            editable_350: 0..28,
            editable_150_context_350: 0..28,
            editable_180_context_350: 0..28,
            editable_350_context_150: 0..28,
            ..Default::default()
        },
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "\n",
            "<|editable_region_end|>\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
4508
#[test]
fn test_format_zeta1_from_input_with_sub_ranges() {
    // The byte offsets below depend on the four-space indent of `let x = 1;`:
    //   "// prefix\n"         -> bytes  0..10
    //   "fn foo() {\n"        -> bytes 10..21
    //   "    let x = 1;\n"    -> bytes 21..36
    //   "}"                   -> byte  36
    // so the editable range 10..37 covers the function up to (but excluding)
    // the newline after `}`, and cursor offset 25 sits right before `let`.
    let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
    let editable_range = 10..37;
    let context_range = 0..excerpt.len();

    let input = ZetaPromptInput {
        cursor_path: Path::new("test.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 25,
        excerpt_start_row: Some(0),
        events: vec![],
        related_files: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: editable_range.clone(),
            editable_180: editable_range.clone(),
            editable_350: editable_range.clone(),
            editable_150_context_350: context_range.clone(),
            editable_180_context_350: context_range.clone(),
            editable_350_context_150: context_range.clone(),
            ..Default::default()
        },
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```test.rs\n",
            "<|start_of_file|>\n",
            "// prefix\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "<|editable_region_end|>\n",
            "// suffix\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
4569
#[test]
fn test_clean_zeta1_model_output_basic() {
    // `indoc!` strips only the indentation common to all lines, so the
    // `println!` line keeps its four extra spaces and must appear with the
    // same four spaces in the expected string.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();

    // Both region markers and the newlines adjacent to them are gone.
    assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
}
4583
#[test]
fn test_clean_zeta1_model_output_with_cursor() {
    // `indoc!` strips only the indentation common to all lines, so the cursor
    // line keeps its four extra spaces and must appear with the same four
    // spaces in the expected string.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            <|user_cursor_is_here|>println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();

    // The cursor marker is expected to be rewritten to `<|user_cursor|>` at
    // the same position.
    assert_eq!(
        cleaned,
        "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
    );
}
4600
#[test]
fn test_clean_zeta1_model_output_no_markers() {
    // Output without any region markers is expected to come back unchanged,
    // trailing newline included.
    let raw_output = "fn main() {}\n";
    let cleaned = zeta1::clean_zeta1_model_output(raw_output).unwrap();
    assert_eq!(cleaned, "fn main() {}\n");
}
4607
#[test]
fn test_clean_zeta1_model_output_empty_region() {
    // Start and end markers with nothing between them yield an empty string.
    let raw_output = "<|editable_region_start|>\n<|editable_region_end|>\n";
    let cleaned = zeta1::clean_zeta1_model_output(raw_output).unwrap();
    assert_eq!(cleaned, "");
}
4614
/// Returns a copy of `excerpt` in which the bytes in `range` are replaced by
/// `new_text`.
fn apply_edit(excerpt: &str, range: &Range<usize>, new_text: &str) -> String {
    let mut edited = String::from(excerpt);
    edited.replace_range(range.start..range.end, new_text);
    edited
}
4620
#[test]
fn test_parse_zeta2_model_output() {
    let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
    let offset_of = |needle: &str| excerpt.find(needle).unwrap();

    // The context spans "ctx start" up to "after ctx"; the editable region is
    // exactly the "editable old\n" line, with the cursor at its start.
    let context_range = offset_of("ctx start")..offset_of("after ctx");
    let editable_start = offset_of("editable old");
    let editable_range = editable_start..editable_start + "editable old\n".len();
    let input =
        make_input_with_context_range(excerpt, editable_range, context_range, editable_start);

    let model_output = "editable new\n>>>>>>> UPDATED\n";
    let (range, text) =
        parse_zeta2_model_output(model_output, ZetaFormat::V0131GitMergeMarkersPrefix, &input)
            .unwrap();

    // Applying the parsed edit must swap only the editable line.
    let edited = apply_edit(excerpt, &range, &text);
    assert_eq!(
        edited,
        "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
    );
}
4647
#[test]
fn test_parse_zeta2_model_output_identity() {
    let excerpt = "aaa\nbbb\nccc\nddd\neee\n";

    // The editable region covers the "bbb" and "ccc" lines; the context is
    // the whole excerpt.
    let editable_start = excerpt.find("bbb").unwrap();
    let editable_range = editable_start..excerpt.find("ddd").unwrap();
    let whole_excerpt = 0..excerpt.len();
    let input =
        make_input_with_context_range(excerpt, editable_range, whole_excerpt, editable_start);

    // The model echoes the editable region verbatim, so the edit is a no-op.
    let format = ZetaFormat::V0131GitMergeMarkersPrefix;
    let (range, text) =
        parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();

    let edited = apply_edit(excerpt, &range, &text);
    assert_eq!(edited, excerpt);
}
4666
#[test]
fn test_parse_zeta2_model_output_strips_end_marker() {
    let excerpt = "hello\nworld\n";
    let whole_excerpt = 0..excerpt.len();
    let input =
        make_input_with_context_range(excerpt, whole_excerpt.clone(), whole_excerpt, 0);

    let format = ZetaFormat::V0131GitMergeMarkersPrefix;

    // Same replacement text, once with and once without the trailing marker.
    let (marked_range, marked_text) =
        parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
    let (plain_range, plain_text) =
        parse_zeta2_model_output("new content\n", format, &input).unwrap();

    // Both variants must produce the same edited excerpt.
    assert_eq!(
        apply_edit(excerpt, &marked_range, &marked_text),
        apply_edit(excerpt, &plain_range, &plain_text)
    );
    assert_eq!(
        apply_edit(excerpt, &marked_range, &marked_text),
        "new content\n"
    );
}
4683}