1use anyhow::Result;
2use serde::{Deserialize, Serialize};
3use std::fmt::Write;
4use std::ops::Range;
5use std::path::Path;
6use std::sync::Arc;
7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
8
/// Marker written into the editable region of a prompt at the cursor position.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes for the whole prompt.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
16
/// Approximates a token count from a byte length, assuming three bytes per
/// token and rounding down.
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
20
/// The client's preferred edit prediction model. The server may override this.
///
/// Carried in the optional `preferred_model` field of `ZetaPromptInput`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum EditPredictionModelKind {
    Zeta1,
    Zeta2,
}
27
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
///
/// Each `editable_N` range is paired with the `editable_N_context_M` range
/// named after it; `excerpt_range_for_format` returns them as
/// `(editable, context)`.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
}
46
/// Everything needed to render an edit prediction prompt for one cursor position.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written into the prompt's file header.
    pub cursor_path: Arc<Path>,
    /// Text of the file excerpt that contains the cursor.
    pub cursor_excerpt: Arc<str>,
    /// Byte range within `cursor_excerpt` that may be rewritten. Used as the
    /// editable range when `excerpt_ranges` is absent.
    pub editable_range_in_excerpt: Range<usize>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Row at which `cursor_excerpt` starts, if known. The zeta1 formatter
    /// treats `Some(0)` (together with a context range starting at 0) as the
    /// beginning of the file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit history. When a token budget applies, the entries at the end of
    /// this list are kept first and earlier ones are dropped.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other files offered to the model as additional context.
    pub related_files: Vec<RelatedFile>,
    /// When set, the excerpt was computed with a larger budget (~512 tokens)
    /// and these ranges let the server select model-appropriate subsets.
    /// When absent, the excerpt IS the context region and
    /// `editable_range_in_excerpt` is the only editable range.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_ranges: Option<ExcerptRanges>,
    /// Client's preferred model. The server may override.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub preferred_model: Option<EditPredictionModelKind>,
    // NOTE(review): the two flags below are not read anywhere in the visible
    // part of this file; presumably they are consumed by the server — confirm.
    #[serde(default)]
    pub in_open_source_repo: bool,
    #[serde(default)]
    pub can_collect_data: bool,
}
71
/// Identifies which prompt layout is used when rendering a prompt and when
/// cleaning the model's output.
///
/// The variant name doubles as the textual name of the format: `Display`
/// prints it and `ZetaFormat::parse` matches against it.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// NOTE(review): none of the variants visible here appear to need this allow —
// confirm whether it is still required.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    /// The format used when none is specified.
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
}
96
97impl std::fmt::Display for ZetaFormat {
98 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
99 write!(f, "{}", <&'static str>::from(self))
100 }
101}
102
103impl ZetaFormat {
104 pub fn parse(format_name: &str) -> Result<Self> {
105 let mut results = ZetaFormat::iter().filter(|version| {
106 <&'static str>::from(version)
107 .to_lowercase()
108 .contains(&format_name.to_lowercase())
109 });
110 let Some(result) = results.next() else {
111 anyhow::bail!(
112 "`{format_name}` did not match any of:\n{}",
113 Self::options_as_string()
114 );
115 };
116 if results.next().is_some() {
117 anyhow::bail!(
118 "`{format_name}` matched more than one of:\n{}",
119 Self::options_as_string()
120 );
121 }
122 Ok(result)
123 }
124
125 pub fn options_as_string() -> String {
126 ZetaFormat::iter()
127 .map(|format| format!("- {}\n", <&'static str>::from(format)))
128 .collect::<Vec<_>>()
129 .concat()
130 }
131
132 pub fn special_tokens(&self) -> &'static [&'static str] {
133 match self {
134 ZetaFormat::V0112MiddleAtEnd
135 | ZetaFormat::V0113Ordered
136 | ZetaFormat::V0114180EditableRegion => &[
137 "<|fim_prefix|>",
138 "<|fim_suffix|>",
139 "<|fim_middle|>",
140 "<|file_sep|>",
141 CURSOR_MARKER,
142 ],
143 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
144 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
145 v0131_git_merge_markers_prefix::special_tokens()
146 }
147 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
148 }
149 }
150}
151
/// An entry in the edit history supplied with a prompt.
///
/// Serialized with an `event` tag field naming the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change made to a buffer.
    BufferChange {
        /// Path of the file after the change (rendered on the `+++ b` line).
        path: Arc<Path>,
        /// Path of the file before the change (rendered on the `--- a` line);
        /// the zeta1 formatter reports a rename when it differs from `path`.
        old_path: Arc<Path>,
        /// Diff text written verbatim after the path header lines.
        diff: String,
        /// When true, the rendered event is prefixed with a
        /// "User accepted prediction" comment line.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; not written into prompts.
        in_open_source_repo: bool,
    },
}
163
164impl Event {
165 pub fn in_open_source_repo(&self) -> bool {
166 match self {
167 Event::BufferChange {
168 in_open_source_repo,
169 ..
170 } => *in_open_source_repo,
171 }
172 }
173}
174
175pub fn write_event(prompt: &mut String, event: &Event) {
176 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
177 for component in path.components() {
178 prompt.push('/');
179 write!(prompt, "{}", component.as_os_str().display()).ok();
180 }
181 }
182 match event {
183 Event::BufferChange {
184 path,
185 old_path,
186 diff,
187 predicted,
188 in_open_source_repo: _,
189 } => {
190 if *predicted {
191 prompt.push_str("// User accepted prediction:\n");
192 }
193 prompt.push_str("--- a");
194 write_path_as_unix_str(prompt, old_path.as_ref());
195 prompt.push_str("\n+++ b");
196 write_path_as_unix_str(prompt, path.as_ref());
197 prompt.push('\n');
198 prompt.push_str(diff);
199 }
200 }
201}
202
/// A file other than the cursor file, with the excerpts of it that are offered
/// to the model as context.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written into the file header that precedes this file's excerpts.
    pub path: Arc<Path>,
    /// Compared against each excerpt's `row_range.end`: an excerpt ending
    /// before this row is followed by a `...` line when rendered.
    pub max_row: u32,
    /// The excerpts taken from this file.
    pub excerpts: Vec<RelatedExcerpt>,
    // NOTE(review): not read in the visible part of this file — confirm consumer.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
211
/// One excerpt of a `RelatedFile`.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file covered by this excerpt.
    pub row_range: Range<u32>,
    /// The excerpt text, written verbatim into the prompt.
    pub text: Arc<str>,
    /// Priority used when a token budget applies: excerpts with a lower
    /// `order` are admitted first. Defaults to 0 when absent from the input.
    #[serde(default)]
    pub order: usize,
}
219
220pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
221 format
222 .special_tokens()
223 .iter()
224 .any(|token| input.cursor_excerpt.contains(token))
225}
226
/// Renders the prompt for `input` in the given `format`, using
/// `MAX_PROMPT_TOKENS` as the overall token budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    format_zeta_prompt_with_budget(input, format, MAX_PROMPT_TOKENS)
}
230
231/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
232pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
233 match format {
234 ZetaFormat::V0120GitMergeMarkers => output
235 .strip_suffix(v0120_git_merge_markers::END_MARKER)
236 .unwrap_or(output),
237 ZetaFormat::V0131GitMergeMarkersPrefix => output
238 .strip_suffix(v0131_git_merge_markers_prefix::END_MARKER)
239 .unwrap_or(output),
240 ZetaFormat::V0211SeedCoder => output
241 .strip_suffix(seed_coder::END_MARKER)
242 .unwrap_or(output),
243 _ => output,
244 }
245}
246
247pub fn excerpt_range_for_format(
248 format: ZetaFormat,
249 ranges: &ExcerptRanges,
250) -> (Range<usize>, Range<usize>) {
251 match format {
252 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
253 ranges.editable_150.clone(),
254 ranges.editable_150_context_350.clone(),
255 ),
256 ZetaFormat::V0114180EditableRegion
257 | ZetaFormat::V0120GitMergeMarkers
258 | ZetaFormat::V0131GitMergeMarkersPrefix
259 | ZetaFormat::V0211Prefill
260 | ZetaFormat::V0211SeedCoder => (
261 ranges.editable_350.clone(),
262 ranges.editable_350_context_150.clone(),
263 ),
264 }
265}
266
267pub fn resolve_cursor_region(
268 input: &ZetaPromptInput,
269 format: ZetaFormat,
270) -> (&str, Range<usize>, usize) {
271 let Some(ranges) = &input.excerpt_ranges else {
272 return (
273 &input.cursor_excerpt,
274 input.editable_range_in_excerpt.clone(),
275 input.cursor_offset_in_excerpt,
276 );
277 };
278
279 let (editable_range, context_range) = excerpt_range_for_format(format, ranges);
280 let context_start = context_range.start;
281 let context_text = &input.cursor_excerpt[context_range];
282 let adjusted_editable =
283 (editable_range.start - context_start)..(editable_range.end - context_start);
284 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
285
286 (context_text, adjusted_editable, adjusted_cursor)
287}
288
289fn format_zeta_prompt_with_budget(
290 input: &ZetaPromptInput,
291 format: ZetaFormat,
292 max_tokens: usize,
293) -> String {
294 let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
295 let path = &*input.cursor_path;
296
297 let mut cursor_section = String::new();
298 match format {
299 ZetaFormat::V0112MiddleAtEnd => {
300 v0112_middle_at_end::write_cursor_excerpt_section(
301 &mut cursor_section,
302 path,
303 context,
304 &editable_range,
305 cursor_offset,
306 );
307 }
308 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
309 v0113_ordered::write_cursor_excerpt_section(
310 &mut cursor_section,
311 path,
312 context,
313 &editable_range,
314 cursor_offset,
315 )
316 }
317 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
318 &mut cursor_section,
319 path,
320 context,
321 &editable_range,
322 cursor_offset,
323 ),
324 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
325 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
326 &mut cursor_section,
327 path,
328 context,
329 &editable_range,
330 cursor_offset,
331 )
332 }
333 ZetaFormat::V0211SeedCoder => {
334 return seed_coder::format_prompt_with_budget(
335 path,
336 context,
337 &editable_range,
338 cursor_offset,
339 &input.events,
340 &input.related_files,
341 max_tokens,
342 );
343 }
344 }
345
346 let cursor_tokens = estimate_tokens(cursor_section.len());
347 let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
348
349 let edit_history_section = format_edit_history_within_budget(
350 &input.events,
351 "<|file_sep|>",
352 "edit history",
353 budget_after_cursor,
354 );
355 let edit_history_tokens = estimate_tokens(edit_history_section.len());
356 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
357
358 let related_files_section = format_related_files_within_budget(
359 &input.related_files,
360 "<|file_sep|>",
361 "",
362 budget_after_edit_history,
363 );
364
365 let mut prompt = String::new();
366 prompt.push_str(&related_files_section);
367 prompt.push_str(&edit_history_section);
368 prompt.push_str(&cursor_section);
369 prompt
370}
371
372pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
373 match format {
374 ZetaFormat::V0112MiddleAtEnd
375 | ZetaFormat::V0113Ordered
376 | ZetaFormat::V0114180EditableRegion
377 | ZetaFormat::V0120GitMergeMarkers
378 | ZetaFormat::V0131GitMergeMarkersPrefix
379 | ZetaFormat::V0211SeedCoder => String::new(),
380 ZetaFormat::V0211Prefill => {
381 let (context, editable_range, _) = resolve_cursor_region(input, format);
382 v0211_prefill::get_prefill(context, &editable_range)
383 }
384 }
385}
386
387fn format_edit_history_within_budget(
388 events: &[Arc<Event>],
389 file_marker: &str,
390 edit_history_name: &str,
391 max_tokens: usize,
392) -> String {
393 let header = format!("{}{}\n", file_marker, edit_history_name);
394 let header_tokens = estimate_tokens(header.len());
395 if header_tokens >= max_tokens {
396 return String::new();
397 }
398
399 let mut event_strings: Vec<String> = Vec::new();
400 let mut total_tokens = header_tokens;
401
402 for event in events.iter().rev() {
403 let mut event_str = String::new();
404 write_event(&mut event_str, event);
405 let event_tokens = estimate_tokens(event_str.len());
406
407 if total_tokens + event_tokens > max_tokens {
408 break;
409 }
410 total_tokens += event_tokens;
411 event_strings.push(event_str);
412 }
413
414 if event_strings.is_empty() {
415 return String::new();
416 }
417
418 let mut result = header;
419 for event_str in event_strings.iter().rev() {
420 result.push_str(event_str);
421 }
422 result
423}
424
425fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
426 let needs_newline = !excerpt.text.ends_with('\n');
427 let needs_ellipsis = excerpt.row_range.end < file_max_row;
428 let len = excerpt.text.len()
429 + if needs_newline { "\n".len() } else { 0 }
430 + if needs_ellipsis { "...\n".len() } else { 0 };
431 estimate_tokens(len)
432}
433
/// Renders as many related-file excerpts as fit within `max_tokens`.
///
/// Excerpts are admitted in ascending `order` (ties broken by file index, then
/// excerpt index) until the first one that does not fit. The admitted excerpts
/// are then rendered grouped by file, in the original file and excerpt order.
///
/// Each rendered file starts with a `{file_prefix}{path}` header line, and
/// `file_suffix` is written between consecutive files (not after the last).
/// An excerpt that ends before the file's `max_row` is followed by a `...` line.
pub fn format_related_files_within_budget(
    related_files: &[RelatedFile],
    file_prefix: &str,
    file_suffix: &str,
    max_tokens: usize,
) -> String {
    // Identifies one excerpt by position, together with its priority.
    struct ExcerptCandidate {
        file_ix: usize,
        excerpt_ix: usize,
        order: usize,
    }

    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
        .iter()
        .enumerate()
        .flat_map(|(file_ix, file)| {
            file.excerpts
                .iter()
                .enumerate()
                .map(move |(excerpt_ix, e)| ExcerptCandidate {
                    file_ix,
                    excerpt_ix,
                    order: e.order,
                })
        })
        .collect();

    // Pre-compute the header line of every file; its token cost is estimated
    // below, the first time an excerpt of that file is admitted.
    let file_headers: Vec<String> = related_files
        .iter()
        .map(|file| {
            let path_str = file.path.to_string_lossy();
            format!("{}{}\n", file_prefix, path_str)
        })
        .collect();

    // Sort the excerpts by their order and determine how many fit within the budget.
    let mut total_tokens = 0;
    let mut included_excerpt_count = 0_usize;
    let mut included_file_indices = vec![false; related_files.len()];
    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
    for candidate in &excerpt_candidates {
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        let file_already_included = included_file_indices[candidate.file_ix];
        // The header (plus suffix) is only paid for by the first excerpt of a file.
        let header_cost = if file_already_included {
            0
        } else {
            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
        };
        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
        // Stop at the first excerpt that does not fit; later ones are not tried.
        if total_tokens + header_cost + excerpt_cost > max_tokens {
            break;
        }
        total_tokens += header_cost + excerpt_cost;
        if !file_already_included {
            included_file_indices[candidate.file_ix] = true;
        }
        included_excerpt_count += 1;
    }

    // Keep only the admitted excerpts and restore file/excerpt order for rendering.
    excerpt_candidates.truncate(included_excerpt_count);
    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));

    // Render all of the files that fit within the token budget, in the original order.
    let mut result = String::new();
    let mut last_file_ix = None;
    for candidate in &excerpt_candidates {
        if last_file_ix != Some(candidate.file_ix) {
            // Moving on to a new file: close the previous one, then write the header.
            if last_file_ix.is_some() {
                result.push_str(file_suffix);
            }
            result.push_str(&file_headers[candidate.file_ix]);
            last_file_ix = Some(candidate.file_ix);
        }
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        result.push_str(&excerpt.text);
        if !result.ends_with('\n') {
            result.push('\n');
        }
        if excerpt.row_range.end < file.max_row {
            result.push_str("...\n");
        }
    }

    result
}
522
523pub fn write_related_files(
524 prompt: &mut String,
525 related_files: &[RelatedFile],
526) -> Vec<Range<usize>> {
527 let mut ranges = Vec::new();
528 for file in related_files {
529 let start = prompt.len();
530 let path_str = file.path.to_string_lossy();
531 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
532 for excerpt in &file.excerpts {
533 prompt.push_str(&excerpt.text);
534 if !prompt.ends_with('\n') {
535 prompt.push('\n');
536 }
537 if excerpt.row_range.end < file.max_row {
538 prompt.push_str("...\n");
539 }
540 }
541 let end = prompt.len();
542 ranges.push(start..end);
543 }
544 ranges
545}
546
547mod v0112_middle_at_end {
548 use super::*;
549
550 pub fn write_cursor_excerpt_section(
551 prompt: &mut String,
552 path: &Path,
553 context: &str,
554 editable_range: &Range<usize>,
555 cursor_offset: usize,
556 ) {
557 let path_str = path.to_string_lossy();
558 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
559
560 prompt.push_str("<|fim_prefix|>\n");
561 prompt.push_str(&context[..editable_range.start]);
562
563 prompt.push_str("<|fim_suffix|>\n");
564 prompt.push_str(&context[editable_range.end..]);
565 if !prompt.ends_with('\n') {
566 prompt.push('\n');
567 }
568
569 prompt.push_str("<|fim_middle|>current\n");
570 prompt.push_str(&context[editable_range.start..cursor_offset]);
571 prompt.push_str(CURSOR_MARKER);
572 prompt.push_str(&context[cursor_offset..editable_range.end]);
573 if !prompt.ends_with('\n') {
574 prompt.push('\n');
575 }
576
577 prompt.push_str("<|fim_middle|>updated\n");
578 }
579}
580
581mod v0113_ordered {
582 use super::*;
583
584 pub fn write_cursor_excerpt_section(
585 prompt: &mut String,
586 path: &Path,
587 context: &str,
588 editable_range: &Range<usize>,
589 cursor_offset: usize,
590 ) {
591 let path_str = path.to_string_lossy();
592 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
593
594 prompt.push_str("<|fim_prefix|>\n");
595 prompt.push_str(&context[..editable_range.start]);
596 if !prompt.ends_with('\n') {
597 prompt.push('\n');
598 }
599
600 prompt.push_str("<|fim_middle|>current\n");
601 prompt.push_str(&context[editable_range.start..cursor_offset]);
602 prompt.push_str(CURSOR_MARKER);
603 prompt.push_str(&context[cursor_offset..editable_range.end]);
604 if !prompt.ends_with('\n') {
605 prompt.push('\n');
606 }
607
608 prompt.push_str("<|fim_suffix|>\n");
609 prompt.push_str(&context[editable_range.end..]);
610 if !prompt.ends_with('\n') {
611 prompt.push('\n');
612 }
613
614 prompt.push_str("<|fim_middle|>updated\n");
615 }
616}
617
618pub mod v0120_git_merge_markers {
619 //! A prompt that uses git-style merge conflict markers to represent the editable region.
620 //!
621 //! Example prompt:
622 //!
623 //! <|file_sep|>path/to/target_file.py
624 //! <|fim_prefix|>
625 //! code before editable region
626 //! <|fim_suffix|>
627 //! code after editable region
628 //! <|fim_middle|>
629 //! <<<<<<< CURRENT
630 //! code that
631 //! needs to<|user_cursor|>
632 //! be rewritten
633 //! =======
634 //!
635 //! Expected output (should be generated by the model):
636 //!
637 //! updated
638 //! code with
639 //! changes applied
640 //! >>>>>>> UPDATED
641
642 use super::*;
643
644 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
645 pub const SEPARATOR: &str = "=======\n";
646 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
647
648 pub fn special_tokens() -> &'static [&'static str] {
649 &[
650 "<|fim_prefix|>",
651 "<|fim_suffix|>",
652 "<|fim_middle|>",
653 "<|file_sep|>",
654 START_MARKER,
655 SEPARATOR,
656 END_MARKER,
657 CURSOR_MARKER,
658 ]
659 }
660
661 pub fn write_cursor_excerpt_section(
662 prompt: &mut String,
663 path: &Path,
664 context: &str,
665 editable_range: &Range<usize>,
666 cursor_offset: usize,
667 ) {
668 let path_str = path.to_string_lossy();
669 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
670
671 prompt.push_str("<|fim_prefix|>");
672 prompt.push_str(&context[..editable_range.start]);
673
674 prompt.push_str("<|fim_suffix|>");
675 prompt.push_str(&context[editable_range.end..]);
676 if !prompt.ends_with('\n') {
677 prompt.push('\n');
678 }
679
680 prompt.push_str("<|fim_middle|>");
681 prompt.push_str(START_MARKER);
682 prompt.push_str(&context[editable_range.start..cursor_offset]);
683 prompt.push_str(CURSOR_MARKER);
684 prompt.push_str(&context[cursor_offset..editable_range.end]);
685 if !prompt.ends_with('\n') {
686 prompt.push('\n');
687 }
688 prompt.push_str(SEPARATOR);
689 }
690}
691
692pub mod v0131_git_merge_markers_prefix {
693 //! A prompt that uses git-style merge conflict markers to represent the editable region.
694 //!
695 //! Example prompt:
696 //!
697 //! <|file_sep|>path/to/target_file.py
698 //! <|fim_prefix|>
699 //! code before editable region
700 //! <<<<<<< CURRENT
701 //! code that
702 //! needs to<|user_cursor|>
703 //! be rewritten
704 //! =======
705 //! <|fim_suffix|>
706 //! code after editable region
707 //! <|fim_middle|>
708 //!
709 //! Expected output (should be generated by the model):
710 //!
711 //! updated
712 //! code with
713 //! changes applied
714 //! >>>>>>> UPDATED
715
716 use super::*;
717
718 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
719 pub const SEPARATOR: &str = "=======\n";
720 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
721
722 pub fn special_tokens() -> &'static [&'static str] {
723 &[
724 "<|fim_prefix|>",
725 "<|fim_suffix|>",
726 "<|fim_middle|>",
727 "<|file_sep|>",
728 START_MARKER,
729 SEPARATOR,
730 END_MARKER,
731 CURSOR_MARKER,
732 ]
733 }
734
735 pub fn write_cursor_excerpt_section(
736 prompt: &mut String,
737 path: &Path,
738 context: &str,
739 editable_range: &Range<usize>,
740 cursor_offset: usize,
741 ) {
742 let path_str = path.to_string_lossy();
743 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
744
745 prompt.push_str("<|fim_prefix|>");
746 prompt.push_str(&context[..editable_range.start]);
747 prompt.push_str(START_MARKER);
748 prompt.push_str(&context[editable_range.start..cursor_offset]);
749 prompt.push_str(CURSOR_MARKER);
750 prompt.push_str(&context[cursor_offset..editable_range.end]);
751 if !prompt.ends_with('\n') {
752 prompt.push('\n');
753 }
754 prompt.push_str(SEPARATOR);
755
756 prompt.push_str("<|fim_suffix|>");
757 prompt.push_str(&context[editable_range.end..]);
758 if !prompt.ends_with('\n') {
759 prompt.push('\n');
760 }
761
762 prompt.push_str("<|fim_middle|>");
763 }
764}
765
766pub mod v0211_prefill {
767 use super::*;
768
769 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
770 let editable_region = &context[editable_range.start..editable_range.end];
771
772 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
773 let prefill_len = editable_region.floor_char_boundary(prefill_len);
774
775 // Find a token boundary to avoid splitting tokens in the prefill.
776 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
777 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
778 // the \n and consume any consecutive \n characters after it.
779 let prefill = &editable_region[..prefill_len];
780 match prefill.rfind('\n') {
781 Some(pos) => {
782 let mut end = pos + 1;
783 while end < editable_region.len()
784 && editable_region.as_bytes().get(end) == Some(&b'\n')
785 {
786 end += 1;
787 }
788 editable_region[..end].to_string()
789 }
790 // No newline found. Fall back to splitting before the last space
791 // (word-level boundary)
792 None => match prefill.rfind(' ') {
793 Some(pos) => prefill[..pos].to_string(),
794 None => prefill.to_string(),
795 },
796 }
797 }
798}
799
800pub mod seed_coder {
801 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
802 //!
803 //! Seed-Coder uses different FIM tokens and order than Qwen:
804 //! - SPM order: suffix comes FIRST, then prefix, then middle
805 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
806 //! - File markers: StarCoder-style `<filename>path` (single token + path)
807 //!
808 //! All context (related files, edit history) goes in the PREFIX section.
809 //! The suffix contains only code after the editable region.
810 //!
811 //! Example prompt:
812 //!
813 //! <[fim-suffix]>
814 //! code after editable region
815 //! <[fim-prefix]><filename>related/file.py
816 //! related file content
817 //!
818 //! <filename>edit_history
819 //! --- a/some_file.py
820 //! +++ b/some_file.py
821 //! -old
822 //! +new
823 //!
824 //! <filename>path/to/target_file.py
825 //! code before editable region
826 //! <<<<<<< CURRENT
827 //! code that
828 //! needs to<|user_cursor|>
829 //! be rewritten
830 //! =======
831 //! <[fim-middle]>
832 //!
833 //! Expected output (model generates):
834 //!
835 //! updated
836 //! code with
837 //! changes applied
838 //! >>>>>>> UPDATED
839
840 use super::*;
841
842 pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
843 pub const FIM_PREFIX: &str = "<[fim-prefix]>";
844 pub const FIM_MIDDLE: &str = "<[fim-middle]>";
845 pub const FILE_MARKER: &str = "<filename>";
846
847 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
848 pub const SEPARATOR: &str = "=======\n";
849 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
850
851 pub fn special_tokens() -> &'static [&'static str] {
852 &[
853 FIM_SUFFIX,
854 FIM_PREFIX,
855 FIM_MIDDLE,
856 FILE_MARKER,
857 START_MARKER,
858 SEPARATOR,
859 END_MARKER,
860 CURSOR_MARKER,
861 ]
862 }
863
864 pub fn format_prompt_with_budget(
865 path: &Path,
866 context: &str,
867 editable_range: &Range<usize>,
868 cursor_offset: usize,
869 events: &[Arc<Event>],
870 related_files: &[RelatedFile],
871 max_tokens: usize,
872 ) -> String {
873 let suffix_section = build_suffix_section(context, editable_range);
874 let cursor_prefix_section =
875 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
876
877 let suffix_tokens = estimate_tokens(suffix_section.len());
878 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
879 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
880
881 let edit_history_section = super::format_edit_history_within_budget(
882 events,
883 FILE_MARKER,
884 "edit_history",
885 budget_after_cursor,
886 );
887 let edit_history_tokens = estimate_tokens(edit_history_section.len());
888 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
889
890 let related_files_section = super::format_related_files_within_budget(
891 related_files,
892 FILE_MARKER,
893 "",
894 budget_after_edit_history,
895 );
896
897 let mut prompt = String::new();
898 prompt.push_str(&suffix_section);
899 prompt.push_str(FIM_PREFIX);
900 prompt.push_str(&related_files_section);
901 if !related_files_section.is_empty() {
902 prompt.push('\n');
903 }
904 prompt.push_str(&edit_history_section);
905 if !edit_history_section.is_empty() {
906 prompt.push('\n');
907 }
908 prompt.push_str(&cursor_prefix_section);
909 prompt.push_str(FIM_MIDDLE);
910 prompt
911 }
912
913 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
914 let mut section = String::new();
915 section.push_str(FIM_SUFFIX);
916 section.push_str(&context[editable_range.end..]);
917 if !section.ends_with('\n') {
918 section.push('\n');
919 }
920 section
921 }
922
923 fn build_cursor_prefix_section(
924 path: &Path,
925 context: &str,
926 editable_range: &Range<usize>,
927 cursor_offset: usize,
928 ) -> String {
929 let mut section = String::new();
930 let path_str = path.to_string_lossy();
931 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
932
933 section.push_str(&context[..editable_range.start]);
934 section.push_str(START_MARKER);
935 section.push_str(&context[editable_range.start..cursor_offset]);
936 section.push_str(CURSOR_MARKER);
937 section.push_str(&context[cursor_offset..editable_range.end]);
938 if !section.ends_with('\n') {
939 section.push('\n');
940 }
941 section.push_str(SEPARATOR);
942 section
943 }
944}
945
946/// The zeta1 prompt format
947pub mod zeta1 {
948 use super::*;
949 use std::fmt::Write;
950
951 pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
952 pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
953 pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
954 pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
955
956 const INSTRUCTION_HEADER: &str = concat!(
957 "### Instruction:\n",
958 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
959 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
960 "into account the cursor location.\n\n",
961 "### User Edits:\n\n"
962 );
963 const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
964 const RESPONSE_HEADER: &str = "\n\n### Response:\n";
965
966 /// Formats a complete zeta1 prompt from the input events and excerpt.
967 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
968 let mut prompt = String::with_capacity(
969 INSTRUCTION_HEADER.len()
970 + input_events.len()
971 + EXCERPT_HEADER.len()
972 + input_excerpt.len()
973 + RESPONSE_HEADER.len(),
974 );
975 prompt.push_str(INSTRUCTION_HEADER);
976 prompt.push_str(input_events);
977 prompt.push_str(EXCERPT_HEADER);
978 prompt.push_str(input_excerpt);
979 prompt.push_str(RESPONSE_HEADER);
980 prompt
981 }
982
983 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
984 /// editable and context byte-offset ranges within `cursor_excerpt`.
985 pub fn format_zeta1_from_input(
986 input: &ZetaPromptInput,
987 editable_range: Range<usize>,
988 context_range: Range<usize>,
989 ) -> String {
990 let events = format_zeta1_events(&input.events);
991 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
992 format_zeta1_prompt(&events, &excerpt)
993 }
994
995 /// Formats events in zeta1 style (oldest first).
996 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
997 let mut result = String::new();
998 for event in events {
999 let event_string = format_zeta1_event(event);
1000 if event_string.is_empty() {
1001 continue;
1002 }
1003 if !result.is_empty() {
1004 result.push_str("\n\n");
1005 }
1006 result.push_str(&event_string);
1007 }
1008 result
1009 }
1010
1011 fn format_zeta1_event(event: &Event) -> String {
1012 match event {
1013 Event::BufferChange {
1014 path,
1015 old_path,
1016 diff,
1017 ..
1018 } => {
1019 let mut prompt = String::new();
1020 if old_path != path {
1021 writeln!(
1022 prompt,
1023 "User renamed {} to {}\n",
1024 old_path.display(),
1025 path.display()
1026 )
1027 .ok();
1028 }
1029 if !diff.is_empty() {
1030 write!(
1031 prompt,
1032 "User edited {}:\n```diff\n{}\n```",
1033 path.display(),
1034 diff
1035 )
1036 .ok();
1037 }
1038 prompt
1039 }
1040 }
1041 }
1042
1043 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
1044 /// within `cursor_excerpt`.
1045 fn format_zeta1_excerpt(
1046 input: &ZetaPromptInput,
1047 editable_range: Range<usize>,
1048 context_range: Range<usize>,
1049 ) -> String {
1050 let path_str = input.cursor_path.to_string_lossy();
1051 let excerpt = &*input.cursor_excerpt;
1052 let cursor_offset = input.cursor_offset_in_excerpt;
1053
1054 let mut prompt = String::new();
1055 writeln!(&mut prompt, "```{path_str}").ok();
1056
1057 let starts_at_file_beginning =
1058 input.excerpt_start_row == Some(0) && context_range.start == 0;
1059 if starts_at_file_beginning {
1060 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
1061 }
1062
1063 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
1064
1065 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
1066 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
1067 prompt.push_str(CURSOR_MARKER);
1068 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
1069 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
1070
1071 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
1072 write!(prompt, "\n```").ok();
1073
1074 prompt
1075 }
1076
1077 /// Cleans zeta1 model output by extracting content between editable region
1078 /// markers and converting the zeta1 cursor marker to the universal one.
1079 /// Returns `None` if the output doesn't contain the expected markers.
1080 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
1081 let content = output.replace(CURSOR_MARKER, "");
1082
1083 let content_start = content
1084 .find(EDITABLE_REGION_START_MARKER)
1085 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
1086 .map(|pos| {
1087 if content.as_bytes().get(pos) == Some(&b'\n') {
1088 pos + 1
1089 } else {
1090 pos
1091 }
1092 })
1093 .unwrap_or(0);
1094
1095 let content_end = content
1096 .find(EDITABLE_REGION_END_MARKER)
1097 .map(|pos| {
1098 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
1099 pos - 1
1100 } else {
1101 pos
1102 }
1103 })
1104 .unwrap_or(content.len());
1105
1106 if content_start > content_end {
1107 return Some(String::new());
1108 }
1109
1110 let extracted = &content[content_start..content_end];
1111
1112 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
1113 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
1114 let text_before_cursor = text_before_cursor
1115 .find(EDITABLE_REGION_START_MARKER)
1116 .map(|pos| {
1117 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
1118 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
1119 after_marker + 1
1120 } else {
1121 after_marker
1122 }
1123 })
1124 .unwrap_or(0);
1125 let offset_in_extracted = zeta1_cursor_pos
1126 .saturating_sub(text_before_cursor)
1127 .min(extracted.len());
1128 offset_in_extracted
1129 });
1130
1131 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
1132 if let Some(offset) = cursor_offset {
1133 result.push_str(&extracted[..offset]);
1134 result.push_str(super::CURSOR_MARKER);
1135 result.push_str(&extracted[offset..]);
1136 } else {
1137 result.push_str(extracted);
1138 }
1139
1140 Some(result)
1141 }
1142}
1143
1144#[cfg(test)]
1145mod tests {
1146 use super::*;
1147 use indoc::indoc;
1148
1149 fn make_input(
1150 cursor_excerpt: &str,
1151 editable_range: Range<usize>,
1152 cursor_offset: usize,
1153 events: Vec<Event>,
1154 related_files: Vec<RelatedFile>,
1155 ) -> ZetaPromptInput {
1156 ZetaPromptInput {
1157 cursor_path: Path::new("test.rs").into(),
1158 cursor_excerpt: cursor_excerpt.into(),
1159 editable_range_in_excerpt: editable_range,
1160 cursor_offset_in_excerpt: cursor_offset,
1161 excerpt_start_row: None,
1162 events: events.into_iter().map(Arc::new).collect(),
1163 related_files,
1164 excerpt_ranges: None,
1165 preferred_model: None,
1166 in_open_source_repo: false,
1167 can_collect_data: false,
1168 }
1169 }
1170
1171 fn make_event(path: &str, diff: &str) -> Event {
1172 Event::BufferChange {
1173 path: Path::new(path).into(),
1174 old_path: Path::new(path).into(),
1175 diff: diff.to_string(),
1176 predicted: false,
1177 in_open_source_repo: false,
1178 }
1179 }
1180
1181 fn make_related_file(path: &str, content: &str) -> RelatedFile {
1182 RelatedFile {
1183 path: Path::new(path).into(),
1184 max_row: content.lines().count() as u32,
1185 excerpts: vec![RelatedExcerpt {
1186 row_range: 0..content.lines().count() as u32,
1187 text: content.into(),
1188 order: 0,
1189 }],
1190 in_open_source_repo: false,
1191 }
1192 }
1193
1194 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1195 format_zeta_prompt_with_budget(input, ZetaFormat::V0114180EditableRegion, max_tokens)
1196 }
1197
1198 #[test]
1199 fn test_no_truncation_when_within_budget() {
1200 let input = make_input(
1201 "prefix\neditable\nsuffix",
1202 7..15,
1203 10,
1204 vec![make_event("a.rs", "-old\n+new\n")],
1205 vec![make_related_file("related.rs", "fn helper() {}\n")],
1206 );
1207
1208 assert_eq!(
1209 format_with_budget(&input, 10000),
1210 indoc! {r#"
1211 <|file_sep|>related.rs
1212 fn helper() {}
1213 <|file_sep|>edit history
1214 --- a/a.rs
1215 +++ b/a.rs
1216 -old
1217 +new
1218 <|file_sep|>test.rs
1219 <|fim_prefix|>
1220 prefix
1221 <|fim_middle|>current
1222 edi<|user_cursor|>table
1223 <|fim_suffix|>
1224
1225 suffix
1226 <|fim_middle|>updated
1227 "#}
1228 );
1229 }
1230
1231 #[test]
1232 fn test_truncation_drops_edit_history_when_budget_tight() {
1233 let input = make_input(
1234 "code",
1235 0..4,
1236 2,
1237 vec![make_event("a.rs", "-x\n+y\n")],
1238 vec![
1239 make_related_file("r1.rs", "a\n"),
1240 make_related_file("r2.rs", "b\n"),
1241 ],
1242 );
1243
1244 assert_eq!(
1245 format_with_budget(&input, 10000),
1246 indoc! {r#"
1247 <|file_sep|>r1.rs
1248 a
1249 <|file_sep|>r2.rs
1250 b
1251 <|file_sep|>edit history
1252 --- a/a.rs
1253 +++ b/a.rs
1254 -x
1255 +y
1256 <|file_sep|>test.rs
1257 <|fim_prefix|>
1258 <|fim_middle|>current
1259 co<|user_cursor|>de
1260 <|fim_suffix|>
1261 <|fim_middle|>updated
1262 "#}
1263 );
1264
1265 assert_eq!(
1266 format_with_budget(&input, 50),
1267 indoc! {r#"
1268 <|file_sep|>r1.rs
1269 a
1270 <|file_sep|>r2.rs
1271 b
1272 <|file_sep|>test.rs
1273 <|fim_prefix|>
1274 <|fim_middle|>current
1275 co<|user_cursor|>de
1276 <|fim_suffix|>
1277 <|fim_middle|>updated
1278 "#}
1279 );
1280 }
1281
1282 #[test]
1283 fn test_truncation_includes_partial_excerpts() {
1284 let input = make_input(
1285 "x",
1286 0..1,
1287 0,
1288 vec![],
1289 vec![RelatedFile {
1290 path: Path::new("big.rs").into(),
1291 max_row: 30,
1292 in_open_source_repo: false,
1293 excerpts: vec![
1294 RelatedExcerpt {
1295 row_range: 0..10,
1296 text: "first excerpt\n".into(),
1297 order: 0,
1298 },
1299 RelatedExcerpt {
1300 row_range: 10..20,
1301 text: "second excerpt\n".into(),
1302 order: 0,
1303 },
1304 RelatedExcerpt {
1305 row_range: 20..30,
1306 text: "third excerpt\n".into(),
1307 order: 0,
1308 },
1309 ],
1310 }],
1311 );
1312
1313 assert_eq!(
1314 format_with_budget(&input, 10000),
1315 indoc! {r#"
1316 <|file_sep|>big.rs
1317 first excerpt
1318 ...
1319 second excerpt
1320 ...
1321 third excerpt
1322 <|file_sep|>test.rs
1323 <|fim_prefix|>
1324 <|fim_middle|>current
1325 <|user_cursor|>x
1326 <|fim_suffix|>
1327 <|fim_middle|>updated
1328 "#}
1329 );
1330
1331 assert_eq!(
1332 format_with_budget(&input, 50),
1333 indoc! {r#"
1334 <|file_sep|>big.rs
1335 first excerpt
1336 ...
1337 <|file_sep|>test.rs
1338 <|fim_prefix|>
1339 <|fim_middle|>current
1340 <|user_cursor|>x
1341 <|fim_suffix|>
1342 <|fim_middle|>updated
1343 "#}
1344 );
1345 }
1346
1347 #[test]
1348 fn test_truncation_prioritizes_lower_order_excerpts() {
1349 // Two files: file_a has a high-order excerpt, file_b has a low-order one.
1350 // With tight budget, only the lower-order excerpt from file_b should be included.
1351 let input = make_input(
1352 "x",
1353 0..1,
1354 0,
1355 vec![],
1356 vec![
1357 RelatedFile {
1358 path: Path::new("file_a.rs").into(),
1359 max_row: 10,
1360 in_open_source_repo: false,
1361 excerpts: vec![RelatedExcerpt {
1362 row_range: 0..10,
1363 text: "low priority content\n".into(),
1364 order: 5,
1365 }],
1366 },
1367 RelatedFile {
1368 path: Path::new("file_b.rs").into(),
1369 max_row: 10,
1370 in_open_source_repo: false,
1371 excerpts: vec![RelatedExcerpt {
1372 row_range: 0..10,
1373 text: "high priority content\n".into(),
1374 order: 1,
1375 }],
1376 },
1377 ],
1378 );
1379
1380 // With large budget, both files included; rendered in stable lexicographic order.
1381 assert_eq!(
1382 format_with_budget(&input, 10000),
1383 indoc! {r#"
1384 <|file_sep|>file_a.rs
1385 low priority content
1386 <|file_sep|>file_b.rs
1387 high priority content
1388 <|file_sep|>test.rs
1389 <|fim_prefix|>
1390 <|fim_middle|>current
1391 <|user_cursor|>x
1392 <|fim_suffix|>
1393 <|fim_middle|>updated
1394 "#}
1395 );
1396
1397 // With tight budget, only file_b (lower order) fits.
1398 // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
1399 // file_b header (7) + excerpt (7) = 14 tokens, which fits.
1400 // file_a would need another 14 tokens, which doesn't fit.
1401 assert_eq!(
1402 format_with_budget(&input, 52),
1403 indoc! {r#"
1404 <|file_sep|>file_b.rs
1405 high priority content
1406 <|file_sep|>test.rs
1407 <|fim_prefix|>
1408 <|fim_middle|>current
1409 <|user_cursor|>x
1410 <|fim_suffix|>
1411 <|fim_middle|>updated
1412 "#}
1413 );
1414 }
1415
1416 #[test]
1417 fn test_truncation_drops_high_order_excerpts_within_file() {
1418 // A single file has excerpts at order 1 and order 3. With a tight budget,
1419 // only the order-1 excerpts are included while the order-3 excerpt is
1420 // dropped — even though they belong to the same file. This also preserves
1421 // the parent invariant: parent outline items have order ≤ their best
1422 // child, so they're always included when any child is.
1423 let input = make_input(
1424 "x",
1425 0..1,
1426 0,
1427 vec![],
1428 vec![RelatedFile {
1429 path: Path::new("mod.rs").into(),
1430 max_row: 30,
1431 in_open_source_repo: false,
1432 excerpts: vec![
1433 RelatedExcerpt {
1434 row_range: 0..5,
1435 text: "mod header\n".into(),
1436 order: 1,
1437 },
1438 RelatedExcerpt {
1439 row_range: 5..15,
1440 text: "important fn\n".into(),
1441 order: 1,
1442 },
1443 RelatedExcerpt {
1444 row_range: 15..30,
1445 text: "less important fn\n".into(),
1446 order: 3,
1447 },
1448 ],
1449 }],
1450 );
1451
1452 // With large budget, all three excerpts included.
1453 assert_eq!(
1454 format_with_budget(&input, 10000),
1455 indoc! {r#"
1456 <|file_sep|>mod.rs
1457 mod header
1458 ...
1459 important fn
1460 ...
1461 less important fn
1462 <|file_sep|>test.rs
1463 <|fim_prefix|>
1464 <|fim_middle|>current
1465 <|user_cursor|>x
1466 <|fim_suffix|>
1467 <|fim_middle|>updated
1468 "#}
1469 );
1470
1471 // With tight budget, only order<=1 excerpts included (header + important fn).
1472 assert_eq!(
1473 format_with_budget(&input, 55),
1474 indoc! {r#"
1475 <|file_sep|>mod.rs
1476 mod header
1477 ...
1478 important fn
1479 ...
1480 <|file_sep|>test.rs
1481 <|fim_prefix|>
1482 <|fim_middle|>current
1483 <|user_cursor|>x
1484 <|fim_suffix|>
1485 <|fim_middle|>updated
1486 "#}
1487 );
1488 }
1489
1490 #[test]
1491 fn test_truncation_drops_older_events_first() {
1492 let input = make_input(
1493 "x",
1494 0..1,
1495 0,
1496 vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
1497 vec![],
1498 );
1499
1500 assert_eq!(
1501 format_with_budget(&input, 10000),
1502 indoc! {r#"
1503 <|file_sep|>edit history
1504 --- a/old.rs
1505 +++ b/old.rs
1506 -1
1507 --- a/new.rs
1508 +++ b/new.rs
1509 -2
1510 <|file_sep|>test.rs
1511 <|fim_prefix|>
1512 <|fim_middle|>current
1513 <|user_cursor|>x
1514 <|fim_suffix|>
1515 <|fim_middle|>updated
1516 "#}
1517 );
1518
1519 assert_eq!(
1520 format_with_budget(&input, 55),
1521 indoc! {r#"
1522 <|file_sep|>edit history
1523 --- a/new.rs
1524 +++ b/new.rs
1525 -2
1526 <|file_sep|>test.rs
1527 <|fim_prefix|>
1528 <|fim_middle|>current
1529 <|user_cursor|>x
1530 <|fim_suffix|>
1531 <|fim_middle|>updated
1532 "#}
1533 );
1534 }
1535
1536 #[test]
1537 fn test_cursor_excerpt_always_included_with_minimal_budget() {
1538 let input = make_input(
1539 "fn main() {}",
1540 0..12,
1541 3,
1542 vec![make_event("a.rs", "-old\n+new\n")],
1543 vec![make_related_file("related.rs", "helper\n")],
1544 );
1545
1546 assert_eq!(
1547 format_with_budget(&input, 30),
1548 indoc! {r#"
1549 <|file_sep|>test.rs
1550 <|fim_prefix|>
1551 <|fim_middle|>current
1552 fn <|user_cursor|>main() {}
1553 <|fim_suffix|>
1554 <|fim_middle|>updated
1555 "#}
1556 );
1557 }
1558
1559 fn format_seed_coder(input: &ZetaPromptInput) -> String {
1560 format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, 10000)
1561 }
1562
1563 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1564 format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, max_tokens)
1565 }
1566
1567 #[test]
1568 fn test_seed_coder_basic_format() {
1569 let input = make_input(
1570 "prefix\neditable\nsuffix",
1571 7..15,
1572 10,
1573 vec![make_event("a.rs", "-old\n+new\n")],
1574 vec![make_related_file("related.rs", "fn helper() {}\n")],
1575 );
1576
1577 assert_eq!(
1578 format_seed_coder(&input),
1579 indoc! {r#"
1580 <[fim-suffix]>
1581 suffix
1582 <[fim-prefix]><filename>related.rs
1583 fn helper() {}
1584
1585 <filename>edit_history
1586 --- a/a.rs
1587 +++ b/a.rs
1588 -old
1589 +new
1590
1591 <filename>test.rs
1592 prefix
1593 <<<<<<< CURRENT
1594 edi<|user_cursor|>table
1595 =======
1596 <[fim-middle]>"#}
1597 );
1598 }
1599
1600 #[test]
1601 fn test_seed_coder_no_context() {
1602 let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
1603
1604 assert_eq!(
1605 format_seed_coder(&input),
1606 indoc! {r#"
1607 <[fim-suffix]>
1608 after
1609 <[fim-prefix]><filename>test.rs
1610 before
1611 <<<<<<< CURRENT
1612 mid<|user_cursor|>dle
1613 =======
1614 <[fim-middle]>"#}
1615 );
1616 }
1617
1618 #[test]
1619 fn test_seed_coder_truncation_drops_context() {
1620 let input = make_input(
1621 "code",
1622 0..4,
1623 2,
1624 vec![make_event("a.rs", "-x\n+y\n")],
1625 vec![make_related_file("r1.rs", "content\n")],
1626 );
1627
1628 // With large budget, everything is included
1629 assert_eq!(
1630 format_seed_coder(&input),
1631 indoc! {r#"
1632 <[fim-suffix]>
1633 <[fim-prefix]><filename>r1.rs
1634 content
1635
1636 <filename>edit_history
1637 --- a/a.rs
1638 +++ b/a.rs
1639 -x
1640 +y
1641
1642 <filename>test.rs
1643 <<<<<<< CURRENT
1644 co<|user_cursor|>de
1645 =======
1646 <[fim-middle]>"#}
1647 );
1648
1649 // With tight budget, context is dropped but cursor section remains
1650 assert_eq!(
1651 format_seed_coder_with_budget(&input, 30),
1652 indoc! {r#"
1653 <[fim-suffix]>
1654 <[fim-prefix]><filename>test.rs
1655 <<<<<<< CURRENT
1656 co<|user_cursor|>de
1657 =======
1658 <[fim-middle]>"#}
1659 );
1660 }
1661
1662 #[test]
1663 fn test_seed_coder_truncation_prioritizes_lower_order() {
1664 let input = make_input(
1665 "code",
1666 0..4,
1667 2,
1668 vec![],
1669 vec![
1670 RelatedFile {
1671 path: Path::new("low_prio.rs").into(),
1672 max_row: 5,
1673 in_open_source_repo: false,
1674 excerpts: vec![RelatedExcerpt {
1675 row_range: 0..5,
1676 text: "low prio\n".into(),
1677 order: 10,
1678 }],
1679 },
1680 RelatedFile {
1681 path: Path::new("high_prio.rs").into(),
1682 max_row: 5,
1683 in_open_source_repo: false,
1684 excerpts: vec![RelatedExcerpt {
1685 row_range: 0..5,
1686 text: "high prio\n".into(),
1687 order: 1,
1688 }],
1689 },
1690 ],
1691 );
1692
1693 // With large budget, both included; rendered in stable lexicographic order.
1694 assert_eq!(
1695 format_seed_coder(&input),
1696 indoc! {r#"
1697 <[fim-suffix]>
1698 <[fim-prefix]><filename>low_prio.rs
1699 low prio
1700 <filename>high_prio.rs
1701 high prio
1702
1703 <filename>test.rs
1704 <<<<<<< CURRENT
1705 co<|user_cursor|>de
1706 =======
1707 <[fim-middle]>"#}
1708 );
1709
1710 // With tight budget, only high_prio included.
1711 // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
1712 // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
1713 assert_eq!(
1714 format_seed_coder_with_budget(&input, 44),
1715 indoc! {r#"
1716 <[fim-suffix]>
1717 <[fim-prefix]><filename>high_prio.rs
1718 high prio
1719
1720 <filename>test.rs
1721 <<<<<<< CURRENT
1722 co<|user_cursor|>de
1723 =======
1724 <[fim-middle]>"#}
1725 );
1726 }
1727
1728 #[test]
1729 fn test_seed_coder_clean_output() {
1730 let output_with_marker = "new code\n>>>>>>> UPDATED\n";
1731 let output_without_marker = "new code\n";
1732
1733 assert_eq!(
1734 clean_zeta2_model_output(output_with_marker, ZetaFormat::V0211SeedCoder),
1735 "new code\n"
1736 );
1737 assert_eq!(
1738 clean_zeta2_model_output(output_without_marker, ZetaFormat::V0211SeedCoder),
1739 "new code\n"
1740 );
1741 }
1742
1743 #[test]
1744 fn test_format_zeta1_from_input_basic() {
1745 let excerpt = "fn before() {}\nfn foo() {\n let x = 1;\n}\nfn after() {}\n";
1746 let input = ZetaPromptInput {
1747 cursor_path: Path::new("src/main.rs").into(),
1748 cursor_excerpt: excerpt.into(),
1749 editable_range_in_excerpt: 15..41,
1750 cursor_offset_in_excerpt: 30,
1751 excerpt_start_row: Some(0),
1752 events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
1753 related_files: vec![],
1754 excerpt_ranges: None,
1755 preferred_model: None,
1756 in_open_source_repo: false,
1757 can_collect_data: false,
1758 };
1759
1760 let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
1761
1762 assert_eq!(
1763 prompt,
1764 concat!(
1765 "### Instruction:\n",
1766 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1767 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1768 "into account the cursor location.\n",
1769 "\n",
1770 "### User Edits:\n",
1771 "\n",
1772 "User edited other.rs:\n",
1773 "```diff\n",
1774 "-old\n",
1775 "+new\n",
1776 "\n",
1777 "```\n",
1778 "\n",
1779 "### User Excerpt:\n",
1780 "\n",
1781 "```src/main.rs\n",
1782 "<|start_of_file|>\n",
1783 "fn before() {}\n",
1784 "<|editable_region_start|>\n",
1785 "fn foo() {\n",
1786 " <|user_cursor_is_here|>let x = 1;\n",
1787 "\n",
1788 "<|editable_region_end|>}\n",
1789 "fn after() {}\n",
1790 "\n",
1791 "```\n",
1792 "\n",
1793 "### Response:\n",
1794 ),
1795 );
1796 }
1797
1798 #[test]
1799 fn test_format_zeta1_from_input_no_start_of_file() {
1800 let excerpt = "fn foo() {\n let x = 1;\n}\n";
1801 let input = ZetaPromptInput {
1802 cursor_path: Path::new("src/main.rs").into(),
1803 cursor_excerpt: excerpt.into(),
1804 editable_range_in_excerpt: 0..28,
1805 cursor_offset_in_excerpt: 15,
1806 excerpt_start_row: Some(10),
1807 events: vec![],
1808 related_files: vec![],
1809 excerpt_ranges: None,
1810 preferred_model: None,
1811 in_open_source_repo: false,
1812 can_collect_data: false,
1813 };
1814
1815 let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
1816
1817 assert_eq!(
1818 prompt,
1819 concat!(
1820 "### Instruction:\n",
1821 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1822 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1823 "into account the cursor location.\n",
1824 "\n",
1825 "### User Edits:\n",
1826 "\n",
1827 "\n",
1828 "\n",
1829 "### User Excerpt:\n",
1830 "\n",
1831 "```src/main.rs\n",
1832 "<|editable_region_start|>\n",
1833 "fn foo() {\n",
1834 " <|user_cursor_is_here|>let x = 1;\n",
1835 "}\n",
1836 "\n",
1837 "<|editable_region_end|>\n",
1838 "```\n",
1839 "\n",
1840 "### Response:\n",
1841 ),
1842 );
1843 }
1844
1845 #[test]
1846 fn test_format_zeta1_from_input_with_sub_ranges() {
1847 let excerpt = "// prefix\nfn foo() {\n let x = 1;\n}\n// suffix\n";
1848 let editable_range = 10..37;
1849 let context_range = 0..excerpt.len();
1850
1851 let input = ZetaPromptInput {
1852 cursor_path: Path::new("test.rs").into(),
1853 cursor_excerpt: excerpt.into(),
1854 editable_range_in_excerpt: editable_range.clone(),
1855 cursor_offset_in_excerpt: 25,
1856 excerpt_start_row: Some(0),
1857 events: vec![],
1858 related_files: vec![],
1859 excerpt_ranges: None,
1860 preferred_model: None,
1861 in_open_source_repo: false,
1862 can_collect_data: false,
1863 };
1864
1865 let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
1866
1867 assert_eq!(
1868 prompt,
1869 concat!(
1870 "### Instruction:\n",
1871 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1872 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1873 "into account the cursor location.\n",
1874 "\n",
1875 "### User Edits:\n",
1876 "\n",
1877 "\n",
1878 "\n",
1879 "### User Excerpt:\n",
1880 "\n",
1881 "```test.rs\n",
1882 "<|start_of_file|>\n",
1883 "// prefix\n",
1884 "<|editable_region_start|>\n",
1885 "fn foo() {\n",
1886 " <|user_cursor_is_here|>let x = 1;\n",
1887 "}\n",
1888 "<|editable_region_end|>\n",
1889 "// suffix\n",
1890 "\n",
1891 "```\n",
1892 "\n",
1893 "### Response:\n",
1894 ),
1895 );
1896 }
1897
1898 #[test]
1899 fn test_clean_zeta1_model_output_basic() {
1900 let output = indoc! {"
1901 <|editable_region_start|>
1902 fn main() {
1903 println!(\"hello\");
1904 }
1905 <|editable_region_end|>
1906 "};
1907
1908 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1909 assert_eq!(cleaned, "fn main() {\n println!(\"hello\");\n}");
1910 }
1911
1912 #[test]
1913 fn test_clean_zeta1_model_output_with_cursor() {
1914 let output = indoc! {"
1915 <|editable_region_start|>
1916 fn main() {
1917 <|user_cursor_is_here|>println!(\"hello\");
1918 }
1919 <|editable_region_end|>
1920 "};
1921
1922 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1923 assert_eq!(
1924 cleaned,
1925 "fn main() {\n <|user_cursor|>println!(\"hello\");\n}"
1926 );
1927 }
1928
1929 #[test]
1930 fn test_clean_zeta1_model_output_no_markers() {
1931 let output = "fn main() {}\n";
1932 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1933 assert_eq!(cleaned, "fn main() {}\n");
1934 }
1935
1936 #[test]
1937 fn test_clean_zeta1_model_output_empty_region() {
1938 let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
1939 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1940 assert_eq!(cleaned, "");
1941 }
1942}