1use anyhow::Result;
2use serde::{Deserialize, Serialize};
3use std::fmt::Write;
4use std::ops::Range;
5use std::path::Path;
6use std::sync::Arc;
7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
8
/// Marker text inserted into the rendered cursor excerpt at the cursor offset.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes on for the whole prompt.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
16
/// Rough token estimate for `bytes` bytes of text: one token per three bytes,
/// rounded down.
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
20
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
#[derive(Clone, Debug, Default, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Editable region computed with a 512-token budget.
    pub editable_512: Option<Range<usize>>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
    /// Presumably the context boundary when using editable_350 with 512 tokens of
    /// additional context (inferred from the field name; not read in this file).
    pub editable_350_context_512: Option<Range<usize>>,
    /// Presumably the context boundary when using editable_350 with 1024 tokens of
    /// additional context (inferred from the field name; not read in this file).
    pub editable_350_context_1024: Option<Range<usize>>,
    /// Presumably a context range for a 4096-token budget (inferred from the
    /// field name; not read in this file).
    pub context_4096: Option<Range<usize>>,
    /// Presumably a context range for an 8192-token budget (inferred from the
    /// field name; not read in this file).
    pub context_8192: Option<Range<usize>>,
}
45
/// Everything needed to render a prompt: the excerpt around the cursor, the
/// edit events, related files, and the pre-computed excerpt ranges.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path written into the file header of the cursor section.
    pub cursor_path: Arc<Path>,
    /// Text that the ranges in `excerpt_ranges` index into.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Row at which the excerpt starts; the zeta1 formatter emits its
    /// start-of-file marker only when this is `Some(0)` and the context
    /// range starts at offset 0.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events rendered into the edit-history section.
    pub events: Vec<Arc<Event>>,
    /// Files whose excerpts are rendered into the related-files section.
    pub related_files: Vec<RelatedFile>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    /// Defaults to `false` when absent from the serialized form.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// Defaults to `false` when absent from the serialized form.
    #[serde(default)]
    pub can_collect_data: bool,
}
65
/// The prompt formats this file can render. The default is
/// `V0131GitMergeMarkersPrefix`.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    /// Rendered by `v0112_middle_at_end`.
    V0112MiddleAtEnd,
    /// Rendered by `v0113_ordered`.
    V0113Ordered,
    /// Rendered by `v0113_ordered` as well.
    V0114180EditableRegion,
    /// Rendered by `v0120_git_merge_markers`.
    V0120GitMergeMarkers,
    /// Rendered by `v0131_git_merge_markers_prefix`.
    #[default]
    V0131GitMergeMarkersPrefix,
    /// Same prompt as `V0131GitMergeMarkersPrefix`; the only format for which
    /// `get_prefill` returns a non-empty prefill.
    V0211Prefill,
    /// Rendered by `seed_coder`.
    V0211SeedCoder,
}
90
91impl std::fmt::Display for ZetaFormat {
92 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
93 write!(f, "{}", <&'static str>::from(self))
94 }
95}
96
97impl ZetaFormat {
98 pub fn parse(format_name: &str) -> Result<Self> {
99 let mut results = ZetaFormat::iter().filter(|version| {
100 <&'static str>::from(version)
101 .to_lowercase()
102 .contains(&format_name.to_lowercase())
103 });
104 let Some(result) = results.next() else {
105 anyhow::bail!(
106 "`{format_name}` did not match any of:\n{}",
107 Self::options_as_string()
108 );
109 };
110 if results.next().is_some() {
111 anyhow::bail!(
112 "`{format_name}` matched more than one of:\n{}",
113 Self::options_as_string()
114 );
115 }
116 Ok(result)
117 }
118
119 pub fn options_as_string() -> String {
120 ZetaFormat::iter()
121 .map(|format| format!("- {}\n", <&'static str>::from(format)))
122 .collect::<Vec<_>>()
123 .concat()
124 }
125
126 pub fn special_tokens(&self) -> &'static [&'static str] {
127 match self {
128 ZetaFormat::V0112MiddleAtEnd
129 | ZetaFormat::V0113Ordered
130 | ZetaFormat::V0114180EditableRegion => &[
131 "<|fim_prefix|>",
132 "<|fim_suffix|>",
133 "<|fim_middle|>",
134 "<|file_sep|>",
135 CURSOR_MARKER,
136 ],
137 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
138 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
139 v0131_git_merge_markers_prefix::special_tokens()
140 }
141 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
142 }
143 }
144}
145
/// An edit event that can be rendered into a prompt's edit history.
/// Serialized with an `event` tag field.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, carried as diff text.
    BufferChange {
        /// Path rendered on the `+++ b` line by `write_event`.
        path: Arc<Path>,
        /// Path rendered on the `--- a` line by `write_event`.
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the two path lines.
        diff: String,
        /// When true, `write_event` prefixes the event with
        /// `// User accepted prediction:`.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; not rendered by `write_event`.
        in_open_source_repo: bool,
    },
}
157
158impl Event {
159 pub fn in_open_source_repo(&self) -> bool {
160 match self {
161 Event::BufferChange {
162 in_open_source_repo,
163 ..
164 } => *in_open_source_repo,
165 }
166 }
167}
168
169pub fn write_event(prompt: &mut String, event: &Event) {
170 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
171 for component in path.components() {
172 prompt.push('/');
173 write!(prompt, "{}", component.as_os_str().display()).ok();
174 }
175 }
176 match event {
177 Event::BufferChange {
178 path,
179 old_path,
180 diff,
181 predicted,
182 in_open_source_repo: _,
183 } => {
184 if *predicted {
185 prompt.push_str("// User accepted prediction:\n");
186 }
187 prompt.push_str("--- a");
188 write_path_as_unix_str(prompt, old_path.as_ref());
189 prompt.push_str("\n+++ b");
190 write_path_as_unix_str(prompt, path.as_ref());
191 prompt.push('\n');
192 prompt.push_str(diff);
193 }
194 }
195}
196
/// A file with excerpts that may be rendered into the prompt as extra context.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path rendered in the file's header line.
    pub path: Arc<Path>,
    /// An excerpt whose `row_range.end` is below this value is followed by a
    /// `...` line when rendered.
    pub max_row: u32,
    /// Excerpts of this file, rendered in the order they appear here.
    pub excerpts: Vec<RelatedExcerpt>,
    /// Defaults to `false` when absent from the serialized form.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
205
/// A slice of a related file's text.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by the excerpt; `row_range.end` is compared against the
    /// file's `max_row` to decide whether a `...` line follows it.
    pub row_range: Range<u32>,
    /// Excerpt text, rendered verbatim.
    pub text: Arc<str>,
    /// Budget priority: `format_related_files_within_budget` admits excerpts
    /// in ascending `order`. Defaults to 0 when absent from the serialized form.
    #[serde(default)]
    pub order: usize,
}
213
214pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
215 format
216 .special_tokens()
217 .iter()
218 .any(|token| input.cursor_excerpt.contains(token))
219}
220
221pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
222 format_zeta_prompt_with_budget(input, format, MAX_PROMPT_TOKENS)
223}
224
225/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
226pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
227 match format {
228 ZetaFormat::V0120GitMergeMarkers => output
229 .strip_suffix(v0120_git_merge_markers::END_MARKER)
230 .unwrap_or(output),
231 ZetaFormat::V0131GitMergeMarkersPrefix => output
232 .strip_suffix(v0131_git_merge_markers_prefix::END_MARKER)
233 .unwrap_or(output),
234 ZetaFormat::V0211SeedCoder => output
235 .strip_suffix(seed_coder::END_MARKER)
236 .unwrap_or(output),
237 _ => output,
238 }
239}
240
241pub fn excerpt_range_for_format(
242 format: ZetaFormat,
243 ranges: &ExcerptRanges,
244) -> (Range<usize>, Range<usize>) {
245 match format {
246 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
247 ranges.editable_150.clone(),
248 ranges.editable_150_context_350.clone(),
249 ),
250 ZetaFormat::V0114180EditableRegion
251 | ZetaFormat::V0120GitMergeMarkers
252 | ZetaFormat::V0131GitMergeMarkersPrefix
253 | ZetaFormat::V0211Prefill
254 | ZetaFormat::V0211SeedCoder => (
255 ranges.editable_350.clone(),
256 ranges.editable_350_context_150.clone(),
257 ),
258 }
259}
260
261pub fn resolve_cursor_region(
262 input: &ZetaPromptInput,
263 format: ZetaFormat,
264) -> (&str, Range<usize>, usize) {
265 let (editable_range, context_range) = excerpt_range_for_format(format, &input.excerpt_ranges);
266 let context_start = context_range.start;
267 let context_text = &input.cursor_excerpt[context_range];
268 let adjusted_editable =
269 (editable_range.start - context_start)..(editable_range.end - context_start);
270 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
271
272 (context_text, adjusted_editable, adjusted_cursor)
273}
274
275fn format_zeta_prompt_with_budget(
276 input: &ZetaPromptInput,
277 format: ZetaFormat,
278 max_tokens: usize,
279) -> String {
280 let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
281 let path = &*input.cursor_path;
282
283 let mut cursor_section = String::new();
284 match format {
285 ZetaFormat::V0112MiddleAtEnd => {
286 v0112_middle_at_end::write_cursor_excerpt_section(
287 &mut cursor_section,
288 path,
289 context,
290 &editable_range,
291 cursor_offset,
292 );
293 }
294 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
295 v0113_ordered::write_cursor_excerpt_section(
296 &mut cursor_section,
297 path,
298 context,
299 &editable_range,
300 cursor_offset,
301 )
302 }
303 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
304 &mut cursor_section,
305 path,
306 context,
307 &editable_range,
308 cursor_offset,
309 ),
310 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
311 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
312 &mut cursor_section,
313 path,
314 context,
315 &editable_range,
316 cursor_offset,
317 )
318 }
319 ZetaFormat::V0211SeedCoder => {
320 return seed_coder::format_prompt_with_budget(
321 path,
322 context,
323 &editable_range,
324 cursor_offset,
325 &input.events,
326 &input.related_files,
327 max_tokens,
328 );
329 }
330 }
331
332 let cursor_tokens = estimate_tokens(cursor_section.len());
333 let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
334
335 let edit_history_section = format_edit_history_within_budget(
336 &input.events,
337 "<|file_sep|>",
338 "edit history",
339 budget_after_cursor,
340 );
341 let edit_history_tokens = estimate_tokens(edit_history_section.len());
342 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
343
344 let related_files_section = format_related_files_within_budget(
345 &input.related_files,
346 "<|file_sep|>",
347 "",
348 budget_after_edit_history,
349 );
350
351 let mut prompt = String::new();
352 prompt.push_str(&related_files_section);
353 prompt.push_str(&edit_history_section);
354 prompt.push_str(&cursor_section);
355 prompt
356}
357
358pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
359 match format {
360 ZetaFormat::V0112MiddleAtEnd
361 | ZetaFormat::V0113Ordered
362 | ZetaFormat::V0114180EditableRegion
363 | ZetaFormat::V0120GitMergeMarkers
364 | ZetaFormat::V0131GitMergeMarkersPrefix
365 | ZetaFormat::V0211SeedCoder => String::new(),
366 ZetaFormat::V0211Prefill => {
367 let (context, editable_range, _) = resolve_cursor_region(input, format);
368 v0211_prefill::get_prefill(context, &editable_range)
369 }
370 }
371}
372
373fn format_edit_history_within_budget(
374 events: &[Arc<Event>],
375 file_marker: &str,
376 edit_history_name: &str,
377 max_tokens: usize,
378) -> String {
379 let header = format!("{}{}\n", file_marker, edit_history_name);
380 let header_tokens = estimate_tokens(header.len());
381 if header_tokens >= max_tokens {
382 return String::new();
383 }
384
385 let mut event_strings: Vec<String> = Vec::new();
386 let mut total_tokens = header_tokens;
387
388 for event in events.iter().rev() {
389 let mut event_str = String::new();
390 write_event(&mut event_str, event);
391 let event_tokens = estimate_tokens(event_str.len());
392
393 if total_tokens + event_tokens > max_tokens {
394 break;
395 }
396 total_tokens += event_tokens;
397 event_strings.push(event_str);
398 }
399
400 if event_strings.is_empty() {
401 return String::new();
402 }
403
404 let mut result = header;
405 for event_str in event_strings.iter().rev() {
406 result.push_str(event_str);
407 }
408 result
409}
410
411fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
412 let needs_newline = !excerpt.text.ends_with('\n');
413 let needs_ellipsis = excerpt.row_range.end < file_max_row;
414 let len = excerpt.text.len()
415 + if needs_newline { "\n".len() } else { 0 }
416 + if needs_ellipsis { "...\n".len() } else { 0 };
417 estimate_tokens(len)
418}
419
420pub fn format_related_files_within_budget(
421 related_files: &[RelatedFile],
422 file_prefix: &str,
423 file_suffix: &str,
424 max_tokens: usize,
425) -> String {
426 struct ExcerptCandidate {
427 file_ix: usize,
428 excerpt_ix: usize,
429 order: usize,
430 }
431
432 let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
433 .iter()
434 .enumerate()
435 .flat_map(|(file_ix, file)| {
436 file.excerpts
437 .iter()
438 .enumerate()
439 .map(move |(excerpt_ix, e)| ExcerptCandidate {
440 file_ix,
441 excerpt_ix,
442 order: e.order,
443 })
444 })
445 .collect();
446
447 // Pre-compute file header strings and their token costs.
448 let file_headers: Vec<String> = related_files
449 .iter()
450 .map(|file| {
451 let path_str = file.path.to_string_lossy();
452 format!("{}{}\n", file_prefix, path_str)
453 })
454 .collect();
455
456 // Sort the excerpts by their order and determine how many fit within the budget.
457 let mut total_tokens = 0;
458 let mut included_excerpt_count = 0_usize;
459 let mut included_file_indices = vec![false; related_files.len()];
460 excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
461 for candidate in &excerpt_candidates {
462 let file = &related_files[candidate.file_ix];
463 let excerpt = &file.excerpts[candidate.excerpt_ix];
464 let file_already_included = included_file_indices[candidate.file_ix];
465 let header_cost = if file_already_included {
466 0
467 } else {
468 estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
469 };
470 let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
471 if total_tokens + header_cost + excerpt_cost > max_tokens {
472 break;
473 }
474 total_tokens += header_cost + excerpt_cost;
475 if !file_already_included {
476 included_file_indices[candidate.file_ix] = true;
477 }
478 included_excerpt_count += 1;
479 }
480
481 excerpt_candidates.truncate(included_excerpt_count);
482 excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
483
484 // Render all of the files that fit within the token budget, in the original order.
485 let mut result = String::new();
486 let mut last_file_ix = None;
487 for candidate in &excerpt_candidates {
488 if last_file_ix != Some(candidate.file_ix) {
489 if last_file_ix.is_some() {
490 result.push_str(file_suffix);
491 }
492 result.push_str(&file_headers[candidate.file_ix]);
493 last_file_ix = Some(candidate.file_ix);
494 }
495 let file = &related_files[candidate.file_ix];
496 let excerpt = &file.excerpts[candidate.excerpt_ix];
497 result.push_str(&excerpt.text);
498 if !result.ends_with('\n') {
499 result.push('\n');
500 }
501 if excerpt.row_range.end < file.max_row {
502 result.push_str("...\n");
503 }
504 }
505
506 result
507}
508
509pub fn write_related_files(
510 prompt: &mut String,
511 related_files: &[RelatedFile],
512) -> Vec<Range<usize>> {
513 let mut ranges = Vec::new();
514 for file in related_files {
515 let start = prompt.len();
516 let path_str = file.path.to_string_lossy();
517 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
518 for excerpt in &file.excerpts {
519 prompt.push_str(&excerpt.text);
520 if !prompt.ends_with('\n') {
521 prompt.push('\n');
522 }
523 if excerpt.row_range.end < file.max_row {
524 prompt.push_str("...\n");
525 }
526 }
527 let end = prompt.len();
528 ranges.push(start..end);
529 }
530 ranges
531}
532
533mod v0112_middle_at_end {
534 use super::*;
535
536 pub fn write_cursor_excerpt_section(
537 prompt: &mut String,
538 path: &Path,
539 context: &str,
540 editable_range: &Range<usize>,
541 cursor_offset: usize,
542 ) {
543 let path_str = path.to_string_lossy();
544 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
545
546 prompt.push_str("<|fim_prefix|>\n");
547 prompt.push_str(&context[..editable_range.start]);
548
549 prompt.push_str("<|fim_suffix|>\n");
550 prompt.push_str(&context[editable_range.end..]);
551 if !prompt.ends_with('\n') {
552 prompt.push('\n');
553 }
554
555 prompt.push_str("<|fim_middle|>current\n");
556 prompt.push_str(&context[editable_range.start..cursor_offset]);
557 prompt.push_str(CURSOR_MARKER);
558 prompt.push_str(&context[cursor_offset..editable_range.end]);
559 if !prompt.ends_with('\n') {
560 prompt.push('\n');
561 }
562
563 prompt.push_str("<|fim_middle|>updated\n");
564 }
565}
566
567mod v0113_ordered {
568 use super::*;
569
570 pub fn write_cursor_excerpt_section(
571 prompt: &mut String,
572 path: &Path,
573 context: &str,
574 editable_range: &Range<usize>,
575 cursor_offset: usize,
576 ) {
577 let path_str = path.to_string_lossy();
578 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
579
580 prompt.push_str("<|fim_prefix|>\n");
581 prompt.push_str(&context[..editable_range.start]);
582 if !prompt.ends_with('\n') {
583 prompt.push('\n');
584 }
585
586 prompt.push_str("<|fim_middle|>current\n");
587 prompt.push_str(&context[editable_range.start..cursor_offset]);
588 prompt.push_str(CURSOR_MARKER);
589 prompt.push_str(&context[cursor_offset..editable_range.end]);
590 if !prompt.ends_with('\n') {
591 prompt.push('\n');
592 }
593
594 prompt.push_str("<|fim_suffix|>\n");
595 prompt.push_str(&context[editable_range.end..]);
596 if !prompt.ends_with('\n') {
597 prompt.push('\n');
598 }
599
600 prompt.push_str("<|fim_middle|>updated\n");
601 }
602}
603
604pub mod v0120_git_merge_markers {
605 //! A prompt that uses git-style merge conflict markers to represent the editable region.
606 //!
607 //! Example prompt:
608 //!
609 //! <|file_sep|>path/to/target_file.py
610 //! <|fim_prefix|>
611 //! code before editable region
612 //! <|fim_suffix|>
613 //! code after editable region
614 //! <|fim_middle|>
615 //! <<<<<<< CURRENT
616 //! code that
617 //! needs to<|user_cursor|>
618 //! be rewritten
619 //! =======
620 //!
621 //! Expected output (should be generated by the model):
622 //!
623 //! updated
624 //! code with
625 //! changes applied
626 //! >>>>>>> UPDATED
627
628 use super::*;
629
630 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
631 pub const SEPARATOR: &str = "=======\n";
632 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
633
634 pub fn special_tokens() -> &'static [&'static str] {
635 &[
636 "<|fim_prefix|>",
637 "<|fim_suffix|>",
638 "<|fim_middle|>",
639 "<|file_sep|>",
640 START_MARKER,
641 SEPARATOR,
642 END_MARKER,
643 CURSOR_MARKER,
644 ]
645 }
646
647 pub fn write_cursor_excerpt_section(
648 prompt: &mut String,
649 path: &Path,
650 context: &str,
651 editable_range: &Range<usize>,
652 cursor_offset: usize,
653 ) {
654 let path_str = path.to_string_lossy();
655 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
656
657 prompt.push_str("<|fim_prefix|>");
658 prompt.push_str(&context[..editable_range.start]);
659
660 prompt.push_str("<|fim_suffix|>");
661 prompt.push_str(&context[editable_range.end..]);
662 if !prompt.ends_with('\n') {
663 prompt.push('\n');
664 }
665
666 prompt.push_str("<|fim_middle|>");
667 prompt.push_str(START_MARKER);
668 prompt.push_str(&context[editable_range.start..cursor_offset]);
669 prompt.push_str(CURSOR_MARKER);
670 prompt.push_str(&context[cursor_offset..editable_range.end]);
671 if !prompt.ends_with('\n') {
672 prompt.push('\n');
673 }
674 prompt.push_str(SEPARATOR);
675 }
676}
677
678pub mod v0131_git_merge_markers_prefix {
679 //! A prompt that uses git-style merge conflict markers to represent the editable region.
680 //!
681 //! Example prompt:
682 //!
683 //! <|file_sep|>path/to/target_file.py
684 //! <|fim_prefix|>
685 //! code before editable region
686 //! <<<<<<< CURRENT
687 //! code that
688 //! needs to<|user_cursor|>
689 //! be rewritten
690 //! =======
691 //! <|fim_suffix|>
692 //! code after editable region
693 //! <|fim_middle|>
694 //!
695 //! Expected output (should be generated by the model):
696 //!
697 //! updated
698 //! code with
699 //! changes applied
700 //! >>>>>>> UPDATED
701
702 use super::*;
703
704 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
705 pub const SEPARATOR: &str = "=======\n";
706 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
707
708 pub fn special_tokens() -> &'static [&'static str] {
709 &[
710 "<|fim_prefix|>",
711 "<|fim_suffix|>",
712 "<|fim_middle|>",
713 "<|file_sep|>",
714 START_MARKER,
715 SEPARATOR,
716 END_MARKER,
717 CURSOR_MARKER,
718 ]
719 }
720
721 pub fn write_cursor_excerpt_section(
722 prompt: &mut String,
723 path: &Path,
724 context: &str,
725 editable_range: &Range<usize>,
726 cursor_offset: usize,
727 ) {
728 let path_str = path.to_string_lossy();
729 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
730
731 prompt.push_str("<|fim_prefix|>");
732 prompt.push_str(&context[..editable_range.start]);
733 prompt.push_str(START_MARKER);
734 prompt.push_str(&context[editable_range.start..cursor_offset]);
735 prompt.push_str(CURSOR_MARKER);
736 prompt.push_str(&context[cursor_offset..editable_range.end]);
737 if !prompt.ends_with('\n') {
738 prompt.push('\n');
739 }
740 prompt.push_str(SEPARATOR);
741
742 prompt.push_str("<|fim_suffix|>");
743 prompt.push_str(&context[editable_range.end..]);
744 if !prompt.ends_with('\n') {
745 prompt.push('\n');
746 }
747
748 prompt.push_str("<|fim_middle|>");
749 }
750}
751
752pub mod v0211_prefill {
753 use super::*;
754
755 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
756 let editable_region = &context[editable_range.start..editable_range.end];
757
758 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
759 let prefill_len = editable_region.floor_char_boundary(prefill_len);
760
761 // Find a token boundary to avoid splitting tokens in the prefill.
762 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
763 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
764 // the \n and consume any consecutive \n characters after it.
765 let prefill = &editable_region[..prefill_len];
766 match prefill.rfind('\n') {
767 Some(pos) => {
768 let mut end = pos + 1;
769 while end < editable_region.len()
770 && editable_region.as_bytes().get(end) == Some(&b'\n')
771 {
772 end += 1;
773 }
774 editable_region[..end].to_string()
775 }
776 // No newline found. Fall back to splitting before the last space
777 // (word-level boundary)
778 None => match prefill.rfind(' ') {
779 Some(pos) => prefill[..pos].to_string(),
780 None => prefill.to_string(),
781 },
782 }
783 }
784}
785
786pub mod seed_coder {
787 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
788 //!
789 //! Seed-Coder uses different FIM tokens and order than Qwen:
790 //! - SPM order: suffix comes FIRST, then prefix, then middle
791 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
792 //! - File markers: StarCoder-style `<filename>path` (single token + path)
793 //!
794 //! All context (related files, edit history) goes in the PREFIX section.
795 //! The suffix contains only code after the editable region.
796 //!
797 //! Example prompt:
798 //!
799 //! <[fim-suffix]>
800 //! code after editable region
801 //! <[fim-prefix]><filename>related/file.py
802 //! related file content
803 //!
804 //! <filename>edit_history
805 //! --- a/some_file.py
806 //! +++ b/some_file.py
807 //! -old
808 //! +new
809 //!
810 //! <filename>path/to/target_file.py
811 //! code before editable region
812 //! <<<<<<< CURRENT
813 //! code that
814 //! needs to<|user_cursor|>
815 //! be rewritten
816 //! =======
817 //! <[fim-middle]>
818 //!
819 //! Expected output (model generates):
820 //!
821 //! updated
822 //! code with
823 //! changes applied
824 //! >>>>>>> UPDATED
825
826 use super::*;
827
828 pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
829 pub const FIM_PREFIX: &str = "<[fim-prefix]>";
830 pub const FIM_MIDDLE: &str = "<[fim-middle]>";
831 pub const FILE_MARKER: &str = "<filename>";
832
833 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
834 pub const SEPARATOR: &str = "=======\n";
835 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
836
837 pub fn special_tokens() -> &'static [&'static str] {
838 &[
839 FIM_SUFFIX,
840 FIM_PREFIX,
841 FIM_MIDDLE,
842 FILE_MARKER,
843 START_MARKER,
844 SEPARATOR,
845 END_MARKER,
846 CURSOR_MARKER,
847 ]
848 }
849
850 pub fn format_prompt_with_budget(
851 path: &Path,
852 context: &str,
853 editable_range: &Range<usize>,
854 cursor_offset: usize,
855 events: &[Arc<Event>],
856 related_files: &[RelatedFile],
857 max_tokens: usize,
858 ) -> String {
859 let suffix_section = build_suffix_section(context, editable_range);
860 let cursor_prefix_section =
861 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
862
863 let suffix_tokens = estimate_tokens(suffix_section.len());
864 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
865 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
866
867 let edit_history_section = super::format_edit_history_within_budget(
868 events,
869 FILE_MARKER,
870 "edit_history",
871 budget_after_cursor,
872 );
873 let edit_history_tokens = estimate_tokens(edit_history_section.len());
874 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
875
876 let related_files_section = super::format_related_files_within_budget(
877 related_files,
878 FILE_MARKER,
879 "",
880 budget_after_edit_history,
881 );
882
883 let mut prompt = String::new();
884 prompt.push_str(&suffix_section);
885 prompt.push_str(FIM_PREFIX);
886 prompt.push_str(&related_files_section);
887 if !related_files_section.is_empty() {
888 prompt.push('\n');
889 }
890 prompt.push_str(&edit_history_section);
891 if !edit_history_section.is_empty() {
892 prompt.push('\n');
893 }
894 prompt.push_str(&cursor_prefix_section);
895 prompt.push_str(FIM_MIDDLE);
896 prompt
897 }
898
899 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
900 let mut section = String::new();
901 section.push_str(FIM_SUFFIX);
902 section.push_str(&context[editable_range.end..]);
903 if !section.ends_with('\n') {
904 section.push('\n');
905 }
906 section
907 }
908
909 fn build_cursor_prefix_section(
910 path: &Path,
911 context: &str,
912 editable_range: &Range<usize>,
913 cursor_offset: usize,
914 ) -> String {
915 let mut section = String::new();
916 let path_str = path.to_string_lossy();
917 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
918
919 section.push_str(&context[..editable_range.start]);
920 section.push_str(START_MARKER);
921 section.push_str(&context[editable_range.start..cursor_offset]);
922 section.push_str(CURSOR_MARKER);
923 section.push_str(&context[cursor_offset..editable_range.end]);
924 if !section.ends_with('\n') {
925 section.push('\n');
926 }
927 section.push_str(SEPARATOR);
928 section
929 }
930}
931
932/// The zeta1 prompt format
933pub mod zeta1 {
934 use super::*;
935 use std::fmt::Write;
936
937 pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
938 pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
939 pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
940 pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
941
942 const INSTRUCTION_HEADER: &str = concat!(
943 "### Instruction:\n",
944 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
945 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
946 "into account the cursor location.\n\n",
947 "### User Edits:\n\n"
948 );
949 const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
950 const RESPONSE_HEADER: &str = "\n\n### Response:\n";
951
952 /// Formats a complete zeta1 prompt from the input events and excerpt.
953 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
954 let mut prompt = String::with_capacity(
955 INSTRUCTION_HEADER.len()
956 + input_events.len()
957 + EXCERPT_HEADER.len()
958 + input_excerpt.len()
959 + RESPONSE_HEADER.len(),
960 );
961 prompt.push_str(INSTRUCTION_HEADER);
962 prompt.push_str(input_events);
963 prompt.push_str(EXCERPT_HEADER);
964 prompt.push_str(input_excerpt);
965 prompt.push_str(RESPONSE_HEADER);
966 prompt
967 }
968
969 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
970 /// editable and context byte-offset ranges within `cursor_excerpt`.
971 pub fn format_zeta1_from_input(
972 input: &ZetaPromptInput,
973 editable_range: Range<usize>,
974 context_range: Range<usize>,
975 ) -> String {
976 let events = format_zeta1_events(&input.events);
977 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
978 format_zeta1_prompt(&events, &excerpt)
979 }
980
981 /// Formats events in zeta1 style (oldest first).
982 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
983 let mut result = String::new();
984 for event in events {
985 let event_string = format_zeta1_event(event);
986 if event_string.is_empty() {
987 continue;
988 }
989 if !result.is_empty() {
990 result.push_str("\n\n");
991 }
992 result.push_str(&event_string);
993 }
994 result
995 }
996
997 fn format_zeta1_event(event: &Event) -> String {
998 match event {
999 Event::BufferChange {
1000 path,
1001 old_path,
1002 diff,
1003 ..
1004 } => {
1005 let mut prompt = String::new();
1006 if old_path != path {
1007 writeln!(
1008 prompt,
1009 "User renamed {} to {}\n",
1010 old_path.display(),
1011 path.display()
1012 )
1013 .ok();
1014 }
1015 if !diff.is_empty() {
1016 write!(
1017 prompt,
1018 "User edited {}:\n```diff\n{}\n```",
1019 path.display(),
1020 diff
1021 )
1022 .ok();
1023 }
1024 prompt
1025 }
1026 }
1027 }
1028
1029 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
1030 /// within `cursor_excerpt`.
1031 fn format_zeta1_excerpt(
1032 input: &ZetaPromptInput,
1033 editable_range: Range<usize>,
1034 context_range: Range<usize>,
1035 ) -> String {
1036 let path_str = input.cursor_path.to_string_lossy();
1037 let excerpt = &*input.cursor_excerpt;
1038 let cursor_offset = input.cursor_offset_in_excerpt;
1039
1040 let mut prompt = String::new();
1041 writeln!(&mut prompt, "```{path_str}").ok();
1042
1043 let starts_at_file_beginning =
1044 input.excerpt_start_row == Some(0) && context_range.start == 0;
1045 if starts_at_file_beginning {
1046 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
1047 }
1048
1049 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
1050
1051 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
1052 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
1053 prompt.push_str(CURSOR_MARKER);
1054 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
1055 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
1056
1057 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
1058 write!(prompt, "\n```").ok();
1059
1060 prompt
1061 }
1062
1063 /// Cleans zeta1 model output by extracting content between editable region
1064 /// markers and converting the zeta1 cursor marker to the universal one.
1065 /// Returns `None` if the output doesn't contain the expected markers.
1066 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
1067 let content = output.replace(CURSOR_MARKER, "");
1068
1069 let content_start = content
1070 .find(EDITABLE_REGION_START_MARKER)
1071 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
1072 .map(|pos| {
1073 if content.as_bytes().get(pos) == Some(&b'\n') {
1074 pos + 1
1075 } else {
1076 pos
1077 }
1078 })
1079 .unwrap_or(0);
1080
1081 let content_end = content
1082 .find(EDITABLE_REGION_END_MARKER)
1083 .map(|pos| {
1084 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
1085 pos - 1
1086 } else {
1087 pos
1088 }
1089 })
1090 .unwrap_or(content.len());
1091
1092 if content_start > content_end {
1093 return Some(String::new());
1094 }
1095
1096 let extracted = &content[content_start..content_end];
1097
1098 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
1099 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
1100 let text_before_cursor = text_before_cursor
1101 .find(EDITABLE_REGION_START_MARKER)
1102 .map(|pos| {
1103 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
1104 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
1105 after_marker + 1
1106 } else {
1107 after_marker
1108 }
1109 })
1110 .unwrap_or(0);
1111 let offset_in_extracted = zeta1_cursor_pos
1112 .saturating_sub(text_before_cursor)
1113 .min(extracted.len());
1114 offset_in_extracted
1115 });
1116
1117 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
1118 if let Some(offset) = cursor_offset {
1119 result.push_str(&extracted[..offset]);
1120 result.push_str(super::CURSOR_MARKER);
1121 result.push_str(&extracted[offset..]);
1122 } else {
1123 result.push_str(extracted);
1124 }
1125
1126 Some(result)
1127 }
1128}
1129
1130#[cfg(test)]
1131mod tests {
1132 use super::*;
1133 use indoc::indoc;
1134
1135 fn make_input(
1136 cursor_excerpt: &str,
1137 editable_range: Range<usize>,
1138 cursor_offset: usize,
1139 events: Vec<Event>,
1140 related_files: Vec<RelatedFile>,
1141 ) -> ZetaPromptInput {
1142 let context_range = 0..cursor_excerpt.len();
1143 ZetaPromptInput {
1144 cursor_path: Path::new("test.rs").into(),
1145 cursor_excerpt: cursor_excerpt.into(),
1146 cursor_offset_in_excerpt: cursor_offset,
1147 excerpt_start_row: None,
1148 events: events.into_iter().map(Arc::new).collect(),
1149 related_files,
1150 excerpt_ranges: ExcerptRanges {
1151 editable_150: editable_range.clone(),
1152 editable_180: editable_range.clone(),
1153 editable_350: editable_range,
1154 editable_150_context_350: context_range.clone(),
1155 editable_180_context_350: context_range.clone(),
1156 editable_350_context_150: context_range,
1157 ..Default::default()
1158 },
1159 experiment: None,
1160 in_open_source_repo: false,
1161 can_collect_data: false,
1162 }
1163 }
1164
1165 fn make_event(path: &str, diff: &str) -> Event {
1166 Event::BufferChange {
1167 path: Path::new(path).into(),
1168 old_path: Path::new(path).into(),
1169 diff: diff.to_string(),
1170 predicted: false,
1171 in_open_source_repo: false,
1172 }
1173 }
1174
1175 fn make_related_file(path: &str, content: &str) -> RelatedFile {
1176 RelatedFile {
1177 path: Path::new(path).into(),
1178 max_row: content.lines().count() as u32,
1179 excerpts: vec![RelatedExcerpt {
1180 row_range: 0..content.lines().count() as u32,
1181 text: content.into(),
1182 order: 0,
1183 }],
1184 in_open_source_repo: false,
1185 }
1186 }
1187
1188 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1189 format_zeta_prompt_with_budget(input, ZetaFormat::V0114180EditableRegion, max_tokens)
1190 }
1191
1192 #[test]
1193 fn test_no_truncation_when_within_budget() {
1194 let input = make_input(
1195 "prefix\neditable\nsuffix",
1196 7..15,
1197 10,
1198 vec![make_event("a.rs", "-old\n+new\n")],
1199 vec![make_related_file("related.rs", "fn helper() {}\n")],
1200 );
1201
1202 assert_eq!(
1203 format_with_budget(&input, 10000),
1204 indoc! {r#"
1205 <|file_sep|>related.rs
1206 fn helper() {}
1207 <|file_sep|>edit history
1208 --- a/a.rs
1209 +++ b/a.rs
1210 -old
1211 +new
1212 <|file_sep|>test.rs
1213 <|fim_prefix|>
1214 prefix
1215 <|fim_middle|>current
1216 edi<|user_cursor|>table
1217 <|fim_suffix|>
1218
1219 suffix
1220 <|fim_middle|>updated
1221 "#}
1222 );
1223 }
1224
1225 #[test]
1226 fn test_truncation_drops_edit_history_when_budget_tight() {
1227 let input = make_input(
1228 "code",
1229 0..4,
1230 2,
1231 vec![make_event("a.rs", "-x\n+y\n")],
1232 vec![
1233 make_related_file("r1.rs", "a\n"),
1234 make_related_file("r2.rs", "b\n"),
1235 ],
1236 );
1237
1238 assert_eq!(
1239 format_with_budget(&input, 10000),
1240 indoc! {r#"
1241 <|file_sep|>r1.rs
1242 a
1243 <|file_sep|>r2.rs
1244 b
1245 <|file_sep|>edit history
1246 --- a/a.rs
1247 +++ b/a.rs
1248 -x
1249 +y
1250 <|file_sep|>test.rs
1251 <|fim_prefix|>
1252 <|fim_middle|>current
1253 co<|user_cursor|>de
1254 <|fim_suffix|>
1255 <|fim_middle|>updated
1256 "#}
1257 );
1258
1259 assert_eq!(
1260 format_with_budget(&input, 50),
1261 indoc! {r#"
1262 <|file_sep|>r1.rs
1263 a
1264 <|file_sep|>r2.rs
1265 b
1266 <|file_sep|>test.rs
1267 <|fim_prefix|>
1268 <|fim_middle|>current
1269 co<|user_cursor|>de
1270 <|fim_suffix|>
1271 <|fim_middle|>updated
1272 "#}
1273 );
1274 }
1275
1276 #[test]
1277 fn test_truncation_includes_partial_excerpts() {
1278 let input = make_input(
1279 "x",
1280 0..1,
1281 0,
1282 vec![],
1283 vec![RelatedFile {
1284 path: Path::new("big.rs").into(),
1285 max_row: 30,
1286 in_open_source_repo: false,
1287 excerpts: vec![
1288 RelatedExcerpt {
1289 row_range: 0..10,
1290 text: "first excerpt\n".into(),
1291 order: 0,
1292 },
1293 RelatedExcerpt {
1294 row_range: 10..20,
1295 text: "second excerpt\n".into(),
1296 order: 0,
1297 },
1298 RelatedExcerpt {
1299 row_range: 20..30,
1300 text: "third excerpt\n".into(),
1301 order: 0,
1302 },
1303 ],
1304 }],
1305 );
1306
1307 assert_eq!(
1308 format_with_budget(&input, 10000),
1309 indoc! {r#"
1310 <|file_sep|>big.rs
1311 first excerpt
1312 ...
1313 second excerpt
1314 ...
1315 third excerpt
1316 <|file_sep|>test.rs
1317 <|fim_prefix|>
1318 <|fim_middle|>current
1319 <|user_cursor|>x
1320 <|fim_suffix|>
1321 <|fim_middle|>updated
1322 "#}
1323 );
1324
1325 assert_eq!(
1326 format_with_budget(&input, 50),
1327 indoc! {r#"
1328 <|file_sep|>big.rs
1329 first excerpt
1330 ...
1331 <|file_sep|>test.rs
1332 <|fim_prefix|>
1333 <|fim_middle|>current
1334 <|user_cursor|>x
1335 <|fim_suffix|>
1336 <|fim_middle|>updated
1337 "#}
1338 );
1339 }
1340
1341 #[test]
1342 fn test_truncation_prioritizes_lower_order_excerpts() {
1343 // Two files: file_a has a high-order excerpt, file_b has a low-order one.
1344 // With tight budget, only the lower-order excerpt from file_b should be included.
1345 let input = make_input(
1346 "x",
1347 0..1,
1348 0,
1349 vec![],
1350 vec![
1351 RelatedFile {
1352 path: Path::new("file_a.rs").into(),
1353 max_row: 10,
1354 in_open_source_repo: false,
1355 excerpts: vec![RelatedExcerpt {
1356 row_range: 0..10,
1357 text: "low priority content\n".into(),
1358 order: 5,
1359 }],
1360 },
1361 RelatedFile {
1362 path: Path::new("file_b.rs").into(),
1363 max_row: 10,
1364 in_open_source_repo: false,
1365 excerpts: vec![RelatedExcerpt {
1366 row_range: 0..10,
1367 text: "high priority content\n".into(),
1368 order: 1,
1369 }],
1370 },
1371 ],
1372 );
1373
1374 // With large budget, both files included; rendered in stable lexicographic order.
1375 assert_eq!(
1376 format_with_budget(&input, 10000),
1377 indoc! {r#"
1378 <|file_sep|>file_a.rs
1379 low priority content
1380 <|file_sep|>file_b.rs
1381 high priority content
1382 <|file_sep|>test.rs
1383 <|fim_prefix|>
1384 <|fim_middle|>current
1385 <|user_cursor|>x
1386 <|fim_suffix|>
1387 <|fim_middle|>updated
1388 "#}
1389 );
1390
1391 // With tight budget, only file_b (lower order) fits.
1392 // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
1393 // file_b header (7) + excerpt (7) = 14 tokens, which fits.
1394 // file_a would need another 14 tokens, which doesn't fit.
1395 assert_eq!(
1396 format_with_budget(&input, 52),
1397 indoc! {r#"
1398 <|file_sep|>file_b.rs
1399 high priority content
1400 <|file_sep|>test.rs
1401 <|fim_prefix|>
1402 <|fim_middle|>current
1403 <|user_cursor|>x
1404 <|fim_suffix|>
1405 <|fim_middle|>updated
1406 "#}
1407 );
1408 }
1409
1410 #[test]
1411 fn test_truncation_drops_high_order_excerpts_within_file() {
1412 // A single file has excerpts at order 1 and order 3. With a tight budget,
1413 // only the order-1 excerpts are included while the order-3 excerpt is
1414 // dropped — even though they belong to the same file. This also preserves
1415 // the parent invariant: parent outline items have order ≤ their best
1416 // child, so they're always included when any child is.
1417 let input = make_input(
1418 "x",
1419 0..1,
1420 0,
1421 vec![],
1422 vec![RelatedFile {
1423 path: Path::new("mod.rs").into(),
1424 max_row: 30,
1425 in_open_source_repo: false,
1426 excerpts: vec![
1427 RelatedExcerpt {
1428 row_range: 0..5,
1429 text: "mod header\n".into(),
1430 order: 1,
1431 },
1432 RelatedExcerpt {
1433 row_range: 5..15,
1434 text: "important fn\n".into(),
1435 order: 1,
1436 },
1437 RelatedExcerpt {
1438 row_range: 15..30,
1439 text: "less important fn\n".into(),
1440 order: 3,
1441 },
1442 ],
1443 }],
1444 );
1445
1446 // With large budget, all three excerpts included.
1447 assert_eq!(
1448 format_with_budget(&input, 10000),
1449 indoc! {r#"
1450 <|file_sep|>mod.rs
1451 mod header
1452 ...
1453 important fn
1454 ...
1455 less important fn
1456 <|file_sep|>test.rs
1457 <|fim_prefix|>
1458 <|fim_middle|>current
1459 <|user_cursor|>x
1460 <|fim_suffix|>
1461 <|fim_middle|>updated
1462 "#}
1463 );
1464
1465 // With tight budget, only order<=1 excerpts included (header + important fn).
1466 assert_eq!(
1467 format_with_budget(&input, 55),
1468 indoc! {r#"
1469 <|file_sep|>mod.rs
1470 mod header
1471 ...
1472 important fn
1473 ...
1474 <|file_sep|>test.rs
1475 <|fim_prefix|>
1476 <|fim_middle|>current
1477 <|user_cursor|>x
1478 <|fim_suffix|>
1479 <|fim_middle|>updated
1480 "#}
1481 );
1482 }
1483
1484 #[test]
1485 fn test_truncation_drops_older_events_first() {
1486 let input = make_input(
1487 "x",
1488 0..1,
1489 0,
1490 vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
1491 vec![],
1492 );
1493
1494 assert_eq!(
1495 format_with_budget(&input, 10000),
1496 indoc! {r#"
1497 <|file_sep|>edit history
1498 --- a/old.rs
1499 +++ b/old.rs
1500 -1
1501 --- a/new.rs
1502 +++ b/new.rs
1503 -2
1504 <|file_sep|>test.rs
1505 <|fim_prefix|>
1506 <|fim_middle|>current
1507 <|user_cursor|>x
1508 <|fim_suffix|>
1509 <|fim_middle|>updated
1510 "#}
1511 );
1512
1513 assert_eq!(
1514 format_with_budget(&input, 55),
1515 indoc! {r#"
1516 <|file_sep|>edit history
1517 --- a/new.rs
1518 +++ b/new.rs
1519 -2
1520 <|file_sep|>test.rs
1521 <|fim_prefix|>
1522 <|fim_middle|>current
1523 <|user_cursor|>x
1524 <|fim_suffix|>
1525 <|fim_middle|>updated
1526 "#}
1527 );
1528 }
1529
1530 #[test]
1531 fn test_cursor_excerpt_always_included_with_minimal_budget() {
1532 let input = make_input(
1533 "fn main() {}",
1534 0..12,
1535 3,
1536 vec![make_event("a.rs", "-old\n+new\n")],
1537 vec![make_related_file("related.rs", "helper\n")],
1538 );
1539
1540 assert_eq!(
1541 format_with_budget(&input, 30),
1542 indoc! {r#"
1543 <|file_sep|>test.rs
1544 <|fim_prefix|>
1545 <|fim_middle|>current
1546 fn <|user_cursor|>main() {}
1547 <|fim_suffix|>
1548 <|fim_middle|>updated
1549 "#}
1550 );
1551 }
1552
1553 fn format_seed_coder(input: &ZetaPromptInput) -> String {
1554 format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, 10000)
1555 }
1556
1557 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1558 format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, max_tokens)
1559 }
1560
1561 #[test]
1562 fn test_seed_coder_basic_format() {
1563 let input = make_input(
1564 "prefix\neditable\nsuffix",
1565 7..15,
1566 10,
1567 vec![make_event("a.rs", "-old\n+new\n")],
1568 vec![make_related_file("related.rs", "fn helper() {}\n")],
1569 );
1570
1571 assert_eq!(
1572 format_seed_coder(&input),
1573 indoc! {r#"
1574 <[fim-suffix]>
1575 suffix
1576 <[fim-prefix]><filename>related.rs
1577 fn helper() {}
1578
1579 <filename>edit_history
1580 --- a/a.rs
1581 +++ b/a.rs
1582 -old
1583 +new
1584
1585 <filename>test.rs
1586 prefix
1587 <<<<<<< CURRENT
1588 edi<|user_cursor|>table
1589 =======
1590 <[fim-middle]>"#}
1591 );
1592 }
1593
1594 #[test]
1595 fn test_seed_coder_no_context() {
1596 let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
1597
1598 assert_eq!(
1599 format_seed_coder(&input),
1600 indoc! {r#"
1601 <[fim-suffix]>
1602 after
1603 <[fim-prefix]><filename>test.rs
1604 before
1605 <<<<<<< CURRENT
1606 mid<|user_cursor|>dle
1607 =======
1608 <[fim-middle]>"#}
1609 );
1610 }
1611
1612 #[test]
1613 fn test_seed_coder_truncation_drops_context() {
1614 let input = make_input(
1615 "code",
1616 0..4,
1617 2,
1618 vec![make_event("a.rs", "-x\n+y\n")],
1619 vec![make_related_file("r1.rs", "content\n")],
1620 );
1621
1622 // With large budget, everything is included
1623 assert_eq!(
1624 format_seed_coder(&input),
1625 indoc! {r#"
1626 <[fim-suffix]>
1627 <[fim-prefix]><filename>r1.rs
1628 content
1629
1630 <filename>edit_history
1631 --- a/a.rs
1632 +++ b/a.rs
1633 -x
1634 +y
1635
1636 <filename>test.rs
1637 <<<<<<< CURRENT
1638 co<|user_cursor|>de
1639 =======
1640 <[fim-middle]>"#}
1641 );
1642
1643 // With tight budget, context is dropped but cursor section remains
1644 assert_eq!(
1645 format_seed_coder_with_budget(&input, 30),
1646 indoc! {r#"
1647 <[fim-suffix]>
1648 <[fim-prefix]><filename>test.rs
1649 <<<<<<< CURRENT
1650 co<|user_cursor|>de
1651 =======
1652 <[fim-middle]>"#}
1653 );
1654 }
1655
1656 #[test]
1657 fn test_seed_coder_truncation_prioritizes_lower_order() {
1658 let input = make_input(
1659 "code",
1660 0..4,
1661 2,
1662 vec![],
1663 vec![
1664 RelatedFile {
1665 path: Path::new("low_prio.rs").into(),
1666 max_row: 5,
1667 in_open_source_repo: false,
1668 excerpts: vec![RelatedExcerpt {
1669 row_range: 0..5,
1670 text: "low prio\n".into(),
1671 order: 10,
1672 }],
1673 },
1674 RelatedFile {
1675 path: Path::new("high_prio.rs").into(),
1676 max_row: 5,
1677 in_open_source_repo: false,
1678 excerpts: vec![RelatedExcerpt {
1679 row_range: 0..5,
1680 text: "high prio\n".into(),
1681 order: 1,
1682 }],
1683 },
1684 ],
1685 );
1686
1687 // With large budget, both included; rendered in stable lexicographic order.
1688 assert_eq!(
1689 format_seed_coder(&input),
1690 indoc! {r#"
1691 <[fim-suffix]>
1692 <[fim-prefix]><filename>low_prio.rs
1693 low prio
1694 <filename>high_prio.rs
1695 high prio
1696
1697 <filename>test.rs
1698 <<<<<<< CURRENT
1699 co<|user_cursor|>de
1700 =======
1701 <[fim-middle]>"#}
1702 );
1703
1704 // With tight budget, only high_prio included.
1705 // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
1706 // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
1707 assert_eq!(
1708 format_seed_coder_with_budget(&input, 44),
1709 indoc! {r#"
1710 <[fim-suffix]>
1711 <[fim-prefix]><filename>high_prio.rs
1712 high prio
1713
1714 <filename>test.rs
1715 <<<<<<< CURRENT
1716 co<|user_cursor|>de
1717 =======
1718 <[fim-middle]>"#}
1719 );
1720 }
1721
1722 #[test]
1723 fn test_seed_coder_clean_output() {
1724 let output_with_marker = "new code\n>>>>>>> UPDATED\n";
1725 let output_without_marker = "new code\n";
1726
1727 assert_eq!(
1728 clean_zeta2_model_output(output_with_marker, ZetaFormat::V0211SeedCoder),
1729 "new code\n"
1730 );
1731 assert_eq!(
1732 clean_zeta2_model_output(output_without_marker, ZetaFormat::V0211SeedCoder),
1733 "new code\n"
1734 );
1735 }
1736
1737 #[test]
1738 fn test_format_zeta1_from_input_basic() {
1739 let excerpt = "fn before() {}\nfn foo() {\n let x = 1;\n}\nfn after() {}\n";
1740 let input = ZetaPromptInput {
1741 cursor_path: Path::new("src/main.rs").into(),
1742 cursor_excerpt: excerpt.into(),
1743 cursor_offset_in_excerpt: 30,
1744 excerpt_start_row: Some(0),
1745 events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
1746 related_files: vec![],
1747 excerpt_ranges: ExcerptRanges {
1748 editable_150: 15..41,
1749 editable_180: 15..41,
1750 editable_350: 15..41,
1751 editable_150_context_350: 0..excerpt.len(),
1752 editable_180_context_350: 0..excerpt.len(),
1753 editable_350_context_150: 0..excerpt.len(),
1754 ..Default::default()
1755 },
1756 experiment: None,
1757 in_open_source_repo: false,
1758 can_collect_data: false,
1759 };
1760
1761 let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
1762
1763 assert_eq!(
1764 prompt,
1765 concat!(
1766 "### Instruction:\n",
1767 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1768 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1769 "into account the cursor location.\n",
1770 "\n",
1771 "### User Edits:\n",
1772 "\n",
1773 "User edited other.rs:\n",
1774 "```diff\n",
1775 "-old\n",
1776 "+new\n",
1777 "\n",
1778 "```\n",
1779 "\n",
1780 "### User Excerpt:\n",
1781 "\n",
1782 "```src/main.rs\n",
1783 "<|start_of_file|>\n",
1784 "fn before() {}\n",
1785 "<|editable_region_start|>\n",
1786 "fn foo() {\n",
1787 " <|user_cursor_is_here|>let x = 1;\n",
1788 "\n",
1789 "<|editable_region_end|>}\n",
1790 "fn after() {}\n",
1791 "\n",
1792 "```\n",
1793 "\n",
1794 "### Response:\n",
1795 ),
1796 );
1797 }
1798
1799 #[test]
1800 fn test_format_zeta1_from_input_no_start_of_file() {
1801 let excerpt = "fn foo() {\n let x = 1;\n}\n";
1802 let input = ZetaPromptInput {
1803 cursor_path: Path::new("src/main.rs").into(),
1804 cursor_excerpt: excerpt.into(),
1805 cursor_offset_in_excerpt: 15,
1806 excerpt_start_row: Some(10),
1807 events: vec![],
1808 related_files: vec![],
1809 excerpt_ranges: ExcerptRanges {
1810 editable_150: 0..28,
1811 editable_180: 0..28,
1812 editable_350: 0..28,
1813 editable_150_context_350: 0..28,
1814 editable_180_context_350: 0..28,
1815 editable_350_context_150: 0..28,
1816 ..Default::default()
1817 },
1818 experiment: None,
1819 in_open_source_repo: false,
1820 can_collect_data: false,
1821 };
1822
1823 let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
1824
1825 assert_eq!(
1826 prompt,
1827 concat!(
1828 "### Instruction:\n",
1829 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1830 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1831 "into account the cursor location.\n",
1832 "\n",
1833 "### User Edits:\n",
1834 "\n",
1835 "\n",
1836 "\n",
1837 "### User Excerpt:\n",
1838 "\n",
1839 "```src/main.rs\n",
1840 "<|editable_region_start|>\n",
1841 "fn foo() {\n",
1842 " <|user_cursor_is_here|>let x = 1;\n",
1843 "}\n",
1844 "\n",
1845 "<|editable_region_end|>\n",
1846 "```\n",
1847 "\n",
1848 "### Response:\n",
1849 ),
1850 );
1851 }
1852
1853 #[test]
1854 fn test_format_zeta1_from_input_with_sub_ranges() {
1855 let excerpt = "// prefix\nfn foo() {\n let x = 1;\n}\n// suffix\n";
1856 let editable_range = 10..37;
1857 let context_range = 0..excerpt.len();
1858
1859 let input = ZetaPromptInput {
1860 cursor_path: Path::new("test.rs").into(),
1861 cursor_excerpt: excerpt.into(),
1862 cursor_offset_in_excerpt: 25,
1863 excerpt_start_row: Some(0),
1864 events: vec![],
1865 related_files: vec![],
1866 excerpt_ranges: ExcerptRanges {
1867 editable_150: editable_range.clone(),
1868 editable_180: editable_range.clone(),
1869 editable_350: editable_range.clone(),
1870 editable_150_context_350: context_range.clone(),
1871 editable_180_context_350: context_range.clone(),
1872 editable_350_context_150: context_range.clone(),
1873 ..Default::default()
1874 },
1875 experiment: None,
1876 in_open_source_repo: false,
1877 can_collect_data: false,
1878 };
1879
1880 let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
1881
1882 assert_eq!(
1883 prompt,
1884 concat!(
1885 "### Instruction:\n",
1886 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1887 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1888 "into account the cursor location.\n",
1889 "\n",
1890 "### User Edits:\n",
1891 "\n",
1892 "\n",
1893 "\n",
1894 "### User Excerpt:\n",
1895 "\n",
1896 "```test.rs\n",
1897 "<|start_of_file|>\n",
1898 "// prefix\n",
1899 "<|editable_region_start|>\n",
1900 "fn foo() {\n",
1901 " <|user_cursor_is_here|>let x = 1;\n",
1902 "}\n",
1903 "<|editable_region_end|>\n",
1904 "// suffix\n",
1905 "\n",
1906 "```\n",
1907 "\n",
1908 "### Response:\n",
1909 ),
1910 );
1911 }
1912
1913 #[test]
1914 fn test_clean_zeta1_model_output_basic() {
1915 let output = indoc! {"
1916 <|editable_region_start|>
1917 fn main() {
1918 println!(\"hello\");
1919 }
1920 <|editable_region_end|>
1921 "};
1922
1923 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1924 assert_eq!(cleaned, "fn main() {\n println!(\"hello\");\n}");
1925 }
1926
1927 #[test]
1928 fn test_clean_zeta1_model_output_with_cursor() {
1929 let output = indoc! {"
1930 <|editable_region_start|>
1931 fn main() {
1932 <|user_cursor_is_here|>println!(\"hello\");
1933 }
1934 <|editable_region_end|>
1935 "};
1936
1937 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1938 assert_eq!(
1939 cleaned,
1940 "fn main() {\n <|user_cursor|>println!(\"hello\");\n}"
1941 );
1942 }
1943
1944 #[test]
1945 fn test_clean_zeta1_model_output_no_markers() {
1946 let output = "fn main() {}\n";
1947 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1948 assert_eq!(cleaned, "fn main() {}\n");
1949 }
1950
1951 #[test]
1952 fn test_clean_zeta1_model_output_empty_region() {
1953 let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
1954 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1955 assert_eq!(cleaned, "");
1956 }
1957}