1use anyhow::Result;
2use serde::{Deserialize, Serialize};
3use std::fmt::Write;
4use std::ops::Range;
5use std::path::Path;
6use std::sync::Arc;
7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
8
/// Marker written into the prompt at the cursor position inside the editable region.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes to the budgeted formatter.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
16
/// Rough token estimate for a text of `bytes` bytes: one token per three
/// bytes, rounded down.
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
20
/// The client's preferred edit prediction model. The server may override this.
///
/// Carried in `ZetaPromptInput::preferred_model`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum EditPredictionModelKind {
    Zeta1,
    Zeta2,
}
27
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
///
/// `excerpt_range_for_format` picks one (editable, context) pair from these
/// fields for a given `ZetaFormat`.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
}
46
/// Input from which the prompt formatters in this file build a prompt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path written into the file header of the cursor excerpt section.
    pub cursor_path: Arc<Path>,
    /// Text around the cursor; the offsets and ranges below index into it by byte.
    pub cursor_excerpt: Arc<str>,
    /// Editable byte range within `cursor_excerpt`; used as-is when
    /// `excerpt_ranges` is absent.
    pub editable_range_in_excerpt: Range<usize>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Presumably the file row at which `cursor_excerpt` begins (confirm with
    /// the producer). The zeta1 excerpt formatter compares it against
    /// `Some(0)` when deciding whether to emit the start-of-file marker.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events rendered into the edit-history section. When the budget is
    /// tight, events at the end of the list are kept in preference to earlier ones.
    pub events: Vec<Arc<Event>>,
    /// Related files whose excerpts are rendered into the prompt, budget permitting.
    pub related_files: Vec<RelatedFile>,
    /// When set, the excerpt was computed with a larger budget (~512 tokens)
    /// and these ranges let the server select model-appropriate subsets.
    /// When absent, the excerpt IS the context region and
    /// `editable_range_in_excerpt` is the only editable range.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_ranges: Option<ExcerptRanges>,
    /// Client's preferred model. The server may override.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub preferred_model: Option<EditPredictionModelKind>,
    // NOTE(review): the two flags below are not read by any formatter visible
    // in this file; their consumers live elsewhere.
    #[serde(default)]
    pub in_open_source_repo: bool,
    #[serde(default)]
    pub can_collect_data: bool,
}
71
/// Prompt format versions supported by the formatters in this file.
///
/// The variant name doubles as the format's string form: the `IntoStaticStr`
/// derive supplies it to `Display`, `ZetaFormat::parse` and
/// `ZetaFormat::options_as_string`. `V0131GitMergeMarkersPrefix` is the default.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
}
96
97impl std::fmt::Display for ZetaFormat {
98 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
99 write!(f, "{}", <&'static str>::from(self))
100 }
101}
102
103impl ZetaFormat {
104 pub fn parse(format_name: &str) -> Result<Self> {
105 let mut results = ZetaFormat::iter().filter(|version| {
106 <&'static str>::from(version)
107 .to_lowercase()
108 .contains(&format_name.to_lowercase())
109 });
110 let Some(result) = results.next() else {
111 anyhow::bail!(
112 "`{format_name}` did not match any of:\n{}",
113 Self::options_as_string()
114 );
115 };
116 if results.next().is_some() {
117 anyhow::bail!(
118 "`{format_name}` matched more than one of:\n{}",
119 Self::options_as_string()
120 );
121 }
122 Ok(result)
123 }
124
125 pub fn options_as_string() -> String {
126 ZetaFormat::iter()
127 .map(|format| format!("- {}\n", <&'static str>::from(format)))
128 .collect::<Vec<_>>()
129 .concat()
130 }
131
132 pub fn special_tokens(&self) -> &'static [&'static str] {
133 match self {
134 ZetaFormat::V0112MiddleAtEnd
135 | ZetaFormat::V0113Ordered
136 | ZetaFormat::V0114180EditableRegion => &[
137 "<|fim_prefix|>",
138 "<|fim_suffix|>",
139 "<|fim_middle|>",
140 "<|file_sep|>",
141 CURSOR_MARKER,
142 ],
143 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
144 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
145 v0131_git_merge_markers_prefix::special_tokens()
146 }
147 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
148 }
149 }
150}
151
/// An edit-history event that can be rendered into a prompt.
///
/// Serialized with an `event` tag field identifying the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, carried as a diff.
    BufferChange {
        /// Path rendered on the `+++ b` line by `write_event`.
        path: Arc<Path>,
        /// Path rendered on the `--- a` line by `write_event`; the zeta1
        /// formatter reports a rename when it differs from `path`.
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the two path lines.
        diff: String,
        /// When true, `write_event` prefixes the event with
        /// `// User accepted prediction:`.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; not used when rendering.
        in_open_source_repo: bool,
    },
}
163
164impl Event {
165 pub fn in_open_source_repo(&self) -> bool {
166 match self {
167 Event::BufferChange {
168 in_open_source_repo,
169 ..
170 } => *in_open_source_repo,
171 }
172 }
173}
174
175pub fn write_event(prompt: &mut String, event: &Event) {
176 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
177 for component in path.components() {
178 prompt.push('/');
179 write!(prompt, "{}", component.as_os_str().display()).ok();
180 }
181 }
182 match event {
183 Event::BufferChange {
184 path,
185 old_path,
186 diff,
187 predicted,
188 in_open_source_repo: _,
189 } => {
190 if *predicted {
191 prompt.push_str("// User accepted prediction:\n");
192 }
193 prompt.push_str("--- a");
194 write_path_as_unix_str(prompt, old_path.as_ref());
195 prompt.push_str("\n+++ b");
196 write_path_as_unix_str(prompt, path.as_ref());
197 prompt.push('\n');
198 prompt.push_str(diff);
199 }
200 }
201}
202
/// A file related to the cursor location, with the excerpts that may be
/// included in the prompt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path rendered after the file marker in the file's header line.
    pub path: Arc<Path>,
    /// Compared against each excerpt's `row_range.end`: an excerpt ending
    /// before this row is followed by a `...` line when rendered.
    pub max_row: u32,
    /// Excerpts of this file; rendered in this (positional) order.
    pub excerpts: Vec<RelatedExcerpt>,
    // NOTE(review): not read by the formatters visible in this file.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
211
/// One excerpt of a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by the excerpt; only `end` is consulted here, to decide
    /// whether a trailing `...` line is rendered.
    pub row_range: Range<u32>,
    /// Excerpt text, rendered verbatim (a newline is appended if missing).
    pub text: Arc<str>,
    /// Priority used by the budgeted formatter: lower values are included
    /// first. Defaults to 0 when absent from the serialized form.
    #[serde(default)]
    pub order: usize,
}
219
220pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
221 format
222 .special_tokens()
223 .iter()
224 .any(|token| input.cursor_excerpt.contains(token))
225}
226
/// Formats the prompt for `input` in the given `format`, using
/// `MAX_PROMPT_TOKENS` as the token budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    format_zeta_prompt_with_budget(input, format, MAX_PROMPT_TOKENS)
}
230
231/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
232pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
233 match format {
234 ZetaFormat::V0120GitMergeMarkers => output
235 .strip_suffix(v0120_git_merge_markers::END_MARKER)
236 .unwrap_or(output),
237 ZetaFormat::V0131GitMergeMarkersPrefix => output
238 .strip_suffix(v0131_git_merge_markers_prefix::END_MARKER)
239 .unwrap_or(output),
240 ZetaFormat::V0211SeedCoder => output
241 .strip_suffix(seed_coder::END_MARKER)
242 .unwrap_or(output),
243 _ => output,
244 }
245}
246
247pub fn excerpt_range_for_format(
248 format: ZetaFormat,
249 ranges: &ExcerptRanges,
250) -> (Range<usize>, Range<usize>) {
251 match format {
252 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
253 ranges.editable_150.clone(),
254 ranges.editable_150_context_350.clone(),
255 ),
256 ZetaFormat::V0114180EditableRegion
257 | ZetaFormat::V0120GitMergeMarkers
258 | ZetaFormat::V0131GitMergeMarkersPrefix
259 | ZetaFormat::V0211Prefill
260 | ZetaFormat::V0211SeedCoder => (
261 ranges.editable_350.clone(),
262 ranges.editable_350_context_150.clone(),
263 ),
264 }
265}
266
267pub fn resolve_cursor_region(
268 input: &ZetaPromptInput,
269 format: ZetaFormat,
270) -> (&str, Range<usize>, usize) {
271 let Some(ranges) = &input.excerpt_ranges else {
272 return (
273 &input.cursor_excerpt,
274 input.editable_range_in_excerpt.clone(),
275 input.cursor_offset_in_excerpt,
276 );
277 };
278
279 let (editable_range, context_range) = excerpt_range_for_format(format, ranges);
280 let context_start = context_range.start;
281 let context_text = &input.cursor_excerpt[context_range];
282 let adjusted_editable =
283 (editable_range.start - context_start)..(editable_range.end - context_start);
284 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
285
286 (context_text, adjusted_editable, adjusted_cursor)
287}
288
289fn format_zeta_prompt_with_budget(
290 input: &ZetaPromptInput,
291 format: ZetaFormat,
292 max_tokens: usize,
293) -> String {
294 let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
295 let path = &*input.cursor_path;
296
297 let mut cursor_section = String::new();
298 match format {
299 ZetaFormat::V0112MiddleAtEnd => {
300 v0112_middle_at_end::write_cursor_excerpt_section(
301 &mut cursor_section,
302 path,
303 context,
304 &editable_range,
305 cursor_offset,
306 );
307 }
308 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
309 v0113_ordered::write_cursor_excerpt_section(
310 &mut cursor_section,
311 path,
312 context,
313 &editable_range,
314 cursor_offset,
315 )
316 }
317 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
318 &mut cursor_section,
319 path,
320 context,
321 &editable_range,
322 cursor_offset,
323 ),
324 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
325 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
326 &mut cursor_section,
327 path,
328 context,
329 &editable_range,
330 cursor_offset,
331 )
332 }
333 ZetaFormat::V0211SeedCoder => {
334 return seed_coder::format_prompt_with_budget(
335 path,
336 context,
337 &editable_range,
338 cursor_offset,
339 &input.events,
340 &input.related_files,
341 max_tokens,
342 );
343 }
344 }
345
346 let cursor_tokens = estimate_tokens(cursor_section.len());
347 let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
348
349 let edit_history_section = format_edit_history_within_budget(
350 &input.events,
351 "<|file_sep|>",
352 "edit history",
353 budget_after_cursor,
354 );
355 let edit_history_tokens = estimate_tokens(edit_history_section.len());
356 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
357
358 let related_files_section = format_related_files_within_budget(
359 &input.related_files,
360 "<|file_sep|>",
361 budget_after_edit_history,
362 );
363
364 let mut prompt = String::new();
365 prompt.push_str(&related_files_section);
366 prompt.push_str(&edit_history_section);
367 prompt.push_str(&cursor_section);
368 prompt
369}
370
371pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
372 match format {
373 ZetaFormat::V0112MiddleAtEnd
374 | ZetaFormat::V0113Ordered
375 | ZetaFormat::V0114180EditableRegion
376 | ZetaFormat::V0120GitMergeMarkers
377 | ZetaFormat::V0131GitMergeMarkersPrefix
378 | ZetaFormat::V0211SeedCoder => String::new(),
379 ZetaFormat::V0211Prefill => {
380 let (context, editable_range, _) = resolve_cursor_region(input, format);
381 v0211_prefill::get_prefill(context, &editable_range)
382 }
383 }
384}
385
386fn format_edit_history_within_budget(
387 events: &[Arc<Event>],
388 file_marker: &str,
389 edit_history_name: &str,
390 max_tokens: usize,
391) -> String {
392 let header = format!("{}{}\n", file_marker, edit_history_name);
393 let header_tokens = estimate_tokens(header.len());
394 if header_tokens >= max_tokens {
395 return String::new();
396 }
397
398 let mut event_strings: Vec<String> = Vec::new();
399 let mut total_tokens = header_tokens;
400
401 for event in events.iter().rev() {
402 let mut event_str = String::new();
403 write_event(&mut event_str, event);
404 let event_tokens = estimate_tokens(event_str.len());
405
406 if total_tokens + event_tokens > max_tokens {
407 break;
408 }
409 total_tokens += event_tokens;
410 event_strings.push(event_str);
411 }
412
413 if event_strings.is_empty() {
414 return String::new();
415 }
416
417 let mut result = header;
418 for event_str in event_strings.iter().rev() {
419 result.push_str(event_str);
420 }
421 result
422}
423
424fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
425 let needs_newline = !excerpt.text.ends_with('\n');
426 let needs_ellipsis = excerpt.row_range.end < file_max_row;
427 let len = excerpt.text.len()
428 + if needs_newline { "\n".len() } else { 0 }
429 + if needs_ellipsis { "...\n".len() } else { 0 };
430 estimate_tokens(len)
431}
432
/// Renders as many related-file excerpts as fit within `max_tokens`, with
/// costs estimated via `estimate_tokens`.
///
/// Excerpts are admitted by ascending `order`. A whole order level is taken
/// when it fits; otherwise a partial prefix of that level is taken and no
/// worse level is considered. A file's header (`file_marker` + path + newline)
/// is charged once, the first time one of its excerpts is admitted.
///
/// Included files are emitted sorted by the best `order` among their included
/// excerpts, ties broken by position in `related_files`. Within a file,
/// excerpts appear in positional order, each followed by `...\n` when its
/// `row_range.end` is below the file's `max_row`. Returns an empty string
/// when nothing is included.
fn format_related_files_within_budget(
    related_files: &[RelatedFile],
    file_marker: &str,
    max_tokens: usize,
) -> String {
    // Collect the distinct order values across all excerpts, sorted ascending.
    let mut order_levels: Vec<usize> = related_files
        .iter()
        .flat_map(|f| f.excerpts.iter().map(|e| e.order))
        .collect();
    order_levels.sort_unstable();
    order_levels.dedup();

    // Pre-compute file header strings and their token costs.
    let file_headers: Vec<String> = related_files
        .iter()
        .map(|file| {
            let path_str = file.path.to_string_lossy();
            format!("{}{}\n", file_marker, path_str)
        })
        .collect();

    // Track which excerpts are included per file.
    let mut included: Vec<Vec<bool>> = related_files
        .iter()
        .map(|file| vec![false; file.excerpts.len()])
        .collect();
    let mut file_included: Vec<bool> = vec![false; related_files.len()];
    let mut total_tokens = 0;

    // Process order levels from best (lowest) to worst. At each level, try to
    // include all not-yet-included excerpts with that order across all files.
    // If the full level doesn't fit, include a partial prefix (top-to-bottom
    // within each file) and stop — don't proceed to worse order levels.
    'outer: for &order in &order_levels {
        // Gather the work for this order level: for each file that has excerpts
        // at this order, collect the not-yet-included excerpt indices (in their
        // original positional order) and the token cost to add them (including
        // the file header if the file isn't already included).
        struct FileWork {
            file_idx: usize,
            excerpt_indices: Vec<usize>,
            header_cost: usize,
            excerpt_costs: Vec<usize>,
        }

        let mut work_items: Vec<FileWork> = Vec::new();
        for (file_idx, file) in related_files.iter().enumerate() {
            let mut excerpt_indices = Vec::new();
            let mut excerpt_costs = Vec::new();
            for (eidx, excerpt) in file.excerpts.iter().enumerate() {
                if excerpt.order == order && !included[file_idx][eidx] {
                    excerpt_indices.push(eidx);
                    excerpt_costs.push(excerpt_rendered_tokens(excerpt, file.max_row));
                }
            }
            if excerpt_indices.is_empty() {
                continue;
            }
            // The header is only charged the first time a file contributes.
            let header_cost = if file_included[file_idx] {
                0
            } else {
                estimate_tokens(file_headers[file_idx].len())
            };
            work_items.push(FileWork {
                file_idx,
                excerpt_indices,
                header_cost,
                excerpt_costs,
            });
        }

        // Compute the total cost for this entire order level.
        let level_cost: usize = work_items
            .iter()
            .map(|w| w.header_cost + w.excerpt_costs.iter().sum::<usize>())
            .sum();

        if total_tokens + level_cost <= max_tokens {
            // The whole level fits — include everything.
            for work in &work_items {
                total_tokens += work.header_cost;
                file_included[work.file_idx] = true;
                for (i, &eidx) in work.excerpt_indices.iter().enumerate() {
                    included[work.file_idx][eidx] = true;
                    total_tokens += work.excerpt_costs[i];
                }
            }
        } else {
            // The whole level doesn't fit. Include as many excerpts as possible
            // from each file (in positional order), then stop entirely.
            for work in &work_items {
                let available = max_tokens.saturating_sub(total_tokens);
                let mut file_cost = work.header_cost;

                // Count the leading excerpts of this file that fit together
                // with its header in the remaining budget.
                let mut count = 0;
                for i in 0..work.excerpt_indices.len() {
                    if file_cost + work.excerpt_costs[i] > available {
                        break;
                    }
                    file_cost += work.excerpt_costs[i];
                    count += 1;
                }

                // A file whose first excerpt does not fit is skipped without
                // charging its header; later files are still tried.
                if count > 0 {
                    total_tokens += work.header_cost;
                    file_included[work.file_idx] = true;
                    for (i, &eidx) in work.excerpt_indices.iter().take(count).enumerate() {
                        included[work.file_idx][eidx] = true;
                        total_tokens += work.excerpt_costs[i];
                    }
                }
            }
            break 'outer;
        }
    }

    // Determine file rendering order: by the best (lowest) order of any
    // included excerpt, breaking ties by original file index.
    let mut file_order: Vec<(usize, usize)> = Vec::new();
    for (file_idx, file) in related_files.iter().enumerate() {
        if !file_included[file_idx] {
            continue;
        }
        let best_order = file
            .excerpts
            .iter()
            .enumerate()
            .filter(|(eidx, _)| included[file_idx][*eidx])
            .map(|(_, e)| e.order)
            .min()
            .unwrap_or(usize::MAX);
        file_order.push((file_idx, best_order));
    }
    file_order.sort_by_key(|&(file_idx, best_order)| (best_order, file_idx));

    // Render included files and excerpts in positional order within each file.
    let mut result = String::new();
    for &(file_idx, _) in &file_order {
        let file = &related_files[file_idx];
        result.push_str(&file_headers[file_idx]);
        for (eidx, excerpt) in file.excerpts.iter().enumerate() {
            if !included[file_idx][eidx] {
                continue;
            }
            result.push_str(&excerpt.text);
            if !result.ends_with('\n') {
                result.push('\n');
            }
            if excerpt.row_range.end < file.max_row {
                result.push_str("...\n");
            }
        }
    }

    result
}
590
591pub fn write_related_files(
592 prompt: &mut String,
593 related_files: &[RelatedFile],
594) -> Vec<Range<usize>> {
595 let mut ranges = Vec::new();
596 for file in related_files {
597 let start = prompt.len();
598 let path_str = file.path.to_string_lossy();
599 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
600 for excerpt in &file.excerpts {
601 prompt.push_str(&excerpt.text);
602 if !prompt.ends_with('\n') {
603 prompt.push('\n');
604 }
605 if excerpt.row_range.end < file.max_row {
606 prompt.push_str("...\n");
607 }
608 }
609 let end = prompt.len();
610 ranges.push(start..end);
611 }
612 ranges
613}
614
615mod v0112_middle_at_end {
616 use super::*;
617
618 pub fn write_cursor_excerpt_section(
619 prompt: &mut String,
620 path: &Path,
621 context: &str,
622 editable_range: &Range<usize>,
623 cursor_offset: usize,
624 ) {
625 let path_str = path.to_string_lossy();
626 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
627
628 prompt.push_str("<|fim_prefix|>\n");
629 prompt.push_str(&context[..editable_range.start]);
630
631 prompt.push_str("<|fim_suffix|>\n");
632 prompt.push_str(&context[editable_range.end..]);
633 if !prompt.ends_with('\n') {
634 prompt.push('\n');
635 }
636
637 prompt.push_str("<|fim_middle|>current\n");
638 prompt.push_str(&context[editable_range.start..cursor_offset]);
639 prompt.push_str(CURSOR_MARKER);
640 prompt.push_str(&context[cursor_offset..editable_range.end]);
641 if !prompt.ends_with('\n') {
642 prompt.push('\n');
643 }
644
645 prompt.push_str("<|fim_middle|>updated\n");
646 }
647}
648
649mod v0113_ordered {
650 use super::*;
651
652 pub fn write_cursor_excerpt_section(
653 prompt: &mut String,
654 path: &Path,
655 context: &str,
656 editable_range: &Range<usize>,
657 cursor_offset: usize,
658 ) {
659 let path_str = path.to_string_lossy();
660 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
661
662 prompt.push_str("<|fim_prefix|>\n");
663 prompt.push_str(&context[..editable_range.start]);
664 if !prompt.ends_with('\n') {
665 prompt.push('\n');
666 }
667
668 prompt.push_str("<|fim_middle|>current\n");
669 prompt.push_str(&context[editable_range.start..cursor_offset]);
670 prompt.push_str(CURSOR_MARKER);
671 prompt.push_str(&context[cursor_offset..editable_range.end]);
672 if !prompt.ends_with('\n') {
673 prompt.push('\n');
674 }
675
676 prompt.push_str("<|fim_suffix|>\n");
677 prompt.push_str(&context[editable_range.end..]);
678 if !prompt.ends_with('\n') {
679 prompt.push('\n');
680 }
681
682 prompt.push_str("<|fim_middle|>updated\n");
683 }
684}
685
686pub mod v0120_git_merge_markers {
687 //! A prompt that uses git-style merge conflict markers to represent the editable region.
688 //!
689 //! Example prompt:
690 //!
691 //! <|file_sep|>path/to/target_file.py
692 //! <|fim_prefix|>
693 //! code before editable region
694 //! <|fim_suffix|>
695 //! code after editable region
696 //! <|fim_middle|>
697 //! <<<<<<< CURRENT
698 //! code that
699 //! needs to<|user_cursor|>
700 //! be rewritten
701 //! =======
702 //!
703 //! Expected output (should be generated by the model):
704 //!
705 //! updated
706 //! code with
707 //! changes applied
708 //! >>>>>>> UPDATED
709
710 use super::*;
711
712 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
713 pub const SEPARATOR: &str = "=======\n";
714 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
715
716 pub fn special_tokens() -> &'static [&'static str] {
717 &[
718 "<|fim_prefix|>",
719 "<|fim_suffix|>",
720 "<|fim_middle|>",
721 "<|file_sep|>",
722 START_MARKER,
723 SEPARATOR,
724 END_MARKER,
725 CURSOR_MARKER,
726 ]
727 }
728
729 pub fn write_cursor_excerpt_section(
730 prompt: &mut String,
731 path: &Path,
732 context: &str,
733 editable_range: &Range<usize>,
734 cursor_offset: usize,
735 ) {
736 let path_str = path.to_string_lossy();
737 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
738
739 prompt.push_str("<|fim_prefix|>");
740 prompt.push_str(&context[..editable_range.start]);
741
742 prompt.push_str("<|fim_suffix|>");
743 prompt.push_str(&context[editable_range.end..]);
744 if !prompt.ends_with('\n') {
745 prompt.push('\n');
746 }
747
748 prompt.push_str("<|fim_middle|>");
749 prompt.push_str(START_MARKER);
750 prompt.push_str(&context[editable_range.start..cursor_offset]);
751 prompt.push_str(CURSOR_MARKER);
752 prompt.push_str(&context[cursor_offset..editable_range.end]);
753 if !prompt.ends_with('\n') {
754 prompt.push('\n');
755 }
756 prompt.push_str(SEPARATOR);
757 }
758}
759
760pub mod v0131_git_merge_markers_prefix {
761 //! A prompt that uses git-style merge conflict markers to represent the editable region.
762 //!
763 //! Example prompt:
764 //!
765 //! <|file_sep|>path/to/target_file.py
766 //! <|fim_prefix|>
767 //! code before editable region
768 //! <<<<<<< CURRENT
769 //! code that
770 //! needs to<|user_cursor|>
771 //! be rewritten
772 //! =======
773 //! <|fim_suffix|>
774 //! code after editable region
775 //! <|fim_middle|>
776 //!
777 //! Expected output (should be generated by the model):
778 //!
779 //! updated
780 //! code with
781 //! changes applied
782 //! >>>>>>> UPDATED
783
784 use super::*;
785
786 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
787 pub const SEPARATOR: &str = "=======\n";
788 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
789
790 pub fn special_tokens() -> &'static [&'static str] {
791 &[
792 "<|fim_prefix|>",
793 "<|fim_suffix|>",
794 "<|fim_middle|>",
795 "<|file_sep|>",
796 START_MARKER,
797 SEPARATOR,
798 END_MARKER,
799 CURSOR_MARKER,
800 ]
801 }
802
803 pub fn write_cursor_excerpt_section(
804 prompt: &mut String,
805 path: &Path,
806 context: &str,
807 editable_range: &Range<usize>,
808 cursor_offset: usize,
809 ) {
810 let path_str = path.to_string_lossy();
811 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
812
813 prompt.push_str("<|fim_prefix|>");
814 prompt.push_str(&context[..editable_range.start]);
815 prompt.push_str(START_MARKER);
816 prompt.push_str(&context[editable_range.start..cursor_offset]);
817 prompt.push_str(CURSOR_MARKER);
818 prompt.push_str(&context[cursor_offset..editable_range.end]);
819 if !prompt.ends_with('\n') {
820 prompt.push('\n');
821 }
822 prompt.push_str(SEPARATOR);
823
824 prompt.push_str("<|fim_suffix|>");
825 prompt.push_str(&context[editable_range.end..]);
826 if !prompt.ends_with('\n') {
827 prompt.push('\n');
828 }
829
830 prompt.push_str("<|fim_middle|>");
831 }
832}
833
834pub mod v0211_prefill {
835 use super::*;
836
837 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
838 let editable_region = &context[editable_range.start..editable_range.end];
839
840 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
841 let prefill_len = editable_region.floor_char_boundary(prefill_len);
842
843 // Find a token boundary to avoid splitting tokens in the prefill.
844 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
845 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
846 // the \n and consume any consecutive \n characters after it.
847 let prefill = &editable_region[..prefill_len];
848 match prefill.rfind('\n') {
849 Some(pos) => {
850 let mut end = pos + 1;
851 while end < editable_region.len()
852 && editable_region.as_bytes().get(end) == Some(&b'\n')
853 {
854 end += 1;
855 }
856 editable_region[..end].to_string()
857 }
858 // No newline found. Fall back to splitting before the last space
859 // (word-level boundary)
860 None => match prefill.rfind(' ') {
861 Some(pos) => prefill[..pos].to_string(),
862 None => prefill.to_string(),
863 },
864 }
865 }
866}
867
868pub mod seed_coder {
869 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
870 //!
871 //! Seed-Coder uses different FIM tokens and order than Qwen:
872 //! - SPM order: suffix comes FIRST, then prefix, then middle
873 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
874 //! - File markers: StarCoder-style `<filename>path` (single token + path)
875 //!
876 //! All context (related files, edit history) goes in the PREFIX section.
877 //! The suffix contains only code after the editable region.
878 //!
879 //! Example prompt:
880 //!
881 //! <[fim-suffix]>
882 //! code after editable region
883 //! <[fim-prefix]><filename>related/file.py
884 //! related file content
885 //!
886 //! <filename>edit_history
887 //! --- a/some_file.py
888 //! +++ b/some_file.py
889 //! -old
890 //! +new
891 //!
892 //! <filename>path/to/target_file.py
893 //! code before editable region
894 //! <<<<<<< CURRENT
895 //! code that
896 //! needs to<|user_cursor|>
897 //! be rewritten
898 //! =======
899 //! <[fim-middle]>
900 //!
901 //! Expected output (model generates):
902 //!
903 //! updated
904 //! code with
905 //! changes applied
906 //! >>>>>>> UPDATED
907
908 use super::*;
909
910 pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
911 pub const FIM_PREFIX: &str = "<[fim-prefix]>";
912 pub const FIM_MIDDLE: &str = "<[fim-middle]>";
913 pub const FILE_MARKER: &str = "<filename>";
914
915 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
916 pub const SEPARATOR: &str = "=======\n";
917 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
918
919 pub fn special_tokens() -> &'static [&'static str] {
920 &[
921 FIM_SUFFIX,
922 FIM_PREFIX,
923 FIM_MIDDLE,
924 FILE_MARKER,
925 START_MARKER,
926 SEPARATOR,
927 END_MARKER,
928 CURSOR_MARKER,
929 ]
930 }
931
932 pub fn format_prompt_with_budget(
933 path: &Path,
934 context: &str,
935 editable_range: &Range<usize>,
936 cursor_offset: usize,
937 events: &[Arc<Event>],
938 related_files: &[RelatedFile],
939 max_tokens: usize,
940 ) -> String {
941 let suffix_section = build_suffix_section(context, editable_range);
942 let cursor_prefix_section =
943 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
944
945 let suffix_tokens = estimate_tokens(suffix_section.len());
946 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
947 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
948
949 let edit_history_section = super::format_edit_history_within_budget(
950 events,
951 FILE_MARKER,
952 "edit_history",
953 budget_after_cursor,
954 );
955 let edit_history_tokens = estimate_tokens(edit_history_section.len());
956 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
957
958 let related_files_section = super::format_related_files_within_budget(
959 related_files,
960 FILE_MARKER,
961 budget_after_edit_history,
962 );
963
964 let mut prompt = String::new();
965 prompt.push_str(&suffix_section);
966 prompt.push_str(FIM_PREFIX);
967 prompt.push_str(&related_files_section);
968 if !related_files_section.is_empty() {
969 prompt.push('\n');
970 }
971 prompt.push_str(&edit_history_section);
972 if !edit_history_section.is_empty() {
973 prompt.push('\n');
974 }
975 prompt.push_str(&cursor_prefix_section);
976 prompt.push_str(FIM_MIDDLE);
977 prompt
978 }
979
980 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
981 let mut section = String::new();
982 section.push_str(FIM_SUFFIX);
983 section.push_str(&context[editable_range.end..]);
984 if !section.ends_with('\n') {
985 section.push('\n');
986 }
987 section
988 }
989
990 fn build_cursor_prefix_section(
991 path: &Path,
992 context: &str,
993 editable_range: &Range<usize>,
994 cursor_offset: usize,
995 ) -> String {
996 let mut section = String::new();
997 let path_str = path.to_string_lossy();
998 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
999
1000 section.push_str(&context[..editable_range.start]);
1001 section.push_str(START_MARKER);
1002 section.push_str(&context[editable_range.start..cursor_offset]);
1003 section.push_str(CURSOR_MARKER);
1004 section.push_str(&context[cursor_offset..editable_range.end]);
1005 if !section.ends_with('\n') {
1006 section.push('\n');
1007 }
1008 section.push_str(SEPARATOR);
1009 section
1010 }
1011}
1012
1013/// The zeta1 prompt format
1014pub mod zeta1 {
1015 use super::*;
1016 use std::fmt::Write;
1017
    /// Cursor marker used by the zeta1 format. Defined here, it takes
    /// precedence over the parent module's `CURSOR_MARKER` inside this module.
    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
    /// Written on its own line when the excerpt starts at the beginning of the file.
    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
    /// Written on its own line before the editable region.
    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
    /// Written on its own line after the editable region.
    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";

    /// Opening of every zeta1 prompt: the instruction text followed by the
    /// heading under which the user's edits are listed.
    const INSTRUCTION_HEADER: &str = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n\n",
        "### User Edits:\n\n"
    );
    /// Heading placed between the edits and the excerpt.
    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
    /// Heading that ends the prompt; the response is expected to follow it.
    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
1032
1033 /// Formats a complete zeta1 prompt from the input events and excerpt.
1034 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
1035 let mut prompt = String::with_capacity(
1036 INSTRUCTION_HEADER.len()
1037 + input_events.len()
1038 + EXCERPT_HEADER.len()
1039 + input_excerpt.len()
1040 + RESPONSE_HEADER.len(),
1041 );
1042 prompt.push_str(INSTRUCTION_HEADER);
1043 prompt.push_str(input_events);
1044 prompt.push_str(EXCERPT_HEADER);
1045 prompt.push_str(input_excerpt);
1046 prompt.push_str(RESPONSE_HEADER);
1047 prompt
1048 }
1049
1050 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
1051 /// editable and context byte-offset ranges within `cursor_excerpt`.
1052 pub fn format_zeta1_from_input(
1053 input: &ZetaPromptInput,
1054 editable_range: Range<usize>,
1055 context_range: Range<usize>,
1056 ) -> String {
1057 let events = format_zeta1_events(&input.events);
1058 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
1059 format_zeta1_prompt(&events, &excerpt)
1060 }
1061
1062 /// Formats events in zeta1 style (oldest first).
1063 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
1064 let mut result = String::new();
1065 for event in events {
1066 let event_string = format_zeta1_event(event);
1067 if event_string.is_empty() {
1068 continue;
1069 }
1070 if !result.is_empty() {
1071 result.push_str("\n\n");
1072 }
1073 result.push_str(&event_string);
1074 }
1075 result
1076 }
1077
1078 fn format_zeta1_event(event: &Event) -> String {
1079 match event {
1080 Event::BufferChange {
1081 path,
1082 old_path,
1083 diff,
1084 ..
1085 } => {
1086 let mut prompt = String::new();
1087 if old_path != path {
1088 writeln!(
1089 prompt,
1090 "User renamed {} to {}\n",
1091 old_path.display(),
1092 path.display()
1093 )
1094 .ok();
1095 }
1096 if !diff.is_empty() {
1097 write!(
1098 prompt,
1099 "User edited {}:\n```diff\n{}\n```",
1100 path.display(),
1101 diff
1102 )
1103 .ok();
1104 }
1105 prompt
1106 }
1107 }
1108 }
1109
1110 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
1111 /// within `cursor_excerpt`.
1112 fn format_zeta1_excerpt(
1113 input: &ZetaPromptInput,
1114 editable_range: Range<usize>,
1115 context_range: Range<usize>,
1116 ) -> String {
1117 let path_str = input.cursor_path.to_string_lossy();
1118 let excerpt = &*input.cursor_excerpt;
1119 let cursor_offset = input.cursor_offset_in_excerpt;
1120
1121 let mut prompt = String::new();
1122 writeln!(&mut prompt, "```{path_str}").ok();
1123
1124 let starts_at_file_beginning =
1125 input.excerpt_start_row == Some(0) && context_range.start == 0;
1126 if starts_at_file_beginning {
1127 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
1128 }
1129
1130 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
1131
1132 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
1133 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
1134 prompt.push_str(CURSOR_MARKER);
1135 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
1136 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
1137
1138 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
1139 write!(prompt, "\n```").ok();
1140
1141 prompt
1142 }
1143
1144 /// Cleans zeta1 model output by extracting content between editable region
1145 /// markers and converting the zeta1 cursor marker to the universal one.
1146 /// Returns `None` if the output doesn't contain the expected markers.
1147 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
1148 let content = output.replace(CURSOR_MARKER, "");
1149
1150 let content_start = content
1151 .find(EDITABLE_REGION_START_MARKER)
1152 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
1153 .map(|pos| {
1154 if content.as_bytes().get(pos) == Some(&b'\n') {
1155 pos + 1
1156 } else {
1157 pos
1158 }
1159 })
1160 .unwrap_or(0);
1161
1162 let content_end = content
1163 .find(EDITABLE_REGION_END_MARKER)
1164 .map(|pos| {
1165 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
1166 pos - 1
1167 } else {
1168 pos
1169 }
1170 })
1171 .unwrap_or(content.len());
1172
1173 if content_start > content_end {
1174 return Some(String::new());
1175 }
1176
1177 let extracted = &content[content_start..content_end];
1178
1179 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
1180 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
1181 let text_before_cursor = text_before_cursor
1182 .find(EDITABLE_REGION_START_MARKER)
1183 .map(|pos| {
1184 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
1185 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
1186 after_marker + 1
1187 } else {
1188 after_marker
1189 }
1190 })
1191 .unwrap_or(0);
1192 let offset_in_extracted = zeta1_cursor_pos
1193 .saturating_sub(text_before_cursor)
1194 .min(extracted.len());
1195 offset_in_extracted
1196 });
1197
1198 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
1199 if let Some(offset) = cursor_offset {
1200 result.push_str(&extracted[..offset]);
1201 result.push_str(super::CURSOR_MARKER);
1202 result.push_str(&extracted[offset..]);
1203 } else {
1204 result.push_str(extracted);
1205 }
1206
1207 Some(result)
1208 }
1209}
1210
1211#[cfg(test)]
1212mod tests {
1213 use super::*;
1214 use indoc::indoc;
1215
1216 fn make_input(
1217 cursor_excerpt: &str,
1218 editable_range: Range<usize>,
1219 cursor_offset: usize,
1220 events: Vec<Event>,
1221 related_files: Vec<RelatedFile>,
1222 ) -> ZetaPromptInput {
1223 ZetaPromptInput {
1224 cursor_path: Path::new("test.rs").into(),
1225 cursor_excerpt: cursor_excerpt.into(),
1226 editable_range_in_excerpt: editable_range,
1227 cursor_offset_in_excerpt: cursor_offset,
1228 excerpt_start_row: None,
1229 events: events.into_iter().map(Arc::new).collect(),
1230 related_files,
1231 excerpt_ranges: None,
1232 preferred_model: None,
1233 in_open_source_repo: false,
1234 can_collect_data: false,
1235 }
1236 }
1237
1238 fn make_event(path: &str, diff: &str) -> Event {
1239 Event::BufferChange {
1240 path: Path::new(path).into(),
1241 old_path: Path::new(path).into(),
1242 diff: diff.to_string(),
1243 predicted: false,
1244 in_open_source_repo: false,
1245 }
1246 }
1247
1248 fn make_related_file(path: &str, content: &str) -> RelatedFile {
1249 RelatedFile {
1250 path: Path::new(path).into(),
1251 max_row: content.lines().count() as u32,
1252 excerpts: vec![RelatedExcerpt {
1253 row_range: 0..content.lines().count() as u32,
1254 text: content.into(),
1255 order: 0,
1256 }],
1257 in_open_source_repo: false,
1258 }
1259 }
1260
1261 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1262 format_zeta_prompt_with_budget(input, ZetaFormat::V0114180EditableRegion, max_tokens)
1263 }
1264
/// With a generous budget the related file, the edit history and the cursor
/// section are all expected in the prompt.
#[test]
fn test_no_truncation_when_within_budget() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    let expected = indoc! {r#"
        <|file_sep|>related.rs
        fn helper() {}
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new
        <|file_sep|>test.rs
        <|fim_prefix|>
        prefix
        <|fim_middle|>current
        edi<|user_cursor|>table
        <|fim_suffix|>

        suffix
        <|fim_middle|>updated
    "#};

    assert_eq!(format_with_budget(&input, 10000), expected);
}
1297
/// The same input is formatted twice: the roomy budget keeps the edit
/// history, the tight one is expected to drop it while keeping both related
/// files.
#[test]
fn test_truncation_drops_edit_history_when_budget_tight() {
    let related_files = vec![
        make_related_file("r1.rs", "a\n"),
        make_related_file("r2.rs", "b\n"),
    ];
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let input = make_input("code", 0..4, 2, events, related_files);

    let expected_roomy = indoc! {r#"
        <|file_sep|>r1.rs
        a
        <|file_sep|>r2.rs
        b
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), expected_roomy);

    let expected_tight = indoc! {r#"
        <|file_sep|>r1.rs
        a
        <|file_sep|>r2.rs
        b
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 50), expected_tight);
}
1348
/// One related file carries three excerpts; the tight budget is expected to
/// keep only the first of them.
#[test]
fn test_truncation_includes_partial_excerpts() {
    let big_file = RelatedFile {
        path: Path::new("big.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![
            RelatedExcerpt {
                row_range: 0..10,
                text: "first excerpt\n".into(),
                order: 0,
            },
            RelatedExcerpt {
                row_range: 10..20,
                text: "second excerpt\n".into(),
                order: 0,
            },
            RelatedExcerpt {
                row_range: 20..30,
                text: "third excerpt\n".into(),
                order: 0,
            },
        ],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![big_file]);

    let expected_roomy = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        second excerpt
        ...
        third excerpt
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), expected_roomy);

    let expected_tight = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 50), expected_tight);
}
1413
/// Two files: `file_a` has a high-order excerpt, `file_b` has a low-order one.
/// With a tight budget, only the lower-order excerpt from `file_b` should be
/// included.
#[test]
fn test_truncation_prioritizes_lower_order_excerpts() {
    let file_a = RelatedFile {
        path: Path::new("file_a.rs").into(),
        max_row: 10,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..10,
            text: "low priority content\n".into(),
            order: 5,
        }],
    };
    let file_b = RelatedFile {
        path: Path::new("file_b.rs").into(),
        max_row: 10,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..10,
            text: "high priority content\n".into(),
            order: 1,
        }],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![file_a, file_b]);

    // With large budget, both files included; file_b (order 1) renders before file_a (order 5).
    let expected_roomy = indoc! {r#"
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>file_a.rs
        low priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), expected_roomy);

    // With tight budget, only file_b (lower order) fits.
    // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
    // file_b header (7) + excerpt (7) = 14 tokens, which fits.
    // file_a would need another 14 tokens, which doesn't fit.
    let expected_tight = indoc! {r#"
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 52), expected_tight);
}
1482
/// A single file has excerpts at order 1 and order 3. With a tight budget,
/// only the order-1 excerpts are included while the order-3 excerpt is
/// dropped, even though they belong to the same file. This also preserves
/// the parent invariant: parent outline items have order <= their best
/// child, so they're always included when any child is.
#[test]
fn test_truncation_drops_high_order_excerpts_within_file() {
    let module_file = RelatedFile {
        path: Path::new("mod.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![
            RelatedExcerpt {
                row_range: 0..5,
                text: "mod header\n".into(),
                order: 1,
            },
            RelatedExcerpt {
                row_range: 5..15,
                text: "important fn\n".into(),
                order: 1,
            },
            RelatedExcerpt {
                row_range: 15..30,
                text: "less important fn\n".into(),
                order: 3,
            },
        ],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![module_file]);

    // With large budget, all three excerpts included.
    let expected_roomy = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        less important fn
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), expected_roomy);

    // With tight budget, only order<=1 excerpts included (header + important fn).
    let expected_tight = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 55), expected_tight);
}
1556
/// Two events are supplied, `old.rs` first. Under the tight budget only the
/// later one (`new.rs`) is expected to remain in the edit history.
#[test]
fn test_truncation_drops_older_events_first() {
    let events = vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")];
    let input = make_input("x", 0..1, 0, events, vec![]);

    let expected_roomy = indoc! {r#"
        <|file_sep|>edit history
        --- a/old.rs
        +++ b/old.rs
        -1
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), expected_roomy);

    let expected_tight = indoc! {r#"
        <|file_sep|>edit history
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 55), expected_tight);
}
1602
/// With a budget of 30 the prompt is expected to contain the cursor section
/// only, without the event or the related file.
#[test]
fn test_cursor_excerpt_always_included_with_minimal_budget() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "helper\n")];
    let input = make_input("fn main() {}", 0..12, 3, events, related_files);

    let expected = indoc! {r#"
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        fn <|user_cursor|>main() {}
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};

    assert_eq!(format_with_budget(&input, 30), expected);
}
1625
1626 fn format_seed_coder(input: &ZetaPromptInput) -> String {
1627 format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, 10000)
1628 }
1629
1630 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1631 format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, max_tokens)
1632 }
1633
/// Pins the full seed-coder layout for an input that has a prefix, a suffix,
/// one event and one related file.
#[test]
fn test_seed_coder_basic_format() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    // The expected prompt has no trailing newline after the final marker.
    let expected = indoc! {r#"
        <[fim-suffix]>
        suffix
        <[fim-prefix]><filename>related.rs
        fn helper() {}

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new

        <filename>test.rs
        prefix
        <<<<<<< CURRENT
        edi<|user_cursor|>table
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&input), expected);
}
1666
/// Pins the seed-coder layout when there are neither events nor related files.
#[test]
fn test_seed_coder_no_context() {
    let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);

    let expected = indoc! {r#"
        <[fim-suffix]>
        after
        <[fim-prefix]><filename>test.rs
        before
        <<<<<<< CURRENT
        mid<|user_cursor|>dle
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&input), expected);
}
1684
/// Formats the same input with the default budget and with a budget of 30,
/// expecting the related file and edit history to disappear in the second
/// case.
#[test]
fn test_seed_coder_truncation_drops_context() {
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let related_files = vec![make_related_file("r1.rs", "content\n")];
    let input = make_input("code", 0..4, 2, events, related_files);

    // With large budget, everything is included
    let expected_roomy = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>r1.rs
        content

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&input), expected_roomy);

    // With tight budget, context is dropped but cursor section remains
    let expected_tight = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder_with_budget(&input, 30), expected_tight);
}
1728
/// Two related files with orders 10 and 1; the tight budget is expected to
/// keep only the order-1 file.
#[test]
fn test_seed_coder_truncation_prioritizes_lower_order() {
    let low_prio = RelatedFile {
        path: Path::new("low_prio.rs").into(),
        max_row: 5,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..5,
            text: "low prio\n".into(),
            order: 10,
        }],
    };
    let high_prio = RelatedFile {
        path: Path::new("high_prio.rs").into(),
        max_row: 5,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..5,
            text: "high prio\n".into(),
            order: 1,
        }],
    };
    let input = make_input("code", 0..4, 2, vec![], vec![low_prio, high_prio]);

    // With large budget, both included; high_prio first due to lower order.
    let expected_roomy = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>high_prio.rs
        high prio
        <filename>low_prio.rs
        low prio

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&input), expected_roomy);

    // With tight budget, only high_prio included.
    // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
    // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
    let expected_tight = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>high_prio.rs
        high prio

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder_with_budget(&input, 44), expected_tight);
}
1794
/// Output with and without the trailing `>>>>>>> UPDATED` line must clean to
/// the same text.
#[test]
fn test_seed_coder_clean_output() {
    let raw_outputs = ["new code\n>>>>>>> UPDATED\n", "new code\n"];

    for raw in raw_outputs {
        assert_eq!(
            clean_zeta2_model_output(raw, ZetaFormat::V0211SeedCoder),
            "new code\n"
        );
    }
}
1809
/// Pins the complete zeta1 prompt for an excerpt that starts at row 0 and has
/// one edit event.
#[test]
fn test_format_zeta1_from_input_basic() {
    // The offsets below assume the `let` line is indented by four spaces:
    // `fn before() {}\n` is bytes 0..15, `fn foo() {\n` ends at byte 26, the
    // cursor (30) sits right after the indentation, and the editable range
    // stops at byte 41, the end of the `let` line.
    let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        excerpt_start_row: Some(0),
        ..make_input(
            excerpt,
            15..41,
            30,
            vec![make_event("other.rs", "-old\n+new\n")],
            vec![],
        )
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());

    let expected = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n",
        "\n",
        "### User Edits:\n",
        "\n",
        "User edited other.rs:\n",
        "```diff\n",
        "-old\n",
        "+new\n",
        "\n",
        "```\n",
        "\n",
        "### User Excerpt:\n",
        "\n",
        "```src/main.rs\n",
        "<|start_of_file|>\n",
        "fn before() {}\n",
        "<|editable_region_start|>\n",
        "fn foo() {\n",
        "    <|user_cursor_is_here|>let x = 1;\n",
        "\n",
        "<|editable_region_end|>}\n",
        "fn after() {}\n",
        "\n",
        "```\n",
        "\n",
        "### Response:\n",
    );
    assert_eq!(prompt, expected);
}
1864
/// An excerpt that starts at row 10 must not get the start-of-file marker.
#[test]
fn test_format_zeta1_from_input_no_start_of_file() {
    // With the four-space indented `let` line the excerpt is exactly 28 bytes
    // long and the cursor (15) sits right after the indentation.
    let excerpt = "fn foo() {\n    let x = 1;\n}\n";
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        excerpt_start_row: Some(10),
        ..make_input(excerpt, 0..28, 15, vec![], vec![])
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);

    let expected = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n",
        "\n",
        "### User Edits:\n",
        "\n",
        "\n",
        "\n",
        "### User Excerpt:\n",
        "\n",
        "```src/main.rs\n",
        "<|editable_region_start|>\n",
        "fn foo() {\n",
        "    <|user_cursor_is_here|>let x = 1;\n",
        "}\n",
        "\n",
        "<|editable_region_end|>\n",
        "```\n",
        "\n",
        "### Response:\n",
    );
    assert_eq!(prompt, expected);
}
1911
/// The editable range is a strict sub-range of the context, so text must
/// appear both before and after the editable-region markers.
#[test]
fn test_format_zeta1_from_input_with_sub_ranges() {
    // Offsets assume a four-space indented `let` line: `// prefix\n` is bytes
    // 0..10, `fn foo() {\n` ends at byte 21, the cursor (25) follows the
    // indentation, and byte 37 is just past the closing `}`.
    let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
    let editable_range = 10..37;
    let context_range = 0..excerpt.len();

    let input = ZetaPromptInput {
        excerpt_start_row: Some(0),
        ..make_input(excerpt, editable_range.clone(), 25, vec![], vec![])
    };

    let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);

    let expected = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n",
        "\n",
        "### User Edits:\n",
        "\n",
        "\n",
        "\n",
        "### User Excerpt:\n",
        "\n",
        "```test.rs\n",
        "<|start_of_file|>\n",
        "// prefix\n",
        "<|editable_region_start|>\n",
        "fn foo() {\n",
        "    <|user_cursor_is_here|>let x = 1;\n",
        "}\n",
        "<|editable_region_end|>\n",
        "// suffix\n",
        "\n",
        "```\n",
        "\n",
        "### Response:\n",
    );
    assert_eq!(prompt, expected);
}
1964
/// Content between the region markers is returned without the markers and
/// without the newlines that hug them; inner indentation is preserved.
#[test]
fn test_clean_zeta1_model_output_basic() {
    // The `println!` line is indented four spaces relative to the rest so
    // that, after `indoc!` strips the common indentation, the input carries
    // the same indentation the expected string spells out.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();

    assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
}
1978
/// The zeta1 cursor marker inside the region is replaced by the universal
/// cursor marker at the same position.
#[test]
fn test_clean_zeta1_model_output_with_cursor() {
    // The cursor line is indented four spaces relative to the rest so that,
    // after `indoc!` strips the common indentation, the cursor sits behind
    // the same indentation the expected string spells out.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            <|user_cursor_is_here|>println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();

    let expected = "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}";
    assert_eq!(cleaned, expected);
}
1995
/// Output without any markers is passed through unchanged.
#[test]
fn test_clean_zeta1_model_output_no_markers() {
    let raw = "fn main() {}\n";

    let cleaned = zeta1::clean_zeta1_model_output(raw);

    assert_eq!(cleaned.unwrap(), "fn main() {}\n");
}
2002
/// Markers with only a newline between them clean to the empty string.
#[test]
fn test_clean_zeta1_model_output_empty_region() {
    let raw = "<|editable_region_start|>\n<|editable_region_end|>\n";

    let cleaned = zeta1::clean_zeta1_model_output(raw);

    assert_eq!(cleaned.unwrap(), "");
}
2009}