1use anyhow::Result;
2use serde::{Deserialize, Serialize};
3use std::fmt::Write;
4use std::ops::Range;
5use std::path::Path;
6use std::sync::Arc;
7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
8
/// Marker written into the rendered prompt at the user's cursor position.
/// It is also listed among the special tokens of every zeta2 format.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes when building a prompt.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
16
/// Rough token estimate for a text that is `bytes` bytes long.
///
/// Uses a fixed ratio of three bytes per token; the division truncates, so
/// inputs shorter than three bytes count as zero tokens.
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
20
/// The client's preferred edit prediction model. The server may override this.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum EditPredictionModelKind {
    /// NOTE(review): presumably selects the prompt style implemented in the
    /// `zeta1` module — confirm against the server-side dispatch.
    Zeta1,
    /// NOTE(review): presumably selects one of the `ZetaFormat` prompt styles —
    /// confirm against the server-side dispatch.
    Zeta2,
}
27
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Editable region computed with a 512-token budget (optional).
    pub editable_512: Option<Range<usize>>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
    /// Presumably the context boundary when using editable_350 with 512 tokens
    /// of additional context (optional) — TODO confirm with the producer.
    pub editable_350_context_512: Option<Range<usize>>,
    /// Presumably the context boundary when using editable_350 with 1024 tokens
    /// of additional context (optional) — TODO confirm with the producer.
    pub editable_350_context_1024: Option<Range<usize>>,
    /// Presumably a context boundary computed with a 4096-token budget
    /// (optional) — TODO confirm with the producer.
    pub context_4096: Option<Range<usize>>,
    /// Presumably a context boundary computed with an 8192-token budget
    /// (optional) — TODO confirm with the producer.
    pub context_8192: Option<Range<usize>>,
}
52
/// Everything needed to render an edit prediction prompt for one cursor position.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written into the prompt's file header.
    pub cursor_path: Arc<Path>,
    /// Text surrounding the cursor. All ranges and offsets below index into it by byte.
    pub cursor_excerpt: Arc<str>,
    /// Editable byte range within `cursor_excerpt`; used as-is when
    /// `excerpt_ranges` is absent.
    pub editable_range_in_excerpt: Range<usize>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Row at which the excerpt starts. The zeta1 formatter compares it with
    /// `Some(0)` when deciding whether the excerpt begins at the start of the file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit history events. The budgeted formatter keeps the most recent ones
    /// first when the budget is tight and renders the kept events oldest-first.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other files offered to the model as additional context.
    pub related_files: Vec<RelatedFile>,
    /// When set, the excerpt was computed with a larger budget (~512 tokens)
    /// and these ranges let the server select model-appropriate subsets.
    /// When absent, the excerpt IS the context region and
    /// `editable_range_in_excerpt` is the only editable range.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_ranges: Option<ExcerptRanges>,
    /// Client's preferred model. The server may override.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub preferred_model: Option<EditPredictionModelKind>,
    /// NOTE(review): presumably whether the cursor file belongs to an open
    /// source repository; not read by the formatting code in this file.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// NOTE(review): presumably whether this request may be used for data
    /// collection; not read by the formatting code in this file.
    #[serde(default)]
    pub can_collect_data: bool,
}
77
/// Prompt format versions understood by the zeta2 prompt formatter.
///
/// The variant name doubles as the format's string name (via `IntoStaticStr`),
/// and `ZetaFormat::parse` matches against those names.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    /// Cursor section rendered by `v0112_middle_at_end`.
    V0112MiddleAtEnd,
    /// Cursor section rendered by `v0113_ordered`.
    V0113Ordered,
    /// Cursor section rendered by `v0113_ordered` (same layout as `V0113Ordered`).
    V0114180EditableRegion,
    /// Cursor section rendered by `v0120_git_merge_markers`.
    V0120GitMergeMarkers,
    /// Cursor section rendered by `v0131_git_merge_markers_prefix`. This is the default.
    #[default]
    V0131GitMergeMarkersPrefix,
    /// Same prompt as `V0131GitMergeMarkersPrefix`; additionally `get_prefill`
    /// returns a non-empty prefill for this format.
    V0211Prefill,
    /// Whole prompt rendered by `seed_coder`.
    V0211SeedCoder,
}
102
103impl std::fmt::Display for ZetaFormat {
104 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
105 write!(f, "{}", <&'static str>::from(self))
106 }
107}
108
109impl ZetaFormat {
110 pub fn parse(format_name: &str) -> Result<Self> {
111 let mut results = ZetaFormat::iter().filter(|version| {
112 <&'static str>::from(version)
113 .to_lowercase()
114 .contains(&format_name.to_lowercase())
115 });
116 let Some(result) = results.next() else {
117 anyhow::bail!(
118 "`{format_name}` did not match any of:\n{}",
119 Self::options_as_string()
120 );
121 };
122 if results.next().is_some() {
123 anyhow::bail!(
124 "`{format_name}` matched more than one of:\n{}",
125 Self::options_as_string()
126 );
127 }
128 Ok(result)
129 }
130
131 pub fn options_as_string() -> String {
132 ZetaFormat::iter()
133 .map(|format| format!("- {}\n", <&'static str>::from(format)))
134 .collect::<Vec<_>>()
135 .concat()
136 }
137
138 pub fn special_tokens(&self) -> &'static [&'static str] {
139 match self {
140 ZetaFormat::V0112MiddleAtEnd
141 | ZetaFormat::V0113Ordered
142 | ZetaFormat::V0114180EditableRegion => &[
143 "<|fim_prefix|>",
144 "<|fim_suffix|>",
145 "<|fim_middle|>",
146 "<|file_sep|>",
147 CURSOR_MARKER,
148 ],
149 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
150 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
151 v0131_git_merge_markers_prefix::special_tokens()
152 }
153 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
154 }
155 }
156}
157
/// An entry in the edit history that is rendered into the prompt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, rendered by `write_event` as a diff with
    /// `--- a/<old_path>` and `+++ b/<path>` header lines.
    BufferChange {
        /// Path of the file after the change.
        path: Arc<Path>,
        /// Path of the file before the change; differs from `path` for renames.
        old_path: Arc<Path>,
        /// Diff text, appended verbatim after the header lines.
        diff: String,
        /// When true, the rendered event is prefixed with
        /// `// User accepted prediction:`.
        predicted: bool,
        /// NOTE(review): presumably whether the edited file belongs to an open
        /// source repository; it is not rendered into the prompt.
        in_open_source_repo: bool,
    },
}
169
170impl Event {
171 pub fn in_open_source_repo(&self) -> bool {
172 match self {
173 Event::BufferChange {
174 in_open_source_repo,
175 ..
176 } => *in_open_source_repo,
177 }
178 }
179}
180
181pub fn write_event(prompt: &mut String, event: &Event) {
182 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
183 for component in path.components() {
184 prompt.push('/');
185 write!(prompt, "{}", component.as_os_str().display()).ok();
186 }
187 }
188 match event {
189 Event::BufferChange {
190 path,
191 old_path,
192 diff,
193 predicted,
194 in_open_source_repo: _,
195 } => {
196 if *predicted {
197 prompt.push_str("// User accepted prediction:\n");
198 }
199 prompt.push_str("--- a");
200 write_path_as_unix_str(prompt, old_path.as_ref());
201 prompt.push_str("\n+++ b");
202 write_path_as_unix_str(prompt, path.as_ref());
203 prompt.push('\n');
204 prompt.push_str(diff);
205 }
206 }
207}
208
/// A file other than the cursor file whose excerpts are offered as prompt context.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written into the file header that precedes the excerpts.
    pub path: Arc<Path>,
    /// Row bound for the file: an excerpt whose `row_range.end` is below this
    /// value is followed by a `...` line when rendered.
    pub max_row: u32,
    /// Excerpts of this file, rendered in the order they appear here.
    pub excerpts: Vec<RelatedExcerpt>,
    /// NOTE(review): presumably whether the file belongs to an open source
    /// repository; not read by the formatting code in this file.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
217
/// A contiguous piece of a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by the excerpt; `end` is compared against the file's
    /// `max_row` to decide whether a trailing `...` line is rendered.
    pub row_range: Range<u32>,
    /// The excerpt's text.
    pub text: Arc<str>,
    /// Priority level used by the budgeted formatter: lower values are included
    /// first. Defaults to 0 when absent from the serialized form.
    #[serde(default)]
    pub order: usize,
}
225
226pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
227 format
228 .special_tokens()
229 .iter()
230 .any(|token| input.cursor_excerpt.contains(token))
231}
232
/// Renders the prompt for `input` in the given `format`, using
/// `MAX_PROMPT_TOKENS` as the token budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    format_zeta_prompt_with_budget(input, format, MAX_PROMPT_TOKENS)
}
236
237/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
238pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
239 match format {
240 ZetaFormat::V0120GitMergeMarkers => output
241 .strip_suffix(v0120_git_merge_markers::END_MARKER)
242 .unwrap_or(output),
243 ZetaFormat::V0131GitMergeMarkersPrefix => output
244 .strip_suffix(v0131_git_merge_markers_prefix::END_MARKER)
245 .unwrap_or(output),
246 ZetaFormat::V0211SeedCoder => output
247 .strip_suffix(seed_coder::END_MARKER)
248 .unwrap_or(output),
249 _ => output,
250 }
251}
252
253pub fn excerpt_range_for_format(
254 format: ZetaFormat,
255 ranges: &ExcerptRanges,
256) -> (Range<usize>, Range<usize>) {
257 match format {
258 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
259 ranges.editable_150.clone(),
260 ranges.editable_150_context_350.clone(),
261 ),
262 ZetaFormat::V0114180EditableRegion
263 | ZetaFormat::V0120GitMergeMarkers
264 | ZetaFormat::V0131GitMergeMarkersPrefix
265 | ZetaFormat::V0211Prefill
266 | ZetaFormat::V0211SeedCoder => (
267 ranges.editable_350.clone(),
268 ranges.editable_350_context_150.clone(),
269 ),
270 }
271}
272
273pub fn resolve_cursor_region(
274 input: &ZetaPromptInput,
275 format: ZetaFormat,
276) -> (&str, Range<usize>, usize) {
277 let Some(ranges) = &input.excerpt_ranges else {
278 return (
279 &input.cursor_excerpt,
280 input.editable_range_in_excerpt.clone(),
281 input.cursor_offset_in_excerpt,
282 );
283 };
284
285 let (editable_range, context_range) = excerpt_range_for_format(format, ranges);
286 let context_start = context_range.start;
287 let context_text = &input.cursor_excerpt[context_range];
288 let adjusted_editable =
289 (editable_range.start - context_start)..(editable_range.end - context_start);
290 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
291
292 (context_text, adjusted_editable, adjusted_cursor)
293}
294
295fn format_zeta_prompt_with_budget(
296 input: &ZetaPromptInput,
297 format: ZetaFormat,
298 max_tokens: usize,
299) -> String {
300 let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
301 let path = &*input.cursor_path;
302
303 let mut cursor_section = String::new();
304 match format {
305 ZetaFormat::V0112MiddleAtEnd => {
306 v0112_middle_at_end::write_cursor_excerpt_section(
307 &mut cursor_section,
308 path,
309 context,
310 &editable_range,
311 cursor_offset,
312 );
313 }
314 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
315 v0113_ordered::write_cursor_excerpt_section(
316 &mut cursor_section,
317 path,
318 context,
319 &editable_range,
320 cursor_offset,
321 )
322 }
323 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
324 &mut cursor_section,
325 path,
326 context,
327 &editable_range,
328 cursor_offset,
329 ),
330 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
331 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
332 &mut cursor_section,
333 path,
334 context,
335 &editable_range,
336 cursor_offset,
337 )
338 }
339 ZetaFormat::V0211SeedCoder => {
340 return seed_coder::format_prompt_with_budget(
341 path,
342 context,
343 &editable_range,
344 cursor_offset,
345 &input.events,
346 &input.related_files,
347 max_tokens,
348 );
349 }
350 }
351
352 let cursor_tokens = estimate_tokens(cursor_section.len());
353 let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
354
355 let edit_history_section = format_edit_history_within_budget(
356 &input.events,
357 "<|file_sep|>",
358 "edit history",
359 budget_after_cursor,
360 );
361 let edit_history_tokens = estimate_tokens(edit_history_section.len());
362 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
363
364 let related_files_section = format_related_files_within_budget(
365 &input.related_files,
366 "<|file_sep|>",
367 budget_after_edit_history,
368 );
369
370 let mut prompt = String::new();
371 prompt.push_str(&related_files_section);
372 prompt.push_str(&edit_history_section);
373 prompt.push_str(&cursor_section);
374 prompt
375}
376
377pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
378 match format {
379 ZetaFormat::V0112MiddleAtEnd
380 | ZetaFormat::V0113Ordered
381 | ZetaFormat::V0114180EditableRegion
382 | ZetaFormat::V0120GitMergeMarkers
383 | ZetaFormat::V0131GitMergeMarkersPrefix
384 | ZetaFormat::V0211SeedCoder => String::new(),
385 ZetaFormat::V0211Prefill => {
386 let (context, editable_range, _) = resolve_cursor_region(input, format);
387 v0211_prefill::get_prefill(context, &editable_range)
388 }
389 }
390}
391
392fn format_edit_history_within_budget(
393 events: &[Arc<Event>],
394 file_marker: &str,
395 edit_history_name: &str,
396 max_tokens: usize,
397) -> String {
398 let header = format!("{}{}\n", file_marker, edit_history_name);
399 let header_tokens = estimate_tokens(header.len());
400 if header_tokens >= max_tokens {
401 return String::new();
402 }
403
404 let mut event_strings: Vec<String> = Vec::new();
405 let mut total_tokens = header_tokens;
406
407 for event in events.iter().rev() {
408 let mut event_str = String::new();
409 write_event(&mut event_str, event);
410 let event_tokens = estimate_tokens(event_str.len());
411
412 if total_tokens + event_tokens > max_tokens {
413 break;
414 }
415 total_tokens += event_tokens;
416 event_strings.push(event_str);
417 }
418
419 if event_strings.is_empty() {
420 return String::new();
421 }
422
423 let mut result = header;
424 for event_str in event_strings.iter().rev() {
425 result.push_str(event_str);
426 }
427 result
428}
429
430fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
431 let needs_newline = !excerpt.text.ends_with('\n');
432 let needs_ellipsis = excerpt.row_range.end < file_max_row;
433 let len = excerpt.text.len()
434 + if needs_newline { "\n".len() } else { 0 }
435 + if needs_ellipsis { "...\n".len() } else { 0 };
436 estimate_tokens(len)
437}
438
/// Renders the related-files section, admitting excerpts by priority until
/// `max_tokens` is exhausted.
///
/// Excerpts are grouped by their `order` value and admitted level by level,
/// lowest `order` first. A level that fits entirely is included in full. The
/// first level that does not fit is included partially (for each file, a
/// leading run of its excerpts at that level) and no further levels are
/// considered. The first time a file contributes an excerpt, the cost of its
/// header line (`<file_marker><path>`) is charged as well.
///
/// In the output, files are sorted by the lowest `order` among their included
/// excerpts (ties broken by position in `related_files`), and excerpts within
/// a file keep their original position. Returns an empty string when nothing
/// fits.
fn format_related_files_within_budget(
    related_files: &[RelatedFile],
    file_marker: &str,
    max_tokens: usize,
) -> String {
    // Collect the distinct order values across all excerpts, sorted ascending.
    let mut order_levels: Vec<usize> = related_files
        .iter()
        .flat_map(|f| f.excerpts.iter().map(|e| e.order))
        .collect();
    order_levels.sort_unstable();
    order_levels.dedup();

    // Pre-compute file header strings and their token costs.
    let file_headers: Vec<String> = related_files
        .iter()
        .map(|file| {
            let path_str = file.path.to_string_lossy();
            format!("{}{}\n", file_marker, path_str)
        })
        .collect();

    // Track which excerpts are included per file.
    let mut included: Vec<Vec<bool>> = related_files
        .iter()
        .map(|file| vec![false; file.excerpts.len()])
        .collect();
    let mut file_included: Vec<bool> = vec![false; related_files.len()];
    let mut total_tokens = 0;

    // Process order levels from best (lowest) to worst. At each level, try to
    // include all not-yet-included excerpts with that order across all files.
    // If the full level doesn't fit, include a partial prefix (top-to-bottom
    // within each file) and stop — don't proceed to worse order levels.
    'outer: for &order in &order_levels {
        // Gather the work for this order level: for each file that has excerpts
        // at this order, collect the not-yet-included excerpt indices (in their
        // original positional order) and the token cost to add them (including
        // the file header if the file isn't already included).
        struct FileWork {
            file_idx: usize,
            excerpt_indices: Vec<usize>,
            header_cost: usize,
            excerpt_costs: Vec<usize>,
        }

        let mut work_items: Vec<FileWork> = Vec::new();
        for (file_idx, file) in related_files.iter().enumerate() {
            let mut excerpt_indices = Vec::new();
            let mut excerpt_costs = Vec::new();
            for (eidx, excerpt) in file.excerpts.iter().enumerate() {
                if excerpt.order == order && !included[file_idx][eidx] {
                    excerpt_indices.push(eidx);
                    excerpt_costs.push(excerpt_rendered_tokens(excerpt, file.max_row));
                }
            }
            if excerpt_indices.is_empty() {
                continue;
            }
            // The header is only paid for once per file.
            let header_cost = if file_included[file_idx] {
                0
            } else {
                estimate_tokens(file_headers[file_idx].len())
            };
            work_items.push(FileWork {
                file_idx,
                excerpt_indices,
                header_cost,
                excerpt_costs,
            });
        }

        // Compute the total cost for this entire order level.
        let level_cost: usize = work_items
            .iter()
            .map(|w| w.header_cost + w.excerpt_costs.iter().sum::<usize>())
            .sum();

        if total_tokens + level_cost <= max_tokens {
            // The whole level fits — include everything.
            for work in &work_items {
                total_tokens += work.header_cost;
                file_included[work.file_idx] = true;
                for (i, &eidx) in work.excerpt_indices.iter().enumerate() {
                    included[work.file_idx][eidx] = true;
                    total_tokens += work.excerpt_costs[i];
                }
            }
        } else {
            // The whole level doesn't fit. Include as many excerpts as possible
            // from each file (in positional order), then stop entirely.
            for work in &work_items {
                // Recomputed per file because earlier files at this level may
                // already have consumed part of the budget.
                let available = max_tokens.saturating_sub(total_tokens);
                let mut file_cost = work.header_cost;

                // Count the leading excerpts that fit together with the header.
                let mut count = 0;
                for i in 0..work.excerpt_indices.len() {
                    if file_cost + work.excerpt_costs[i] > available {
                        break;
                    }
                    file_cost += work.excerpt_costs[i];
                    count += 1;
                }

                // A file whose first excerpt does not fit contributes nothing,
                // not even its header.
                if count > 0 {
                    total_tokens += work.header_cost;
                    file_included[work.file_idx] = true;
                    for (i, &eidx) in work.excerpt_indices.iter().take(count).enumerate() {
                        included[work.file_idx][eidx] = true;
                        total_tokens += work.excerpt_costs[i];
                    }
                }
            }
            break 'outer;
        }
    }

    // Determine file rendering order: by the best (lowest) order of any
    // included excerpt, breaking ties by original file index.
    let mut file_order: Vec<(usize, usize)> = Vec::new();
    for (file_idx, file) in related_files.iter().enumerate() {
        if !file_included[file_idx] {
            continue;
        }
        let best_order = file
            .excerpts
            .iter()
            .enumerate()
            .filter(|(eidx, _)| included[file_idx][*eidx])
            .map(|(_, e)| e.order)
            .min()
            .unwrap_or(usize::MAX);
        file_order.push((file_idx, best_order));
    }
    file_order.sort_by_key(|&(file_idx, best_order)| (best_order, file_idx));

    // Render included files and excerpts in positional order within each file.
    let mut result = String::new();
    for &(file_idx, _) in &file_order {
        let file = &related_files[file_idx];
        result.push_str(&file_headers[file_idx]);
        for (eidx, excerpt) in file.excerpts.iter().enumerate() {
            if !included[file_idx][eidx] {
                continue;
            }
            result.push_str(&excerpt.text);
            if !result.ends_with('\n') {
                result.push('\n');
            }
            // Mark that the file continues past this excerpt.
            if excerpt.row_range.end < file.max_row {
                result.push_str("...\n");
            }
        }
    }

    result
}
596
597pub fn write_related_files(
598 prompt: &mut String,
599 related_files: &[RelatedFile],
600) -> Vec<Range<usize>> {
601 let mut ranges = Vec::new();
602 for file in related_files {
603 let start = prompt.len();
604 let path_str = file.path.to_string_lossy();
605 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
606 for excerpt in &file.excerpts {
607 prompt.push_str(&excerpt.text);
608 if !prompt.ends_with('\n') {
609 prompt.push('\n');
610 }
611 if excerpt.row_range.end < file.max_row {
612 prompt.push_str("...\n");
613 }
614 }
615 let end = prompt.len();
616 ranges.push(start..end);
617 }
618 ranges
619}
620
621mod v0112_middle_at_end {
622 use super::*;
623
624 pub fn write_cursor_excerpt_section(
625 prompt: &mut String,
626 path: &Path,
627 context: &str,
628 editable_range: &Range<usize>,
629 cursor_offset: usize,
630 ) {
631 let path_str = path.to_string_lossy();
632 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
633
634 prompt.push_str("<|fim_prefix|>\n");
635 prompt.push_str(&context[..editable_range.start]);
636
637 prompt.push_str("<|fim_suffix|>\n");
638 prompt.push_str(&context[editable_range.end..]);
639 if !prompt.ends_with('\n') {
640 prompt.push('\n');
641 }
642
643 prompt.push_str("<|fim_middle|>current\n");
644 prompt.push_str(&context[editable_range.start..cursor_offset]);
645 prompt.push_str(CURSOR_MARKER);
646 prompt.push_str(&context[cursor_offset..editable_range.end]);
647 if !prompt.ends_with('\n') {
648 prompt.push('\n');
649 }
650
651 prompt.push_str("<|fim_middle|>updated\n");
652 }
653}
654
655mod v0113_ordered {
656 use super::*;
657
658 pub fn write_cursor_excerpt_section(
659 prompt: &mut String,
660 path: &Path,
661 context: &str,
662 editable_range: &Range<usize>,
663 cursor_offset: usize,
664 ) {
665 let path_str = path.to_string_lossy();
666 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
667
668 prompt.push_str("<|fim_prefix|>\n");
669 prompt.push_str(&context[..editable_range.start]);
670 if !prompt.ends_with('\n') {
671 prompt.push('\n');
672 }
673
674 prompt.push_str("<|fim_middle|>current\n");
675 prompt.push_str(&context[editable_range.start..cursor_offset]);
676 prompt.push_str(CURSOR_MARKER);
677 prompt.push_str(&context[cursor_offset..editable_range.end]);
678 if !prompt.ends_with('\n') {
679 prompt.push('\n');
680 }
681
682 prompt.push_str("<|fim_suffix|>\n");
683 prompt.push_str(&context[editable_range.end..]);
684 if !prompt.ends_with('\n') {
685 prompt.push('\n');
686 }
687
688 prompt.push_str("<|fim_middle|>updated\n");
689 }
690}
691
692pub mod v0120_git_merge_markers {
693 //! A prompt that uses git-style merge conflict markers to represent the editable region.
694 //!
695 //! Example prompt:
696 //!
697 //! <|file_sep|>path/to/target_file.py
698 //! <|fim_prefix|>
699 //! code before editable region
700 //! <|fim_suffix|>
701 //! code after editable region
702 //! <|fim_middle|>
703 //! <<<<<<< CURRENT
704 //! code that
705 //! needs to<|user_cursor|>
706 //! be rewritten
707 //! =======
708 //!
709 //! Expected output (should be generated by the model):
710 //!
711 //! updated
712 //! code with
713 //! changes applied
714 //! >>>>>>> UPDATED
715
716 use super::*;
717
718 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
719 pub const SEPARATOR: &str = "=======\n";
720 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
721
722 pub fn special_tokens() -> &'static [&'static str] {
723 &[
724 "<|fim_prefix|>",
725 "<|fim_suffix|>",
726 "<|fim_middle|>",
727 "<|file_sep|>",
728 START_MARKER,
729 SEPARATOR,
730 END_MARKER,
731 CURSOR_MARKER,
732 ]
733 }
734
735 pub fn write_cursor_excerpt_section(
736 prompt: &mut String,
737 path: &Path,
738 context: &str,
739 editable_range: &Range<usize>,
740 cursor_offset: usize,
741 ) {
742 let path_str = path.to_string_lossy();
743 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
744
745 prompt.push_str("<|fim_prefix|>");
746 prompt.push_str(&context[..editable_range.start]);
747
748 prompt.push_str("<|fim_suffix|>");
749 prompt.push_str(&context[editable_range.end..]);
750 if !prompt.ends_with('\n') {
751 prompt.push('\n');
752 }
753
754 prompt.push_str("<|fim_middle|>");
755 prompt.push_str(START_MARKER);
756 prompt.push_str(&context[editable_range.start..cursor_offset]);
757 prompt.push_str(CURSOR_MARKER);
758 prompt.push_str(&context[cursor_offset..editable_range.end]);
759 if !prompt.ends_with('\n') {
760 prompt.push('\n');
761 }
762 prompt.push_str(SEPARATOR);
763 }
764}
765
766pub mod v0131_git_merge_markers_prefix {
767 //! A prompt that uses git-style merge conflict markers to represent the editable region.
768 //!
769 //! Example prompt:
770 //!
771 //! <|file_sep|>path/to/target_file.py
772 //! <|fim_prefix|>
773 //! code before editable region
774 //! <<<<<<< CURRENT
775 //! code that
776 //! needs to<|user_cursor|>
777 //! be rewritten
778 //! =======
779 //! <|fim_suffix|>
780 //! code after editable region
781 //! <|fim_middle|>
782 //!
783 //! Expected output (should be generated by the model):
784 //!
785 //! updated
786 //! code with
787 //! changes applied
788 //! >>>>>>> UPDATED
789
790 use super::*;
791
792 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
793 pub const SEPARATOR: &str = "=======\n";
794 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
795
796 pub fn special_tokens() -> &'static [&'static str] {
797 &[
798 "<|fim_prefix|>",
799 "<|fim_suffix|>",
800 "<|fim_middle|>",
801 "<|file_sep|>",
802 START_MARKER,
803 SEPARATOR,
804 END_MARKER,
805 CURSOR_MARKER,
806 ]
807 }
808
809 pub fn write_cursor_excerpt_section(
810 prompt: &mut String,
811 path: &Path,
812 context: &str,
813 editable_range: &Range<usize>,
814 cursor_offset: usize,
815 ) {
816 let path_str = path.to_string_lossy();
817 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
818
819 prompt.push_str("<|fim_prefix|>");
820 prompt.push_str(&context[..editable_range.start]);
821 prompt.push_str(START_MARKER);
822 prompt.push_str(&context[editable_range.start..cursor_offset]);
823 prompt.push_str(CURSOR_MARKER);
824 prompt.push_str(&context[cursor_offset..editable_range.end]);
825 if !prompt.ends_with('\n') {
826 prompt.push('\n');
827 }
828 prompt.push_str(SEPARATOR);
829
830 prompt.push_str("<|fim_suffix|>");
831 prompt.push_str(&context[editable_range.end..]);
832 if !prompt.ends_with('\n') {
833 prompt.push('\n');
834 }
835
836 prompt.push_str("<|fim_middle|>");
837 }
838}
839
840pub mod v0211_prefill {
841 use super::*;
842
843 pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
844 let editable_region = &context[editable_range.start..editable_range.end];
845
846 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
847 let prefill_len = editable_region.floor_char_boundary(prefill_len);
848
849 // Find a token boundary to avoid splitting tokens in the prefill.
850 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
851 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
852 // the \n and consume any consecutive \n characters after it.
853 let prefill = &editable_region[..prefill_len];
854 match prefill.rfind('\n') {
855 Some(pos) => {
856 let mut end = pos + 1;
857 while end < editable_region.len()
858 && editable_region.as_bytes().get(end) == Some(&b'\n')
859 {
860 end += 1;
861 }
862 editable_region[..end].to_string()
863 }
864 // No newline found. Fall back to splitting before the last space
865 // (word-level boundary)
866 None => match prefill.rfind(' ') {
867 Some(pos) => prefill[..pos].to_string(),
868 None => prefill.to_string(),
869 },
870 }
871 }
872}
873
874pub mod seed_coder {
875 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
876 //!
877 //! Seed-Coder uses different FIM tokens and order than Qwen:
878 //! - SPM order: suffix comes FIRST, then prefix, then middle
879 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
880 //! - File markers: StarCoder-style `<filename>path` (single token + path)
881 //!
882 //! All context (related files, edit history) goes in the PREFIX section.
883 //! The suffix contains only code after the editable region.
884 //!
885 //! Example prompt:
886 //!
887 //! <[fim-suffix]>
888 //! code after editable region
889 //! <[fim-prefix]><filename>related/file.py
890 //! related file content
891 //!
892 //! <filename>edit_history
893 //! --- a/some_file.py
894 //! +++ b/some_file.py
895 //! -old
896 //! +new
897 //!
898 //! <filename>path/to/target_file.py
899 //! code before editable region
900 //! <<<<<<< CURRENT
901 //! code that
902 //! needs to<|user_cursor|>
903 //! be rewritten
904 //! =======
905 //! <[fim-middle]>
906 //!
907 //! Expected output (model generates):
908 //!
909 //! updated
910 //! code with
911 //! changes applied
912 //! >>>>>>> UPDATED
913
914 use super::*;
915
916 pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
917 pub const FIM_PREFIX: &str = "<[fim-prefix]>";
918 pub const FIM_MIDDLE: &str = "<[fim-middle]>";
919 pub const FILE_MARKER: &str = "<filename>";
920
921 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
922 pub const SEPARATOR: &str = "=======\n";
923 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
924
925 pub fn special_tokens() -> &'static [&'static str] {
926 &[
927 FIM_SUFFIX,
928 FIM_PREFIX,
929 FIM_MIDDLE,
930 FILE_MARKER,
931 START_MARKER,
932 SEPARATOR,
933 END_MARKER,
934 CURSOR_MARKER,
935 ]
936 }
937
938 pub fn format_prompt_with_budget(
939 path: &Path,
940 context: &str,
941 editable_range: &Range<usize>,
942 cursor_offset: usize,
943 events: &[Arc<Event>],
944 related_files: &[RelatedFile],
945 max_tokens: usize,
946 ) -> String {
947 let suffix_section = build_suffix_section(context, editable_range);
948 let cursor_prefix_section =
949 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
950
951 let suffix_tokens = estimate_tokens(suffix_section.len());
952 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
953 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
954
955 let edit_history_section = super::format_edit_history_within_budget(
956 events,
957 FILE_MARKER,
958 "edit_history",
959 budget_after_cursor,
960 );
961 let edit_history_tokens = estimate_tokens(edit_history_section.len());
962 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
963
964 let related_files_section = super::format_related_files_within_budget(
965 related_files,
966 FILE_MARKER,
967 budget_after_edit_history,
968 );
969
970 let mut prompt = String::new();
971 prompt.push_str(&suffix_section);
972 prompt.push_str(FIM_PREFIX);
973 prompt.push_str(&related_files_section);
974 if !related_files_section.is_empty() {
975 prompt.push('\n');
976 }
977 prompt.push_str(&edit_history_section);
978 if !edit_history_section.is_empty() {
979 prompt.push('\n');
980 }
981 prompt.push_str(&cursor_prefix_section);
982 prompt.push_str(FIM_MIDDLE);
983 prompt
984 }
985
986 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
987 let mut section = String::new();
988 section.push_str(FIM_SUFFIX);
989 section.push_str(&context[editable_range.end..]);
990 if !section.ends_with('\n') {
991 section.push('\n');
992 }
993 section
994 }
995
996 fn build_cursor_prefix_section(
997 path: &Path,
998 context: &str,
999 editable_range: &Range<usize>,
1000 cursor_offset: usize,
1001 ) -> String {
1002 let mut section = String::new();
1003 let path_str = path.to_string_lossy();
1004 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
1005
1006 section.push_str(&context[..editable_range.start]);
1007 section.push_str(START_MARKER);
1008 section.push_str(&context[editable_range.start..cursor_offset]);
1009 section.push_str(CURSOR_MARKER);
1010 section.push_str(&context[cursor_offset..editable_range.end]);
1011 if !section.ends_with('\n') {
1012 section.push('\n');
1013 }
1014 section.push_str(SEPARATOR);
1015 section
1016 }
1017}
1018
1019/// The zeta1 prompt format
1020pub mod zeta1 {
1021 use super::*;
1022 use std::fmt::Write;
1023
    /// Marker written at the cursor position in the zeta1 excerpt. Note that it
    /// differs from the zeta2 `CURSOR_MARKER` defined at the top of the file.
    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
    /// Line written at the top of the excerpt when it starts at the beginning of the file.
    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
    /// Line written immediately before the editable region.
    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
    /// Presumably written immediately after the editable region — its use lies
    /// outside the visible part of this module; TODO confirm.
    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";

    /// Opening of every zeta1 prompt: the instruction text followed by the
    /// "User Edits" heading, after which the formatted events are appended.
    const INSTRUCTION_HEADER: &str = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n\n",
        "### User Edits:\n\n"
    );
    /// Heading placed between the events and the excerpt.
    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
    /// Heading that ends the prompt; the model's response follows it.
    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
1038
1039 /// Formats a complete zeta1 prompt from the input events and excerpt.
1040 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
1041 let mut prompt = String::with_capacity(
1042 INSTRUCTION_HEADER.len()
1043 + input_events.len()
1044 + EXCERPT_HEADER.len()
1045 + input_excerpt.len()
1046 + RESPONSE_HEADER.len(),
1047 );
1048 prompt.push_str(INSTRUCTION_HEADER);
1049 prompt.push_str(input_events);
1050 prompt.push_str(EXCERPT_HEADER);
1051 prompt.push_str(input_excerpt);
1052 prompt.push_str(RESPONSE_HEADER);
1053 prompt
1054 }
1055
1056 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
1057 /// editable and context byte-offset ranges within `cursor_excerpt`.
1058 pub fn format_zeta1_from_input(
1059 input: &ZetaPromptInput,
1060 editable_range: Range<usize>,
1061 context_range: Range<usize>,
1062 ) -> String {
1063 let events = format_zeta1_events(&input.events);
1064 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
1065 format_zeta1_prompt(&events, &excerpt)
1066 }
1067
1068 /// Formats events in zeta1 style (oldest first).
1069 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
1070 let mut result = String::new();
1071 for event in events {
1072 let event_string = format_zeta1_event(event);
1073 if event_string.is_empty() {
1074 continue;
1075 }
1076 if !result.is_empty() {
1077 result.push_str("\n\n");
1078 }
1079 result.push_str(&event_string);
1080 }
1081 result
1082 }
1083
1084 fn format_zeta1_event(event: &Event) -> String {
1085 match event {
1086 Event::BufferChange {
1087 path,
1088 old_path,
1089 diff,
1090 ..
1091 } => {
1092 let mut prompt = String::new();
1093 if old_path != path {
1094 writeln!(
1095 prompt,
1096 "User renamed {} to {}\n",
1097 old_path.display(),
1098 path.display()
1099 )
1100 .ok();
1101 }
1102 if !diff.is_empty() {
1103 write!(
1104 prompt,
1105 "User edited {}:\n```diff\n{}\n```",
1106 path.display(),
1107 diff
1108 )
1109 .ok();
1110 }
1111 prompt
1112 }
1113 }
1114 }
1115
1116 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
1117 /// within `cursor_excerpt`.
1118 fn format_zeta1_excerpt(
1119 input: &ZetaPromptInput,
1120 editable_range: Range<usize>,
1121 context_range: Range<usize>,
1122 ) -> String {
1123 let path_str = input.cursor_path.to_string_lossy();
1124 let excerpt = &*input.cursor_excerpt;
1125 let cursor_offset = input.cursor_offset_in_excerpt;
1126
1127 let mut prompt = String::new();
1128 writeln!(&mut prompt, "```{path_str}").ok();
1129
1130 let starts_at_file_beginning =
1131 input.excerpt_start_row == Some(0) && context_range.start == 0;
1132 if starts_at_file_beginning {
1133 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
1134 }
1135
1136 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
1137
1138 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
1139 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
1140 prompt.push_str(CURSOR_MARKER);
1141 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
1142 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
1143
1144 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
1145 write!(prompt, "\n```").ok();
1146
1147 prompt
1148 }
1149
1150 /// Cleans zeta1 model output by extracting content between editable region
1151 /// markers and converting the zeta1 cursor marker to the universal one.
1152 /// Returns `None` if the output doesn't contain the expected markers.
1153 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
1154 let content = output.replace(CURSOR_MARKER, "");
1155
1156 let content_start = content
1157 .find(EDITABLE_REGION_START_MARKER)
1158 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
1159 .map(|pos| {
1160 if content.as_bytes().get(pos) == Some(&b'\n') {
1161 pos + 1
1162 } else {
1163 pos
1164 }
1165 })
1166 .unwrap_or(0);
1167
1168 let content_end = content
1169 .find(EDITABLE_REGION_END_MARKER)
1170 .map(|pos| {
1171 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
1172 pos - 1
1173 } else {
1174 pos
1175 }
1176 })
1177 .unwrap_or(content.len());
1178
1179 if content_start > content_end {
1180 return Some(String::new());
1181 }
1182
1183 let extracted = &content[content_start..content_end];
1184
1185 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
1186 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
1187 let text_before_cursor = text_before_cursor
1188 .find(EDITABLE_REGION_START_MARKER)
1189 .map(|pos| {
1190 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
1191 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
1192 after_marker + 1
1193 } else {
1194 after_marker
1195 }
1196 })
1197 .unwrap_or(0);
1198 let offset_in_extracted = zeta1_cursor_pos
1199 .saturating_sub(text_before_cursor)
1200 .min(extracted.len());
1201 offset_in_extracted
1202 });
1203
1204 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
1205 if let Some(offset) = cursor_offset {
1206 result.push_str(&extracted[..offset]);
1207 result.push_str(super::CURSOR_MARKER);
1208 result.push_str(&extracted[offset..]);
1209 } else {
1210 result.push_str(extracted);
1211 }
1212
1213 Some(result)
1214 }
1215}
1216
1217#[cfg(test)]
1218mod tests {
1219 use super::*;
1220 use indoc::indoc;
1221
1222 fn make_input(
1223 cursor_excerpt: &str,
1224 editable_range: Range<usize>,
1225 cursor_offset: usize,
1226 events: Vec<Event>,
1227 related_files: Vec<RelatedFile>,
1228 ) -> ZetaPromptInput {
1229 ZetaPromptInput {
1230 cursor_path: Path::new("test.rs").into(),
1231 cursor_excerpt: cursor_excerpt.into(),
1232 editable_range_in_excerpt: editable_range,
1233 cursor_offset_in_excerpt: cursor_offset,
1234 excerpt_start_row: None,
1235 events: events.into_iter().map(Arc::new).collect(),
1236 related_files,
1237 excerpt_ranges: None,
1238 preferred_model: None,
1239 in_open_source_repo: false,
1240 can_collect_data: false,
1241 }
1242 }
1243
1244 fn make_event(path: &str, diff: &str) -> Event {
1245 Event::BufferChange {
1246 path: Path::new(path).into(),
1247 old_path: Path::new(path).into(),
1248 diff: diff.to_string(),
1249 predicted: false,
1250 in_open_source_repo: false,
1251 }
1252 }
1253
1254 fn make_related_file(path: &str, content: &str) -> RelatedFile {
1255 RelatedFile {
1256 path: Path::new(path).into(),
1257 max_row: content.lines().count() as u32,
1258 excerpts: vec![RelatedExcerpt {
1259 row_range: 0..content.lines().count() as u32,
1260 text: content.into(),
1261 order: 0,
1262 }],
1263 in_open_source_repo: false,
1264 }
1265 }
1266
1267 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1268 format_zeta_prompt_with_budget(input, ZetaFormat::V0114180EditableRegion, max_tokens)
1269 }
1270
/// With a generous budget the related file, the edit history and the cursor
/// excerpt are all rendered.
#[test]
fn test_no_truncation_when_within_budget() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    let prompt = format_with_budget(&input, 10000);

    let expected = indoc! {r#"
        <|file_sep|>related.rs
        fn helper() {}
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new
        <|file_sep|>test.rs
        <|fim_prefix|>
        prefix
        <|fim_middle|>current
        edi<|user_cursor|>table
        <|fim_suffix|>

        suffix
        <|fim_middle|>updated
    "#};
    assert_eq!(prompt, expected);
}
1303
/// The edit history is rendered with a large budget and dropped with a tight
/// one, while both related files remain.
#[test]
fn test_truncation_drops_edit_history_when_budget_tight() {
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let related_files = vec![
        make_related_file("r1.rs", "a\n"),
        make_related_file("r2.rs", "b\n"),
    ];
    let input = make_input("code", 0..4, 2, events, related_files);

    // Large budget: everything is present.
    let roomy_prompt = format_with_budget(&input, 10000);
    let roomy_expected = indoc! {r#"
        <|file_sep|>r1.rs
        a
        <|file_sep|>r2.rs
        b
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(roomy_prompt, roomy_expected);

    // Tight budget: the edit history section disappears.
    let tight_prompt = format_with_budget(&input, 50);
    let tight_expected = indoc! {r#"
        <|file_sep|>r1.rs
        a
        <|file_sep|>r2.rs
        b
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(tight_prompt, tight_expected);
}
1354
/// A related file with several excerpts may be rendered partially: with a
/// tight budget only the first excerpt (plus its `...` separator) survives.
#[test]
fn test_truncation_includes_partial_excerpts() {
    let big_file = RelatedFile {
        path: Path::new("big.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![
            RelatedExcerpt {
                row_range: 0..10,
                text: "first excerpt\n".into(),
                order: 0,
            },
            RelatedExcerpt {
                row_range: 10..20,
                text: "second excerpt\n".into(),
                order: 0,
            },
            RelatedExcerpt {
                row_range: 20..30,
                text: "third excerpt\n".into(),
                order: 0,
            },
        ],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![big_file]);

    // Large budget: all three excerpts are rendered.
    let roomy_prompt = format_with_budget(&input, 10000);
    let roomy_expected = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        second excerpt
        ...
        third excerpt
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(roomy_prompt, roomy_expected);

    // Tight budget: only the first excerpt fits.
    let tight_prompt = format_with_budget(&input, 50);
    let tight_expected = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(tight_prompt, tight_expected);
}
1419
/// Excerpts with a lower `order` take precedence over those with a higher one.
#[test]
fn test_truncation_prioritizes_lower_order_excerpts() {
    // Two files: file_a carries a high-order excerpt, file_b a low-order one.
    // Under a tight budget only file_b's lower-order excerpt should be kept.
    let file_a = RelatedFile {
        path: Path::new("file_a.rs").into(),
        max_row: 10,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..10,
            text: "low priority content\n".into(),
            order: 5,
        }],
    };
    let file_b = RelatedFile {
        path: Path::new("file_b.rs").into(),
        max_row: 10,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..10,
            text: "high priority content\n".into(),
            order: 1,
        }],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![file_a, file_b]);

    // Large budget: both files are rendered, file_b (order 1) ahead of file_a (order 5).
    let roomy_prompt = format_with_budget(&input, 10000);
    let roomy_expected = indoc! {r#"
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>file_a.rs
        low priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(roomy_prompt, roomy_expected);

    // Tight budget: only file_b (lower order) fits.
    // The cursor section is ~37 tokens, so a budget of 52 leaves ~15 for related files.
    // file_b header (7) + excerpt (7) = 14 tokens, which fits.
    // file_a would need another 14 tokens, which does not fit.
    let tight_prompt = format_with_budget(&input, 52);
    let tight_expected = indoc! {r#"
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(tight_prompt, tight_expected);
}
1488
/// Higher-order excerpts are dropped before lower-order ones even when they
/// belong to the same file.
#[test]
fn test_truncation_drops_high_order_excerpts_within_file() {
    // One file holds excerpts at order 1 and order 3. Under a tight budget the
    // order-1 excerpts stay while the order-3 excerpt is dropped, although all
    // of them come from the same file. This also keeps the parent invariant:
    // parent outline items have order ≤ their best child, so they are always
    // included whenever any child is.
    let module_file = RelatedFile {
        path: Path::new("mod.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![
            RelatedExcerpt {
                row_range: 0..5,
                text: "mod header\n".into(),
                order: 1,
            },
            RelatedExcerpt {
                row_range: 5..15,
                text: "important fn\n".into(),
                order: 1,
            },
            RelatedExcerpt {
                row_range: 15..30,
                text: "less important fn\n".into(),
                order: 3,
            },
        ],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![module_file]);

    // Large budget: all three excerpts are rendered.
    let roomy_prompt = format_with_budget(&input, 10000);
    let roomy_expected = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        less important fn
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(roomy_prompt, roomy_expected);

    // Tight budget: only the order<=1 excerpts remain (header + important fn).
    let tight_prompt = format_with_budget(&input, 55);
    let tight_expected = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(tight_prompt, tight_expected);
}
1562
/// When the edit history does not fit, the earlier event in the list is the
/// one that is dropped.
#[test]
fn test_truncation_drops_older_events_first() {
    let events = vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")];
    let input = make_input("x", 0..1, 0, events, vec![]);

    // Large budget: both events are rendered in list order.
    let roomy_prompt = format_with_budget(&input, 10000);
    let roomy_expected = indoc! {r#"
        <|file_sep|>edit history
        --- a/old.rs
        +++ b/old.rs
        -1
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(roomy_prompt, roomy_expected);

    // Tight budget: only the later event survives.
    let tight_prompt = format_with_budget(&input, 55);
    let tight_expected = indoc! {r#"
        <|file_sep|>edit history
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(tight_prompt, tight_expected);
}
1608
/// With a minimal budget the related file and the edit history are dropped,
/// but the cursor excerpt is still rendered.
#[test]
fn test_cursor_excerpt_always_included_with_minimal_budget() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "helper\n")];
    let input = make_input("fn main() {}", 0..12, 3, events, related_files);

    let prompt = format_with_budget(&input, 30);

    let expected = indoc! {r#"
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        fn <|user_cursor|>main() {}
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(prompt, expected);
}
1631
1632 fn format_seed_coder(input: &ZetaPromptInput) -> String {
1633 format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, 10000)
1634 }
1635
1636 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1637 format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, max_tokens)
1638 }
1639
/// The seed-coder layout puts the suffix first, then related files, the edit
/// history and the cursor file, and ends at the fim-middle tag without a
/// trailing newline.
#[test]
fn test_seed_coder_basic_format() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    let prompt = format_seed_coder(&input);

    let expected = indoc! {r#"
        <[fim-suffix]>
        suffix
        <[fim-prefix]><filename>related.rs
        fn helper() {}

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new

        <filename>test.rs
        prefix
        <<<<<<< CURRENT
        edi<|user_cursor|>table
        =======
        <[fim-middle]>"#};
    assert_eq!(prompt, expected);
}
1672
/// Without events or related files only the suffix and the cursor file are
/// rendered.
#[test]
fn test_seed_coder_no_context() {
    let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);

    let prompt = format_seed_coder(&input);

    let expected = indoc! {r#"
        <[fim-suffix]>
        after
        <[fim-prefix]><filename>test.rs
        before
        <<<<<<< CURRENT
        mid<|user_cursor|>dle
        =======
        <[fim-middle]>"#};
    assert_eq!(prompt, expected);
}
1690
/// Related files and edit history are dropped under a tight budget, but the
/// cursor section is kept.
#[test]
fn test_seed_coder_truncation_drops_context() {
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let related_files = vec![make_related_file("r1.rs", "content\n")];
    let input = make_input("code", 0..4, 2, events, related_files);

    // Large budget: everything is included.
    let roomy_prompt = format_seed_coder(&input);
    let roomy_expected = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>r1.rs
        content

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(roomy_prompt, roomy_expected);

    // Tight budget: context is dropped but the cursor section remains.
    let tight_prompt = format_seed_coder_with_budget(&input, 30);
    let tight_expected = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(tight_prompt, tight_expected);
}
1734
/// In the seed-coder format, related files with a lower `order` are rendered
/// first and are the ones kept under a tight budget.
#[test]
fn test_seed_coder_truncation_prioritizes_lower_order() {
    let low_prio = RelatedFile {
        path: Path::new("low_prio.rs").into(),
        max_row: 5,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..5,
            text: "low prio\n".into(),
            order: 10,
        }],
    };
    let high_prio = RelatedFile {
        path: Path::new("high_prio.rs").into(),
        max_row: 5,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..5,
            text: "high prio\n".into(),
            order: 1,
        }],
    };
    let input = make_input("code", 0..4, 2, vec![], vec![low_prio, high_prio]);

    // Large budget: both files are included, high_prio first because of its lower order.
    let roomy_prompt = format_seed_coder(&input);
    let roomy_expected = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>high_prio.rs
        high prio
        <filename>low_prio.rs
        low prio

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(roomy_prompt, roomy_expected);

    // Tight budget: only high_prio is included.
    // The cursor sections cost 25 tokens, so a budget of 44 leaves 19 for related files.
    // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
    let tight_prompt = format_seed_coder_with_budget(&input, 44);
    let tight_expected = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>high_prio.rs
        high prio

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(tight_prompt, tight_expected);
}
1800
/// Cleaning seed-coder output yields the same text whether or not the output
/// ends with the `>>>>>>> UPDATED` marker line.
#[test]
fn test_seed_coder_clean_output() {
    let raw_outputs = [
        // Output that ends with the marker line.
        "new code\n>>>>>>> UPDATED\n",
        // Output without the marker line.
        "new code\n",
    ];

    for raw_output in raw_outputs {
        assert_eq!(
            clean_zeta2_model_output(raw_output, ZetaFormat::V0211SeedCoder),
            "new code\n"
        );
    }
}
1815
/// A zeta1 prompt built from an input that starts at the top of the file
/// contains the edit history, the start-of-file marker, and the editable
/// region wrapped in its markers with the cursor marker inside.
#[test]
fn test_format_zeta1_from_input_basic() {
    // The hard-coded byte offsets require the body line to be indented with
    // four spaces: "fn before() {}\n" is 15 bytes and "fn foo() {\n" is 11, so
    // 15..41 covers "fn foo() {\n    let x = 1;\n" and offset 30 sits directly
    // before `let`.
    let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        editable_range_in_excerpt: 15..41,
        cursor_offset_in_excerpt: 30,
        excerpt_start_row: Some(0),
        events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
        related_files: vec![],
        excerpt_ranges: None,
        preferred_model: None,
        in_open_source_repo: false,
        can_collect_data: false,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "User edited other.rs:\n",
            "```diff\n",
            "-old\n",
            "+new\n",
            "\n",
            "```\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|start_of_file|>\n",
            "fn before() {}\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "\n",
            // The editable range ends before the closing brace, so the brace
            // follows the end marker on the same line.
            "<|editable_region_end|>}\n",
            "fn after() {}\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
1870
/// No start-of-file marker is emitted when the excerpt does not begin at
/// row 0, and an empty event list leaves the "User Edits" section empty.
#[test]
fn test_format_zeta1_from_input_no_start_of_file() {
    // The hard-coded byte offsets require the body line to be indented with
    // four spaces: the excerpt is then 11 + 15 + 2 = 28 bytes long and offset
    // 15 sits directly before `let`.
    let excerpt = "fn foo() {\n    let x = 1;\n}\n";
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        editable_range_in_excerpt: 0..28,
        cursor_offset_in_excerpt: 15,
        excerpt_start_row: Some(10),
        events: vec![],
        related_files: vec![],
        excerpt_ranges: None,
        preferred_model: None,
        in_open_source_repo: false,
        can_collect_data: false,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "\n",
            "<|editable_region_end|>\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
1917
/// When the editable range is a strict sub-range of the context range, the
/// surrounding context is rendered outside the editable-region markers.
#[test]
fn test_format_zeta1_from_input_with_sub_ranges() {
    // The hard-coded byte offsets require the body line to be indented with
    // four spaces: "// prefix\n" is 10 bytes and "fn foo() {\n" is 11, so
    // offset 25 sits directly before `let` and 10..37 ends right after the
    // closing brace (before its newline).
    let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
    let editable_range = 10..37;
    let context_range = 0..excerpt.len();

    let input = ZetaPromptInput {
        cursor_path: Path::new("test.rs").into(),
        cursor_excerpt: excerpt.into(),
        editable_range_in_excerpt: editable_range.clone(),
        cursor_offset_in_excerpt: 25,
        excerpt_start_row: Some(0),
        events: vec![],
        related_files: vec![],
        excerpt_ranges: None,
        preferred_model: None,
        in_open_source_repo: false,
        can_collect_data: false,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```test.rs\n",
            "<|start_of_file|>\n",
            "// prefix\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "<|editable_region_end|>\n",
            "// suffix\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
1970
/// Cleaning extracts the text between the editable-region markers, without
/// the newline after the start marker or the one before the end marker.
#[test]
fn test_clean_zeta1_model_output_basic() {
    // The `println!` line is indented by four spaces relative to the other
    // lines; `indoc!` keeps that relative indentation, so the expected string
    // below must contain the same four spaces.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
}
1984
/// Cleaning replaces the zeta1 cursor marker with the universal cursor marker
/// at the same position inside the extracted text.
#[test]
fn test_clean_zeta1_model_output_with_cursor() {
    // The cursor line is indented by four spaces relative to the other lines;
    // `indoc!` keeps that relative indentation, so the expected string below
    // must contain the same four spaces before the cursor marker.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            <|user_cursor_is_here|>println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(
        cleaned,
        "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
    );
}
2001
/// Output without any markers is returned unchanged.
#[test]
fn test_clean_zeta1_model_output_no_markers() {
    let raw_output = "fn main() {}\n";

    let cleaned = zeta1::clean_zeta1_model_output(raw_output);

    assert_eq!(cleaned.unwrap(), "fn main() {}\n");
}
2008
/// An editable region with nothing between its markers cleans to an empty
/// string.
#[test]
fn test_clean_zeta1_model_output_empty_region() {
    let raw_output = "<|editable_region_start|>\n<|editable_region_end|>\n";

    let cleaned = zeta1::clean_zeta1_model_output(raw_output);

    assert_eq!(cleaned.unwrap(), "");
}
2015}