1use anyhow::Result;
2use serde::{Deserialize, Serialize};
3use std::fmt::Write;
4use std::ops::Range;
5use std::path::Path;
6use std::sync::Arc;
7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
8
/// Marker written into the prompt at the cursor offset, between the editable
/// text before the cursor and the editable text after it.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes to the budgeted formatter.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
16
/// Roughly estimates how many tokens `bytes` bytes of text occupy, assuming
/// three bytes per token. The division truncates towards zero.
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
20
/// The client's preferred edit prediction model. The server may override this.
///
/// Carried in `ZetaPromptInput::preferred_model`; nothing in this part of the
/// file reads it when formatting a prompt.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum EditPredictionModelKind {
    /// Prefer the first-generation model (presumably the one served with the
    /// `zeta1` prompt format — confirm against the server).
    Zeta1,
    /// Prefer the second-generation model (presumably the one served with a
    /// `ZetaFormat` prompt — confirm against the server).
    Zeta2,
}
27
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
///
/// `resolve_cursor_region` picks an (editable, context) pair from here based
/// on the `ZetaFormat`, slices `cursor_excerpt` with the context range and
/// rebases the editable range and cursor offset onto that slice.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
}
46
/// Everything the prompt formatters need to build an edit-prediction prompt
/// for a single cursor position.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written after the file marker
    /// in the cursor section of the prompt.
    pub cursor_path: Arc<Path>,
    /// Text around the cursor. The ranges and offsets below index into this
    /// string by byte.
    pub cursor_excerpt: Arc<str>,
    /// Byte range of the editable region within `cursor_excerpt`.
    pub editable_range_in_excerpt: Range<usize>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Row at which the excerpt starts. The zeta1 formatter emits a
    /// start-of-file marker when this is `Some(0)` and the context range
    /// starts at offset 0.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events. When the token budget is tight the formatters keep the
    /// entries at the end of this list, so it is presumably ordered oldest
    /// first — confirm against the producer.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other files to include as additional context.
    pub related_files: Vec<RelatedFile>,
    /// When set, the excerpt was computed with a larger budget (~512 tokens)
    /// and these ranges let the server select model-appropriate subsets.
    /// When absent, the excerpt IS the context region and
    /// `editable_range_in_excerpt` is the only editable range.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_ranges: Option<ExcerptRanges>,
    /// Client's preferred model. The server may override.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub preferred_model: Option<EditPredictionModelKind>,
    /// Whether the cursor file belongs to an open source repository
    /// (meaning inferred from the field name; defaults to `false` when the
    /// field is missing during deserialization).
    #[serde(default)]
    pub in_open_source_repo: bool,
}
69
/// Identifies which prompt layout to produce.
///
/// The variant name doubles as the format's textual name: `Display` and
/// `ZetaFormat::parse` both go through the `IntoStaticStr` derive.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    /// Cursor section written by `v0112_middle_at_end`; uses the
    /// `editable_150` ranges when `excerpt_ranges` is present.
    V0112MiddleAtEnd,
    /// Cursor section written by `v0113_ordered`; uses the `editable_150`
    /// ranges when `excerpt_ranges` is present.
    V0113Ordered,
    /// Same cursor section as `V0113Ordered`, but uses the `editable_180`
    /// ranges when `excerpt_ranges` is present. This is the default format.
    #[default]
    V0114180EditableRegion,
    /// Cursor section written by `v0120_git_merge_markers`.
    V0120GitMergeMarkers,
    /// Cursor section written by `v0131_git_merge_markers_prefix`.
    V0131GitMergeMarkersPrefix,
    /// Same prompt as `V0131GitMergeMarkersPrefix`; additionally the only
    /// format for which `get_prefill` returns a non-empty prefill.
    V0211Prefill,
    /// Whole prompt built by `seed_coder::format_prompt_with_budget`.
    V0211SeedCoder,
}
94
95impl std::fmt::Display for ZetaFormat {
96 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
97 write!(f, "{}", <&'static str>::from(self))
98 }
99}
100
101impl ZetaFormat {
102 pub fn parse(format_name: &str) -> Result<Self> {
103 let mut results = ZetaFormat::iter().filter(|version| {
104 <&'static str>::from(version)
105 .to_lowercase()
106 .contains(&format_name.to_lowercase())
107 });
108 let Some(result) = results.next() else {
109 anyhow::bail!(
110 "`{format_name}` did not match any of:\n{}",
111 Self::options_as_string()
112 );
113 };
114 if results.next().is_some() {
115 anyhow::bail!(
116 "`{format_name}` matched more than one of:\n{}",
117 Self::options_as_string()
118 );
119 }
120 Ok(result)
121 }
122
123 pub fn options_as_string() -> String {
124 ZetaFormat::iter()
125 .map(|format| format!("- {}\n", <&'static str>::from(format)))
126 .collect::<Vec<_>>()
127 .concat()
128 }
129
130 pub fn special_tokens(&self) -> &'static [&'static str] {
131 match self {
132 ZetaFormat::V0112MiddleAtEnd
133 | ZetaFormat::V0113Ordered
134 | ZetaFormat::V0114180EditableRegion => &[
135 "<|fim_prefix|>",
136 "<|fim_suffix|>",
137 "<|fim_middle|>",
138 "<|file_sep|>",
139 CURSOR_MARKER,
140 ],
141 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
142 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
143 v0131_git_merge_markers_prefix::special_tokens()
144 }
145 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
146 }
147 }
148}
149
/// An entry in the edit history that is rendered into the prompt.
///
/// Serialized with the variant name stored under an `"event"` key
/// (see the `serde(tag = ...)` attribute).
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, described by a diff.
    BufferChange {
        /// Path after the change; `write_event` writes it on the `+++ b` line.
        path: Arc<Path>,
        /// Path before the change; `write_event` writes it on the `--- a`
        /// line. The zeta1 formatter reports a rename when it differs from
        /// `path`.
        old_path: Arc<Path>,
        /// Diff text, appended verbatim after the two path lines.
        diff: String,
        /// When true, `write_event` prefixes the entry with a
        /// `// User accepted prediction:` line.
        predicted: bool,
        /// Whether the change happened in an open source repository
        /// (meaning inferred from the field name); not written to the prompt.
        in_open_source_repo: bool,
    },
}
161
162impl Event {
163 pub fn in_open_source_repo(&self) -> bool {
164 match self {
165 Event::BufferChange {
166 in_open_source_repo,
167 ..
168 } => *in_open_source_repo,
169 }
170 }
171}
172
173pub fn write_event(prompt: &mut String, event: &Event) {
174 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
175 for component in path.components() {
176 prompt.push('/');
177 write!(prompt, "{}", component.as_os_str().display()).ok();
178 }
179 }
180 match event {
181 Event::BufferChange {
182 path,
183 old_path,
184 diff,
185 predicted,
186 in_open_source_repo: _,
187 } => {
188 if *predicted {
189 prompt.push_str("// User accepted prediction:\n");
190 }
191 prompt.push_str("--- a");
192 write_path_as_unix_str(prompt, old_path.as_ref());
193 prompt.push_str("\n+++ b");
194 write_path_as_unix_str(prompt, path.as_ref());
195 prompt.push('\n');
196 prompt.push_str(diff);
197 }
198 }
199}
200
/// Excerpts from a file other than the cursor file, included in the prompt
/// as additional context.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written after the file marker in the prompt.
    pub path: Arc<Path>,
    /// Row bound for the file: an excerpt whose `row_range.end` is below this
    /// value is followed by a `...` line in the prompt.
    pub max_row: u32,
    /// Excerpts to render, in order.
    pub excerpts: Vec<RelatedExcerpt>,
    /// Whether the file belongs to an open source repository (meaning
    /// inferred from the field name; defaults to `false` when the field is
    /// missing during deserialization).
    #[serde(default)]
    pub in_open_source_repo: bool,
}
209
/// A contiguous piece of text taken from a `RelatedFile`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by the excerpt; only `end` is read by the formatters in
    /// this file (compared against `RelatedFile::max_row`).
    pub row_range: Range<u32>,
    /// Excerpt text, written verbatim into the prompt.
    pub text: Arc<str>,
}
215
216pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
217 format
218 .special_tokens()
219 .iter()
220 .any(|token| input.cursor_excerpt.contains(token))
221}
222
/// Formats the prompt for `input` in the given `format`, limiting the
/// optional context sections to a budget of `MAX_PROMPT_TOKENS` estimated
/// tokens.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    format_zeta_prompt_with_budget(input, format, MAX_PROMPT_TOKENS)
}
226
227/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
228pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
229 match format {
230 ZetaFormat::V0120GitMergeMarkers => output
231 .strip_suffix(v0120_git_merge_markers::END_MARKER)
232 .unwrap_or(output),
233 ZetaFormat::V0131GitMergeMarkersPrefix => output
234 .strip_suffix(v0131_git_merge_markers_prefix::END_MARKER)
235 .unwrap_or(output),
236 ZetaFormat::V0211SeedCoder => output
237 .strip_suffix(seed_coder::END_MARKER)
238 .unwrap_or(output),
239 _ => output,
240 }
241}
242
243fn resolve_cursor_region(
244 input: &ZetaPromptInput,
245 format: ZetaFormat,
246) -> (&str, Range<usize>, usize) {
247 let Some(ranges) = &input.excerpt_ranges else {
248 return (
249 &input.cursor_excerpt,
250 input.editable_range_in_excerpt.clone(),
251 input.cursor_offset_in_excerpt,
252 );
253 };
254
255 let (editable_range, context_range) = match format {
256 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
257 ranges.editable_150.clone(),
258 ranges.editable_150_context_350.clone(),
259 ),
260 ZetaFormat::V0114180EditableRegion
261 | ZetaFormat::V0120GitMergeMarkers
262 | ZetaFormat::V0131GitMergeMarkersPrefix
263 | ZetaFormat::V0211Prefill
264 | ZetaFormat::V0211SeedCoder => (
265 ranges.editable_180.clone(),
266 ranges.editable_180_context_350.clone(),
267 ),
268 };
269
270 let context_start = context_range.start;
271 let context_text = &input.cursor_excerpt[context_range];
272 let adjusted_editable =
273 (editable_range.start - context_start)..(editable_range.end - context_start);
274 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
275
276 (context_text, adjusted_editable, adjusted_cursor)
277}
278
279fn format_zeta_prompt_with_budget(
280 input: &ZetaPromptInput,
281 format: ZetaFormat,
282 max_tokens: usize,
283) -> String {
284 let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
285 let path = &*input.cursor_path;
286
287 let mut cursor_section = String::new();
288 match format {
289 ZetaFormat::V0112MiddleAtEnd => {
290 v0112_middle_at_end::write_cursor_excerpt_section(
291 &mut cursor_section,
292 path,
293 context,
294 &editable_range,
295 cursor_offset,
296 );
297 }
298 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
299 v0113_ordered::write_cursor_excerpt_section(
300 &mut cursor_section,
301 path,
302 context,
303 &editable_range,
304 cursor_offset,
305 )
306 }
307 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
308 &mut cursor_section,
309 path,
310 context,
311 &editable_range,
312 cursor_offset,
313 ),
314 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
315 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
316 &mut cursor_section,
317 path,
318 context,
319 &editable_range,
320 cursor_offset,
321 )
322 }
323 ZetaFormat::V0211SeedCoder => {
324 return seed_coder::format_prompt_with_budget(
325 path,
326 context,
327 &editable_range,
328 cursor_offset,
329 &input.events,
330 &input.related_files,
331 max_tokens,
332 );
333 }
334 }
335
336 let cursor_tokens = estimate_tokens(cursor_section.len());
337 let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
338
339 let edit_history_section = format_edit_history_within_budget(
340 &input.events,
341 "<|file_sep|>",
342 "edit history",
343 budget_after_cursor,
344 );
345 let edit_history_tokens = estimate_tokens(edit_history_section.len());
346 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
347
348 let related_files_section = format_related_files_within_budget(
349 &input.related_files,
350 "<|file_sep|>",
351 budget_after_edit_history,
352 );
353
354 let mut prompt = String::new();
355 prompt.push_str(&related_files_section);
356 prompt.push_str(&edit_history_section);
357 prompt.push_str(&cursor_section);
358 prompt
359}
360
361pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
362 match format {
363 ZetaFormat::V0112MiddleAtEnd
364 | ZetaFormat::V0113Ordered
365 | ZetaFormat::V0114180EditableRegion
366 | ZetaFormat::V0120GitMergeMarkers
367 | ZetaFormat::V0131GitMergeMarkersPrefix
368 | ZetaFormat::V0211SeedCoder => String::new(),
369 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(input),
370 }
371}
372
373fn format_edit_history_within_budget(
374 events: &[Arc<Event>],
375 file_marker: &str,
376 edit_history_name: &str,
377 max_tokens: usize,
378) -> String {
379 let header = format!("{}{}\n", file_marker, edit_history_name);
380 let header_tokens = estimate_tokens(header.len());
381 if header_tokens >= max_tokens {
382 return String::new();
383 }
384
385 let mut event_strings: Vec<String> = Vec::new();
386 let mut total_tokens = header_tokens;
387
388 for event in events.iter().rev() {
389 let mut event_str = String::new();
390 write_event(&mut event_str, event);
391 let event_tokens = estimate_tokens(event_str.len());
392
393 if total_tokens + event_tokens > max_tokens {
394 break;
395 }
396 total_tokens += event_tokens;
397 event_strings.push(event_str);
398 }
399
400 if event_strings.is_empty() {
401 return String::new();
402 }
403
404 let mut result = header;
405 for event_str in event_strings.iter().rev() {
406 result.push_str(event_str);
407 }
408 result
409}
410
411fn format_related_files_within_budget(
412 related_files: &[RelatedFile],
413 file_marker: &str,
414 max_tokens: usize,
415) -> String {
416 let mut result = String::new();
417 let mut total_tokens = 0;
418
419 for file in related_files {
420 let path_str = file.path.to_string_lossy();
421 let header = format!("{}{}\n", file_marker, path_str);
422 let header_tokens = estimate_tokens(header.len());
423
424 if total_tokens + header_tokens > max_tokens {
425 break;
426 }
427
428 let mut file_tokens = header_tokens;
429 let mut excerpts_to_include = 0;
430
431 for excerpt in &file.excerpts {
432 let needs_newline = !excerpt.text.ends_with('\n');
433 let needs_ellipsis = excerpt.row_range.end < file.max_row;
434 let excerpt_len = excerpt.text.len()
435 + if needs_newline { "\n".len() } else { 0 }
436 + if needs_ellipsis { "...\n".len() } else { 0 };
437
438 let excerpt_tokens = estimate_tokens(excerpt_len);
439 if total_tokens + file_tokens + excerpt_tokens > max_tokens {
440 break;
441 }
442 file_tokens += excerpt_tokens;
443 excerpts_to_include += 1;
444 }
445
446 if excerpts_to_include > 0 {
447 total_tokens += file_tokens;
448 result.push_str(&header);
449 for excerpt in file.excerpts.iter().take(excerpts_to_include) {
450 result.push_str(&excerpt.text);
451 if !result.ends_with('\n') {
452 result.push('\n');
453 }
454 if excerpt.row_range.end < file.max_row {
455 result.push_str("...\n");
456 }
457 }
458 }
459 }
460
461 result
462}
463
464pub fn write_related_files(
465 prompt: &mut String,
466 related_files: &[RelatedFile],
467) -> Vec<Range<usize>> {
468 let mut ranges = Vec::new();
469 for file in related_files {
470 let start = prompt.len();
471 let path_str = file.path.to_string_lossy();
472 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
473 for excerpt in &file.excerpts {
474 prompt.push_str(&excerpt.text);
475 if !prompt.ends_with('\n') {
476 prompt.push('\n');
477 }
478 if excerpt.row_range.end < file.max_row {
479 prompt.push_str("...\n");
480 }
481 }
482 let end = prompt.len();
483 ranges.push(start..end);
484 }
485 ranges
486}
487
488mod v0112_middle_at_end {
489 use super::*;
490
491 pub fn write_cursor_excerpt_section(
492 prompt: &mut String,
493 path: &Path,
494 context: &str,
495 editable_range: &Range<usize>,
496 cursor_offset: usize,
497 ) {
498 let path_str = path.to_string_lossy();
499 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
500
501 prompt.push_str("<|fim_prefix|>\n");
502 prompt.push_str(&context[..editable_range.start]);
503
504 prompt.push_str("<|fim_suffix|>\n");
505 prompt.push_str(&context[editable_range.end..]);
506 if !prompt.ends_with('\n') {
507 prompt.push('\n');
508 }
509
510 prompt.push_str("<|fim_middle|>current\n");
511 prompt.push_str(&context[editable_range.start..cursor_offset]);
512 prompt.push_str(CURSOR_MARKER);
513 prompt.push_str(&context[cursor_offset..editable_range.end]);
514 if !prompt.ends_with('\n') {
515 prompt.push('\n');
516 }
517
518 prompt.push_str("<|fim_middle|>updated\n");
519 }
520}
521
522mod v0113_ordered {
523 use super::*;
524
525 pub fn write_cursor_excerpt_section(
526 prompt: &mut String,
527 path: &Path,
528 context: &str,
529 editable_range: &Range<usize>,
530 cursor_offset: usize,
531 ) {
532 let path_str = path.to_string_lossy();
533 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
534
535 prompt.push_str("<|fim_prefix|>\n");
536 prompt.push_str(&context[..editable_range.start]);
537 if !prompt.ends_with('\n') {
538 prompt.push('\n');
539 }
540
541 prompt.push_str("<|fim_middle|>current\n");
542 prompt.push_str(&context[editable_range.start..cursor_offset]);
543 prompt.push_str(CURSOR_MARKER);
544 prompt.push_str(&context[cursor_offset..editable_range.end]);
545 if !prompt.ends_with('\n') {
546 prompt.push('\n');
547 }
548
549 prompt.push_str("<|fim_suffix|>\n");
550 prompt.push_str(&context[editable_range.end..]);
551 if !prompt.ends_with('\n') {
552 prompt.push('\n');
553 }
554
555 prompt.push_str("<|fim_middle|>updated\n");
556 }
557}
558
559pub mod v0120_git_merge_markers {
560 //! A prompt that uses git-style merge conflict markers to represent the editable region.
561 //!
562 //! Example prompt:
563 //!
564 //! <|file_sep|>path/to/target_file.py
565 //! <|fim_prefix|>
566 //! code before editable region
567 //! <|fim_suffix|>
568 //! code after editable region
569 //! <|fim_middle|>
570 //! <<<<<<< CURRENT
571 //! code that
572 //! needs to<|user_cursor|>
573 //! be rewritten
574 //! =======
575 //!
576 //! Expected output (should be generated by the model):
577 //!
578 //! updated
579 //! code with
580 //! changes applied
581 //! >>>>>>> UPDATED
582
583 use super::*;
584
585 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
586 pub const SEPARATOR: &str = "=======\n";
587 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
588
589 pub fn special_tokens() -> &'static [&'static str] {
590 &[
591 "<|fim_prefix|>",
592 "<|fim_suffix|>",
593 "<|fim_middle|>",
594 "<|file_sep|>",
595 START_MARKER,
596 SEPARATOR,
597 END_MARKER,
598 CURSOR_MARKER,
599 ]
600 }
601
602 pub fn write_cursor_excerpt_section(
603 prompt: &mut String,
604 path: &Path,
605 context: &str,
606 editable_range: &Range<usize>,
607 cursor_offset: usize,
608 ) {
609 let path_str = path.to_string_lossy();
610 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
611
612 prompt.push_str("<|fim_prefix|>");
613 prompt.push_str(&context[..editable_range.start]);
614
615 prompt.push_str("<|fim_suffix|>");
616 prompt.push_str(&context[editable_range.end..]);
617 if !prompt.ends_with('\n') {
618 prompt.push('\n');
619 }
620
621 prompt.push_str("<|fim_middle|>");
622 prompt.push_str(START_MARKER);
623 prompt.push_str(&context[editable_range.start..cursor_offset]);
624 prompt.push_str(CURSOR_MARKER);
625 prompt.push_str(&context[cursor_offset..editable_range.end]);
626 if !prompt.ends_with('\n') {
627 prompt.push('\n');
628 }
629 prompt.push_str(SEPARATOR);
630 }
631}
632
633pub mod v0131_git_merge_markers_prefix {
634 //! A prompt that uses git-style merge conflict markers to represent the editable region.
635 //!
636 //! Example prompt:
637 //!
638 //! <|file_sep|>path/to/target_file.py
639 //! <|fim_prefix|>
640 //! code before editable region
641 //! <<<<<<< CURRENT
642 //! code that
643 //! needs to<|user_cursor|>
644 //! be rewritten
645 //! =======
646 //! <|fim_suffix|>
647 //! code after editable region
648 //! <|fim_middle|>
649 //!
650 //! Expected output (should be generated by the model):
651 //!
652 //! updated
653 //! code with
654 //! changes applied
655 //! >>>>>>> UPDATED
656
657 use super::*;
658
659 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
660 pub const SEPARATOR: &str = "=======\n";
661 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
662
663 pub fn special_tokens() -> &'static [&'static str] {
664 &[
665 "<|fim_prefix|>",
666 "<|fim_suffix|>",
667 "<|fim_middle|>",
668 "<|file_sep|>",
669 START_MARKER,
670 SEPARATOR,
671 END_MARKER,
672 CURSOR_MARKER,
673 ]
674 }
675
676 pub fn write_cursor_excerpt_section(
677 prompt: &mut String,
678 path: &Path,
679 context: &str,
680 editable_range: &Range<usize>,
681 cursor_offset: usize,
682 ) {
683 let path_str = path.to_string_lossy();
684 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
685
686 prompt.push_str("<|fim_prefix|>");
687 prompt.push_str(&context[..editable_range.start]);
688 prompt.push_str(START_MARKER);
689 prompt.push_str(&context[editable_range.start..cursor_offset]);
690 prompt.push_str(CURSOR_MARKER);
691 prompt.push_str(&context[cursor_offset..editable_range.end]);
692 if !prompt.ends_with('\n') {
693 prompt.push('\n');
694 }
695 prompt.push_str(SEPARATOR);
696
697 prompt.push_str("<|fim_suffix|>");
698 prompt.push_str(&context[editable_range.end..]);
699 if !prompt.ends_with('\n') {
700 prompt.push('\n');
701 }
702
703 prompt.push_str("<|fim_middle|>");
704 }
705}
706
707pub mod v0211_prefill {
708 use super::*;
709
710 pub fn get_prefill(input: &ZetaPromptInput) -> String {
711 let editable_region = &input.cursor_excerpt
712 [input.editable_range_in_excerpt.start..input.editable_range_in_excerpt.end];
713
714 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
715 let prefill_len = editable_region.floor_char_boundary(prefill_len);
716
717 // Find a token boundary to avoid splitting tokens in the prefill.
718 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
719 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
720 // the \n and consume any consecutive \n characters after it.
721 let prefill = &editable_region[..prefill_len];
722 match prefill.rfind('\n') {
723 Some(pos) => {
724 let mut end = pos + 1;
725 while end < editable_region.len()
726 && editable_region.as_bytes().get(end) == Some(&b'\n')
727 {
728 end += 1;
729 }
730 editable_region[..end].to_string()
731 }
732 // No newline found. Fall back to splitting before the last space
733 // (word-level boundary)
734 None => match prefill.rfind(' ') {
735 Some(pos) => prefill[..pos].to_string(),
736 None => prefill.to_string(),
737 },
738 }
739 }
740}
741
742pub mod seed_coder {
743 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
744 //!
745 //! Seed-Coder uses different FIM tokens and order than Qwen:
746 //! - SPM order: suffix comes FIRST, then prefix, then middle
747 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
748 //! - File markers: StarCoder-style `<filename>path` (single token + path)
749 //!
750 //! All context (related files, edit history) goes in the PREFIX section.
751 //! The suffix contains only code after the editable region.
752 //!
753 //! Example prompt:
754 //!
755 //! <[fim-suffix]>
756 //! code after editable region
757 //! <[fim-prefix]><filename>related/file.py
758 //! related file content
759 //!
760 //! <filename>edit_history
761 //! --- a/some_file.py
762 //! +++ b/some_file.py
763 //! -old
764 //! +new
765 //!
766 //! <filename>path/to/target_file.py
767 //! code before editable region
768 //! <<<<<<< CURRENT
769 //! code that
770 //! needs to<|user_cursor|>
771 //! be rewritten
772 //! =======
773 //! <[fim-middle]>
774 //!
775 //! Expected output (model generates):
776 //!
777 //! updated
778 //! code with
779 //! changes applied
780 //! >>>>>>> UPDATED
781
782 use super::*;
783
784 pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
785 pub const FIM_PREFIX: &str = "<[fim-prefix]>";
786 pub const FIM_MIDDLE: &str = "<[fim-middle]>";
787 pub const FILE_MARKER: &str = "<filename>";
788
789 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
790 pub const SEPARATOR: &str = "=======\n";
791 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
792
793 pub fn special_tokens() -> &'static [&'static str] {
794 &[
795 FIM_SUFFIX,
796 FIM_PREFIX,
797 FIM_MIDDLE,
798 FILE_MARKER,
799 START_MARKER,
800 SEPARATOR,
801 END_MARKER,
802 CURSOR_MARKER,
803 ]
804 }
805
806 pub fn format_prompt_with_budget(
807 path: &Path,
808 context: &str,
809 editable_range: &Range<usize>,
810 cursor_offset: usize,
811 events: &[Arc<Event>],
812 related_files: &[RelatedFile],
813 max_tokens: usize,
814 ) -> String {
815 let suffix_section = build_suffix_section(context, editable_range);
816 let cursor_prefix_section =
817 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
818
819 let suffix_tokens = estimate_tokens(suffix_section.len());
820 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
821 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
822
823 let edit_history_section = super::format_edit_history_within_budget(
824 events,
825 FILE_MARKER,
826 "edit_history",
827 budget_after_cursor,
828 );
829 let edit_history_tokens = estimate_tokens(edit_history_section.len());
830 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
831
832 let related_files_section = super::format_related_files_within_budget(
833 related_files,
834 FILE_MARKER,
835 budget_after_edit_history,
836 );
837
838 let mut prompt = String::new();
839 prompt.push_str(&suffix_section);
840 prompt.push_str(FIM_PREFIX);
841 prompt.push_str(&related_files_section);
842 if !related_files_section.is_empty() {
843 prompt.push('\n');
844 }
845 prompt.push_str(&edit_history_section);
846 if !edit_history_section.is_empty() {
847 prompt.push('\n');
848 }
849 prompt.push_str(&cursor_prefix_section);
850 prompt.push_str(FIM_MIDDLE);
851 prompt
852 }
853
854 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
855 let mut section = String::new();
856 section.push_str(FIM_SUFFIX);
857 section.push_str(&context[editable_range.end..]);
858 if !section.ends_with('\n') {
859 section.push('\n');
860 }
861 section
862 }
863
864 fn build_cursor_prefix_section(
865 path: &Path,
866 context: &str,
867 editable_range: &Range<usize>,
868 cursor_offset: usize,
869 ) -> String {
870 let mut section = String::new();
871 let path_str = path.to_string_lossy();
872 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
873
874 section.push_str(&context[..editable_range.start]);
875 section.push_str(START_MARKER);
876 section.push_str(&context[editable_range.start..cursor_offset]);
877 section.push_str(CURSOR_MARKER);
878 section.push_str(&context[cursor_offset..editable_range.end]);
879 if !section.ends_with('\n') {
880 section.push('\n');
881 }
882 section.push_str(SEPARATOR);
883 section
884 }
885}
886
/// The zeta1 prompt format
///
/// An instruction-style prompt made of three parts: a fixed instruction
/// header, the user's edits, and the excerpt around the cursor with the
/// editable region delimited by explicit markers.
pub mod zeta1 {
    use super::*;
    use std::fmt::Write;

    /// Cursor marker used by zeta1. This definition takes precedence over the
    /// parent module's `CURSOR_MARKER` inside this module; the parent's
    /// marker is referred to as `super::CURSOR_MARKER`.
    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
    /// Written on its own line when the excerpt starts at the beginning of
    /// the file.
    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
    /// Written on its own line directly before the editable region.
    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
    /// Written on its own line directly after the editable region.
    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";

    /// Fixed text that opens every prompt, up to and including the heading
    /// of the user-edits part.
    const INSTRUCTION_HEADER: &str = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n\n",
        "### User Edits:\n\n"
    );
    /// Heading placed between the user edits and the excerpt.
    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
    /// Heading that closes the prompt; the model's answer follows it.
    const RESPONSE_HEADER: &str = "\n\n### Response:\n";

    /// Formats a complete zeta1 prompt from the input events and excerpt.
    ///
    /// Both arguments are expected to be already formatted; they are inserted
    /// verbatim between the fixed headers.
    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
        // The final length is known up front, so allocate exactly once.
        let mut prompt = String::with_capacity(
            INSTRUCTION_HEADER.len()
                + input_events.len()
                + EXCERPT_HEADER.len()
                + input_excerpt.len()
                + RESPONSE_HEADER.len(),
        );
        prompt.push_str(INSTRUCTION_HEADER);
        prompt.push_str(input_events);
        prompt.push_str(EXCERPT_HEADER);
        prompt.push_str(input_excerpt);
        prompt.push_str(RESPONSE_HEADER);
        prompt
    }

    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
    /// editable and context byte-offset ranges within `cursor_excerpt`.
    pub fn format_zeta1_from_input(
        input: &ZetaPromptInput,
        editable_range: Range<usize>,
        context_range: Range<usize>,
    ) -> String {
        let events = format_zeta1_events(&input.events);
        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
        format_zeta1_prompt(&events, &excerpt)
    }

    /// Formats events in zeta1 style (oldest first).
    ///
    /// Events are written in slice order, separated by a blank line. Events
    /// that format to an empty string are skipped.
    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
        let mut result = String::new();
        for event in events {
            let event_string = format_zeta1_event(event);
            if event_string.is_empty() {
                continue;
            }
            if !result.is_empty() {
                result.push_str("\n\n");
            }
            result.push_str(&event_string);
        }
        result
    }

    /// Formats a single event as prose: an optional "User renamed" line when
    /// the path changed, followed by an optional "User edited" block with the
    /// diff in a fenced `diff` code block when the diff is not empty.
    ///
    /// Returns an empty string when the path is unchanged and the diff is
    /// empty.
    fn format_zeta1_event(event: &Event) -> String {
        match event {
            Event::BufferChange {
                path,
                old_path,
                diff,
                ..
            } => {
                let mut prompt = String::new();
                if old_path != path {
                    // `writeln!` adds a newline after the one in the format
                    // string, leaving a blank line after the rename notice.
                    writeln!(
                        prompt,
                        "User renamed {} to {}\n",
                        old_path.display(),
                        path.display()
                    )
                    .ok();
                }
                if !diff.is_empty() {
                    write!(
                        prompt,
                        "User edited {}:\n```diff\n{}\n```",
                        path.display(),
                        diff
                    )
                    .ok();
                }
                prompt
            }
        }
    }

    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
    /// within `cursor_excerpt`.
    ///
    /// The result is a fenced block labelled with the cursor path. It
    /// contains an optional start-of-file marker, the context before the
    /// editable region, the editable region wrapped in its start/end markers
    /// with the zeta1 cursor marker inserted at the cursor offset, and the
    /// context after the editable region.
    ///
    /// Panics if the ranges or the cursor offset are not valid slice bounds
    /// of the excerpt.
    fn format_zeta1_excerpt(
        input: &ZetaPromptInput,
        editable_range: Range<usize>,
        context_range: Range<usize>,
    ) -> String {
        let path_str = input.cursor_path.to_string_lossy();
        let excerpt = &*input.cursor_excerpt;
        let cursor_offset = input.cursor_offset_in_excerpt;

        let mut prompt = String::new();
        writeln!(&mut prompt, "```{path_str}").ok();

        // The marker is only written when the excerpt is known to start at
        // row 0 and the context begins at the very start of the excerpt.
        let starts_at_file_beginning =
            input.excerpt_start_row == Some(0) && context_range.start == 0;
        if starts_at_file_beginning {
            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
        }

        prompt.push_str(&excerpt[context_range.start..editable_range.start]);

        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
        prompt.push_str(CURSOR_MARKER);
        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();

        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
        write!(prompt, "\n```").ok();

        prompt
    }

    /// Cleans zeta1 model output by extracting content between editable region
    /// markers and converting the zeta1 cursor marker to the universal one.
    ///
    /// All zeta1 cursor markers are removed from the output. The text between
    /// the first start marker and the first end marker is extracted, without
    /// the newline directly after the start marker and the newline directly
    /// before the end marker. If the output contained a zeta1 cursor marker,
    /// the universal cursor marker is inserted at the position of the first
    /// one, clamped to the extracted text.
    ///
    /// As written, every path returns `Some`: a missing start marker makes
    /// the extraction begin at the start of the output, a missing end marker
    /// makes it run to the end, and an end marker that precedes the start
    /// yields an empty string.
    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
        // Work on a copy without any cursor markers so that marker positions
        // are not shifted by them.
        let content = output.replace(CURSOR_MARKER, "");

        // Offset just past the start marker, also skipping one newline that
        // directly follows it.
        let content_start = content
            .find(EDITABLE_REGION_START_MARKER)
            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
            .map(|pos| {
                if content.as_bytes().get(pos) == Some(&b'\n') {
                    pos + 1
                } else {
                    pos
                }
            })
            .unwrap_or(0);

        // Offset of the end marker, backing up over one newline that directly
        // precedes it.
        let content_end = content
            .find(EDITABLE_REGION_END_MARKER)
            .map(|pos| {
                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
                    pos - 1
                } else {
                    pos
                }
            })
            .unwrap_or(content.len());

        if content_start > content_end {
            return Some(String::new());
        }

        let extracted = &content[content_start..content_end];

        // Translate the position of the first cursor marker in `output` into
        // an offset within `extracted`.
        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
            // From here on `text_before_cursor` is rebound to a number: the
            // offset at which the extracted content starts within the text
            // before the cursor (0 when no start marker precedes the cursor).
            let text_before_cursor = text_before_cursor
                .find(EDITABLE_REGION_START_MARKER)
                .map(|pos| {
                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
                        after_marker + 1
                    } else {
                        after_marker
                    }
                })
                .unwrap_or(0);
            // Clamp so that a cursor located after the end marker lands at
            // the end of the extracted text.
            let offset_in_extracted = zeta1_cursor_pos
                .saturating_sub(text_before_cursor)
                .min(extracted.len());
            offset_in_extracted
        });

        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
        if let Some(offset) = cursor_offset {
            result.push_str(&extracted[..offset]);
            result.push_str(super::CURSOR_MARKER);
            result.push_str(&extracted[offset..]);
        } else {
            result.push_str(extracted);
        }

        Some(result)
    }
}
1084
1085#[cfg(test)]
1086mod tests {
1087 use super::*;
1088 use indoc::indoc;
1089
1090 fn make_input(
1091 cursor_excerpt: &str,
1092 editable_range: Range<usize>,
1093 cursor_offset: usize,
1094 events: Vec<Event>,
1095 related_files: Vec<RelatedFile>,
1096 ) -> ZetaPromptInput {
1097 ZetaPromptInput {
1098 cursor_path: Path::new("test.rs").into(),
1099 cursor_excerpt: cursor_excerpt.into(),
1100 editable_range_in_excerpt: editable_range,
1101 cursor_offset_in_excerpt: cursor_offset,
1102 excerpt_start_row: None,
1103 events: events.into_iter().map(Arc::new).collect(),
1104 related_files,
1105 excerpt_ranges: None,
1106 preferred_model: None,
1107 in_open_source_repo: false,
1108 }
1109 }
1110
1111 fn make_event(path: &str, diff: &str) -> Event {
1112 Event::BufferChange {
1113 path: Path::new(path).into(),
1114 old_path: Path::new(path).into(),
1115 diff: diff.to_string(),
1116 predicted: false,
1117 in_open_source_repo: false,
1118 }
1119 }
1120
1121 fn make_related_file(path: &str, content: &str) -> RelatedFile {
1122 RelatedFile {
1123 path: Path::new(path).into(),
1124 max_row: content.lines().count() as u32,
1125 excerpts: vec![RelatedExcerpt {
1126 row_range: 0..content.lines().count() as u32,
1127 text: content.into(),
1128 }],
1129 in_open_source_repo: false,
1130 }
1131 }
1132
1133 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1134 format_zeta_prompt_with_budget(input, ZetaFormat::V0114180EditableRegion, max_tokens)
1135 }
1136
/// With a 10000-token budget the prompt contains the related file, the edit
/// history and the cursor file section.
#[test]
fn test_no_truncation_when_within_budget() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    let expected = indoc! {r#"
        <|file_sep|>related.rs
        fn helper() {}
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new
        <|file_sep|>test.rs
        <|fim_prefix|>
        prefix
        <|fim_middle|>current
        edi<|user_cursor|>table
        <|fim_suffix|>

        suffix
        <|fim_middle|>updated
    "#};

    assert_eq!(format_with_budget(&input, 10000), expected);
}
1169
/// Lowering the budget from 10000 to 50 tokens removes the edit-history
/// section, while both related files and the cursor file section remain.
#[test]
fn test_truncation_drops_edit_history_when_budget_tight() {
    let related_files = vec![
        make_related_file("r1.rs", "a\n"),
        make_related_file("r2.rs", "b\n"),
    ];
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let input = make_input("code", 0..4, 2, events, related_files);

    // Generous budget: every section is present.
    let untruncated = indoc! {r#"
        <|file_sep|>r1.rs
        a
        <|file_sep|>r2.rs
        b
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), untruncated);

    // Tight budget: the edit history is gone, the related files stay.
    let without_history = indoc! {r#"
        <|file_sep|>r1.rs
        a
        <|file_sep|>r2.rs
        b
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 50), without_history);
}
1220
/// A related file with three excerpts is emitted in full under a 10000-token
/// budget; under a 50-token budget only its first excerpt (followed by the
/// `...` separator) is kept.
#[test]
fn test_truncation_includes_partial_excerpts() {
    let excerpts = [
        (0..10, "first excerpt\n"),
        (10..20, "second excerpt\n"),
        (20..30, "third excerpt\n"),
    ]
    .into_iter()
    .map(|(row_range, text)| RelatedExcerpt {
        row_range,
        text: text.into(),
    })
    .collect();

    let big_file = RelatedFile {
        path: Path::new("big.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts,
    };
    let input = make_input("x", 0..1, 0, vec![], vec![big_file]);

    let all_excerpts = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        second excerpt
        ...
        third excerpt
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), all_excerpts);

    let first_excerpt_only = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 50), first_excerpt_only);
}
1282
/// With two events, a 55-token budget keeps only the later one (`new.rs`);
/// a 10000-token budget keeps both in their original order.
#[test]
fn test_truncation_drops_older_events_first() {
    let older = make_event("old.rs", "-1\n");
    let newer = make_event("new.rs", "-2\n");
    let input = make_input("x", 0..1, 0, vec![older, newer], vec![]);

    let both_events = indoc! {r#"
        <|file_sep|>edit history
        --- a/old.rs
        +++ b/old.rs
        -1
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), both_events);

    let newest_event_only = indoc! {r#"
        <|file_sep|>edit history
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 55), newest_event_only);
}
1328
/// With a 30-token budget neither the related file nor the edit history is
/// emitted, but the cursor file section is still produced in full.
#[test]
fn test_cursor_excerpt_always_included_with_minimal_budget() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "helper\n")];
    let input = make_input("fn main() {}", 0..12, 3, events, related_files);

    let cursor_section_only = indoc! {r#"
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        fn <|user_cursor|>main() {}
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};

    assert_eq!(format_with_budget(&input, 30), cursor_section_only);
}
1351
1352 fn format_seed_coder(input: &ZetaPromptInput) -> String {
1353 format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, 10000)
1354 }
1355
1356 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1357 format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, max_tokens)
1358 }
1359
/// Pins the full SeedCoder prompt for an input with one related file and one
/// edit event: the suffix section first, then the related file, the edit
/// history and the cursor file with its `CURRENT` block.
#[test]
fn test_seed_coder_basic_format() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    // The expected prompt ends right after `<[fim-middle]>`, with no newline.
    let expected = indoc! {r#"
        <[fim-suffix]>
        suffix
        <[fim-prefix]><filename>related.rs
        fn helper() {}

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new

        <filename>test.rs
        prefix
        <<<<<<< CURRENT
        edi<|user_cursor|>table
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&input), expected);
}
1392
/// Without events or related files the SeedCoder prompt holds only the suffix
/// section and the cursor file section.
#[test]
fn test_seed_coder_no_context() {
    let no_events = vec![];
    let no_related_files = vec![];
    let input = make_input(
        "before\nmiddle\nafter",
        7..13,
        10,
        no_events,
        no_related_files,
    );

    let expected = indoc! {r#"
        <[fim-suffix]>
        after
        <[fim-prefix]><filename>test.rs
        before
        <<<<<<< CURRENT
        mid<|user_cursor|>dle
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&input), expected);
}
1410
/// Under a 30-token budget the SeedCoder prompt loses the related file and the
/// edit history but keeps the cursor file section.
#[test]
fn test_seed_coder_truncation_drops_context() {
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let related_files = vec![make_related_file("r1.rs", "content\n")];
    let input = make_input("code", 0..4, 2, events, related_files);

    // With large budget, everything is included
    let with_context = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>r1.rs
        content

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&input), with_context);

    // With tight budget, context is dropped but cursor section remains
    let cursor_section_only = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(
        format_seed_coder_with_budget(&input, 30),
        cursor_section_only
    );
}
1454
/// Cleaning SeedCoder output yields `"new code\n"` whether or not the raw
/// output carries a trailing `>>>>>>> UPDATED` line.
#[test]
fn test_seed_coder_clean_output() {
    let raw_outputs = [
        // Output that ends with the marker line.
        "new code\n>>>>>>> UPDATED\n",
        // Output without the marker line.
        "new code\n",
    ];

    for raw in raw_outputs {
        assert_eq!(
            clean_zeta2_model_output(raw, ZetaFormat::V0211SeedCoder),
            "new code\n"
        );
    }
}
1469
/// Pins the complete Zeta1 prompt for an excerpt that starts at row 0, has one
/// edit event, and whose editable region covers only the first two lines of
/// `foo`.
#[test]
fn test_format_zeta1_from_input_basic() {
    // Byte layout of the excerpt (the offsets below depend on the four-space
    // indent of the `let` line):
    //    0..15  "fn before() {}\n"
    //   15..26  "fn foo() {\n"
    //   26..41  "    let x = 1;\n"
    //   41..57  "}\nfn after() {}\n"
    let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
    let editable_range = 15..41;
    // 26 + 4: the cursor sits right after the indentation of the `let` line.
    let cursor_offset = 30;

    // Guard the literal against whitespace damage: the ranges are raw byte
    // offsets and silently point elsewhere if the indentation changes.
    assert_eq!(
        &excerpt[editable_range.clone()],
        "fn foo() {\n    let x = 1;\n"
    );

    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        editable_range_in_excerpt: editable_range.clone(),
        cursor_offset_in_excerpt: cursor_offset,
        excerpt_start_row: Some(0),
        events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
        related_files: vec![],
        excerpt_ranges: None,
        preferred_model: None,
        in_open_source_repo: false,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, editable_range, 0..excerpt.len());

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "User edited other.rs:\n",
            "```diff\n",
            "-old\n",
            "+new\n",
            "\n",
            "```\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|start_of_file|>\n",
            "fn before() {}\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "\n",
            "<|editable_region_end|>}\n",
            "fn after() {}\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
1523
/// Pins the Zeta1 prompt for an excerpt whose start row is 10: the expected
/// prompt has no `<|start_of_file|>` line, and the editable region spans the
/// whole excerpt.
#[test]
fn test_format_zeta1_from_input_no_start_of_file() {
    // Byte layout of the excerpt (the offsets below depend on the four-space
    // indent of the `let` line):
    //    0..11  "fn foo() {\n"
    //   11..26  "    let x = 1;\n"
    //   26..28  "}\n"
    let excerpt = "fn foo() {\n    let x = 1;\n}\n";
    // 11 + 4: the cursor sits right after the indentation of the `let` line.
    let cursor_offset = 15;

    // Guard the literal against whitespace damage: `0..28` must cover exactly
    // the whole excerpt.
    assert_eq!(excerpt.len(), 28);

    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        editable_range_in_excerpt: 0..28,
        cursor_offset_in_excerpt: cursor_offset,
        excerpt_start_row: Some(10),
        events: vec![],
        related_files: vec![],
        excerpt_ranges: None,
        preferred_model: None,
        in_open_source_repo: false,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "\n",
            "<|editable_region_end|>\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
1569
/// Pins the Zeta1 prompt when the editable range is a strict sub-range of the
/// context range: the text before and after the editable region lands outside
/// the editable-region markers.
#[test]
fn test_format_zeta1_from_input_with_sub_ranges() {
    // Byte layout of the excerpt (the offsets below depend on the four-space
    // indent of the `let` line):
    //    0..10  "// prefix\n"
    //   10..21  "fn foo() {\n"
    //   21..36  "    let x = 1;\n"
    //   36..38  "}\n"
    //   38..48  "// suffix\n"
    let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
    // Ends right after the closing brace, before its newline.
    let editable_range = 10..37;
    let context_range = 0..excerpt.len();
    // 21 + 4: the cursor sits right after the indentation of the `let` line.
    let cursor_offset = 25;

    // Guard the literal against whitespace damage: the ranges are raw byte
    // offsets and silently point elsewhere if the indentation changes.
    assert_eq!(
        &excerpt[editable_range.clone()],
        "fn foo() {\n    let x = 1;\n}"
    );

    let input = ZetaPromptInput {
        cursor_path: Path::new("test.rs").into(),
        cursor_excerpt: excerpt.into(),
        editable_range_in_excerpt: editable_range.clone(),
        cursor_offset_in_excerpt: cursor_offset,
        excerpt_start_row: Some(0),
        events: vec![],
        related_files: vec![],
        excerpt_ranges: None,
        preferred_model: None,
        in_open_source_repo: false,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```test.rs\n",
            "<|start_of_file|>\n",
            "// prefix\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "<|editable_region_end|>\n",
            "// suffix\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
1621
/// Cleaning output that is wrapped in editable-region markers yields the text
/// between them, without the newline after the start marker or the newline
/// before the end marker.
#[test]
fn test_clean_zeta1_model_output_basic() {
    // The `println!` line is indented four spaces relative to `fn main`;
    // the expected string below must carry the same indentation.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
}
1635
/// Cleaning output that contains `<|user_cursor_is_here|>` yields the region
/// text with `<|user_cursor|>` at the same position.
#[test]
fn test_clean_zeta1_model_output_with_cursor() {
    // The cursor line is indented four spaces relative to `fn main`;
    // the expected string below must carry the same indentation.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            <|user_cursor_is_here|>println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(
        cleaned,
        "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
    );
}
1652
/// Output without any editable-region markers comes back unchanged.
#[test]
fn test_clean_zeta1_model_output_no_markers() {
    let cleaned = zeta1::clean_zeta1_model_output("fn main() {}\n");
    assert_eq!(cleaned.as_deref(), Some("fn main() {}\n"));
}
1659
/// A start marker immediately followed by the end marker cleans to an empty
/// string.
#[test]
fn test_clean_zeta1_model_output_empty_region() {
    let cleaned =
        zeta1::clean_zeta1_model_output("<|editable_region_start|>\n<|editable_region_end|>\n");
    assert_eq!(cleaned.as_deref(), Some(""));
}
1666}