1use anyhow::Result;
2use serde::{Deserialize, Serialize};
3use std::fmt::Write;
4use std::ops::Range;
5use std::path::Path;
6use std::sync::Arc;
7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
8
/// Marker written into the prompt at the user's cursor position inside the
/// editable region. It is also listed among every format's special tokens.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes to
/// `format_zeta_prompt_with_budget`.
pub const MAX_PROMPT_TOKENS: usize = 4096;
11
12/// Use up to this amount of the editable region for prefill.
13/// Larger values may result in more robust generation, but
14/// this region becomes non-editable.
15pub const PREFILL_RATIO: f64 = 0.1; // 10%
16
/// Rough token estimate for a text of `bytes` bytes, assuming three bytes per
/// token (integer division, so the result rounds down).
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
20
/// The client's preferred edit prediction model. The server may override this.
///
/// Carried in `ZetaPromptInput::preferred_model`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum EditPredictionModelKind {
    /// NOTE(review): presumably selects the prompt format implemented in the
    /// `zeta1` module — confirm against the caller.
    Zeta1,
    /// NOTE(review): presumably selects one of the `ZetaFormat` prompt
    /// formats — confirm against the caller.
    Zeta2,
}
27
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
///
/// `resolve_cursor_region` picks `editable_150` / `editable_150_context_350`
/// for the `V0112MiddleAtEnd` and `V0113Ordered` formats, and `editable_180` /
/// `editable_180_context_350` for every other format.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
}
46
/// Everything needed to render an edit prediction prompt: the excerpt around
/// the cursor, the recent edit events and excerpts from related files.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written after the file marker
    /// at the top of the cursor section.
    pub cursor_path: Arc<Path>,
    /// Text surrounding the cursor. All offsets and ranges below are byte
    /// offsets into this string.
    pub cursor_excerpt: Arc<str>,
    /// Editable region within `cursor_excerpt`, used when `excerpt_ranges`
    /// is absent.
    pub editable_range_in_excerpt: Range<usize>,
    /// Cursor position within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Row at which the excerpt starts. The zeta1 formatter emits its
    /// start-of-file marker only when this is `Some(0)` and the context range
    /// starts at offset 0.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit history events, rendered into the prompt in slice order. When the
    /// token budget is tight, the events at the end of the list are the ones kept.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other files, included as far as the token budget allows.
    pub related_files: Vec<RelatedFile>,
    /// When set, the excerpt was computed with a larger budget (~512 tokens)
    /// and these ranges let the server select model-appropriate subsets.
    /// When absent, the excerpt IS the context region and
    /// `editable_range_in_excerpt` is the only editable range.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_ranges: Option<ExcerptRanges>,
    /// Client's preferred model. The server may override.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub preferred_model: Option<EditPredictionModelKind>,
    /// NOTE(review): not read by the prompt formatting code in this file;
    /// presumably consumed by the server — confirm.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// NOTE(review): not read by the prompt formatting code in this file;
    /// presumably a data-collection consent flag — confirm.
    #[serde(default)]
    pub can_collect_data: bool,
}
71
/// Prompt format versions. The variant decides which cursor-section writer,
/// special tokens and excerpt ranges are used.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// NOTE(review): presumably kept for the version-numbered variant names —
// confirm whether this `allow` is still required.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    /// Rendered by `v0112_middle_at_end`: prefix, suffix, then the editable
    /// region last.
    V0112MiddleAtEnd,
    /// Rendered by `v0113_ordered`: prefix, editable region, suffix.
    V0113Ordered,
    /// Same writer as `V0113Ordered`, but uses the 180-token editable ranges
    /// when `excerpt_ranges` is present. This is the default format.
    #[default]
    V0114180EditableRegion,
    /// Rendered by `v0120_git_merge_markers`.
    V0120GitMergeMarkers,
    /// Rendered by `v0131_git_merge_markers_prefix`.
    V0131GitMergeMarkersPrefix,
    /// Same prompt as `V0131GitMergeMarkersPrefix`, plus a prefill computed by
    /// `v0211_prefill`.
    V0211Prefill,
    /// Rendered entirely by `seed_coder::format_prompt_with_budget`.
    V0211SeedCoder,
}
96
97impl std::fmt::Display for ZetaFormat {
98 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
99 write!(f, "{}", <&'static str>::from(self))
100 }
101}
102
103impl ZetaFormat {
104 pub fn parse(format_name: &str) -> Result<Self> {
105 let mut results = ZetaFormat::iter().filter(|version| {
106 <&'static str>::from(version)
107 .to_lowercase()
108 .contains(&format_name.to_lowercase())
109 });
110 let Some(result) = results.next() else {
111 anyhow::bail!(
112 "`{format_name}` did not match any of:\n{}",
113 Self::options_as_string()
114 );
115 };
116 if results.next().is_some() {
117 anyhow::bail!(
118 "`{format_name}` matched more than one of:\n{}",
119 Self::options_as_string()
120 );
121 }
122 Ok(result)
123 }
124
125 pub fn options_as_string() -> String {
126 ZetaFormat::iter()
127 .map(|format| format!("- {}\n", <&'static str>::from(format)))
128 .collect::<Vec<_>>()
129 .concat()
130 }
131
132 pub fn special_tokens(&self) -> &'static [&'static str] {
133 match self {
134 ZetaFormat::V0112MiddleAtEnd
135 | ZetaFormat::V0113Ordered
136 | ZetaFormat::V0114180EditableRegion => &[
137 "<|fim_prefix|>",
138 "<|fim_suffix|>",
139 "<|fim_middle|>",
140 "<|file_sep|>",
141 CURSOR_MARKER,
142 ],
143 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
144 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
145 v0131_git_merge_markers_prefix::special_tokens()
146 }
147 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
148 }
149 }
150}
151
/// An entry of the user's edit history. Serialized with an `event` tag field.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, described as a diff.
    BufferChange {
        /// Path after the change; rendered on the `+++ b` line by `write_event`.
        path: Arc<Path>,
        /// Path before the change; rendered on the `--- a` line by
        /// `write_event`. Differs from `path` for a rename.
        old_path: Arc<Path>,
        /// Diff text, appended verbatim after the two path lines.
        diff: String,
        /// When true, `write_event` prefixes the event with a
        /// "User accepted prediction" comment line.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; ignored when the
        /// event is rendered into a prompt.
        in_open_source_repo: bool,
    },
}
163
164impl Event {
165 pub fn in_open_source_repo(&self) -> bool {
166 match self {
167 Event::BufferChange {
168 in_open_source_repo,
169 ..
170 } => *in_open_source_repo,
171 }
172 }
173}
174
175pub fn write_event(prompt: &mut String, event: &Event) {
176 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
177 for component in path.components() {
178 prompt.push('/');
179 write!(prompt, "{}", component.as_os_str().display()).ok();
180 }
181 }
182 match event {
183 Event::BufferChange {
184 path,
185 old_path,
186 diff,
187 predicted,
188 in_open_source_repo: _,
189 } => {
190 if *predicted {
191 prompt.push_str("// User accepted prediction:\n");
192 }
193 prompt.push_str("--- a");
194 write_path_as_unix_str(prompt, old_path.as_ref());
195 prompt.push_str("\n+++ b");
196 write_path_as_unix_str(prompt, path.as_ref());
197 prompt.push('\n');
198 prompt.push_str(diff);
199 }
200 }
201}
202
/// Excerpts from a file other than the one containing the cursor, offered to
/// the model as extra context.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written after the file marker that introduces this file.
    pub path: Arc<Path>,
    /// An excerpt whose `row_range.end` is below this value is followed by a
    /// `...` line in the prompt.
    pub max_row: u32,
    /// Excerpts to render, in order.
    pub excerpts: Vec<RelatedExcerpt>,
    /// NOTE(review): not read by the prompt formatting code in this file —
    /// confirm where it is consumed.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
211
/// A contiguous piece of text from a related file.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by `text`; only `end` is consulted when rendering, to
    /// decide whether a trailing `...` line is needed.
    pub row_range: Range<u32>,
    /// The excerpt text, written verbatim (a newline is added if missing).
    pub text: Arc<str>,
}
217
218pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
219 format
220 .special_tokens()
221 .iter()
222 .any(|token| input.cursor_excerpt.contains(token))
223}
224
225pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
226 format_zeta_prompt_with_budget(input, format, MAX_PROMPT_TOKENS)
227}
228
229/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
230pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
231 match format {
232 ZetaFormat::V0120GitMergeMarkers => output
233 .strip_suffix(v0120_git_merge_markers::END_MARKER)
234 .unwrap_or(output),
235 ZetaFormat::V0131GitMergeMarkersPrefix => output
236 .strip_suffix(v0131_git_merge_markers_prefix::END_MARKER)
237 .unwrap_or(output),
238 ZetaFormat::V0211SeedCoder => output
239 .strip_suffix(seed_coder::END_MARKER)
240 .unwrap_or(output),
241 _ => output,
242 }
243}
244
245fn resolve_cursor_region(
246 input: &ZetaPromptInput,
247 format: ZetaFormat,
248) -> (&str, Range<usize>, usize) {
249 let Some(ranges) = &input.excerpt_ranges else {
250 return (
251 &input.cursor_excerpt,
252 input.editable_range_in_excerpt.clone(),
253 input.cursor_offset_in_excerpt,
254 );
255 };
256
257 let (editable_range, context_range) = match format {
258 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
259 ranges.editable_150.clone(),
260 ranges.editable_150_context_350.clone(),
261 ),
262 ZetaFormat::V0114180EditableRegion
263 | ZetaFormat::V0120GitMergeMarkers
264 | ZetaFormat::V0131GitMergeMarkersPrefix
265 | ZetaFormat::V0211Prefill
266 | ZetaFormat::V0211SeedCoder => (
267 ranges.editable_180.clone(),
268 ranges.editable_180_context_350.clone(),
269 ),
270 };
271
272 let context_start = context_range.start;
273 let context_text = &input.cursor_excerpt[context_range];
274 let adjusted_editable =
275 (editable_range.start - context_start)..(editable_range.end - context_start);
276 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
277
278 (context_text, adjusted_editable, adjusted_cursor)
279}
280
281fn format_zeta_prompt_with_budget(
282 input: &ZetaPromptInput,
283 format: ZetaFormat,
284 max_tokens: usize,
285) -> String {
286 let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
287 let path = &*input.cursor_path;
288
289 let mut cursor_section = String::new();
290 match format {
291 ZetaFormat::V0112MiddleAtEnd => {
292 v0112_middle_at_end::write_cursor_excerpt_section(
293 &mut cursor_section,
294 path,
295 context,
296 &editable_range,
297 cursor_offset,
298 );
299 }
300 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
301 v0113_ordered::write_cursor_excerpt_section(
302 &mut cursor_section,
303 path,
304 context,
305 &editable_range,
306 cursor_offset,
307 )
308 }
309 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
310 &mut cursor_section,
311 path,
312 context,
313 &editable_range,
314 cursor_offset,
315 ),
316 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
317 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
318 &mut cursor_section,
319 path,
320 context,
321 &editable_range,
322 cursor_offset,
323 )
324 }
325 ZetaFormat::V0211SeedCoder => {
326 return seed_coder::format_prompt_with_budget(
327 path,
328 context,
329 &editable_range,
330 cursor_offset,
331 &input.events,
332 &input.related_files,
333 max_tokens,
334 );
335 }
336 }
337
338 let cursor_tokens = estimate_tokens(cursor_section.len());
339 let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
340
341 let edit_history_section = format_edit_history_within_budget(
342 &input.events,
343 "<|file_sep|>",
344 "edit history",
345 budget_after_cursor,
346 );
347 let edit_history_tokens = estimate_tokens(edit_history_section.len());
348 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
349
350 let related_files_section = format_related_files_within_budget(
351 &input.related_files,
352 "<|file_sep|>",
353 budget_after_edit_history,
354 );
355
356 let mut prompt = String::new();
357 prompt.push_str(&related_files_section);
358 prompt.push_str(&edit_history_section);
359 prompt.push_str(&cursor_section);
360 prompt
361}
362
363pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
364 match format {
365 ZetaFormat::V0112MiddleAtEnd
366 | ZetaFormat::V0113Ordered
367 | ZetaFormat::V0114180EditableRegion
368 | ZetaFormat::V0120GitMergeMarkers
369 | ZetaFormat::V0131GitMergeMarkersPrefix
370 | ZetaFormat::V0211SeedCoder => String::new(),
371 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(input),
372 }
373}
374
375fn format_edit_history_within_budget(
376 events: &[Arc<Event>],
377 file_marker: &str,
378 edit_history_name: &str,
379 max_tokens: usize,
380) -> String {
381 let header = format!("{}{}\n", file_marker, edit_history_name);
382 let header_tokens = estimate_tokens(header.len());
383 if header_tokens >= max_tokens {
384 return String::new();
385 }
386
387 let mut event_strings: Vec<String> = Vec::new();
388 let mut total_tokens = header_tokens;
389
390 for event in events.iter().rev() {
391 let mut event_str = String::new();
392 write_event(&mut event_str, event);
393 let event_tokens = estimate_tokens(event_str.len());
394
395 if total_tokens + event_tokens > max_tokens {
396 break;
397 }
398 total_tokens += event_tokens;
399 event_strings.push(event_str);
400 }
401
402 if event_strings.is_empty() {
403 return String::new();
404 }
405
406 let mut result = header;
407 for event_str in event_strings.iter().rev() {
408 result.push_str(event_str);
409 }
410 result
411}
412
413fn format_related_files_within_budget(
414 related_files: &[RelatedFile],
415 file_marker: &str,
416 max_tokens: usize,
417) -> String {
418 let mut result = String::new();
419 let mut total_tokens = 0;
420
421 for file in related_files {
422 let path_str = file.path.to_string_lossy();
423 let header = format!("{}{}\n", file_marker, path_str);
424 let header_tokens = estimate_tokens(header.len());
425
426 if total_tokens + header_tokens > max_tokens {
427 break;
428 }
429
430 let mut file_tokens = header_tokens;
431 let mut excerpts_to_include = 0;
432
433 for excerpt in &file.excerpts {
434 let needs_newline = !excerpt.text.ends_with('\n');
435 let needs_ellipsis = excerpt.row_range.end < file.max_row;
436 let excerpt_len = excerpt.text.len()
437 + if needs_newline { "\n".len() } else { 0 }
438 + if needs_ellipsis { "...\n".len() } else { 0 };
439
440 let excerpt_tokens = estimate_tokens(excerpt_len);
441 if total_tokens + file_tokens + excerpt_tokens > max_tokens {
442 break;
443 }
444 file_tokens += excerpt_tokens;
445 excerpts_to_include += 1;
446 }
447
448 if excerpts_to_include > 0 {
449 total_tokens += file_tokens;
450 result.push_str(&header);
451 for excerpt in file.excerpts.iter().take(excerpts_to_include) {
452 result.push_str(&excerpt.text);
453 if !result.ends_with('\n') {
454 result.push('\n');
455 }
456 if excerpt.row_range.end < file.max_row {
457 result.push_str("...\n");
458 }
459 }
460 }
461 }
462
463 result
464}
465
466pub fn write_related_files(
467 prompt: &mut String,
468 related_files: &[RelatedFile],
469) -> Vec<Range<usize>> {
470 let mut ranges = Vec::new();
471 for file in related_files {
472 let start = prompt.len();
473 let path_str = file.path.to_string_lossy();
474 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
475 for excerpt in &file.excerpts {
476 prompt.push_str(&excerpt.text);
477 if !prompt.ends_with('\n') {
478 prompt.push('\n');
479 }
480 if excerpt.row_range.end < file.max_row {
481 prompt.push_str("...\n");
482 }
483 }
484 let end = prompt.len();
485 ranges.push(start..end);
486 }
487 ranges
488}
489
490mod v0112_middle_at_end {
491 use super::*;
492
493 pub fn write_cursor_excerpt_section(
494 prompt: &mut String,
495 path: &Path,
496 context: &str,
497 editable_range: &Range<usize>,
498 cursor_offset: usize,
499 ) {
500 let path_str = path.to_string_lossy();
501 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
502
503 prompt.push_str("<|fim_prefix|>\n");
504 prompt.push_str(&context[..editable_range.start]);
505
506 prompt.push_str("<|fim_suffix|>\n");
507 prompt.push_str(&context[editable_range.end..]);
508 if !prompt.ends_with('\n') {
509 prompt.push('\n');
510 }
511
512 prompt.push_str("<|fim_middle|>current\n");
513 prompt.push_str(&context[editable_range.start..cursor_offset]);
514 prompt.push_str(CURSOR_MARKER);
515 prompt.push_str(&context[cursor_offset..editable_range.end]);
516 if !prompt.ends_with('\n') {
517 prompt.push('\n');
518 }
519
520 prompt.push_str("<|fim_middle|>updated\n");
521 }
522}
523
524mod v0113_ordered {
525 use super::*;
526
527 pub fn write_cursor_excerpt_section(
528 prompt: &mut String,
529 path: &Path,
530 context: &str,
531 editable_range: &Range<usize>,
532 cursor_offset: usize,
533 ) {
534 let path_str = path.to_string_lossy();
535 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
536
537 prompt.push_str("<|fim_prefix|>\n");
538 prompt.push_str(&context[..editable_range.start]);
539 if !prompt.ends_with('\n') {
540 prompt.push('\n');
541 }
542
543 prompt.push_str("<|fim_middle|>current\n");
544 prompt.push_str(&context[editable_range.start..cursor_offset]);
545 prompt.push_str(CURSOR_MARKER);
546 prompt.push_str(&context[cursor_offset..editable_range.end]);
547 if !prompt.ends_with('\n') {
548 prompt.push('\n');
549 }
550
551 prompt.push_str("<|fim_suffix|>\n");
552 prompt.push_str(&context[editable_range.end..]);
553 if !prompt.ends_with('\n') {
554 prompt.push('\n');
555 }
556
557 prompt.push_str("<|fim_middle|>updated\n");
558 }
559}
560
561pub mod v0120_git_merge_markers {
562 //! A prompt that uses git-style merge conflict markers to represent the editable region.
563 //!
564 //! Example prompt:
565 //!
566 //! <|file_sep|>path/to/target_file.py
567 //! <|fim_prefix|>
568 //! code before editable region
569 //! <|fim_suffix|>
570 //! code after editable region
571 //! <|fim_middle|>
572 //! <<<<<<< CURRENT
573 //! code that
574 //! needs to<|user_cursor|>
575 //! be rewritten
576 //! =======
577 //!
578 //! Expected output (should be generated by the model):
579 //!
580 //! updated
581 //! code with
582 //! changes applied
583 //! >>>>>>> UPDATED
584
585 use super::*;
586
587 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
588 pub const SEPARATOR: &str = "=======\n";
589 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
590
591 pub fn special_tokens() -> &'static [&'static str] {
592 &[
593 "<|fim_prefix|>",
594 "<|fim_suffix|>",
595 "<|fim_middle|>",
596 "<|file_sep|>",
597 START_MARKER,
598 SEPARATOR,
599 END_MARKER,
600 CURSOR_MARKER,
601 ]
602 }
603
604 pub fn write_cursor_excerpt_section(
605 prompt: &mut String,
606 path: &Path,
607 context: &str,
608 editable_range: &Range<usize>,
609 cursor_offset: usize,
610 ) {
611 let path_str = path.to_string_lossy();
612 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
613
614 prompt.push_str("<|fim_prefix|>");
615 prompt.push_str(&context[..editable_range.start]);
616
617 prompt.push_str("<|fim_suffix|>");
618 prompt.push_str(&context[editable_range.end..]);
619 if !prompt.ends_with('\n') {
620 prompt.push('\n');
621 }
622
623 prompt.push_str("<|fim_middle|>");
624 prompt.push_str(START_MARKER);
625 prompt.push_str(&context[editable_range.start..cursor_offset]);
626 prompt.push_str(CURSOR_MARKER);
627 prompt.push_str(&context[cursor_offset..editable_range.end]);
628 if !prompt.ends_with('\n') {
629 prompt.push('\n');
630 }
631 prompt.push_str(SEPARATOR);
632 }
633}
634
635pub mod v0131_git_merge_markers_prefix {
636 //! A prompt that uses git-style merge conflict markers to represent the editable region.
637 //!
638 //! Example prompt:
639 //!
640 //! <|file_sep|>path/to/target_file.py
641 //! <|fim_prefix|>
642 //! code before editable region
643 //! <<<<<<< CURRENT
644 //! code that
645 //! needs to<|user_cursor|>
646 //! be rewritten
647 //! =======
648 //! <|fim_suffix|>
649 //! code after editable region
650 //! <|fim_middle|>
651 //!
652 //! Expected output (should be generated by the model):
653 //!
654 //! updated
655 //! code with
656 //! changes applied
657 //! >>>>>>> UPDATED
658
659 use super::*;
660
661 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
662 pub const SEPARATOR: &str = "=======\n";
663 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
664
665 pub fn special_tokens() -> &'static [&'static str] {
666 &[
667 "<|fim_prefix|>",
668 "<|fim_suffix|>",
669 "<|fim_middle|>",
670 "<|file_sep|>",
671 START_MARKER,
672 SEPARATOR,
673 END_MARKER,
674 CURSOR_MARKER,
675 ]
676 }
677
678 pub fn write_cursor_excerpt_section(
679 prompt: &mut String,
680 path: &Path,
681 context: &str,
682 editable_range: &Range<usize>,
683 cursor_offset: usize,
684 ) {
685 let path_str = path.to_string_lossy();
686 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
687
688 prompt.push_str("<|fim_prefix|>");
689 prompt.push_str(&context[..editable_range.start]);
690 prompt.push_str(START_MARKER);
691 prompt.push_str(&context[editable_range.start..cursor_offset]);
692 prompt.push_str(CURSOR_MARKER);
693 prompt.push_str(&context[cursor_offset..editable_range.end]);
694 if !prompt.ends_with('\n') {
695 prompt.push('\n');
696 }
697 prompt.push_str(SEPARATOR);
698
699 prompt.push_str("<|fim_suffix|>");
700 prompt.push_str(&context[editable_range.end..]);
701 if !prompt.ends_with('\n') {
702 prompt.push('\n');
703 }
704
705 prompt.push_str("<|fim_middle|>");
706 }
707}
708
709pub mod v0211_prefill {
710 use super::*;
711
712 pub fn get_prefill(input: &ZetaPromptInput) -> String {
713 let editable_region = &input.cursor_excerpt
714 [input.editable_range_in_excerpt.start..input.editable_range_in_excerpt.end];
715
716 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
717 let prefill_len = editable_region.floor_char_boundary(prefill_len);
718
719 // Find a token boundary to avoid splitting tokens in the prefill.
720 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
721 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
722 // the \n and consume any consecutive \n characters after it.
723 let prefill = &editable_region[..prefill_len];
724 match prefill.rfind('\n') {
725 Some(pos) => {
726 let mut end = pos + 1;
727 while end < editable_region.len()
728 && editable_region.as_bytes().get(end) == Some(&b'\n')
729 {
730 end += 1;
731 }
732 editable_region[..end].to_string()
733 }
734 // No newline found. Fall back to splitting before the last space
735 // (word-level boundary)
736 None => match prefill.rfind(' ') {
737 Some(pos) => prefill[..pos].to_string(),
738 None => prefill.to_string(),
739 },
740 }
741 }
742}
743
744pub mod seed_coder {
745 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
746 //!
747 //! Seed-Coder uses different FIM tokens and order than Qwen:
748 //! - SPM order: suffix comes FIRST, then prefix, then middle
749 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
750 //! - File markers: StarCoder-style `<filename>path` (single token + path)
751 //!
752 //! All context (related files, edit history) goes in the PREFIX section.
753 //! The suffix contains only code after the editable region.
754 //!
755 //! Example prompt:
756 //!
757 //! <[fim-suffix]>
758 //! code after editable region
759 //! <[fim-prefix]><filename>related/file.py
760 //! related file content
761 //!
762 //! <filename>edit_history
763 //! --- a/some_file.py
764 //! +++ b/some_file.py
765 //! -old
766 //! +new
767 //!
768 //! <filename>path/to/target_file.py
769 //! code before editable region
770 //! <<<<<<< CURRENT
771 //! code that
772 //! needs to<|user_cursor|>
773 //! be rewritten
774 //! =======
775 //! <[fim-middle]>
776 //!
777 //! Expected output (model generates):
778 //!
779 //! updated
780 //! code with
781 //! changes applied
782 //! >>>>>>> UPDATED
783
784 use super::*;
785
786 pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
787 pub const FIM_PREFIX: &str = "<[fim-prefix]>";
788 pub const FIM_MIDDLE: &str = "<[fim-middle]>";
789 pub const FILE_MARKER: &str = "<filename>";
790
791 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
792 pub const SEPARATOR: &str = "=======\n";
793 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
794
795 pub fn special_tokens() -> &'static [&'static str] {
796 &[
797 FIM_SUFFIX,
798 FIM_PREFIX,
799 FIM_MIDDLE,
800 FILE_MARKER,
801 START_MARKER,
802 SEPARATOR,
803 END_MARKER,
804 CURSOR_MARKER,
805 ]
806 }
807
808 pub fn format_prompt_with_budget(
809 path: &Path,
810 context: &str,
811 editable_range: &Range<usize>,
812 cursor_offset: usize,
813 events: &[Arc<Event>],
814 related_files: &[RelatedFile],
815 max_tokens: usize,
816 ) -> String {
817 let suffix_section = build_suffix_section(context, editable_range);
818 let cursor_prefix_section =
819 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
820
821 let suffix_tokens = estimate_tokens(suffix_section.len());
822 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
823 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
824
825 let edit_history_section = super::format_edit_history_within_budget(
826 events,
827 FILE_MARKER,
828 "edit_history",
829 budget_after_cursor,
830 );
831 let edit_history_tokens = estimate_tokens(edit_history_section.len());
832 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
833
834 let related_files_section = super::format_related_files_within_budget(
835 related_files,
836 FILE_MARKER,
837 budget_after_edit_history,
838 );
839
840 let mut prompt = String::new();
841 prompt.push_str(&suffix_section);
842 prompt.push_str(FIM_PREFIX);
843 prompt.push_str(&related_files_section);
844 if !related_files_section.is_empty() {
845 prompt.push('\n');
846 }
847 prompt.push_str(&edit_history_section);
848 if !edit_history_section.is_empty() {
849 prompt.push('\n');
850 }
851 prompt.push_str(&cursor_prefix_section);
852 prompt.push_str(FIM_MIDDLE);
853 prompt
854 }
855
856 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
857 let mut section = String::new();
858 section.push_str(FIM_SUFFIX);
859 section.push_str(&context[editable_range.end..]);
860 if !section.ends_with('\n') {
861 section.push('\n');
862 }
863 section
864 }
865
866 fn build_cursor_prefix_section(
867 path: &Path,
868 context: &str,
869 editable_range: &Range<usize>,
870 cursor_offset: usize,
871 ) -> String {
872 let mut section = String::new();
873 let path_str = path.to_string_lossy();
874 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
875
876 section.push_str(&context[..editable_range.start]);
877 section.push_str(START_MARKER);
878 section.push_str(&context[editable_range.start..cursor_offset]);
879 section.push_str(CURSOR_MARKER);
880 section.push_str(&context[cursor_offset..editable_range.end]);
881 if !section.ends_with('\n') {
882 section.push('\n');
883 }
884 section.push_str(SEPARATOR);
885 section
886 }
887}
888
889/// The zeta1 prompt format
890pub mod zeta1 {
891 use super::*;
892 use std::fmt::Write;
893
894 pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
895 pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
896 pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
897 pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
898
899 const INSTRUCTION_HEADER: &str = concat!(
900 "### Instruction:\n",
901 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
902 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
903 "into account the cursor location.\n\n",
904 "### User Edits:\n\n"
905 );
906 const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
907 const RESPONSE_HEADER: &str = "\n\n### Response:\n";
908
909 /// Formats a complete zeta1 prompt from the input events and excerpt.
910 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
911 let mut prompt = String::with_capacity(
912 INSTRUCTION_HEADER.len()
913 + input_events.len()
914 + EXCERPT_HEADER.len()
915 + input_excerpt.len()
916 + RESPONSE_HEADER.len(),
917 );
918 prompt.push_str(INSTRUCTION_HEADER);
919 prompt.push_str(input_events);
920 prompt.push_str(EXCERPT_HEADER);
921 prompt.push_str(input_excerpt);
922 prompt.push_str(RESPONSE_HEADER);
923 prompt
924 }
925
926 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
927 /// editable and context byte-offset ranges within `cursor_excerpt`.
928 pub fn format_zeta1_from_input(
929 input: &ZetaPromptInput,
930 editable_range: Range<usize>,
931 context_range: Range<usize>,
932 ) -> String {
933 let events = format_zeta1_events(&input.events);
934 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
935 format_zeta1_prompt(&events, &excerpt)
936 }
937
938 /// Formats events in zeta1 style (oldest first).
939 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
940 let mut result = String::new();
941 for event in events {
942 let event_string = format_zeta1_event(event);
943 if event_string.is_empty() {
944 continue;
945 }
946 if !result.is_empty() {
947 result.push_str("\n\n");
948 }
949 result.push_str(&event_string);
950 }
951 result
952 }
953
954 fn format_zeta1_event(event: &Event) -> String {
955 match event {
956 Event::BufferChange {
957 path,
958 old_path,
959 diff,
960 ..
961 } => {
962 let mut prompt = String::new();
963 if old_path != path {
964 writeln!(
965 prompt,
966 "User renamed {} to {}\n",
967 old_path.display(),
968 path.display()
969 )
970 .ok();
971 }
972 if !diff.is_empty() {
973 write!(
974 prompt,
975 "User edited {}:\n```diff\n{}\n```",
976 path.display(),
977 diff
978 )
979 .ok();
980 }
981 prompt
982 }
983 }
984 }
985
986 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
987 /// within `cursor_excerpt`.
988 fn format_zeta1_excerpt(
989 input: &ZetaPromptInput,
990 editable_range: Range<usize>,
991 context_range: Range<usize>,
992 ) -> String {
993 let path_str = input.cursor_path.to_string_lossy();
994 let excerpt = &*input.cursor_excerpt;
995 let cursor_offset = input.cursor_offset_in_excerpt;
996
997 let mut prompt = String::new();
998 writeln!(&mut prompt, "```{path_str}").ok();
999
1000 let starts_at_file_beginning =
1001 input.excerpt_start_row == Some(0) && context_range.start == 0;
1002 if starts_at_file_beginning {
1003 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
1004 }
1005
1006 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
1007
1008 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
1009 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
1010 prompt.push_str(CURSOR_MARKER);
1011 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
1012 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
1013
1014 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
1015 write!(prompt, "\n```").ok();
1016
1017 prompt
1018 }
1019
1020 /// Cleans zeta1 model output by extracting content between editable region
1021 /// markers and converting the zeta1 cursor marker to the universal one.
1022 /// Returns `None` if the output doesn't contain the expected markers.
1023 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
1024 let content = output.replace(CURSOR_MARKER, "");
1025
1026 let content_start = content
1027 .find(EDITABLE_REGION_START_MARKER)
1028 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
1029 .map(|pos| {
1030 if content.as_bytes().get(pos) == Some(&b'\n') {
1031 pos + 1
1032 } else {
1033 pos
1034 }
1035 })
1036 .unwrap_or(0);
1037
1038 let content_end = content
1039 .find(EDITABLE_REGION_END_MARKER)
1040 .map(|pos| {
1041 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
1042 pos - 1
1043 } else {
1044 pos
1045 }
1046 })
1047 .unwrap_or(content.len());
1048
1049 if content_start > content_end {
1050 return Some(String::new());
1051 }
1052
1053 let extracted = &content[content_start..content_end];
1054
1055 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
1056 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
1057 let text_before_cursor = text_before_cursor
1058 .find(EDITABLE_REGION_START_MARKER)
1059 .map(|pos| {
1060 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
1061 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
1062 after_marker + 1
1063 } else {
1064 after_marker
1065 }
1066 })
1067 .unwrap_or(0);
1068 let offset_in_extracted = zeta1_cursor_pos
1069 .saturating_sub(text_before_cursor)
1070 .min(extracted.len());
1071 offset_in_extracted
1072 });
1073
1074 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
1075 if let Some(offset) = cursor_offset {
1076 result.push_str(&extracted[..offset]);
1077 result.push_str(super::CURSOR_MARKER);
1078 result.push_str(&extracted[offset..]);
1079 } else {
1080 result.push_str(extracted);
1081 }
1082
1083 Some(result)
1084 }
1085}
1086
1087#[cfg(test)]
1088mod tests {
1089 use super::*;
1090 use indoc::indoc;
1091
1092 fn make_input(
1093 cursor_excerpt: &str,
1094 editable_range: Range<usize>,
1095 cursor_offset: usize,
1096 events: Vec<Event>,
1097 related_files: Vec<RelatedFile>,
1098 ) -> ZetaPromptInput {
1099 ZetaPromptInput {
1100 cursor_path: Path::new("test.rs").into(),
1101 cursor_excerpt: cursor_excerpt.into(),
1102 editable_range_in_excerpt: editable_range,
1103 cursor_offset_in_excerpt: cursor_offset,
1104 excerpt_start_row: None,
1105 events: events.into_iter().map(Arc::new).collect(),
1106 related_files,
1107 excerpt_ranges: None,
1108 preferred_model: None,
1109 in_open_source_repo: false,
1110 can_collect_data: false,
1111 }
1112 }
1113
1114 fn make_event(path: &str, diff: &str) -> Event {
1115 Event::BufferChange {
1116 path: Path::new(path).into(),
1117 old_path: Path::new(path).into(),
1118 diff: diff.to_string(),
1119 predicted: false,
1120 in_open_source_repo: false,
1121 }
1122 }
1123
1124 fn make_related_file(path: &str, content: &str) -> RelatedFile {
1125 RelatedFile {
1126 path: Path::new(path).into(),
1127 max_row: content.lines().count() as u32,
1128 excerpts: vec![RelatedExcerpt {
1129 row_range: 0..content.lines().count() as u32,
1130 text: content.into(),
1131 }],
1132 in_open_source_repo: false,
1133 }
1134 }
1135
1136 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1137 format_zeta_prompt_with_budget(input, ZetaFormat::V0114180EditableRegion, max_tokens)
1138 }
1139
/// With a generous budget the prompt contains the related file, the edit
/// history and the cursor file, in that order.
#[test]
fn test_no_truncation_when_within_budget() {
    let edit_history = vec![make_event("a.rs", "-old\n+new\n")];
    let related = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, edit_history, related);

    let expected = indoc! {r#"
        <|file_sep|>related.rs
        fn helper() {}
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new
        <|file_sep|>test.rs
        <|fim_prefix|>
        prefix
        <|fim_middle|>current
        edi<|user_cursor|>table
        <|fim_suffix|>

        suffix
        <|fim_middle|>updated
    "#};

    let prompt = format_with_budget(&input, 10000);
    assert_eq!(prompt, expected);
}
1172
/// A 10000-token budget renders every section; a 50-token budget is expected
/// to omit the edit history while keeping both related files and the cursor
/// excerpt.
#[test]
fn test_truncation_drops_edit_history_when_budget_tight() {
    let edit_history = vec![make_event("a.rs", "-x\n+y\n")];
    let related = vec![
        make_related_file("r1.rs", "a\n"),
        make_related_file("r2.rs", "b\n"),
    ];
    let input = make_input("code", 0..4, 2, edit_history, related);

    let untruncated = indoc! {r#"
        <|file_sep|>r1.rs
        a
        <|file_sep|>r2.rs
        b
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), untruncated);

    let without_history = indoc! {r#"
        <|file_sep|>r1.rs
        a
        <|file_sep|>r2.rs
        b
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 50), without_history);
}
1223
/// A related file with three excerpts is rendered in full under a large
/// budget; under a 50-token budget only the first excerpt (followed by the
/// `...` separator) is expected to remain.
#[test]
fn test_truncation_includes_partial_excerpts() {
    let excerpt = |row_range: Range<u32>, text: &str| RelatedExcerpt {
        row_range,
        text: text.into(),
    };
    let big_file = RelatedFile {
        path: Path::new("big.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![
            excerpt(0..10, "first excerpt\n"),
            excerpt(10..20, "second excerpt\n"),
            excerpt(20..30, "third excerpt\n"),
        ],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![big_file]);

    let all_excerpts = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        second excerpt
        ...
        third excerpt
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), all_excerpts);

    let first_excerpt_only = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 50), first_excerpt_only);
}
1285
/// With two events and a 55-token budget, only the later event (`new.rs`) is
/// expected to survive in the edit history section.
#[test]
fn test_truncation_drops_older_events_first() {
    let older = make_event("old.rs", "-1\n");
    let newer = make_event("new.rs", "-2\n");
    let input = make_input("x", 0..1, 0, vec![older, newer], vec![]);

    let both_events = indoc! {r#"
        <|file_sep|>edit history
        --- a/old.rs
        +++ b/old.rs
        -1
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), both_events);

    let newest_event_only = indoc! {r#"
        <|file_sep|>edit history
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 55), newest_event_only);
}
1331
/// Under a 30-token budget the related file and edit history are expected to
/// be dropped, leaving only the cursor file section.
#[test]
fn test_cursor_excerpt_always_included_with_minimal_budget() {
    let edit_history = vec![make_event("a.rs", "-old\n+new\n")];
    let related = vec![make_related_file("related.rs", "helper\n")];
    let input = make_input("fn main() {}", 0..12, 3, edit_history, related);

    let cursor_section_only = indoc! {r#"
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        fn <|user_cursor|>main() {}
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};

    let prompt = format_with_budget(&input, 30);
    assert_eq!(prompt, cursor_section_only);
}
1354
1355 fn format_seed_coder(input: &ZetaPromptInput) -> String {
1356 format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, 10000)
1357 }
1358
1359 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1360 format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, max_tokens)
1361 }
1362
/// The seed-coder prompt is expected to put the suffix first, then the related
/// file, the edit history and the cursor file, ending at `<[fim-middle]>`
/// with no trailing newline.
#[test]
fn test_seed_coder_basic_format() {
    let edit_history = vec![make_event("a.rs", "-old\n+new\n")];
    let related = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, edit_history, related);

    let expected = indoc! {r#"
        <[fim-suffix]>
        suffix
        <[fim-prefix]><filename>related.rs
        fn helper() {}

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new

        <filename>test.rs
        prefix
        <<<<<<< CURRENT
        edi<|user_cursor|>table
        =======
        <[fim-middle]>"#};

    let prompt = format_seed_coder(&input);
    assert_eq!(prompt, expected);
}
1395
/// Without events or related files the seed-coder prompt is expected to
/// contain only the suffix and the cursor file section.
#[test]
fn test_seed_coder_no_context() {
    let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);

    let expected = indoc! {r#"
        <[fim-suffix]>
        after
        <[fim-prefix]><filename>test.rs
        before
        <<<<<<< CURRENT
        mid<|user_cursor|>dle
        =======
        <[fim-middle]>"#};

    let prompt = format_seed_coder(&input);
    assert_eq!(prompt, expected);
}
1413
/// Compares the seed-coder prompt for the same input under a large budget and
/// under a 30-token budget.
#[test]
fn test_seed_coder_truncation_drops_context() {
    let edit_history = vec![make_event("a.rs", "-x\n+y\n")];
    let related = vec![make_related_file("r1.rs", "content\n")];
    let input = make_input("code", 0..4, 2, edit_history, related);

    // With large budget, everything is included
    let with_context = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>r1.rs
        content

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&input), with_context);

    // With tight budget, context is dropped but cursor section remains
    let cursor_section_only = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder_with_budget(&input, 30), cursor_section_only);
}
1457
/// Cleaning seed-coder output is expected to yield `"new code\n"` whether or
/// not the raw output ends with the `>>>>>>> UPDATED` line.
#[test]
fn test_seed_coder_clean_output() {
    let raw_outputs = ["new code\n>>>>>>> UPDATED\n", "new code\n"];

    for raw in raw_outputs {
        assert_eq!(
            clean_zeta2_model_output(raw, ZetaFormat::V0211SeedCoder),
            "new code\n"
        );
    }
}
1472
/// Checks the Zeta1 prompt for an excerpt with one edit event, an editable
/// region in the middle of the excerpt, and `excerpt_start_row` of `Some(0)`.
///
/// The expected prompt contains the `<|start_of_file|>` marker, the editable
/// region wrapped in its start/end markers, and the cursor marker just before
/// `let`.
#[test]
fn test_format_zeta1_from_input_basic() {
    // Byte layout of the excerpt. The four-space indentation is significant:
    // the offsets below only line up with it.
    //    0..15  "fn before() {}\n"
    //   15..41  "fn foo() {\n    let x = 1;\n"   (editable region)
    //   30      immediately before `let`          (cursor)
    let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        editable_range_in_excerpt: 15..41,
        cursor_offset_in_excerpt: 30,
        excerpt_start_row: Some(0),
        events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
        related_files: vec![],
        excerpt_ranges: None,
        preferred_model: None,
        in_open_source_repo: false,
        can_collect_data: false,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "User edited other.rs:\n",
            "```diff\n",
            "-old\n",
            "+new\n",
            "\n",
            "```\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|start_of_file|>\n",
            "fn before() {}\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "\n",
            "<|editable_region_end|>}\n",
            "fn after() {}\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
1527
/// Checks the Zeta1 prompt when `excerpt_start_row` is `Some(10)` and the
/// whole excerpt is editable: the expected prompt has no `<|start_of_file|>`
/// marker and no text outside the editable region.
#[test]
fn test_format_zeta1_from_input_no_start_of_file() {
    // The excerpt is exactly 28 bytes with four-space indentation, matching
    // the `0..28` ranges; offset 15 is immediately before `let`.
    let excerpt = "fn foo() {\n    let x = 1;\n}\n";
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        editable_range_in_excerpt: 0..28,
        cursor_offset_in_excerpt: 15,
        excerpt_start_row: Some(10),
        events: vec![],
        related_files: vec![],
        excerpt_ranges: None,
        preferred_model: None,
        in_open_source_repo: false,
        can_collect_data: false,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "\n",
            "<|editable_region_end|>\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
1574
/// Checks the Zeta1 prompt when the editable range is a strict sub-range of
/// the context range: the expected prompt shows the prefix and suffix comment
/// lines outside the editable-region markers.
#[test]
fn test_format_zeta1_from_input_with_sub_ranges() {
    // Byte layout of the excerpt. The four-space indentation is significant:
    // the offsets below only line up with it.
    //    0..10  "// prefix\n"
    //   10..37  "fn foo() {\n    let x = 1;\n}"   (editable region, no trailing newline)
    //   25      immediately before `let`          (cursor)
    let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
    let editable_range = 10..37;
    let context_range = 0..excerpt.len();

    let input = ZetaPromptInput {
        cursor_path: Path::new("test.rs").into(),
        cursor_excerpt: excerpt.into(),
        editable_range_in_excerpt: editable_range.clone(),
        cursor_offset_in_excerpt: 25,
        excerpt_start_row: Some(0),
        events: vec![],
        related_files: vec![],
        excerpt_ranges: None,
        preferred_model: None,
        in_open_source_repo: false,
        can_collect_data: false,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```test.rs\n",
            "<|start_of_file|>\n",
            "// prefix\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "<|editable_region_end|>\n",
            "// suffix\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
1627
/// Cleaning Zeta1 output is expected to return only the text between the
/// editable-region markers, without the newline that follows the start marker
/// or the one that precedes the end marker.
#[test]
fn test_clean_zeta1_model_output_basic() {
    // `indoc!` strips only the indentation common to all lines, so the
    // `println!` line keeps its four-space indent relative to `fn main`.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
}
1641
/// Cleaning Zeta1 output that contains `<|user_cursor_is_here|>` is expected
/// to return the editable region with `<|user_cursor|>` at the same position.
#[test]
fn test_clean_zeta1_model_output_with_cursor() {
    // `indoc!` strips only the indentation common to all lines, so the cursor
    // line keeps its four-space indent relative to `fn main`.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            <|user_cursor_is_here|>println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(
        cleaned,
        "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
    );
}
1658
/// Output without any editable-region markers is expected to come back
/// unchanged.
#[test]
fn test_clean_zeta1_model_output_no_markers() {
    let raw = "fn main() {}\n";
    let cleaned = zeta1::clean_zeta1_model_output(raw);
    assert_eq!(cleaned.as_deref(), Some("fn main() {}\n"));
}
1665
/// An editable region with nothing between its markers is expected to clean
/// to the empty string.
#[test]
fn test_clean_zeta1_model_output_empty_region() {
    let raw = "<|editable_region_start|>\n<|editable_region_end|>\n";
    let cleaned = zeta1::clean_zeta1_model_output(raw);
    assert_eq!(cleaned.as_deref(), Some(""));
}
1672}