1use anyhow::Result;
2use serde::{Deserialize, Serialize};
3use std::fmt::Write;
4use std::ops::Range;
5use std::path::Path;
6use std::sync::Arc;
7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
8
/// Marker inserted into the prompt at the user's cursor position inside the
/// editable region.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Prompt budget, in estimated tokens (see `estimate_tokens`), that
/// `format_zeta_prompt` passes to the budgeted formatter.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
16
/// Roughly converts a byte length into a token count.
///
/// Uses a fixed heuristic of three bytes per token. The division truncates,
/// so anything shorter than three bytes counts as zero tokens.
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
20
/// The client's preferred edit prediction model. The server may override this.
///
/// Carried in `ZetaPromptInput::preferred_model`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum EditPredictionModelKind {
    // NOTE(review): presumably the model family served by the `zeta1` prompt
    // format below — confirm against the server.
    Zeta1,
    // NOTE(review): presumably the model family served by the `ZetaFormat`
    // prompt formats — confirm against the server.
    Zeta2,
}
27
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
///
/// `resolve_cursor_region` slices the excerpt with a context range and then
/// rebases the paired editable range and the cursor offset onto it by
/// subtracting the context start. Each editable range is therefore expected
/// to lie inside its paired context range, and the context range bounds must
/// fall on UTF-8 character boundaries of the excerpt.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
}
46
/// Everything the prompt formatters in this file need to build an edit
/// prediction prompt for one cursor position.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written into the prompt's file header.
    pub cursor_path: Arc<Path>,
    /// Text surrounding the cursor. All offsets and ranges below index into it by byte.
    pub cursor_excerpt: Arc<str>,
    /// Byte range of the editable region within `cursor_excerpt`.
    pub editable_range_in_excerpt: Range<usize>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// When this is `Some(0)` and the context range starts at offset 0, the
    /// zeta1 formatter emits its start-of-file marker.
    // NOTE(review): presumably the file row of the excerpt's first line — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit history. Rendered in the stored order; when the budget is tight the
    /// budgeted formatter drops entries from the front and keeps those at the back.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other files offered to the model as additional context.
    pub related_files: Vec<RelatedFile>,
    /// When set, the excerpt was computed with a larger budget (~512 tokens)
    /// and these ranges let the server select model-appropriate subsets.
    /// When absent, the excerpt IS the context region and
    /// `editable_range_in_excerpt` is the only editable range.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_ranges: Option<ExcerptRanges>,
    /// Client's preferred model. The server may override.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub preferred_model: Option<EditPredictionModelKind>,
    // NOTE(review): not read by any formatter in this file; presumably a
    // data-collection eligibility flag — confirm with the consumer.
    #[serde(default)]
    pub in_open_source_repo: bool,
    // NOTE(review): not read by any formatter in this file; presumably a
    // data-collection consent flag — confirm with the consumer.
    #[serde(default)]
    pub can_collect_data: bool,
}
71
/// Identifies which prompt layout `format_zeta_prompt` produces.
///
/// The variant also decides which pre-computed excerpt ranges are used when
/// `ZetaPromptInput::excerpt_ranges` is present (see `resolve_cursor_region`).
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// NOTE(review): every current variant name is already UpperCamelCase, so this
// allow may be vestigial — confirm before removing.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    /// FIM layout with prefix, suffix, then the current editable region last
    /// (`v0112_middle_at_end`); uses the 150-token editable range.
    V0112MiddleAtEnd,
    /// FIM layout with prefix, current editable region, then suffix
    /// (`v0113_ordered`); uses the 150-token editable range.
    V0113Ordered,
    /// Same layout as `V0113Ordered`, but uses the 180-token editable range.
    #[default]
    V0114180EditableRegion,
    /// Git merge-marker layout with the editable region after `<|fim_middle|>`
    /// (`v0120_git_merge_markers`).
    V0120GitMergeMarkers,
    /// Git merge-marker layout with the editable region inside the prefix
    /// (`v0131_git_merge_markers_prefix`).
    V0131GitMergeMarkersPrefix,
    /// Same prompt layout as `V0131GitMergeMarkersPrefix`; additionally
    /// `get_prefill` returns a non-empty prefill for this format.
    V0211Prefill,
    /// Seed-Coder SPM layout (`seed_coder`).
    V0211SeedCoder,
}
96
97impl std::fmt::Display for ZetaFormat {
98 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
99 write!(f, "{}", <&'static str>::from(self))
100 }
101}
102
103impl ZetaFormat {
104 pub fn parse(format_name: &str) -> Result<Self> {
105 let mut results = ZetaFormat::iter().filter(|version| {
106 <&'static str>::from(version)
107 .to_lowercase()
108 .contains(&format_name.to_lowercase())
109 });
110 let Some(result) = results.next() else {
111 anyhow::bail!(
112 "`{format_name}` did not match any of:\n{}",
113 Self::options_as_string()
114 );
115 };
116 if results.next().is_some() {
117 anyhow::bail!(
118 "`{format_name}` matched more than one of:\n{}",
119 Self::options_as_string()
120 );
121 }
122 Ok(result)
123 }
124
125 pub fn options_as_string() -> String {
126 ZetaFormat::iter()
127 .map(|format| format!("- {}\n", <&'static str>::from(format)))
128 .collect::<Vec<_>>()
129 .concat()
130 }
131}
132
/// One entry of the user's edit history.
///
/// Serialized as an internally tagged enum: the variant name is stored under
/// the `"event"` key.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer's contents and/or path.
    BufferChange {
        /// Path after the change; rendered on the `+++ b` line by `write_event`.
        path: Arc<Path>,
        /// Path before the change; rendered on the `--- a` line by `write_event`.
        /// The zeta1 formatter reports a rename when it differs from `path`.
        old_path: Arc<Path>,
        /// Diff text, appended verbatim after the path header lines.
        diff: String,
        /// When true, `write_event` prefixes the entry with
        /// `// User accepted prediction:`.
        predicted: bool,
        // NOTE(review): ignored by the formatters in this file; presumably a
        // data-collection eligibility flag — confirm with the consumer.
        in_open_source_repo: bool,
    },
}
144
145impl Event {
146 pub fn in_open_source_repo(&self) -> bool {
147 match self {
148 Event::BufferChange {
149 in_open_source_repo,
150 ..
151 } => *in_open_source_repo,
152 }
153 }
154}
155
156pub fn write_event(prompt: &mut String, event: &Event) {
157 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
158 for component in path.components() {
159 prompt.push('/');
160 write!(prompt, "{}", component.as_os_str().display()).ok();
161 }
162 }
163 match event {
164 Event::BufferChange {
165 path,
166 old_path,
167 diff,
168 predicted,
169 in_open_source_repo: _,
170 } => {
171 if *predicted {
172 prompt.push_str("// User accepted prediction:\n");
173 }
174 prompt.push_str("--- a");
175 write_path_as_unix_str(prompt, old_path.as_ref());
176 prompt.push_str("\n+++ b");
177 write_path_as_unix_str(prompt, path.as_ref());
178 prompt.push('\n');
179 prompt.push_str(diff);
180 }
181 }
182}
183
/// Excerpts from a file other than the one containing the cursor, included in
/// the prompt as additional context.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written after the file marker in the prompt.
    pub path: Arc<Path>,
    /// An excerpt whose `row_range.end` is below this value is followed by a
    /// `...` line in the prompt.
    // NOTE(review): presumably the last row of the file — confirm with the producer.
    pub max_row: u32,
    /// Excerpts, rendered in the stored order.
    pub excerpts: Vec<RelatedExcerpt>,
    // NOTE(review): not read by the formatters in this file; presumably a
    // data-collection eligibility flag — confirm with the consumer.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
192
/// A contiguous piece of text from a related file.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by the excerpt; only `end` is read by the formatters here,
    /// where it is compared against `RelatedFile::max_row`.
    pub row_range: Range<u32>,
    /// Excerpt text. A trailing newline is added when rendering if missing.
    pub text: Arc<str>,
}
198
199pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
200 format_zeta_prompt_with_budget(input, format, MAX_PROMPT_TOKENS)
201}
202
203/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
204pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
205 match format {
206 ZetaFormat::V0120GitMergeMarkers => output
207 .strip_suffix(v0120_git_merge_markers::END_MARKER)
208 .unwrap_or(output),
209 ZetaFormat::V0131GitMergeMarkersPrefix => output
210 .strip_suffix(v0131_git_merge_markers_prefix::END_MARKER)
211 .unwrap_or(output),
212 ZetaFormat::V0211SeedCoder => output
213 .strip_suffix(seed_coder::END_MARKER)
214 .unwrap_or(output),
215 _ => output,
216 }
217}
218
219fn resolve_cursor_region(
220 input: &ZetaPromptInput,
221 format: ZetaFormat,
222) -> (&str, Range<usize>, usize) {
223 let Some(ranges) = &input.excerpt_ranges else {
224 return (
225 &input.cursor_excerpt,
226 input.editable_range_in_excerpt.clone(),
227 input.cursor_offset_in_excerpt,
228 );
229 };
230
231 let (editable_range, context_range) = match format {
232 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
233 ranges.editable_150.clone(),
234 ranges.editable_150_context_350.clone(),
235 ),
236 ZetaFormat::V0114180EditableRegion
237 | ZetaFormat::V0120GitMergeMarkers
238 | ZetaFormat::V0131GitMergeMarkersPrefix
239 | ZetaFormat::V0211Prefill
240 | ZetaFormat::V0211SeedCoder => (
241 ranges.editable_180.clone(),
242 ranges.editable_180_context_350.clone(),
243 ),
244 };
245
246 let context_start = context_range.start;
247 let context_text = &input.cursor_excerpt[context_range];
248 let adjusted_editable =
249 (editable_range.start - context_start)..(editable_range.end - context_start);
250 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
251
252 (context_text, adjusted_editable, adjusted_cursor)
253}
254
255fn format_zeta_prompt_with_budget(
256 input: &ZetaPromptInput,
257 format: ZetaFormat,
258 max_tokens: usize,
259) -> String {
260 let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
261 let path = &*input.cursor_path;
262
263 let mut cursor_section = String::new();
264 match format {
265 ZetaFormat::V0112MiddleAtEnd => {
266 v0112_middle_at_end::write_cursor_excerpt_section(
267 &mut cursor_section,
268 path,
269 context,
270 &editable_range,
271 cursor_offset,
272 );
273 }
274 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
275 v0113_ordered::write_cursor_excerpt_section(
276 &mut cursor_section,
277 path,
278 context,
279 &editable_range,
280 cursor_offset,
281 )
282 }
283 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
284 &mut cursor_section,
285 path,
286 context,
287 &editable_range,
288 cursor_offset,
289 ),
290 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
291 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
292 &mut cursor_section,
293 path,
294 context,
295 &editable_range,
296 cursor_offset,
297 )
298 }
299 ZetaFormat::V0211SeedCoder => {
300 return seed_coder::format_prompt_with_budget(
301 path,
302 context,
303 &editable_range,
304 cursor_offset,
305 &input.events,
306 &input.related_files,
307 max_tokens,
308 );
309 }
310 }
311
312 let cursor_tokens = estimate_tokens(cursor_section.len());
313 let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
314
315 let edit_history_section = format_edit_history_within_budget(
316 &input.events,
317 "<|file_sep|>",
318 "edit history",
319 budget_after_cursor,
320 );
321 let edit_history_tokens = estimate_tokens(edit_history_section.len());
322 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
323
324 let related_files_section = format_related_files_within_budget(
325 &input.related_files,
326 "<|file_sep|>",
327 budget_after_edit_history,
328 );
329
330 let mut prompt = String::new();
331 prompt.push_str(&related_files_section);
332 prompt.push_str(&edit_history_section);
333 prompt.push_str(&cursor_section);
334 prompt
335}
336
337pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
338 match format {
339 ZetaFormat::V0112MiddleAtEnd
340 | ZetaFormat::V0113Ordered
341 | ZetaFormat::V0114180EditableRegion
342 | ZetaFormat::V0120GitMergeMarkers
343 | ZetaFormat::V0131GitMergeMarkersPrefix
344 | ZetaFormat::V0211SeedCoder => String::new(),
345 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(input),
346 }
347}
348
349fn format_edit_history_within_budget(
350 events: &[Arc<Event>],
351 file_marker: &str,
352 edit_history_name: &str,
353 max_tokens: usize,
354) -> String {
355 let header = format!("{}{}\n", file_marker, edit_history_name);
356 let header_tokens = estimate_tokens(header.len());
357 if header_tokens >= max_tokens {
358 return String::new();
359 }
360
361 let mut event_strings: Vec<String> = Vec::new();
362 let mut total_tokens = header_tokens;
363
364 for event in events.iter().rev() {
365 let mut event_str = String::new();
366 write_event(&mut event_str, event);
367 let event_tokens = estimate_tokens(event_str.len());
368
369 if total_tokens + event_tokens > max_tokens {
370 break;
371 }
372 total_tokens += event_tokens;
373 event_strings.push(event_str);
374 }
375
376 if event_strings.is_empty() {
377 return String::new();
378 }
379
380 let mut result = header;
381 for event_str in event_strings.iter().rev() {
382 result.push_str(event_str);
383 }
384 result
385}
386
387fn format_related_files_within_budget(
388 related_files: &[RelatedFile],
389 file_marker: &str,
390 max_tokens: usize,
391) -> String {
392 let mut result = String::new();
393 let mut total_tokens = 0;
394
395 for file in related_files {
396 let path_str = file.path.to_string_lossy();
397 let header = format!("{}{}\n", file_marker, path_str);
398 let header_tokens = estimate_tokens(header.len());
399
400 if total_tokens + header_tokens > max_tokens {
401 break;
402 }
403
404 let mut file_tokens = header_tokens;
405 let mut excerpts_to_include = 0;
406
407 for excerpt in &file.excerpts {
408 let needs_newline = !excerpt.text.ends_with('\n');
409 let needs_ellipsis = excerpt.row_range.end < file.max_row;
410 let excerpt_len = excerpt.text.len()
411 + if needs_newline { "\n".len() } else { 0 }
412 + if needs_ellipsis { "...\n".len() } else { 0 };
413
414 let excerpt_tokens = estimate_tokens(excerpt_len);
415 if total_tokens + file_tokens + excerpt_tokens > max_tokens {
416 break;
417 }
418 file_tokens += excerpt_tokens;
419 excerpts_to_include += 1;
420 }
421
422 if excerpts_to_include > 0 {
423 total_tokens += file_tokens;
424 result.push_str(&header);
425 for excerpt in file.excerpts.iter().take(excerpts_to_include) {
426 result.push_str(&excerpt.text);
427 if !result.ends_with('\n') {
428 result.push('\n');
429 }
430 if excerpt.row_range.end < file.max_row {
431 result.push_str("...\n");
432 }
433 }
434 }
435 }
436
437 result
438}
439
440pub fn write_related_files(
441 prompt: &mut String,
442 related_files: &[RelatedFile],
443) -> Vec<Range<usize>> {
444 let mut ranges = Vec::new();
445 for file in related_files {
446 let start = prompt.len();
447 let path_str = file.path.to_string_lossy();
448 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
449 for excerpt in &file.excerpts {
450 prompt.push_str(&excerpt.text);
451 if !prompt.ends_with('\n') {
452 prompt.push('\n');
453 }
454 if excerpt.row_range.end < file.max_row {
455 prompt.push_str("...\n");
456 }
457 }
458 let end = prompt.len();
459 ranges.push(start..end);
460 }
461 ranges
462}
463
464mod v0112_middle_at_end {
465 use super::*;
466
467 pub fn write_cursor_excerpt_section(
468 prompt: &mut String,
469 path: &Path,
470 context: &str,
471 editable_range: &Range<usize>,
472 cursor_offset: usize,
473 ) {
474 let path_str = path.to_string_lossy();
475 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
476
477 prompt.push_str("<|fim_prefix|>\n");
478 prompt.push_str(&context[..editable_range.start]);
479
480 prompt.push_str("<|fim_suffix|>\n");
481 prompt.push_str(&context[editable_range.end..]);
482 if !prompt.ends_with('\n') {
483 prompt.push('\n');
484 }
485
486 prompt.push_str("<|fim_middle|>current\n");
487 prompt.push_str(&context[editable_range.start..cursor_offset]);
488 prompt.push_str(CURSOR_MARKER);
489 prompt.push_str(&context[cursor_offset..editable_range.end]);
490 if !prompt.ends_with('\n') {
491 prompt.push('\n');
492 }
493
494 prompt.push_str("<|fim_middle|>updated\n");
495 }
496}
497
498mod v0113_ordered {
499 use super::*;
500
501 pub fn write_cursor_excerpt_section(
502 prompt: &mut String,
503 path: &Path,
504 context: &str,
505 editable_range: &Range<usize>,
506 cursor_offset: usize,
507 ) {
508 let path_str = path.to_string_lossy();
509 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
510
511 prompt.push_str("<|fim_prefix|>\n");
512 prompt.push_str(&context[..editable_range.start]);
513 if !prompt.ends_with('\n') {
514 prompt.push('\n');
515 }
516
517 prompt.push_str("<|fim_middle|>current\n");
518 prompt.push_str(&context[editable_range.start..cursor_offset]);
519 prompt.push_str(CURSOR_MARKER);
520 prompt.push_str(&context[cursor_offset..editable_range.end]);
521 if !prompt.ends_with('\n') {
522 prompt.push('\n');
523 }
524
525 prompt.push_str("<|fim_suffix|>\n");
526 prompt.push_str(&context[editable_range.end..]);
527 if !prompt.ends_with('\n') {
528 prompt.push('\n');
529 }
530
531 prompt.push_str("<|fim_middle|>updated\n");
532 }
533}
534
535pub mod v0120_git_merge_markers {
536 //! A prompt that uses git-style merge conflict markers to represent the editable region.
537 //!
538 //! Example prompt:
539 //!
540 //! <|file_sep|>path/to/target_file.py
541 //! <|fim_prefix|>
542 //! code before editable region
543 //! <|fim_suffix|>
544 //! code after editable region
545 //! <|fim_middle|>
546 //! <<<<<<< CURRENT
547 //! code that
548 //! needs to<|user_cursor|>
549 //! be rewritten
550 //! =======
551 //!
552 //! Expected output (should be generated by the model):
553 //!
554 //! updated
555 //! code with
556 //! changes applied
557 //! >>>>>>> UPDATED
558
559 use super::*;
560
561 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
562 pub const SEPARATOR: &str = "=======\n";
563 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
564
565 pub fn write_cursor_excerpt_section(
566 prompt: &mut String,
567 path: &Path,
568 context: &str,
569 editable_range: &Range<usize>,
570 cursor_offset: usize,
571 ) {
572 let path_str = path.to_string_lossy();
573 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
574
575 prompt.push_str("<|fim_prefix|>");
576 prompt.push_str(&context[..editable_range.start]);
577
578 prompt.push_str("<|fim_suffix|>");
579 prompt.push_str(&context[editable_range.end..]);
580 if !prompt.ends_with('\n') {
581 prompt.push('\n');
582 }
583
584 prompt.push_str("<|fim_middle|>");
585 prompt.push_str(START_MARKER);
586 prompt.push_str(&context[editable_range.start..cursor_offset]);
587 prompt.push_str(CURSOR_MARKER);
588 prompt.push_str(&context[cursor_offset..editable_range.end]);
589 if !prompt.ends_with('\n') {
590 prompt.push('\n');
591 }
592 prompt.push_str(SEPARATOR);
593 }
594}
595
596pub mod v0131_git_merge_markers_prefix {
597 //! A prompt that uses git-style merge conflict markers to represent the editable region.
598 //!
599 //! Example prompt:
600 //!
601 //! <|file_sep|>path/to/target_file.py
602 //! <|fim_prefix|>
603 //! code before editable region
604 //! <<<<<<< CURRENT
605 //! code that
606 //! needs to<|user_cursor|>
607 //! be rewritten
608 //! =======
609 //! <|fim_suffix|>
610 //! code after editable region
611 //! <|fim_middle|>
612 //!
613 //! Expected output (should be generated by the model):
614 //!
615 //! updated
616 //! code with
617 //! changes applied
618 //! >>>>>>> UPDATED
619
620 use super::*;
621
622 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
623 pub const SEPARATOR: &str = "=======\n";
624 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
625
626 pub fn write_cursor_excerpt_section(
627 prompt: &mut String,
628 path: &Path,
629 context: &str,
630 editable_range: &Range<usize>,
631 cursor_offset: usize,
632 ) {
633 let path_str = path.to_string_lossy();
634 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
635
636 prompt.push_str("<|fim_prefix|>");
637 prompt.push_str(&context[..editable_range.start]);
638 prompt.push_str(START_MARKER);
639 prompt.push_str(&context[editable_range.start..cursor_offset]);
640 prompt.push_str(CURSOR_MARKER);
641 prompt.push_str(&context[cursor_offset..editable_range.end]);
642 if !prompt.ends_with('\n') {
643 prompt.push('\n');
644 }
645 prompt.push_str(SEPARATOR);
646
647 prompt.push_str("<|fim_suffix|>");
648 prompt.push_str(&context[editable_range.end..]);
649 if !prompt.ends_with('\n') {
650 prompt.push('\n');
651 }
652
653 prompt.push_str("<|fim_middle|>");
654 }
655}
656
657pub mod v0211_prefill {
658 use super::*;
659
660 pub fn get_prefill(input: &ZetaPromptInput) -> String {
661 let editable_region = &input.cursor_excerpt
662 [input.editable_range_in_excerpt.start..input.editable_range_in_excerpt.end];
663
664 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
665 let prefill_len = editable_region.floor_char_boundary(prefill_len);
666
667 // Find a token boundary to avoid splitting tokens in the prefill.
668 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
669 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
670 // the \n and consume any consecutive \n characters after it.
671 let prefill = &editable_region[..prefill_len];
672 match prefill.rfind('\n') {
673 Some(pos) => {
674 let mut end = pos + 1;
675 while end < editable_region.len()
676 && editable_region.as_bytes().get(end) == Some(&b'\n')
677 {
678 end += 1;
679 }
680 editable_region[..end].to_string()
681 }
682 // No newline found. Fall back to splitting before the last space
683 // (word-level boundary)
684 None => match prefill.rfind(' ') {
685 Some(pos) => prefill[..pos].to_string(),
686 None => prefill.to_string(),
687 },
688 }
689 }
690}
691
692pub mod seed_coder {
693 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
694 //!
695 //! Seed-Coder uses different FIM tokens and order than Qwen:
696 //! - SPM order: suffix comes FIRST, then prefix, then middle
697 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
698 //! - File markers: StarCoder-style `<filename>path` (single token + path)
699 //!
700 //! All context (related files, edit history) goes in the PREFIX section.
701 //! The suffix contains only code after the editable region.
702 //!
703 //! Example prompt:
704 //!
705 //! <[fim-suffix]>
706 //! code after editable region
707 //! <[fim-prefix]><filename>related/file.py
708 //! related file content
709 //!
710 //! <filename>edit_history
711 //! --- a/some_file.py
712 //! +++ b/some_file.py
713 //! -old
714 //! +new
715 //!
716 //! <filename>path/to/target_file.py
717 //! code before editable region
718 //! <<<<<<< CURRENT
719 //! code that
720 //! needs to<|user_cursor|>
721 //! be rewritten
722 //! =======
723 //! <[fim-middle]>
724 //!
725 //! Expected output (model generates):
726 //!
727 //! updated
728 //! code with
729 //! changes applied
730 //! >>>>>>> UPDATED
731
732 use super::*;
733
734 pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
735 pub const FIM_PREFIX: &str = "<[fim-prefix]>";
736 pub const FIM_MIDDLE: &str = "<[fim-middle]>";
737 pub const FILE_MARKER: &str = "<filename>";
738
739 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
740 pub const SEPARATOR: &str = "=======\n";
741 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
742
743 pub fn format_prompt_with_budget(
744 path: &Path,
745 context: &str,
746 editable_range: &Range<usize>,
747 cursor_offset: usize,
748 events: &[Arc<Event>],
749 related_files: &[RelatedFile],
750 max_tokens: usize,
751 ) -> String {
752 let suffix_section = build_suffix_section(context, editable_range);
753 let cursor_prefix_section =
754 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
755
756 let suffix_tokens = estimate_tokens(suffix_section.len());
757 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
758 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
759
760 let edit_history_section = super::format_edit_history_within_budget(
761 events,
762 FILE_MARKER,
763 "edit_history",
764 budget_after_cursor,
765 );
766 let edit_history_tokens = estimate_tokens(edit_history_section.len());
767 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
768
769 let related_files_section = super::format_related_files_within_budget(
770 related_files,
771 FILE_MARKER,
772 budget_after_edit_history,
773 );
774
775 let mut prompt = String::new();
776 prompt.push_str(&suffix_section);
777 prompt.push_str(FIM_PREFIX);
778 prompt.push_str(&related_files_section);
779 if !related_files_section.is_empty() {
780 prompt.push('\n');
781 }
782 prompt.push_str(&edit_history_section);
783 if !edit_history_section.is_empty() {
784 prompt.push('\n');
785 }
786 prompt.push_str(&cursor_prefix_section);
787 prompt.push_str(FIM_MIDDLE);
788 prompt
789 }
790
791 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
792 let mut section = String::new();
793 section.push_str(FIM_SUFFIX);
794 section.push_str(&context[editable_range.end..]);
795 if !section.ends_with('\n') {
796 section.push('\n');
797 }
798 section
799 }
800
801 fn build_cursor_prefix_section(
802 path: &Path,
803 context: &str,
804 editable_range: &Range<usize>,
805 cursor_offset: usize,
806 ) -> String {
807 let mut section = String::new();
808 let path_str = path.to_string_lossy();
809 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
810
811 section.push_str(&context[..editable_range.start]);
812 section.push_str(START_MARKER);
813 section.push_str(&context[editable_range.start..cursor_offset]);
814 section.push_str(CURSOR_MARKER);
815 section.push_str(&context[cursor_offset..editable_range.end]);
816 if !section.ends_with('\n') {
817 section.push('\n');
818 }
819 section.push_str(SEPARATOR);
820 section
821 }
822}
823
824/// The zeta1 prompt format
825pub mod zeta1 {
826 use super::*;
827 use std::fmt::Write;
828
829 pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
830 pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
831 pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
832 pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
833
834 const INSTRUCTION_HEADER: &str = concat!(
835 "### Instruction:\n",
836 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
837 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
838 "into account the cursor location.\n\n",
839 "### User Edits:\n\n"
840 );
841 const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
842 const RESPONSE_HEADER: &str = "\n\n### Response:\n";
843
844 /// Formats a complete zeta1 prompt from the input events and excerpt.
845 pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
846 let mut prompt = String::with_capacity(
847 INSTRUCTION_HEADER.len()
848 + input_events.len()
849 + EXCERPT_HEADER.len()
850 + input_excerpt.len()
851 + RESPONSE_HEADER.len(),
852 );
853 prompt.push_str(INSTRUCTION_HEADER);
854 prompt.push_str(input_events);
855 prompt.push_str(EXCERPT_HEADER);
856 prompt.push_str(input_excerpt);
857 prompt.push_str(RESPONSE_HEADER);
858 prompt
859 }
860
861 /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
862 /// editable and context byte-offset ranges within `cursor_excerpt`.
863 pub fn format_zeta1_from_input(
864 input: &ZetaPromptInput,
865 editable_range: Range<usize>,
866 context_range: Range<usize>,
867 ) -> String {
868 let events = format_zeta1_events(&input.events);
869 let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
870 format_zeta1_prompt(&events, &excerpt)
871 }
872
873 /// Formats events in zeta1 style (oldest first).
874 fn format_zeta1_events(events: &[Arc<Event>]) -> String {
875 let mut result = String::new();
876 for event in events {
877 let event_string = format_zeta1_event(event);
878 if event_string.is_empty() {
879 continue;
880 }
881 if !result.is_empty() {
882 result.push_str("\n\n");
883 }
884 result.push_str(&event_string);
885 }
886 result
887 }
888
889 fn format_zeta1_event(event: &Event) -> String {
890 match event {
891 Event::BufferChange {
892 path,
893 old_path,
894 diff,
895 ..
896 } => {
897 let mut prompt = String::new();
898 if old_path != path {
899 writeln!(
900 prompt,
901 "User renamed {} to {}\n",
902 old_path.display(),
903 path.display()
904 )
905 .ok();
906 }
907 if !diff.is_empty() {
908 write!(
909 prompt,
910 "User edited {}:\n```diff\n{}\n```",
911 path.display(),
912 diff
913 )
914 .ok();
915 }
916 prompt
917 }
918 }
919 }
920
921 /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
922 /// within `cursor_excerpt`.
923 fn format_zeta1_excerpt(
924 input: &ZetaPromptInput,
925 editable_range: Range<usize>,
926 context_range: Range<usize>,
927 ) -> String {
928 let path_str = input.cursor_path.to_string_lossy();
929 let excerpt = &*input.cursor_excerpt;
930 let cursor_offset = input.cursor_offset_in_excerpt;
931
932 let mut prompt = String::new();
933 writeln!(&mut prompt, "```{path_str}").ok();
934
935 let starts_at_file_beginning =
936 input.excerpt_start_row == Some(0) && context_range.start == 0;
937 if starts_at_file_beginning {
938 writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
939 }
940
941 prompt.push_str(&excerpt[context_range.start..editable_range.start]);
942
943 writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
944 prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
945 prompt.push_str(CURSOR_MARKER);
946 prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
947 write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
948
949 prompt.push_str(&excerpt[editable_range.end..context_range.end]);
950 write!(prompt, "\n```").ok();
951
952 prompt
953 }
954
955 /// Cleans zeta1 model output by extracting content between editable region
956 /// markers and converting the zeta1 cursor marker to the universal one.
957 /// Returns `None` if the output doesn't contain the expected markers.
958 pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
959 let content = output.replace(CURSOR_MARKER, "");
960
961 let content_start = content
962 .find(EDITABLE_REGION_START_MARKER)
963 .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
964 .map(|pos| {
965 if content.as_bytes().get(pos) == Some(&b'\n') {
966 pos + 1
967 } else {
968 pos
969 }
970 })
971 .unwrap_or(0);
972
973 let content_end = content
974 .find(EDITABLE_REGION_END_MARKER)
975 .map(|pos| {
976 if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
977 pos - 1
978 } else {
979 pos
980 }
981 })
982 .unwrap_or(content.len());
983
984 if content_start > content_end {
985 return Some(String::new());
986 }
987
988 let extracted = &content[content_start..content_end];
989
990 let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
991 let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
992 let text_before_cursor = text_before_cursor
993 .find(EDITABLE_REGION_START_MARKER)
994 .map(|pos| {
995 let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
996 if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
997 after_marker + 1
998 } else {
999 after_marker
1000 }
1001 })
1002 .unwrap_or(0);
1003 let offset_in_extracted = zeta1_cursor_pos
1004 .saturating_sub(text_before_cursor)
1005 .min(extracted.len());
1006 offset_in_extracted
1007 });
1008
1009 let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
1010 if let Some(offset) = cursor_offset {
1011 result.push_str(&extracted[..offset]);
1012 result.push_str(super::CURSOR_MARKER);
1013 result.push_str(&extracted[offset..]);
1014 } else {
1015 result.push_str(extracted);
1016 }
1017
1018 Some(result)
1019 }
1020}
1021
1022#[cfg(test)]
1023mod tests {
1024 use super::*;
1025 use indoc::indoc;
1026
1027 fn make_input(
1028 cursor_excerpt: &str,
1029 editable_range: Range<usize>,
1030 cursor_offset: usize,
1031 events: Vec<Event>,
1032 related_files: Vec<RelatedFile>,
1033 ) -> ZetaPromptInput {
1034 ZetaPromptInput {
1035 cursor_path: Path::new("test.rs").into(),
1036 cursor_excerpt: cursor_excerpt.into(),
1037 editable_range_in_excerpt: editable_range,
1038 cursor_offset_in_excerpt: cursor_offset,
1039 excerpt_start_row: None,
1040 events: events.into_iter().map(Arc::new).collect(),
1041 related_files,
1042 excerpt_ranges: None,
1043 preferred_model: None,
1044 in_open_source_repo: false,
1045 can_collect_data: false,
1046 }
1047 }
1048
1049 fn make_event(path: &str, diff: &str) -> Event {
1050 Event::BufferChange {
1051 path: Path::new(path).into(),
1052 old_path: Path::new(path).into(),
1053 diff: diff.to_string(),
1054 predicted: false,
1055 in_open_source_repo: false,
1056 }
1057 }
1058
1059 fn make_related_file(path: &str, content: &str) -> RelatedFile {
1060 RelatedFile {
1061 path: Path::new(path).into(),
1062 max_row: content.lines().count() as u32,
1063 excerpts: vec![RelatedExcerpt {
1064 row_range: 0..content.lines().count() as u32,
1065 text: content.into(),
1066 }],
1067 in_open_source_repo: false,
1068 }
1069 }
1070
1071 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1072 format_zeta_prompt_with_budget(input, ZetaFormat::V0114180EditableRegion, max_tokens)
1073 }
1074
    /// With a generous budget every section is emitted: related file, edit
    /// history, then the cursor section.
    #[test]
    fn test_no_truncation_when_within_budget() {
        let input = make_input(
            "prefix\neditable\nsuffix",
            7..15,
            10,
            vec![make_event("a.rs", "-old\n+new\n")],
            vec![make_related_file("related.rs", "fn helper() {}\n")],
        );

        // The blank line after `<|fim_suffix|>` comes from the suffix text
        // itself, which starts with the newline that follows `editable`.
        assert_eq!(
            format_with_budget(&input, 10000),
            indoc! {r#"
                <|file_sep|>related.rs
                fn helper() {}
                <|file_sep|>edit history
                --- a/a.rs
                +++ b/a.rs
                -old
                +new
                <|file_sep|>test.rs
                <|fim_prefix|>
                prefix
                <|fim_middle|>current
                edi<|user_cursor|>table
                <|fim_suffix|>

                suffix
                <|fim_middle|>updated
            "#}
        );
    }
1107
1108 #[test]
1109 fn test_truncation_drops_edit_history_when_budget_tight() {
1110 let input = make_input(
1111 "code",
1112 0..4,
1113 2,
1114 vec![make_event("a.rs", "-x\n+y\n")],
1115 vec![
1116 make_related_file("r1.rs", "a\n"),
1117 make_related_file("r2.rs", "b\n"),
1118 ],
1119 );
1120
1121 assert_eq!(
1122 format_with_budget(&input, 10000),
1123 indoc! {r#"
1124 <|file_sep|>r1.rs
1125 a
1126 <|file_sep|>r2.rs
1127 b
1128 <|file_sep|>edit history
1129 --- a/a.rs
1130 +++ b/a.rs
1131 -x
1132 +y
1133 <|file_sep|>test.rs
1134 <|fim_prefix|>
1135 <|fim_middle|>current
1136 co<|user_cursor|>de
1137 <|fim_suffix|>
1138 <|fim_middle|>updated
1139 "#}
1140 );
1141
1142 assert_eq!(
1143 format_with_budget(&input, 50),
1144 indoc! {r#"
1145 <|file_sep|>r1.rs
1146 a
1147 <|file_sep|>r2.rs
1148 b
1149 <|file_sep|>test.rs
1150 <|fim_prefix|>
1151 <|fim_middle|>current
1152 co<|user_cursor|>de
1153 <|fim_suffix|>
1154 <|fim_middle|>updated
1155 "#}
1156 );
1157 }
1158
1159 #[test]
1160 fn test_truncation_includes_partial_excerpts() {
1161 let input = make_input(
1162 "x",
1163 0..1,
1164 0,
1165 vec![],
1166 vec![RelatedFile {
1167 path: Path::new("big.rs").into(),
1168 max_row: 30,
1169 in_open_source_repo: false,
1170 excerpts: vec![
1171 RelatedExcerpt {
1172 row_range: 0..10,
1173 text: "first excerpt\n".into(),
1174 },
1175 RelatedExcerpt {
1176 row_range: 10..20,
1177 text: "second excerpt\n".into(),
1178 },
1179 RelatedExcerpt {
1180 row_range: 20..30,
1181 text: "third excerpt\n".into(),
1182 },
1183 ],
1184 }],
1185 );
1186
1187 assert_eq!(
1188 format_with_budget(&input, 10000),
1189 indoc! {r#"
1190 <|file_sep|>big.rs
1191 first excerpt
1192 ...
1193 second excerpt
1194 ...
1195 third excerpt
1196 <|file_sep|>test.rs
1197 <|fim_prefix|>
1198 <|fim_middle|>current
1199 <|user_cursor|>x
1200 <|fim_suffix|>
1201 <|fim_middle|>updated
1202 "#}
1203 );
1204
1205 assert_eq!(
1206 format_with_budget(&input, 50),
1207 indoc! {r#"
1208 <|file_sep|>big.rs
1209 first excerpt
1210 ...
1211 <|file_sep|>test.rs
1212 <|fim_prefix|>
1213 <|fim_middle|>current
1214 <|user_cursor|>x
1215 <|fim_suffix|>
1216 <|fim_middle|>updated
1217 "#}
1218 );
1219 }
1220
1221 #[test]
1222 fn test_truncation_drops_older_events_first() {
1223 let input = make_input(
1224 "x",
1225 0..1,
1226 0,
1227 vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
1228 vec![],
1229 );
1230
1231 assert_eq!(
1232 format_with_budget(&input, 10000),
1233 indoc! {r#"
1234 <|file_sep|>edit history
1235 --- a/old.rs
1236 +++ b/old.rs
1237 -1
1238 --- a/new.rs
1239 +++ b/new.rs
1240 -2
1241 <|file_sep|>test.rs
1242 <|fim_prefix|>
1243 <|fim_middle|>current
1244 <|user_cursor|>x
1245 <|fim_suffix|>
1246 <|fim_middle|>updated
1247 "#}
1248 );
1249
1250 assert_eq!(
1251 format_with_budget(&input, 55),
1252 indoc! {r#"
1253 <|file_sep|>edit history
1254 --- a/new.rs
1255 +++ b/new.rs
1256 -2
1257 <|file_sep|>test.rs
1258 <|fim_prefix|>
1259 <|fim_middle|>current
1260 <|user_cursor|>x
1261 <|fim_suffix|>
1262 <|fim_middle|>updated
1263 "#}
1264 );
1265 }
1266
1267 #[test]
1268 fn test_cursor_excerpt_always_included_with_minimal_budget() {
1269 let input = make_input(
1270 "fn main() {}",
1271 0..12,
1272 3,
1273 vec![make_event("a.rs", "-old\n+new\n")],
1274 vec![make_related_file("related.rs", "helper\n")],
1275 );
1276
1277 assert_eq!(
1278 format_with_budget(&input, 30),
1279 indoc! {r#"
1280 <|file_sep|>test.rs
1281 <|fim_prefix|>
1282 <|fim_middle|>current
1283 fn <|user_cursor|>main() {}
1284 <|fim_suffix|>
1285 <|fim_middle|>updated
1286 "#}
1287 );
1288 }
1289
1290 fn format_seed_coder(input: &ZetaPromptInput) -> String {
1291 format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, 10000)
1292 }
1293
1294 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1295 format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, max_tokens)
1296 }
1297
1298 #[test]
1299 fn test_seed_coder_basic_format() {
1300 let input = make_input(
1301 "prefix\neditable\nsuffix",
1302 7..15,
1303 10,
1304 vec![make_event("a.rs", "-old\n+new\n")],
1305 vec![make_related_file("related.rs", "fn helper() {}\n")],
1306 );
1307
1308 assert_eq!(
1309 format_seed_coder(&input),
1310 indoc! {r#"
1311 <[fim-suffix]>
1312 suffix
1313 <[fim-prefix]><filename>related.rs
1314 fn helper() {}
1315
1316 <filename>edit_history
1317 --- a/a.rs
1318 +++ b/a.rs
1319 -old
1320 +new
1321
1322 <filename>test.rs
1323 prefix
1324 <<<<<<< CURRENT
1325 edi<|user_cursor|>table
1326 =======
1327 <[fim-middle]>"#}
1328 );
1329 }
1330
1331 #[test]
1332 fn test_seed_coder_no_context() {
1333 let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
1334
1335 assert_eq!(
1336 format_seed_coder(&input),
1337 indoc! {r#"
1338 <[fim-suffix]>
1339 after
1340 <[fim-prefix]><filename>test.rs
1341 before
1342 <<<<<<< CURRENT
1343 mid<|user_cursor|>dle
1344 =======
1345 <[fim-middle]>"#}
1346 );
1347 }
1348
1349 #[test]
1350 fn test_seed_coder_truncation_drops_context() {
1351 let input = make_input(
1352 "code",
1353 0..4,
1354 2,
1355 vec![make_event("a.rs", "-x\n+y\n")],
1356 vec![make_related_file("r1.rs", "content\n")],
1357 );
1358
1359 // With large budget, everything is included
1360 assert_eq!(
1361 format_seed_coder(&input),
1362 indoc! {r#"
1363 <[fim-suffix]>
1364 <[fim-prefix]><filename>r1.rs
1365 content
1366
1367 <filename>edit_history
1368 --- a/a.rs
1369 +++ b/a.rs
1370 -x
1371 +y
1372
1373 <filename>test.rs
1374 <<<<<<< CURRENT
1375 co<|user_cursor|>de
1376 =======
1377 <[fim-middle]>"#}
1378 );
1379
1380 // With tight budget, context is dropped but cursor section remains
1381 assert_eq!(
1382 format_seed_coder_with_budget(&input, 30),
1383 indoc! {r#"
1384 <[fim-suffix]>
1385 <[fim-prefix]><filename>test.rs
1386 <<<<<<< CURRENT
1387 co<|user_cursor|>de
1388 =======
1389 <[fim-middle]>"#}
1390 );
1391 }
1392
1393 #[test]
1394 fn test_seed_coder_clean_output() {
1395 let output_with_marker = "new code\n>>>>>>> UPDATED\n";
1396 let output_without_marker = "new code\n";
1397
1398 assert_eq!(
1399 clean_zeta2_model_output(output_with_marker, ZetaFormat::V0211SeedCoder),
1400 "new code\n"
1401 );
1402 assert_eq!(
1403 clean_zeta2_model_output(output_without_marker, ZetaFormat::V0211SeedCoder),
1404 "new code\n"
1405 );
1406 }
1407
1408 #[test]
1409 fn test_format_zeta1_from_input_basic() {
1410 let excerpt = "fn before() {}\nfn foo() {\n let x = 1;\n}\nfn after() {}\n";
1411 let input = ZetaPromptInput {
1412 cursor_path: Path::new("src/main.rs").into(),
1413 cursor_excerpt: excerpt.into(),
1414 editable_range_in_excerpt: 15..41,
1415 cursor_offset_in_excerpt: 30,
1416 excerpt_start_row: Some(0),
1417 events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
1418 related_files: vec![],
1419 excerpt_ranges: None,
1420 preferred_model: None,
1421 in_open_source_repo: false,
1422 can_collect_data: false,
1423 };
1424
1425 let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
1426
1427 assert_eq!(
1428 prompt,
1429 concat!(
1430 "### Instruction:\n",
1431 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1432 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1433 "into account the cursor location.\n",
1434 "\n",
1435 "### User Edits:\n",
1436 "\n",
1437 "User edited other.rs:\n",
1438 "```diff\n",
1439 "-old\n",
1440 "+new\n",
1441 "\n",
1442 "```\n",
1443 "\n",
1444 "### User Excerpt:\n",
1445 "\n",
1446 "```src/main.rs\n",
1447 "<|start_of_file|>\n",
1448 "fn before() {}\n",
1449 "<|editable_region_start|>\n",
1450 "fn foo() {\n",
1451 " <|user_cursor_is_here|>let x = 1;\n",
1452 "\n",
1453 "<|editable_region_end|>}\n",
1454 "fn after() {}\n",
1455 "\n",
1456 "```\n",
1457 "\n",
1458 "### Response:\n",
1459 ),
1460 );
1461 }
1462
1463 #[test]
1464 fn test_format_zeta1_from_input_no_start_of_file() {
1465 let excerpt = "fn foo() {\n let x = 1;\n}\n";
1466 let input = ZetaPromptInput {
1467 cursor_path: Path::new("src/main.rs").into(),
1468 cursor_excerpt: excerpt.into(),
1469 editable_range_in_excerpt: 0..28,
1470 cursor_offset_in_excerpt: 15,
1471 excerpt_start_row: Some(10),
1472 events: vec![],
1473 related_files: vec![],
1474 excerpt_ranges: None,
1475 preferred_model: None,
1476 in_open_source_repo: false,
1477 can_collect_data: false,
1478 };
1479
1480 let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
1481
1482 assert_eq!(
1483 prompt,
1484 concat!(
1485 "### Instruction:\n",
1486 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1487 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1488 "into account the cursor location.\n",
1489 "\n",
1490 "### User Edits:\n",
1491 "\n",
1492 "\n",
1493 "\n",
1494 "### User Excerpt:\n",
1495 "\n",
1496 "```src/main.rs\n",
1497 "<|editable_region_start|>\n",
1498 "fn foo() {\n",
1499 " <|user_cursor_is_here|>let x = 1;\n",
1500 "}\n",
1501 "\n",
1502 "<|editable_region_end|>\n",
1503 "```\n",
1504 "\n",
1505 "### Response:\n",
1506 ),
1507 );
1508 }
1509
1510 #[test]
1511 fn test_format_zeta1_from_input_with_sub_ranges() {
1512 let excerpt = "// prefix\nfn foo() {\n let x = 1;\n}\n// suffix\n";
1513 let editable_range = 10..37;
1514 let context_range = 0..excerpt.len();
1515
1516 let input = ZetaPromptInput {
1517 cursor_path: Path::new("test.rs").into(),
1518 cursor_excerpt: excerpt.into(),
1519 editable_range_in_excerpt: editable_range.clone(),
1520 cursor_offset_in_excerpt: 25,
1521 excerpt_start_row: Some(0),
1522 events: vec![],
1523 related_files: vec![],
1524 excerpt_ranges: None,
1525 preferred_model: None,
1526 in_open_source_repo: false,
1527 can_collect_data: false,
1528 };
1529
1530 let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
1531
1532 assert_eq!(
1533 prompt,
1534 concat!(
1535 "### Instruction:\n",
1536 "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1537 "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1538 "into account the cursor location.\n",
1539 "\n",
1540 "### User Edits:\n",
1541 "\n",
1542 "\n",
1543 "\n",
1544 "### User Excerpt:\n",
1545 "\n",
1546 "```test.rs\n",
1547 "<|start_of_file|>\n",
1548 "// prefix\n",
1549 "<|editable_region_start|>\n",
1550 "fn foo() {\n",
1551 " <|user_cursor_is_here|>let x = 1;\n",
1552 "}\n",
1553 "<|editable_region_end|>\n",
1554 "// suffix\n",
1555 "\n",
1556 "```\n",
1557 "\n",
1558 "### Response:\n",
1559 ),
1560 );
1561 }
1562
1563 #[test]
1564 fn test_clean_zeta1_model_output_basic() {
1565 let output = indoc! {"
1566 <|editable_region_start|>
1567 fn main() {
1568 println!(\"hello\");
1569 }
1570 <|editable_region_end|>
1571 "};
1572
1573 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1574 assert_eq!(cleaned, "fn main() {\n println!(\"hello\");\n}");
1575 }
1576
1577 #[test]
1578 fn test_clean_zeta1_model_output_with_cursor() {
1579 let output = indoc! {"
1580 <|editable_region_start|>
1581 fn main() {
1582 <|user_cursor_is_here|>println!(\"hello\");
1583 }
1584 <|editable_region_end|>
1585 "};
1586
1587 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1588 assert_eq!(
1589 cleaned,
1590 "fn main() {\n <|user_cursor|>println!(\"hello\");\n}"
1591 );
1592 }
1593
1594 #[test]
1595 fn test_clean_zeta1_model_output_no_markers() {
1596 let output = "fn main() {}\n";
1597 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1598 assert_eq!(cleaned, "fn main() {}\n");
1599 }
1600
1601 #[test]
1602 fn test_clean_zeta1_model_output_empty_region() {
1603 let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
1604 let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1605 assert_eq!(cleaned, "");
1606 }
1607}