1use anyhow::Result;
2use serde::{Deserialize, Serialize};
3use std::fmt::Write;
4use std::ops::Range;
5use std::path::Path;
6use std::sync::Arc;
7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
8
/// Marker inserted into the prompt at the user's cursor position. It is also
/// listed among every format's special tokens.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes to the budgeted formatter.
/// Token counts are estimated from byte lengths, so the limit is approximate.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this fraction of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// the prefilled part of the region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
16
/// Rough token estimate for a text of `bytes` bytes, assuming about three
/// bytes per token (integer division, so the result rounds down).
fn estimate_tokens(bytes: usize) -> usize {
    const BYTES_PER_TOKEN: usize = 3;
    bytes / BYTES_PER_TOKEN
}
20
/// The client's preferred edit prediction model. The server may override this.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum EditPredictionModelKind {
    /// Prefer the zeta1 model.
    Zeta1,
    /// Prefer the zeta2 model.
    Zeta2,
}
27
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
///
/// `excerpt_range_for_format` picks one editable/context pair from this
/// struct for a given `ZetaFormat`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
}
46
/// Everything needed to format an edit prediction prompt for one cursor position.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written into the prompt's file header.
    pub cursor_path: Arc<Path>,
    /// Text around the cursor. All offsets and ranges below index into this string by byte.
    pub cursor_excerpt: Arc<str>,
    /// Byte range of the editable region within `cursor_excerpt`.
    /// Used directly when `excerpt_ranges` is absent.
    pub editable_range_in_excerpt: Range<usize>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// The zeta1 formatter treats `Some(0)` (together with a context range
    /// starting at 0) as "the excerpt starts at the beginning of the file".
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit history. The formatters emit events in this order and, when over
    /// budget, keep the ones at the end of the list.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other files to include as extra context.
    pub related_files: Vec<RelatedFile>,
    /// When set, the excerpt was computed with a larger budget (~512 tokens)
    /// and these ranges let the server select model-appropriate subsets.
    /// When absent, the excerpt IS the context region and
    /// `editable_range_in_excerpt` is the only editable range.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_ranges: Option<ExcerptRanges>,
    /// Client's preferred model. The server may override.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub preferred_model: Option<EditPredictionModelKind>,
    // NOTE(review): not read by any formatter in this file; presumably
    // consumed by the caller for data-collection decisions — confirm.
    #[serde(default)]
    pub in_open_source_repo: bool,
    // NOTE(review): not read by any formatter in this file — confirm intended use.
    #[serde(default)]
    pub can_collect_data: bool,
}
71
/// Versioned prompt layouts. The variant name (via `IntoStaticStr`) is what
/// `Display` prints and what `ZetaFormat::parse` matches against.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    /// Prefix, then suffix, then the current editable region last.
    V0112MiddleAtEnd,
    /// Prefix, current editable region, then suffix, in document order.
    V0113Ordered,
    /// Same prompt layout as `V0113Ordered` (it shares that writer).
    V0114180EditableRegion,
    /// Editable region wrapped in git-style merge markers after `<|fim_middle|>`.
    V0120GitMergeMarkers,
    /// Git-style merge markers placed inside the prefix section.
    #[default]
    V0131GitMergeMarkersPrefix,
    /// Same prompt as `V0131GitMergeMarkersPrefix`, plus a prefill from `get_prefill`.
    V0211Prefill,
    /// Seed-Coder layout; see the `seed_coder` module.
    V0211SeedCoder,
}
96
97impl std::fmt::Display for ZetaFormat {
98 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
99 write!(f, "{}", <&'static str>::from(self))
100 }
101}
102
103impl ZetaFormat {
104 pub fn parse(format_name: &str) -> Result<Self> {
105 let mut results = ZetaFormat::iter().filter(|version| {
106 <&'static str>::from(version)
107 .to_lowercase()
108 .contains(&format_name.to_lowercase())
109 });
110 let Some(result) = results.next() else {
111 anyhow::bail!(
112 "`{format_name}` did not match any of:\n{}",
113 Self::options_as_string()
114 );
115 };
116 if results.next().is_some() {
117 anyhow::bail!(
118 "`{format_name}` matched more than one of:\n{}",
119 Self::options_as_string()
120 );
121 }
122 Ok(result)
123 }
124
125 pub fn options_as_string() -> String {
126 ZetaFormat::iter()
127 .map(|format| format!("- {}\n", <&'static str>::from(format)))
128 .collect::<Vec<_>>()
129 .concat()
130 }
131
132 pub fn special_tokens(&self) -> &'static [&'static str] {
133 match self {
134 ZetaFormat::V0112MiddleAtEnd
135 | ZetaFormat::V0113Ordered
136 | ZetaFormat::V0114180EditableRegion => &[
137 "<|fim_prefix|>",
138 "<|fim_suffix|>",
139 "<|fim_middle|>",
140 "<|file_sep|>",
141 CURSOR_MARKER,
142 ],
143 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
144 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
145 v0131_git_merge_markers_prefix::special_tokens()
146 }
147 ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
148 }
149 }
150}
151
/// An entry in the edit history that is sent along with a prediction request.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, described as a diff.
    BufferChange {
        /// Path after the change; written as the `+++ b/...` line by `write_event`.
        path: Arc<Path>,
        /// Path before the change; written as the `--- a/...` line by `write_event`.
        old_path: Arc<Path>,
        /// Diff text, appended to the prompt verbatim.
        diff: String,
        /// When true, `write_event` prefixes the event with
        /// `// User accepted prediction:`.
        predicted: bool,
        // NOTE(review): not used when formatting prompts in this file;
        // presumably consulted by callers — confirm.
        in_open_source_repo: bool,
    },
}
163
164impl Event {
165 pub fn in_open_source_repo(&self) -> bool {
166 match self {
167 Event::BufferChange {
168 in_open_source_repo,
169 ..
170 } => *in_open_source_repo,
171 }
172 }
173}
174
175pub fn write_event(prompt: &mut String, event: &Event) {
176 fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
177 for component in path.components() {
178 prompt.push('/');
179 write!(prompt, "{}", component.as_os_str().display()).ok();
180 }
181 }
182 match event {
183 Event::BufferChange {
184 path,
185 old_path,
186 diff,
187 predicted,
188 in_open_source_repo: _,
189 } => {
190 if *predicted {
191 prompt.push_str("// User accepted prediction:\n");
192 }
193 prompt.push_str("--- a");
194 write_path_as_unix_str(prompt, old_path.as_ref());
195 prompt.push_str("\n+++ b");
196 write_path_as_unix_str(prompt, path.as_ref());
197 prompt.push('\n');
198 prompt.push_str(diff);
199 }
200 }
201}
202
/// Excerpts from a file other than the one containing the cursor.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written after the file marker in the prompt.
    pub path: Arc<Path>,
    /// Row bound for the file: an excerpt whose `row_range.end` is below this
    /// value is followed by a `...` line when written.
    pub max_row: u32,
    /// Excerpts to include, written in this order.
    pub excerpts: Vec<RelatedExcerpt>,
    // NOTE(review): not read by the formatters in this file — confirm intended use.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
211
/// A contiguous piece of a related file.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by `text`; only `end` is consulted by the formatters here
    /// (compared against `RelatedFile::max_row`).
    pub row_range: Range<u32>,
    /// Excerpt text, written verbatim; a trailing newline is added if missing.
    pub text: Arc<str>,
}
217
218pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
219 format
220 .special_tokens()
221 .iter()
222 .any(|token| input.cursor_excerpt.contains(token))
223}
224
225pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
226 format_zeta_prompt_with_budget(input, format, MAX_PROMPT_TOKENS)
227}
228
229/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
230pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
231 match format {
232 ZetaFormat::V0120GitMergeMarkers => output
233 .strip_suffix(v0120_git_merge_markers::END_MARKER)
234 .unwrap_or(output),
235 ZetaFormat::V0131GitMergeMarkersPrefix => output
236 .strip_suffix(v0131_git_merge_markers_prefix::END_MARKER)
237 .unwrap_or(output),
238 ZetaFormat::V0211SeedCoder => output
239 .strip_suffix(seed_coder::END_MARKER)
240 .unwrap_or(output),
241 _ => output,
242 }
243}
244
245pub fn excerpt_range_for_format(
246 format: ZetaFormat,
247 ranges: &ExcerptRanges,
248) -> (Range<usize>, Range<usize>) {
249 match format {
250 ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
251 ranges.editable_150.clone(),
252 ranges.editable_150_context_350.clone(),
253 ),
254 ZetaFormat::V0114180EditableRegion
255 | ZetaFormat::V0120GitMergeMarkers
256 | ZetaFormat::V0131GitMergeMarkersPrefix
257 | ZetaFormat::V0211Prefill
258 | ZetaFormat::V0211SeedCoder => (
259 ranges.editable_350.clone(),
260 ranges.editable_350_context_150.clone(),
261 ),
262 }
263}
264
265fn resolve_cursor_region(
266 input: &ZetaPromptInput,
267 format: ZetaFormat,
268) -> (&str, Range<usize>, usize) {
269 let Some(ranges) = &input.excerpt_ranges else {
270 return (
271 &input.cursor_excerpt,
272 input.editable_range_in_excerpt.clone(),
273 input.cursor_offset_in_excerpt,
274 );
275 };
276
277 let (editable_range, context_range) = excerpt_range_for_format(format, ranges);
278 let context_start = context_range.start;
279 let context_text = &input.cursor_excerpt[context_range];
280 let adjusted_editable =
281 (editable_range.start - context_start)..(editable_range.end - context_start);
282 let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
283
284 (context_text, adjusted_editable, adjusted_cursor)
285}
286
287fn format_zeta_prompt_with_budget(
288 input: &ZetaPromptInput,
289 format: ZetaFormat,
290 max_tokens: usize,
291) -> String {
292 let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
293 let path = &*input.cursor_path;
294
295 let mut cursor_section = String::new();
296 match format {
297 ZetaFormat::V0112MiddleAtEnd => {
298 v0112_middle_at_end::write_cursor_excerpt_section(
299 &mut cursor_section,
300 path,
301 context,
302 &editable_range,
303 cursor_offset,
304 );
305 }
306 ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
307 v0113_ordered::write_cursor_excerpt_section(
308 &mut cursor_section,
309 path,
310 context,
311 &editable_range,
312 cursor_offset,
313 )
314 }
315 ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
316 &mut cursor_section,
317 path,
318 context,
319 &editable_range,
320 cursor_offset,
321 ),
322 ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
323 v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
324 &mut cursor_section,
325 path,
326 context,
327 &editable_range,
328 cursor_offset,
329 )
330 }
331 ZetaFormat::V0211SeedCoder => {
332 return seed_coder::format_prompt_with_budget(
333 path,
334 context,
335 &editable_range,
336 cursor_offset,
337 &input.events,
338 &input.related_files,
339 max_tokens,
340 );
341 }
342 }
343
344 let cursor_tokens = estimate_tokens(cursor_section.len());
345 let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
346
347 let edit_history_section = format_edit_history_within_budget(
348 &input.events,
349 "<|file_sep|>",
350 "edit history",
351 budget_after_cursor,
352 );
353 let edit_history_tokens = estimate_tokens(edit_history_section.len());
354 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
355
356 let related_files_section = format_related_files_within_budget(
357 &input.related_files,
358 "<|file_sep|>",
359 budget_after_edit_history,
360 );
361
362 let mut prompt = String::new();
363 prompt.push_str(&related_files_section);
364 prompt.push_str(&edit_history_section);
365 prompt.push_str(&cursor_section);
366 prompt
367}
368
369pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
370 match format {
371 ZetaFormat::V0112MiddleAtEnd
372 | ZetaFormat::V0113Ordered
373 | ZetaFormat::V0114180EditableRegion
374 | ZetaFormat::V0120GitMergeMarkers
375 | ZetaFormat::V0131GitMergeMarkersPrefix
376 | ZetaFormat::V0211SeedCoder => String::new(),
377 ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(input),
378 }
379}
380
381fn format_edit_history_within_budget(
382 events: &[Arc<Event>],
383 file_marker: &str,
384 edit_history_name: &str,
385 max_tokens: usize,
386) -> String {
387 let header = format!("{}{}\n", file_marker, edit_history_name);
388 let header_tokens = estimate_tokens(header.len());
389 if header_tokens >= max_tokens {
390 return String::new();
391 }
392
393 let mut event_strings: Vec<String> = Vec::new();
394 let mut total_tokens = header_tokens;
395
396 for event in events.iter().rev() {
397 let mut event_str = String::new();
398 write_event(&mut event_str, event);
399 let event_tokens = estimate_tokens(event_str.len());
400
401 if total_tokens + event_tokens > max_tokens {
402 break;
403 }
404 total_tokens += event_tokens;
405 event_strings.push(event_str);
406 }
407
408 if event_strings.is_empty() {
409 return String::new();
410 }
411
412 let mut result = header;
413 for event_str in event_strings.iter().rev() {
414 result.push_str(event_str);
415 }
416 result
417}
418
419fn format_related_files_within_budget(
420 related_files: &[RelatedFile],
421 file_marker: &str,
422 max_tokens: usize,
423) -> String {
424 let mut result = String::new();
425 let mut total_tokens = 0;
426
427 for file in related_files {
428 let path_str = file.path.to_string_lossy();
429 let header = format!("{}{}\n", file_marker, path_str);
430 let header_tokens = estimate_tokens(header.len());
431
432 if total_tokens + header_tokens > max_tokens {
433 break;
434 }
435
436 let mut file_tokens = header_tokens;
437 let mut excerpts_to_include = 0;
438
439 for excerpt in &file.excerpts {
440 let needs_newline = !excerpt.text.ends_with('\n');
441 let needs_ellipsis = excerpt.row_range.end < file.max_row;
442 let excerpt_len = excerpt.text.len()
443 + if needs_newline { "\n".len() } else { 0 }
444 + if needs_ellipsis { "...\n".len() } else { 0 };
445
446 let excerpt_tokens = estimate_tokens(excerpt_len);
447 if total_tokens + file_tokens + excerpt_tokens > max_tokens {
448 break;
449 }
450 file_tokens += excerpt_tokens;
451 excerpts_to_include += 1;
452 }
453
454 if excerpts_to_include > 0 {
455 total_tokens += file_tokens;
456 result.push_str(&header);
457 for excerpt in file.excerpts.iter().take(excerpts_to_include) {
458 result.push_str(&excerpt.text);
459 if !result.ends_with('\n') {
460 result.push('\n');
461 }
462 if excerpt.row_range.end < file.max_row {
463 result.push_str("...\n");
464 }
465 }
466 }
467 }
468
469 result
470}
471
472pub fn write_related_files(
473 prompt: &mut String,
474 related_files: &[RelatedFile],
475) -> Vec<Range<usize>> {
476 let mut ranges = Vec::new();
477 for file in related_files {
478 let start = prompt.len();
479 let path_str = file.path.to_string_lossy();
480 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
481 for excerpt in &file.excerpts {
482 prompt.push_str(&excerpt.text);
483 if !prompt.ends_with('\n') {
484 prompt.push('\n');
485 }
486 if excerpt.row_range.end < file.max_row {
487 prompt.push_str("...\n");
488 }
489 }
490 let end = prompt.len();
491 ranges.push(start..end);
492 }
493 ranges
494}
495
496mod v0112_middle_at_end {
497 use super::*;
498
499 pub fn write_cursor_excerpt_section(
500 prompt: &mut String,
501 path: &Path,
502 context: &str,
503 editable_range: &Range<usize>,
504 cursor_offset: usize,
505 ) {
506 let path_str = path.to_string_lossy();
507 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
508
509 prompt.push_str("<|fim_prefix|>\n");
510 prompt.push_str(&context[..editable_range.start]);
511
512 prompt.push_str("<|fim_suffix|>\n");
513 prompt.push_str(&context[editable_range.end..]);
514 if !prompt.ends_with('\n') {
515 prompt.push('\n');
516 }
517
518 prompt.push_str("<|fim_middle|>current\n");
519 prompt.push_str(&context[editable_range.start..cursor_offset]);
520 prompt.push_str(CURSOR_MARKER);
521 prompt.push_str(&context[cursor_offset..editable_range.end]);
522 if !prompt.ends_with('\n') {
523 prompt.push('\n');
524 }
525
526 prompt.push_str("<|fim_middle|>updated\n");
527 }
528}
529
530mod v0113_ordered {
531 use super::*;
532
533 pub fn write_cursor_excerpt_section(
534 prompt: &mut String,
535 path: &Path,
536 context: &str,
537 editable_range: &Range<usize>,
538 cursor_offset: usize,
539 ) {
540 let path_str = path.to_string_lossy();
541 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
542
543 prompt.push_str("<|fim_prefix|>\n");
544 prompt.push_str(&context[..editable_range.start]);
545 if !prompt.ends_with('\n') {
546 prompt.push('\n');
547 }
548
549 prompt.push_str("<|fim_middle|>current\n");
550 prompt.push_str(&context[editable_range.start..cursor_offset]);
551 prompt.push_str(CURSOR_MARKER);
552 prompt.push_str(&context[cursor_offset..editable_range.end]);
553 if !prompt.ends_with('\n') {
554 prompt.push('\n');
555 }
556
557 prompt.push_str("<|fim_suffix|>\n");
558 prompt.push_str(&context[editable_range.end..]);
559 if !prompt.ends_with('\n') {
560 prompt.push('\n');
561 }
562
563 prompt.push_str("<|fim_middle|>updated\n");
564 }
565}
566
567pub mod v0120_git_merge_markers {
568 //! A prompt that uses git-style merge conflict markers to represent the editable region.
569 //!
570 //! Example prompt:
571 //!
572 //! <|file_sep|>path/to/target_file.py
573 //! <|fim_prefix|>
574 //! code before editable region
575 //! <|fim_suffix|>
576 //! code after editable region
577 //! <|fim_middle|>
578 //! <<<<<<< CURRENT
579 //! code that
580 //! needs to<|user_cursor|>
581 //! be rewritten
582 //! =======
583 //!
584 //! Expected output (should be generated by the model):
585 //!
586 //! updated
587 //! code with
588 //! changes applied
589 //! >>>>>>> UPDATED
590
591 use super::*;
592
593 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
594 pub const SEPARATOR: &str = "=======\n";
595 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
596
597 pub fn special_tokens() -> &'static [&'static str] {
598 &[
599 "<|fim_prefix|>",
600 "<|fim_suffix|>",
601 "<|fim_middle|>",
602 "<|file_sep|>",
603 START_MARKER,
604 SEPARATOR,
605 END_MARKER,
606 CURSOR_MARKER,
607 ]
608 }
609
610 pub fn write_cursor_excerpt_section(
611 prompt: &mut String,
612 path: &Path,
613 context: &str,
614 editable_range: &Range<usize>,
615 cursor_offset: usize,
616 ) {
617 let path_str = path.to_string_lossy();
618 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
619
620 prompt.push_str("<|fim_prefix|>");
621 prompt.push_str(&context[..editable_range.start]);
622
623 prompt.push_str("<|fim_suffix|>");
624 prompt.push_str(&context[editable_range.end..]);
625 if !prompt.ends_with('\n') {
626 prompt.push('\n');
627 }
628
629 prompt.push_str("<|fim_middle|>");
630 prompt.push_str(START_MARKER);
631 prompt.push_str(&context[editable_range.start..cursor_offset]);
632 prompt.push_str(CURSOR_MARKER);
633 prompt.push_str(&context[cursor_offset..editable_range.end]);
634 if !prompt.ends_with('\n') {
635 prompt.push('\n');
636 }
637 prompt.push_str(SEPARATOR);
638 }
639}
640
641pub mod v0131_git_merge_markers_prefix {
642 //! A prompt that uses git-style merge conflict markers to represent the editable region.
643 //!
644 //! Example prompt:
645 //!
646 //! <|file_sep|>path/to/target_file.py
647 //! <|fim_prefix|>
648 //! code before editable region
649 //! <<<<<<< CURRENT
650 //! code that
651 //! needs to<|user_cursor|>
652 //! be rewritten
653 //! =======
654 //! <|fim_suffix|>
655 //! code after editable region
656 //! <|fim_middle|>
657 //!
658 //! Expected output (should be generated by the model):
659 //!
660 //! updated
661 //! code with
662 //! changes applied
663 //! >>>>>>> UPDATED
664
665 use super::*;
666
667 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
668 pub const SEPARATOR: &str = "=======\n";
669 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
670
671 pub fn special_tokens() -> &'static [&'static str] {
672 &[
673 "<|fim_prefix|>",
674 "<|fim_suffix|>",
675 "<|fim_middle|>",
676 "<|file_sep|>",
677 START_MARKER,
678 SEPARATOR,
679 END_MARKER,
680 CURSOR_MARKER,
681 ]
682 }
683
684 pub fn write_cursor_excerpt_section(
685 prompt: &mut String,
686 path: &Path,
687 context: &str,
688 editable_range: &Range<usize>,
689 cursor_offset: usize,
690 ) {
691 let path_str = path.to_string_lossy();
692 write!(prompt, "<|file_sep|>{}\n", path_str).ok();
693
694 prompt.push_str("<|fim_prefix|>");
695 prompt.push_str(&context[..editable_range.start]);
696 prompt.push_str(START_MARKER);
697 prompt.push_str(&context[editable_range.start..cursor_offset]);
698 prompt.push_str(CURSOR_MARKER);
699 prompt.push_str(&context[cursor_offset..editable_range.end]);
700 if !prompt.ends_with('\n') {
701 prompt.push('\n');
702 }
703 prompt.push_str(SEPARATOR);
704
705 prompt.push_str("<|fim_suffix|>");
706 prompt.push_str(&context[editable_range.end..]);
707 if !prompt.ends_with('\n') {
708 prompt.push('\n');
709 }
710
711 prompt.push_str("<|fim_middle|>");
712 }
713}
714
715pub mod v0211_prefill {
716 use super::*;
717
718 pub fn get_prefill(input: &ZetaPromptInput) -> String {
719 let editable_region = &input.cursor_excerpt
720 [input.editable_range_in_excerpt.start..input.editable_range_in_excerpt.end];
721
722 let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
723 let prefill_len = editable_region.floor_char_boundary(prefill_len);
724
725 // Find a token boundary to avoid splitting tokens in the prefill.
726 // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
727 // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
728 // the \n and consume any consecutive \n characters after it.
729 let prefill = &editable_region[..prefill_len];
730 match prefill.rfind('\n') {
731 Some(pos) => {
732 let mut end = pos + 1;
733 while end < editable_region.len()
734 && editable_region.as_bytes().get(end) == Some(&b'\n')
735 {
736 end += 1;
737 }
738 editable_region[..end].to_string()
739 }
740 // No newline found. Fall back to splitting before the last space
741 // (word-level boundary)
742 None => match prefill.rfind(' ') {
743 Some(pos) => prefill[..pos].to_string(),
744 None => prefill.to_string(),
745 },
746 }
747 }
748}
749
750pub mod seed_coder {
751 //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
752 //!
753 //! Seed-Coder uses different FIM tokens and order than Qwen:
754 //! - SPM order: suffix comes FIRST, then prefix, then middle
755 //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
756 //! - File markers: StarCoder-style `<filename>path` (single token + path)
757 //!
758 //! All context (related files, edit history) goes in the PREFIX section.
759 //! The suffix contains only code after the editable region.
760 //!
761 //! Example prompt:
762 //!
763 //! <[fim-suffix]>
764 //! code after editable region
765 //! <[fim-prefix]><filename>related/file.py
766 //! related file content
767 //!
768 //! <filename>edit_history
769 //! --- a/some_file.py
770 //! +++ b/some_file.py
771 //! -old
772 //! +new
773 //!
774 //! <filename>path/to/target_file.py
775 //! code before editable region
776 //! <<<<<<< CURRENT
777 //! code that
778 //! needs to<|user_cursor|>
779 //! be rewritten
780 //! =======
781 //! <[fim-middle]>
782 //!
783 //! Expected output (model generates):
784 //!
785 //! updated
786 //! code with
787 //! changes applied
788 //! >>>>>>> UPDATED
789
790 use super::*;
791
792 pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
793 pub const FIM_PREFIX: &str = "<[fim-prefix]>";
794 pub const FIM_MIDDLE: &str = "<[fim-middle]>";
795 pub const FILE_MARKER: &str = "<filename>";
796
797 pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
798 pub const SEPARATOR: &str = "=======\n";
799 pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
800
801 pub fn special_tokens() -> &'static [&'static str] {
802 &[
803 FIM_SUFFIX,
804 FIM_PREFIX,
805 FIM_MIDDLE,
806 FILE_MARKER,
807 START_MARKER,
808 SEPARATOR,
809 END_MARKER,
810 CURSOR_MARKER,
811 ]
812 }
813
814 pub fn format_prompt_with_budget(
815 path: &Path,
816 context: &str,
817 editable_range: &Range<usize>,
818 cursor_offset: usize,
819 events: &[Arc<Event>],
820 related_files: &[RelatedFile],
821 max_tokens: usize,
822 ) -> String {
823 let suffix_section = build_suffix_section(context, editable_range);
824 let cursor_prefix_section =
825 build_cursor_prefix_section(path, context, editable_range, cursor_offset);
826
827 let suffix_tokens = estimate_tokens(suffix_section.len());
828 let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
829 let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
830
831 let edit_history_section = super::format_edit_history_within_budget(
832 events,
833 FILE_MARKER,
834 "edit_history",
835 budget_after_cursor,
836 );
837 let edit_history_tokens = estimate_tokens(edit_history_section.len());
838 let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
839
840 let related_files_section = super::format_related_files_within_budget(
841 related_files,
842 FILE_MARKER,
843 budget_after_edit_history,
844 );
845
846 let mut prompt = String::new();
847 prompt.push_str(&suffix_section);
848 prompt.push_str(FIM_PREFIX);
849 prompt.push_str(&related_files_section);
850 if !related_files_section.is_empty() {
851 prompt.push('\n');
852 }
853 prompt.push_str(&edit_history_section);
854 if !edit_history_section.is_empty() {
855 prompt.push('\n');
856 }
857 prompt.push_str(&cursor_prefix_section);
858 prompt.push_str(FIM_MIDDLE);
859 prompt
860 }
861
862 fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
863 let mut section = String::new();
864 section.push_str(FIM_SUFFIX);
865 section.push_str(&context[editable_range.end..]);
866 if !section.ends_with('\n') {
867 section.push('\n');
868 }
869 section
870 }
871
872 fn build_cursor_prefix_section(
873 path: &Path,
874 context: &str,
875 editable_range: &Range<usize>,
876 cursor_offset: usize,
877 ) -> String {
878 let mut section = String::new();
879 let path_str = path.to_string_lossy();
880 write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
881
882 section.push_str(&context[..editable_range.start]);
883 section.push_str(START_MARKER);
884 section.push_str(&context[editable_range.start..cursor_offset]);
885 section.push_str(CURSOR_MARKER);
886 section.push_str(&context[cursor_offset..editable_range.end]);
887 if !section.ends_with('\n') {
888 section.push('\n');
889 }
890 section.push_str(SEPARATOR);
891 section
892 }
893}
894
/// The zeta1 prompt format: an instruction-style prompt with a "User Edits"
/// section, a "User Excerpt" section and a "Response" header.
pub mod zeta1 {
    use super::*;
    use std::fmt::Write;

    /// zeta1's own cursor marker; it shadows the parent module's
    /// `CURSOR_MARKER` inside this module.
    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";

    const INSTRUCTION_HEADER: &str = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n\n",
        "### User Edits:\n\n"
    );
    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
    const RESPONSE_HEADER: &str = "\n\n### Response:\n";

    /// Formats a complete zeta1 prompt from the input events and excerpt.
    ///
    /// Both arguments are expected to be already formatted; they are inserted
    /// verbatim between the fixed headers.
    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
        // The final length is known up front, so allocate once.
        let mut prompt = String::with_capacity(
            INSTRUCTION_HEADER.len()
                + input_events.len()
                + EXCERPT_HEADER.len()
                + input_excerpt.len()
                + RESPONSE_HEADER.len(),
        );
        prompt.push_str(INSTRUCTION_HEADER);
        prompt.push_str(input_events);
        prompt.push_str(EXCERPT_HEADER);
        prompt.push_str(input_excerpt);
        prompt.push_str(RESPONSE_HEADER);
        prompt
    }

    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
    /// editable and context byte-offset ranges within `cursor_excerpt`.
    pub fn format_zeta1_from_input(
        input: &ZetaPromptInput,
        editable_range: Range<usize>,
        context_range: Range<usize>,
    ) -> String {
        let events = format_zeta1_events(&input.events);
        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
        format_zeta1_prompt(&events, &excerpt)
    }

    /// Formats events in zeta1 style (oldest first).
    ///
    /// Events that format to an empty string are skipped; the rest are
    /// separated by a blank line.
    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
        let mut result = String::new();
        for event in events {
            let event_string = format_zeta1_event(event);
            if event_string.is_empty() {
                continue;
            }
            if !result.is_empty() {
                result.push_str("\n\n");
            }
            result.push_str(&event_string);
        }
        result
    }

    /// Formats one event: a "User renamed" line when the path changed,
    /// followed by a fenced diff when the diff is non-empty. Returns an empty
    /// string when neither applies.
    fn format_zeta1_event(event: &Event) -> String {
        match event {
            Event::BufferChange {
                path,
                old_path,
                diff,
                ..
            } => {
                let mut prompt = String::new();
                if old_path != path {
                    // The format string's own `\n` plus `writeln!` leaves a
                    // blank line after the rename notice.
                    writeln!(
                        prompt,
                        "User renamed {} to {}\n",
                        old_path.display(),
                        path.display()
                    )
                    .ok();
                }
                if !diff.is_empty() {
                    write!(
                        prompt,
                        "User edited {}:\n```diff\n{}\n```",
                        path.display(),
                        diff
                    )
                    .ok();
                }
                prompt
            }
        }
    }

    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
    /// within `cursor_excerpt`.
    ///
    /// `editable_range`, `context_range` and the input's cursor offset are all
    /// used as offsets into the full `cursor_excerpt`.
    fn format_zeta1_excerpt(
        input: &ZetaPromptInput,
        editable_range: Range<usize>,
        context_range: Range<usize>,
    ) -> String {
        let path_str = input.cursor_path.to_string_lossy();
        let excerpt = &*input.cursor_excerpt;
        let cursor_offset = input.cursor_offset_in_excerpt;

        let mut prompt = String::new();
        writeln!(&mut prompt, "```{path_str}").ok();

        // Only emit the start-of-file marker when the excerpt begins at row 0
        // and the context begins at the first byte of the excerpt.
        let starts_at_file_beginning =
            input.excerpt_start_row == Some(0) && context_range.start == 0;
        if starts_at_file_beginning {
            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
        }

        prompt.push_str(&excerpt[context_range.start..editable_range.start]);

        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
        prompt.push_str(CURSOR_MARKER);
        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();

        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
        write!(prompt, "\n```").ok();

        prompt
    }

    /// Cleans zeta1 model output by extracting content between editable region
    /// markers and converting the zeta1 cursor marker to the universal one.
    ///
    /// A missing start marker means extraction begins at the start of the
    /// output; a missing end marker means it runs to the end. One newline
    /// directly after the start marker and one directly before the end marker
    /// are dropped. The current implementation always returns `Some`; when
    /// the end marker precedes the start marker the result is an empty string.
    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
        // Work on a copy with every zeta1 cursor marker removed.
        let content = output.replace(CURSOR_MARKER, "");

        let content_start = content
            .find(EDITABLE_REGION_START_MARKER)
            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
            .map(|pos| {
                if content.as_bytes().get(pos) == Some(&b'\n') {
                    pos + 1
                } else {
                    pos
                }
            })
            .unwrap_or(0);

        let content_end = content
            .find(EDITABLE_REGION_END_MARKER)
            .map(|pos| {
                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
                    pos - 1
                } else {
                    pos
                }
            })
            .unwrap_or(content.len());

        if content_start > content_end {
            return Some(String::new());
        }

        let extracted = &content[content_start..content_end];

        // Position of the first zeta1 cursor marker, translated into an
        // offset within `extracted` and clamped to its length.
        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
            // Rebinds the name to the offset at which extracted content
            // begins within the text before the cursor.
            let text_before_cursor = text_before_cursor
                .find(EDITABLE_REGION_START_MARKER)
                .map(|pos| {
                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
                        after_marker + 1
                    } else {
                        after_marker
                    }
                })
                .unwrap_or(0);
            let offset_in_extracted = zeta1_cursor_pos
                .saturating_sub(text_before_cursor)
                .min(extracted.len());
            offset_in_extracted
        });

        // Re-insert the cursor using the parent module's marker.
        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
        if let Some(offset) = cursor_offset {
            result.push_str(&extracted[..offset]);
            result.push_str(super::CURSOR_MARKER);
            result.push_str(&extracted[offset..]);
        } else {
            result.push_str(extracted);
        }

        Some(result)
    }
}
1092
1093#[cfg(test)]
1094mod tests {
1095 use super::*;
1096 use indoc::indoc;
1097
1098 fn make_input(
1099 cursor_excerpt: &str,
1100 editable_range: Range<usize>,
1101 cursor_offset: usize,
1102 events: Vec<Event>,
1103 related_files: Vec<RelatedFile>,
1104 ) -> ZetaPromptInput {
1105 ZetaPromptInput {
1106 cursor_path: Path::new("test.rs").into(),
1107 cursor_excerpt: cursor_excerpt.into(),
1108 editable_range_in_excerpt: editable_range,
1109 cursor_offset_in_excerpt: cursor_offset,
1110 excerpt_start_row: None,
1111 events: events.into_iter().map(Arc::new).collect(),
1112 related_files,
1113 excerpt_ranges: None,
1114 preferred_model: None,
1115 in_open_source_repo: false,
1116 can_collect_data: false,
1117 }
1118 }
1119
1120 fn make_event(path: &str, diff: &str) -> Event {
1121 Event::BufferChange {
1122 path: Path::new(path).into(),
1123 old_path: Path::new(path).into(),
1124 diff: diff.to_string(),
1125 predicted: false,
1126 in_open_source_repo: false,
1127 }
1128 }
1129
1130 fn make_related_file(path: &str, content: &str) -> RelatedFile {
1131 RelatedFile {
1132 path: Path::new(path).into(),
1133 max_row: content.lines().count() as u32,
1134 excerpts: vec![RelatedExcerpt {
1135 row_range: 0..content.lines().count() as u32,
1136 text: content.into(),
1137 }],
1138 in_open_source_repo: false,
1139 }
1140 }
1141
1142 fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1143 format_zeta_prompt_with_budget(input, ZetaFormat::V0114180EditableRegion, max_tokens)
1144 }
1145
/// With a generous budget the prompt contains the related file, the edit
/// history and the cursor file.
#[test]
fn test_no_truncation_when_within_budget() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    let expected = indoc! {r#"
        <|file_sep|>related.rs
        fn helper() {}
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new
        <|file_sep|>test.rs
        <|fim_prefix|>
        prefix
        <|fim_middle|>current
        edi<|user_cursor|>table
        <|fim_suffix|>

        suffix
        <|fim_middle|>updated
    "#};

    assert_eq!(format_with_budget(&input, 10000), expected);
}
1178
/// A budget of 50 tokens keeps both related files but omits the edit history
/// section that a budget of 10000 includes.
#[test]
fn test_truncation_drops_edit_history_when_budget_tight() {
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let related_files = vec![
        make_related_file("r1.rs", "a\n"),
        make_related_file("r2.rs", "b\n"),
    ];
    let input = make_input("code", 0..4, 2, events, related_files);

    let expected_full = indoc! {r#"
        <|file_sep|>r1.rs
        a
        <|file_sep|>r2.rs
        b
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), expected_full);

    let expected_tight = indoc! {r#"
        <|file_sep|>r1.rs
        a
        <|file_sep|>r2.rs
        b
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 50), expected_tight);
}
1229
/// A related file with three excerpts is rendered in full under a large
/// budget, and only its first excerpt is kept under a budget of 50 tokens.
#[test]
fn test_truncation_includes_partial_excerpts() {
    let excerpt = |row_range: Range<u32>, text: &str| RelatedExcerpt {
        row_range,
        text: text.into(),
    };
    let big_file = RelatedFile {
        path: Path::new("big.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![
            excerpt(0..10, "first excerpt\n"),
            excerpt(10..20, "second excerpt\n"),
            excerpt(20..30, "third excerpt\n"),
        ],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![big_file]);

    let expected_full = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        second excerpt
        ...
        third excerpt
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), expected_full);

    let expected_tight = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 50), expected_tight);
}
1291
/// Under a budget of 55 tokens only the later of the two events (`new.rs`)
/// remains in the edit history.
#[test]
fn test_truncation_drops_older_events_first() {
    let events = vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")];
    let input = make_input("x", 0..1, 0, events, vec![]);

    let expected_full = indoc! {r#"
        <|file_sep|>edit history
        --- a/old.rs
        +++ b/old.rs
        -1
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), expected_full);

    let expected_tight = indoc! {r#"
        <|file_sep|>edit history
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 55), expected_tight);
}
1337
/// Under a budget of 30 tokens the prompt contains only the cursor file
/// section; the related file and the edit history are absent.
#[test]
fn test_cursor_excerpt_always_included_with_minimal_budget() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "helper\n")];
    let input = make_input("fn main() {}", 0..12, 3, events, related_files);

    let expected = indoc! {r#"
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        fn <|user_cursor|>main() {}
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};

    assert_eq!(format_with_budget(&input, 30), expected);
}
1360
1361 fn format_seed_coder(input: &ZetaPromptInput) -> String {
1362 format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, 10000)
1363 }
1364
1365 fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1366 format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, max_tokens)
1367 }
1368
/// Pins the full seed-coder prompt layout for an input with a prefix, a
/// suffix, one event and one related file.
#[test]
fn test_seed_coder_basic_format() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    // The closing delimiter sits on the last content line, so the expected
    // prompt has no trailing newline.
    let expected = indoc! {r#"
        <[fim-suffix]>
        suffix
        <[fim-prefix]><filename>related.rs
        fn helper() {}

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new

        <filename>test.rs
        prefix
        <<<<<<< CURRENT
        edi<|user_cursor|>table
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&input), expected);
}
1401
/// Pins the seed-coder prompt for an input without events or related files.
#[test]
fn test_seed_coder_no_context() {
    let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);

    let expected = indoc! {r#"
        <[fim-suffix]>
        after
        <[fim-prefix]><filename>test.rs
        before
        <<<<<<< CURRENT
        mid<|user_cursor|>dle
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&input), expected);
}
1419
/// The seed-coder prompt includes the related file and edit history under the
/// default budget, and only the cursor section under a budget of 30 tokens.
#[test]
fn test_seed_coder_truncation_drops_context() {
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let related_files = vec![make_related_file("r1.rs", "content\n")];
    let input = make_input("code", 0..4, 2, events, related_files);

    // With large budget, everything is included
    let expected_full = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>r1.rs
        content

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&input), expected_full);

    // With tight budget, context is dropped but cursor section remains
    let expected_tight = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder_with_budget(&input, 30), expected_tight);
}
1463
/// Cleaning seed-coder output yields `"new code\n"` whether or not the output
/// ends with the `>>>>>>> UPDATED` line.
#[test]
fn test_seed_coder_clean_output() {
    // First with the trailing marker line, then without it.
    for raw_output in ["new code\n>>>>>>> UPDATED\n", "new code\n"] {
        assert_eq!(
            clean_zeta2_model_output(raw_output, ZetaFormat::V0211SeedCoder),
            "new code\n"
        );
    }
}
1478
/// Pins the Zeta1 prompt for an excerpt that starts at row 0, with one edit
/// event and an editable range that ends just before the closing brace.
#[test]
fn test_format_zeta1_from_input_basic() {
    // The body line is indented with four spaces: the editable range 15..41 and
    // the cursor offset 30 are byte offsets that only line up with that width.
    let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        editable_range_in_excerpt: 15..41,
        cursor_offset_in_excerpt: 30,
        excerpt_start_row: Some(0),
        events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
        related_files: vec![],
        excerpt_ranges: None,
        preferred_model: None,
        in_open_source_repo: false,
        can_collect_data: false,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "User edited other.rs:\n",
            "```diff\n",
            "-old\n",
            "+new\n",
            "\n",
            "```\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|start_of_file|>\n",
            "fn before() {}\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "\n",
            "<|editable_region_end|>}\n",
            "fn after() {}\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
1533
/// Pins the Zeta1 prompt for an excerpt that starts at row 10: the expected
/// prompt has no `<|start_of_file|>` line and no edit events.
#[test]
fn test_format_zeta1_from_input_no_start_of_file() {
    // The body line is indented with four spaces so that the excerpt is exactly
    // 28 bytes long (the range 0..28) and offset 15 falls right before `let`.
    let excerpt = "fn foo() {\n    let x = 1;\n}\n";
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        editable_range_in_excerpt: 0..28,
        cursor_offset_in_excerpt: 15,
        excerpt_start_row: Some(10),
        events: vec![],
        related_files: vec![],
        excerpt_ranges: None,
        preferred_model: None,
        in_open_source_repo: false,
        can_collect_data: false,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```src/main.rs\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "\n",
            "<|editable_region_end|>\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
1580
/// Pins the Zeta1 prompt when the editable range is a strict sub-range of the
/// context range, which covers the whole excerpt.
#[test]
fn test_format_zeta1_from_input_with_sub_ranges() {
    // The body line is indented with four spaces: the editable range 10..37 and
    // the cursor offset 25 are byte offsets that only line up with that width.
    let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
    let editable_range = 10..37;
    let context_range = 0..excerpt.len();

    let input = ZetaPromptInput {
        cursor_path: Path::new("test.rs").into(),
        cursor_excerpt: excerpt.into(),
        editable_range_in_excerpt: editable_range.clone(),
        cursor_offset_in_excerpt: 25,
        excerpt_start_row: Some(0),
        events: vec![],
        related_files: vec![],
        excerpt_ranges: None,
        preferred_model: None,
        in_open_source_repo: false,
        can_collect_data: false,
    };

    let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);

    assert_eq!(
        prompt,
        concat!(
            "### Instruction:\n",
            "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
            "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
            "into account the cursor location.\n",
            "\n",
            "### User Edits:\n",
            "\n",
            "\n",
            "\n",
            "### User Excerpt:\n",
            "\n",
            "```test.rs\n",
            "<|start_of_file|>\n",
            "// prefix\n",
            "<|editable_region_start|>\n",
            "fn foo() {\n",
            "    <|user_cursor_is_here|>let x = 1;\n",
            "}\n",
            "<|editable_region_end|>\n",
            "// suffix\n",
            "\n",
            "```\n",
            "\n",
            "### Response:\n",
        ),
    );
}
1633
/// Cleaning output wrapped in both region markers yields the text between
/// them, without the newlines that border the markers.
#[test]
fn test_clean_zeta1_model_output_basic() {
    // `indoc!` strips only the indentation shared by all lines, so the
    // `println!` line keeps its four extra spaces in the model output.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
}
1647
/// A `<|user_cursor_is_here|>` marker inside the region comes back as
/// `<|user_cursor|>` at the same place in the cleaned text.
#[test]
fn test_clean_zeta1_model_output_with_cursor() {
    // `indoc!` strips only the indentation shared by all lines, so the cursor
    // line keeps its four extra spaces in the model output.
    let output = indoc! {"
        <|editable_region_start|>
        fn main() {
            <|user_cursor_is_here|>println!(\"hello\");
        }
        <|editable_region_end|>
    "};

    let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
    assert_eq!(
        cleaned,
        "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
    );
}
1664
/// Output without any markers is returned unchanged, trailing newline included.
#[test]
fn test_clean_zeta1_model_output_no_markers() {
    let raw_output = "fn main() {}\n";
    let result = zeta1::clean_zeta1_model_output(raw_output);
    assert_eq!(result.unwrap(), "fn main() {}\n");
}
1671
/// A start marker followed directly by the end marker yields an empty string.
#[test]
fn test_clean_zeta1_model_output_empty_region() {
    let raw_output = "<|editable_region_start|>\n<|editable_region_end|>\n";
    let result = zeta1::clean_zeta1_model_output(raw_output);
    assert_eq!(result.unwrap(), "");
}
1678}