zeta_prompt.rs

   1use anyhow::Result;
   2use serde::{Deserialize, Serialize};
   3use std::fmt::Write;
   4use std::ops::Range;
   5use std::path::Path;
   6use std::sync::Arc;
   7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
   8
/// Marker text inserted into the prompt at the user's cursor position inside
/// the editable region. Also listed among each format's special tokens.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes to the prompt builder.
/// Sizes are measured with the rough `estimate_tokens` heuristic.
pub const MAX_PROMPT_TOKENS: usize = 4096;
  11
/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
///
/// The ratio is applied to the byte length of the editable region by
/// `v0211_prefill::get_prefill`.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  16
  17fn estimate_tokens(bytes: usize) -> usize {
  18    bytes / 3
  19}
  20
/// The client's preferred edit prediction model. The server may override this.
///
/// Sent as the optional `preferred_model` field of `ZetaPromptInput`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum EditPredictionModelKind {
    // NOTE(review): presumably selects the prompt style in the `zeta1` module — confirm.
    Zeta1,
    // NOTE(review): presumably selects one of the `ZetaFormat` prompt styles — confirm.
    Zeta2,
}
  27
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
///
/// `resolve_cursor_region` picks the `editable_150*` pair for the
/// `V0112MiddleAtEnd` and `V0113Ordered` formats and the `editable_180*`
/// pair for every other format. All ranges are offsets into the full
/// `cursor_excerpt`, not into the selected context slice.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
}
  46
/// Everything needed to build an edit-prediction prompt: the excerpt around
/// the cursor, recent edit events, and excerpts from related files.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written after the file marker
    /// in the cursor section of the prompt.
    pub cursor_path: Arc<Path>,
    /// Text surrounding the cursor. All offsets and ranges below index into it.
    pub cursor_excerpt: Arc<str>,
    /// Byte range of the editable region within `cursor_excerpt`. Used as-is
    /// when `excerpt_ranges` is absent.
    pub editable_range_in_excerpt: Range<usize>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    // NOTE(review): presumably the buffer row at which `cursor_excerpt` starts;
    // not read by the prompt builders in this file — confirm against callers.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events rendered into the edit-history section. When the token
    /// budget is tight, events at the end of the list are kept first.
    pub events: Vec<Arc<Event>>,
    /// Files whose excerpts are rendered into the related-files section.
    pub related_files: Vec<RelatedFile>,
    /// When set, the excerpt was computed with a larger budget (~512 tokens)
    /// and these ranges let the server select model-appropriate subsets.
    /// When absent, the excerpt IS the context region and
    /// `editable_range_in_excerpt` is the only editable range.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_ranges: Option<ExcerptRanges>,
    /// Client's preferred model. The server may override.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub preferred_model: Option<EditPredictionModelKind>,
    // NOTE(review): presumably whether the cursor file belongs to an
    // open-source repository; not read by the prompt builders in this file.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
  69
/// Versioned prompt formats. The variant decides which cursor-section writer
/// is used, which pre-computed excerpt ranges are selected, which special
/// tokens apply, and how model output is post-processed.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    /// Written by `v0112_middle_at_end`; uses the 150-token editable range.
    V0112MiddleAtEnd,
    /// Written by `v0113_ordered`; uses the 150-token editable range.
    V0113Ordered,
    /// Same layout as `V0113Ordered`, but with the 180-token editable range.
    /// This is the default format.
    #[default]
    V0114180EditableRegion,
    /// Written by `v0120_git_merge_markers`.
    V0120GitMergeMarkers,
    /// Written by `v0131_git_merge_markers_prefix`.
    V0131GitMergeMarkersPrefix,
    /// Same prompt layout as `V0131GitMergeMarkersPrefix`, plus a non-empty
    /// prefill produced by `v0211_prefill`.
    V0211Prefill,
    /// Written by `seed_coder`, which assembles the whole prompt itself.
    V0211SeedCoder,
}
  94
  95impl std::fmt::Display for ZetaFormat {
  96    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  97        write!(f, "{}", <&'static str>::from(self))
  98    }
  99}
 100
 101impl ZetaFormat {
 102    pub fn parse(format_name: &str) -> Result<Self> {
 103        let mut results = ZetaFormat::iter().filter(|version| {
 104            <&'static str>::from(version)
 105                .to_lowercase()
 106                .contains(&format_name.to_lowercase())
 107        });
 108        let Some(result) = results.next() else {
 109            anyhow::bail!(
 110                "`{format_name}` did not match any of:\n{}",
 111                Self::options_as_string()
 112            );
 113        };
 114        if results.next().is_some() {
 115            anyhow::bail!(
 116                "`{format_name}` matched more than one of:\n{}",
 117                Self::options_as_string()
 118            );
 119        }
 120        Ok(result)
 121    }
 122
 123    pub fn options_as_string() -> String {
 124        ZetaFormat::iter()
 125            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 126            .collect::<Vec<_>>()
 127            .concat()
 128    }
 129
 130    pub fn special_tokens(&self) -> &'static [&'static str] {
 131        match self {
 132            ZetaFormat::V0112MiddleAtEnd
 133            | ZetaFormat::V0113Ordered
 134            | ZetaFormat::V0114180EditableRegion => &[
 135                "<|fim_prefix|>",
 136                "<|fim_suffix|>",
 137                "<|fim_middle|>",
 138                "<|file_sep|>",
 139                CURSOR_MARKER,
 140            ],
 141            ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 142            ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 143                v0131_git_merge_markers_prefix::special_tokens()
 144            }
 145            ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 146        }
 147    }
 148}
 149
/// An entry of the user's recent edit history.
///
/// Serialized with an `event` tag field that names the variant.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, described as a diff.
    BufferChange {
        /// Path of the file after the change (rendered on the `+++ b` line).
        path: Arc<Path>,
        /// Path of the file before the change (rendered on the `--- a` line).
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the two path lines.
        diff: String,
        /// When true, the rendered event is prefixed with
        /// `// User accepted prediction:`.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; not rendered into the prompt.
        in_open_source_repo: bool,
    },
}
 161
 162impl Event {
 163    pub fn in_open_source_repo(&self) -> bool {
 164        match self {
 165            Event::BufferChange {
 166                in_open_source_repo,
 167                ..
 168            } => *in_open_source_repo,
 169        }
 170    }
 171}
 172
 173pub fn write_event(prompt: &mut String, event: &Event) {
 174    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 175        for component in path.components() {
 176            prompt.push('/');
 177            write!(prompt, "{}", component.as_os_str().display()).ok();
 178        }
 179    }
 180    match event {
 181        Event::BufferChange {
 182            path,
 183            old_path,
 184            diff,
 185            predicted,
 186            in_open_source_repo: _,
 187        } => {
 188            if *predicted {
 189                prompt.push_str("// User accepted prediction:\n");
 190            }
 191            prompt.push_str("--- a");
 192            write_path_as_unix_str(prompt, old_path.as_ref());
 193            prompt.push_str("\n+++ b");
 194            write_path_as_unix_str(prompt, path.as_ref());
 195            prompt.push('\n');
 196            prompt.push_str(diff);
 197        }
 198    }
 199}
 200
/// A file related to the one being edited, together with the excerpts of it
/// that may be included in the prompt.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written after the file marker in the prompt.
    pub path: Arc<Path>,
    /// An excerpt whose `row_range.end` is below this value is followed by a
    /// `...` line when rendered.
    // NOTE(review): presumably the last row of the file — confirm.
    pub max_row: u32,
    /// Excerpts rendered in order under the file header.
    pub excerpts: Vec<RelatedExcerpt>,
    // NOTE(review): presumably whether this file belongs to an open-source
    // repository; not read by the prompt builders in this file.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 209
/// A contiguous piece of a related file.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by this excerpt; only `end` is read when rendering, to
    /// decide whether a `...` line follows the excerpt.
    pub row_range: Range<u32>,
    /// Excerpt text; a newline is appended when rendering if it lacks one.
    pub text: Arc<str>,
}
 215
 216pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 217    format
 218        .special_tokens()
 219        .iter()
 220        .any(|token| input.cursor_excerpt.contains(token))
 221}
 222
 223pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 224    format_zeta_prompt_with_budget(input, format, MAX_PROMPT_TOKENS)
 225}
 226
 227/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
 228pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
 229    match format {
 230        ZetaFormat::V0120GitMergeMarkers => output
 231            .strip_suffix(v0120_git_merge_markers::END_MARKER)
 232            .unwrap_or(output),
 233        ZetaFormat::V0131GitMergeMarkersPrefix => output
 234            .strip_suffix(v0131_git_merge_markers_prefix::END_MARKER)
 235            .unwrap_or(output),
 236        ZetaFormat::V0211SeedCoder => output
 237            .strip_suffix(seed_coder::END_MARKER)
 238            .unwrap_or(output),
 239        _ => output,
 240    }
 241}
 242
 243fn resolve_cursor_region(
 244    input: &ZetaPromptInput,
 245    format: ZetaFormat,
 246) -> (&str, Range<usize>, usize) {
 247    let Some(ranges) = &input.excerpt_ranges else {
 248        return (
 249            &input.cursor_excerpt,
 250            input.editable_range_in_excerpt.clone(),
 251            input.cursor_offset_in_excerpt,
 252        );
 253    };
 254
 255    let (editable_range, context_range) = match format {
 256        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 257            ranges.editable_150.clone(),
 258            ranges.editable_150_context_350.clone(),
 259        ),
 260        ZetaFormat::V0114180EditableRegion
 261        | ZetaFormat::V0120GitMergeMarkers
 262        | ZetaFormat::V0131GitMergeMarkersPrefix
 263        | ZetaFormat::V0211Prefill
 264        | ZetaFormat::V0211SeedCoder => (
 265            ranges.editable_180.clone(),
 266            ranges.editable_180_context_350.clone(),
 267        ),
 268    };
 269
 270    let context_start = context_range.start;
 271    let context_text = &input.cursor_excerpt[context_range];
 272    let adjusted_editable =
 273        (editable_range.start - context_start)..(editable_range.end - context_start);
 274    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 275
 276    (context_text, adjusted_editable, adjusted_cursor)
 277}
 278
 279fn format_zeta_prompt_with_budget(
 280    input: &ZetaPromptInput,
 281    format: ZetaFormat,
 282    max_tokens: usize,
 283) -> String {
 284    let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
 285    let path = &*input.cursor_path;
 286
 287    let mut cursor_section = String::new();
 288    match format {
 289        ZetaFormat::V0112MiddleAtEnd => {
 290            v0112_middle_at_end::write_cursor_excerpt_section(
 291                &mut cursor_section,
 292                path,
 293                context,
 294                &editable_range,
 295                cursor_offset,
 296            );
 297        }
 298        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 299            v0113_ordered::write_cursor_excerpt_section(
 300                &mut cursor_section,
 301                path,
 302                context,
 303                &editable_range,
 304                cursor_offset,
 305            )
 306        }
 307        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 308            &mut cursor_section,
 309            path,
 310            context,
 311            &editable_range,
 312            cursor_offset,
 313        ),
 314        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 315            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 316                &mut cursor_section,
 317                path,
 318                context,
 319                &editable_range,
 320                cursor_offset,
 321            )
 322        }
 323        ZetaFormat::V0211SeedCoder => {
 324            return seed_coder::format_prompt_with_budget(
 325                path,
 326                context,
 327                &editable_range,
 328                cursor_offset,
 329                &input.events,
 330                &input.related_files,
 331                max_tokens,
 332            );
 333        }
 334    }
 335
 336    let cursor_tokens = estimate_tokens(cursor_section.len());
 337    let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
 338
 339    let edit_history_section = format_edit_history_within_budget(
 340        &input.events,
 341        "<|file_sep|>",
 342        "edit history",
 343        budget_after_cursor,
 344    );
 345    let edit_history_tokens = estimate_tokens(edit_history_section.len());
 346    let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 347
 348    let related_files_section = format_related_files_within_budget(
 349        &input.related_files,
 350        "<|file_sep|>",
 351        budget_after_edit_history,
 352    );
 353
 354    let mut prompt = String::new();
 355    prompt.push_str(&related_files_section);
 356    prompt.push_str(&edit_history_section);
 357    prompt.push_str(&cursor_section);
 358    prompt
 359}
 360
 361pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 362    match format {
 363        ZetaFormat::V0112MiddleAtEnd
 364        | ZetaFormat::V0113Ordered
 365        | ZetaFormat::V0114180EditableRegion
 366        | ZetaFormat::V0120GitMergeMarkers
 367        | ZetaFormat::V0131GitMergeMarkersPrefix
 368        | ZetaFormat::V0211SeedCoder => String::new(),
 369        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(input),
 370    }
 371}
 372
 373fn format_edit_history_within_budget(
 374    events: &[Arc<Event>],
 375    file_marker: &str,
 376    edit_history_name: &str,
 377    max_tokens: usize,
 378) -> String {
 379    let header = format!("{}{}\n", file_marker, edit_history_name);
 380    let header_tokens = estimate_tokens(header.len());
 381    if header_tokens >= max_tokens {
 382        return String::new();
 383    }
 384
 385    let mut event_strings: Vec<String> = Vec::new();
 386    let mut total_tokens = header_tokens;
 387
 388    for event in events.iter().rev() {
 389        let mut event_str = String::new();
 390        write_event(&mut event_str, event);
 391        let event_tokens = estimate_tokens(event_str.len());
 392
 393        if total_tokens + event_tokens > max_tokens {
 394            break;
 395        }
 396        total_tokens += event_tokens;
 397        event_strings.push(event_str);
 398    }
 399
 400    if event_strings.is_empty() {
 401        return String::new();
 402    }
 403
 404    let mut result = header;
 405    for event_str in event_strings.iter().rev() {
 406        result.push_str(event_str);
 407    }
 408    result
 409}
 410
 411fn format_related_files_within_budget(
 412    related_files: &[RelatedFile],
 413    file_marker: &str,
 414    max_tokens: usize,
 415) -> String {
 416    let mut result = String::new();
 417    let mut total_tokens = 0;
 418
 419    for file in related_files {
 420        let path_str = file.path.to_string_lossy();
 421        let header = format!("{}{}\n", file_marker, path_str);
 422        let header_tokens = estimate_tokens(header.len());
 423
 424        if total_tokens + header_tokens > max_tokens {
 425            break;
 426        }
 427
 428        let mut file_tokens = header_tokens;
 429        let mut excerpts_to_include = 0;
 430
 431        for excerpt in &file.excerpts {
 432            let needs_newline = !excerpt.text.ends_with('\n');
 433            let needs_ellipsis = excerpt.row_range.end < file.max_row;
 434            let excerpt_len = excerpt.text.len()
 435                + if needs_newline { "\n".len() } else { 0 }
 436                + if needs_ellipsis { "...\n".len() } else { 0 };
 437
 438            let excerpt_tokens = estimate_tokens(excerpt_len);
 439            if total_tokens + file_tokens + excerpt_tokens > max_tokens {
 440                break;
 441            }
 442            file_tokens += excerpt_tokens;
 443            excerpts_to_include += 1;
 444        }
 445
 446        if excerpts_to_include > 0 {
 447            total_tokens += file_tokens;
 448            result.push_str(&header);
 449            for excerpt in file.excerpts.iter().take(excerpts_to_include) {
 450                result.push_str(&excerpt.text);
 451                if !result.ends_with('\n') {
 452                    result.push('\n');
 453                }
 454                if excerpt.row_range.end < file.max_row {
 455                    result.push_str("...\n");
 456                }
 457            }
 458        }
 459    }
 460
 461    result
 462}
 463
 464pub fn write_related_files(
 465    prompt: &mut String,
 466    related_files: &[RelatedFile],
 467) -> Vec<Range<usize>> {
 468    let mut ranges = Vec::new();
 469    for file in related_files {
 470        let start = prompt.len();
 471        let path_str = file.path.to_string_lossy();
 472        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 473        for excerpt in &file.excerpts {
 474            prompt.push_str(&excerpt.text);
 475            if !prompt.ends_with('\n') {
 476                prompt.push('\n');
 477            }
 478            if excerpt.row_range.end < file.max_row {
 479                prompt.push_str("...\n");
 480            }
 481        }
 482        let end = prompt.len();
 483        ranges.push(start..end);
 484    }
 485    ranges
 486}
 487
 488mod v0112_middle_at_end {
 489    use super::*;
 490
 491    pub fn write_cursor_excerpt_section(
 492        prompt: &mut String,
 493        path: &Path,
 494        context: &str,
 495        editable_range: &Range<usize>,
 496        cursor_offset: usize,
 497    ) {
 498        let path_str = path.to_string_lossy();
 499        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 500
 501        prompt.push_str("<|fim_prefix|>\n");
 502        prompt.push_str(&context[..editable_range.start]);
 503
 504        prompt.push_str("<|fim_suffix|>\n");
 505        prompt.push_str(&context[editable_range.end..]);
 506        if !prompt.ends_with('\n') {
 507            prompt.push('\n');
 508        }
 509
 510        prompt.push_str("<|fim_middle|>current\n");
 511        prompt.push_str(&context[editable_range.start..cursor_offset]);
 512        prompt.push_str(CURSOR_MARKER);
 513        prompt.push_str(&context[cursor_offset..editable_range.end]);
 514        if !prompt.ends_with('\n') {
 515            prompt.push('\n');
 516        }
 517
 518        prompt.push_str("<|fim_middle|>updated\n");
 519    }
 520}
 521
 522mod v0113_ordered {
 523    use super::*;
 524
 525    pub fn write_cursor_excerpt_section(
 526        prompt: &mut String,
 527        path: &Path,
 528        context: &str,
 529        editable_range: &Range<usize>,
 530        cursor_offset: usize,
 531    ) {
 532        let path_str = path.to_string_lossy();
 533        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 534
 535        prompt.push_str("<|fim_prefix|>\n");
 536        prompt.push_str(&context[..editable_range.start]);
 537        if !prompt.ends_with('\n') {
 538            prompt.push('\n');
 539        }
 540
 541        prompt.push_str("<|fim_middle|>current\n");
 542        prompt.push_str(&context[editable_range.start..cursor_offset]);
 543        prompt.push_str(CURSOR_MARKER);
 544        prompt.push_str(&context[cursor_offset..editable_range.end]);
 545        if !prompt.ends_with('\n') {
 546            prompt.push('\n');
 547        }
 548
 549        prompt.push_str("<|fim_suffix|>\n");
 550        prompt.push_str(&context[editable_range.end..]);
 551        if !prompt.ends_with('\n') {
 552            prompt.push('\n');
 553        }
 554
 555        prompt.push_str("<|fim_middle|>updated\n");
 556    }
 557}
 558
 559pub mod v0120_git_merge_markers {
 560    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 561    //!
 562    //! Example prompt:
 563    //!
 564    //! <|file_sep|>path/to/target_file.py
 565    //! <|fim_prefix|>
 566    //! code before editable region
 567    //! <|fim_suffix|>
 568    //! code after editable region
 569    //! <|fim_middle|>
 570    //! <<<<<<< CURRENT
 571    //! code that
 572    //! needs to<|user_cursor|>
 573    //! be rewritten
 574    //! =======
 575    //!
 576    //! Expected output (should be generated by the model):
 577    //!
 578    //! updated
 579    //! code with
 580    //! changes applied
 581    //! >>>>>>> UPDATED
 582
 583    use super::*;
 584
 585    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 586    pub const SEPARATOR: &str = "=======\n";
 587    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 588
 589    pub fn special_tokens() -> &'static [&'static str] {
 590        &[
 591            "<|fim_prefix|>",
 592            "<|fim_suffix|>",
 593            "<|fim_middle|>",
 594            "<|file_sep|>",
 595            START_MARKER,
 596            SEPARATOR,
 597            END_MARKER,
 598            CURSOR_MARKER,
 599        ]
 600    }
 601
 602    pub fn write_cursor_excerpt_section(
 603        prompt: &mut String,
 604        path: &Path,
 605        context: &str,
 606        editable_range: &Range<usize>,
 607        cursor_offset: usize,
 608    ) {
 609        let path_str = path.to_string_lossy();
 610        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 611
 612        prompt.push_str("<|fim_prefix|>");
 613        prompt.push_str(&context[..editable_range.start]);
 614
 615        prompt.push_str("<|fim_suffix|>");
 616        prompt.push_str(&context[editable_range.end..]);
 617        if !prompt.ends_with('\n') {
 618            prompt.push('\n');
 619        }
 620
 621        prompt.push_str("<|fim_middle|>");
 622        prompt.push_str(START_MARKER);
 623        prompt.push_str(&context[editable_range.start..cursor_offset]);
 624        prompt.push_str(CURSOR_MARKER);
 625        prompt.push_str(&context[cursor_offset..editable_range.end]);
 626        if !prompt.ends_with('\n') {
 627            prompt.push('\n');
 628        }
 629        prompt.push_str(SEPARATOR);
 630    }
 631}
 632
 633pub mod v0131_git_merge_markers_prefix {
 634    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 635    //!
 636    //! Example prompt:
 637    //!
 638    //! <|file_sep|>path/to/target_file.py
 639    //! <|fim_prefix|>
 640    //! code before editable region
 641    //! <<<<<<< CURRENT
 642    //! code that
 643    //! needs to<|user_cursor|>
 644    //! be rewritten
 645    //! =======
 646    //! <|fim_suffix|>
 647    //! code after editable region
 648    //! <|fim_middle|>
 649    //!
 650    //! Expected output (should be generated by the model):
 651    //!
 652    //! updated
 653    //! code with
 654    //! changes applied
 655    //! >>>>>>> UPDATED
 656
 657    use super::*;
 658
 659    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 660    pub const SEPARATOR: &str = "=======\n";
 661    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 662
 663    pub fn special_tokens() -> &'static [&'static str] {
 664        &[
 665            "<|fim_prefix|>",
 666            "<|fim_suffix|>",
 667            "<|fim_middle|>",
 668            "<|file_sep|>",
 669            START_MARKER,
 670            SEPARATOR,
 671            END_MARKER,
 672            CURSOR_MARKER,
 673        ]
 674    }
 675
 676    pub fn write_cursor_excerpt_section(
 677        prompt: &mut String,
 678        path: &Path,
 679        context: &str,
 680        editable_range: &Range<usize>,
 681        cursor_offset: usize,
 682    ) {
 683        let path_str = path.to_string_lossy();
 684        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 685
 686        prompt.push_str("<|fim_prefix|>");
 687        prompt.push_str(&context[..editable_range.start]);
 688        prompt.push_str(START_MARKER);
 689        prompt.push_str(&context[editable_range.start..cursor_offset]);
 690        prompt.push_str(CURSOR_MARKER);
 691        prompt.push_str(&context[cursor_offset..editable_range.end]);
 692        if !prompt.ends_with('\n') {
 693            prompt.push('\n');
 694        }
 695        prompt.push_str(SEPARATOR);
 696
 697        prompt.push_str("<|fim_suffix|>");
 698        prompt.push_str(&context[editable_range.end..]);
 699        if !prompt.ends_with('\n') {
 700            prompt.push('\n');
 701        }
 702
 703        prompt.push_str("<|fim_middle|>");
 704    }
 705}
 706
 707pub mod v0211_prefill {
 708    use super::*;
 709
 710    pub fn get_prefill(input: &ZetaPromptInput) -> String {
 711        let editable_region = &input.cursor_excerpt
 712            [input.editable_range_in_excerpt.start..input.editable_range_in_excerpt.end];
 713
 714        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
 715        let prefill_len = editable_region.floor_char_boundary(prefill_len);
 716
 717        // Find a token boundary to avoid splitting tokens in the prefill.
 718        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
 719        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
 720        // the \n and consume any consecutive \n characters after it.
 721        let prefill = &editable_region[..prefill_len];
 722        match prefill.rfind('\n') {
 723            Some(pos) => {
 724                let mut end = pos + 1;
 725                while end < editable_region.len()
 726                    && editable_region.as_bytes().get(end) == Some(&b'\n')
 727                {
 728                    end += 1;
 729                }
 730                editable_region[..end].to_string()
 731            }
 732            // No newline found. Fall back to splitting before the last space
 733            // (word-level boundary)
 734            None => match prefill.rfind(' ') {
 735                Some(pos) => prefill[..pos].to_string(),
 736                None => prefill.to_string(),
 737            },
 738        }
 739    }
 740}
 741
 742pub mod seed_coder {
 743    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
 744    //!
 745    //! Seed-Coder uses different FIM tokens and order than Qwen:
 746    //! - SPM order: suffix comes FIRST, then prefix, then middle
 747    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
 748    //! - File markers: StarCoder-style `<filename>path` (single token + path)
 749    //!
 750    //! All context (related files, edit history) goes in the PREFIX section.
 751    //! The suffix contains only code after the editable region.
 752    //!
 753    //! Example prompt:
 754    //!
 755    //! <[fim-suffix]>
 756    //! code after editable region
 757    //! <[fim-prefix]><filename>related/file.py
 758    //! related file content
 759    //!
 760    //! <filename>edit_history
 761    //! --- a/some_file.py
 762    //! +++ b/some_file.py
 763    //! -old
 764    //! +new
 765    //!
 766    //! <filename>path/to/target_file.py
 767    //! code before editable region
 768    //! <<<<<<< CURRENT
 769    //! code that
 770    //! needs to<|user_cursor|>
 771    //! be rewritten
 772    //! =======
 773    //! <[fim-middle]>
 774    //!
 775    //! Expected output (model generates):
 776    //!
 777    //! updated
 778    //! code with
 779    //! changes applied
 780    //! >>>>>>> UPDATED
 781
 782    use super::*;
 783
 784    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
 785    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
 786    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
 787    pub const FILE_MARKER: &str = "<filename>";
 788
 789    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 790    pub const SEPARATOR: &str = "=======\n";
 791    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 792
 793    pub fn special_tokens() -> &'static [&'static str] {
 794        &[
 795            FIM_SUFFIX,
 796            FIM_PREFIX,
 797            FIM_MIDDLE,
 798            FILE_MARKER,
 799            START_MARKER,
 800            SEPARATOR,
 801            END_MARKER,
 802            CURSOR_MARKER,
 803        ]
 804    }
 805
 806    pub fn format_prompt_with_budget(
 807        path: &Path,
 808        context: &str,
 809        editable_range: &Range<usize>,
 810        cursor_offset: usize,
 811        events: &[Arc<Event>],
 812        related_files: &[RelatedFile],
 813        max_tokens: usize,
 814    ) -> String {
 815        let suffix_section = build_suffix_section(context, editable_range);
 816        let cursor_prefix_section =
 817            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
 818
 819        let suffix_tokens = estimate_tokens(suffix_section.len());
 820        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
 821        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
 822
 823        let edit_history_section = super::format_edit_history_within_budget(
 824            events,
 825            FILE_MARKER,
 826            "edit_history",
 827            budget_after_cursor,
 828        );
 829        let edit_history_tokens = estimate_tokens(edit_history_section.len());
 830        let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 831
 832        let related_files_section = super::format_related_files_within_budget(
 833            related_files,
 834            FILE_MARKER,
 835            budget_after_edit_history,
 836        );
 837
 838        let mut prompt = String::new();
 839        prompt.push_str(&suffix_section);
 840        prompt.push_str(FIM_PREFIX);
 841        prompt.push_str(&related_files_section);
 842        if !related_files_section.is_empty() {
 843            prompt.push('\n');
 844        }
 845        prompt.push_str(&edit_history_section);
 846        if !edit_history_section.is_empty() {
 847            prompt.push('\n');
 848        }
 849        prompt.push_str(&cursor_prefix_section);
 850        prompt.push_str(FIM_MIDDLE);
 851        prompt
 852    }
 853
 854    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
 855        let mut section = String::new();
 856        section.push_str(FIM_SUFFIX);
 857        section.push_str(&context[editable_range.end..]);
 858        if !section.ends_with('\n') {
 859            section.push('\n');
 860        }
 861        section
 862    }
 863
 864    fn build_cursor_prefix_section(
 865        path: &Path,
 866        context: &str,
 867        editable_range: &Range<usize>,
 868        cursor_offset: usize,
 869    ) -> String {
 870        let mut section = String::new();
 871        let path_str = path.to_string_lossy();
 872        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
 873
 874        section.push_str(&context[..editable_range.start]);
 875        section.push_str(START_MARKER);
 876        section.push_str(&context[editable_range.start..cursor_offset]);
 877        section.push_str(CURSOR_MARKER);
 878        section.push_str(&context[cursor_offset..editable_range.end]);
 879        if !section.ends_with('\n') {
 880            section.push('\n');
 881        }
 882        section.push_str(SEPARATOR);
 883        section
 884    }
 885}
 886
 887/// The zeta1 prompt format
 888pub mod zeta1 {
 889    use super::*;
 890    use std::fmt::Write;
 891
 892    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
 893    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
 894    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
 895    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
 896
 897    const INSTRUCTION_HEADER: &str = concat!(
 898        "### Instruction:\n",
 899        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
 900        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
 901        "into account the cursor location.\n\n",
 902        "### User Edits:\n\n"
 903    );
 904    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
 905    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
 906
 907    /// Formats a complete zeta1 prompt from the input events and excerpt.
 908    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
 909        let mut prompt = String::with_capacity(
 910            INSTRUCTION_HEADER.len()
 911                + input_events.len()
 912                + EXCERPT_HEADER.len()
 913                + input_excerpt.len()
 914                + RESPONSE_HEADER.len(),
 915        );
 916        prompt.push_str(INSTRUCTION_HEADER);
 917        prompt.push_str(input_events);
 918        prompt.push_str(EXCERPT_HEADER);
 919        prompt.push_str(input_excerpt);
 920        prompt.push_str(RESPONSE_HEADER);
 921        prompt
 922    }
 923
 924    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
 925    /// editable and context byte-offset ranges within `cursor_excerpt`.
 926    pub fn format_zeta1_from_input(
 927        input: &ZetaPromptInput,
 928        editable_range: Range<usize>,
 929        context_range: Range<usize>,
 930    ) -> String {
 931        let events = format_zeta1_events(&input.events);
 932        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
 933        format_zeta1_prompt(&events, &excerpt)
 934    }
 935
 936    /// Formats events in zeta1 style (oldest first).
 937    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
 938        let mut result = String::new();
 939        for event in events {
 940            let event_string = format_zeta1_event(event);
 941            if event_string.is_empty() {
 942                continue;
 943            }
 944            if !result.is_empty() {
 945                result.push_str("\n\n");
 946            }
 947            result.push_str(&event_string);
 948        }
 949        result
 950    }
 951
 952    fn format_zeta1_event(event: &Event) -> String {
 953        match event {
 954            Event::BufferChange {
 955                path,
 956                old_path,
 957                diff,
 958                ..
 959            } => {
 960                let mut prompt = String::new();
 961                if old_path != path {
 962                    writeln!(
 963                        prompt,
 964                        "User renamed {} to {}\n",
 965                        old_path.display(),
 966                        path.display()
 967                    )
 968                    .ok();
 969                }
 970                if !diff.is_empty() {
 971                    write!(
 972                        prompt,
 973                        "User edited {}:\n```diff\n{}\n```",
 974                        path.display(),
 975                        diff
 976                    )
 977                    .ok();
 978                }
 979                prompt
 980            }
 981        }
 982    }
 983
 984    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
 985    /// within `cursor_excerpt`.
 986    fn format_zeta1_excerpt(
 987        input: &ZetaPromptInput,
 988        editable_range: Range<usize>,
 989        context_range: Range<usize>,
 990    ) -> String {
 991        let path_str = input.cursor_path.to_string_lossy();
 992        let excerpt = &*input.cursor_excerpt;
 993        let cursor_offset = input.cursor_offset_in_excerpt;
 994
 995        let mut prompt = String::new();
 996        writeln!(&mut prompt, "```{path_str}").ok();
 997
 998        let starts_at_file_beginning =
 999            input.excerpt_start_row == Some(0) && context_range.start == 0;
1000        if starts_at_file_beginning {
1001            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
1002        }
1003
1004        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
1005
1006        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
1007        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
1008        prompt.push_str(CURSOR_MARKER);
1009        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
1010        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
1011
1012        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
1013        write!(prompt, "\n```").ok();
1014
1015        prompt
1016    }
1017
1018    /// Cleans zeta1 model output by extracting content between editable region
1019    /// markers and converting the zeta1 cursor marker to the universal one.
1020    /// Returns `None` if the output doesn't contain the expected markers.
1021    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
1022        let content = output.replace(CURSOR_MARKER, "");
1023
1024        let content_start = content
1025            .find(EDITABLE_REGION_START_MARKER)
1026            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
1027            .map(|pos| {
1028                if content.as_bytes().get(pos) == Some(&b'\n') {
1029                    pos + 1
1030                } else {
1031                    pos
1032                }
1033            })
1034            .unwrap_or(0);
1035
1036        let content_end = content
1037            .find(EDITABLE_REGION_END_MARKER)
1038            .map(|pos| {
1039                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
1040                    pos - 1
1041                } else {
1042                    pos
1043                }
1044            })
1045            .unwrap_or(content.len());
1046
1047        if content_start > content_end {
1048            return Some(String::new());
1049        }
1050
1051        let extracted = &content[content_start..content_end];
1052
1053        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
1054            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
1055            let text_before_cursor = text_before_cursor
1056                .find(EDITABLE_REGION_START_MARKER)
1057                .map(|pos| {
1058                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
1059                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
1060                        after_marker + 1
1061                    } else {
1062                        after_marker
1063                    }
1064                })
1065                .unwrap_or(0);
1066            let offset_in_extracted = zeta1_cursor_pos
1067                .saturating_sub(text_before_cursor)
1068                .min(extracted.len());
1069            offset_in_extracted
1070        });
1071
1072        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
1073        if let Some(offset) = cursor_offset {
1074            result.push_str(&extracted[..offset]);
1075            result.push_str(super::CURSOR_MARKER);
1076            result.push_str(&extracted[offset..]);
1077        } else {
1078            result.push_str(extracted);
1079        }
1080
1081        Some(result)
1082    }
1083}
1084
// Unit tests pinning the exact text of the generated prompts and of the
// cleaned model output. The expected strings are whitespace-sensitive.
#[cfg(test)]
mod tests {
    use super::*;
    use indoc::indoc;

    /// Builds a `ZetaPromptInput` for the path `test.rs` from the given
    /// excerpt, editable range, cursor offset, events and related files.
    /// The remaining fields are set to `None` / `false`.
    fn make_input(
        cursor_excerpt: &str,
        editable_range: Range<usize>,
        cursor_offset: usize,
        events: Vec<Event>,
        related_files: Vec<RelatedFile>,
    ) -> ZetaPromptInput {
        ZetaPromptInput {
            cursor_path: Path::new("test.rs").into(),
            cursor_excerpt: cursor_excerpt.into(),
            editable_range_in_excerpt: editable_range,
            cursor_offset_in_excerpt: cursor_offset,
            excerpt_start_row: None,
            events: events.into_iter().map(Arc::new).collect(),
            related_files,
            excerpt_ranges: None,
            preferred_model: None,
            in_open_source_repo: false,
        }
    }

    /// Builds a `BufferChange` event whose `old_path` equals `path`, with
    /// `predicted` and `in_open_source_repo` both `false`.
    fn make_event(path: &str, diff: &str) -> Event {
        Event::BufferChange {
            path: Path::new(path).into(),
            old_path: Path::new(path).into(),
            diff: diff.to_string(),
            predicted: false,
            in_open_source_repo: false,
        }
    }

    /// Builds a `RelatedFile` holding a single excerpt with all of `content`,
    /// whose row range is `0..` the number of lines in `content`.
    fn make_related_file(path: &str, content: &str) -> RelatedFile {
        RelatedFile {
            path: Path::new(path).into(),
            max_row: content.lines().count() as u32,
            excerpts: vec![RelatedExcerpt {
                row_range: 0..content.lines().count() as u32,
                text: content.into(),
            }],
            in_open_source_repo: false,
        }
    }

    /// Formats `input` with `ZetaFormat::V0114180EditableRegion` and the given
    /// token budget.
    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
        format_zeta_prompt_with_budget(input, ZetaFormat::V0114180EditableRegion, max_tokens)
    }

    // With a budget of 10000 the related file, the edit history and the
    // cursor file section all appear in the prompt.
    #[test]
    fn test_no_truncation_when_within_budget() {
        let input = make_input(
            "prefix\neditable\nsuffix",
            7..15,
            10,
            vec![make_event("a.rs", "-old\n+new\n")],
            vec![make_related_file("related.rs", "fn helper() {}\n")],
        );

        assert_eq!(
            format_with_budget(&input, 10000),
            indoc! {r#"
                <|file_sep|>related.rs
                fn helper() {}
                <|file_sep|>edit history
                --- a/a.rs
                +++ b/a.rs
                -old
                +new
                <|file_sep|>test.rs
                <|fim_prefix|>
                prefix
                <|fim_middle|>current
                edi<|user_cursor|>table
                <|fim_suffix|>

                suffix
                <|fim_middle|>updated
            "#}
        );
    }

    // With a budget of 50 the edit history section is absent from the
    // expected prompt while both related files are still present.
    #[test]
    fn test_truncation_drops_edit_history_when_budget_tight() {
        let input = make_input(
            "code",
            0..4,
            2,
            vec![make_event("a.rs", "-x\n+y\n")],
            vec![
                make_related_file("r1.rs", "a\n"),
                make_related_file("r2.rs", "b\n"),
            ],
        );

        assert_eq!(
            format_with_budget(&input, 10000),
            indoc! {r#"
                <|file_sep|>r1.rs
                a
                <|file_sep|>r2.rs
                b
                <|file_sep|>edit history
                --- a/a.rs
                +++ b/a.rs
                -x
                +y
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                co<|user_cursor|>de
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );

        assert_eq!(
            format_with_budget(&input, 50),
            indoc! {r#"
                <|file_sep|>r1.rs
                a
                <|file_sep|>r2.rs
                b
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                co<|user_cursor|>de
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );
    }

    // A related file with three excerpts: with a budget of 50 only the first
    // excerpt (followed by `...`) is expected in the prompt.
    #[test]
    fn test_truncation_includes_partial_excerpts() {
        let input = make_input(
            "x",
            0..1,
            0,
            vec![],
            vec![RelatedFile {
                path: Path::new("big.rs").into(),
                max_row: 30,
                in_open_source_repo: false,
                excerpts: vec![
                    RelatedExcerpt {
                        row_range: 0..10,
                        text: "first excerpt\n".into(),
                    },
                    RelatedExcerpt {
                        row_range: 10..20,
                        text: "second excerpt\n".into(),
                    },
                    RelatedExcerpt {
                        row_range: 20..30,
                        text: "third excerpt\n".into(),
                    },
                ],
            }],
        );

        assert_eq!(
            format_with_budget(&input, 10000),
            indoc! {r#"
                <|file_sep|>big.rs
                first excerpt
                ...
                second excerpt
                ...
                third excerpt
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );

        assert_eq!(
            format_with_budget(&input, 50),
            indoc! {r#"
                <|file_sep|>big.rs
                first excerpt
                ...
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );
    }

    // Two events are supplied (`old.rs` first, `new.rs` second): with a
    // budget of 55 only the `new.rs` event is expected in the edit history.
    #[test]
    fn test_truncation_drops_older_events_first() {
        let input = make_input(
            "x",
            0..1,
            0,
            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
            vec![],
        );

        assert_eq!(
            format_with_budget(&input, 10000),
            indoc! {r#"
                <|file_sep|>edit history
                --- a/old.rs
                +++ b/old.rs
                -1
                --- a/new.rs
                +++ b/new.rs
                -2
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );

        assert_eq!(
            format_with_budget(&input, 55),
            indoc! {r#"
                <|file_sep|>edit history
                --- a/new.rs
                +++ b/new.rs
                -2
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                <|user_cursor|>x
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );
    }

    // With a budget of 30 neither the related file nor the edit history is
    // expected; only the cursor file section remains.
    #[test]
    fn test_cursor_excerpt_always_included_with_minimal_budget() {
        let input = make_input(
            "fn main() {}",
            0..12,
            3,
            vec![make_event("a.rs", "-old\n+new\n")],
            vec![make_related_file("related.rs", "helper\n")],
        );

        assert_eq!(
            format_with_budget(&input, 30),
            indoc! {r#"
                <|file_sep|>test.rs
                <|fim_prefix|>
                <|fim_middle|>current
                fn <|user_cursor|>main() {}
                <|fim_suffix|>
                <|fim_middle|>updated
            "#}
        );
    }

    /// Formats `input` with `ZetaFormat::V0211SeedCoder` and a budget of 10000.
    fn format_seed_coder(input: &ZetaPromptInput) -> String {
        format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, 10000)
    }

    /// Formats `input` with `ZetaFormat::V0211SeedCoder` and the given token
    /// budget.
    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
        format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, max_tokens)
    }

    // Pins the seed-coder layout: suffix section first, then the prefix
    // section containing the related file, edit history and cursor file.
    #[test]
    fn test_seed_coder_basic_format() {
        let input = make_input(
            "prefix\neditable\nsuffix",
            7..15,
            10,
            vec![make_event("a.rs", "-old\n+new\n")],
            vec![make_related_file("related.rs", "fn helper() {}\n")],
        );

        assert_eq!(
            format_seed_coder(&input),
            indoc! {r#"
                <[fim-suffix]>
                suffix
                <[fim-prefix]><filename>related.rs
                fn helper() {}

                <filename>edit_history
                --- a/a.rs
                +++ b/a.rs
                -old
                +new

                <filename>test.rs
                prefix
                <<<<<<< CURRENT
                edi<|user_cursor|>table
                =======
                <[fim-middle]>"#}
        );
    }

    // Without events or related files the prefix section holds only the
    // cursor file.
    #[test]
    fn test_seed_coder_no_context() {
        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);

        assert_eq!(
            format_seed_coder(&input),
            indoc! {r#"
                <[fim-suffix]>
                after
                <[fim-prefix]><filename>test.rs
                before
                <<<<<<< CURRENT
                mid<|user_cursor|>dle
                =======
                <[fim-middle]>"#}
        );
    }

    // Compares the seed-coder prompt for the same input under a large budget
    // and under a budget of 30.
    #[test]
    fn test_seed_coder_truncation_drops_context() {
        let input = make_input(
            "code",
            0..4,
            2,
            vec![make_event("a.rs", "-x\n+y\n")],
            vec![make_related_file("r1.rs", "content\n")],
        );

        // With large budget, everything is included
        assert_eq!(
            format_seed_coder(&input),
            indoc! {r#"
                <[fim-suffix]>
                <[fim-prefix]><filename>r1.rs
                content

                <filename>edit_history
                --- a/a.rs
                +++ b/a.rs
                -x
                +y

                <filename>test.rs
                <<<<<<< CURRENT
                co<|user_cursor|>de
                =======
                <[fim-middle]>"#}
        );

        // With tight budget, context is dropped but cursor section remains
        assert_eq!(
            format_seed_coder_with_budget(&input, 30),
            indoc! {r#"
                <[fim-suffix]>
                <[fim-prefix]><filename>test.rs
                <<<<<<< CURRENT
                co<|user_cursor|>de
                =======
                <[fim-middle]>"#}
        );
    }

    // Output with and without a trailing `>>>>>>> UPDATED` line is expected
    // to clean to the same text.
    #[test]
    fn test_seed_coder_clean_output() {
        let output_with_marker = "new code\n>>>>>>> UPDATED\n";
        let output_without_marker = "new code\n";

        assert_eq!(
            clean_zeta2_model_output(output_with_marker, ZetaFormat::V0211SeedCoder),
            "new code\n"
        );
        assert_eq!(
            clean_zeta2_model_output(output_without_marker, ZetaFormat::V0211SeedCoder),
            "new code\n"
        );
    }

    // Excerpt starts at row 0 and the context range starts at offset 0, so
    // the start-of-file marker is expected. The editable range ends right
    // after a newline, so the end marker is preceded by an extra blank line.
    #[test]
    fn test_format_zeta1_from_input_basic() {
        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
        let input = ZetaPromptInput {
            cursor_path: Path::new("src/main.rs").into(),
            cursor_excerpt: excerpt.into(),
            editable_range_in_excerpt: 15..41,
            cursor_offset_in_excerpt: 30,
            excerpt_start_row: Some(0),
            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
            related_files: vec![],
            excerpt_ranges: None,
            preferred_model: None,
            in_open_source_repo: false,
        };

        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());

        assert_eq!(
            prompt,
            concat!(
                "### Instruction:\n",
                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
                "into account the cursor location.\n",
                "\n",
                "### User Edits:\n",
                "\n",
                "User edited other.rs:\n",
                "```diff\n",
                "-old\n",
                "+new\n",
                "\n",
                "```\n",
                "\n",
                "### User Excerpt:\n",
                "\n",
                "```src/main.rs\n",
                "<|start_of_file|>\n",
                "fn before() {}\n",
                "<|editable_region_start|>\n",
                "fn foo() {\n",
                "    <|user_cursor_is_here|>let x = 1;\n",
                "\n",
                "<|editable_region_end|>}\n",
                "fn after() {}\n",
                "\n",
                "```\n",
                "\n",
                "### Response:\n",
            ),
        );
    }

    // `excerpt_start_row` is `Some(10)`, so no start-of-file marker is
    // expected even though the context range starts at offset 0.
    #[test]
    fn test_format_zeta1_from_input_no_start_of_file() {
        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
        let input = ZetaPromptInput {
            cursor_path: Path::new("src/main.rs").into(),
            cursor_excerpt: excerpt.into(),
            editable_range_in_excerpt: 0..28,
            cursor_offset_in_excerpt: 15,
            excerpt_start_row: Some(10),
            events: vec![],
            related_files: vec![],
            excerpt_ranges: None,
            preferred_model: None,
            in_open_source_repo: false,
        };

        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);

        assert_eq!(
            prompt,
            concat!(
                "### Instruction:\n",
                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
                "into account the cursor location.\n",
                "\n",
                "### User Edits:\n",
                "\n",
                "\n",
                "\n",
                "### User Excerpt:\n",
                "\n",
                "```src/main.rs\n",
                "<|editable_region_start|>\n",
                "fn foo() {\n",
                "    <|user_cursor_is_here|>let x = 1;\n",
                "}\n",
                "\n",
                "<|editable_region_end|>\n",
                "```\n",
                "\n",
                "### Response:\n",
            ),
        );
    }

    // The editable range is a strict sub-range of the context range, so text
    // is expected both before the start marker and after the end marker.
    #[test]
    fn test_format_zeta1_from_input_with_sub_ranges() {
        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
        let editable_range = 10..37;
        let context_range = 0..excerpt.len();

        let input = ZetaPromptInput {
            cursor_path: Path::new("test.rs").into(),
            cursor_excerpt: excerpt.into(),
            editable_range_in_excerpt: editable_range.clone(),
            cursor_offset_in_excerpt: 25,
            excerpt_start_row: Some(0),
            events: vec![],
            related_files: vec![],
            excerpt_ranges: None,
            preferred_model: None,
            in_open_source_repo: false,
        };

        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);

        assert_eq!(
            prompt,
            concat!(
                "### Instruction:\n",
                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
                "into account the cursor location.\n",
                "\n",
                "### User Edits:\n",
                "\n",
                "\n",
                "\n",
                "### User Excerpt:\n",
                "\n",
                "```test.rs\n",
                "<|start_of_file|>\n",
                "// prefix\n",
                "<|editable_region_start|>\n",
                "fn foo() {\n",
                "    <|user_cursor_is_here|>let x = 1;\n",
                "}\n",
                "<|editable_region_end|>\n",
                "// suffix\n",
                "\n",
                "```\n",
                "\n",
                "### Response:\n",
            ),
        );
    }

    // The text between the region markers is returned without the newline
    // after the start marker or the newline before the end marker.
    #[test]
    fn test_clean_zeta1_model_output_basic() {
        let output = indoc! {"
            <|editable_region_start|>
            fn main() {
                println!(\"hello\");
            }
            <|editable_region_end|>
        "};

        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
    }

    // The zeta1 cursor marker is replaced by `<|user_cursor|>` at the same
    // position within the extracted text.
    #[test]
    fn test_clean_zeta1_model_output_with_cursor() {
        let output = indoc! {"
            <|editable_region_start|>
            fn main() {
                <|user_cursor_is_here|>println!(\"hello\");
            }
            <|editable_region_end|>
        "};

        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
        assert_eq!(
            cleaned,
            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
        );
    }

    // Output without any region markers is returned unchanged.
    #[test]
    fn test_clean_zeta1_model_output_no_markers() {
        let output = "fn main() {}\n";
        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
        assert_eq!(cleaned, "fn main() {}\n");
    }

    // Start and end markers on consecutive lines yield an empty string.
    #[test]
    fn test_clean_zeta1_model_output_empty_region() {
        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
        assert_eq!(cleaned, "");
    }
}