zeta_prompt.rs

   1use anyhow::Result;
   2use serde::{Deserialize, Serialize};
   3use std::fmt::Write;
   4use std::ops::Range;
   5use std::path::Path;
   6use std::sync::Arc;
   7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
   8
/// Marker text written into the prompt at the user's cursor position.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Default token budget that [`format_zeta_prompt`] applies to a prompt.
/// Token counts are estimated from byte lengths, not produced by a tokenizer.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  16
  17fn estimate_tokens(bytes: usize) -> usize {
  18    bytes / 3
  19}
  20
/// The client's preferred edit prediction model. The server may override this.
///
/// Carried in [`ZetaPromptInput::preferred_model`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum EditPredictionModelKind {
    /// Presumably selects the zeta1 prompt/model family — TODO confirm against the server.
    Zeta1,
    /// Presumably selects the zeta2 prompt/model family — TODO confirm against the server.
    Zeta2,
}
  27
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
///
/// [`excerpt_range_for_format`] picks one (editable, context) pair per
/// [`ZetaFormat`]; the editable range is then re-based onto the start of the
/// chosen context range.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
}
  46
/// Everything needed to render an edit-prediction prompt for one cursor position.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written into the prompt's file header.
    pub cursor_path: Arc<Path>,
    /// Text surrounding the cursor. The offsets and ranges below index into this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte range of the editable region within `cursor_excerpt`.
    /// Used as the editable region when `excerpt_ranges` is absent.
    pub editable_range_in_excerpt: Range<usize>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Presumably the buffer row at which `cursor_excerpt` starts; it is not
    /// read anywhere in this file — TODO confirm with the producer.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events rendered into the prompt's edit-history section, in the
    /// order given. When the token budget is tight, events at the end of the
    /// list are kept in preference to earlier ones.
    pub events: Vec<Arc<Event>>,
    /// Related files rendered into the prompt, in the order given, until the
    /// remaining token budget is exhausted.
    pub related_files: Vec<RelatedFile>,
    /// When set, the excerpt was computed with a larger budget (~512 tokens)
    /// and these ranges let the server select model-appropriate subsets.
    /// When absent, the excerpt IS the context region and
    /// `editable_range_in_excerpt` is the only editable range.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_ranges: Option<ExcerptRanges>,
    /// Client's preferred model. The server may override.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub preferred_model: Option<EditPredictionModelKind>,
    /// Not read in this file; presumably whether the cursor file belongs to an
    /// open-source repository — TODO confirm. Defaults to `false` when missing.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// Not read in this file; presumably whether this request may be used for
    /// data collection — TODO confirm. Defaults to `false` when missing.
    #[serde(default)]
    pub can_collect_data: bool,
}
  71
/// Versioned prompt layouts understood by [`format_zeta_prompt`].
///
/// Variant names double as the identifiers accepted by [`ZetaFormat::parse`]
/// and printed by the `Display` impl.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// NOTE(review): every variant visible here is UpperCamelCase; this allow is
// presumably kept for version-style names that would trip the lint — confirm.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    /// Cursor section is written by the `v0112_middle_at_end` module.
    V0112MiddleAtEnd,
    /// Cursor section is written by the `v0113_ordered` module.
    V0113Ordered,
    /// Uses the same `v0113_ordered` cursor section as [`ZetaFormat::V0113Ordered`].
    V0114180EditableRegion,
    /// Cursor section is written by [`v0120_git_merge_markers`].
    V0120GitMergeMarkers,
    /// Cursor section is written by [`v0131_git_merge_markers_prefix`].
    /// This is the default format.
    #[default]
    V0131GitMergeMarkersPrefix,
    /// Same prompt layout as [`ZetaFormat::V0131GitMergeMarkersPrefix`], with a
    /// prefill computed by [`v0211_prefill`].
    V0211Prefill,
    /// Whole prompt is produced by [`seed_coder`].
    V0211SeedCoder,
}
  96
  97impl std::fmt::Display for ZetaFormat {
  98    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  99        write!(f, "{}", <&'static str>::from(self))
 100    }
 101}
 102
 103impl ZetaFormat {
 104    pub fn parse(format_name: &str) -> Result<Self> {
 105        let mut results = ZetaFormat::iter().filter(|version| {
 106            <&'static str>::from(version)
 107                .to_lowercase()
 108                .contains(&format_name.to_lowercase())
 109        });
 110        let Some(result) = results.next() else {
 111            anyhow::bail!(
 112                "`{format_name}` did not match any of:\n{}",
 113                Self::options_as_string()
 114            );
 115        };
 116        if results.next().is_some() {
 117            anyhow::bail!(
 118                "`{format_name}` matched more than one of:\n{}",
 119                Self::options_as_string()
 120            );
 121        }
 122        Ok(result)
 123    }
 124
 125    pub fn options_as_string() -> String {
 126        ZetaFormat::iter()
 127            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 128            .collect::<Vec<_>>()
 129            .concat()
 130    }
 131
 132    pub fn special_tokens(&self) -> &'static [&'static str] {
 133        match self {
 134            ZetaFormat::V0112MiddleAtEnd
 135            | ZetaFormat::V0113Ordered
 136            | ZetaFormat::V0114180EditableRegion => &[
 137                "<|fim_prefix|>",
 138                "<|fim_suffix|>",
 139                "<|fim_middle|>",
 140                "<|file_sep|>",
 141                CURSOR_MARKER,
 142            ],
 143            ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 144            ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 145                v0131_git_merge_markers_prefix::special_tokens()
 146            }
 147            ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 148        }
 149    }
 150}
 151
/// An edit-history event that can be rendered into the prompt by [`write_event`].
///
/// Serialized with an `event` tag field identifying the variant.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, described as a diff.
    BufferChange {
        /// Path after the change; rendered on the `+++ b` header line.
        path: Arc<Path>,
        /// Path before the change; rendered on the `--- a` header line.
        old_path: Arc<Path>,
        /// Diff body, appended verbatim after the two header lines.
        diff: String,
        /// When true, the rendered event is preceded by a
        /// `// User accepted prediction:` line.
        predicted: bool,
        /// Exposed through [`Event::in_open_source_repo`]; not written into the prompt.
        in_open_source_repo: bool,
    },
}
 163
 164impl Event {
 165    pub fn in_open_source_repo(&self) -> bool {
 166        match self {
 167            Event::BufferChange {
 168                in_open_source_repo,
 169                ..
 170            } => *in_open_source_repo,
 171        }
 172    }
 173}
 174
 175pub fn write_event(prompt: &mut String, event: &Event) {
 176    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 177        for component in path.components() {
 178            prompt.push('/');
 179            write!(prompt, "{}", component.as_os_str().display()).ok();
 180        }
 181    }
 182    match event {
 183        Event::BufferChange {
 184            path,
 185            old_path,
 186            diff,
 187            predicted,
 188            in_open_source_repo: _,
 189        } => {
 190            if *predicted {
 191                prompt.push_str("// User accepted prediction:\n");
 192            }
 193            prompt.push_str("--- a");
 194            write_path_as_unix_str(prompt, old_path.as_ref());
 195            prompt.push_str("\n+++ b");
 196            write_path_as_unix_str(prompt, path.as_ref());
 197            prompt.push('\n');
 198            prompt.push_str(diff);
 199        }
 200    }
 201}
 202
/// A file related to the cursor location, with the excerpts to show from it.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written after the file marker in the prompt.
    pub path: Arc<Path>,
    /// Row bound compared against each excerpt's `row_range.end`: an excerpt
    /// ending before this row is followed by a `...` line in the prompt.
    /// Presumably the last row of the file — TODO confirm.
    pub max_row: u32,
    /// Excerpts rendered in order under the file header.
    pub excerpts: Vec<RelatedExcerpt>,
    /// Not read in this file; presumably whether the file belongs to an
    /// open-source repository — TODO confirm. Defaults to `false` when missing.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 211
/// A contiguous piece of a [`RelatedFile`].
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by `text`. Only `end` is read in this file, to decide
    /// whether a `...` line follows the excerpt.
    pub row_range: Range<u32>,
    /// Excerpt text; a trailing newline is added when rendering if missing.
    pub text: Arc<str>,
}
 217
 218pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 219    format
 220        .special_tokens()
 221        .iter()
 222        .any(|token| input.cursor_excerpt.contains(token))
 223}
 224
/// Renders the prompt for `input` in the given `format`, using
/// [`MAX_PROMPT_TOKENS`] as the token budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    format_zeta_prompt_with_budget(input, format, MAX_PROMPT_TOKENS)
}
 228
 229/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
 230pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
 231    match format {
 232        ZetaFormat::V0120GitMergeMarkers => output
 233            .strip_suffix(v0120_git_merge_markers::END_MARKER)
 234            .unwrap_or(output),
 235        ZetaFormat::V0131GitMergeMarkersPrefix => output
 236            .strip_suffix(v0131_git_merge_markers_prefix::END_MARKER)
 237            .unwrap_or(output),
 238        ZetaFormat::V0211SeedCoder => output
 239            .strip_suffix(seed_coder::END_MARKER)
 240            .unwrap_or(output),
 241        _ => output,
 242    }
 243}
 244
 245pub fn excerpt_range_for_format(
 246    format: ZetaFormat,
 247    ranges: &ExcerptRanges,
 248) -> (Range<usize>, Range<usize>) {
 249    match format {
 250        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 251            ranges.editable_150.clone(),
 252            ranges.editable_150_context_350.clone(),
 253        ),
 254        ZetaFormat::V0114180EditableRegion
 255        | ZetaFormat::V0120GitMergeMarkers
 256        | ZetaFormat::V0131GitMergeMarkersPrefix
 257        | ZetaFormat::V0211Prefill
 258        | ZetaFormat::V0211SeedCoder => (
 259            ranges.editable_350.clone(),
 260            ranges.editable_350_context_150.clone(),
 261        ),
 262    }
 263}
 264
 265fn resolve_cursor_region(
 266    input: &ZetaPromptInput,
 267    format: ZetaFormat,
 268) -> (&str, Range<usize>, usize) {
 269    let Some(ranges) = &input.excerpt_ranges else {
 270        return (
 271            &input.cursor_excerpt,
 272            input.editable_range_in_excerpt.clone(),
 273            input.cursor_offset_in_excerpt,
 274        );
 275    };
 276
 277    let (editable_range, context_range) = excerpt_range_for_format(format, ranges);
 278    let context_start = context_range.start;
 279    let context_text = &input.cursor_excerpt[context_range];
 280    let adjusted_editable =
 281        (editable_range.start - context_start)..(editable_range.end - context_start);
 282    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 283
 284    (context_text, adjusted_editable, adjusted_cursor)
 285}
 286
 287fn format_zeta_prompt_with_budget(
 288    input: &ZetaPromptInput,
 289    format: ZetaFormat,
 290    max_tokens: usize,
 291) -> String {
 292    let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
 293    let path = &*input.cursor_path;
 294
 295    let mut cursor_section = String::new();
 296    match format {
 297        ZetaFormat::V0112MiddleAtEnd => {
 298            v0112_middle_at_end::write_cursor_excerpt_section(
 299                &mut cursor_section,
 300                path,
 301                context,
 302                &editable_range,
 303                cursor_offset,
 304            );
 305        }
 306        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 307            v0113_ordered::write_cursor_excerpt_section(
 308                &mut cursor_section,
 309                path,
 310                context,
 311                &editable_range,
 312                cursor_offset,
 313            )
 314        }
 315        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 316            &mut cursor_section,
 317            path,
 318            context,
 319            &editable_range,
 320            cursor_offset,
 321        ),
 322        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 323            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 324                &mut cursor_section,
 325                path,
 326                context,
 327                &editable_range,
 328                cursor_offset,
 329            )
 330        }
 331        ZetaFormat::V0211SeedCoder => {
 332            return seed_coder::format_prompt_with_budget(
 333                path,
 334                context,
 335                &editable_range,
 336                cursor_offset,
 337                &input.events,
 338                &input.related_files,
 339                max_tokens,
 340            );
 341        }
 342    }
 343
 344    let cursor_tokens = estimate_tokens(cursor_section.len());
 345    let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
 346
 347    let edit_history_section = format_edit_history_within_budget(
 348        &input.events,
 349        "<|file_sep|>",
 350        "edit history",
 351        budget_after_cursor,
 352    );
 353    let edit_history_tokens = estimate_tokens(edit_history_section.len());
 354    let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 355
 356    let related_files_section = format_related_files_within_budget(
 357        &input.related_files,
 358        "<|file_sep|>",
 359        budget_after_edit_history,
 360    );
 361
 362    let mut prompt = String::new();
 363    prompt.push_str(&related_files_section);
 364    prompt.push_str(&edit_history_section);
 365    prompt.push_str(&cursor_section);
 366    prompt
 367}
 368
 369pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 370    match format {
 371        ZetaFormat::V0112MiddleAtEnd
 372        | ZetaFormat::V0113Ordered
 373        | ZetaFormat::V0114180EditableRegion
 374        | ZetaFormat::V0120GitMergeMarkers
 375        | ZetaFormat::V0131GitMergeMarkersPrefix
 376        | ZetaFormat::V0211SeedCoder => String::new(),
 377        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(input),
 378    }
 379}
 380
 381fn format_edit_history_within_budget(
 382    events: &[Arc<Event>],
 383    file_marker: &str,
 384    edit_history_name: &str,
 385    max_tokens: usize,
 386) -> String {
 387    let header = format!("{}{}\n", file_marker, edit_history_name);
 388    let header_tokens = estimate_tokens(header.len());
 389    if header_tokens >= max_tokens {
 390        return String::new();
 391    }
 392
 393    let mut event_strings: Vec<String> = Vec::new();
 394    let mut total_tokens = header_tokens;
 395
 396    for event in events.iter().rev() {
 397        let mut event_str = String::new();
 398        write_event(&mut event_str, event);
 399        let event_tokens = estimate_tokens(event_str.len());
 400
 401        if total_tokens + event_tokens > max_tokens {
 402            break;
 403        }
 404        total_tokens += event_tokens;
 405        event_strings.push(event_str);
 406    }
 407
 408    if event_strings.is_empty() {
 409        return String::new();
 410    }
 411
 412    let mut result = header;
 413    for event_str in event_strings.iter().rev() {
 414        result.push_str(event_str);
 415    }
 416    result
 417}
 418
 419fn format_related_files_within_budget(
 420    related_files: &[RelatedFile],
 421    file_marker: &str,
 422    max_tokens: usize,
 423) -> String {
 424    let mut result = String::new();
 425    let mut total_tokens = 0;
 426
 427    for file in related_files {
 428        let path_str = file.path.to_string_lossy();
 429        let header = format!("{}{}\n", file_marker, path_str);
 430        let header_tokens = estimate_tokens(header.len());
 431
 432        if total_tokens + header_tokens > max_tokens {
 433            break;
 434        }
 435
 436        let mut file_tokens = header_tokens;
 437        let mut excerpts_to_include = 0;
 438
 439        for excerpt in &file.excerpts {
 440            let needs_newline = !excerpt.text.ends_with('\n');
 441            let needs_ellipsis = excerpt.row_range.end < file.max_row;
 442            let excerpt_len = excerpt.text.len()
 443                + if needs_newline { "\n".len() } else { 0 }
 444                + if needs_ellipsis { "...\n".len() } else { 0 };
 445
 446            let excerpt_tokens = estimate_tokens(excerpt_len);
 447            if total_tokens + file_tokens + excerpt_tokens > max_tokens {
 448                break;
 449            }
 450            file_tokens += excerpt_tokens;
 451            excerpts_to_include += 1;
 452        }
 453
 454        if excerpts_to_include > 0 {
 455            total_tokens += file_tokens;
 456            result.push_str(&header);
 457            for excerpt in file.excerpts.iter().take(excerpts_to_include) {
 458                result.push_str(&excerpt.text);
 459                if !result.ends_with('\n') {
 460                    result.push('\n');
 461                }
 462                if excerpt.row_range.end < file.max_row {
 463                    result.push_str("...\n");
 464                }
 465            }
 466        }
 467    }
 468
 469    result
 470}
 471
 472pub fn write_related_files(
 473    prompt: &mut String,
 474    related_files: &[RelatedFile],
 475) -> Vec<Range<usize>> {
 476    let mut ranges = Vec::new();
 477    for file in related_files {
 478        let start = prompt.len();
 479        let path_str = file.path.to_string_lossy();
 480        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 481        for excerpt in &file.excerpts {
 482            prompt.push_str(&excerpt.text);
 483            if !prompt.ends_with('\n') {
 484                prompt.push('\n');
 485            }
 486            if excerpt.row_range.end < file.max_row {
 487                prompt.push_str("...\n");
 488            }
 489        }
 490        let end = prompt.len();
 491        ranges.push(start..end);
 492    }
 493    ranges
 494}
 495
 496mod v0112_middle_at_end {
 497    use super::*;
 498
 499    pub fn write_cursor_excerpt_section(
 500        prompt: &mut String,
 501        path: &Path,
 502        context: &str,
 503        editable_range: &Range<usize>,
 504        cursor_offset: usize,
 505    ) {
 506        let path_str = path.to_string_lossy();
 507        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 508
 509        prompt.push_str("<|fim_prefix|>\n");
 510        prompt.push_str(&context[..editable_range.start]);
 511
 512        prompt.push_str("<|fim_suffix|>\n");
 513        prompt.push_str(&context[editable_range.end..]);
 514        if !prompt.ends_with('\n') {
 515            prompt.push('\n');
 516        }
 517
 518        prompt.push_str("<|fim_middle|>current\n");
 519        prompt.push_str(&context[editable_range.start..cursor_offset]);
 520        prompt.push_str(CURSOR_MARKER);
 521        prompt.push_str(&context[cursor_offset..editable_range.end]);
 522        if !prompt.ends_with('\n') {
 523            prompt.push('\n');
 524        }
 525
 526        prompt.push_str("<|fim_middle|>updated\n");
 527    }
 528}
 529
 530mod v0113_ordered {
 531    use super::*;
 532
 533    pub fn write_cursor_excerpt_section(
 534        prompt: &mut String,
 535        path: &Path,
 536        context: &str,
 537        editable_range: &Range<usize>,
 538        cursor_offset: usize,
 539    ) {
 540        let path_str = path.to_string_lossy();
 541        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 542
 543        prompt.push_str("<|fim_prefix|>\n");
 544        prompt.push_str(&context[..editable_range.start]);
 545        if !prompt.ends_with('\n') {
 546            prompt.push('\n');
 547        }
 548
 549        prompt.push_str("<|fim_middle|>current\n");
 550        prompt.push_str(&context[editable_range.start..cursor_offset]);
 551        prompt.push_str(CURSOR_MARKER);
 552        prompt.push_str(&context[cursor_offset..editable_range.end]);
 553        if !prompt.ends_with('\n') {
 554            prompt.push('\n');
 555        }
 556
 557        prompt.push_str("<|fim_suffix|>\n");
 558        prompt.push_str(&context[editable_range.end..]);
 559        if !prompt.ends_with('\n') {
 560            prompt.push('\n');
 561        }
 562
 563        prompt.push_str("<|fim_middle|>updated\n");
 564    }
 565}
 566
 567pub mod v0120_git_merge_markers {
 568    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 569    //!
 570    //! Example prompt:
 571    //!
 572    //! <|file_sep|>path/to/target_file.py
 573    //! <|fim_prefix|>
 574    //! code before editable region
 575    //! <|fim_suffix|>
 576    //! code after editable region
 577    //! <|fim_middle|>
 578    //! <<<<<<< CURRENT
 579    //! code that
 580    //! needs to<|user_cursor|>
 581    //! be rewritten
 582    //! =======
 583    //!
 584    //! Expected output (should be generated by the model):
 585    //!
 586    //! updated
 587    //! code with
 588    //! changes applied
 589    //! >>>>>>> UPDATED
 590
 591    use super::*;
 592
 593    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 594    pub const SEPARATOR: &str = "=======\n";
 595    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 596
 597    pub fn special_tokens() -> &'static [&'static str] {
 598        &[
 599            "<|fim_prefix|>",
 600            "<|fim_suffix|>",
 601            "<|fim_middle|>",
 602            "<|file_sep|>",
 603            START_MARKER,
 604            SEPARATOR,
 605            END_MARKER,
 606            CURSOR_MARKER,
 607        ]
 608    }
 609
 610    pub fn write_cursor_excerpt_section(
 611        prompt: &mut String,
 612        path: &Path,
 613        context: &str,
 614        editable_range: &Range<usize>,
 615        cursor_offset: usize,
 616    ) {
 617        let path_str = path.to_string_lossy();
 618        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 619
 620        prompt.push_str("<|fim_prefix|>");
 621        prompt.push_str(&context[..editable_range.start]);
 622
 623        prompt.push_str("<|fim_suffix|>");
 624        prompt.push_str(&context[editable_range.end..]);
 625        if !prompt.ends_with('\n') {
 626            prompt.push('\n');
 627        }
 628
 629        prompt.push_str("<|fim_middle|>");
 630        prompt.push_str(START_MARKER);
 631        prompt.push_str(&context[editable_range.start..cursor_offset]);
 632        prompt.push_str(CURSOR_MARKER);
 633        prompt.push_str(&context[cursor_offset..editable_range.end]);
 634        if !prompt.ends_with('\n') {
 635            prompt.push('\n');
 636        }
 637        prompt.push_str(SEPARATOR);
 638    }
 639}
 640
 641pub mod v0131_git_merge_markers_prefix {
 642    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 643    //!
 644    //! Example prompt:
 645    //!
 646    //! <|file_sep|>path/to/target_file.py
 647    //! <|fim_prefix|>
 648    //! code before editable region
 649    //! <<<<<<< CURRENT
 650    //! code that
 651    //! needs to<|user_cursor|>
 652    //! be rewritten
 653    //! =======
 654    //! <|fim_suffix|>
 655    //! code after editable region
 656    //! <|fim_middle|>
 657    //!
 658    //! Expected output (should be generated by the model):
 659    //!
 660    //! updated
 661    //! code with
 662    //! changes applied
 663    //! >>>>>>> UPDATED
 664
 665    use super::*;
 666
 667    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 668    pub const SEPARATOR: &str = "=======\n";
 669    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 670
 671    pub fn special_tokens() -> &'static [&'static str] {
 672        &[
 673            "<|fim_prefix|>",
 674            "<|fim_suffix|>",
 675            "<|fim_middle|>",
 676            "<|file_sep|>",
 677            START_MARKER,
 678            SEPARATOR,
 679            END_MARKER,
 680            CURSOR_MARKER,
 681        ]
 682    }
 683
 684    pub fn write_cursor_excerpt_section(
 685        prompt: &mut String,
 686        path: &Path,
 687        context: &str,
 688        editable_range: &Range<usize>,
 689        cursor_offset: usize,
 690    ) {
 691        let path_str = path.to_string_lossy();
 692        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 693
 694        prompt.push_str("<|fim_prefix|>");
 695        prompt.push_str(&context[..editable_range.start]);
 696        prompt.push_str(START_MARKER);
 697        prompt.push_str(&context[editable_range.start..cursor_offset]);
 698        prompt.push_str(CURSOR_MARKER);
 699        prompt.push_str(&context[cursor_offset..editable_range.end]);
 700        if !prompt.ends_with('\n') {
 701            prompt.push('\n');
 702        }
 703        prompt.push_str(SEPARATOR);
 704
 705        prompt.push_str("<|fim_suffix|>");
 706        prompt.push_str(&context[editable_range.end..]);
 707        if !prompt.ends_with('\n') {
 708            prompt.push('\n');
 709        }
 710
 711        prompt.push_str("<|fim_middle|>");
 712    }
 713}
 714
 715pub mod v0211_prefill {
 716    use super::*;
 717
 718    pub fn get_prefill(input: &ZetaPromptInput) -> String {
 719        let editable_region = &input.cursor_excerpt
 720            [input.editable_range_in_excerpt.start..input.editable_range_in_excerpt.end];
 721
 722        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
 723        let prefill_len = editable_region.floor_char_boundary(prefill_len);
 724
 725        // Find a token boundary to avoid splitting tokens in the prefill.
 726        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
 727        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
 728        // the \n and consume any consecutive \n characters after it.
 729        let prefill = &editable_region[..prefill_len];
 730        match prefill.rfind('\n') {
 731            Some(pos) => {
 732                let mut end = pos + 1;
 733                while end < editable_region.len()
 734                    && editable_region.as_bytes().get(end) == Some(&b'\n')
 735                {
 736                    end += 1;
 737                }
 738                editable_region[..end].to_string()
 739            }
 740            // No newline found. Fall back to splitting before the last space
 741            // (word-level boundary)
 742            None => match prefill.rfind(' ') {
 743                Some(pos) => prefill[..pos].to_string(),
 744                None => prefill.to_string(),
 745            },
 746        }
 747    }
 748}
 749
 750pub mod seed_coder {
 751    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
 752    //!
 753    //! Seed-Coder uses different FIM tokens and order than Qwen:
 754    //! - SPM order: suffix comes FIRST, then prefix, then middle
 755    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
 756    //! - File markers: StarCoder-style `<filename>path` (single token + path)
 757    //!
 758    //! All context (related files, edit history) goes in the PREFIX section.
 759    //! The suffix contains only code after the editable region.
 760    //!
 761    //! Example prompt:
 762    //!
 763    //! <[fim-suffix]>
 764    //! code after editable region
 765    //! <[fim-prefix]><filename>related/file.py
 766    //! related file content
 767    //!
 768    //! <filename>edit_history
 769    //! --- a/some_file.py
 770    //! +++ b/some_file.py
 771    //! -old
 772    //! +new
 773    //!
 774    //! <filename>path/to/target_file.py
 775    //! code before editable region
 776    //! <<<<<<< CURRENT
 777    //! code that
 778    //! needs to<|user_cursor|>
 779    //! be rewritten
 780    //! =======
 781    //! <[fim-middle]>
 782    //!
 783    //! Expected output (model generates):
 784    //!
 785    //! updated
 786    //! code with
 787    //! changes applied
 788    //! >>>>>>> UPDATED
 789
 790    use super::*;
 791
 792    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
 793    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
 794    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
 795    pub const FILE_MARKER: &str = "<filename>";
 796
 797    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 798    pub const SEPARATOR: &str = "=======\n";
 799    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 800
 801    pub fn special_tokens() -> &'static [&'static str] {
 802        &[
 803            FIM_SUFFIX,
 804            FIM_PREFIX,
 805            FIM_MIDDLE,
 806            FILE_MARKER,
 807            START_MARKER,
 808            SEPARATOR,
 809            END_MARKER,
 810            CURSOR_MARKER,
 811        ]
 812    }
 813
 814    pub fn format_prompt_with_budget(
 815        path: &Path,
 816        context: &str,
 817        editable_range: &Range<usize>,
 818        cursor_offset: usize,
 819        events: &[Arc<Event>],
 820        related_files: &[RelatedFile],
 821        max_tokens: usize,
 822    ) -> String {
 823        let suffix_section = build_suffix_section(context, editable_range);
 824        let cursor_prefix_section =
 825            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
 826
 827        let suffix_tokens = estimate_tokens(suffix_section.len());
 828        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
 829        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
 830
 831        let edit_history_section = super::format_edit_history_within_budget(
 832            events,
 833            FILE_MARKER,
 834            "edit_history",
 835            budget_after_cursor,
 836        );
 837        let edit_history_tokens = estimate_tokens(edit_history_section.len());
 838        let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 839
 840        let related_files_section = super::format_related_files_within_budget(
 841            related_files,
 842            FILE_MARKER,
 843            budget_after_edit_history,
 844        );
 845
 846        let mut prompt = String::new();
 847        prompt.push_str(&suffix_section);
 848        prompt.push_str(FIM_PREFIX);
 849        prompt.push_str(&related_files_section);
 850        if !related_files_section.is_empty() {
 851            prompt.push('\n');
 852        }
 853        prompt.push_str(&edit_history_section);
 854        if !edit_history_section.is_empty() {
 855            prompt.push('\n');
 856        }
 857        prompt.push_str(&cursor_prefix_section);
 858        prompt.push_str(FIM_MIDDLE);
 859        prompt
 860    }
 861
 862    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
 863        let mut section = String::new();
 864        section.push_str(FIM_SUFFIX);
 865        section.push_str(&context[editable_range.end..]);
 866        if !section.ends_with('\n') {
 867            section.push('\n');
 868        }
 869        section
 870    }
 871
 872    fn build_cursor_prefix_section(
 873        path: &Path,
 874        context: &str,
 875        editable_range: &Range<usize>,
 876        cursor_offset: usize,
 877    ) -> String {
 878        let mut section = String::new();
 879        let path_str = path.to_string_lossy();
 880        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
 881
 882        section.push_str(&context[..editable_range.start]);
 883        section.push_str(START_MARKER);
 884        section.push_str(&context[editable_range.start..cursor_offset]);
 885        section.push_str(CURSOR_MARKER);
 886        section.push_str(&context[cursor_offset..editable_range.end]);
 887        if !section.ends_with('\n') {
 888            section.push('\n');
 889        }
 890        section.push_str(SEPARATOR);
 891        section
 892    }
 893}
 894
 895/// The zeta1 prompt format
 896pub mod zeta1 {
 897    use super::*;
 898    use std::fmt::Write;
 899
    /// Marker the zeta1 format uses for the cursor position; inserted into the
    /// editable region of the prompt and stripped from model output.
    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
    /// Written as the first excerpt line when the excerpt begins at the start
    /// of the file.
    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
    /// Opens the editable region inside the excerpt.
    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
    /// Closes the editable region inside the excerpt.
    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";

    /// Fixed preamble of a zeta1 prompt; the formatted user edits are appended
    /// directly after it.
    const INSTRUCTION_HEADER: &str = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n\n",
        "### User Edits:\n\n"
    );
    /// Header placed between the user edits and the excerpt.
    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
    /// Final piece of the prompt, appended after the excerpt.
    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
 914
 915    /// Formats a complete zeta1 prompt from the input events and excerpt.
 916    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
 917        let mut prompt = String::with_capacity(
 918            INSTRUCTION_HEADER.len()
 919                + input_events.len()
 920                + EXCERPT_HEADER.len()
 921                + input_excerpt.len()
 922                + RESPONSE_HEADER.len(),
 923        );
 924        prompt.push_str(INSTRUCTION_HEADER);
 925        prompt.push_str(input_events);
 926        prompt.push_str(EXCERPT_HEADER);
 927        prompt.push_str(input_excerpt);
 928        prompt.push_str(RESPONSE_HEADER);
 929        prompt
 930    }
 931
 932    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
 933    /// editable and context byte-offset ranges within `cursor_excerpt`.
 934    pub fn format_zeta1_from_input(
 935        input: &ZetaPromptInput,
 936        editable_range: Range<usize>,
 937        context_range: Range<usize>,
 938    ) -> String {
 939        let events = format_zeta1_events(&input.events);
 940        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
 941        format_zeta1_prompt(&events, &excerpt)
 942    }
 943
 944    /// Formats events in zeta1 style (oldest first).
 945    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
 946        let mut result = String::new();
 947        for event in events {
 948            let event_string = format_zeta1_event(event);
 949            if event_string.is_empty() {
 950                continue;
 951            }
 952            if !result.is_empty() {
 953                result.push_str("\n\n");
 954            }
 955            result.push_str(&event_string);
 956        }
 957        result
 958    }
 959
 960    fn format_zeta1_event(event: &Event) -> String {
 961        match event {
 962            Event::BufferChange {
 963                path,
 964                old_path,
 965                diff,
 966                ..
 967            } => {
 968                let mut prompt = String::new();
 969                if old_path != path {
 970                    writeln!(
 971                        prompt,
 972                        "User renamed {} to {}\n",
 973                        old_path.display(),
 974                        path.display()
 975                    )
 976                    .ok();
 977                }
 978                if !diff.is_empty() {
 979                    write!(
 980                        prompt,
 981                        "User edited {}:\n```diff\n{}\n```",
 982                        path.display(),
 983                        diff
 984                    )
 985                    .ok();
 986                }
 987                prompt
 988            }
 989        }
 990    }
 991
 992    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
 993    /// within `cursor_excerpt`.
 994    fn format_zeta1_excerpt(
 995        input: &ZetaPromptInput,
 996        editable_range: Range<usize>,
 997        context_range: Range<usize>,
 998    ) -> String {
 999        let path_str = input.cursor_path.to_string_lossy();
1000        let excerpt = &*input.cursor_excerpt;
1001        let cursor_offset = input.cursor_offset_in_excerpt;
1002
1003        let mut prompt = String::new();
1004        writeln!(&mut prompt, "```{path_str}").ok();
1005
1006        let starts_at_file_beginning =
1007            input.excerpt_start_row == Some(0) && context_range.start == 0;
1008        if starts_at_file_beginning {
1009            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
1010        }
1011
1012        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
1013
1014        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
1015        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
1016        prompt.push_str(CURSOR_MARKER);
1017        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
1018        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
1019
1020        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
1021        write!(prompt, "\n```").ok();
1022
1023        prompt
1024    }
1025
1026    /// Cleans zeta1 model output by extracting content between editable region
1027    /// markers and converting the zeta1 cursor marker to the universal one.
1028    /// Returns `None` if the output doesn't contain the expected markers.
1029    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
1030        let content = output.replace(CURSOR_MARKER, "");
1031
1032        let content_start = content
1033            .find(EDITABLE_REGION_START_MARKER)
1034            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
1035            .map(|pos| {
1036                if content.as_bytes().get(pos) == Some(&b'\n') {
1037                    pos + 1
1038                } else {
1039                    pos
1040                }
1041            })
1042            .unwrap_or(0);
1043
1044        let content_end = content
1045            .find(EDITABLE_REGION_END_MARKER)
1046            .map(|pos| {
1047                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
1048                    pos - 1
1049                } else {
1050                    pos
1051                }
1052            })
1053            .unwrap_or(content.len());
1054
1055        if content_start > content_end {
1056            return Some(String::new());
1057        }
1058
1059        let extracted = &content[content_start..content_end];
1060
1061        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
1062            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
1063            let text_before_cursor = text_before_cursor
1064                .find(EDITABLE_REGION_START_MARKER)
1065                .map(|pos| {
1066                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
1067                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
1068                        after_marker + 1
1069                    } else {
1070                        after_marker
1071                    }
1072                })
1073                .unwrap_or(0);
1074            let offset_in_extracted = zeta1_cursor_pos
1075                .saturating_sub(text_before_cursor)
1076                .min(extracted.len());
1077            offset_in_extracted
1078        });
1079
1080        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
1081        if let Some(offset) = cursor_offset {
1082            result.push_str(&extracted[..offset]);
1083            result.push_str(super::CURSOR_MARKER);
1084            result.push_str(&extracted[offset..]);
1085        } else {
1086            result.push_str(extracted);
1087        }
1088
1089        Some(result)
1090    }
1091}
1092
1093#[cfg(test)]
1094mod tests {
1095    use super::*;
1096    use indoc::indoc;
1097
1098    fn make_input(
1099        cursor_excerpt: &str,
1100        editable_range: Range<usize>,
1101        cursor_offset: usize,
1102        events: Vec<Event>,
1103        related_files: Vec<RelatedFile>,
1104    ) -> ZetaPromptInput {
1105        ZetaPromptInput {
1106            cursor_path: Path::new("test.rs").into(),
1107            cursor_excerpt: cursor_excerpt.into(),
1108            editable_range_in_excerpt: editable_range,
1109            cursor_offset_in_excerpt: cursor_offset,
1110            excerpt_start_row: None,
1111            events: events.into_iter().map(Arc::new).collect(),
1112            related_files,
1113            excerpt_ranges: None,
1114            preferred_model: None,
1115            in_open_source_repo: false,
1116            can_collect_data: false,
1117        }
1118    }
1119
1120    fn make_event(path: &str, diff: &str) -> Event {
1121        Event::BufferChange {
1122            path: Path::new(path).into(),
1123            old_path: Path::new(path).into(),
1124            diff: diff.to_string(),
1125            predicted: false,
1126            in_open_source_repo: false,
1127        }
1128    }
1129
1130    fn make_related_file(path: &str, content: &str) -> RelatedFile {
1131        RelatedFile {
1132            path: Path::new(path).into(),
1133            max_row: content.lines().count() as u32,
1134            excerpts: vec![RelatedExcerpt {
1135                row_range: 0..content.lines().count() as u32,
1136                text: content.into(),
1137            }],
1138            in_open_source_repo: false,
1139        }
1140    }
1141
1142    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1143        format_zeta_prompt_with_budget(input, ZetaFormat::V0114180EditableRegion, max_tokens)
1144    }
1145
1146    #[test]
1147    fn test_no_truncation_when_within_budget() {
1148        let input = make_input(
1149            "prefix\neditable\nsuffix",
1150            7..15,
1151            10,
1152            vec![make_event("a.rs", "-old\n+new\n")],
1153            vec![make_related_file("related.rs", "fn helper() {}\n")],
1154        );
1155
1156        assert_eq!(
1157            format_with_budget(&input, 10000),
1158            indoc! {r#"
1159                <|file_sep|>related.rs
1160                fn helper() {}
1161                <|file_sep|>edit history
1162                --- a/a.rs
1163                +++ b/a.rs
1164                -old
1165                +new
1166                <|file_sep|>test.rs
1167                <|fim_prefix|>
1168                prefix
1169                <|fim_middle|>current
1170                edi<|user_cursor|>table
1171                <|fim_suffix|>
1172
1173                suffix
1174                <|fim_middle|>updated
1175            "#}
1176        );
1177    }
1178
1179    #[test]
1180    fn test_truncation_drops_edit_history_when_budget_tight() {
1181        let input = make_input(
1182            "code",
1183            0..4,
1184            2,
1185            vec![make_event("a.rs", "-x\n+y\n")],
1186            vec![
1187                make_related_file("r1.rs", "a\n"),
1188                make_related_file("r2.rs", "b\n"),
1189            ],
1190        );
1191
1192        assert_eq!(
1193            format_with_budget(&input, 10000),
1194            indoc! {r#"
1195                <|file_sep|>r1.rs
1196                a
1197                <|file_sep|>r2.rs
1198                b
1199                <|file_sep|>edit history
1200                --- a/a.rs
1201                +++ b/a.rs
1202                -x
1203                +y
1204                <|file_sep|>test.rs
1205                <|fim_prefix|>
1206                <|fim_middle|>current
1207                co<|user_cursor|>de
1208                <|fim_suffix|>
1209                <|fim_middle|>updated
1210            "#}
1211        );
1212
1213        assert_eq!(
1214            format_with_budget(&input, 50),
1215            indoc! {r#"
1216                <|file_sep|>r1.rs
1217                a
1218                <|file_sep|>r2.rs
1219                b
1220                <|file_sep|>test.rs
1221                <|fim_prefix|>
1222                <|fim_middle|>current
1223                co<|user_cursor|>de
1224                <|fim_suffix|>
1225                <|fim_middle|>updated
1226            "#}
1227        );
1228    }
1229
1230    #[test]
1231    fn test_truncation_includes_partial_excerpts() {
1232        let input = make_input(
1233            "x",
1234            0..1,
1235            0,
1236            vec![],
1237            vec![RelatedFile {
1238                path: Path::new("big.rs").into(),
1239                max_row: 30,
1240                in_open_source_repo: false,
1241                excerpts: vec![
1242                    RelatedExcerpt {
1243                        row_range: 0..10,
1244                        text: "first excerpt\n".into(),
1245                    },
1246                    RelatedExcerpt {
1247                        row_range: 10..20,
1248                        text: "second excerpt\n".into(),
1249                    },
1250                    RelatedExcerpt {
1251                        row_range: 20..30,
1252                        text: "third excerpt\n".into(),
1253                    },
1254                ],
1255            }],
1256        );
1257
1258        assert_eq!(
1259            format_with_budget(&input, 10000),
1260            indoc! {r#"
1261                <|file_sep|>big.rs
1262                first excerpt
1263                ...
1264                second excerpt
1265                ...
1266                third excerpt
1267                <|file_sep|>test.rs
1268                <|fim_prefix|>
1269                <|fim_middle|>current
1270                <|user_cursor|>x
1271                <|fim_suffix|>
1272                <|fim_middle|>updated
1273            "#}
1274        );
1275
1276        assert_eq!(
1277            format_with_budget(&input, 50),
1278            indoc! {r#"
1279                <|file_sep|>big.rs
1280                first excerpt
1281                ...
1282                <|file_sep|>test.rs
1283                <|fim_prefix|>
1284                <|fim_middle|>current
1285                <|user_cursor|>x
1286                <|fim_suffix|>
1287                <|fim_middle|>updated
1288            "#}
1289        );
1290    }
1291
1292    #[test]
1293    fn test_truncation_drops_older_events_first() {
1294        let input = make_input(
1295            "x",
1296            0..1,
1297            0,
1298            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
1299            vec![],
1300        );
1301
1302        assert_eq!(
1303            format_with_budget(&input, 10000),
1304            indoc! {r#"
1305                <|file_sep|>edit history
1306                --- a/old.rs
1307                +++ b/old.rs
1308                -1
1309                --- a/new.rs
1310                +++ b/new.rs
1311                -2
1312                <|file_sep|>test.rs
1313                <|fim_prefix|>
1314                <|fim_middle|>current
1315                <|user_cursor|>x
1316                <|fim_suffix|>
1317                <|fim_middle|>updated
1318            "#}
1319        );
1320
1321        assert_eq!(
1322            format_with_budget(&input, 55),
1323            indoc! {r#"
1324                <|file_sep|>edit history
1325                --- a/new.rs
1326                +++ b/new.rs
1327                -2
1328                <|file_sep|>test.rs
1329                <|fim_prefix|>
1330                <|fim_middle|>current
1331                <|user_cursor|>x
1332                <|fim_suffix|>
1333                <|fim_middle|>updated
1334            "#}
1335        );
1336    }
1337
1338    #[test]
1339    fn test_cursor_excerpt_always_included_with_minimal_budget() {
1340        let input = make_input(
1341            "fn main() {}",
1342            0..12,
1343            3,
1344            vec![make_event("a.rs", "-old\n+new\n")],
1345            vec![make_related_file("related.rs", "helper\n")],
1346        );
1347
1348        assert_eq!(
1349            format_with_budget(&input, 30),
1350            indoc! {r#"
1351                <|file_sep|>test.rs
1352                <|fim_prefix|>
1353                <|fim_middle|>current
1354                fn <|user_cursor|>main() {}
1355                <|fim_suffix|>
1356                <|fim_middle|>updated
1357            "#}
1358        );
1359    }
1360
1361    fn format_seed_coder(input: &ZetaPromptInput) -> String {
1362        format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, 10000)
1363    }
1364
1365    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1366        format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, max_tokens)
1367    }
1368
1369    #[test]
1370    fn test_seed_coder_basic_format() {
1371        let input = make_input(
1372            "prefix\neditable\nsuffix",
1373            7..15,
1374            10,
1375            vec![make_event("a.rs", "-old\n+new\n")],
1376            vec![make_related_file("related.rs", "fn helper() {}\n")],
1377        );
1378
1379        assert_eq!(
1380            format_seed_coder(&input),
1381            indoc! {r#"
1382                <[fim-suffix]>
1383                suffix
1384                <[fim-prefix]><filename>related.rs
1385                fn helper() {}
1386
1387                <filename>edit_history
1388                --- a/a.rs
1389                +++ b/a.rs
1390                -old
1391                +new
1392
1393                <filename>test.rs
1394                prefix
1395                <<<<<<< CURRENT
1396                edi<|user_cursor|>table
1397                =======
1398                <[fim-middle]>"#}
1399        );
1400    }
1401
1402    #[test]
1403    fn test_seed_coder_no_context() {
1404        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
1405
1406        assert_eq!(
1407            format_seed_coder(&input),
1408            indoc! {r#"
1409                <[fim-suffix]>
1410                after
1411                <[fim-prefix]><filename>test.rs
1412                before
1413                <<<<<<< CURRENT
1414                mid<|user_cursor|>dle
1415                =======
1416                <[fim-middle]>"#}
1417        );
1418    }
1419
1420    #[test]
1421    fn test_seed_coder_truncation_drops_context() {
1422        let input = make_input(
1423            "code",
1424            0..4,
1425            2,
1426            vec![make_event("a.rs", "-x\n+y\n")],
1427            vec![make_related_file("r1.rs", "content\n")],
1428        );
1429
1430        // With large budget, everything is included
1431        assert_eq!(
1432            format_seed_coder(&input),
1433            indoc! {r#"
1434                <[fim-suffix]>
1435                <[fim-prefix]><filename>r1.rs
1436                content
1437
1438                <filename>edit_history
1439                --- a/a.rs
1440                +++ b/a.rs
1441                -x
1442                +y
1443
1444                <filename>test.rs
1445                <<<<<<< CURRENT
1446                co<|user_cursor|>de
1447                =======
1448                <[fim-middle]>"#}
1449        );
1450
1451        // With tight budget, context is dropped but cursor section remains
1452        assert_eq!(
1453            format_seed_coder_with_budget(&input, 30),
1454            indoc! {r#"
1455                <[fim-suffix]>
1456                <[fim-prefix]><filename>test.rs
1457                <<<<<<< CURRENT
1458                co<|user_cursor|>de
1459                =======
1460                <[fim-middle]>"#}
1461        );
1462    }
1463
1464    #[test]
1465    fn test_seed_coder_clean_output() {
1466        let output_with_marker = "new code\n>>>>>>> UPDATED\n";
1467        let output_without_marker = "new code\n";
1468
1469        assert_eq!(
1470            clean_zeta2_model_output(output_with_marker, ZetaFormat::V0211SeedCoder),
1471            "new code\n"
1472        );
1473        assert_eq!(
1474            clean_zeta2_model_output(output_without_marker, ZetaFormat::V0211SeedCoder),
1475            "new code\n"
1476        );
1477    }
1478
1479    #[test]
1480    fn test_format_zeta1_from_input_basic() {
1481        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
1482        let input = ZetaPromptInput {
1483            cursor_path: Path::new("src/main.rs").into(),
1484            cursor_excerpt: excerpt.into(),
1485            editable_range_in_excerpt: 15..41,
1486            cursor_offset_in_excerpt: 30,
1487            excerpt_start_row: Some(0),
1488            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
1489            related_files: vec![],
1490            excerpt_ranges: None,
1491            preferred_model: None,
1492            in_open_source_repo: false,
1493            can_collect_data: false,
1494        };
1495
1496        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
1497
1498        assert_eq!(
1499            prompt,
1500            concat!(
1501                "### Instruction:\n",
1502                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1503                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1504                "into account the cursor location.\n",
1505                "\n",
1506                "### User Edits:\n",
1507                "\n",
1508                "User edited other.rs:\n",
1509                "```diff\n",
1510                "-old\n",
1511                "+new\n",
1512                "\n",
1513                "```\n",
1514                "\n",
1515                "### User Excerpt:\n",
1516                "\n",
1517                "```src/main.rs\n",
1518                "<|start_of_file|>\n",
1519                "fn before() {}\n",
1520                "<|editable_region_start|>\n",
1521                "fn foo() {\n",
1522                "    <|user_cursor_is_here|>let x = 1;\n",
1523                "\n",
1524                "<|editable_region_end|>}\n",
1525                "fn after() {}\n",
1526                "\n",
1527                "```\n",
1528                "\n",
1529                "### Response:\n",
1530            ),
1531        );
1532    }
1533
1534    #[test]
1535    fn test_format_zeta1_from_input_no_start_of_file() {
1536        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
1537        let input = ZetaPromptInput {
1538            cursor_path: Path::new("src/main.rs").into(),
1539            cursor_excerpt: excerpt.into(),
1540            editable_range_in_excerpt: 0..28,
1541            cursor_offset_in_excerpt: 15,
1542            excerpt_start_row: Some(10),
1543            events: vec![],
1544            related_files: vec![],
1545            excerpt_ranges: None,
1546            preferred_model: None,
1547            in_open_source_repo: false,
1548            can_collect_data: false,
1549        };
1550
1551        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
1552
1553        assert_eq!(
1554            prompt,
1555            concat!(
1556                "### Instruction:\n",
1557                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1558                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1559                "into account the cursor location.\n",
1560                "\n",
1561                "### User Edits:\n",
1562                "\n",
1563                "\n",
1564                "\n",
1565                "### User Excerpt:\n",
1566                "\n",
1567                "```src/main.rs\n",
1568                "<|editable_region_start|>\n",
1569                "fn foo() {\n",
1570                "    <|user_cursor_is_here|>let x = 1;\n",
1571                "}\n",
1572                "\n",
1573                "<|editable_region_end|>\n",
1574                "```\n",
1575                "\n",
1576                "### Response:\n",
1577            ),
1578        );
1579    }
1580
1581    #[test]
1582    fn test_format_zeta1_from_input_with_sub_ranges() {
1583        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
1584        let editable_range = 10..37;
1585        let context_range = 0..excerpt.len();
1586
1587        let input = ZetaPromptInput {
1588            cursor_path: Path::new("test.rs").into(),
1589            cursor_excerpt: excerpt.into(),
1590            editable_range_in_excerpt: editable_range.clone(),
1591            cursor_offset_in_excerpt: 25,
1592            excerpt_start_row: Some(0),
1593            events: vec![],
1594            related_files: vec![],
1595            excerpt_ranges: None,
1596            preferred_model: None,
1597            in_open_source_repo: false,
1598            can_collect_data: false,
1599        };
1600
1601        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
1602
1603        assert_eq!(
1604            prompt,
1605            concat!(
1606                "### Instruction:\n",
1607                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1608                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1609                "into account the cursor location.\n",
1610                "\n",
1611                "### User Edits:\n",
1612                "\n",
1613                "\n",
1614                "\n",
1615                "### User Excerpt:\n",
1616                "\n",
1617                "```test.rs\n",
1618                "<|start_of_file|>\n",
1619                "// prefix\n",
1620                "<|editable_region_start|>\n",
1621                "fn foo() {\n",
1622                "    <|user_cursor_is_here|>let x = 1;\n",
1623                "}\n",
1624                "<|editable_region_end|>\n",
1625                "// suffix\n",
1626                "\n",
1627                "```\n",
1628                "\n",
1629                "### Response:\n",
1630            ),
1631        );
1632    }
1633
1634    #[test]
1635    fn test_clean_zeta1_model_output_basic() {
1636        let output = indoc! {"
1637            <|editable_region_start|>
1638            fn main() {
1639                println!(\"hello\");
1640            }
1641            <|editable_region_end|>
1642        "};
1643
1644        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1645        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
1646    }
1647
1648    #[test]
1649    fn test_clean_zeta1_model_output_with_cursor() {
1650        let output = indoc! {"
1651            <|editable_region_start|>
1652            fn main() {
1653                <|user_cursor_is_here|>println!(\"hello\");
1654            }
1655            <|editable_region_end|>
1656        "};
1657
1658        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1659        assert_eq!(
1660            cleaned,
1661            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
1662        );
1663    }
1664
1665    #[test]
1666    fn test_clean_zeta1_model_output_no_markers() {
1667        let output = "fn main() {}\n";
1668        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1669        assert_eq!(cleaned, "fn main() {}\n");
1670    }
1671
1672    #[test]
1673    fn test_clean_zeta1_model_output_empty_region() {
1674        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
1675        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1676        assert_eq!(cleaned, "");
1677    }
1678}