zeta_prompt.rs

   1use anyhow::Result;
   2use serde::{Deserialize, Serialize};
   3use std::fmt::Write;
   4use std::ops::Range;
   5use std::path::Path;
   6use std::sync::Arc;
   7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
   8
/// Marker text inserted into the prompt at the cursor position inside the
/// editable region. It is also listed among each format's special tokens.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes on when building a prompt.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  16
  17fn estimate_tokens(bytes: usize) -> usize {
  18    bytes / 3
  19}
  20
/// The client's preferred edit prediction model. The server may override this.
///
/// Carried in `ZetaPromptInput::preferred_model`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum EditPredictionModelKind {
    /// The first-generation model (a `zeta1` prompt module exists in this file).
    Zeta1,
    /// The second-generation model — presumably the one served by the
    /// `ZetaFormat` prompt layouts; confirm against the server code.
    Zeta2,
}
  27
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
///
/// `resolve_cursor_region` picks one editable/context pair per `ZetaFormat`
/// and re-bases the editable range and cursor offset onto the chosen context.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
}
  46
/// Everything needed to build an edit prediction prompt for one cursor position.
///
/// All offsets and ranges are byte offsets into `cursor_excerpt`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written after the file marker
    /// at the top of the cursor section.
    pub cursor_path: Arc<Path>,
    /// Text surrounding the cursor that the prompt's cursor section is cut from.
    pub cursor_excerpt: Arc<str>,
    /// Editable region within `cursor_excerpt`, used as-is when
    /// `excerpt_ranges` is absent.
    pub editable_range_in_excerpt: Range<usize>,
    /// Position of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Not read by the prompt formatting code visible here; presumably the
    /// row in the file at which `cursor_excerpt` starts — TODO confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit history. Rendered in this order; when the budget is exceeded the
    /// entries at the front of the list are the ones dropped.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other files, included in list order while budget remains.
    pub related_files: Vec<RelatedFile>,
    /// When set, the excerpt was computed with a larger budget (~512 tokens)
    /// and these ranges let the server select model-appropriate subsets.
    /// When absent, the excerpt IS the context region and
    /// `editable_range_in_excerpt` is the only editable range.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_ranges: Option<ExcerptRanges>,
    /// Client's preferred model. The server may override.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub preferred_model: Option<EditPredictionModelKind>,
    /// Not read by the prompt formatting code visible here; defaults to
    /// `false` when missing from the serialized input.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// Not read by the prompt formatting code visible here; defaults to
    /// `false` when missing from the serialized input.
    #[serde(default)]
    pub can_collect_data: bool,
}
  71
/// Selects the prompt layout used for a request, and with it the special
/// tokens, the editable/context ranges and the output post-processing.
///
/// Variant names are exposed as strings (see the `Display` impl and
/// `ZetaFormat::parse`), so renaming a variant changes those strings.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    /// Cursor section written by `v0112_middle_at_end`; uses the 150-token
    /// editable range when `excerpt_ranges` is present.
    V0112MiddleAtEnd,
    /// Cursor section written by `v0113_ordered`; uses the 150-token
    /// editable range when `excerpt_ranges` is present.
    V0113Ordered,
    /// Same cursor section as `V0113Ordered`, but uses the 180-token
    /// editable range when `excerpt_ranges` is present. This is the default.
    #[default]
    V0114180EditableRegion,
    /// Cursor section written by `v0120_git_merge_markers`.
    V0120GitMergeMarkers,
    /// Cursor section written by `v0131_git_merge_markers_prefix`.
    V0131GitMergeMarkersPrefix,
    /// Same prompt as `V0131GitMergeMarkersPrefix`, plus a non-empty prefill
    /// produced by `v0211_prefill::get_prefill`.
    V0211Prefill,
    /// Whole prompt built by `seed_coder::format_prompt_with_budget`.
    V0211SeedCoder,
}
  96
  97impl std::fmt::Display for ZetaFormat {
  98    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  99        write!(f, "{}", <&'static str>::from(self))
 100    }
 101}
 102
 103impl ZetaFormat {
 104    pub fn parse(format_name: &str) -> Result<Self> {
 105        let mut results = ZetaFormat::iter().filter(|version| {
 106            <&'static str>::from(version)
 107                .to_lowercase()
 108                .contains(&format_name.to_lowercase())
 109        });
 110        let Some(result) = results.next() else {
 111            anyhow::bail!(
 112                "`{format_name}` did not match any of:\n{}",
 113                Self::options_as_string()
 114            );
 115        };
 116        if results.next().is_some() {
 117            anyhow::bail!(
 118                "`{format_name}` matched more than one of:\n{}",
 119                Self::options_as_string()
 120            );
 121        }
 122        Ok(result)
 123    }
 124
 125    pub fn options_as_string() -> String {
 126        ZetaFormat::iter()
 127            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 128            .collect::<Vec<_>>()
 129            .concat()
 130    }
 131
 132    pub fn special_tokens(&self) -> &'static [&'static str] {
 133        match self {
 134            ZetaFormat::V0112MiddleAtEnd
 135            | ZetaFormat::V0113Ordered
 136            | ZetaFormat::V0114180EditableRegion => &[
 137                "<|fim_prefix|>",
 138                "<|fim_suffix|>",
 139                "<|fim_middle|>",
 140                "<|file_sep|>",
 141                CURSOR_MARKER,
 142            ],
 143            ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 144            ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 145                v0131_git_merge_markers_prefix::special_tokens()
 146            }
 147            ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 148        }
 149    }
 150}
 151
/// An entry in the edit history that accompanies a prediction request.
///
/// Serialized as an internally tagged enum: the variant name is stored under
/// the `event` key.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, described as a diff.
    BufferChange {
        /// Path written on the `+++ b` header line by `write_event`.
        path: Arc<Path>,
        /// Path written on the `--- a` header line by `write_event`.
        old_path: Arc<Path>,
        /// Diff body, appended verbatim after the two header lines.
        diff: String,
        /// When true, `write_event` prefixes the event with a
        /// `// User accepted prediction:` line.
        predicted: bool,
        /// Returned by `Event::in_open_source_repo`; ignored when the event
        /// is rendered into a prompt.
        in_open_source_repo: bool,
    },
}
 163
 164impl Event {
 165    pub fn in_open_source_repo(&self) -> bool {
 166        match self {
 167            Event::BufferChange {
 168                in_open_source_repo,
 169                ..
 170            } => *in_open_source_repo,
 171        }
 172    }
 173}
 174
 175pub fn write_event(prompt: &mut String, event: &Event) {
 176    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 177        for component in path.components() {
 178            prompt.push('/');
 179            write!(prompt, "{}", component.as_os_str().display()).ok();
 180        }
 181    }
 182    match event {
 183        Event::BufferChange {
 184            path,
 185            old_path,
 186            diff,
 187            predicted,
 188            in_open_source_repo: _,
 189        } => {
 190            if *predicted {
 191                prompt.push_str("// User accepted prediction:\n");
 192            }
 193            prompt.push_str("--- a");
 194            write_path_as_unix_str(prompt, old_path.as_ref());
 195            prompt.push_str("\n+++ b");
 196            write_path_as_unix_str(prompt, path.as_ref());
 197            prompt.push('\n');
 198            prompt.push_str(diff);
 199        }
 200    }
 201}
 202
/// Excerpts from one file other than the one containing the cursor,
/// included in the prompt as additional context.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written after the file marker that introduces this file's excerpts.
    pub path: Arc<Path>,
    /// Compared against each excerpt's `row_range.end`: an excerpt ending
    /// before this row is followed by a `...` line. Presumably the last row
    /// of the file — TODO confirm.
    pub max_row: u32,
    /// Excerpts to render, in order.
    pub excerpts: Vec<RelatedExcerpt>,
    /// Not read by the prompt formatting code visible here; defaults to
    /// `false` when missing from the serialized input.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 211
/// A contiguous piece of text taken from a related file.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by the excerpt; only `end` is consulted when rendering,
    /// to decide whether a trailing `...` line is needed.
    pub row_range: Range<u32>,
    /// Excerpt text; a newline is added after it when it does not end with one.
    pub text: Arc<str>,
}
 217
 218pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 219    format
 220        .special_tokens()
 221        .iter()
 222        .any(|token| input.cursor_excerpt.contains(token))
 223}
 224
/// Builds the prompt for `input` in the given `format`, using
/// `MAX_PROMPT_TOKENS` as the token budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    format_zeta_prompt_with_budget(input, format, MAX_PROMPT_TOKENS)
}
 228
 229/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
 230pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
 231    match format {
 232        ZetaFormat::V0120GitMergeMarkers => output
 233            .strip_suffix(v0120_git_merge_markers::END_MARKER)
 234            .unwrap_or(output),
 235        ZetaFormat::V0131GitMergeMarkersPrefix => output
 236            .strip_suffix(v0131_git_merge_markers_prefix::END_MARKER)
 237            .unwrap_or(output),
 238        ZetaFormat::V0211SeedCoder => output
 239            .strip_suffix(seed_coder::END_MARKER)
 240            .unwrap_or(output),
 241        _ => output,
 242    }
 243}
 244
 245fn resolve_cursor_region(
 246    input: &ZetaPromptInput,
 247    format: ZetaFormat,
 248) -> (&str, Range<usize>, usize) {
 249    let Some(ranges) = &input.excerpt_ranges else {
 250        return (
 251            &input.cursor_excerpt,
 252            input.editable_range_in_excerpt.clone(),
 253            input.cursor_offset_in_excerpt,
 254        );
 255    };
 256
 257    let (editable_range, context_range) = match format {
 258        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 259            ranges.editable_150.clone(),
 260            ranges.editable_150_context_350.clone(),
 261        ),
 262        ZetaFormat::V0114180EditableRegion
 263        | ZetaFormat::V0120GitMergeMarkers
 264        | ZetaFormat::V0131GitMergeMarkersPrefix
 265        | ZetaFormat::V0211Prefill
 266        | ZetaFormat::V0211SeedCoder => (
 267            ranges.editable_180.clone(),
 268            ranges.editable_180_context_350.clone(),
 269        ),
 270    };
 271
 272    let context_start = context_range.start;
 273    let context_text = &input.cursor_excerpt[context_range];
 274    let adjusted_editable =
 275        (editable_range.start - context_start)..(editable_range.end - context_start);
 276    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 277
 278    (context_text, adjusted_editable, adjusted_cursor)
 279}
 280
 281fn format_zeta_prompt_with_budget(
 282    input: &ZetaPromptInput,
 283    format: ZetaFormat,
 284    max_tokens: usize,
 285) -> String {
 286    let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
 287    let path = &*input.cursor_path;
 288
 289    let mut cursor_section = String::new();
 290    match format {
 291        ZetaFormat::V0112MiddleAtEnd => {
 292            v0112_middle_at_end::write_cursor_excerpt_section(
 293                &mut cursor_section,
 294                path,
 295                context,
 296                &editable_range,
 297                cursor_offset,
 298            );
 299        }
 300        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 301            v0113_ordered::write_cursor_excerpt_section(
 302                &mut cursor_section,
 303                path,
 304                context,
 305                &editable_range,
 306                cursor_offset,
 307            )
 308        }
 309        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 310            &mut cursor_section,
 311            path,
 312            context,
 313            &editable_range,
 314            cursor_offset,
 315        ),
 316        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 317            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 318                &mut cursor_section,
 319                path,
 320                context,
 321                &editable_range,
 322                cursor_offset,
 323            )
 324        }
 325        ZetaFormat::V0211SeedCoder => {
 326            return seed_coder::format_prompt_with_budget(
 327                path,
 328                context,
 329                &editable_range,
 330                cursor_offset,
 331                &input.events,
 332                &input.related_files,
 333                max_tokens,
 334            );
 335        }
 336    }
 337
 338    let cursor_tokens = estimate_tokens(cursor_section.len());
 339    let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
 340
 341    let edit_history_section = format_edit_history_within_budget(
 342        &input.events,
 343        "<|file_sep|>",
 344        "edit history",
 345        budget_after_cursor,
 346    );
 347    let edit_history_tokens = estimate_tokens(edit_history_section.len());
 348    let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 349
 350    let related_files_section = format_related_files_within_budget(
 351        &input.related_files,
 352        "<|file_sep|>",
 353        budget_after_edit_history,
 354    );
 355
 356    let mut prompt = String::new();
 357    prompt.push_str(&related_files_section);
 358    prompt.push_str(&edit_history_section);
 359    prompt.push_str(&cursor_section);
 360    prompt
 361}
 362
 363pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 364    match format {
 365        ZetaFormat::V0112MiddleAtEnd
 366        | ZetaFormat::V0113Ordered
 367        | ZetaFormat::V0114180EditableRegion
 368        | ZetaFormat::V0120GitMergeMarkers
 369        | ZetaFormat::V0131GitMergeMarkersPrefix
 370        | ZetaFormat::V0211SeedCoder => String::new(),
 371        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(input),
 372    }
 373}
 374
 375fn format_edit_history_within_budget(
 376    events: &[Arc<Event>],
 377    file_marker: &str,
 378    edit_history_name: &str,
 379    max_tokens: usize,
 380) -> String {
 381    let header = format!("{}{}\n", file_marker, edit_history_name);
 382    let header_tokens = estimate_tokens(header.len());
 383    if header_tokens >= max_tokens {
 384        return String::new();
 385    }
 386
 387    let mut event_strings: Vec<String> = Vec::new();
 388    let mut total_tokens = header_tokens;
 389
 390    for event in events.iter().rev() {
 391        let mut event_str = String::new();
 392        write_event(&mut event_str, event);
 393        let event_tokens = estimate_tokens(event_str.len());
 394
 395        if total_tokens + event_tokens > max_tokens {
 396            break;
 397        }
 398        total_tokens += event_tokens;
 399        event_strings.push(event_str);
 400    }
 401
 402    if event_strings.is_empty() {
 403        return String::new();
 404    }
 405
 406    let mut result = header;
 407    for event_str in event_strings.iter().rev() {
 408        result.push_str(event_str);
 409    }
 410    result
 411}
 412
 413fn format_related_files_within_budget(
 414    related_files: &[RelatedFile],
 415    file_marker: &str,
 416    max_tokens: usize,
 417) -> String {
 418    let mut result = String::new();
 419    let mut total_tokens = 0;
 420
 421    for file in related_files {
 422        let path_str = file.path.to_string_lossy();
 423        let header = format!("{}{}\n", file_marker, path_str);
 424        let header_tokens = estimate_tokens(header.len());
 425
 426        if total_tokens + header_tokens > max_tokens {
 427            break;
 428        }
 429
 430        let mut file_tokens = header_tokens;
 431        let mut excerpts_to_include = 0;
 432
 433        for excerpt in &file.excerpts {
 434            let needs_newline = !excerpt.text.ends_with('\n');
 435            let needs_ellipsis = excerpt.row_range.end < file.max_row;
 436            let excerpt_len = excerpt.text.len()
 437                + if needs_newline { "\n".len() } else { 0 }
 438                + if needs_ellipsis { "...\n".len() } else { 0 };
 439
 440            let excerpt_tokens = estimate_tokens(excerpt_len);
 441            if total_tokens + file_tokens + excerpt_tokens > max_tokens {
 442                break;
 443            }
 444            file_tokens += excerpt_tokens;
 445            excerpts_to_include += 1;
 446        }
 447
 448        if excerpts_to_include > 0 {
 449            total_tokens += file_tokens;
 450            result.push_str(&header);
 451            for excerpt in file.excerpts.iter().take(excerpts_to_include) {
 452                result.push_str(&excerpt.text);
 453                if !result.ends_with('\n') {
 454                    result.push('\n');
 455                }
 456                if excerpt.row_range.end < file.max_row {
 457                    result.push_str("...\n");
 458                }
 459            }
 460        }
 461    }
 462
 463    result
 464}
 465
 466pub fn write_related_files(
 467    prompt: &mut String,
 468    related_files: &[RelatedFile],
 469) -> Vec<Range<usize>> {
 470    let mut ranges = Vec::new();
 471    for file in related_files {
 472        let start = prompt.len();
 473        let path_str = file.path.to_string_lossy();
 474        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 475        for excerpt in &file.excerpts {
 476            prompt.push_str(&excerpt.text);
 477            if !prompt.ends_with('\n') {
 478                prompt.push('\n');
 479            }
 480            if excerpt.row_range.end < file.max_row {
 481                prompt.push_str("...\n");
 482            }
 483        }
 484        let end = prompt.len();
 485        ranges.push(start..end);
 486    }
 487    ranges
 488}
 489
 490mod v0112_middle_at_end {
 491    use super::*;
 492
 493    pub fn write_cursor_excerpt_section(
 494        prompt: &mut String,
 495        path: &Path,
 496        context: &str,
 497        editable_range: &Range<usize>,
 498        cursor_offset: usize,
 499    ) {
 500        let path_str = path.to_string_lossy();
 501        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 502
 503        prompt.push_str("<|fim_prefix|>\n");
 504        prompt.push_str(&context[..editable_range.start]);
 505
 506        prompt.push_str("<|fim_suffix|>\n");
 507        prompt.push_str(&context[editable_range.end..]);
 508        if !prompt.ends_with('\n') {
 509            prompt.push('\n');
 510        }
 511
 512        prompt.push_str("<|fim_middle|>current\n");
 513        prompt.push_str(&context[editable_range.start..cursor_offset]);
 514        prompt.push_str(CURSOR_MARKER);
 515        prompt.push_str(&context[cursor_offset..editable_range.end]);
 516        if !prompt.ends_with('\n') {
 517            prompt.push('\n');
 518        }
 519
 520        prompt.push_str("<|fim_middle|>updated\n");
 521    }
 522}
 523
 524mod v0113_ordered {
 525    use super::*;
 526
 527    pub fn write_cursor_excerpt_section(
 528        prompt: &mut String,
 529        path: &Path,
 530        context: &str,
 531        editable_range: &Range<usize>,
 532        cursor_offset: usize,
 533    ) {
 534        let path_str = path.to_string_lossy();
 535        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 536
 537        prompt.push_str("<|fim_prefix|>\n");
 538        prompt.push_str(&context[..editable_range.start]);
 539        if !prompt.ends_with('\n') {
 540            prompt.push('\n');
 541        }
 542
 543        prompt.push_str("<|fim_middle|>current\n");
 544        prompt.push_str(&context[editable_range.start..cursor_offset]);
 545        prompt.push_str(CURSOR_MARKER);
 546        prompt.push_str(&context[cursor_offset..editable_range.end]);
 547        if !prompt.ends_with('\n') {
 548            prompt.push('\n');
 549        }
 550
 551        prompt.push_str("<|fim_suffix|>\n");
 552        prompt.push_str(&context[editable_range.end..]);
 553        if !prompt.ends_with('\n') {
 554            prompt.push('\n');
 555        }
 556
 557        prompt.push_str("<|fim_middle|>updated\n");
 558    }
 559}
 560
 561pub mod v0120_git_merge_markers {
 562    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 563    //!
 564    //! Example prompt:
 565    //!
 566    //! <|file_sep|>path/to/target_file.py
 567    //! <|fim_prefix|>
 568    //! code before editable region
 569    //! <|fim_suffix|>
 570    //! code after editable region
 571    //! <|fim_middle|>
 572    //! <<<<<<< CURRENT
 573    //! code that
 574    //! needs to<|user_cursor|>
 575    //! be rewritten
 576    //! =======
 577    //!
 578    //! Expected output (should be generated by the model):
 579    //!
 580    //! updated
 581    //! code with
 582    //! changes applied
 583    //! >>>>>>> UPDATED
 584
 585    use super::*;
 586
 587    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 588    pub const SEPARATOR: &str = "=======\n";
 589    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 590
 591    pub fn special_tokens() -> &'static [&'static str] {
 592        &[
 593            "<|fim_prefix|>",
 594            "<|fim_suffix|>",
 595            "<|fim_middle|>",
 596            "<|file_sep|>",
 597            START_MARKER,
 598            SEPARATOR,
 599            END_MARKER,
 600            CURSOR_MARKER,
 601        ]
 602    }
 603
 604    pub fn write_cursor_excerpt_section(
 605        prompt: &mut String,
 606        path: &Path,
 607        context: &str,
 608        editable_range: &Range<usize>,
 609        cursor_offset: usize,
 610    ) {
 611        let path_str = path.to_string_lossy();
 612        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 613
 614        prompt.push_str("<|fim_prefix|>");
 615        prompt.push_str(&context[..editable_range.start]);
 616
 617        prompt.push_str("<|fim_suffix|>");
 618        prompt.push_str(&context[editable_range.end..]);
 619        if !prompt.ends_with('\n') {
 620            prompt.push('\n');
 621        }
 622
 623        prompt.push_str("<|fim_middle|>");
 624        prompt.push_str(START_MARKER);
 625        prompt.push_str(&context[editable_range.start..cursor_offset]);
 626        prompt.push_str(CURSOR_MARKER);
 627        prompt.push_str(&context[cursor_offset..editable_range.end]);
 628        if !prompt.ends_with('\n') {
 629            prompt.push('\n');
 630        }
 631        prompt.push_str(SEPARATOR);
 632    }
 633}
 634
 635pub mod v0131_git_merge_markers_prefix {
 636    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 637    //!
 638    //! Example prompt:
 639    //!
 640    //! <|file_sep|>path/to/target_file.py
 641    //! <|fim_prefix|>
 642    //! code before editable region
 643    //! <<<<<<< CURRENT
 644    //! code that
 645    //! needs to<|user_cursor|>
 646    //! be rewritten
 647    //! =======
 648    //! <|fim_suffix|>
 649    //! code after editable region
 650    //! <|fim_middle|>
 651    //!
 652    //! Expected output (should be generated by the model):
 653    //!
 654    //! updated
 655    //! code with
 656    //! changes applied
 657    //! >>>>>>> UPDATED
 658
 659    use super::*;
 660
 661    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 662    pub const SEPARATOR: &str = "=======\n";
 663    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 664
 665    pub fn special_tokens() -> &'static [&'static str] {
 666        &[
 667            "<|fim_prefix|>",
 668            "<|fim_suffix|>",
 669            "<|fim_middle|>",
 670            "<|file_sep|>",
 671            START_MARKER,
 672            SEPARATOR,
 673            END_MARKER,
 674            CURSOR_MARKER,
 675        ]
 676    }
 677
 678    pub fn write_cursor_excerpt_section(
 679        prompt: &mut String,
 680        path: &Path,
 681        context: &str,
 682        editable_range: &Range<usize>,
 683        cursor_offset: usize,
 684    ) {
 685        let path_str = path.to_string_lossy();
 686        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 687
 688        prompt.push_str("<|fim_prefix|>");
 689        prompt.push_str(&context[..editable_range.start]);
 690        prompt.push_str(START_MARKER);
 691        prompt.push_str(&context[editable_range.start..cursor_offset]);
 692        prompt.push_str(CURSOR_MARKER);
 693        prompt.push_str(&context[cursor_offset..editable_range.end]);
 694        if !prompt.ends_with('\n') {
 695            prompt.push('\n');
 696        }
 697        prompt.push_str(SEPARATOR);
 698
 699        prompt.push_str("<|fim_suffix|>");
 700        prompt.push_str(&context[editable_range.end..]);
 701        if !prompt.ends_with('\n') {
 702            prompt.push('\n');
 703        }
 704
 705        prompt.push_str("<|fim_middle|>");
 706    }
 707}
 708
 709pub mod v0211_prefill {
 710    use super::*;
 711
 712    pub fn get_prefill(input: &ZetaPromptInput) -> String {
 713        let editable_region = &input.cursor_excerpt
 714            [input.editable_range_in_excerpt.start..input.editable_range_in_excerpt.end];
 715
 716        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
 717        let prefill_len = editable_region.floor_char_boundary(prefill_len);
 718
 719        // Find a token boundary to avoid splitting tokens in the prefill.
 720        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
 721        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
 722        // the \n and consume any consecutive \n characters after it.
 723        let prefill = &editable_region[..prefill_len];
 724        match prefill.rfind('\n') {
 725            Some(pos) => {
 726                let mut end = pos + 1;
 727                while end < editable_region.len()
 728                    && editable_region.as_bytes().get(end) == Some(&b'\n')
 729                {
 730                    end += 1;
 731                }
 732                editable_region[..end].to_string()
 733            }
 734            // No newline found. Fall back to splitting before the last space
 735            // (word-level boundary)
 736            None => match prefill.rfind(' ') {
 737                Some(pos) => prefill[..pos].to_string(),
 738                None => prefill.to_string(),
 739            },
 740        }
 741    }
 742}
 743
 744pub mod seed_coder {
 745    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
 746    //!
 747    //! Seed-Coder uses different FIM tokens and order than Qwen:
 748    //! - SPM order: suffix comes FIRST, then prefix, then middle
 749    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
 750    //! - File markers: StarCoder-style `<filename>path` (single token + path)
 751    //!
 752    //! All context (related files, edit history) goes in the PREFIX section.
 753    //! The suffix contains only code after the editable region.
 754    //!
 755    //! Example prompt:
 756    //!
 757    //! <[fim-suffix]>
 758    //! code after editable region
 759    //! <[fim-prefix]><filename>related/file.py
 760    //! related file content
 761    //!
 762    //! <filename>edit_history
 763    //! --- a/some_file.py
 764    //! +++ b/some_file.py
 765    //! -old
 766    //! +new
 767    //!
 768    //! <filename>path/to/target_file.py
 769    //! code before editable region
 770    //! <<<<<<< CURRENT
 771    //! code that
 772    //! needs to<|user_cursor|>
 773    //! be rewritten
 774    //! =======
 775    //! <[fim-middle]>
 776    //!
 777    //! Expected output (model generates):
 778    //!
 779    //! updated
 780    //! code with
 781    //! changes applied
 782    //! >>>>>>> UPDATED
 783
 784    use super::*;
 785
 786    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
 787    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
 788    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
 789    pub const FILE_MARKER: &str = "<filename>";
 790
 791    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 792    pub const SEPARATOR: &str = "=======\n";
 793    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 794
 795    pub fn special_tokens() -> &'static [&'static str] {
 796        &[
 797            FIM_SUFFIX,
 798            FIM_PREFIX,
 799            FIM_MIDDLE,
 800            FILE_MARKER,
 801            START_MARKER,
 802            SEPARATOR,
 803            END_MARKER,
 804            CURSOR_MARKER,
 805        ]
 806    }
 807
 808    pub fn format_prompt_with_budget(
 809        path: &Path,
 810        context: &str,
 811        editable_range: &Range<usize>,
 812        cursor_offset: usize,
 813        events: &[Arc<Event>],
 814        related_files: &[RelatedFile],
 815        max_tokens: usize,
 816    ) -> String {
 817        let suffix_section = build_suffix_section(context, editable_range);
 818        let cursor_prefix_section =
 819            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
 820
 821        let suffix_tokens = estimate_tokens(suffix_section.len());
 822        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
 823        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
 824
 825        let edit_history_section = super::format_edit_history_within_budget(
 826            events,
 827            FILE_MARKER,
 828            "edit_history",
 829            budget_after_cursor,
 830        );
 831        let edit_history_tokens = estimate_tokens(edit_history_section.len());
 832        let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 833
 834        let related_files_section = super::format_related_files_within_budget(
 835            related_files,
 836            FILE_MARKER,
 837            budget_after_edit_history,
 838        );
 839
 840        let mut prompt = String::new();
 841        prompt.push_str(&suffix_section);
 842        prompt.push_str(FIM_PREFIX);
 843        prompt.push_str(&related_files_section);
 844        if !related_files_section.is_empty() {
 845            prompt.push('\n');
 846        }
 847        prompt.push_str(&edit_history_section);
 848        if !edit_history_section.is_empty() {
 849            prompt.push('\n');
 850        }
 851        prompt.push_str(&cursor_prefix_section);
 852        prompt.push_str(FIM_MIDDLE);
 853        prompt
 854    }
 855
 856    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
 857        let mut section = String::new();
 858        section.push_str(FIM_SUFFIX);
 859        section.push_str(&context[editable_range.end..]);
 860        if !section.ends_with('\n') {
 861            section.push('\n');
 862        }
 863        section
 864    }
 865
 866    fn build_cursor_prefix_section(
 867        path: &Path,
 868        context: &str,
 869        editable_range: &Range<usize>,
 870        cursor_offset: usize,
 871    ) -> String {
 872        let mut section = String::new();
 873        let path_str = path.to_string_lossy();
 874        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
 875
 876        section.push_str(&context[..editable_range.start]);
 877        section.push_str(START_MARKER);
 878        section.push_str(&context[editable_range.start..cursor_offset]);
 879        section.push_str(CURSOR_MARKER);
 880        section.push_str(&context[cursor_offset..editable_range.end]);
 881        if !section.ends_with('\n') {
 882            section.push('\n');
 883        }
 884        section.push_str(SEPARATOR);
 885        section
 886    }
 887}
 888
 889/// The zeta1 prompt format
 890pub mod zeta1 {
 891    use super::*;
 892    use std::fmt::Write;
 893
 894    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
 895    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
 896    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
 897    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
 898
 899    const INSTRUCTION_HEADER: &str = concat!(
 900        "### Instruction:\n",
 901        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
 902        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
 903        "into account the cursor location.\n\n",
 904        "### User Edits:\n\n"
 905    );
 906    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
 907    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
 908
 909    /// Formats a complete zeta1 prompt from the input events and excerpt.
 910    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
 911        let mut prompt = String::with_capacity(
 912            INSTRUCTION_HEADER.len()
 913                + input_events.len()
 914                + EXCERPT_HEADER.len()
 915                + input_excerpt.len()
 916                + RESPONSE_HEADER.len(),
 917        );
 918        prompt.push_str(INSTRUCTION_HEADER);
 919        prompt.push_str(input_events);
 920        prompt.push_str(EXCERPT_HEADER);
 921        prompt.push_str(input_excerpt);
 922        prompt.push_str(RESPONSE_HEADER);
 923        prompt
 924    }
 925
 926    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
 927    /// editable and context byte-offset ranges within `cursor_excerpt`.
 928    pub fn format_zeta1_from_input(
 929        input: &ZetaPromptInput,
 930        editable_range: Range<usize>,
 931        context_range: Range<usize>,
 932    ) -> String {
 933        let events = format_zeta1_events(&input.events);
 934        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
 935        format_zeta1_prompt(&events, &excerpt)
 936    }
 937
 938    /// Formats events in zeta1 style (oldest first).
 939    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
 940        let mut result = String::new();
 941        for event in events {
 942            let event_string = format_zeta1_event(event);
 943            if event_string.is_empty() {
 944                continue;
 945            }
 946            if !result.is_empty() {
 947                result.push_str("\n\n");
 948            }
 949            result.push_str(&event_string);
 950        }
 951        result
 952    }
 953
 954    fn format_zeta1_event(event: &Event) -> String {
 955        match event {
 956            Event::BufferChange {
 957                path,
 958                old_path,
 959                diff,
 960                ..
 961            } => {
 962                let mut prompt = String::new();
 963                if old_path != path {
 964                    writeln!(
 965                        prompt,
 966                        "User renamed {} to {}\n",
 967                        old_path.display(),
 968                        path.display()
 969                    )
 970                    .ok();
 971                }
 972                if !diff.is_empty() {
 973                    write!(
 974                        prompt,
 975                        "User edited {}:\n```diff\n{}\n```",
 976                        path.display(),
 977                        diff
 978                    )
 979                    .ok();
 980                }
 981                prompt
 982            }
 983        }
 984    }
 985
 986    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
 987    /// within `cursor_excerpt`.
 988    fn format_zeta1_excerpt(
 989        input: &ZetaPromptInput,
 990        editable_range: Range<usize>,
 991        context_range: Range<usize>,
 992    ) -> String {
 993        let path_str = input.cursor_path.to_string_lossy();
 994        let excerpt = &*input.cursor_excerpt;
 995        let cursor_offset = input.cursor_offset_in_excerpt;
 996
 997        let mut prompt = String::new();
 998        writeln!(&mut prompt, "```{path_str}").ok();
 999
1000        let starts_at_file_beginning =
1001            input.excerpt_start_row == Some(0) && context_range.start == 0;
1002        if starts_at_file_beginning {
1003            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
1004        }
1005
1006        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
1007
1008        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
1009        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
1010        prompt.push_str(CURSOR_MARKER);
1011        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
1012        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
1013
1014        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
1015        write!(prompt, "\n```").ok();
1016
1017        prompt
1018    }
1019
1020    /// Cleans zeta1 model output by extracting content between editable region
1021    /// markers and converting the zeta1 cursor marker to the universal one.
1022    /// Returns `None` if the output doesn't contain the expected markers.
1023    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
1024        let content = output.replace(CURSOR_MARKER, "");
1025
1026        let content_start = content
1027            .find(EDITABLE_REGION_START_MARKER)
1028            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
1029            .map(|pos| {
1030                if content.as_bytes().get(pos) == Some(&b'\n') {
1031                    pos + 1
1032                } else {
1033                    pos
1034                }
1035            })
1036            .unwrap_or(0);
1037
1038        let content_end = content
1039            .find(EDITABLE_REGION_END_MARKER)
1040            .map(|pos| {
1041                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
1042                    pos - 1
1043                } else {
1044                    pos
1045                }
1046            })
1047            .unwrap_or(content.len());
1048
1049        if content_start > content_end {
1050            return Some(String::new());
1051        }
1052
1053        let extracted = &content[content_start..content_end];
1054
1055        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
1056            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
1057            let text_before_cursor = text_before_cursor
1058                .find(EDITABLE_REGION_START_MARKER)
1059                .map(|pos| {
1060                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
1061                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
1062                        after_marker + 1
1063                    } else {
1064                        after_marker
1065                    }
1066                })
1067                .unwrap_or(0);
1068            let offset_in_extracted = zeta1_cursor_pos
1069                .saturating_sub(text_before_cursor)
1070                .min(extracted.len());
1071            offset_in_extracted
1072        });
1073
1074        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
1075        if let Some(offset) = cursor_offset {
1076            result.push_str(&extracted[..offset]);
1077            result.push_str(super::CURSOR_MARKER);
1078            result.push_str(&extracted[offset..]);
1079        } else {
1080            result.push_str(extracted);
1081        }
1082
1083        Some(result)
1084    }
1085}
1086
1087#[cfg(test)]
1088mod tests {
1089    use super::*;
1090    use indoc::indoc;
1091
1092    fn make_input(
1093        cursor_excerpt: &str,
1094        editable_range: Range<usize>,
1095        cursor_offset: usize,
1096        events: Vec<Event>,
1097        related_files: Vec<RelatedFile>,
1098    ) -> ZetaPromptInput {
1099        ZetaPromptInput {
1100            cursor_path: Path::new("test.rs").into(),
1101            cursor_excerpt: cursor_excerpt.into(),
1102            editable_range_in_excerpt: editable_range,
1103            cursor_offset_in_excerpt: cursor_offset,
1104            excerpt_start_row: None,
1105            events: events.into_iter().map(Arc::new).collect(),
1106            related_files,
1107            excerpt_ranges: None,
1108            preferred_model: None,
1109            in_open_source_repo: false,
1110            can_collect_data: false,
1111        }
1112    }
1113
1114    fn make_event(path: &str, diff: &str) -> Event {
1115        Event::BufferChange {
1116            path: Path::new(path).into(),
1117            old_path: Path::new(path).into(),
1118            diff: diff.to_string(),
1119            predicted: false,
1120            in_open_source_repo: false,
1121        }
1122    }
1123
1124    fn make_related_file(path: &str, content: &str) -> RelatedFile {
1125        RelatedFile {
1126            path: Path::new(path).into(),
1127            max_row: content.lines().count() as u32,
1128            excerpts: vec![RelatedExcerpt {
1129                row_range: 0..content.lines().count() as u32,
1130                text: content.into(),
1131            }],
1132            in_open_source_repo: false,
1133        }
1134    }
1135
1136    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1137        format_zeta_prompt_with_budget(input, ZetaFormat::V0114180EditableRegion, max_tokens)
1138    }
1139
1140    #[test]
1141    fn test_no_truncation_when_within_budget() {
1142        let input = make_input(
1143            "prefix\neditable\nsuffix",
1144            7..15,
1145            10,
1146            vec![make_event("a.rs", "-old\n+new\n")],
1147            vec![make_related_file("related.rs", "fn helper() {}\n")],
1148        );
1149
1150        assert_eq!(
1151            format_with_budget(&input, 10000),
1152            indoc! {r#"
1153                <|file_sep|>related.rs
1154                fn helper() {}
1155                <|file_sep|>edit history
1156                --- a/a.rs
1157                +++ b/a.rs
1158                -old
1159                +new
1160                <|file_sep|>test.rs
1161                <|fim_prefix|>
1162                prefix
1163                <|fim_middle|>current
1164                edi<|user_cursor|>table
1165                <|fim_suffix|>
1166
1167                suffix
1168                <|fim_middle|>updated
1169            "#}
1170        );
1171    }
1172
1173    #[test]
1174    fn test_truncation_drops_edit_history_when_budget_tight() {
1175        let input = make_input(
1176            "code",
1177            0..4,
1178            2,
1179            vec![make_event("a.rs", "-x\n+y\n")],
1180            vec![
1181                make_related_file("r1.rs", "a\n"),
1182                make_related_file("r2.rs", "b\n"),
1183            ],
1184        );
1185
1186        assert_eq!(
1187            format_with_budget(&input, 10000),
1188            indoc! {r#"
1189                <|file_sep|>r1.rs
1190                a
1191                <|file_sep|>r2.rs
1192                b
1193                <|file_sep|>edit history
1194                --- a/a.rs
1195                +++ b/a.rs
1196                -x
1197                +y
1198                <|file_sep|>test.rs
1199                <|fim_prefix|>
1200                <|fim_middle|>current
1201                co<|user_cursor|>de
1202                <|fim_suffix|>
1203                <|fim_middle|>updated
1204            "#}
1205        );
1206
1207        assert_eq!(
1208            format_with_budget(&input, 50),
1209            indoc! {r#"
1210                <|file_sep|>r1.rs
1211                a
1212                <|file_sep|>r2.rs
1213                b
1214                <|file_sep|>test.rs
1215                <|fim_prefix|>
1216                <|fim_middle|>current
1217                co<|user_cursor|>de
1218                <|fim_suffix|>
1219                <|fim_middle|>updated
1220            "#}
1221        );
1222    }
1223
1224    #[test]
1225    fn test_truncation_includes_partial_excerpts() {
1226        let input = make_input(
1227            "x",
1228            0..1,
1229            0,
1230            vec![],
1231            vec![RelatedFile {
1232                path: Path::new("big.rs").into(),
1233                max_row: 30,
1234                in_open_source_repo: false,
1235                excerpts: vec![
1236                    RelatedExcerpt {
1237                        row_range: 0..10,
1238                        text: "first excerpt\n".into(),
1239                    },
1240                    RelatedExcerpt {
1241                        row_range: 10..20,
1242                        text: "second excerpt\n".into(),
1243                    },
1244                    RelatedExcerpt {
1245                        row_range: 20..30,
1246                        text: "third excerpt\n".into(),
1247                    },
1248                ],
1249            }],
1250        );
1251
1252        assert_eq!(
1253            format_with_budget(&input, 10000),
1254            indoc! {r#"
1255                <|file_sep|>big.rs
1256                first excerpt
1257                ...
1258                second excerpt
1259                ...
1260                third excerpt
1261                <|file_sep|>test.rs
1262                <|fim_prefix|>
1263                <|fim_middle|>current
1264                <|user_cursor|>x
1265                <|fim_suffix|>
1266                <|fim_middle|>updated
1267            "#}
1268        );
1269
1270        assert_eq!(
1271            format_with_budget(&input, 50),
1272            indoc! {r#"
1273                <|file_sep|>big.rs
1274                first excerpt
1275                ...
1276                <|file_sep|>test.rs
1277                <|fim_prefix|>
1278                <|fim_middle|>current
1279                <|user_cursor|>x
1280                <|fim_suffix|>
1281                <|fim_middle|>updated
1282            "#}
1283        );
1284    }
1285
1286    #[test]
1287    fn test_truncation_drops_older_events_first() {
1288        let input = make_input(
1289            "x",
1290            0..1,
1291            0,
1292            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
1293            vec![],
1294        );
1295
1296        assert_eq!(
1297            format_with_budget(&input, 10000),
1298            indoc! {r#"
1299                <|file_sep|>edit history
1300                --- a/old.rs
1301                +++ b/old.rs
1302                -1
1303                --- a/new.rs
1304                +++ b/new.rs
1305                -2
1306                <|file_sep|>test.rs
1307                <|fim_prefix|>
1308                <|fim_middle|>current
1309                <|user_cursor|>x
1310                <|fim_suffix|>
1311                <|fim_middle|>updated
1312            "#}
1313        );
1314
1315        assert_eq!(
1316            format_with_budget(&input, 55),
1317            indoc! {r#"
1318                <|file_sep|>edit history
1319                --- a/new.rs
1320                +++ b/new.rs
1321                -2
1322                <|file_sep|>test.rs
1323                <|fim_prefix|>
1324                <|fim_middle|>current
1325                <|user_cursor|>x
1326                <|fim_suffix|>
1327                <|fim_middle|>updated
1328            "#}
1329        );
1330    }
1331
1332    #[test]
1333    fn test_cursor_excerpt_always_included_with_minimal_budget() {
1334        let input = make_input(
1335            "fn main() {}",
1336            0..12,
1337            3,
1338            vec![make_event("a.rs", "-old\n+new\n")],
1339            vec![make_related_file("related.rs", "helper\n")],
1340        );
1341
1342        assert_eq!(
1343            format_with_budget(&input, 30),
1344            indoc! {r#"
1345                <|file_sep|>test.rs
1346                <|fim_prefix|>
1347                <|fim_middle|>current
1348                fn <|user_cursor|>main() {}
1349                <|fim_suffix|>
1350                <|fim_middle|>updated
1351            "#}
1352        );
1353    }
1354
1355    fn format_seed_coder(input: &ZetaPromptInput) -> String {
1356        format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, 10000)
1357    }
1358
1359    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1360        format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, max_tokens)
1361    }
1362
1363    #[test]
1364    fn test_seed_coder_basic_format() {
1365        let input = make_input(
1366            "prefix\neditable\nsuffix",
1367            7..15,
1368            10,
1369            vec![make_event("a.rs", "-old\n+new\n")],
1370            vec![make_related_file("related.rs", "fn helper() {}\n")],
1371        );
1372
1373        assert_eq!(
1374            format_seed_coder(&input),
1375            indoc! {r#"
1376                <[fim-suffix]>
1377                suffix
1378                <[fim-prefix]><filename>related.rs
1379                fn helper() {}
1380
1381                <filename>edit_history
1382                --- a/a.rs
1383                +++ b/a.rs
1384                -old
1385                +new
1386
1387                <filename>test.rs
1388                prefix
1389                <<<<<<< CURRENT
1390                edi<|user_cursor|>table
1391                =======
1392                <[fim-middle]>"#}
1393        );
1394    }
1395
1396    #[test]
1397    fn test_seed_coder_no_context() {
1398        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
1399
1400        assert_eq!(
1401            format_seed_coder(&input),
1402            indoc! {r#"
1403                <[fim-suffix]>
1404                after
1405                <[fim-prefix]><filename>test.rs
1406                before
1407                <<<<<<< CURRENT
1408                mid<|user_cursor|>dle
1409                =======
1410                <[fim-middle]>"#}
1411        );
1412    }
1413
1414    #[test]
1415    fn test_seed_coder_truncation_drops_context() {
1416        let input = make_input(
1417            "code",
1418            0..4,
1419            2,
1420            vec![make_event("a.rs", "-x\n+y\n")],
1421            vec![make_related_file("r1.rs", "content\n")],
1422        );
1423
1424        // With large budget, everything is included
1425        assert_eq!(
1426            format_seed_coder(&input),
1427            indoc! {r#"
1428                <[fim-suffix]>
1429                <[fim-prefix]><filename>r1.rs
1430                content
1431
1432                <filename>edit_history
1433                --- a/a.rs
1434                +++ b/a.rs
1435                -x
1436                +y
1437
1438                <filename>test.rs
1439                <<<<<<< CURRENT
1440                co<|user_cursor|>de
1441                =======
1442                <[fim-middle]>"#}
1443        );
1444
1445        // With tight budget, context is dropped but cursor section remains
1446        assert_eq!(
1447            format_seed_coder_with_budget(&input, 30),
1448            indoc! {r#"
1449                <[fim-suffix]>
1450                <[fim-prefix]><filename>test.rs
1451                <<<<<<< CURRENT
1452                co<|user_cursor|>de
1453                =======
1454                <[fim-middle]>"#}
1455        );
1456    }
1457
1458    #[test]
1459    fn test_seed_coder_clean_output() {
1460        let output_with_marker = "new code\n>>>>>>> UPDATED\n";
1461        let output_without_marker = "new code\n";
1462
1463        assert_eq!(
1464            clean_zeta2_model_output(output_with_marker, ZetaFormat::V0211SeedCoder),
1465            "new code\n"
1466        );
1467        assert_eq!(
1468            clean_zeta2_model_output(output_without_marker, ZetaFormat::V0211SeedCoder),
1469            "new code\n"
1470        );
1471    }
1472
1473    #[test]
1474    fn test_format_zeta1_from_input_basic() {
1475        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
1476        let input = ZetaPromptInput {
1477            cursor_path: Path::new("src/main.rs").into(),
1478            cursor_excerpt: excerpt.into(),
1479            editable_range_in_excerpt: 15..41,
1480            cursor_offset_in_excerpt: 30,
1481            excerpt_start_row: Some(0),
1482            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
1483            related_files: vec![],
1484            excerpt_ranges: None,
1485            preferred_model: None,
1486            in_open_source_repo: false,
1487            can_collect_data: false,
1488        };
1489
1490        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
1491
1492        assert_eq!(
1493            prompt,
1494            concat!(
1495                "### Instruction:\n",
1496                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1497                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1498                "into account the cursor location.\n",
1499                "\n",
1500                "### User Edits:\n",
1501                "\n",
1502                "User edited other.rs:\n",
1503                "```diff\n",
1504                "-old\n",
1505                "+new\n",
1506                "\n",
1507                "```\n",
1508                "\n",
1509                "### User Excerpt:\n",
1510                "\n",
1511                "```src/main.rs\n",
1512                "<|start_of_file|>\n",
1513                "fn before() {}\n",
1514                "<|editable_region_start|>\n",
1515                "fn foo() {\n",
1516                "    <|user_cursor_is_here|>let x = 1;\n",
1517                "\n",
1518                "<|editable_region_end|>}\n",
1519                "fn after() {}\n",
1520                "\n",
1521                "```\n",
1522                "\n",
1523                "### Response:\n",
1524            ),
1525        );
1526    }
1527
1528    #[test]
1529    fn test_format_zeta1_from_input_no_start_of_file() {
1530        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
1531        let input = ZetaPromptInput {
1532            cursor_path: Path::new("src/main.rs").into(),
1533            cursor_excerpt: excerpt.into(),
1534            editable_range_in_excerpt: 0..28,
1535            cursor_offset_in_excerpt: 15,
1536            excerpt_start_row: Some(10),
1537            events: vec![],
1538            related_files: vec![],
1539            excerpt_ranges: None,
1540            preferred_model: None,
1541            in_open_source_repo: false,
1542            can_collect_data: false,
1543        };
1544
1545        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
1546
1547        assert_eq!(
1548            prompt,
1549            concat!(
1550                "### Instruction:\n",
1551                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1552                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1553                "into account the cursor location.\n",
1554                "\n",
1555                "### User Edits:\n",
1556                "\n",
1557                "\n",
1558                "\n",
1559                "### User Excerpt:\n",
1560                "\n",
1561                "```src/main.rs\n",
1562                "<|editable_region_start|>\n",
1563                "fn foo() {\n",
1564                "    <|user_cursor_is_here|>let x = 1;\n",
1565                "}\n",
1566                "\n",
1567                "<|editable_region_end|>\n",
1568                "```\n",
1569                "\n",
1570                "### Response:\n",
1571            ),
1572        );
1573    }
1574
1575    #[test]
1576    fn test_format_zeta1_from_input_with_sub_ranges() {
1577        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
1578        let editable_range = 10..37;
1579        let context_range = 0..excerpt.len();
1580
1581        let input = ZetaPromptInput {
1582            cursor_path: Path::new("test.rs").into(),
1583            cursor_excerpt: excerpt.into(),
1584            editable_range_in_excerpt: editable_range.clone(),
1585            cursor_offset_in_excerpt: 25,
1586            excerpt_start_row: Some(0),
1587            events: vec![],
1588            related_files: vec![],
1589            excerpt_ranges: None,
1590            preferred_model: None,
1591            in_open_source_repo: false,
1592            can_collect_data: false,
1593        };
1594
1595        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
1596
1597        assert_eq!(
1598            prompt,
1599            concat!(
1600                "### Instruction:\n",
1601                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1602                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1603                "into account the cursor location.\n",
1604                "\n",
1605                "### User Edits:\n",
1606                "\n",
1607                "\n",
1608                "\n",
1609                "### User Excerpt:\n",
1610                "\n",
1611                "```test.rs\n",
1612                "<|start_of_file|>\n",
1613                "// prefix\n",
1614                "<|editable_region_start|>\n",
1615                "fn foo() {\n",
1616                "    <|user_cursor_is_here|>let x = 1;\n",
1617                "}\n",
1618                "<|editable_region_end|>\n",
1619                "// suffix\n",
1620                "\n",
1621                "```\n",
1622                "\n",
1623                "### Response:\n",
1624            ),
1625        );
1626    }
1627
1628    #[test]
1629    fn test_clean_zeta1_model_output_basic() {
1630        let output = indoc! {"
1631            <|editable_region_start|>
1632            fn main() {
1633                println!(\"hello\");
1634            }
1635            <|editable_region_end|>
1636        "};
1637
1638        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1639        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
1640    }
1641
1642    #[test]
1643    fn test_clean_zeta1_model_output_with_cursor() {
1644        let output = indoc! {"
1645            <|editable_region_start|>
1646            fn main() {
1647                <|user_cursor_is_here|>println!(\"hello\");
1648            }
1649            <|editable_region_end|>
1650        "};
1651
1652        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1653        assert_eq!(
1654            cleaned,
1655            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
1656        );
1657    }
1658
1659    #[test]
1660    fn test_clean_zeta1_model_output_no_markers() {
1661        let output = "fn main() {}\n";
1662        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1663        assert_eq!(cleaned, "fn main() {}\n");
1664    }
1665
1666    #[test]
1667    fn test_clean_zeta1_model_output_empty_region() {
1668        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
1669        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1670        assert_eq!(cleaned, "");
1671    }
1672}