zeta_prompt.rs

   1use anyhow::Result;
   2use serde::{Deserialize, Serialize};
   3use std::fmt::Write;
   4use std::ops::Range;
   5use std::path::Path;
   6use std::sync::Arc;
   7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
   8
/// Marker written into the rendered cursor section at the cursor offset.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes to the budgeted prompt builder.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  16
  17fn estimate_tokens(bytes: usize) -> usize {
  18    bytes / 3
  19}
  20
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
#[derive(Clone, Debug, Default, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Editable region computed with a 512-token budget.
    pub editable_512: Option<Range<usize>>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
    /// Presumably the context boundary when using editable_350 with 512 tokens of
    /// additional context (inferred from the sibling field names) — TODO confirm.
    pub editable_350_context_512: Option<Range<usize>>,
    /// Presumably the context boundary when using editable_350 with 1024 tokens of
    /// additional context (inferred from the sibling field names) — TODO confirm.
    pub editable_350_context_1024: Option<Range<usize>>,
    /// Presumably a context range computed with a 4096-token budget — TODO confirm
    /// against the code that fills this struct.
    pub context_4096: Option<Range<usize>>,
    /// Presumably a context range computed with an 8192-token budget — TODO confirm
    /// against the code that fills this struct.
    pub context_8192: Option<Range<usize>>,
}
  45
/// Everything needed to render an edit prediction prompt for one cursor position.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written into the prompt's file header.
    pub cursor_path: Arc<Path>,
    /// Text around the cursor. The ranges in `excerpt_ranges` are byte ranges into
    /// this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Presumably the buffer row at which `cursor_excerpt` starts — not used in this
    /// part of the file; confirm against callers.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit history events. When the budget runs out, the events at the end of this
    /// list are the ones kept, and they are rendered in list order.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other files that may be added to the prompt as context.
    pub related_files: Vec<RelatedFile>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    /// NOTE(review): presumably whether the cursor's file belongs to an open source
    /// repository — not read in this part of the file; confirm against callers.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// NOTE(review): presumably whether this request may be used for data
    /// collection — not read in this part of the file; confirm against callers.
    #[serde(default)]
    pub can_collect_data: bool,
}
  65
/// Identifies which prompt layout is used to render a `ZetaPromptInput`.
///
/// The variant name doubles as the format's textual name (via `IntoStaticStr`),
/// and `EnumIter` lets `ZetaFormat::parse` search all variants.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// NOTE(review): every current variant is already camel case, so this `allow`
// may be a leftover — confirm before removing.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    /// Rendered by `v0112_middle_at_end`: prefix, suffix, then the editable region last.
    V0112MiddleAtEnd,
    /// Rendered by `v0113_ordered`: prefix, editable region, suffix in file order.
    V0113Ordered,
    /// Same layout as `V0113Ordered` (rendered by `v0113_ordered`).
    V0114180EditableRegion,
    /// Rendered by `v0120_git_merge_markers`.
    V0120GitMergeMarkers,
    /// Rendered by `v0131_git_merge_markers_prefix`. This is the default format.
    #[default]
    V0131GitMergeMarkersPrefix,
    /// Same layout as `V0131GitMergeMarkersPrefix`, plus a non-empty prefill
    /// computed by `v0211_prefill::get_prefill`.
    V0211Prefill,
    /// Rendered by `seed_coder`, which uses its own FIM tokens and section order.
    V0211SeedCoder,
}
  90
  91impl std::fmt::Display for ZetaFormat {
  92    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  93        write!(f, "{}", <&'static str>::from(self))
  94    }
  95}
  96
  97impl ZetaFormat {
  98    pub fn parse(format_name: &str) -> Result<Self> {
  99        let mut results = ZetaFormat::iter().filter(|version| {
 100            <&'static str>::from(version)
 101                .to_lowercase()
 102                .contains(&format_name.to_lowercase())
 103        });
 104        let Some(result) = results.next() else {
 105            anyhow::bail!(
 106                "`{format_name}` did not match any of:\n{}",
 107                Self::options_as_string()
 108            );
 109        };
 110        if results.next().is_some() {
 111            anyhow::bail!(
 112                "`{format_name}` matched more than one of:\n{}",
 113                Self::options_as_string()
 114            );
 115        }
 116        Ok(result)
 117    }
 118
 119    pub fn options_as_string() -> String {
 120        ZetaFormat::iter()
 121            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 122            .collect::<Vec<_>>()
 123            .concat()
 124    }
 125
 126    pub fn special_tokens(&self) -> &'static [&'static str] {
 127        match self {
 128            ZetaFormat::V0112MiddleAtEnd
 129            | ZetaFormat::V0113Ordered
 130            | ZetaFormat::V0114180EditableRegion => &[
 131                "<|fim_prefix|>",
 132                "<|fim_suffix|>",
 133                "<|fim_middle|>",
 134                "<|file_sep|>",
 135                CURSOR_MARKER,
 136            ],
 137            ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 138            ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 139                v0131_git_merge_markers_prefix::special_tokens()
 140            }
 141            ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 142        }
 143    }
 144}
 145
/// An entry in the edit history that is rendered into the prompt.
///
/// Serialized with an internal `"event"` tag naming the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, rendered by `write_event` as a `---`/`+++` header
    /// followed by `diff`.
    BufferChange {
        /// Path written on the `+++ b` header line.
        path: Arc<Path>,
        /// Path written on the `--- a` header line.
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the header lines.
        diff: String,
        /// When true, the rendered event is preceded by a
        /// "User accepted prediction" comment line.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; not rendered into the prompt.
        in_open_source_repo: bool,
    },
}
 157
 158impl Event {
 159    pub fn in_open_source_repo(&self) -> bool {
 160        match self {
 161            Event::BufferChange {
 162                in_open_source_repo,
 163                ..
 164            } => *in_open_source_repo,
 165        }
 166    }
 167}
 168
 169pub fn write_event(prompt: &mut String, event: &Event) {
 170    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 171        for component in path.components() {
 172            prompt.push('/');
 173            write!(prompt, "{}", component.as_os_str().display()).ok();
 174        }
 175    }
 176    match event {
 177        Event::BufferChange {
 178            path,
 179            old_path,
 180            diff,
 181            predicted,
 182            in_open_source_repo: _,
 183        } => {
 184            if *predicted {
 185                prompt.push_str("// User accepted prediction:\n");
 186            }
 187            prompt.push_str("--- a");
 188            write_path_as_unix_str(prompt, old_path.as_ref());
 189            prompt.push_str("\n+++ b");
 190            write_path_as_unix_str(prompt, path.as_ref());
 191            prompt.push('\n');
 192            prompt.push_str(diff);
 193        }
 194    }
 195}
 196
/// A file other than the cursor's file, with the excerpts of it that may be
/// added to the prompt as context.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written after the file marker in the rendered file header.
    pub path: Arc<Path>,
    /// Row bound for the file: an excerpt whose `row_range.end` is below this
    /// value is followed by a `...` line when rendered.
    pub max_row: u32,
    /// Excerpts of this file, rendered in list order.
    pub excerpts: Vec<RelatedExcerpt>,
    /// NOTE(review): presumably whether the file belongs to an open source
    /// repository — not read in this part of the file; confirm against callers.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 205
/// One contiguous piece of a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by this excerpt; the end is compared with the file's
    /// `max_row` to decide whether a `...` line follows the excerpt.
    pub row_range: Range<u32>,
    /// The excerpt's text, written verbatim into the prompt.
    pub text: Arc<str>,
    /// Priority used when fitting excerpts into a token budget: excerpts with a
    /// lower `order` are considered first. Defaults to 0 when absent.
    #[serde(default)]
    pub order: usize,
}
 213
 214pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 215    format
 216        .special_tokens()
 217        .iter()
 218        .any(|token| input.cursor_excerpt.contains(token))
 219}
 220
/// Renders the prompt for `input` in the given `format`, using
/// `MAX_PROMPT_TOKENS` as the token budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    format_zeta_prompt_with_budget(input, format, MAX_PROMPT_TOKENS)
}
 224
 225/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
 226pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
 227    match format {
 228        ZetaFormat::V0120GitMergeMarkers => output
 229            .strip_suffix(v0120_git_merge_markers::END_MARKER)
 230            .unwrap_or(output),
 231        ZetaFormat::V0131GitMergeMarkersPrefix => output
 232            .strip_suffix(v0131_git_merge_markers_prefix::END_MARKER)
 233            .unwrap_or(output),
 234        ZetaFormat::V0211SeedCoder => output
 235            .strip_suffix(seed_coder::END_MARKER)
 236            .unwrap_or(output),
 237        _ => output,
 238    }
 239}
 240
 241pub fn excerpt_range_for_format(
 242    format: ZetaFormat,
 243    ranges: &ExcerptRanges,
 244) -> (Range<usize>, Range<usize>) {
 245    match format {
 246        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 247            ranges.editable_150.clone(),
 248            ranges.editable_150_context_350.clone(),
 249        ),
 250        ZetaFormat::V0114180EditableRegion
 251        | ZetaFormat::V0120GitMergeMarkers
 252        | ZetaFormat::V0131GitMergeMarkersPrefix
 253        | ZetaFormat::V0211Prefill
 254        | ZetaFormat::V0211SeedCoder => (
 255            ranges.editable_350.clone(),
 256            ranges.editable_350_context_150.clone(),
 257        ),
 258    }
 259}
 260
 261pub fn resolve_cursor_region(
 262    input: &ZetaPromptInput,
 263    format: ZetaFormat,
 264) -> (&str, Range<usize>, usize) {
 265    let (editable_range, context_range) = excerpt_range_for_format(format, &input.excerpt_ranges);
 266    let context_start = context_range.start;
 267    let context_text = &input.cursor_excerpt[context_range];
 268    let adjusted_editable =
 269        (editable_range.start - context_start)..(editable_range.end - context_start);
 270    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 271
 272    (context_text, adjusted_editable, adjusted_cursor)
 273}
 274
 275fn format_zeta_prompt_with_budget(
 276    input: &ZetaPromptInput,
 277    format: ZetaFormat,
 278    max_tokens: usize,
 279) -> String {
 280    let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
 281    let path = &*input.cursor_path;
 282
 283    let mut cursor_section = String::new();
 284    match format {
 285        ZetaFormat::V0112MiddleAtEnd => {
 286            v0112_middle_at_end::write_cursor_excerpt_section(
 287                &mut cursor_section,
 288                path,
 289                context,
 290                &editable_range,
 291                cursor_offset,
 292            );
 293        }
 294        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 295            v0113_ordered::write_cursor_excerpt_section(
 296                &mut cursor_section,
 297                path,
 298                context,
 299                &editable_range,
 300                cursor_offset,
 301            )
 302        }
 303        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 304            &mut cursor_section,
 305            path,
 306            context,
 307            &editable_range,
 308            cursor_offset,
 309        ),
 310        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 311            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 312                &mut cursor_section,
 313                path,
 314                context,
 315                &editable_range,
 316                cursor_offset,
 317            )
 318        }
 319        ZetaFormat::V0211SeedCoder => {
 320            return seed_coder::format_prompt_with_budget(
 321                path,
 322                context,
 323                &editable_range,
 324                cursor_offset,
 325                &input.events,
 326                &input.related_files,
 327                max_tokens,
 328            );
 329        }
 330    }
 331
 332    let cursor_tokens = estimate_tokens(cursor_section.len());
 333    let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
 334
 335    let edit_history_section = format_edit_history_within_budget(
 336        &input.events,
 337        "<|file_sep|>",
 338        "edit history",
 339        budget_after_cursor,
 340    );
 341    let edit_history_tokens = estimate_tokens(edit_history_section.len());
 342    let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 343
 344    let related_files_section = format_related_files_within_budget(
 345        &input.related_files,
 346        "<|file_sep|>",
 347        "",
 348        budget_after_edit_history,
 349    );
 350
 351    let mut prompt = String::new();
 352    prompt.push_str(&related_files_section);
 353    prompt.push_str(&edit_history_section);
 354    prompt.push_str(&cursor_section);
 355    prompt
 356}
 357
 358pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 359    match format {
 360        ZetaFormat::V0112MiddleAtEnd
 361        | ZetaFormat::V0113Ordered
 362        | ZetaFormat::V0114180EditableRegion
 363        | ZetaFormat::V0120GitMergeMarkers
 364        | ZetaFormat::V0131GitMergeMarkersPrefix
 365        | ZetaFormat::V0211SeedCoder => String::new(),
 366        ZetaFormat::V0211Prefill => {
 367            let (context, editable_range, _) = resolve_cursor_region(input, format);
 368            v0211_prefill::get_prefill(context, &editable_range)
 369        }
 370    }
 371}
 372
 373fn format_edit_history_within_budget(
 374    events: &[Arc<Event>],
 375    file_marker: &str,
 376    edit_history_name: &str,
 377    max_tokens: usize,
 378) -> String {
 379    let header = format!("{}{}\n", file_marker, edit_history_name);
 380    let header_tokens = estimate_tokens(header.len());
 381    if header_tokens >= max_tokens {
 382        return String::new();
 383    }
 384
 385    let mut event_strings: Vec<String> = Vec::new();
 386    let mut total_tokens = header_tokens;
 387
 388    for event in events.iter().rev() {
 389        let mut event_str = String::new();
 390        write_event(&mut event_str, event);
 391        let event_tokens = estimate_tokens(event_str.len());
 392
 393        if total_tokens + event_tokens > max_tokens {
 394            break;
 395        }
 396        total_tokens += event_tokens;
 397        event_strings.push(event_str);
 398    }
 399
 400    if event_strings.is_empty() {
 401        return String::new();
 402    }
 403
 404    let mut result = header;
 405    for event_str in event_strings.iter().rev() {
 406        result.push_str(event_str);
 407    }
 408    result
 409}
 410
 411fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
 412    let needs_newline = !excerpt.text.ends_with('\n');
 413    let needs_ellipsis = excerpt.row_range.end < file_max_row;
 414    let len = excerpt.text.len()
 415        + if needs_newline { "\n".len() } else { 0 }
 416        + if needs_ellipsis { "...\n".len() } else { 0 };
 417    estimate_tokens(len)
 418}
 419
/// Renders as many related-file excerpts as fit within `max_tokens`.
///
/// Excerpts are prioritized by `order` (lowest first, ties broken by file and
/// excerpt position). Selection stops at the first excerpt that does not fit.
/// The selected excerpts are then rendered grouped by file, in the original
/// file and excerpt order. Each file starts with a `{file_prefix}{path}` header
/// line; `file_suffix` is written between consecutive files.
///
/// Returns an empty string when nothing fits.
pub fn format_related_files_within_budget(
    related_files: &[RelatedFile],
    file_prefix: &str,
    file_suffix: &str,
    max_tokens: usize,
) -> String {
    // Identifies one excerpt by position, together with its priority.
    struct ExcerptCandidate {
        file_ix: usize,
        excerpt_ix: usize,
        order: usize,
    }

    // Flatten all excerpts of all files into one candidate list.
    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
        .iter()
        .enumerate()
        .flat_map(|(file_ix, file)| {
            file.excerpts
                .iter()
                .enumerate()
                .map(move |(excerpt_ix, e)| ExcerptCandidate {
                    file_ix,
                    excerpt_ix,
                    order: e.order,
                })
        })
        .collect();

    // Pre-compute the header line of every file; its token cost is derived
    // from its length when the file is first included.
    let file_headers: Vec<String> = related_files
        .iter()
        .map(|file| {
            let path_str = file.path.to_string_lossy();
            format!("{}{}\n", file_prefix, path_str)
        })
        .collect();

    // Sort the excerpts by their order and determine how many fit within the budget.
    let mut total_tokens = 0;
    let mut included_excerpt_count = 0_usize;
    let mut included_file_indices = vec![false; related_files.len()];
    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
    for candidate in &excerpt_candidates {
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        let file_already_included = included_file_indices[candidate.file_ix];
        // The header (plus suffix) is only paid for once per file.
        let header_cost = if file_already_included {
            0
        } else {
            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
        };
        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
        // Stop at the first excerpt that does not fit; lower-priority excerpts
        // are not considered even if they would be small enough.
        if total_tokens + header_cost + excerpt_cost > max_tokens {
            break;
        }
        total_tokens += header_cost + excerpt_cost;
        if !file_already_included {
            included_file_indices[candidate.file_ix] = true;
        }
        included_excerpt_count += 1;
    }

    // Keep only the selected excerpts and restore file/excerpt order for rendering.
    excerpt_candidates.truncate(included_excerpt_count);
    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));

    // Render all of the files that fit within the token budget, in the original order.
    let mut result = String::new();
    let mut last_file_ix = None;
    for candidate in &excerpt_candidates {
        if last_file_ix != Some(candidate.file_ix) {
            // A new file begins: close the previous one (if any) with the
            // suffix. No suffix is written after the last file.
            if last_file_ix.is_some() {
                result.push_str(file_suffix);
            }
            result.push_str(&file_headers[candidate.file_ix]);
            last_file_ix = Some(candidate.file_ix);
        }
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        result.push_str(&excerpt.text);
        if !result.ends_with('\n') {
            result.push('\n');
        }
        // Mark that the file continues past this excerpt.
        if excerpt.row_range.end < file.max_row {
            result.push_str("...\n");
        }
    }

    result
}
 508
 509pub fn write_related_files(
 510    prompt: &mut String,
 511    related_files: &[RelatedFile],
 512) -> Vec<Range<usize>> {
 513    let mut ranges = Vec::new();
 514    for file in related_files {
 515        let start = prompt.len();
 516        let path_str = file.path.to_string_lossy();
 517        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 518        for excerpt in &file.excerpts {
 519            prompt.push_str(&excerpt.text);
 520            if !prompt.ends_with('\n') {
 521                prompt.push('\n');
 522            }
 523            if excerpt.row_range.end < file.max_row {
 524                prompt.push_str("...\n");
 525            }
 526        }
 527        let end = prompt.len();
 528        ranges.push(start..end);
 529    }
 530    ranges
 531}
 532
 533mod v0112_middle_at_end {
 534    use super::*;
 535
 536    pub fn write_cursor_excerpt_section(
 537        prompt: &mut String,
 538        path: &Path,
 539        context: &str,
 540        editable_range: &Range<usize>,
 541        cursor_offset: usize,
 542    ) {
 543        let path_str = path.to_string_lossy();
 544        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 545
 546        prompt.push_str("<|fim_prefix|>\n");
 547        prompt.push_str(&context[..editable_range.start]);
 548
 549        prompt.push_str("<|fim_suffix|>\n");
 550        prompt.push_str(&context[editable_range.end..]);
 551        if !prompt.ends_with('\n') {
 552            prompt.push('\n');
 553        }
 554
 555        prompt.push_str("<|fim_middle|>current\n");
 556        prompt.push_str(&context[editable_range.start..cursor_offset]);
 557        prompt.push_str(CURSOR_MARKER);
 558        prompt.push_str(&context[cursor_offset..editable_range.end]);
 559        if !prompt.ends_with('\n') {
 560            prompt.push('\n');
 561        }
 562
 563        prompt.push_str("<|fim_middle|>updated\n");
 564    }
 565}
 566
 567mod v0113_ordered {
 568    use super::*;
 569
 570    pub fn write_cursor_excerpt_section(
 571        prompt: &mut String,
 572        path: &Path,
 573        context: &str,
 574        editable_range: &Range<usize>,
 575        cursor_offset: usize,
 576    ) {
 577        let path_str = path.to_string_lossy();
 578        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 579
 580        prompt.push_str("<|fim_prefix|>\n");
 581        prompt.push_str(&context[..editable_range.start]);
 582        if !prompt.ends_with('\n') {
 583            prompt.push('\n');
 584        }
 585
 586        prompt.push_str("<|fim_middle|>current\n");
 587        prompt.push_str(&context[editable_range.start..cursor_offset]);
 588        prompt.push_str(CURSOR_MARKER);
 589        prompt.push_str(&context[cursor_offset..editable_range.end]);
 590        if !prompt.ends_with('\n') {
 591            prompt.push('\n');
 592        }
 593
 594        prompt.push_str("<|fim_suffix|>\n");
 595        prompt.push_str(&context[editable_range.end..]);
 596        if !prompt.ends_with('\n') {
 597            prompt.push('\n');
 598        }
 599
 600        prompt.push_str("<|fim_middle|>updated\n");
 601    }
 602}
 603
 604pub mod v0120_git_merge_markers {
 605    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 606    //!
 607    //! Example prompt:
 608    //!
 609    //! <|file_sep|>path/to/target_file.py
 610    //! <|fim_prefix|>
 611    //! code before editable region
 612    //! <|fim_suffix|>
 613    //! code after editable region
 614    //! <|fim_middle|>
 615    //! <<<<<<< CURRENT
 616    //! code that
 617    //! needs to<|user_cursor|>
 618    //! be rewritten
 619    //! =======
 620    //!
 621    //! Expected output (should be generated by the model):
 622    //!
 623    //! updated
 624    //! code with
 625    //! changes applied
 626    //! >>>>>>> UPDATED
 627
 628    use super::*;
 629
 630    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 631    pub const SEPARATOR: &str = "=======\n";
 632    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 633
 634    pub fn special_tokens() -> &'static [&'static str] {
 635        &[
 636            "<|fim_prefix|>",
 637            "<|fim_suffix|>",
 638            "<|fim_middle|>",
 639            "<|file_sep|>",
 640            START_MARKER,
 641            SEPARATOR,
 642            END_MARKER,
 643            CURSOR_MARKER,
 644        ]
 645    }
 646
 647    pub fn write_cursor_excerpt_section(
 648        prompt: &mut String,
 649        path: &Path,
 650        context: &str,
 651        editable_range: &Range<usize>,
 652        cursor_offset: usize,
 653    ) {
 654        let path_str = path.to_string_lossy();
 655        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 656
 657        prompt.push_str("<|fim_prefix|>");
 658        prompt.push_str(&context[..editable_range.start]);
 659
 660        prompt.push_str("<|fim_suffix|>");
 661        prompt.push_str(&context[editable_range.end..]);
 662        if !prompt.ends_with('\n') {
 663            prompt.push('\n');
 664        }
 665
 666        prompt.push_str("<|fim_middle|>");
 667        prompt.push_str(START_MARKER);
 668        prompt.push_str(&context[editable_range.start..cursor_offset]);
 669        prompt.push_str(CURSOR_MARKER);
 670        prompt.push_str(&context[cursor_offset..editable_range.end]);
 671        if !prompt.ends_with('\n') {
 672            prompt.push('\n');
 673        }
 674        prompt.push_str(SEPARATOR);
 675    }
 676}
 677
 678pub mod v0131_git_merge_markers_prefix {
 679    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 680    //!
 681    //! Example prompt:
 682    //!
 683    //! <|file_sep|>path/to/target_file.py
 684    //! <|fim_prefix|>
 685    //! code before editable region
 686    //! <<<<<<< CURRENT
 687    //! code that
 688    //! needs to<|user_cursor|>
 689    //! be rewritten
 690    //! =======
 691    //! <|fim_suffix|>
 692    //! code after editable region
 693    //! <|fim_middle|>
 694    //!
 695    //! Expected output (should be generated by the model):
 696    //!
 697    //! updated
 698    //! code with
 699    //! changes applied
 700    //! >>>>>>> UPDATED
 701
 702    use super::*;
 703
 704    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 705    pub const SEPARATOR: &str = "=======\n";
 706    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 707
 708    pub fn special_tokens() -> &'static [&'static str] {
 709        &[
 710            "<|fim_prefix|>",
 711            "<|fim_suffix|>",
 712            "<|fim_middle|>",
 713            "<|file_sep|>",
 714            START_MARKER,
 715            SEPARATOR,
 716            END_MARKER,
 717            CURSOR_MARKER,
 718        ]
 719    }
 720
 721    pub fn write_cursor_excerpt_section(
 722        prompt: &mut String,
 723        path: &Path,
 724        context: &str,
 725        editable_range: &Range<usize>,
 726        cursor_offset: usize,
 727    ) {
 728        let path_str = path.to_string_lossy();
 729        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 730
 731        prompt.push_str("<|fim_prefix|>");
 732        prompt.push_str(&context[..editable_range.start]);
 733        prompt.push_str(START_MARKER);
 734        prompt.push_str(&context[editable_range.start..cursor_offset]);
 735        prompt.push_str(CURSOR_MARKER);
 736        prompt.push_str(&context[cursor_offset..editable_range.end]);
 737        if !prompt.ends_with('\n') {
 738            prompt.push('\n');
 739        }
 740        prompt.push_str(SEPARATOR);
 741
 742        prompt.push_str("<|fim_suffix|>");
 743        prompt.push_str(&context[editable_range.end..]);
 744        if !prompt.ends_with('\n') {
 745            prompt.push('\n');
 746        }
 747
 748        prompt.push_str("<|fim_middle|>");
 749    }
 750}
 751
 752pub mod v0211_prefill {
 753    use super::*;
 754
 755    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
 756        let editable_region = &context[editable_range.start..editable_range.end];
 757
 758        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
 759        let prefill_len = editable_region.floor_char_boundary(prefill_len);
 760
 761        // Find a token boundary to avoid splitting tokens in the prefill.
 762        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
 763        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
 764        // the \n and consume any consecutive \n characters after it.
 765        let prefill = &editable_region[..prefill_len];
 766        match prefill.rfind('\n') {
 767            Some(pos) => {
 768                let mut end = pos + 1;
 769                while end < editable_region.len()
 770                    && editable_region.as_bytes().get(end) == Some(&b'\n')
 771                {
 772                    end += 1;
 773                }
 774                editable_region[..end].to_string()
 775            }
 776            // No newline found. Fall back to splitting before the last space
 777            // (word-level boundary)
 778            None => match prefill.rfind(' ') {
 779                Some(pos) => prefill[..pos].to_string(),
 780                None => prefill.to_string(),
 781            },
 782        }
 783    }
 784}
 785
 786pub mod seed_coder {
 787    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
 788    //!
 789    //! Seed-Coder uses different FIM tokens and order than Qwen:
 790    //! - SPM order: suffix comes FIRST, then prefix, then middle
 791    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
 792    //! - File markers: StarCoder-style `<filename>path` (single token + path)
 793    //!
 794    //! All context (related files, edit history) goes in the PREFIX section.
 795    //! The suffix contains only code after the editable region.
 796    //!
 797    //! Example prompt:
 798    //!
 799    //! <[fim-suffix]>
 800    //! code after editable region
 801    //! <[fim-prefix]><filename>related/file.py
 802    //! related file content
 803    //!
 804    //! <filename>edit_history
 805    //! --- a/some_file.py
 806    //! +++ b/some_file.py
 807    //! -old
 808    //! +new
 809    //!
 810    //! <filename>path/to/target_file.py
 811    //! code before editable region
 812    //! <<<<<<< CURRENT
 813    //! code that
 814    //! needs to<|user_cursor|>
 815    //! be rewritten
 816    //! =======
 817    //! <[fim-middle]>
 818    //!
 819    //! Expected output (model generates):
 820    //!
 821    //! updated
 822    //! code with
 823    //! changes applied
 824    //! >>>>>>> UPDATED
 825
 826    use super::*;
 827
 828    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
 829    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
 830    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
 831    pub const FILE_MARKER: &str = "<filename>";
 832
 833    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 834    pub const SEPARATOR: &str = "=======\n";
 835    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 836
 837    pub fn special_tokens() -> &'static [&'static str] {
 838        &[
 839            FIM_SUFFIX,
 840            FIM_PREFIX,
 841            FIM_MIDDLE,
 842            FILE_MARKER,
 843            START_MARKER,
 844            SEPARATOR,
 845            END_MARKER,
 846            CURSOR_MARKER,
 847        ]
 848    }
 849
 850    pub fn format_prompt_with_budget(
 851        path: &Path,
 852        context: &str,
 853        editable_range: &Range<usize>,
 854        cursor_offset: usize,
 855        events: &[Arc<Event>],
 856        related_files: &[RelatedFile],
 857        max_tokens: usize,
 858    ) -> String {
 859        let suffix_section = build_suffix_section(context, editable_range);
 860        let cursor_prefix_section =
 861            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
 862
 863        let suffix_tokens = estimate_tokens(suffix_section.len());
 864        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
 865        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
 866
 867        let edit_history_section = super::format_edit_history_within_budget(
 868            events,
 869            FILE_MARKER,
 870            "edit_history",
 871            budget_after_cursor,
 872        );
 873        let edit_history_tokens = estimate_tokens(edit_history_section.len());
 874        let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 875
 876        let related_files_section = super::format_related_files_within_budget(
 877            related_files,
 878            FILE_MARKER,
 879            "",
 880            budget_after_edit_history,
 881        );
 882
 883        let mut prompt = String::new();
 884        prompt.push_str(&suffix_section);
 885        prompt.push_str(FIM_PREFIX);
 886        prompt.push_str(&related_files_section);
 887        if !related_files_section.is_empty() {
 888            prompt.push('\n');
 889        }
 890        prompt.push_str(&edit_history_section);
 891        if !edit_history_section.is_empty() {
 892            prompt.push('\n');
 893        }
 894        prompt.push_str(&cursor_prefix_section);
 895        prompt.push_str(FIM_MIDDLE);
 896        prompt
 897    }
 898
 899    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
 900        let mut section = String::new();
 901        section.push_str(FIM_SUFFIX);
 902        section.push_str(&context[editable_range.end..]);
 903        if !section.ends_with('\n') {
 904            section.push('\n');
 905        }
 906        section
 907    }
 908
 909    fn build_cursor_prefix_section(
 910        path: &Path,
 911        context: &str,
 912        editable_range: &Range<usize>,
 913        cursor_offset: usize,
 914    ) -> String {
 915        let mut section = String::new();
 916        let path_str = path.to_string_lossy();
 917        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
 918
 919        section.push_str(&context[..editable_range.start]);
 920        section.push_str(START_MARKER);
 921        section.push_str(&context[editable_range.start..cursor_offset]);
 922        section.push_str(CURSOR_MARKER);
 923        section.push_str(&context[cursor_offset..editable_range.end]);
 924        if !section.ends_with('\n') {
 925            section.push('\n');
 926        }
 927        section.push_str(SEPARATOR);
 928        section
 929    }
 930}
 931
 932/// The zeta1 prompt format
 933pub mod zeta1 {
 934    use super::*;
 935    use std::fmt::Write;
 936
 937    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
 938    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
 939    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
 940    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
 941
 942    const INSTRUCTION_HEADER: &str = concat!(
 943        "### Instruction:\n",
 944        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
 945        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
 946        "into account the cursor location.\n\n",
 947        "### User Edits:\n\n"
 948    );
 949    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
 950    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
 951
 952    /// Formats a complete zeta1 prompt from the input events and excerpt.
 953    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
 954        let mut prompt = String::with_capacity(
 955            INSTRUCTION_HEADER.len()
 956                + input_events.len()
 957                + EXCERPT_HEADER.len()
 958                + input_excerpt.len()
 959                + RESPONSE_HEADER.len(),
 960        );
 961        prompt.push_str(INSTRUCTION_HEADER);
 962        prompt.push_str(input_events);
 963        prompt.push_str(EXCERPT_HEADER);
 964        prompt.push_str(input_excerpt);
 965        prompt.push_str(RESPONSE_HEADER);
 966        prompt
 967    }
 968
 969    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
 970    /// editable and context byte-offset ranges within `cursor_excerpt`.
 971    pub fn format_zeta1_from_input(
 972        input: &ZetaPromptInput,
 973        editable_range: Range<usize>,
 974        context_range: Range<usize>,
 975    ) -> String {
 976        let events = format_zeta1_events(&input.events);
 977        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
 978        format_zeta1_prompt(&events, &excerpt)
 979    }
 980
 981    /// Formats events in zeta1 style (oldest first).
 982    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
 983        let mut result = String::new();
 984        for event in events {
 985            let event_string = format_zeta1_event(event);
 986            if event_string.is_empty() {
 987                continue;
 988            }
 989            if !result.is_empty() {
 990                result.push_str("\n\n");
 991            }
 992            result.push_str(&event_string);
 993        }
 994        result
 995    }
 996
 997    fn format_zeta1_event(event: &Event) -> String {
 998        match event {
 999            Event::BufferChange {
1000                path,
1001                old_path,
1002                diff,
1003                ..
1004            } => {
1005                let mut prompt = String::new();
1006                if old_path != path {
1007                    writeln!(
1008                        prompt,
1009                        "User renamed {} to {}\n",
1010                        old_path.display(),
1011                        path.display()
1012                    )
1013                    .ok();
1014                }
1015                if !diff.is_empty() {
1016                    write!(
1017                        prompt,
1018                        "User edited {}:\n```diff\n{}\n```",
1019                        path.display(),
1020                        diff
1021                    )
1022                    .ok();
1023                }
1024                prompt
1025            }
1026        }
1027    }
1028
1029    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
1030    /// within `cursor_excerpt`.
1031    fn format_zeta1_excerpt(
1032        input: &ZetaPromptInput,
1033        editable_range: Range<usize>,
1034        context_range: Range<usize>,
1035    ) -> String {
1036        let path_str = input.cursor_path.to_string_lossy();
1037        let excerpt = &*input.cursor_excerpt;
1038        let cursor_offset = input.cursor_offset_in_excerpt;
1039
1040        let mut prompt = String::new();
1041        writeln!(&mut prompt, "```{path_str}").ok();
1042
1043        let starts_at_file_beginning =
1044            input.excerpt_start_row == Some(0) && context_range.start == 0;
1045        if starts_at_file_beginning {
1046            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
1047        }
1048
1049        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
1050
1051        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
1052        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
1053        prompt.push_str(CURSOR_MARKER);
1054        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
1055        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
1056
1057        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
1058        write!(prompt, "\n```").ok();
1059
1060        prompt
1061    }
1062
1063    /// Cleans zeta1 model output by extracting content between editable region
1064    /// markers and converting the zeta1 cursor marker to the universal one.
1065    /// Returns `None` if the output doesn't contain the expected markers.
1066    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
1067        let content = output.replace(CURSOR_MARKER, "");
1068
1069        let content_start = content
1070            .find(EDITABLE_REGION_START_MARKER)
1071            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
1072            .map(|pos| {
1073                if content.as_bytes().get(pos) == Some(&b'\n') {
1074                    pos + 1
1075                } else {
1076                    pos
1077                }
1078            })
1079            .unwrap_or(0);
1080
1081        let content_end = content
1082            .find(EDITABLE_REGION_END_MARKER)
1083            .map(|pos| {
1084                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
1085                    pos - 1
1086                } else {
1087                    pos
1088                }
1089            })
1090            .unwrap_or(content.len());
1091
1092        if content_start > content_end {
1093            return Some(String::new());
1094        }
1095
1096        let extracted = &content[content_start..content_end];
1097
1098        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
1099            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
1100            let text_before_cursor = text_before_cursor
1101                .find(EDITABLE_REGION_START_MARKER)
1102                .map(|pos| {
1103                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
1104                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
1105                        after_marker + 1
1106                    } else {
1107                        after_marker
1108                    }
1109                })
1110                .unwrap_or(0);
1111            let offset_in_extracted = zeta1_cursor_pos
1112                .saturating_sub(text_before_cursor)
1113                .min(extracted.len());
1114            offset_in_extracted
1115        });
1116
1117        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
1118        if let Some(offset) = cursor_offset {
1119            result.push_str(&extracted[..offset]);
1120            result.push_str(super::CURSOR_MARKER);
1121            result.push_str(&extracted[offset..]);
1122        } else {
1123            result.push_str(extracted);
1124        }
1125
1126        Some(result)
1127    }
1128}
1129
1130#[cfg(test)]
1131mod tests {
1132    use super::*;
1133    use indoc::indoc;
1134
1135    fn make_input(
1136        cursor_excerpt: &str,
1137        editable_range: Range<usize>,
1138        cursor_offset: usize,
1139        events: Vec<Event>,
1140        related_files: Vec<RelatedFile>,
1141    ) -> ZetaPromptInput {
1142        let context_range = 0..cursor_excerpt.len();
1143        ZetaPromptInput {
1144            cursor_path: Path::new("test.rs").into(),
1145            cursor_excerpt: cursor_excerpt.into(),
1146            cursor_offset_in_excerpt: cursor_offset,
1147            excerpt_start_row: None,
1148            events: events.into_iter().map(Arc::new).collect(),
1149            related_files,
1150            excerpt_ranges: ExcerptRanges {
1151                editable_150: editable_range.clone(),
1152                editable_180: editable_range.clone(),
1153                editable_350: editable_range,
1154                editable_150_context_350: context_range.clone(),
1155                editable_180_context_350: context_range.clone(),
1156                editable_350_context_150: context_range,
1157                ..Default::default()
1158            },
1159            experiment: None,
1160            in_open_source_repo: false,
1161            can_collect_data: false,
1162        }
1163    }
1164
1165    fn make_event(path: &str, diff: &str) -> Event {
1166        Event::BufferChange {
1167            path: Path::new(path).into(),
1168            old_path: Path::new(path).into(),
1169            diff: diff.to_string(),
1170            predicted: false,
1171            in_open_source_repo: false,
1172        }
1173    }
1174
1175    fn make_related_file(path: &str, content: &str) -> RelatedFile {
1176        RelatedFile {
1177            path: Path::new(path).into(),
1178            max_row: content.lines().count() as u32,
1179            excerpts: vec![RelatedExcerpt {
1180                row_range: 0..content.lines().count() as u32,
1181                text: content.into(),
1182                order: 0,
1183            }],
1184            in_open_source_repo: false,
1185        }
1186    }
1187
1188    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1189        format_zeta_prompt_with_budget(input, ZetaFormat::V0114180EditableRegion, max_tokens)
1190    }
1191
1192    #[test]
1193    fn test_no_truncation_when_within_budget() {
1194        let input = make_input(
1195            "prefix\neditable\nsuffix",
1196            7..15,
1197            10,
1198            vec![make_event("a.rs", "-old\n+new\n")],
1199            vec![make_related_file("related.rs", "fn helper() {}\n")],
1200        );
1201
1202        assert_eq!(
1203            format_with_budget(&input, 10000),
1204            indoc! {r#"
1205                <|file_sep|>related.rs
1206                fn helper() {}
1207                <|file_sep|>edit history
1208                --- a/a.rs
1209                +++ b/a.rs
1210                -old
1211                +new
1212                <|file_sep|>test.rs
1213                <|fim_prefix|>
1214                prefix
1215                <|fim_middle|>current
1216                edi<|user_cursor|>table
1217                <|fim_suffix|>
1218
1219                suffix
1220                <|fim_middle|>updated
1221            "#}
1222        );
1223    }
1224
1225    #[test]
1226    fn test_truncation_drops_edit_history_when_budget_tight() {
1227        let input = make_input(
1228            "code",
1229            0..4,
1230            2,
1231            vec![make_event("a.rs", "-x\n+y\n")],
1232            vec![
1233                make_related_file("r1.rs", "a\n"),
1234                make_related_file("r2.rs", "b\n"),
1235            ],
1236        );
1237
1238        assert_eq!(
1239            format_with_budget(&input, 10000),
1240            indoc! {r#"
1241                <|file_sep|>r1.rs
1242                a
1243                <|file_sep|>r2.rs
1244                b
1245                <|file_sep|>edit history
1246                --- a/a.rs
1247                +++ b/a.rs
1248                -x
1249                +y
1250                <|file_sep|>test.rs
1251                <|fim_prefix|>
1252                <|fim_middle|>current
1253                co<|user_cursor|>de
1254                <|fim_suffix|>
1255                <|fim_middle|>updated
1256            "#}
1257        );
1258
1259        assert_eq!(
1260            format_with_budget(&input, 50),
1261            indoc! {r#"
1262                <|file_sep|>r1.rs
1263                a
1264                <|file_sep|>r2.rs
1265                b
1266                <|file_sep|>test.rs
1267                <|fim_prefix|>
1268                <|fim_middle|>current
1269                co<|user_cursor|>de
1270                <|fim_suffix|>
1271                <|fim_middle|>updated
1272            "#}
1273        );
1274    }
1275
1276    #[test]
1277    fn test_truncation_includes_partial_excerpts() {
1278        let input = make_input(
1279            "x",
1280            0..1,
1281            0,
1282            vec![],
1283            vec![RelatedFile {
1284                path: Path::new("big.rs").into(),
1285                max_row: 30,
1286                in_open_source_repo: false,
1287                excerpts: vec![
1288                    RelatedExcerpt {
1289                        row_range: 0..10,
1290                        text: "first excerpt\n".into(),
1291                        order: 0,
1292                    },
1293                    RelatedExcerpt {
1294                        row_range: 10..20,
1295                        text: "second excerpt\n".into(),
1296                        order: 0,
1297                    },
1298                    RelatedExcerpt {
1299                        row_range: 20..30,
1300                        text: "third excerpt\n".into(),
1301                        order: 0,
1302                    },
1303                ],
1304            }],
1305        );
1306
1307        assert_eq!(
1308            format_with_budget(&input, 10000),
1309            indoc! {r#"
1310                <|file_sep|>big.rs
1311                first excerpt
1312                ...
1313                second excerpt
1314                ...
1315                third excerpt
1316                <|file_sep|>test.rs
1317                <|fim_prefix|>
1318                <|fim_middle|>current
1319                <|user_cursor|>x
1320                <|fim_suffix|>
1321                <|fim_middle|>updated
1322            "#}
1323        );
1324
1325        assert_eq!(
1326            format_with_budget(&input, 50),
1327            indoc! {r#"
1328                <|file_sep|>big.rs
1329                first excerpt
1330                ...
1331                <|file_sep|>test.rs
1332                <|fim_prefix|>
1333                <|fim_middle|>current
1334                <|user_cursor|>x
1335                <|fim_suffix|>
1336                <|fim_middle|>updated
1337            "#}
1338        );
1339    }
1340
1341    #[test]
1342    fn test_truncation_prioritizes_lower_order_excerpts() {
1343        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
1344        // With tight budget, only the lower-order excerpt from file_b should be included.
1345        let input = make_input(
1346            "x",
1347            0..1,
1348            0,
1349            vec![],
1350            vec![
1351                RelatedFile {
1352                    path: Path::new("file_a.rs").into(),
1353                    max_row: 10,
1354                    in_open_source_repo: false,
1355                    excerpts: vec![RelatedExcerpt {
1356                        row_range: 0..10,
1357                        text: "low priority content\n".into(),
1358                        order: 5,
1359                    }],
1360                },
1361                RelatedFile {
1362                    path: Path::new("file_b.rs").into(),
1363                    max_row: 10,
1364                    in_open_source_repo: false,
1365                    excerpts: vec![RelatedExcerpt {
1366                        row_range: 0..10,
1367                        text: "high priority content\n".into(),
1368                        order: 1,
1369                    }],
1370                },
1371            ],
1372        );
1373
1374        // With large budget, both files included; rendered in stable lexicographic order.
1375        assert_eq!(
1376            format_with_budget(&input, 10000),
1377            indoc! {r#"
1378                <|file_sep|>file_a.rs
1379                low priority content
1380                <|file_sep|>file_b.rs
1381                high priority content
1382                <|file_sep|>test.rs
1383                <|fim_prefix|>
1384                <|fim_middle|>current
1385                <|user_cursor|>x
1386                <|fim_suffix|>
1387                <|fim_middle|>updated
1388            "#}
1389        );
1390
1391        // With tight budget, only file_b (lower order) fits.
1392        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
1393        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
1394        // file_a would need another 14 tokens, which doesn't fit.
1395        assert_eq!(
1396            format_with_budget(&input, 52),
1397            indoc! {r#"
1398                <|file_sep|>file_b.rs
1399                high priority content
1400                <|file_sep|>test.rs
1401                <|fim_prefix|>
1402                <|fim_middle|>current
1403                <|user_cursor|>x
1404                <|fim_suffix|>
1405                <|fim_middle|>updated
1406            "#}
1407        );
1408    }
1409
1410    #[test]
1411    fn test_truncation_drops_high_order_excerpts_within_file() {
1412        // A single file has excerpts at order 1 and order 3. With a tight budget,
1413        // only the order-1 excerpts are included while the order-3 excerpt is
1414        // dropped — even though they belong to the same file. This also preserves
1415        // the parent invariant: parent outline items have order ≤ their best
1416        // child, so they're always included when any child is.
1417        let input = make_input(
1418            "x",
1419            0..1,
1420            0,
1421            vec![],
1422            vec![RelatedFile {
1423                path: Path::new("mod.rs").into(),
1424                max_row: 30,
1425                in_open_source_repo: false,
1426                excerpts: vec![
1427                    RelatedExcerpt {
1428                        row_range: 0..5,
1429                        text: "mod header\n".into(),
1430                        order: 1,
1431                    },
1432                    RelatedExcerpt {
1433                        row_range: 5..15,
1434                        text: "important fn\n".into(),
1435                        order: 1,
1436                    },
1437                    RelatedExcerpt {
1438                        row_range: 15..30,
1439                        text: "less important fn\n".into(),
1440                        order: 3,
1441                    },
1442                ],
1443            }],
1444        );
1445
1446        // With large budget, all three excerpts included.
1447        assert_eq!(
1448            format_with_budget(&input, 10000),
1449            indoc! {r#"
1450                <|file_sep|>mod.rs
1451                mod header
1452                ...
1453                important fn
1454                ...
1455                less important fn
1456                <|file_sep|>test.rs
1457                <|fim_prefix|>
1458                <|fim_middle|>current
1459                <|user_cursor|>x
1460                <|fim_suffix|>
1461                <|fim_middle|>updated
1462            "#}
1463        );
1464
1465        // With tight budget, only order<=1 excerpts included (header + important fn).
1466        assert_eq!(
1467            format_with_budget(&input, 55),
1468            indoc! {r#"
1469                <|file_sep|>mod.rs
1470                mod header
1471                ...
1472                important fn
1473                ...
1474                <|file_sep|>test.rs
1475                <|fim_prefix|>
1476                <|fim_middle|>current
1477                <|user_cursor|>x
1478                <|fim_suffix|>
1479                <|fim_middle|>updated
1480            "#}
1481        );
1482    }
1483
1484    #[test]
1485    fn test_truncation_drops_older_events_first() {
1486        let input = make_input(
1487            "x",
1488            0..1,
1489            0,
1490            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
1491            vec![],
1492        );
1493
1494        assert_eq!(
1495            format_with_budget(&input, 10000),
1496            indoc! {r#"
1497                <|file_sep|>edit history
1498                --- a/old.rs
1499                +++ b/old.rs
1500                -1
1501                --- a/new.rs
1502                +++ b/new.rs
1503                -2
1504                <|file_sep|>test.rs
1505                <|fim_prefix|>
1506                <|fim_middle|>current
1507                <|user_cursor|>x
1508                <|fim_suffix|>
1509                <|fim_middle|>updated
1510            "#}
1511        );
1512
1513        assert_eq!(
1514            format_with_budget(&input, 55),
1515            indoc! {r#"
1516                <|file_sep|>edit history
1517                --- a/new.rs
1518                +++ b/new.rs
1519                -2
1520                <|file_sep|>test.rs
1521                <|fim_prefix|>
1522                <|fim_middle|>current
1523                <|user_cursor|>x
1524                <|fim_suffix|>
1525                <|fim_middle|>updated
1526            "#}
1527        );
1528    }
1529
1530    #[test]
1531    fn test_cursor_excerpt_always_included_with_minimal_budget() {
1532        let input = make_input(
1533            "fn main() {}",
1534            0..12,
1535            3,
1536            vec![make_event("a.rs", "-old\n+new\n")],
1537            vec![make_related_file("related.rs", "helper\n")],
1538        );
1539
1540        assert_eq!(
1541            format_with_budget(&input, 30),
1542            indoc! {r#"
1543                <|file_sep|>test.rs
1544                <|fim_prefix|>
1545                <|fim_middle|>current
1546                fn <|user_cursor|>main() {}
1547                <|fim_suffix|>
1548                <|fim_middle|>updated
1549            "#}
1550        );
1551    }
1552
1553    fn format_seed_coder(input: &ZetaPromptInput) -> String {
1554        format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, 10000)
1555    }
1556
1557    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1558        format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, max_tokens)
1559    }
1560
1561    #[test]
1562    fn test_seed_coder_basic_format() {
1563        let input = make_input(
1564            "prefix\neditable\nsuffix",
1565            7..15,
1566            10,
1567            vec![make_event("a.rs", "-old\n+new\n")],
1568            vec![make_related_file("related.rs", "fn helper() {}\n")],
1569        );
1570
1571        assert_eq!(
1572            format_seed_coder(&input),
1573            indoc! {r#"
1574                <[fim-suffix]>
1575                suffix
1576                <[fim-prefix]><filename>related.rs
1577                fn helper() {}
1578
1579                <filename>edit_history
1580                --- a/a.rs
1581                +++ b/a.rs
1582                -old
1583                +new
1584
1585                <filename>test.rs
1586                prefix
1587                <<<<<<< CURRENT
1588                edi<|user_cursor|>table
1589                =======
1590                <[fim-middle]>"#}
1591        );
1592    }
1593
1594    #[test]
1595    fn test_seed_coder_no_context() {
1596        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
1597
1598        assert_eq!(
1599            format_seed_coder(&input),
1600            indoc! {r#"
1601                <[fim-suffix]>
1602                after
1603                <[fim-prefix]><filename>test.rs
1604                before
1605                <<<<<<< CURRENT
1606                mid<|user_cursor|>dle
1607                =======
1608                <[fim-middle]>"#}
1609        );
1610    }
1611
1612    #[test]
1613    fn test_seed_coder_truncation_drops_context() {
1614        let input = make_input(
1615            "code",
1616            0..4,
1617            2,
1618            vec![make_event("a.rs", "-x\n+y\n")],
1619            vec![make_related_file("r1.rs", "content\n")],
1620        );
1621
1622        // With large budget, everything is included
1623        assert_eq!(
1624            format_seed_coder(&input),
1625            indoc! {r#"
1626                <[fim-suffix]>
1627                <[fim-prefix]><filename>r1.rs
1628                content
1629
1630                <filename>edit_history
1631                --- a/a.rs
1632                +++ b/a.rs
1633                -x
1634                +y
1635
1636                <filename>test.rs
1637                <<<<<<< CURRENT
1638                co<|user_cursor|>de
1639                =======
1640                <[fim-middle]>"#}
1641        );
1642
1643        // With tight budget, context is dropped but cursor section remains
1644        assert_eq!(
1645            format_seed_coder_with_budget(&input, 30),
1646            indoc! {r#"
1647                <[fim-suffix]>
1648                <[fim-prefix]><filename>test.rs
1649                <<<<<<< CURRENT
1650                co<|user_cursor|>de
1651                =======
1652                <[fim-middle]>"#}
1653        );
1654    }
1655
1656    #[test]
1657    fn test_seed_coder_truncation_prioritizes_lower_order() {
1658        let input = make_input(
1659            "code",
1660            0..4,
1661            2,
1662            vec![],
1663            vec![
1664                RelatedFile {
1665                    path: Path::new("low_prio.rs").into(),
1666                    max_row: 5,
1667                    in_open_source_repo: false,
1668                    excerpts: vec![RelatedExcerpt {
1669                        row_range: 0..5,
1670                        text: "low prio\n".into(),
1671                        order: 10,
1672                    }],
1673                },
1674                RelatedFile {
1675                    path: Path::new("high_prio.rs").into(),
1676                    max_row: 5,
1677                    in_open_source_repo: false,
1678                    excerpts: vec![RelatedExcerpt {
1679                        row_range: 0..5,
1680                        text: "high prio\n".into(),
1681                        order: 1,
1682                    }],
1683                },
1684            ],
1685        );
1686
1687        // With large budget, both included; rendered in stable lexicographic order.
1688        assert_eq!(
1689            format_seed_coder(&input),
1690            indoc! {r#"
1691                <[fim-suffix]>
1692                <[fim-prefix]><filename>low_prio.rs
1693                low prio
1694                <filename>high_prio.rs
1695                high prio
1696
1697                <filename>test.rs
1698                <<<<<<< CURRENT
1699                co<|user_cursor|>de
1700                =======
1701                <[fim-middle]>"#}
1702        );
1703
1704        // With tight budget, only high_prio included.
1705        // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
1706        // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
1707        assert_eq!(
1708            format_seed_coder_with_budget(&input, 44),
1709            indoc! {r#"
1710                <[fim-suffix]>
1711                <[fim-prefix]><filename>high_prio.rs
1712                high prio
1713
1714                <filename>test.rs
1715                <<<<<<< CURRENT
1716                co<|user_cursor|>de
1717                =======
1718                <[fim-middle]>"#}
1719        );
1720    }
1721
1722    #[test]
1723    fn test_seed_coder_clean_output() {
1724        let output_with_marker = "new code\n>>>>>>> UPDATED\n";
1725        let output_without_marker = "new code\n";
1726
1727        assert_eq!(
1728            clean_zeta2_model_output(output_with_marker, ZetaFormat::V0211SeedCoder),
1729            "new code\n"
1730        );
1731        assert_eq!(
1732            clean_zeta2_model_output(output_without_marker, ZetaFormat::V0211SeedCoder),
1733            "new code\n"
1734        );
1735    }
1736
1737    #[test]
1738    fn test_format_zeta1_from_input_basic() {
1739        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
1740        let input = ZetaPromptInput {
1741            cursor_path: Path::new("src/main.rs").into(),
1742            cursor_excerpt: excerpt.into(),
1743            cursor_offset_in_excerpt: 30,
1744            excerpt_start_row: Some(0),
1745            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
1746            related_files: vec![],
1747            excerpt_ranges: ExcerptRanges {
1748                editable_150: 15..41,
1749                editable_180: 15..41,
1750                editable_350: 15..41,
1751                editable_150_context_350: 0..excerpt.len(),
1752                editable_180_context_350: 0..excerpt.len(),
1753                editable_350_context_150: 0..excerpt.len(),
1754                ..Default::default()
1755            },
1756            experiment: None,
1757            in_open_source_repo: false,
1758            can_collect_data: false,
1759        };
1760
1761        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
1762
1763        assert_eq!(
1764            prompt,
1765            concat!(
1766                "### Instruction:\n",
1767                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1768                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1769                "into account the cursor location.\n",
1770                "\n",
1771                "### User Edits:\n",
1772                "\n",
1773                "User edited other.rs:\n",
1774                "```diff\n",
1775                "-old\n",
1776                "+new\n",
1777                "\n",
1778                "```\n",
1779                "\n",
1780                "### User Excerpt:\n",
1781                "\n",
1782                "```src/main.rs\n",
1783                "<|start_of_file|>\n",
1784                "fn before() {}\n",
1785                "<|editable_region_start|>\n",
1786                "fn foo() {\n",
1787                "    <|user_cursor_is_here|>let x = 1;\n",
1788                "\n",
1789                "<|editable_region_end|>}\n",
1790                "fn after() {}\n",
1791                "\n",
1792                "```\n",
1793                "\n",
1794                "### Response:\n",
1795            ),
1796        );
1797    }
1798
1799    #[test]
1800    fn test_format_zeta1_from_input_no_start_of_file() {
1801        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
1802        let input = ZetaPromptInput {
1803            cursor_path: Path::new("src/main.rs").into(),
1804            cursor_excerpt: excerpt.into(),
1805            cursor_offset_in_excerpt: 15,
1806            excerpt_start_row: Some(10),
1807            events: vec![],
1808            related_files: vec![],
1809            excerpt_ranges: ExcerptRanges {
1810                editable_150: 0..28,
1811                editable_180: 0..28,
1812                editable_350: 0..28,
1813                editable_150_context_350: 0..28,
1814                editable_180_context_350: 0..28,
1815                editable_350_context_150: 0..28,
1816                ..Default::default()
1817            },
1818            experiment: None,
1819            in_open_source_repo: false,
1820            can_collect_data: false,
1821        };
1822
1823        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
1824
1825        assert_eq!(
1826            prompt,
1827            concat!(
1828                "### Instruction:\n",
1829                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1830                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1831                "into account the cursor location.\n",
1832                "\n",
1833                "### User Edits:\n",
1834                "\n",
1835                "\n",
1836                "\n",
1837                "### User Excerpt:\n",
1838                "\n",
1839                "```src/main.rs\n",
1840                "<|editable_region_start|>\n",
1841                "fn foo() {\n",
1842                "    <|user_cursor_is_here|>let x = 1;\n",
1843                "}\n",
1844                "\n",
1845                "<|editable_region_end|>\n",
1846                "```\n",
1847                "\n",
1848                "### Response:\n",
1849            ),
1850        );
1851    }
1852
1853    #[test]
1854    fn test_format_zeta1_from_input_with_sub_ranges() {
1855        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
1856        let editable_range = 10..37;
1857        let context_range = 0..excerpt.len();
1858
1859        let input = ZetaPromptInput {
1860            cursor_path: Path::new("test.rs").into(),
1861            cursor_excerpt: excerpt.into(),
1862            cursor_offset_in_excerpt: 25,
1863            excerpt_start_row: Some(0),
1864            events: vec![],
1865            related_files: vec![],
1866            excerpt_ranges: ExcerptRanges {
1867                editable_150: editable_range.clone(),
1868                editable_180: editable_range.clone(),
1869                editable_350: editable_range.clone(),
1870                editable_150_context_350: context_range.clone(),
1871                editable_180_context_350: context_range.clone(),
1872                editable_350_context_150: context_range.clone(),
1873                ..Default::default()
1874            },
1875            experiment: None,
1876            in_open_source_repo: false,
1877            can_collect_data: false,
1878        };
1879
1880        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
1881
1882        assert_eq!(
1883            prompt,
1884            concat!(
1885                "### Instruction:\n",
1886                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1887                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1888                "into account the cursor location.\n",
1889                "\n",
1890                "### User Edits:\n",
1891                "\n",
1892                "\n",
1893                "\n",
1894                "### User Excerpt:\n",
1895                "\n",
1896                "```test.rs\n",
1897                "<|start_of_file|>\n",
1898                "// prefix\n",
1899                "<|editable_region_start|>\n",
1900                "fn foo() {\n",
1901                "    <|user_cursor_is_here|>let x = 1;\n",
1902                "}\n",
1903                "<|editable_region_end|>\n",
1904                "// suffix\n",
1905                "\n",
1906                "```\n",
1907                "\n",
1908                "### Response:\n",
1909            ),
1910        );
1911    }
1912
1913    #[test]
1914    fn test_clean_zeta1_model_output_basic() {
1915        let output = indoc! {"
1916            <|editable_region_start|>
1917            fn main() {
1918                println!(\"hello\");
1919            }
1920            <|editable_region_end|>
1921        "};
1922
1923        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1924        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
1925    }
1926
1927    #[test]
1928    fn test_clean_zeta1_model_output_with_cursor() {
1929        let output = indoc! {"
1930            <|editable_region_start|>
1931            fn main() {
1932                <|user_cursor_is_here|>println!(\"hello\");
1933            }
1934            <|editable_region_end|>
1935        "};
1936
1937        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1938        assert_eq!(
1939            cleaned,
1940            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
1941        );
1942    }
1943
1944    #[test]
1945    fn test_clean_zeta1_model_output_no_markers() {
1946        let output = "fn main() {}\n";
1947        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1948        assert_eq!(cleaned, "fn main() {}\n");
1949    }
1950
1951    #[test]
1952    fn test_clean_zeta1_model_output_empty_region() {
1953        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
1954        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1955        assert_eq!(cleaned, "");
1956    }
1957}