zeta_prompt.rs

   1use anyhow::Result;
   2use serde::{Deserialize, Serialize};
   3use std::fmt::Write;
   4use std::ops::Range;
   5use std::path::Path;
   6use std::sync::Arc;
   7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
   8
   /// Marker text inserted into the rendered prompt at the user's cursor position.
   9pub const CURSOR_MARKER: &str = "<|user_cursor|>";
    /// Default prompt budget, in estimated tokens, that `format_zeta_prompt` passes on.
  10pub const MAX_PROMPT_TOKENS: usize = 4096;
  11
  12/// Use up to this amount of the editable region for prefill.
  13/// Larger values may result in more robust generation, but
  14/// this region becomes non-editable.
  15pub const PREFILL_RATIO: f64 = 0.1; // 10%
  16
  /// Cheap token-count estimate for a text that is `bytes` bytes long.
  ///
  /// Assumes roughly three bytes per token; the division rounds down.
  fn estimate_tokens(bytes: usize) -> usize {
      const BYTES_PER_TOKEN: usize = 3;
      bytes / BYTES_PER_TOKEN
  }
  20
  21/// The client's preferred edit prediction model. The server may override this.
    ///
    /// Carried in the optional `preferred_model` field of `ZetaPromptInput`.
  22#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
  23pub enum EditPredictionModelKind {
  24    Zeta1,
  25    Zeta2,
  26}
  27
  28/// Pre-computed byte offset ranges within `cursor_excerpt` for different
  29/// editable and context token budgets. Allows the server to select the
  30/// appropriate ranges for whichever model it uses.
    ///
    /// `excerpt_range_for_format` picks one (editable, context) pair per format;
    /// `resolve_cursor_region` then slices `cursor_excerpt` with the context range
    /// and rebases the editable range so it is relative to the context start.
  31#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
  32pub struct ExcerptRanges {
  33    /// Editable region computed with a 150-token budget.
  34    pub editable_150: Range<usize>,
  35    /// Editable region computed with a 180-token budget.
  36    pub editable_180: Range<usize>,
  37    /// Editable region computed with a 350-token budget.
  38    pub editable_350: Range<usize>,
  39    /// Context boundary when using editable_150 with 350 tokens of additional context.
  40    pub editable_150_context_350: Range<usize>,
  41    /// Context boundary when using editable_180 with 350 tokens of additional context.
  42    pub editable_180_context_350: Range<usize>,
  43    /// Context boundary when using editable_350 with 150 tokens of additional context.
  44    pub editable_350_context_150: Range<usize>,
  45}
  46
  47#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
    /// Everything needed to render an edit-prediction prompt for one cursor position.
    ///
    /// Offsets and ranges are byte offsets into `cursor_excerpt`; the formatting
    /// code uses them directly to slice that string.
  48pub struct ZetaPromptInput {
        /// Path of the file containing the cursor; written after the file marker
        /// at the top of the cursor section.
  49    pub cursor_path: Arc<Path>,
        /// Text surrounding the cursor from which the prompt's context is taken.
  50    pub cursor_excerpt: Arc<str>,
        /// Editable byte range within `cursor_excerpt`; used as-is when
        /// `excerpt_ranges` is absent.
  51    pub editable_range_in_excerpt: Range<usize>,
        /// Byte offset of the cursor within `cursor_excerpt`.
  52    pub cursor_offset_in_excerpt: usize,
        /// NOTE(review): not read by the code visible in this part of the file;
        /// presumably the file row at which `cursor_excerpt` starts -- confirm.
  53    #[serde(default, skip_serializing_if = "Option::is_none")]
  54    pub excerpt_start_row: Option<u32>,
        /// Edit history rendered with `write_event`. When the budget is tight the
        /// entries at the end of this list are kept and earlier ones are dropped.
  55    pub events: Vec<Arc<Event>>,
        /// Other files whose excerpts are rendered into the prompt, budget permitting.
  56    pub related_files: Vec<RelatedFile>,
  57    /// When set, the excerpt was computed with a larger budget (~512 tokens)
  58    /// and these ranges let the server select model-appropriate subsets.
  59    /// When absent, the excerpt IS the context region and
  60    /// `editable_range_in_excerpt` is the only editable range.
  61    #[serde(default, skip_serializing_if = "Option::is_none")]
  62    pub excerpt_ranges: Option<ExcerptRanges>,
  63    /// Client's preferred model. The server may override.
  64    #[serde(default, skip_serializing_if = "Option::is_none")]
  65    pub preferred_model: Option<EditPredictionModelKind>,
        /// Defaults to `false` when missing from the serialized input. Not read by
        /// the prompt formatting code visible here.
  66    #[serde(default)]
  67    pub in_open_source_repo: bool,
        /// Defaults to `false` when missing from the serialized input. Not read by
        /// the prompt formatting code visible here.
  68    #[serde(default)]
  69    pub can_collect_data: bool,
  70}
  71
  72#[derive(
  73    Default,
  74    Clone,
  75    Copy,
  76    Debug,
  77    PartialEq,
  78    Eq,
  79    Hash,
  80    EnumIter,
  81    IntoStaticStr,
  82    Serialize,
  83    Deserialize,
  84)]
    /// Versioned prompt formats. Each variant selects how the cursor section is
    /// rendered and which special tokens are reserved.
    // NOTE(review): no variant visible here breaks camel-case naming; confirm the
    // `allow` below is still needed.
  85#[allow(non_camel_case_types)]
  86pub enum ZetaFormat {
        /// Cursor section written by `v0112_middle_at_end`: prefix, suffix, then
        /// the editable region last.
  87    V0112MiddleAtEnd,
        /// Cursor section written by `v0113_ordered`: prefix, editable region and
        /// suffix in document order.
  88    V0113Ordered,
        /// Rendered with the same section writer as `V0113Ordered`.
  89    V0114180EditableRegion,
        /// Cursor section written by `v0120_git_merge_markers`.
  90    V0120GitMergeMarkers,
        /// Cursor section written by `v0131_git_merge_markers_prefix`. This is the
        /// default format.
  91    #[default]
  92    V0131GitMergeMarkersPrefix,
        /// Same prompt as `V0131GitMergeMarkersPrefix`; additionally `get_prefill`
        /// returns a non-trivial prefill for this format.
  93    V0211Prefill,
        /// Whole prompt built by the `seed_coder` module.
  94    V0211SeedCoder,
  95}
  96
  97impl std::fmt::Display for ZetaFormat {
  98    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  99        write!(f, "{}", <&'static str>::from(self))
 100    }
 101}
 102
 103impl ZetaFormat {
 104    pub fn parse(format_name: &str) -> Result<Self> {
 105        let mut results = ZetaFormat::iter().filter(|version| {
 106            <&'static str>::from(version)
 107                .to_lowercase()
 108                .contains(&format_name.to_lowercase())
 109        });
 110        let Some(result) = results.next() else {
 111            anyhow::bail!(
 112                "`{format_name}` did not match any of:\n{}",
 113                Self::options_as_string()
 114            );
 115        };
 116        if results.next().is_some() {
 117            anyhow::bail!(
 118                "`{format_name}` matched more than one of:\n{}",
 119                Self::options_as_string()
 120            );
 121        }
 122        Ok(result)
 123    }
 124
 125    pub fn options_as_string() -> String {
 126        ZetaFormat::iter()
 127            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 128            .collect::<Vec<_>>()
 129            .concat()
 130    }
 131
 132    pub fn special_tokens(&self) -> &'static [&'static str] {
 133        match self {
 134            ZetaFormat::V0112MiddleAtEnd
 135            | ZetaFormat::V0113Ordered
 136            | ZetaFormat::V0114180EditableRegion => &[
 137                "<|fim_prefix|>",
 138                "<|fim_suffix|>",
 139                "<|fim_middle|>",
 140                "<|file_sep|>",
 141                CURSOR_MARKER,
 142            ],
 143            ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 144            ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 145                v0131_git_merge_markers_prefix::special_tokens()
 146            }
 147            ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 148        }
 149    }
 150}
 151
 152#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
 153#[serde(tag = "event")]
    /// An edit-history entry; rendered into the prompt by `write_event`.
 154pub enum Event {
        /// A change to a buffer, carried as diff text.
 155    BufferChange {
            /// Path rendered on the `+++ b` header line.
 156        path: Arc<Path>,
            /// Path rendered on the `--- a` header line.
 157        old_path: Arc<Path>,
            /// Diff body, appended verbatim after the two header lines.
 158        diff: String,
            /// When true, the entry is prefixed with a `// User accepted prediction:` line.
 159        predicted: bool,
            /// Exposed through `Event::in_open_source_repo`; ignored when rendering.
 160        in_open_source_repo: bool,
 161    },
 162}
 163
 164impl Event {
 165    pub fn in_open_source_repo(&self) -> bool {
 166        match self {
 167            Event::BufferChange {
 168                in_open_source_repo,
 169                ..
 170            } => *in_open_source_repo,
 171        }
 172    }
 173}
 174
 175pub fn write_event(prompt: &mut String, event: &Event) {
 176    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 177        for component in path.components() {
 178            prompt.push('/');
 179            write!(prompt, "{}", component.as_os_str().display()).ok();
 180        }
 181    }
 182    match event {
 183        Event::BufferChange {
 184            path,
 185            old_path,
 186            diff,
 187            predicted,
 188            in_open_source_repo: _,
 189        } => {
 190            if *predicted {
 191                prompt.push_str("// User accepted prediction:\n");
 192            }
 193            prompt.push_str("--- a");
 194            write_path_as_unix_str(prompt, old_path.as_ref());
 195            prompt.push_str("\n+++ b");
 196            write_path_as_unix_str(prompt, path.as_ref());
 197            prompt.push('\n');
 198            prompt.push_str(diff);
 199        }
 200    }
 201}
 202
 203#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
    /// A file other than the cursor file whose excerpts may be rendered into the prompt.
 204pub struct RelatedFile {
        /// Path written after the file marker on the file's header line.
 205    pub path: Arc<Path>,
        /// Threshold for truncation markers: an excerpt whose `row_range.end` is
        /// below this value is followed by a `...` line.
        /// NOTE(review): presumably the last row of the file -- confirm.
 206    pub max_row: u32,
        /// Excerpts of this file, rendered under the file's header.
 207    pub excerpts: Vec<RelatedExcerpt>,
        /// Defaults to `false` when missing from the serialized input. Not read by
        /// the rendering code visible here.
 208    #[serde(default)]
 209    pub in_open_source_repo: bool,
 210}
 211
 212#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
    /// One excerpt of a `RelatedFile`.
 213pub struct RelatedExcerpt {
        /// Rows covered by the excerpt. The rendering code only consults `end`,
        /// comparing it with the owning file's `max_row`.
 214    pub row_range: Range<u32>,
        /// Excerpt text, rendered verbatim (a newline is appended when missing).
 215    pub text: Arc<str>,
        /// Priority for budgeted rendering: excerpts with lower values are admitted
        /// first. Defaults to 0 when missing from the serialized input.
 216    #[serde(default)]
 217    pub order: usize,
 218}
 219
 220pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 221    format
 222        .special_tokens()
 223        .iter()
 224        .any(|token| input.cursor_excerpt.contains(token))
 225}
 226
 /// Renders the prompt for `input` in the given `format`, using
 /// `MAX_PROMPT_TOKENS` as the token budget.
 227pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 228    format_zeta_prompt_with_budget(input, format, MAX_PROMPT_TOKENS)
 229}
 230
 231/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
 232pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
 233    match format {
 234        ZetaFormat::V0120GitMergeMarkers => output
 235            .strip_suffix(v0120_git_merge_markers::END_MARKER)
 236            .unwrap_or(output),
 237        ZetaFormat::V0131GitMergeMarkersPrefix => output
 238            .strip_suffix(v0131_git_merge_markers_prefix::END_MARKER)
 239            .unwrap_or(output),
 240        ZetaFormat::V0211SeedCoder => output
 241            .strip_suffix(seed_coder::END_MARKER)
 242            .unwrap_or(output),
 243        _ => output,
 244    }
 245}
 246
 247pub fn excerpt_range_for_format(
 248    format: ZetaFormat,
 249    ranges: &ExcerptRanges,
 250) -> (Range<usize>, Range<usize>) {
 251    match format {
 252        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 253            ranges.editable_150.clone(),
 254            ranges.editable_150_context_350.clone(),
 255        ),
 256        ZetaFormat::V0114180EditableRegion
 257        | ZetaFormat::V0120GitMergeMarkers
 258        | ZetaFormat::V0131GitMergeMarkersPrefix
 259        | ZetaFormat::V0211Prefill
 260        | ZetaFormat::V0211SeedCoder => (
 261            ranges.editable_350.clone(),
 262            ranges.editable_350_context_150.clone(),
 263        ),
 264    }
 265}
 266
 267pub fn resolve_cursor_region(
 268    input: &ZetaPromptInput,
 269    format: ZetaFormat,
 270) -> (&str, Range<usize>, usize) {
 271    let Some(ranges) = &input.excerpt_ranges else {
 272        return (
 273            &input.cursor_excerpt,
 274            input.editable_range_in_excerpt.clone(),
 275            input.cursor_offset_in_excerpt,
 276        );
 277    };
 278
 279    let (editable_range, context_range) = excerpt_range_for_format(format, ranges);
 280    let context_start = context_range.start;
 281    let context_text = &input.cursor_excerpt[context_range];
 282    let adjusted_editable =
 283        (editable_range.start - context_start)..(editable_range.end - context_start);
 284    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 285
 286    (context_text, adjusted_editable, adjusted_cursor)
 287}
 288
 289fn format_zeta_prompt_with_budget(
 290    input: &ZetaPromptInput,
 291    format: ZetaFormat,
 292    max_tokens: usize,
 293) -> String {
 294    let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
 295    let path = &*input.cursor_path;
 296
 297    let mut cursor_section = String::new();
 298    match format {
 299        ZetaFormat::V0112MiddleAtEnd => {
 300            v0112_middle_at_end::write_cursor_excerpt_section(
 301                &mut cursor_section,
 302                path,
 303                context,
 304                &editable_range,
 305                cursor_offset,
 306            );
 307        }
 308        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 309            v0113_ordered::write_cursor_excerpt_section(
 310                &mut cursor_section,
 311                path,
 312                context,
 313                &editable_range,
 314                cursor_offset,
 315            )
 316        }
 317        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 318            &mut cursor_section,
 319            path,
 320            context,
 321            &editable_range,
 322            cursor_offset,
 323        ),
 324        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 325            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 326                &mut cursor_section,
 327                path,
 328                context,
 329                &editable_range,
 330                cursor_offset,
 331            )
 332        }
 333        ZetaFormat::V0211SeedCoder => {
 334            return seed_coder::format_prompt_with_budget(
 335                path,
 336                context,
 337                &editable_range,
 338                cursor_offset,
 339                &input.events,
 340                &input.related_files,
 341                max_tokens,
 342            );
 343        }
 344    }
 345
 346    let cursor_tokens = estimate_tokens(cursor_section.len());
 347    let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
 348
 349    let edit_history_section = format_edit_history_within_budget(
 350        &input.events,
 351        "<|file_sep|>",
 352        "edit history",
 353        budget_after_cursor,
 354    );
 355    let edit_history_tokens = estimate_tokens(edit_history_section.len());
 356    let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 357
 358    let related_files_section = format_related_files_within_budget(
 359        &input.related_files,
 360        "<|file_sep|>",
 361        "",
 362        budget_after_edit_history,
 363    );
 364
 365    let mut prompt = String::new();
 366    prompt.push_str(&related_files_section);
 367    prompt.push_str(&edit_history_section);
 368    prompt.push_str(&cursor_section);
 369    prompt
 370}
 371
 372pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 373    match format {
 374        ZetaFormat::V0112MiddleAtEnd
 375        | ZetaFormat::V0113Ordered
 376        | ZetaFormat::V0114180EditableRegion
 377        | ZetaFormat::V0120GitMergeMarkers
 378        | ZetaFormat::V0131GitMergeMarkersPrefix
 379        | ZetaFormat::V0211SeedCoder => String::new(),
 380        ZetaFormat::V0211Prefill => {
 381            let (context, editable_range, _) = resolve_cursor_region(input, format);
 382            v0211_prefill::get_prefill(context, &editable_range)
 383        }
 384    }
 385}
 386
 387fn format_edit_history_within_budget(
 388    events: &[Arc<Event>],
 389    file_marker: &str,
 390    edit_history_name: &str,
 391    max_tokens: usize,
 392) -> String {
 393    let header = format!("{}{}\n", file_marker, edit_history_name);
 394    let header_tokens = estimate_tokens(header.len());
 395    if header_tokens >= max_tokens {
 396        return String::new();
 397    }
 398
 399    let mut event_strings: Vec<String> = Vec::new();
 400    let mut total_tokens = header_tokens;
 401
 402    for event in events.iter().rev() {
 403        let mut event_str = String::new();
 404        write_event(&mut event_str, event);
 405        let event_tokens = estimate_tokens(event_str.len());
 406
 407        if total_tokens + event_tokens > max_tokens {
 408            break;
 409        }
 410        total_tokens += event_tokens;
 411        event_strings.push(event_str);
 412    }
 413
 414    if event_strings.is_empty() {
 415        return String::new();
 416    }
 417
 418    let mut result = header;
 419    for event_str in event_strings.iter().rev() {
 420        result.push_str(event_str);
 421    }
 422    result
 423}
 424
 425fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
 426    let needs_newline = !excerpt.text.ends_with('\n');
 427    let needs_ellipsis = excerpt.row_range.end < file_max_row;
 428    let len = excerpt.text.len()
 429        + if needs_newline { "\n".len() } else { 0 }
 430        + if needs_ellipsis { "...\n".len() } else { 0 };
 431    estimate_tokens(len)
 432}
 433
 434pub fn format_related_files_within_budget(
 435    related_files: &[RelatedFile],
 436    file_prefix: &str,
 437    file_suffix: &str,
 438    max_tokens: usize,
 439) -> String {
 440    struct ExcerptCandidate {
 441        file_ix: usize,
 442        excerpt_ix: usize,
 443        order: usize,
 444    }
 445
 446    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
 447        .iter()
 448        .enumerate()
 449        .flat_map(|(file_ix, file)| {
 450            file.excerpts
 451                .iter()
 452                .enumerate()
 453                .map(move |(excerpt_ix, e)| ExcerptCandidate {
 454                    file_ix,
 455                    excerpt_ix,
 456                    order: e.order,
 457                })
 458        })
 459        .collect();
 460
 461    // Pre-compute file header strings and their token costs.
 462    let file_headers: Vec<String> = related_files
 463        .iter()
 464        .map(|file| {
 465            let path_str = file.path.to_string_lossy();
 466            format!("{}{}\n", file_prefix, path_str)
 467        })
 468        .collect();
 469
 470    // Sort the excerpts by their order and determine how many fit within the budget.
 471    let mut total_tokens = 0;
 472    let mut included_excerpt_count = 0_usize;
 473    let mut included_file_indices = vec![false; related_files.len()];
 474    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
 475    for candidate in &excerpt_candidates {
 476        let file = &related_files[candidate.file_ix];
 477        let excerpt = &file.excerpts[candidate.excerpt_ix];
 478        let file_already_included = included_file_indices[candidate.file_ix];
 479        let header_cost = if file_already_included {
 480            0
 481        } else {
 482            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
 483        };
 484        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
 485        if total_tokens + header_cost + excerpt_cost > max_tokens {
 486            break;
 487        }
 488        total_tokens += header_cost + excerpt_cost;
 489        if !file_already_included {
 490            included_file_indices[candidate.file_ix] = true;
 491        }
 492        included_excerpt_count += 1;
 493    }
 494
 495    excerpt_candidates.truncate(included_excerpt_count);
 496    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
 497
 498    // Render all of the files that fit within the token budget, in the original order.
 499    let mut result = String::new();
 500    let mut last_file_ix = None;
 501    for candidate in &excerpt_candidates {
 502        if last_file_ix != Some(candidate.file_ix) {
 503            if last_file_ix.is_some() {
 504                result.push_str(file_suffix);
 505            }
 506            result.push_str(&file_headers[candidate.file_ix]);
 507            last_file_ix = Some(candidate.file_ix);
 508        }
 509        let file = &related_files[candidate.file_ix];
 510        let excerpt = &file.excerpts[candidate.excerpt_ix];
 511        result.push_str(&excerpt.text);
 512        if !result.ends_with('\n') {
 513            result.push('\n');
 514        }
 515        if excerpt.row_range.end < file.max_row {
 516            result.push_str("...\n");
 517        }
 518    }
 519
 520    result
 521}
 522
 523pub fn write_related_files(
 524    prompt: &mut String,
 525    related_files: &[RelatedFile],
 526) -> Vec<Range<usize>> {
 527    let mut ranges = Vec::new();
 528    for file in related_files {
 529        let start = prompt.len();
 530        let path_str = file.path.to_string_lossy();
 531        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 532        for excerpt in &file.excerpts {
 533            prompt.push_str(&excerpt.text);
 534            if !prompt.ends_with('\n') {
 535                prompt.push('\n');
 536            }
 537            if excerpt.row_range.end < file.max_row {
 538                prompt.push_str("...\n");
 539            }
 540        }
 541        let end = prompt.len();
 542        ranges.push(start..end);
 543    }
 544    ranges
 545}
 546
 547mod v0112_middle_at_end {
 548    use super::*;
 549
 550    pub fn write_cursor_excerpt_section(
 551        prompt: &mut String,
 552        path: &Path,
 553        context: &str,
 554        editable_range: &Range<usize>,
 555        cursor_offset: usize,
 556    ) {
 557        let path_str = path.to_string_lossy();
 558        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 559
 560        prompt.push_str("<|fim_prefix|>\n");
 561        prompt.push_str(&context[..editable_range.start]);
 562
 563        prompt.push_str("<|fim_suffix|>\n");
 564        prompt.push_str(&context[editable_range.end..]);
 565        if !prompt.ends_with('\n') {
 566            prompt.push('\n');
 567        }
 568
 569        prompt.push_str("<|fim_middle|>current\n");
 570        prompt.push_str(&context[editable_range.start..cursor_offset]);
 571        prompt.push_str(CURSOR_MARKER);
 572        prompt.push_str(&context[cursor_offset..editable_range.end]);
 573        if !prompt.ends_with('\n') {
 574            prompt.push('\n');
 575        }
 576
 577        prompt.push_str("<|fim_middle|>updated\n");
 578    }
 579}
 580
 581mod v0113_ordered {
 582    use super::*;
 583
 584    pub fn write_cursor_excerpt_section(
 585        prompt: &mut String,
 586        path: &Path,
 587        context: &str,
 588        editable_range: &Range<usize>,
 589        cursor_offset: usize,
 590    ) {
 591        let path_str = path.to_string_lossy();
 592        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 593
 594        prompt.push_str("<|fim_prefix|>\n");
 595        prompt.push_str(&context[..editable_range.start]);
 596        if !prompt.ends_with('\n') {
 597            prompt.push('\n');
 598        }
 599
 600        prompt.push_str("<|fim_middle|>current\n");
 601        prompt.push_str(&context[editable_range.start..cursor_offset]);
 602        prompt.push_str(CURSOR_MARKER);
 603        prompt.push_str(&context[cursor_offset..editable_range.end]);
 604        if !prompt.ends_with('\n') {
 605            prompt.push('\n');
 606        }
 607
 608        prompt.push_str("<|fim_suffix|>\n");
 609        prompt.push_str(&context[editable_range.end..]);
 610        if !prompt.ends_with('\n') {
 611            prompt.push('\n');
 612        }
 613
 614        prompt.push_str("<|fim_middle|>updated\n");
 615    }
 616}
 617
 618pub mod v0120_git_merge_markers {
 619    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 620    //!
 621    //! Example prompt:
 622    //!
 623    //! <|file_sep|>path/to/target_file.py
 624    //! <|fim_prefix|>
 625    //! code before editable region
 626    //! <|fim_suffix|>
 627    //! code after editable region
 628    //! <|fim_middle|>
 629    //! <<<<<<< CURRENT
 630    //! code that
 631    //! needs to<|user_cursor|>
 632    //! be rewritten
 633    //! =======
 634    //!
 635    //! Expected output (should be generated by the model):
 636    //!
 637    //! updated
 638    //! code with
 639    //! changes applied
 640    //! >>>>>>> UPDATED
 641
 642    use super::*;
 643
 644    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 645    pub const SEPARATOR: &str = "=======\n";
 646    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 647
 648    pub fn special_tokens() -> &'static [&'static str] {
 649        &[
 650            "<|fim_prefix|>",
 651            "<|fim_suffix|>",
 652            "<|fim_middle|>",
 653            "<|file_sep|>",
 654            START_MARKER,
 655            SEPARATOR,
 656            END_MARKER,
 657            CURSOR_MARKER,
 658        ]
 659    }
 660
 661    pub fn write_cursor_excerpt_section(
 662        prompt: &mut String,
 663        path: &Path,
 664        context: &str,
 665        editable_range: &Range<usize>,
 666        cursor_offset: usize,
 667    ) {
 668        let path_str = path.to_string_lossy();
 669        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 670
 671        prompt.push_str("<|fim_prefix|>");
 672        prompt.push_str(&context[..editable_range.start]);
 673
 674        prompt.push_str("<|fim_suffix|>");
 675        prompt.push_str(&context[editable_range.end..]);
 676        if !prompt.ends_with('\n') {
 677            prompt.push('\n');
 678        }
 679
 680        prompt.push_str("<|fim_middle|>");
 681        prompt.push_str(START_MARKER);
 682        prompt.push_str(&context[editable_range.start..cursor_offset]);
 683        prompt.push_str(CURSOR_MARKER);
 684        prompt.push_str(&context[cursor_offset..editable_range.end]);
 685        if !prompt.ends_with('\n') {
 686            prompt.push('\n');
 687        }
 688        prompt.push_str(SEPARATOR);
 689    }
 690}
 691
 692pub mod v0131_git_merge_markers_prefix {
 693    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 694    //!
 695    //! Example prompt:
 696    //!
 697    //! <|file_sep|>path/to/target_file.py
 698    //! <|fim_prefix|>
 699    //! code before editable region
 700    //! <<<<<<< CURRENT
 701    //! code that
 702    //! needs to<|user_cursor|>
 703    //! be rewritten
 704    //! =======
 705    //! <|fim_suffix|>
 706    //! code after editable region
 707    //! <|fim_middle|>
 708    //!
 709    //! Expected output (should be generated by the model):
 710    //!
 711    //! updated
 712    //! code with
 713    //! changes applied
 714    //! >>>>>>> UPDATED
 715
 716    use super::*;
 717
 718    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 719    pub const SEPARATOR: &str = "=======\n";
 720    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 721
 722    pub fn special_tokens() -> &'static [&'static str] {
 723        &[
 724            "<|fim_prefix|>",
 725            "<|fim_suffix|>",
 726            "<|fim_middle|>",
 727            "<|file_sep|>",
 728            START_MARKER,
 729            SEPARATOR,
 730            END_MARKER,
 731            CURSOR_MARKER,
 732        ]
 733    }
 734
 735    pub fn write_cursor_excerpt_section(
 736        prompt: &mut String,
 737        path: &Path,
 738        context: &str,
 739        editable_range: &Range<usize>,
 740        cursor_offset: usize,
 741    ) {
 742        let path_str = path.to_string_lossy();
 743        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 744
 745        prompt.push_str("<|fim_prefix|>");
 746        prompt.push_str(&context[..editable_range.start]);
 747        prompt.push_str(START_MARKER);
 748        prompt.push_str(&context[editable_range.start..cursor_offset]);
 749        prompt.push_str(CURSOR_MARKER);
 750        prompt.push_str(&context[cursor_offset..editable_range.end]);
 751        if !prompt.ends_with('\n') {
 752            prompt.push('\n');
 753        }
 754        prompt.push_str(SEPARATOR);
 755
 756        prompt.push_str("<|fim_suffix|>");
 757        prompt.push_str(&context[editable_range.end..]);
 758        if !prompt.ends_with('\n') {
 759            prompt.push('\n');
 760        }
 761
 762        prompt.push_str("<|fim_middle|>");
 763    }
 764}
 765
 766pub mod v0211_prefill {
 767    use super::*;
 768
 769    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
 770        let editable_region = &context[editable_range.start..editable_range.end];
 771
 772        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
 773        let prefill_len = editable_region.floor_char_boundary(prefill_len);
 774
 775        // Find a token boundary to avoid splitting tokens in the prefill.
 776        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
 777        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
 778        // the \n and consume any consecutive \n characters after it.
 779        let prefill = &editable_region[..prefill_len];
 780        match prefill.rfind('\n') {
 781            Some(pos) => {
 782                let mut end = pos + 1;
 783                while end < editable_region.len()
 784                    && editable_region.as_bytes().get(end) == Some(&b'\n')
 785                {
 786                    end += 1;
 787                }
 788                editable_region[..end].to_string()
 789            }
 790            // No newline found. Fall back to splitting before the last space
 791            // (word-level boundary)
 792            None => match prefill.rfind(' ') {
 793                Some(pos) => prefill[..pos].to_string(),
 794                None => prefill.to_string(),
 795            },
 796        }
 797    }
 798}
 799
 800pub mod seed_coder {
 801    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
 802    //!
 803    //! Seed-Coder uses different FIM tokens and order than Qwen:
 804    //! - SPM order: suffix comes FIRST, then prefix, then middle
 805    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
 806    //! - File markers: StarCoder-style `<filename>path` (single token + path)
 807    //!
 808    //! All context (related files, edit history) goes in the PREFIX section.
 809    //! The suffix contains only code after the editable region.
 810    //!
 811    //! Example prompt:
 812    //!
 813    //! <[fim-suffix]>
 814    //! code after editable region
 815    //! <[fim-prefix]><filename>related/file.py
 816    //! related file content
 817    //!
 818    //! <filename>edit_history
 819    //! --- a/some_file.py
 820    //! +++ b/some_file.py
 821    //! -old
 822    //! +new
 823    //!
 824    //! <filename>path/to/target_file.py
 825    //! code before editable region
 826    //! <<<<<<< CURRENT
 827    //! code that
 828    //! needs to<|user_cursor|>
 829    //! be rewritten
 830    //! =======
 831    //! <[fim-middle]>
 832    //!
 833    //! Expected output (model generates):
 834    //!
 835    //! updated
 836    //! code with
 837    //! changes applied
 838    //! >>>>>>> UPDATED
 839
 840    use super::*;
 841
 842    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
 843    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
 844    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
 845    pub const FILE_MARKER: &str = "<filename>";
 846
 847    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 848    pub const SEPARATOR: &str = "=======\n";
 849    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 850
 851    pub fn special_tokens() -> &'static [&'static str] {
 852        &[
 853            FIM_SUFFIX,
 854            FIM_PREFIX,
 855            FIM_MIDDLE,
 856            FILE_MARKER,
 857            START_MARKER,
 858            SEPARATOR,
 859            END_MARKER,
 860            CURSOR_MARKER,
 861        ]
 862    }
 863
 864    pub fn format_prompt_with_budget(
 865        path: &Path,
 866        context: &str,
 867        editable_range: &Range<usize>,
 868        cursor_offset: usize,
 869        events: &[Arc<Event>],
 870        related_files: &[RelatedFile],
 871        max_tokens: usize,
 872    ) -> String {
 873        let suffix_section = build_suffix_section(context, editable_range);
 874        let cursor_prefix_section =
 875            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
 876
 877        let suffix_tokens = estimate_tokens(suffix_section.len());
 878        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
 879        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
 880
 881        let edit_history_section = super::format_edit_history_within_budget(
 882            events,
 883            FILE_MARKER,
 884            "edit_history",
 885            budget_after_cursor,
 886        );
 887        let edit_history_tokens = estimate_tokens(edit_history_section.len());
 888        let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 889
 890        let related_files_section = super::format_related_files_within_budget(
 891            related_files,
 892            FILE_MARKER,
 893            "",
 894            budget_after_edit_history,
 895        );
 896
 897        let mut prompt = String::new();
 898        prompt.push_str(&suffix_section);
 899        prompt.push_str(FIM_PREFIX);
 900        prompt.push_str(&related_files_section);
 901        if !related_files_section.is_empty() {
 902            prompt.push('\n');
 903        }
 904        prompt.push_str(&edit_history_section);
 905        if !edit_history_section.is_empty() {
 906            prompt.push('\n');
 907        }
 908        prompt.push_str(&cursor_prefix_section);
 909        prompt.push_str(FIM_MIDDLE);
 910        prompt
 911    }
 912
 913    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
 914        let mut section = String::new();
 915        section.push_str(FIM_SUFFIX);
 916        section.push_str(&context[editable_range.end..]);
 917        if !section.ends_with('\n') {
 918            section.push('\n');
 919        }
 920        section
 921    }
 922
 923    fn build_cursor_prefix_section(
 924        path: &Path,
 925        context: &str,
 926        editable_range: &Range<usize>,
 927        cursor_offset: usize,
 928    ) -> String {
 929        let mut section = String::new();
 930        let path_str = path.to_string_lossy();
 931        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
 932
 933        section.push_str(&context[..editable_range.start]);
 934        section.push_str(START_MARKER);
 935        section.push_str(&context[editable_range.start..cursor_offset]);
 936        section.push_str(CURSOR_MARKER);
 937        section.push_str(&context[cursor_offset..editable_range.end]);
 938        if !section.ends_with('\n') {
 939            section.push('\n');
 940        }
 941        section.push_str(SEPARATOR);
 942        section
 943    }
 944}
 945
 946/// The zeta1 prompt format
 947pub mod zeta1 {
 948    use super::*;
 949    use std::fmt::Write;
 950
 951    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
 952    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
 953    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
 954    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
 955
 956    const INSTRUCTION_HEADER: &str = concat!(
 957        "### Instruction:\n",
 958        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
 959        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
 960        "into account the cursor location.\n\n",
 961        "### User Edits:\n\n"
 962    );
 963    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
 964    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
 965
 966    /// Formats a complete zeta1 prompt from the input events and excerpt.
 967    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
 968        let mut prompt = String::with_capacity(
 969            INSTRUCTION_HEADER.len()
 970                + input_events.len()
 971                + EXCERPT_HEADER.len()
 972                + input_excerpt.len()
 973                + RESPONSE_HEADER.len(),
 974        );
 975        prompt.push_str(INSTRUCTION_HEADER);
 976        prompt.push_str(input_events);
 977        prompt.push_str(EXCERPT_HEADER);
 978        prompt.push_str(input_excerpt);
 979        prompt.push_str(RESPONSE_HEADER);
 980        prompt
 981    }
 982
 983    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
 984    /// editable and context byte-offset ranges within `cursor_excerpt`.
 985    pub fn format_zeta1_from_input(
 986        input: &ZetaPromptInput,
 987        editable_range: Range<usize>,
 988        context_range: Range<usize>,
 989    ) -> String {
 990        let events = format_zeta1_events(&input.events);
 991        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
 992        format_zeta1_prompt(&events, &excerpt)
 993    }
 994
 995    /// Formats events in zeta1 style (oldest first).
 996    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
 997        let mut result = String::new();
 998        for event in events {
 999            let event_string = format_zeta1_event(event);
1000            if event_string.is_empty() {
1001                continue;
1002            }
1003            if !result.is_empty() {
1004                result.push_str("\n\n");
1005            }
1006            result.push_str(&event_string);
1007        }
1008        result
1009    }
1010
1011    fn format_zeta1_event(event: &Event) -> String {
1012        match event {
1013            Event::BufferChange {
1014                path,
1015                old_path,
1016                diff,
1017                ..
1018            } => {
1019                let mut prompt = String::new();
1020                if old_path != path {
1021                    writeln!(
1022                        prompt,
1023                        "User renamed {} to {}\n",
1024                        old_path.display(),
1025                        path.display()
1026                    )
1027                    .ok();
1028                }
1029                if !diff.is_empty() {
1030                    write!(
1031                        prompt,
1032                        "User edited {}:\n```diff\n{}\n```",
1033                        path.display(),
1034                        diff
1035                    )
1036                    .ok();
1037                }
1038                prompt
1039            }
1040        }
1041    }
1042
1043    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
1044    /// within `cursor_excerpt`.
1045    fn format_zeta1_excerpt(
1046        input: &ZetaPromptInput,
1047        editable_range: Range<usize>,
1048        context_range: Range<usize>,
1049    ) -> String {
1050        let path_str = input.cursor_path.to_string_lossy();
1051        let excerpt = &*input.cursor_excerpt;
1052        let cursor_offset = input.cursor_offset_in_excerpt;
1053
1054        let mut prompt = String::new();
1055        writeln!(&mut prompt, "```{path_str}").ok();
1056
1057        let starts_at_file_beginning =
1058            input.excerpt_start_row == Some(0) && context_range.start == 0;
1059        if starts_at_file_beginning {
1060            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
1061        }
1062
1063        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
1064
1065        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
1066        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
1067        prompt.push_str(CURSOR_MARKER);
1068        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
1069        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
1070
1071        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
1072        write!(prompt, "\n```").ok();
1073
1074        prompt
1075    }
1076
1077    /// Cleans zeta1 model output by extracting content between editable region
1078    /// markers and converting the zeta1 cursor marker to the universal one.
1079    /// Returns `None` if the output doesn't contain the expected markers.
1080    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
1081        let content = output.replace(CURSOR_MARKER, "");
1082
1083        let content_start = content
1084            .find(EDITABLE_REGION_START_MARKER)
1085            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
1086            .map(|pos| {
1087                if content.as_bytes().get(pos) == Some(&b'\n') {
1088                    pos + 1
1089                } else {
1090                    pos
1091                }
1092            })
1093            .unwrap_or(0);
1094
1095        let content_end = content
1096            .find(EDITABLE_REGION_END_MARKER)
1097            .map(|pos| {
1098                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
1099                    pos - 1
1100                } else {
1101                    pos
1102                }
1103            })
1104            .unwrap_or(content.len());
1105
1106        if content_start > content_end {
1107            return Some(String::new());
1108        }
1109
1110        let extracted = &content[content_start..content_end];
1111
1112        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
1113            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
1114            let text_before_cursor = text_before_cursor
1115                .find(EDITABLE_REGION_START_MARKER)
1116                .map(|pos| {
1117                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
1118                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
1119                        after_marker + 1
1120                    } else {
1121                        after_marker
1122                    }
1123                })
1124                .unwrap_or(0);
1125            let offset_in_extracted = zeta1_cursor_pos
1126                .saturating_sub(text_before_cursor)
1127                .min(extracted.len());
1128            offset_in_extracted
1129        });
1130
1131        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
1132        if let Some(offset) = cursor_offset {
1133            result.push_str(&extracted[..offset]);
1134            result.push_str(super::CURSOR_MARKER);
1135            result.push_str(&extracted[offset..]);
1136        } else {
1137            result.push_str(extracted);
1138        }
1139
1140        Some(result)
1141    }
1142}
1143
1144#[cfg(test)]
1145mod tests {
1146    use super::*;
1147    use indoc::indoc;
1148
1149    fn make_input(
1150        cursor_excerpt: &str,
1151        editable_range: Range<usize>,
1152        cursor_offset: usize,
1153        events: Vec<Event>,
1154        related_files: Vec<RelatedFile>,
1155    ) -> ZetaPromptInput {
1156        ZetaPromptInput {
1157            cursor_path: Path::new("test.rs").into(),
1158            cursor_excerpt: cursor_excerpt.into(),
1159            editable_range_in_excerpt: editable_range,
1160            cursor_offset_in_excerpt: cursor_offset,
1161            excerpt_start_row: None,
1162            events: events.into_iter().map(Arc::new).collect(),
1163            related_files,
1164            excerpt_ranges: None,
1165            preferred_model: None,
1166            in_open_source_repo: false,
1167            can_collect_data: false,
1168        }
1169    }
1170
1171    fn make_event(path: &str, diff: &str) -> Event {
1172        Event::BufferChange {
1173            path: Path::new(path).into(),
1174            old_path: Path::new(path).into(),
1175            diff: diff.to_string(),
1176            predicted: false,
1177            in_open_source_repo: false,
1178        }
1179    }
1180
1181    fn make_related_file(path: &str, content: &str) -> RelatedFile {
1182        RelatedFile {
1183            path: Path::new(path).into(),
1184            max_row: content.lines().count() as u32,
1185            excerpts: vec![RelatedExcerpt {
1186                row_range: 0..content.lines().count() as u32,
1187                text: content.into(),
1188                order: 0,
1189            }],
1190            in_open_source_repo: false,
1191        }
1192    }
1193
1194    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1195        format_zeta_prompt_with_budget(input, ZetaFormat::V0114180EditableRegion, max_tokens)
1196    }
1197
1198    #[test]
1199    fn test_no_truncation_when_within_budget() {
1200        let input = make_input(
1201            "prefix\neditable\nsuffix",
1202            7..15,
1203            10,
1204            vec![make_event("a.rs", "-old\n+new\n")],
1205            vec![make_related_file("related.rs", "fn helper() {}\n")],
1206        );
1207
1208        assert_eq!(
1209            format_with_budget(&input, 10000),
1210            indoc! {r#"
1211                <|file_sep|>related.rs
1212                fn helper() {}
1213                <|file_sep|>edit history
1214                --- a/a.rs
1215                +++ b/a.rs
1216                -old
1217                +new
1218                <|file_sep|>test.rs
1219                <|fim_prefix|>
1220                prefix
1221                <|fim_middle|>current
1222                edi<|user_cursor|>table
1223                <|fim_suffix|>
1224
1225                suffix
1226                <|fim_middle|>updated
1227            "#}
1228        );
1229    }
1230
1231    #[test]
1232    fn test_truncation_drops_edit_history_when_budget_tight() {
1233        let input = make_input(
1234            "code",
1235            0..4,
1236            2,
1237            vec![make_event("a.rs", "-x\n+y\n")],
1238            vec![
1239                make_related_file("r1.rs", "a\n"),
1240                make_related_file("r2.rs", "b\n"),
1241            ],
1242        );
1243
1244        assert_eq!(
1245            format_with_budget(&input, 10000),
1246            indoc! {r#"
1247                <|file_sep|>r1.rs
1248                a
1249                <|file_sep|>r2.rs
1250                b
1251                <|file_sep|>edit history
1252                --- a/a.rs
1253                +++ b/a.rs
1254                -x
1255                +y
1256                <|file_sep|>test.rs
1257                <|fim_prefix|>
1258                <|fim_middle|>current
1259                co<|user_cursor|>de
1260                <|fim_suffix|>
1261                <|fim_middle|>updated
1262            "#}
1263        );
1264
1265        assert_eq!(
1266            format_with_budget(&input, 50),
1267            indoc! {r#"
1268                <|file_sep|>r1.rs
1269                a
1270                <|file_sep|>r2.rs
1271                b
1272                <|file_sep|>test.rs
1273                <|fim_prefix|>
1274                <|fim_middle|>current
1275                co<|user_cursor|>de
1276                <|fim_suffix|>
1277                <|fim_middle|>updated
1278            "#}
1279        );
1280    }
1281
1282    #[test]
1283    fn test_truncation_includes_partial_excerpts() {
1284        let input = make_input(
1285            "x",
1286            0..1,
1287            0,
1288            vec![],
1289            vec![RelatedFile {
1290                path: Path::new("big.rs").into(),
1291                max_row: 30,
1292                in_open_source_repo: false,
1293                excerpts: vec![
1294                    RelatedExcerpt {
1295                        row_range: 0..10,
1296                        text: "first excerpt\n".into(),
1297                        order: 0,
1298                    },
1299                    RelatedExcerpt {
1300                        row_range: 10..20,
1301                        text: "second excerpt\n".into(),
1302                        order: 0,
1303                    },
1304                    RelatedExcerpt {
1305                        row_range: 20..30,
1306                        text: "third excerpt\n".into(),
1307                        order: 0,
1308                    },
1309                ],
1310            }],
1311        );
1312
1313        assert_eq!(
1314            format_with_budget(&input, 10000),
1315            indoc! {r#"
1316                <|file_sep|>big.rs
1317                first excerpt
1318                ...
1319                second excerpt
1320                ...
1321                third excerpt
1322                <|file_sep|>test.rs
1323                <|fim_prefix|>
1324                <|fim_middle|>current
1325                <|user_cursor|>x
1326                <|fim_suffix|>
1327                <|fim_middle|>updated
1328            "#}
1329        );
1330
1331        assert_eq!(
1332            format_with_budget(&input, 50),
1333            indoc! {r#"
1334                <|file_sep|>big.rs
1335                first excerpt
1336                ...
1337                <|file_sep|>test.rs
1338                <|fim_prefix|>
1339                <|fim_middle|>current
1340                <|user_cursor|>x
1341                <|fim_suffix|>
1342                <|fim_middle|>updated
1343            "#}
1344        );
1345    }
1346
1347    #[test]
1348    fn test_truncation_prioritizes_lower_order_excerpts() {
1349        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
1350        // With tight budget, only the lower-order excerpt from file_b should be included.
1351        let input = make_input(
1352            "x",
1353            0..1,
1354            0,
1355            vec![],
1356            vec![
1357                RelatedFile {
1358                    path: Path::new("file_a.rs").into(),
1359                    max_row: 10,
1360                    in_open_source_repo: false,
1361                    excerpts: vec![RelatedExcerpt {
1362                        row_range: 0..10,
1363                        text: "low priority content\n".into(),
1364                        order: 5,
1365                    }],
1366                },
1367                RelatedFile {
1368                    path: Path::new("file_b.rs").into(),
1369                    max_row: 10,
1370                    in_open_source_repo: false,
1371                    excerpts: vec![RelatedExcerpt {
1372                        row_range: 0..10,
1373                        text: "high priority content\n".into(),
1374                        order: 1,
1375                    }],
1376                },
1377            ],
1378        );
1379
1380        // With large budget, both files included; rendered in stable lexicographic order.
1381        assert_eq!(
1382            format_with_budget(&input, 10000),
1383            indoc! {r#"
1384                <|file_sep|>file_a.rs
1385                low priority content
1386                <|file_sep|>file_b.rs
1387                high priority content
1388                <|file_sep|>test.rs
1389                <|fim_prefix|>
1390                <|fim_middle|>current
1391                <|user_cursor|>x
1392                <|fim_suffix|>
1393                <|fim_middle|>updated
1394            "#}
1395        );
1396
1397        // With tight budget, only file_b (lower order) fits.
1398        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
1399        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
1400        // file_a would need another 14 tokens, which doesn't fit.
1401        assert_eq!(
1402            format_with_budget(&input, 52),
1403            indoc! {r#"
1404                <|file_sep|>file_b.rs
1405                high priority content
1406                <|file_sep|>test.rs
1407                <|fim_prefix|>
1408                <|fim_middle|>current
1409                <|user_cursor|>x
1410                <|fim_suffix|>
1411                <|fim_middle|>updated
1412            "#}
1413        );
1414    }
1415
1416    #[test]
1417    fn test_truncation_drops_high_order_excerpts_within_file() {
1418        // A single file has excerpts at order 1 and order 3. With a tight budget,
1419        // only the order-1 excerpts are included while the order-3 excerpt is
1420        // dropped — even though they belong to the same file. This also preserves
1421        // the parent invariant: parent outline items have order ≤ their best
1422        // child, so they're always included when any child is.
1423        let input = make_input(
1424            "x",
1425            0..1,
1426            0,
1427            vec![],
1428            vec![RelatedFile {
1429                path: Path::new("mod.rs").into(),
1430                max_row: 30,
1431                in_open_source_repo: false,
1432                excerpts: vec![
1433                    RelatedExcerpt {
1434                        row_range: 0..5,
1435                        text: "mod header\n".into(),
1436                        order: 1,
1437                    },
1438                    RelatedExcerpt {
1439                        row_range: 5..15,
1440                        text: "important fn\n".into(),
1441                        order: 1,
1442                    },
1443                    RelatedExcerpt {
1444                        row_range: 15..30,
1445                        text: "less important fn\n".into(),
1446                        order: 3,
1447                    },
1448                ],
1449            }],
1450        );
1451
1452        // With large budget, all three excerpts included.
1453        assert_eq!(
1454            format_with_budget(&input, 10000),
1455            indoc! {r#"
1456                <|file_sep|>mod.rs
1457                mod header
1458                ...
1459                important fn
1460                ...
1461                less important fn
1462                <|file_sep|>test.rs
1463                <|fim_prefix|>
1464                <|fim_middle|>current
1465                <|user_cursor|>x
1466                <|fim_suffix|>
1467                <|fim_middle|>updated
1468            "#}
1469        );
1470
1471        // With tight budget, only order<=1 excerpts included (header + important fn).
1472        assert_eq!(
1473            format_with_budget(&input, 55),
1474            indoc! {r#"
1475                <|file_sep|>mod.rs
1476                mod header
1477                ...
1478                important fn
1479                ...
1480                <|file_sep|>test.rs
1481                <|fim_prefix|>
1482                <|fim_middle|>current
1483                <|user_cursor|>x
1484                <|fim_suffix|>
1485                <|fim_middle|>updated
1486            "#}
1487        );
1488    }
1489
1490    #[test]
1491    fn test_truncation_drops_older_events_first() {
1492        let input = make_input(
1493            "x",
1494            0..1,
1495            0,
1496            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
1497            vec![],
1498        );
1499
1500        assert_eq!(
1501            format_with_budget(&input, 10000),
1502            indoc! {r#"
1503                <|file_sep|>edit history
1504                --- a/old.rs
1505                +++ b/old.rs
1506                -1
1507                --- a/new.rs
1508                +++ b/new.rs
1509                -2
1510                <|file_sep|>test.rs
1511                <|fim_prefix|>
1512                <|fim_middle|>current
1513                <|user_cursor|>x
1514                <|fim_suffix|>
1515                <|fim_middle|>updated
1516            "#}
1517        );
1518
1519        assert_eq!(
1520            format_with_budget(&input, 55),
1521            indoc! {r#"
1522                <|file_sep|>edit history
1523                --- a/new.rs
1524                +++ b/new.rs
1525                -2
1526                <|file_sep|>test.rs
1527                <|fim_prefix|>
1528                <|fim_middle|>current
1529                <|user_cursor|>x
1530                <|fim_suffix|>
1531                <|fim_middle|>updated
1532            "#}
1533        );
1534    }
1535
1536    #[test]
1537    fn test_cursor_excerpt_always_included_with_minimal_budget() {
1538        let input = make_input(
1539            "fn main() {}",
1540            0..12,
1541            3,
1542            vec![make_event("a.rs", "-old\n+new\n")],
1543            vec![make_related_file("related.rs", "helper\n")],
1544        );
1545
1546        assert_eq!(
1547            format_with_budget(&input, 30),
1548            indoc! {r#"
1549                <|file_sep|>test.rs
1550                <|fim_prefix|>
1551                <|fim_middle|>current
1552                fn <|user_cursor|>main() {}
1553                <|fim_suffix|>
1554                <|fim_middle|>updated
1555            "#}
1556        );
1557    }
1558
1559    fn format_seed_coder(input: &ZetaPromptInput) -> String {
1560        format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, 10000)
1561    }
1562
1563    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1564        format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, max_tokens)
1565    }
1566
1567    #[test]
1568    fn test_seed_coder_basic_format() {
1569        let input = make_input(
1570            "prefix\neditable\nsuffix",
1571            7..15,
1572            10,
1573            vec![make_event("a.rs", "-old\n+new\n")],
1574            vec![make_related_file("related.rs", "fn helper() {}\n")],
1575        );
1576
1577        assert_eq!(
1578            format_seed_coder(&input),
1579            indoc! {r#"
1580                <[fim-suffix]>
1581                suffix
1582                <[fim-prefix]><filename>related.rs
1583                fn helper() {}
1584
1585                <filename>edit_history
1586                --- a/a.rs
1587                +++ b/a.rs
1588                -old
1589                +new
1590
1591                <filename>test.rs
1592                prefix
1593                <<<<<<< CURRENT
1594                edi<|user_cursor|>table
1595                =======
1596                <[fim-middle]>"#}
1597        );
1598    }
1599
1600    #[test]
1601    fn test_seed_coder_no_context() {
1602        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
1603
1604        assert_eq!(
1605            format_seed_coder(&input),
1606            indoc! {r#"
1607                <[fim-suffix]>
1608                after
1609                <[fim-prefix]><filename>test.rs
1610                before
1611                <<<<<<< CURRENT
1612                mid<|user_cursor|>dle
1613                =======
1614                <[fim-middle]>"#}
1615        );
1616    }
1617
1618    #[test]
1619    fn test_seed_coder_truncation_drops_context() {
1620        let input = make_input(
1621            "code",
1622            0..4,
1623            2,
1624            vec![make_event("a.rs", "-x\n+y\n")],
1625            vec![make_related_file("r1.rs", "content\n")],
1626        );
1627
1628        // With large budget, everything is included
1629        assert_eq!(
1630            format_seed_coder(&input),
1631            indoc! {r#"
1632                <[fim-suffix]>
1633                <[fim-prefix]><filename>r1.rs
1634                content
1635
1636                <filename>edit_history
1637                --- a/a.rs
1638                +++ b/a.rs
1639                -x
1640                +y
1641
1642                <filename>test.rs
1643                <<<<<<< CURRENT
1644                co<|user_cursor|>de
1645                =======
1646                <[fim-middle]>"#}
1647        );
1648
1649        // With tight budget, context is dropped but cursor section remains
1650        assert_eq!(
1651            format_seed_coder_with_budget(&input, 30),
1652            indoc! {r#"
1653                <[fim-suffix]>
1654                <[fim-prefix]><filename>test.rs
1655                <<<<<<< CURRENT
1656                co<|user_cursor|>de
1657                =======
1658                <[fim-middle]>"#}
1659        );
1660    }
1661
1662    #[test]
1663    fn test_seed_coder_truncation_prioritizes_lower_order() {
1664        let input = make_input(
1665            "code",
1666            0..4,
1667            2,
1668            vec![],
1669            vec![
1670                RelatedFile {
1671                    path: Path::new("low_prio.rs").into(),
1672                    max_row: 5,
1673                    in_open_source_repo: false,
1674                    excerpts: vec![RelatedExcerpt {
1675                        row_range: 0..5,
1676                        text: "low prio\n".into(),
1677                        order: 10,
1678                    }],
1679                },
1680                RelatedFile {
1681                    path: Path::new("high_prio.rs").into(),
1682                    max_row: 5,
1683                    in_open_source_repo: false,
1684                    excerpts: vec![RelatedExcerpt {
1685                        row_range: 0..5,
1686                        text: "high prio\n".into(),
1687                        order: 1,
1688                    }],
1689                },
1690            ],
1691        );
1692
1693        // With large budget, both included; rendered in stable lexicographic order.
1694        assert_eq!(
1695            format_seed_coder(&input),
1696            indoc! {r#"
1697                <[fim-suffix]>
1698                <[fim-prefix]><filename>low_prio.rs
1699                low prio
1700                <filename>high_prio.rs
1701                high prio
1702
1703                <filename>test.rs
1704                <<<<<<< CURRENT
1705                co<|user_cursor|>de
1706                =======
1707                <[fim-middle]>"#}
1708        );
1709
1710        // With tight budget, only high_prio included.
1711        // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
1712        // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
1713        assert_eq!(
1714            format_seed_coder_with_budget(&input, 44),
1715            indoc! {r#"
1716                <[fim-suffix]>
1717                <[fim-prefix]><filename>high_prio.rs
1718                high prio
1719
1720                <filename>test.rs
1721                <<<<<<< CURRENT
1722                co<|user_cursor|>de
1723                =======
1724                <[fim-middle]>"#}
1725        );
1726    }
1727
1728    #[test]
1729    fn test_seed_coder_clean_output() {
1730        let output_with_marker = "new code\n>>>>>>> UPDATED\n";
1731        let output_without_marker = "new code\n";
1732
1733        assert_eq!(
1734            clean_zeta2_model_output(output_with_marker, ZetaFormat::V0211SeedCoder),
1735            "new code\n"
1736        );
1737        assert_eq!(
1738            clean_zeta2_model_output(output_without_marker, ZetaFormat::V0211SeedCoder),
1739            "new code\n"
1740        );
1741    }
1742
1743    #[test]
1744    fn test_format_zeta1_from_input_basic() {
1745        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
1746        let input = ZetaPromptInput {
1747            cursor_path: Path::new("src/main.rs").into(),
1748            cursor_excerpt: excerpt.into(),
1749            editable_range_in_excerpt: 15..41,
1750            cursor_offset_in_excerpt: 30,
1751            excerpt_start_row: Some(0),
1752            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
1753            related_files: vec![],
1754            excerpt_ranges: None,
1755            preferred_model: None,
1756            in_open_source_repo: false,
1757            can_collect_data: false,
1758        };
1759
1760        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
1761
1762        assert_eq!(
1763            prompt,
1764            concat!(
1765                "### Instruction:\n",
1766                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1767                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1768                "into account the cursor location.\n",
1769                "\n",
1770                "### User Edits:\n",
1771                "\n",
1772                "User edited other.rs:\n",
1773                "```diff\n",
1774                "-old\n",
1775                "+new\n",
1776                "\n",
1777                "```\n",
1778                "\n",
1779                "### User Excerpt:\n",
1780                "\n",
1781                "```src/main.rs\n",
1782                "<|start_of_file|>\n",
1783                "fn before() {}\n",
1784                "<|editable_region_start|>\n",
1785                "fn foo() {\n",
1786                "    <|user_cursor_is_here|>let x = 1;\n",
1787                "\n",
1788                "<|editable_region_end|>}\n",
1789                "fn after() {}\n",
1790                "\n",
1791                "```\n",
1792                "\n",
1793                "### Response:\n",
1794            ),
1795        );
1796    }
1797
1798    #[test]
1799    fn test_format_zeta1_from_input_no_start_of_file() {
1800        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
1801        let input = ZetaPromptInput {
1802            cursor_path: Path::new("src/main.rs").into(),
1803            cursor_excerpt: excerpt.into(),
1804            editable_range_in_excerpt: 0..28,
1805            cursor_offset_in_excerpt: 15,
1806            excerpt_start_row: Some(10),
1807            events: vec![],
1808            related_files: vec![],
1809            excerpt_ranges: None,
1810            preferred_model: None,
1811            in_open_source_repo: false,
1812            can_collect_data: false,
1813        };
1814
1815        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
1816
1817        assert_eq!(
1818            prompt,
1819            concat!(
1820                "### Instruction:\n",
1821                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1822                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1823                "into account the cursor location.\n",
1824                "\n",
1825                "### User Edits:\n",
1826                "\n",
1827                "\n",
1828                "\n",
1829                "### User Excerpt:\n",
1830                "\n",
1831                "```src/main.rs\n",
1832                "<|editable_region_start|>\n",
1833                "fn foo() {\n",
1834                "    <|user_cursor_is_here|>let x = 1;\n",
1835                "}\n",
1836                "\n",
1837                "<|editable_region_end|>\n",
1838                "```\n",
1839                "\n",
1840                "### Response:\n",
1841            ),
1842        );
1843    }
1844
1845    #[test]
1846    fn test_format_zeta1_from_input_with_sub_ranges() {
1847        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
1848        let editable_range = 10..37;
1849        let context_range = 0..excerpt.len();
1850
1851        let input = ZetaPromptInput {
1852            cursor_path: Path::new("test.rs").into(),
1853            cursor_excerpt: excerpt.into(),
1854            editable_range_in_excerpt: editable_range.clone(),
1855            cursor_offset_in_excerpt: 25,
1856            excerpt_start_row: Some(0),
1857            events: vec![],
1858            related_files: vec![],
1859            excerpt_ranges: None,
1860            preferred_model: None,
1861            in_open_source_repo: false,
1862            can_collect_data: false,
1863        };
1864
1865        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
1866
1867        assert_eq!(
1868            prompt,
1869            concat!(
1870                "### Instruction:\n",
1871                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1872                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1873                "into account the cursor location.\n",
1874                "\n",
1875                "### User Edits:\n",
1876                "\n",
1877                "\n",
1878                "\n",
1879                "### User Excerpt:\n",
1880                "\n",
1881                "```test.rs\n",
1882                "<|start_of_file|>\n",
1883                "// prefix\n",
1884                "<|editable_region_start|>\n",
1885                "fn foo() {\n",
1886                "    <|user_cursor_is_here|>let x = 1;\n",
1887                "}\n",
1888                "<|editable_region_end|>\n",
1889                "// suffix\n",
1890                "\n",
1891                "```\n",
1892                "\n",
1893                "### Response:\n",
1894            ),
1895        );
1896    }
1897
1898    #[test]
1899    fn test_clean_zeta1_model_output_basic() {
1900        let output = indoc! {"
1901            <|editable_region_start|>
1902            fn main() {
1903                println!(\"hello\");
1904            }
1905            <|editable_region_end|>
1906        "};
1907
1908        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1909        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
1910    }
1911
1912    #[test]
1913    fn test_clean_zeta1_model_output_with_cursor() {
1914        let output = indoc! {"
1915            <|editable_region_start|>
1916            fn main() {
1917                <|user_cursor_is_here|>println!(\"hello\");
1918            }
1919            <|editable_region_end|>
1920        "};
1921
1922        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1923        assert_eq!(
1924            cleaned,
1925            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
1926        );
1927    }
1928
1929    #[test]
1930    fn test_clean_zeta1_model_output_no_markers() {
1931        let output = "fn main() {}\n";
1932        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1933        assert_eq!(cleaned, "fn main() {}\n");
1934    }
1935
1936    #[test]
1937    fn test_clean_zeta1_model_output_empty_region() {
1938        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
1939        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1940        assert_eq!(cleaned, "");
1941    }
1942}