zeta_prompt.rs

   1use anyhow::Result;
   2use serde::{Deserialize, Serialize};
   3use std::fmt::Write;
   4use std::ops::Range;
   5use std::path::Path;
   6use std::sync::Arc;
   7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
   8
/// Marker string inserted into the prompt at the user's cursor position
/// (between the text before and after the cursor offset).
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes when building a prompt.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  16
  17fn estimate_tokens(bytes: usize) -> usize {
  18    bytes / 3
  19}
  20
/// The client's preferred edit prediction model. The server may override this.
///
/// Carried in `ZetaPromptInput::preferred_model`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum EditPredictionModelKind {
    // NOTE(review): the variants presumably name model generations; the code
    // visible here never branches on them, so confirm semantics with the server.
    Zeta1,
    Zeta2,
}
  27
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Editable region computed with a 512-token budget. May be absent.
    pub editable_512: Option<Range<usize>>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
    /// Context boundary when using editable_350 with 512 tokens of additional context.
    /// May be absent.
    pub editable_350_context_512: Option<Range<usize>>,
    /// Context boundary when using editable_350 with 1024 tokens of additional context.
    /// May be absent.
    pub editable_350_context_1024: Option<Range<usize>>,
    /// NOTE(review): presumably a context range computed with a 4096-token
    /// budget, not tied to a specific editable region — confirm with the producer.
    pub context_4096: Option<Range<usize>>,
    /// NOTE(review): presumably a context range computed with an 8192-token
    /// budget, not tied to a specific editable region — confirm with the producer.
    pub context_8192: Option<Range<usize>>,
}
  52
/// Everything needed to render an edit-prediction prompt: the excerpt around
/// the cursor, recent edit events, and excerpts from related files.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written into the prompt's file header.
    pub cursor_path: Arc<Path>,
    /// Text around the cursor. The ranges and offsets below index into this
    /// string by byte.
    pub cursor_excerpt: Arc<str>,
    /// Editable byte range within `cursor_excerpt`; used as-is when
    /// `excerpt_ranges` is absent.
    pub editable_range_in_excerpt: Range<usize>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    // NOTE(review): presumably the file row at which `cursor_excerpt` starts;
    // it is not read by the code visible here — confirm against callers.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events rendered into the prompt's edit-history section, subject to
    /// the token budget.
    pub events: Vec<Arc<Event>>,
    /// Related files whose excerpts are rendered into the prompt, subject to
    /// the token budget.
    pub related_files: Vec<RelatedFile>,
    /// When set, the excerpt was computed with a larger budget (~512 tokens)
    /// and these ranges let the server select model-appropriate subsets.
    /// When absent, the excerpt IS the context region and
    /// `editable_range_in_excerpt` is the only editable range.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_ranges: Option<ExcerptRanges>,
    /// Client's preferred model. The server may override.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub preferred_model: Option<EditPredictionModelKind>,
    // NOTE(review): not consulted by the prompt-formatting code visible here;
    // defaults to `false` when missing from the serialized input.
    #[serde(default)]
    pub in_open_source_repo: bool,
    // NOTE(review): not consulted by the prompt-formatting code visible here;
    // defaults to `false` when missing from the serialized input.
    #[serde(default)]
    pub can_collect_data: bool,
}
  77
/// Identifies which prompt layout to render. Each variant is dispatched to one
/// of the format modules in this file.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    /// Rendered by `v0112_middle_at_end`: prefix, suffix, then the editable region last.
    V0112MiddleAtEnd,
    /// Rendered by `v0113_ordered`: prefix, editable region, suffix in document order.
    V0113Ordered,
    /// Uses the same cursor-section layout as `V0113Ordered`.
    V0114180EditableRegion,
    /// Rendered by `v0120_git_merge_markers`.
    V0120GitMergeMarkers,
    /// Rendered by `v0131_git_merge_markers_prefix`. This is the default format.
    #[default]
    V0131GitMergeMarkersPrefix,
    /// Same prompt layout as `V0131GitMergeMarkersPrefix`; additionally yields a
    /// non-empty prefill from `get_prefill`.
    V0211Prefill,
    /// Rendered entirely by `seed_coder::format_prompt_with_budget`.
    V0211SeedCoder,
}
 102
 103impl std::fmt::Display for ZetaFormat {
 104    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 105        write!(f, "{}", <&'static str>::from(self))
 106    }
 107}
 108
 109impl ZetaFormat {
 110    pub fn parse(format_name: &str) -> Result<Self> {
 111        let mut results = ZetaFormat::iter().filter(|version| {
 112            <&'static str>::from(version)
 113                .to_lowercase()
 114                .contains(&format_name.to_lowercase())
 115        });
 116        let Some(result) = results.next() else {
 117            anyhow::bail!(
 118                "`{format_name}` did not match any of:\n{}",
 119                Self::options_as_string()
 120            );
 121        };
 122        if results.next().is_some() {
 123            anyhow::bail!(
 124                "`{format_name}` matched more than one of:\n{}",
 125                Self::options_as_string()
 126            );
 127        }
 128        Ok(result)
 129    }
 130
 131    pub fn options_as_string() -> String {
 132        ZetaFormat::iter()
 133            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 134            .collect::<Vec<_>>()
 135            .concat()
 136    }
 137
 138    pub fn special_tokens(&self) -> &'static [&'static str] {
 139        match self {
 140            ZetaFormat::V0112MiddleAtEnd
 141            | ZetaFormat::V0113Ordered
 142            | ZetaFormat::V0114180EditableRegion => &[
 143                "<|fim_prefix|>",
 144                "<|fim_suffix|>",
 145                "<|fim_middle|>",
 146                "<|file_sep|>",
 147                CURSOR_MARKER,
 148            ],
 149            ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 150            ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 151                v0131_git_merge_markers_prefix::special_tokens()
 152            }
 153            ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 154        }
 155    }
 156}
 157
/// An editing event that can be rendered into the prompt's edit history.
///
/// Serialized with an internal `"event"` tag naming the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, carried as diff text.
    BufferChange {
        /// Path after the change; `write_event` renders it on the `+++ b` line.
        path: Arc<Path>,
        /// Path before the change; `write_event` renders it on the `--- a` line.
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the two path lines.
        diff: String,
        /// When true, `write_event` prefixes the event with
        /// `// User accepted prediction:`.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; `write_event` does not
        /// render it.
        in_open_source_repo: bool,
    },
}
 169
 170impl Event {
 171    pub fn in_open_source_repo(&self) -> bool {
 172        match self {
 173            Event::BufferChange {
 174                in_open_source_repo,
 175                ..
 176            } => *in_open_source_repo,
 177        }
 178    }
 179}
 180
 181pub fn write_event(prompt: &mut String, event: &Event) {
 182    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 183        for component in path.components() {
 184            prompt.push('/');
 185            write!(prompt, "{}", component.as_os_str().display()).ok();
 186        }
 187    }
 188    match event {
 189        Event::BufferChange {
 190            path,
 191            old_path,
 192            diff,
 193            predicted,
 194            in_open_source_repo: _,
 195        } => {
 196            if *predicted {
 197                prompt.push_str("// User accepted prediction:\n");
 198            }
 199            prompt.push_str("--- a");
 200            write_path_as_unix_str(prompt, old_path.as_ref());
 201            prompt.push_str("\n+++ b");
 202            write_path_as_unix_str(prompt, path.as_ref());
 203            prompt.push('\n');
 204            prompt.push_str(diff);
 205        }
 206    }
 207}
 208
/// A file related to the cursor location, with the excerpts of it that may be
/// rendered into the prompt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written after the file marker in the file's prompt header.
    pub path: Arc<Path>,
    /// Row compared against each excerpt's `row_range.end`: excerpts that end
    /// before it are followed by a `...` line when rendered.
    pub max_row: u32,
    /// Excerpts of this file, rendered in the order they appear here.
    pub excerpts: Vec<RelatedExcerpt>,
    // NOTE(review): not consulted by the rendering code visible here; defaults
    // to `false` when missing from the serialized input.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 217
/// A contiguous piece of a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by this excerpt; only `end` is read when rendering, to
    /// decide whether a trailing `...` line is needed.
    pub row_range: Range<u32>,
    /// Excerpt text, rendered verbatim.
    pub text: Arc<str>,
    /// Priority used when fitting excerpts into a token budget: lower values
    /// are included first. Defaults to 0 when missing from the serialized input.
    #[serde(default)]
    pub order: usize,
}
 225
 226pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 227    format
 228        .special_tokens()
 229        .iter()
 230        .any(|token| input.cursor_excerpt.contains(token))
 231}
 232
 233pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 234    format_zeta_prompt_with_budget(input, format, MAX_PROMPT_TOKENS)
 235}
 236
 237/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
 238pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
 239    match format {
 240        ZetaFormat::V0120GitMergeMarkers => output
 241            .strip_suffix(v0120_git_merge_markers::END_MARKER)
 242            .unwrap_or(output),
 243        ZetaFormat::V0131GitMergeMarkersPrefix => output
 244            .strip_suffix(v0131_git_merge_markers_prefix::END_MARKER)
 245            .unwrap_or(output),
 246        ZetaFormat::V0211SeedCoder => output
 247            .strip_suffix(seed_coder::END_MARKER)
 248            .unwrap_or(output),
 249        _ => output,
 250    }
 251}
 252
 253pub fn excerpt_range_for_format(
 254    format: ZetaFormat,
 255    ranges: &ExcerptRanges,
 256) -> (Range<usize>, Range<usize>) {
 257    match format {
 258        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 259            ranges.editable_150.clone(),
 260            ranges.editable_150_context_350.clone(),
 261        ),
 262        ZetaFormat::V0114180EditableRegion
 263        | ZetaFormat::V0120GitMergeMarkers
 264        | ZetaFormat::V0131GitMergeMarkersPrefix
 265        | ZetaFormat::V0211Prefill
 266        | ZetaFormat::V0211SeedCoder => (
 267            ranges.editable_350.clone(),
 268            ranges.editable_350_context_150.clone(),
 269        ),
 270    }
 271}
 272
 273pub fn resolve_cursor_region(
 274    input: &ZetaPromptInput,
 275    format: ZetaFormat,
 276) -> (&str, Range<usize>, usize) {
 277    let Some(ranges) = &input.excerpt_ranges else {
 278        return (
 279            &input.cursor_excerpt,
 280            input.editable_range_in_excerpt.clone(),
 281            input.cursor_offset_in_excerpt,
 282        );
 283    };
 284
 285    let (editable_range, context_range) = excerpt_range_for_format(format, ranges);
 286    let context_start = context_range.start;
 287    let context_text = &input.cursor_excerpt[context_range];
 288    let adjusted_editable =
 289        (editable_range.start - context_start)..(editable_range.end - context_start);
 290    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 291
 292    (context_text, adjusted_editable, adjusted_cursor)
 293}
 294
 295fn format_zeta_prompt_with_budget(
 296    input: &ZetaPromptInput,
 297    format: ZetaFormat,
 298    max_tokens: usize,
 299) -> String {
 300    let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
 301    let path = &*input.cursor_path;
 302
 303    let mut cursor_section = String::new();
 304    match format {
 305        ZetaFormat::V0112MiddleAtEnd => {
 306            v0112_middle_at_end::write_cursor_excerpt_section(
 307                &mut cursor_section,
 308                path,
 309                context,
 310                &editable_range,
 311                cursor_offset,
 312            );
 313        }
 314        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 315            v0113_ordered::write_cursor_excerpt_section(
 316                &mut cursor_section,
 317                path,
 318                context,
 319                &editable_range,
 320                cursor_offset,
 321            )
 322        }
 323        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 324            &mut cursor_section,
 325            path,
 326            context,
 327            &editable_range,
 328            cursor_offset,
 329        ),
 330        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 331            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 332                &mut cursor_section,
 333                path,
 334                context,
 335                &editable_range,
 336                cursor_offset,
 337            )
 338        }
 339        ZetaFormat::V0211SeedCoder => {
 340            return seed_coder::format_prompt_with_budget(
 341                path,
 342                context,
 343                &editable_range,
 344                cursor_offset,
 345                &input.events,
 346                &input.related_files,
 347                max_tokens,
 348            );
 349        }
 350    }
 351
 352    let cursor_tokens = estimate_tokens(cursor_section.len());
 353    let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
 354
 355    let edit_history_section = format_edit_history_within_budget(
 356        &input.events,
 357        "<|file_sep|>",
 358        "edit history",
 359        budget_after_cursor,
 360    );
 361    let edit_history_tokens = estimate_tokens(edit_history_section.len());
 362    let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 363
 364    let related_files_section = format_related_files_within_budget(
 365        &input.related_files,
 366        "<|file_sep|>",
 367        budget_after_edit_history,
 368    );
 369
 370    let mut prompt = String::new();
 371    prompt.push_str(&related_files_section);
 372    prompt.push_str(&edit_history_section);
 373    prompt.push_str(&cursor_section);
 374    prompt
 375}
 376
 377pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 378    match format {
 379        ZetaFormat::V0112MiddleAtEnd
 380        | ZetaFormat::V0113Ordered
 381        | ZetaFormat::V0114180EditableRegion
 382        | ZetaFormat::V0120GitMergeMarkers
 383        | ZetaFormat::V0131GitMergeMarkersPrefix
 384        | ZetaFormat::V0211SeedCoder => String::new(),
 385        ZetaFormat::V0211Prefill => {
 386            let (context, editable_range, _) = resolve_cursor_region(input, format);
 387            v0211_prefill::get_prefill(context, &editable_range)
 388        }
 389    }
 390}
 391
 392fn format_edit_history_within_budget(
 393    events: &[Arc<Event>],
 394    file_marker: &str,
 395    edit_history_name: &str,
 396    max_tokens: usize,
 397) -> String {
 398    let header = format!("{}{}\n", file_marker, edit_history_name);
 399    let header_tokens = estimate_tokens(header.len());
 400    if header_tokens >= max_tokens {
 401        return String::new();
 402    }
 403
 404    let mut event_strings: Vec<String> = Vec::new();
 405    let mut total_tokens = header_tokens;
 406
 407    for event in events.iter().rev() {
 408        let mut event_str = String::new();
 409        write_event(&mut event_str, event);
 410        let event_tokens = estimate_tokens(event_str.len());
 411
 412        if total_tokens + event_tokens > max_tokens {
 413            break;
 414        }
 415        total_tokens += event_tokens;
 416        event_strings.push(event_str);
 417    }
 418
 419    if event_strings.is_empty() {
 420        return String::new();
 421    }
 422
 423    let mut result = header;
 424    for event_str in event_strings.iter().rev() {
 425        result.push_str(event_str);
 426    }
 427    result
 428}
 429
 430fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
 431    let needs_newline = !excerpt.text.ends_with('\n');
 432    let needs_ellipsis = excerpt.row_range.end < file_max_row;
 433    let len = excerpt.text.len()
 434        + if needs_newline { "\n".len() } else { 0 }
 435        + if needs_ellipsis { "...\n".len() } else { 0 };
 436    estimate_tokens(len)
 437}
 438
/// Renders the related-files section, choosing which excerpts to include so
/// that the estimated size stays within `max_tokens`.
///
/// Excerpts are admitted by ascending `order` value. A whole order level is
/// admitted if it fits; otherwise a positional prefix of each file's excerpts
/// at that level is admitted and no further levels are considered. A file's
/// header (`{file_marker}{path}\n`) is charged once, the first time one of its
/// excerpts is admitted.
///
/// Included files are rendered sorted by the lowest `order` among their
/// included excerpts (ties broken by original file index); within a file,
/// excerpts keep their original positions. Returns an empty string when
/// nothing is included.
fn format_related_files_within_budget(
    related_files: &[RelatedFile],
    file_marker: &str,
    max_tokens: usize,
) -> String {
    // Collect the distinct order values across all excerpts, sorted ascending.
    let mut order_levels: Vec<usize> = related_files
        .iter()
        .flat_map(|f| f.excerpts.iter().map(|e| e.order))
        .collect();
    order_levels.sort_unstable();
    order_levels.dedup();

    // Pre-compute file header strings; their token cost is estimated on
    // demand below, the first time a file is about to be included.
    let file_headers: Vec<String> = related_files
        .iter()
        .map(|file| {
            let path_str = file.path.to_string_lossy();
            format!("{}{}\n", file_marker, path_str)
        })
        .collect();

    // Track which excerpts are included per file.
    let mut included: Vec<Vec<bool>> = related_files
        .iter()
        .map(|file| vec![false; file.excerpts.len()])
        .collect();
    // Whether a file's header has already been charged against the budget.
    let mut file_included: Vec<bool> = vec![false; related_files.len()];
    // Running estimate of the tokens consumed by everything included so far.
    let mut total_tokens = 0;

    // Process order levels from best (lowest) to worst. At each level, try to
    // include all not-yet-included excerpts with that order across all files.
    // If the full level doesn't fit, include a partial prefix (top-to-bottom
    // within each file) and stop — don't proceed to worse order levels.
    'outer: for &order in &order_levels {
        // Gather the work for this order level: for each file that has excerpts
        // at this order, collect the not-yet-included excerpt indices (in their
        // original positional order) and the token cost to add them (including
        // the file header if the file isn't already included).
        struct FileWork {
            file_idx: usize,
            excerpt_indices: Vec<usize>,
            header_cost: usize,
            // Parallel to `excerpt_indices`: cost of the excerpt at the same position.
            excerpt_costs: Vec<usize>,
        }

        let mut work_items: Vec<FileWork> = Vec::new();
        for (file_idx, file) in related_files.iter().enumerate() {
            let mut excerpt_indices = Vec::new();
            let mut excerpt_costs = Vec::new();
            for (eidx, excerpt) in file.excerpts.iter().enumerate() {
                if excerpt.order == order && !included[file_idx][eidx] {
                    excerpt_indices.push(eidx);
                    excerpt_costs.push(excerpt_rendered_tokens(excerpt, file.max_row));
                }
            }
            if excerpt_indices.is_empty() {
                continue;
            }
            // The header is free if an earlier level already pulled this file in.
            let header_cost = if file_included[file_idx] {
                0
            } else {
                estimate_tokens(file_headers[file_idx].len())
            };
            work_items.push(FileWork {
                file_idx,
                excerpt_indices,
                header_cost,
                excerpt_costs,
            });
        }

        // Compute the total cost for this entire order level.
        let level_cost: usize = work_items
            .iter()
            .map(|w| w.header_cost + w.excerpt_costs.iter().sum::<usize>())
            .sum();

        if total_tokens + level_cost <= max_tokens {
            // The whole level fits — include everything.
            for work in &work_items {
                total_tokens += work.header_cost;
                file_included[work.file_idx] = true;
                for (i, &eidx) in work.excerpt_indices.iter().enumerate() {
                    included[work.file_idx][eidx] = true;
                    total_tokens += work.excerpt_costs[i];
                }
            }
        } else {
            // The whole level doesn't fit. Include as many excerpts as possible
            // from each file (in positional order), then stop entirely.
            for work in &work_items {
                // Recomputed per file because earlier files in this loop may
                // already have consumed part of the remaining budget.
                let available = max_tokens.saturating_sub(total_tokens);
                let mut file_cost = work.header_cost;

                // Number of leading excerpts (plus header) that fit in `available`.
                let mut count = 0;
                for i in 0..work.excerpt_indices.len() {
                    if file_cost + work.excerpt_costs[i] > available {
                        break;
                    }
                    file_cost += work.excerpt_costs[i];
                    count += 1;
                }

                // A file is only charged for its header if at least one
                // excerpt made it in.
                if count > 0 {
                    total_tokens += work.header_cost;
                    file_included[work.file_idx] = true;
                    for (i, &eidx) in work.excerpt_indices.iter().take(count).enumerate() {
                        included[work.file_idx][eidx] = true;
                        total_tokens += work.excerpt_costs[i];
                    }
                }
            }
            break 'outer;
        }
    }

    // Determine file rendering order: by the best (lowest) order of any
    // included excerpt, breaking ties by original file index.
    let mut file_order: Vec<(usize, usize)> = Vec::new();
    for (file_idx, file) in related_files.iter().enumerate() {
        if !file_included[file_idx] {
            continue;
        }
        let best_order = file
            .excerpts
            .iter()
            .enumerate()
            .filter(|(eidx, _)| included[file_idx][*eidx])
            .map(|(_, e)| e.order)
            .min()
            .unwrap_or(usize::MAX);
        file_order.push((file_idx, best_order));
    }
    file_order.sort_by_key(|&(file_idx, best_order)| (best_order, file_idx));

    // Render included files and excerpts in positional order within each file.
    let mut result = String::new();
    for &(file_idx, _) in &file_order {
        let file = &related_files[file_idx];
        result.push_str(&file_headers[file_idx]);
        for (eidx, excerpt) in file.excerpts.iter().enumerate() {
            if !included[file_idx][eidx] {
                continue;
            }
            result.push_str(&excerpt.text);
            // Checks the accumulated output, not just this excerpt's text.
            if !result.ends_with('\n') {
                result.push('\n');
            }
            // Mark that the file continues past this excerpt.
            if excerpt.row_range.end < file.max_row {
                result.push_str("...\n");
            }
        }
    }

    result
}
 596
 597pub fn write_related_files(
 598    prompt: &mut String,
 599    related_files: &[RelatedFile],
 600) -> Vec<Range<usize>> {
 601    let mut ranges = Vec::new();
 602    for file in related_files {
 603        let start = prompt.len();
 604        let path_str = file.path.to_string_lossy();
 605        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 606        for excerpt in &file.excerpts {
 607            prompt.push_str(&excerpt.text);
 608            if !prompt.ends_with('\n') {
 609                prompt.push('\n');
 610            }
 611            if excerpt.row_range.end < file.max_row {
 612                prompt.push_str("...\n");
 613            }
 614        }
 615        let end = prompt.len();
 616        ranges.push(start..end);
 617    }
 618    ranges
 619}
 620
 621mod v0112_middle_at_end {
 622    use super::*;
 623
 624    pub fn write_cursor_excerpt_section(
 625        prompt: &mut String,
 626        path: &Path,
 627        context: &str,
 628        editable_range: &Range<usize>,
 629        cursor_offset: usize,
 630    ) {
 631        let path_str = path.to_string_lossy();
 632        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 633
 634        prompt.push_str("<|fim_prefix|>\n");
 635        prompt.push_str(&context[..editable_range.start]);
 636
 637        prompt.push_str("<|fim_suffix|>\n");
 638        prompt.push_str(&context[editable_range.end..]);
 639        if !prompt.ends_with('\n') {
 640            prompt.push('\n');
 641        }
 642
 643        prompt.push_str("<|fim_middle|>current\n");
 644        prompt.push_str(&context[editable_range.start..cursor_offset]);
 645        prompt.push_str(CURSOR_MARKER);
 646        prompt.push_str(&context[cursor_offset..editable_range.end]);
 647        if !prompt.ends_with('\n') {
 648            prompt.push('\n');
 649        }
 650
 651        prompt.push_str("<|fim_middle|>updated\n");
 652    }
 653}
 654
 655mod v0113_ordered {
 656    use super::*;
 657
 658    pub fn write_cursor_excerpt_section(
 659        prompt: &mut String,
 660        path: &Path,
 661        context: &str,
 662        editable_range: &Range<usize>,
 663        cursor_offset: usize,
 664    ) {
 665        let path_str = path.to_string_lossy();
 666        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 667
 668        prompt.push_str("<|fim_prefix|>\n");
 669        prompt.push_str(&context[..editable_range.start]);
 670        if !prompt.ends_with('\n') {
 671            prompt.push('\n');
 672        }
 673
 674        prompt.push_str("<|fim_middle|>current\n");
 675        prompt.push_str(&context[editable_range.start..cursor_offset]);
 676        prompt.push_str(CURSOR_MARKER);
 677        prompt.push_str(&context[cursor_offset..editable_range.end]);
 678        if !prompt.ends_with('\n') {
 679            prompt.push('\n');
 680        }
 681
 682        prompt.push_str("<|fim_suffix|>\n");
 683        prompt.push_str(&context[editable_range.end..]);
 684        if !prompt.ends_with('\n') {
 685            prompt.push('\n');
 686        }
 687
 688        prompt.push_str("<|fim_middle|>updated\n");
 689    }
 690}
 691
 692pub mod v0120_git_merge_markers {
 693    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 694    //!
 695    //! Example prompt:
 696    //!
 697    //! <|file_sep|>path/to/target_file.py
 698    //! <|fim_prefix|>
 699    //! code before editable region
 700    //! <|fim_suffix|>
 701    //! code after editable region
 702    //! <|fim_middle|>
 703    //! <<<<<<< CURRENT
 704    //! code that
 705    //! needs to<|user_cursor|>
 706    //! be rewritten
 707    //! =======
 708    //!
 709    //! Expected output (should be generated by the model):
 710    //!
 711    //! updated
 712    //! code with
 713    //! changes applied
 714    //! >>>>>>> UPDATED
 715
 716    use super::*;
 717
 718    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 719    pub const SEPARATOR: &str = "=======\n";
 720    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 721
 722    pub fn special_tokens() -> &'static [&'static str] {
 723        &[
 724            "<|fim_prefix|>",
 725            "<|fim_suffix|>",
 726            "<|fim_middle|>",
 727            "<|file_sep|>",
 728            START_MARKER,
 729            SEPARATOR,
 730            END_MARKER,
 731            CURSOR_MARKER,
 732        ]
 733    }
 734
 735    pub fn write_cursor_excerpt_section(
 736        prompt: &mut String,
 737        path: &Path,
 738        context: &str,
 739        editable_range: &Range<usize>,
 740        cursor_offset: usize,
 741    ) {
 742        let path_str = path.to_string_lossy();
 743        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 744
 745        prompt.push_str("<|fim_prefix|>");
 746        prompt.push_str(&context[..editable_range.start]);
 747
 748        prompt.push_str("<|fim_suffix|>");
 749        prompt.push_str(&context[editable_range.end..]);
 750        if !prompt.ends_with('\n') {
 751            prompt.push('\n');
 752        }
 753
 754        prompt.push_str("<|fim_middle|>");
 755        prompt.push_str(START_MARKER);
 756        prompt.push_str(&context[editable_range.start..cursor_offset]);
 757        prompt.push_str(CURSOR_MARKER);
 758        prompt.push_str(&context[cursor_offset..editable_range.end]);
 759        if !prompt.ends_with('\n') {
 760            prompt.push('\n');
 761        }
 762        prompt.push_str(SEPARATOR);
 763    }
 764}
 765
 766pub mod v0131_git_merge_markers_prefix {
 767    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 768    //!
 769    //! Example prompt:
 770    //!
 771    //! <|file_sep|>path/to/target_file.py
 772    //! <|fim_prefix|>
 773    //! code before editable region
 774    //! <<<<<<< CURRENT
 775    //! code that
 776    //! needs to<|user_cursor|>
 777    //! be rewritten
 778    //! =======
 779    //! <|fim_suffix|>
 780    //! code after editable region
 781    //! <|fim_middle|>
 782    //!
 783    //! Expected output (should be generated by the model):
 784    //!
 785    //! updated
 786    //! code with
 787    //! changes applied
 788    //! >>>>>>> UPDATED
 789
 790    use super::*;
 791
 792    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 793    pub const SEPARATOR: &str = "=======\n";
 794    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 795
 796    pub fn special_tokens() -> &'static [&'static str] {
 797        &[
 798            "<|fim_prefix|>",
 799            "<|fim_suffix|>",
 800            "<|fim_middle|>",
 801            "<|file_sep|>",
 802            START_MARKER,
 803            SEPARATOR,
 804            END_MARKER,
 805            CURSOR_MARKER,
 806        ]
 807    }
 808
 809    pub fn write_cursor_excerpt_section(
 810        prompt: &mut String,
 811        path: &Path,
 812        context: &str,
 813        editable_range: &Range<usize>,
 814        cursor_offset: usize,
 815    ) {
 816        let path_str = path.to_string_lossy();
 817        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 818
 819        prompt.push_str("<|fim_prefix|>");
 820        prompt.push_str(&context[..editable_range.start]);
 821        prompt.push_str(START_MARKER);
 822        prompt.push_str(&context[editable_range.start..cursor_offset]);
 823        prompt.push_str(CURSOR_MARKER);
 824        prompt.push_str(&context[cursor_offset..editable_range.end]);
 825        if !prompt.ends_with('\n') {
 826            prompt.push('\n');
 827        }
 828        prompt.push_str(SEPARATOR);
 829
 830        prompt.push_str("<|fim_suffix|>");
 831        prompt.push_str(&context[editable_range.end..]);
 832        if !prompt.ends_with('\n') {
 833            prompt.push('\n');
 834        }
 835
 836        prompt.push_str("<|fim_middle|>");
 837    }
 838}
 839
 840pub mod v0211_prefill {
 841    use super::*;
 842
 843    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
 844        let editable_region = &context[editable_range.start..editable_range.end];
 845
 846        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
 847        let prefill_len = editable_region.floor_char_boundary(prefill_len);
 848
 849        // Find a token boundary to avoid splitting tokens in the prefill.
 850        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
 851        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
 852        // the \n and consume any consecutive \n characters after it.
 853        let prefill = &editable_region[..prefill_len];
 854        match prefill.rfind('\n') {
 855            Some(pos) => {
 856                let mut end = pos + 1;
 857                while end < editable_region.len()
 858                    && editable_region.as_bytes().get(end) == Some(&b'\n')
 859                {
 860                    end += 1;
 861                }
 862                editable_region[..end].to_string()
 863            }
 864            // No newline found. Fall back to splitting before the last space
 865            // (word-level boundary)
 866            None => match prefill.rfind(' ') {
 867                Some(pos) => prefill[..pos].to_string(),
 868                None => prefill.to_string(),
 869            },
 870        }
 871    }
 872}
 873
 874pub mod seed_coder {
 875    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
 876    //!
 877    //! Seed-Coder uses different FIM tokens and order than Qwen:
 878    //! - SPM order: suffix comes FIRST, then prefix, then middle
 879    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
 880    //! - File markers: StarCoder-style `<filename>path` (single token + path)
 881    //!
 882    //! All context (related files, edit history) goes in the PREFIX section.
 883    //! The suffix contains only code after the editable region.
 884    //!
 885    //! Example prompt:
 886    //!
 887    //! <[fim-suffix]>
 888    //! code after editable region
 889    //! <[fim-prefix]><filename>related/file.py
 890    //! related file content
 891    //!
 892    //! <filename>edit_history
 893    //! --- a/some_file.py
 894    //! +++ b/some_file.py
 895    //! -old
 896    //! +new
 897    //!
 898    //! <filename>path/to/target_file.py
 899    //! code before editable region
 900    //! <<<<<<< CURRENT
 901    //! code that
 902    //! needs to<|user_cursor|>
 903    //! be rewritten
 904    //! =======
 905    //! <[fim-middle]>
 906    //!
 907    //! Expected output (model generates):
 908    //!
 909    //! updated
 910    //! code with
 911    //! changes applied
 912    //! >>>>>>> UPDATED
 913
 914    use super::*;
 915
 916    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
 917    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
 918    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
 919    pub const FILE_MARKER: &str = "<filename>";
 920
 921    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 922    pub const SEPARATOR: &str = "=======\n";
 923    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 924
 925    pub fn special_tokens() -> &'static [&'static str] {
 926        &[
 927            FIM_SUFFIX,
 928            FIM_PREFIX,
 929            FIM_MIDDLE,
 930            FILE_MARKER,
 931            START_MARKER,
 932            SEPARATOR,
 933            END_MARKER,
 934            CURSOR_MARKER,
 935        ]
 936    }
 937
 938    pub fn format_prompt_with_budget(
 939        path: &Path,
 940        context: &str,
 941        editable_range: &Range<usize>,
 942        cursor_offset: usize,
 943        events: &[Arc<Event>],
 944        related_files: &[RelatedFile],
 945        max_tokens: usize,
 946    ) -> String {
 947        let suffix_section = build_suffix_section(context, editable_range);
 948        let cursor_prefix_section =
 949            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
 950
 951        let suffix_tokens = estimate_tokens(suffix_section.len());
 952        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
 953        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
 954
 955        let edit_history_section = super::format_edit_history_within_budget(
 956            events,
 957            FILE_MARKER,
 958            "edit_history",
 959            budget_after_cursor,
 960        );
 961        let edit_history_tokens = estimate_tokens(edit_history_section.len());
 962        let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 963
 964        let related_files_section = super::format_related_files_within_budget(
 965            related_files,
 966            FILE_MARKER,
 967            budget_after_edit_history,
 968        );
 969
 970        let mut prompt = String::new();
 971        prompt.push_str(&suffix_section);
 972        prompt.push_str(FIM_PREFIX);
 973        prompt.push_str(&related_files_section);
 974        if !related_files_section.is_empty() {
 975            prompt.push('\n');
 976        }
 977        prompt.push_str(&edit_history_section);
 978        if !edit_history_section.is_empty() {
 979            prompt.push('\n');
 980        }
 981        prompt.push_str(&cursor_prefix_section);
 982        prompt.push_str(FIM_MIDDLE);
 983        prompt
 984    }
 985
 986    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
 987        let mut section = String::new();
 988        section.push_str(FIM_SUFFIX);
 989        section.push_str(&context[editable_range.end..]);
 990        if !section.ends_with('\n') {
 991            section.push('\n');
 992        }
 993        section
 994    }
 995
 996    fn build_cursor_prefix_section(
 997        path: &Path,
 998        context: &str,
 999        editable_range: &Range<usize>,
1000        cursor_offset: usize,
1001    ) -> String {
1002        let mut section = String::new();
1003        let path_str = path.to_string_lossy();
1004        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
1005
1006        section.push_str(&context[..editable_range.start]);
1007        section.push_str(START_MARKER);
1008        section.push_str(&context[editable_range.start..cursor_offset]);
1009        section.push_str(CURSOR_MARKER);
1010        section.push_str(&context[cursor_offset..editable_range.end]);
1011        if !section.ends_with('\n') {
1012            section.push('\n');
1013        }
1014        section.push_str(SEPARATOR);
1015        section
1016    }
1017}
1018
1019/// The zeta1 prompt format
1020pub mod zeta1 {
1021    use super::*;
1022    use std::fmt::Write;
1023
1024    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
1025    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
1026    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
1027    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
1028
1029    const INSTRUCTION_HEADER: &str = concat!(
1030        "### Instruction:\n",
1031        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1032        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1033        "into account the cursor location.\n\n",
1034        "### User Edits:\n\n"
1035    );
1036    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
1037    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
1038
1039    /// Formats a complete zeta1 prompt from the input events and excerpt.
1040    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
1041        let mut prompt = String::with_capacity(
1042            INSTRUCTION_HEADER.len()
1043                + input_events.len()
1044                + EXCERPT_HEADER.len()
1045                + input_excerpt.len()
1046                + RESPONSE_HEADER.len(),
1047        );
1048        prompt.push_str(INSTRUCTION_HEADER);
1049        prompt.push_str(input_events);
1050        prompt.push_str(EXCERPT_HEADER);
1051        prompt.push_str(input_excerpt);
1052        prompt.push_str(RESPONSE_HEADER);
1053        prompt
1054    }
1055
1056    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
1057    /// editable and context byte-offset ranges within `cursor_excerpt`.
1058    pub fn format_zeta1_from_input(
1059        input: &ZetaPromptInput,
1060        editable_range: Range<usize>,
1061        context_range: Range<usize>,
1062    ) -> String {
1063        let events = format_zeta1_events(&input.events);
1064        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
1065        format_zeta1_prompt(&events, &excerpt)
1066    }
1067
1068    /// Formats events in zeta1 style (oldest first).
1069    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
1070        let mut result = String::new();
1071        for event in events {
1072            let event_string = format_zeta1_event(event);
1073            if event_string.is_empty() {
1074                continue;
1075            }
1076            if !result.is_empty() {
1077                result.push_str("\n\n");
1078            }
1079            result.push_str(&event_string);
1080        }
1081        result
1082    }
1083
1084    fn format_zeta1_event(event: &Event) -> String {
1085        match event {
1086            Event::BufferChange {
1087                path,
1088                old_path,
1089                diff,
1090                ..
1091            } => {
1092                let mut prompt = String::new();
1093                if old_path != path {
1094                    writeln!(
1095                        prompt,
1096                        "User renamed {} to {}\n",
1097                        old_path.display(),
1098                        path.display()
1099                    )
1100                    .ok();
1101                }
1102                if !diff.is_empty() {
1103                    write!(
1104                        prompt,
1105                        "User edited {}:\n```diff\n{}\n```",
1106                        path.display(),
1107                        diff
1108                    )
1109                    .ok();
1110                }
1111                prompt
1112            }
1113        }
1114    }
1115
1116    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
1117    /// within `cursor_excerpt`.
1118    fn format_zeta1_excerpt(
1119        input: &ZetaPromptInput,
1120        editable_range: Range<usize>,
1121        context_range: Range<usize>,
1122    ) -> String {
1123        let path_str = input.cursor_path.to_string_lossy();
1124        let excerpt = &*input.cursor_excerpt;
1125        let cursor_offset = input.cursor_offset_in_excerpt;
1126
1127        let mut prompt = String::new();
1128        writeln!(&mut prompt, "```{path_str}").ok();
1129
1130        let starts_at_file_beginning =
1131            input.excerpt_start_row == Some(0) && context_range.start == 0;
1132        if starts_at_file_beginning {
1133            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
1134        }
1135
1136        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
1137
1138        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
1139        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
1140        prompt.push_str(CURSOR_MARKER);
1141        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
1142        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
1143
1144        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
1145        write!(prompt, "\n```").ok();
1146
1147        prompt
1148    }
1149
1150    /// Cleans zeta1 model output by extracting content between editable region
1151    /// markers and converting the zeta1 cursor marker to the universal one.
1152    /// Returns `None` if the output doesn't contain the expected markers.
1153    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
1154        let content = output.replace(CURSOR_MARKER, "");
1155
1156        let content_start = content
1157            .find(EDITABLE_REGION_START_MARKER)
1158            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
1159            .map(|pos| {
1160                if content.as_bytes().get(pos) == Some(&b'\n') {
1161                    pos + 1
1162                } else {
1163                    pos
1164                }
1165            })
1166            .unwrap_or(0);
1167
1168        let content_end = content
1169            .find(EDITABLE_REGION_END_MARKER)
1170            .map(|pos| {
1171                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
1172                    pos - 1
1173                } else {
1174                    pos
1175                }
1176            })
1177            .unwrap_or(content.len());
1178
1179        if content_start > content_end {
1180            return Some(String::new());
1181        }
1182
1183        let extracted = &content[content_start..content_end];
1184
1185        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
1186            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
1187            let text_before_cursor = text_before_cursor
1188                .find(EDITABLE_REGION_START_MARKER)
1189                .map(|pos| {
1190                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
1191                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
1192                        after_marker + 1
1193                    } else {
1194                        after_marker
1195                    }
1196                })
1197                .unwrap_or(0);
1198            let offset_in_extracted = zeta1_cursor_pos
1199                .saturating_sub(text_before_cursor)
1200                .min(extracted.len());
1201            offset_in_extracted
1202        });
1203
1204        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
1205        if let Some(offset) = cursor_offset {
1206            result.push_str(&extracted[..offset]);
1207            result.push_str(super::CURSOR_MARKER);
1208            result.push_str(&extracted[offset..]);
1209        } else {
1210            result.push_str(extracted);
1211        }
1212
1213        Some(result)
1214    }
1215}
1216
1217#[cfg(test)]
1218mod tests {
1219    use super::*;
1220    use indoc::indoc;
1221
1222    fn make_input(
1223        cursor_excerpt: &str,
1224        editable_range: Range<usize>,
1225        cursor_offset: usize,
1226        events: Vec<Event>,
1227        related_files: Vec<RelatedFile>,
1228    ) -> ZetaPromptInput {
1229        ZetaPromptInput {
1230            cursor_path: Path::new("test.rs").into(),
1231            cursor_excerpt: cursor_excerpt.into(),
1232            editable_range_in_excerpt: editable_range,
1233            cursor_offset_in_excerpt: cursor_offset,
1234            excerpt_start_row: None,
1235            events: events.into_iter().map(Arc::new).collect(),
1236            related_files,
1237            excerpt_ranges: None,
1238            preferred_model: None,
1239            in_open_source_repo: false,
1240            can_collect_data: false,
1241        }
1242    }
1243
1244    fn make_event(path: &str, diff: &str) -> Event {
1245        Event::BufferChange {
1246            path: Path::new(path).into(),
1247            old_path: Path::new(path).into(),
1248            diff: diff.to_string(),
1249            predicted: false,
1250            in_open_source_repo: false,
1251        }
1252    }
1253
1254    fn make_related_file(path: &str, content: &str) -> RelatedFile {
1255        RelatedFile {
1256            path: Path::new(path).into(),
1257            max_row: content.lines().count() as u32,
1258            excerpts: vec![RelatedExcerpt {
1259                row_range: 0..content.lines().count() as u32,
1260                text: content.into(),
1261                order: 0,
1262            }],
1263            in_open_source_repo: false,
1264        }
1265    }
1266
1267    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1268        format_zeta_prompt_with_budget(input, ZetaFormat::V0114180EditableRegion, max_tokens)
1269    }
1270
1271    #[test]
1272    fn test_no_truncation_when_within_budget() {
1273        let input = make_input(
1274            "prefix\neditable\nsuffix",
1275            7..15,
1276            10,
1277            vec![make_event("a.rs", "-old\n+new\n")],
1278            vec![make_related_file("related.rs", "fn helper() {}\n")],
1279        );
1280
1281        assert_eq!(
1282            format_with_budget(&input, 10000),
1283            indoc! {r#"
1284                <|file_sep|>related.rs
1285                fn helper() {}
1286                <|file_sep|>edit history
1287                --- a/a.rs
1288                +++ b/a.rs
1289                -old
1290                +new
1291                <|file_sep|>test.rs
1292                <|fim_prefix|>
1293                prefix
1294                <|fim_middle|>current
1295                edi<|user_cursor|>table
1296                <|fim_suffix|>
1297
1298                suffix
1299                <|fim_middle|>updated
1300            "#}
1301        );
1302    }
1303
1304    #[test]
1305    fn test_truncation_drops_edit_history_when_budget_tight() {
1306        let input = make_input(
1307            "code",
1308            0..4,
1309            2,
1310            vec![make_event("a.rs", "-x\n+y\n")],
1311            vec![
1312                make_related_file("r1.rs", "a\n"),
1313                make_related_file("r2.rs", "b\n"),
1314            ],
1315        );
1316
1317        assert_eq!(
1318            format_with_budget(&input, 10000),
1319            indoc! {r#"
1320                <|file_sep|>r1.rs
1321                a
1322                <|file_sep|>r2.rs
1323                b
1324                <|file_sep|>edit history
1325                --- a/a.rs
1326                +++ b/a.rs
1327                -x
1328                +y
1329                <|file_sep|>test.rs
1330                <|fim_prefix|>
1331                <|fim_middle|>current
1332                co<|user_cursor|>de
1333                <|fim_suffix|>
1334                <|fim_middle|>updated
1335            "#}
1336        );
1337
1338        assert_eq!(
1339            format_with_budget(&input, 50),
1340            indoc! {r#"
1341                <|file_sep|>r1.rs
1342                a
1343                <|file_sep|>r2.rs
1344                b
1345                <|file_sep|>test.rs
1346                <|fim_prefix|>
1347                <|fim_middle|>current
1348                co<|user_cursor|>de
1349                <|fim_suffix|>
1350                <|fim_middle|>updated
1351            "#}
1352        );
1353    }
1354
1355    #[test]
1356    fn test_truncation_includes_partial_excerpts() {
1357        let input = make_input(
1358            "x",
1359            0..1,
1360            0,
1361            vec![],
1362            vec![RelatedFile {
1363                path: Path::new("big.rs").into(),
1364                max_row: 30,
1365                in_open_source_repo: false,
1366                excerpts: vec![
1367                    RelatedExcerpt {
1368                        row_range: 0..10,
1369                        text: "first excerpt\n".into(),
1370                        order: 0,
1371                    },
1372                    RelatedExcerpt {
1373                        row_range: 10..20,
1374                        text: "second excerpt\n".into(),
1375                        order: 0,
1376                    },
1377                    RelatedExcerpt {
1378                        row_range: 20..30,
1379                        text: "third excerpt\n".into(),
1380                        order: 0,
1381                    },
1382                ],
1383            }],
1384        );
1385
1386        assert_eq!(
1387            format_with_budget(&input, 10000),
1388            indoc! {r#"
1389                <|file_sep|>big.rs
1390                first excerpt
1391                ...
1392                second excerpt
1393                ...
1394                third excerpt
1395                <|file_sep|>test.rs
1396                <|fim_prefix|>
1397                <|fim_middle|>current
1398                <|user_cursor|>x
1399                <|fim_suffix|>
1400                <|fim_middle|>updated
1401            "#}
1402        );
1403
1404        assert_eq!(
1405            format_with_budget(&input, 50),
1406            indoc! {r#"
1407                <|file_sep|>big.rs
1408                first excerpt
1409                ...
1410                <|file_sep|>test.rs
1411                <|fim_prefix|>
1412                <|fim_middle|>current
1413                <|user_cursor|>x
1414                <|fim_suffix|>
1415                <|fim_middle|>updated
1416            "#}
1417        );
1418    }
1419
1420    #[test]
1421    fn test_truncation_prioritizes_lower_order_excerpts() {
1422        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
1423        // With tight budget, only the lower-order excerpt from file_b should be included.
1424        let input = make_input(
1425            "x",
1426            0..1,
1427            0,
1428            vec![],
1429            vec![
1430                RelatedFile {
1431                    path: Path::new("file_a.rs").into(),
1432                    max_row: 10,
1433                    in_open_source_repo: false,
1434                    excerpts: vec![RelatedExcerpt {
1435                        row_range: 0..10,
1436                        text: "low priority content\n".into(),
1437                        order: 5,
1438                    }],
1439                },
1440                RelatedFile {
1441                    path: Path::new("file_b.rs").into(),
1442                    max_row: 10,
1443                    in_open_source_repo: false,
1444                    excerpts: vec![RelatedExcerpt {
1445                        row_range: 0..10,
1446                        text: "high priority content\n".into(),
1447                        order: 1,
1448                    }],
1449                },
1450            ],
1451        );
1452
1453        // With large budget, both files included; file_b (order 1) renders before file_a (order 5).
1454        assert_eq!(
1455            format_with_budget(&input, 10000),
1456            indoc! {r#"
1457                <|file_sep|>file_b.rs
1458                high priority content
1459                <|file_sep|>file_a.rs
1460                low priority content
1461                <|file_sep|>test.rs
1462                <|fim_prefix|>
1463                <|fim_middle|>current
1464                <|user_cursor|>x
1465                <|fim_suffix|>
1466                <|fim_middle|>updated
1467            "#}
1468        );
1469
1470        // With tight budget, only file_b (lower order) fits.
1471        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
1472        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
1473        // file_a would need another 14 tokens, which doesn't fit.
1474        assert_eq!(
1475            format_with_budget(&input, 52),
1476            indoc! {r#"
1477                <|file_sep|>file_b.rs
1478                high priority content
1479                <|file_sep|>test.rs
1480                <|fim_prefix|>
1481                <|fim_middle|>current
1482                <|user_cursor|>x
1483                <|fim_suffix|>
1484                <|fim_middle|>updated
1485            "#}
1486        );
1487    }
1488
1489    #[test]
1490    fn test_truncation_drops_high_order_excerpts_within_file() {
1491        // A single file has excerpts at order 1 and order 3. With a tight budget,
1492        // only the order-1 excerpts are included while the order-3 excerpt is
1493        // dropped — even though they belong to the same file. This also preserves
1494        // the parent invariant: parent outline items have order ≤ their best
1495        // child, so they're always included when any child is.
1496        let input = make_input(
1497            "x",
1498            0..1,
1499            0,
1500            vec![],
1501            vec![RelatedFile {
1502                path: Path::new("mod.rs").into(),
1503                max_row: 30,
1504                in_open_source_repo: false,
1505                excerpts: vec![
1506                    RelatedExcerpt {
1507                        row_range: 0..5,
1508                        text: "mod header\n".into(),
1509                        order: 1,
1510                    },
1511                    RelatedExcerpt {
1512                        row_range: 5..15,
1513                        text: "important fn\n".into(),
1514                        order: 1,
1515                    },
1516                    RelatedExcerpt {
1517                        row_range: 15..30,
1518                        text: "less important fn\n".into(),
1519                        order: 3,
1520                    },
1521                ],
1522            }],
1523        );
1524
1525        // With large budget, all three excerpts included.
1526        assert_eq!(
1527            format_with_budget(&input, 10000),
1528            indoc! {r#"
1529                <|file_sep|>mod.rs
1530                mod header
1531                ...
1532                important fn
1533                ...
1534                less important fn
1535                <|file_sep|>test.rs
1536                <|fim_prefix|>
1537                <|fim_middle|>current
1538                <|user_cursor|>x
1539                <|fim_suffix|>
1540                <|fim_middle|>updated
1541            "#}
1542        );
1543
1544        // With tight budget, only order<=1 excerpts included (header + important fn).
1545        assert_eq!(
1546            format_with_budget(&input, 55),
1547            indoc! {r#"
1548                <|file_sep|>mod.rs
1549                mod header
1550                ...
1551                important fn
1552                ...
1553                <|file_sep|>test.rs
1554                <|fim_prefix|>
1555                <|fim_middle|>current
1556                <|user_cursor|>x
1557                <|fim_suffix|>
1558                <|fim_middle|>updated
1559            "#}
1560        );
1561    }
1562
1563    #[test]
1564    fn test_truncation_drops_older_events_first() {
1565        let input = make_input(
1566            "x",
1567            0..1,
1568            0,
1569            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
1570            vec![],
1571        );
1572
1573        assert_eq!(
1574            format_with_budget(&input, 10000),
1575            indoc! {r#"
1576                <|file_sep|>edit history
1577                --- a/old.rs
1578                +++ b/old.rs
1579                -1
1580                --- a/new.rs
1581                +++ b/new.rs
1582                -2
1583                <|file_sep|>test.rs
1584                <|fim_prefix|>
1585                <|fim_middle|>current
1586                <|user_cursor|>x
1587                <|fim_suffix|>
1588                <|fim_middle|>updated
1589            "#}
1590        );
1591
1592        assert_eq!(
1593            format_with_budget(&input, 55),
1594            indoc! {r#"
1595                <|file_sep|>edit history
1596                --- a/new.rs
1597                +++ b/new.rs
1598                -2
1599                <|file_sep|>test.rs
1600                <|fim_prefix|>
1601                <|fim_middle|>current
1602                <|user_cursor|>x
1603                <|fim_suffix|>
1604                <|fim_middle|>updated
1605            "#}
1606        );
1607    }
1608
1609    #[test]
1610    fn test_cursor_excerpt_always_included_with_minimal_budget() {
1611        let input = make_input(
1612            "fn main() {}",
1613            0..12,
1614            3,
1615            vec![make_event("a.rs", "-old\n+new\n")],
1616            vec![make_related_file("related.rs", "helper\n")],
1617        );
1618
1619        assert_eq!(
1620            format_with_budget(&input, 30),
1621            indoc! {r#"
1622                <|file_sep|>test.rs
1623                <|fim_prefix|>
1624                <|fim_middle|>current
1625                fn <|user_cursor|>main() {}
1626                <|fim_suffix|>
1627                <|fim_middle|>updated
1628            "#}
1629        );
1630    }
1631
1632    fn format_seed_coder(input: &ZetaPromptInput) -> String {
1633        format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, 10000)
1634    }
1635
1636    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1637        format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, max_tokens)
1638    }
1639
1640    #[test]
1641    fn test_seed_coder_basic_format() {
1642        let input = make_input(
1643            "prefix\neditable\nsuffix",
1644            7..15,
1645            10,
1646            vec![make_event("a.rs", "-old\n+new\n")],
1647            vec![make_related_file("related.rs", "fn helper() {}\n")],
1648        );
1649
1650        assert_eq!(
1651            format_seed_coder(&input),
1652            indoc! {r#"
1653                <[fim-suffix]>
1654                suffix
1655                <[fim-prefix]><filename>related.rs
1656                fn helper() {}
1657
1658                <filename>edit_history
1659                --- a/a.rs
1660                +++ b/a.rs
1661                -old
1662                +new
1663
1664                <filename>test.rs
1665                prefix
1666                <<<<<<< CURRENT
1667                edi<|user_cursor|>table
1668                =======
1669                <[fim-middle]>"#}
1670        );
1671    }
1672
1673    #[test]
1674    fn test_seed_coder_no_context() {
1675        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
1676
1677        assert_eq!(
1678            format_seed_coder(&input),
1679            indoc! {r#"
1680                <[fim-suffix]>
1681                after
1682                <[fim-prefix]><filename>test.rs
1683                before
1684                <<<<<<< CURRENT
1685                mid<|user_cursor|>dle
1686                =======
1687                <[fim-middle]>"#}
1688        );
1689    }
1690
1691    #[test]
1692    fn test_seed_coder_truncation_drops_context() {
1693        let input = make_input(
1694            "code",
1695            0..4,
1696            2,
1697            vec![make_event("a.rs", "-x\n+y\n")],
1698            vec![make_related_file("r1.rs", "content\n")],
1699        );
1700
1701        // With large budget, everything is included
1702        assert_eq!(
1703            format_seed_coder(&input),
1704            indoc! {r#"
1705                <[fim-suffix]>
1706                <[fim-prefix]><filename>r1.rs
1707                content
1708
1709                <filename>edit_history
1710                --- a/a.rs
1711                +++ b/a.rs
1712                -x
1713                +y
1714
1715                <filename>test.rs
1716                <<<<<<< CURRENT
1717                co<|user_cursor|>de
1718                =======
1719                <[fim-middle]>"#}
1720        );
1721
1722        // With tight budget, context is dropped but cursor section remains
1723        assert_eq!(
1724            format_seed_coder_with_budget(&input, 30),
1725            indoc! {r#"
1726                <[fim-suffix]>
1727                <[fim-prefix]><filename>test.rs
1728                <<<<<<< CURRENT
1729                co<|user_cursor|>de
1730                =======
1731                <[fim-middle]>"#}
1732        );
1733    }
1734
1735    #[test]
1736    fn test_seed_coder_truncation_prioritizes_lower_order() {
1737        let input = make_input(
1738            "code",
1739            0..4,
1740            2,
1741            vec![],
1742            vec![
1743                RelatedFile {
1744                    path: Path::new("low_prio.rs").into(),
1745                    max_row: 5,
1746                    in_open_source_repo: false,
1747                    excerpts: vec![RelatedExcerpt {
1748                        row_range: 0..5,
1749                        text: "low prio\n".into(),
1750                        order: 10,
1751                    }],
1752                },
1753                RelatedFile {
1754                    path: Path::new("high_prio.rs").into(),
1755                    max_row: 5,
1756                    in_open_source_repo: false,
1757                    excerpts: vec![RelatedExcerpt {
1758                        row_range: 0..5,
1759                        text: "high prio\n".into(),
1760                        order: 1,
1761                    }],
1762                },
1763            ],
1764        );
1765
1766        // With large budget, both included; high_prio first due to lower order.
1767        assert_eq!(
1768            format_seed_coder(&input),
1769            indoc! {r#"
1770                <[fim-suffix]>
1771                <[fim-prefix]><filename>high_prio.rs
1772                high prio
1773                <filename>low_prio.rs
1774                low prio
1775
1776                <filename>test.rs
1777                <<<<<<< CURRENT
1778                co<|user_cursor|>de
1779                =======
1780                <[fim-middle]>"#}
1781        );
1782
1783        // With tight budget, only high_prio included.
1784        // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
1785        // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
1786        assert_eq!(
1787            format_seed_coder_with_budget(&input, 44),
1788            indoc! {r#"
1789                <[fim-suffix]>
1790                <[fim-prefix]><filename>high_prio.rs
1791                high prio
1792
1793                <filename>test.rs
1794                <<<<<<< CURRENT
1795                co<|user_cursor|>de
1796                =======
1797                <[fim-middle]>"#}
1798        );
1799    }
1800
1801    #[test]
1802    fn test_seed_coder_clean_output() {
1803        let output_with_marker = "new code\n>>>>>>> UPDATED\n";
1804        let output_without_marker = "new code\n";
1805
1806        assert_eq!(
1807            clean_zeta2_model_output(output_with_marker, ZetaFormat::V0211SeedCoder),
1808            "new code\n"
1809        );
1810        assert_eq!(
1811            clean_zeta2_model_output(output_without_marker, ZetaFormat::V0211SeedCoder),
1812            "new code\n"
1813        );
1814    }
1815
1816    #[test]
1817    fn test_format_zeta1_from_input_basic() {
1818        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
1819        let input = ZetaPromptInput {
1820            cursor_path: Path::new("src/main.rs").into(),
1821            cursor_excerpt: excerpt.into(),
1822            editable_range_in_excerpt: 15..41,
1823            cursor_offset_in_excerpt: 30,
1824            excerpt_start_row: Some(0),
1825            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
1826            related_files: vec![],
1827            excerpt_ranges: None,
1828            preferred_model: None,
1829            in_open_source_repo: false,
1830            can_collect_data: false,
1831        };
1832
1833        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
1834
1835        assert_eq!(
1836            prompt,
1837            concat!(
1838                "### Instruction:\n",
1839                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1840                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1841                "into account the cursor location.\n",
1842                "\n",
1843                "### User Edits:\n",
1844                "\n",
1845                "User edited other.rs:\n",
1846                "```diff\n",
1847                "-old\n",
1848                "+new\n",
1849                "\n",
1850                "```\n",
1851                "\n",
1852                "### User Excerpt:\n",
1853                "\n",
1854                "```src/main.rs\n",
1855                "<|start_of_file|>\n",
1856                "fn before() {}\n",
1857                "<|editable_region_start|>\n",
1858                "fn foo() {\n",
1859                "    <|user_cursor_is_here|>let x = 1;\n",
1860                "\n",
1861                "<|editable_region_end|>}\n",
1862                "fn after() {}\n",
1863                "\n",
1864                "```\n",
1865                "\n",
1866                "### Response:\n",
1867            ),
1868        );
1869    }
1870
1871    #[test]
1872    fn test_format_zeta1_from_input_no_start_of_file() {
1873        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
1874        let input = ZetaPromptInput {
1875            cursor_path: Path::new("src/main.rs").into(),
1876            cursor_excerpt: excerpt.into(),
1877            editable_range_in_excerpt: 0..28,
1878            cursor_offset_in_excerpt: 15,
1879            excerpt_start_row: Some(10),
1880            events: vec![],
1881            related_files: vec![],
1882            excerpt_ranges: None,
1883            preferred_model: None,
1884            in_open_source_repo: false,
1885            can_collect_data: false,
1886        };
1887
1888        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
1889
1890        assert_eq!(
1891            prompt,
1892            concat!(
1893                "### Instruction:\n",
1894                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1895                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1896                "into account the cursor location.\n",
1897                "\n",
1898                "### User Edits:\n",
1899                "\n",
1900                "\n",
1901                "\n",
1902                "### User Excerpt:\n",
1903                "\n",
1904                "```src/main.rs\n",
1905                "<|editable_region_start|>\n",
1906                "fn foo() {\n",
1907                "    <|user_cursor_is_here|>let x = 1;\n",
1908                "}\n",
1909                "\n",
1910                "<|editable_region_end|>\n",
1911                "```\n",
1912                "\n",
1913                "### Response:\n",
1914            ),
1915        );
1916    }
1917
1918    #[test]
1919    fn test_format_zeta1_from_input_with_sub_ranges() {
1920        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
1921        let editable_range = 10..37;
1922        let context_range = 0..excerpt.len();
1923
1924        let input = ZetaPromptInput {
1925            cursor_path: Path::new("test.rs").into(),
1926            cursor_excerpt: excerpt.into(),
1927            editable_range_in_excerpt: editable_range.clone(),
1928            cursor_offset_in_excerpt: 25,
1929            excerpt_start_row: Some(0),
1930            events: vec![],
1931            related_files: vec![],
1932            excerpt_ranges: None,
1933            preferred_model: None,
1934            in_open_source_repo: false,
1935            can_collect_data: false,
1936        };
1937
1938        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
1939
1940        assert_eq!(
1941            prompt,
1942            concat!(
1943                "### Instruction:\n",
1944                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1945                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1946                "into account the cursor location.\n",
1947                "\n",
1948                "### User Edits:\n",
1949                "\n",
1950                "\n",
1951                "\n",
1952                "### User Excerpt:\n",
1953                "\n",
1954                "```test.rs\n",
1955                "<|start_of_file|>\n",
1956                "// prefix\n",
1957                "<|editable_region_start|>\n",
1958                "fn foo() {\n",
1959                "    <|user_cursor_is_here|>let x = 1;\n",
1960                "}\n",
1961                "<|editable_region_end|>\n",
1962                "// suffix\n",
1963                "\n",
1964                "```\n",
1965                "\n",
1966                "### Response:\n",
1967            ),
1968        );
1969    }
1970
1971    #[test]
1972    fn test_clean_zeta1_model_output_basic() {
1973        let output = indoc! {"
1974            <|editable_region_start|>
1975            fn main() {
1976                println!(\"hello\");
1977            }
1978            <|editable_region_end|>
1979        "};
1980
1981        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1982        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
1983    }
1984
1985    #[test]
1986    fn test_clean_zeta1_model_output_with_cursor() {
1987        let output = indoc! {"
1988            <|editable_region_start|>
1989            fn main() {
1990                <|user_cursor_is_here|>println!(\"hello\");
1991            }
1992            <|editable_region_end|>
1993        "};
1994
1995        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1996        assert_eq!(
1997            cleaned,
1998            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
1999        );
2000    }
2001
2002    #[test]
2003    fn test_clean_zeta1_model_output_no_markers() {
2004        let output = "fn main() {}\n";
2005        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
2006        assert_eq!(cleaned, "fn main() {}\n");
2007    }
2008
2009    #[test]
2010    fn test_clean_zeta1_model_output_empty_region() {
2011        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
2012        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
2013        assert_eq!(cleaned, "");
2014    }
2015}