zeta_prompt.rs

   1use anyhow::Result;
   2use serde::{Deserialize, Serialize};
   3use std::fmt::Write;
   4use std::ops::Range;
   5use std::path::Path;
   6use std::sync::Arc;
   7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
   8
   9pub const CURSOR_MARKER: &str = "<|user_cursor|>";
  10pub const MAX_PROMPT_TOKENS: usize = 4096;
  11
  12/// Use up to this amount of the editable region for prefill.
  13/// Larger values may result in more robust generation, but
  14/// this region becomes non-editable.
  15pub const PREFILL_RATIO: f64 = 0.1; // 10%
  16
  17fn estimate_tokens(bytes: usize) -> usize {
  18    bytes / 3
  19}
  20
  21/// The client's preferred edit prediction model. The server may override this.
  22#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
  23pub enum EditPredictionModelKind {
  24    Zeta1,
  25    Zeta2,
  26}
  27
  28/// Pre-computed byte offset ranges within `cursor_excerpt` for different
  29/// editable and context token budgets. Allows the server to select the
  30/// appropriate ranges for whichever model it uses.
  31#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
  32pub struct ExcerptRanges {
  33    /// Editable region computed with a 150-token budget.
  34    pub editable_150: Range<usize>,
  35    /// Editable region computed with a 180-token budget.
  36    pub editable_180: Range<usize>,
  37    /// Editable region computed with a 350-token budget.
  38    pub editable_350: Range<usize>,
  39    /// Context boundary when using editable_150 with 350 tokens of additional context.
  40    pub editable_150_context_350: Range<usize>,
  41    /// Context boundary when using editable_180 with 350 tokens of additional context.
  42    pub editable_180_context_350: Range<usize>,
  43    /// Context boundary when using editable_350 with 150 tokens of additional context.
  44    pub editable_350_context_150: Range<usize>,
  45}
  46
  47#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
  48pub struct ZetaPromptInput {
  49    pub cursor_path: Arc<Path>,
  50    pub cursor_excerpt: Arc<str>,
  51    pub editable_range_in_excerpt: Range<usize>,
  52    pub cursor_offset_in_excerpt: usize,
  53    #[serde(default, skip_serializing_if = "Option::is_none")]
  54    pub excerpt_start_row: Option<u32>,
  55    pub events: Vec<Arc<Event>>,
  56    pub related_files: Vec<RelatedFile>,
  57    /// When set, the excerpt was computed with a larger budget (~512 tokens)
  58    /// and these ranges let the server select model-appropriate subsets.
  59    /// When absent, the excerpt IS the context region and
  60    /// `editable_range_in_excerpt` is the only editable range.
  61    #[serde(default, skip_serializing_if = "Option::is_none")]
  62    pub excerpt_ranges: Option<ExcerptRanges>,
  63    /// Client's preferred model. The server may override.
  64    #[serde(default, skip_serializing_if = "Option::is_none")]
  65    pub preferred_model: Option<EditPredictionModelKind>,
  66    #[serde(default)]
  67    pub in_open_source_repo: bool,
  68    #[serde(default)]
  69    pub can_collect_data: bool,
  70}
  71
  72#[derive(
  73    Default,
  74    Clone,
  75    Copy,
  76    Debug,
  77    PartialEq,
  78    Eq,
  79    Hash,
  80    EnumIter,
  81    IntoStaticStr,
  82    Serialize,
  83    Deserialize,
  84)]
  85#[allow(non_camel_case_types)]
  86pub enum ZetaFormat {
  87    V0112MiddleAtEnd,
  88    V0113Ordered,
  89    V0114180EditableRegion,
  90    V0120GitMergeMarkers,
  91    #[default]
  92    V0131GitMergeMarkersPrefix,
  93    V0211Prefill,
  94    V0211SeedCoder,
  95}
  96
  97impl std::fmt::Display for ZetaFormat {
  98    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  99        write!(f, "{}", <&'static str>::from(self))
 100    }
 101}
 102
 103impl ZetaFormat {
 104    pub fn parse(format_name: &str) -> Result<Self> {
 105        let mut results = ZetaFormat::iter().filter(|version| {
 106            <&'static str>::from(version)
 107                .to_lowercase()
 108                .contains(&format_name.to_lowercase())
 109        });
 110        let Some(result) = results.next() else {
 111            anyhow::bail!(
 112                "`{format_name}` did not match any of:\n{}",
 113                Self::options_as_string()
 114            );
 115        };
 116        if results.next().is_some() {
 117            anyhow::bail!(
 118                "`{format_name}` matched more than one of:\n{}",
 119                Self::options_as_string()
 120            );
 121        }
 122        Ok(result)
 123    }
 124
 125    pub fn options_as_string() -> String {
 126        ZetaFormat::iter()
 127            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 128            .collect::<Vec<_>>()
 129            .concat()
 130    }
 131
 132    pub fn special_tokens(&self) -> &'static [&'static str] {
 133        match self {
 134            ZetaFormat::V0112MiddleAtEnd
 135            | ZetaFormat::V0113Ordered
 136            | ZetaFormat::V0114180EditableRegion => &[
 137                "<|fim_prefix|>",
 138                "<|fim_suffix|>",
 139                "<|fim_middle|>",
 140                "<|file_sep|>",
 141                CURSOR_MARKER,
 142            ],
 143            ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 144            ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 145                v0131_git_merge_markers_prefix::special_tokens()
 146            }
 147            ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 148        }
 149    }
 150}
 151
 152#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
 153#[serde(tag = "event")]
 154pub enum Event {
 155    BufferChange {
 156        path: Arc<Path>,
 157        old_path: Arc<Path>,
 158        diff: String,
 159        predicted: bool,
 160        in_open_source_repo: bool,
 161    },
 162}
 163
 164impl Event {
 165    pub fn in_open_source_repo(&self) -> bool {
 166        match self {
 167            Event::BufferChange {
 168                in_open_source_repo,
 169                ..
 170            } => *in_open_source_repo,
 171        }
 172    }
 173}
 174
 175pub fn write_event(prompt: &mut String, event: &Event) {
 176    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 177        for component in path.components() {
 178            prompt.push('/');
 179            write!(prompt, "{}", component.as_os_str().display()).ok();
 180        }
 181    }
 182    match event {
 183        Event::BufferChange {
 184            path,
 185            old_path,
 186            diff,
 187            predicted,
 188            in_open_source_repo: _,
 189        } => {
 190            if *predicted {
 191                prompt.push_str("// User accepted prediction:\n");
 192            }
 193            prompt.push_str("--- a");
 194            write_path_as_unix_str(prompt, old_path.as_ref());
 195            prompt.push_str("\n+++ b");
 196            write_path_as_unix_str(prompt, path.as_ref());
 197            prompt.push('\n');
 198            prompt.push_str(diff);
 199        }
 200    }
 201}
 202
 203#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
 204pub struct RelatedFile {
 205    pub path: Arc<Path>,
 206    pub max_row: u32,
 207    pub excerpts: Vec<RelatedExcerpt>,
 208    #[serde(default)]
 209    pub in_open_source_repo: bool,
 210}
 211
 212#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
 213pub struct RelatedExcerpt {
 214    pub row_range: Range<u32>,
 215    pub text: Arc<str>,
 216    #[serde(default)]
 217    pub order: usize,
 218}
 219
 220pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 221    format
 222        .special_tokens()
 223        .iter()
 224        .any(|token| input.cursor_excerpt.contains(token))
 225}
 226
 227pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 228    format_zeta_prompt_with_budget(input, format, MAX_PROMPT_TOKENS)
 229}
 230
 231/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
 232pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
 233    match format {
 234        ZetaFormat::V0120GitMergeMarkers => output
 235            .strip_suffix(v0120_git_merge_markers::END_MARKER)
 236            .unwrap_or(output),
 237        ZetaFormat::V0131GitMergeMarkersPrefix => output
 238            .strip_suffix(v0131_git_merge_markers_prefix::END_MARKER)
 239            .unwrap_or(output),
 240        ZetaFormat::V0211SeedCoder => output
 241            .strip_suffix(seed_coder::END_MARKER)
 242            .unwrap_or(output),
 243        _ => output,
 244    }
 245}
 246
 247pub fn excerpt_range_for_format(
 248    format: ZetaFormat,
 249    ranges: &ExcerptRanges,
 250) -> (Range<usize>, Range<usize>) {
 251    match format {
 252        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 253            ranges.editable_150.clone(),
 254            ranges.editable_150_context_350.clone(),
 255        ),
 256        ZetaFormat::V0114180EditableRegion
 257        | ZetaFormat::V0120GitMergeMarkers
 258        | ZetaFormat::V0131GitMergeMarkersPrefix
 259        | ZetaFormat::V0211Prefill
 260        | ZetaFormat::V0211SeedCoder => (
 261            ranges.editable_350.clone(),
 262            ranges.editable_350_context_150.clone(),
 263        ),
 264    }
 265}
 266
 267pub fn resolve_cursor_region(
 268    input: &ZetaPromptInput,
 269    format: ZetaFormat,
 270) -> (&str, Range<usize>, usize) {
 271    let Some(ranges) = &input.excerpt_ranges else {
 272        return (
 273            &input.cursor_excerpt,
 274            input.editable_range_in_excerpt.clone(),
 275            input.cursor_offset_in_excerpt,
 276        );
 277    };
 278
 279    let (editable_range, context_range) = excerpt_range_for_format(format, ranges);
 280    let context_start = context_range.start;
 281    let context_text = &input.cursor_excerpt[context_range];
 282    let adjusted_editable =
 283        (editable_range.start - context_start)..(editable_range.end - context_start);
 284    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 285
 286    (context_text, adjusted_editable, adjusted_cursor)
 287}
 288
 289fn format_zeta_prompt_with_budget(
 290    input: &ZetaPromptInput,
 291    format: ZetaFormat,
 292    max_tokens: usize,
 293) -> String {
 294    let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
 295    let path = &*input.cursor_path;
 296
 297    let mut cursor_section = String::new();
 298    match format {
 299        ZetaFormat::V0112MiddleAtEnd => {
 300            v0112_middle_at_end::write_cursor_excerpt_section(
 301                &mut cursor_section,
 302                path,
 303                context,
 304                &editable_range,
 305                cursor_offset,
 306            );
 307        }
 308        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 309            v0113_ordered::write_cursor_excerpt_section(
 310                &mut cursor_section,
 311                path,
 312                context,
 313                &editable_range,
 314                cursor_offset,
 315            )
 316        }
 317        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 318            &mut cursor_section,
 319            path,
 320            context,
 321            &editable_range,
 322            cursor_offset,
 323        ),
 324        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 325            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 326                &mut cursor_section,
 327                path,
 328                context,
 329                &editable_range,
 330                cursor_offset,
 331            )
 332        }
 333        ZetaFormat::V0211SeedCoder => {
 334            return seed_coder::format_prompt_with_budget(
 335                path,
 336                context,
 337                &editable_range,
 338                cursor_offset,
 339                &input.events,
 340                &input.related_files,
 341                max_tokens,
 342            );
 343        }
 344    }
 345
 346    let cursor_tokens = estimate_tokens(cursor_section.len());
 347    let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
 348
 349    let edit_history_section = format_edit_history_within_budget(
 350        &input.events,
 351        "<|file_sep|>",
 352        "edit history",
 353        budget_after_cursor,
 354    );
 355    let edit_history_tokens = estimate_tokens(edit_history_section.len());
 356    let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 357
 358    let related_files_section = format_related_files_within_budget(
 359        &input.related_files,
 360        "<|file_sep|>",
 361        budget_after_edit_history,
 362    );
 363
 364    let mut prompt = String::new();
 365    prompt.push_str(&related_files_section);
 366    prompt.push_str(&edit_history_section);
 367    prompt.push_str(&cursor_section);
 368    prompt
 369}
 370
 371pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 372    match format {
 373        ZetaFormat::V0112MiddleAtEnd
 374        | ZetaFormat::V0113Ordered
 375        | ZetaFormat::V0114180EditableRegion
 376        | ZetaFormat::V0120GitMergeMarkers
 377        | ZetaFormat::V0131GitMergeMarkersPrefix
 378        | ZetaFormat::V0211SeedCoder => String::new(),
 379        ZetaFormat::V0211Prefill => {
 380            let (context, editable_range, _) = resolve_cursor_region(input, format);
 381            v0211_prefill::get_prefill(context, &editable_range)
 382        }
 383    }
 384}
 385
 386fn format_edit_history_within_budget(
 387    events: &[Arc<Event>],
 388    file_marker: &str,
 389    edit_history_name: &str,
 390    max_tokens: usize,
 391) -> String {
 392    let header = format!("{}{}\n", file_marker, edit_history_name);
 393    let header_tokens = estimate_tokens(header.len());
 394    if header_tokens >= max_tokens {
 395        return String::new();
 396    }
 397
 398    let mut event_strings: Vec<String> = Vec::new();
 399    let mut total_tokens = header_tokens;
 400
 401    for event in events.iter().rev() {
 402        let mut event_str = String::new();
 403        write_event(&mut event_str, event);
 404        let event_tokens = estimate_tokens(event_str.len());
 405
 406        if total_tokens + event_tokens > max_tokens {
 407            break;
 408        }
 409        total_tokens += event_tokens;
 410        event_strings.push(event_str);
 411    }
 412
 413    if event_strings.is_empty() {
 414        return String::new();
 415    }
 416
 417    let mut result = header;
 418    for event_str in event_strings.iter().rev() {
 419        result.push_str(event_str);
 420    }
 421    result
 422}
 423
 424fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
 425    let needs_newline = !excerpt.text.ends_with('\n');
 426    let needs_ellipsis = excerpt.row_range.end < file_max_row;
 427    let len = excerpt.text.len()
 428        + if needs_newline { "\n".len() } else { 0 }
 429        + if needs_ellipsis { "...\n".len() } else { 0 };
 430    estimate_tokens(len)
 431}
 432
 433fn format_related_files_within_budget(
 434    related_files: &[RelatedFile],
 435    file_marker: &str,
 436    max_tokens: usize,
 437) -> String {
 438    // Collect the distinct order values across all excerpts, sorted ascending.
 439    let mut order_levels: Vec<usize> = related_files
 440        .iter()
 441        .flat_map(|f| f.excerpts.iter().map(|e| e.order))
 442        .collect();
 443    order_levels.sort_unstable();
 444    order_levels.dedup();
 445
 446    // Pre-compute file header strings and their token costs.
 447    let file_headers: Vec<String> = related_files
 448        .iter()
 449        .map(|file| {
 450            let path_str = file.path.to_string_lossy();
 451            format!("{}{}\n", file_marker, path_str)
 452        })
 453        .collect();
 454
 455    // Track which excerpts are included per file.
 456    let mut included: Vec<Vec<bool>> = related_files
 457        .iter()
 458        .map(|file| vec![false; file.excerpts.len()])
 459        .collect();
 460    let mut file_included: Vec<bool> = vec![false; related_files.len()];
 461    let mut total_tokens = 0;
 462
 463    // Process order levels from best (lowest) to worst. At each level, try to
 464    // include all not-yet-included excerpts with that order across all files.
 465    // If the full level doesn't fit, include a partial prefix (top-to-bottom
 466    // within each file) and stop — don't proceed to worse order levels.
 467    'outer: for &order in &order_levels {
 468        // Gather the work for this order level: for each file that has excerpts
 469        // at this order, collect the not-yet-included excerpt indices (in their
 470        // original positional order) and the token cost to add them (including
 471        // the file header if the file isn't already included).
 472        struct FileWork {
 473            file_idx: usize,
 474            excerpt_indices: Vec<usize>,
 475            header_cost: usize,
 476            excerpt_costs: Vec<usize>,
 477        }
 478
 479        let mut work_items: Vec<FileWork> = Vec::new();
 480        for (file_idx, file) in related_files.iter().enumerate() {
 481            let mut excerpt_indices = Vec::new();
 482            let mut excerpt_costs = Vec::new();
 483            for (eidx, excerpt) in file.excerpts.iter().enumerate() {
 484                if excerpt.order == order && !included[file_idx][eidx] {
 485                    excerpt_indices.push(eidx);
 486                    excerpt_costs.push(excerpt_rendered_tokens(excerpt, file.max_row));
 487                }
 488            }
 489            if excerpt_indices.is_empty() {
 490                continue;
 491            }
 492            let header_cost = if file_included[file_idx] {
 493                0
 494            } else {
 495                estimate_tokens(file_headers[file_idx].len())
 496            };
 497            work_items.push(FileWork {
 498                file_idx,
 499                excerpt_indices,
 500                header_cost,
 501                excerpt_costs,
 502            });
 503        }
 504
 505        // Compute the total cost for this entire order level.
 506        let level_cost: usize = work_items
 507            .iter()
 508            .map(|w| w.header_cost + w.excerpt_costs.iter().sum::<usize>())
 509            .sum();
 510
 511        if total_tokens + level_cost <= max_tokens {
 512            // The whole level fits — include everything.
 513            for work in &work_items {
 514                total_tokens += work.header_cost;
 515                file_included[work.file_idx] = true;
 516                for (i, &eidx) in work.excerpt_indices.iter().enumerate() {
 517                    included[work.file_idx][eidx] = true;
 518                    total_tokens += work.excerpt_costs[i];
 519                }
 520            }
 521        } else {
 522            // The whole level doesn't fit. Include as many excerpts as possible
 523            // from each file (in positional order), then stop entirely.
 524            for work in &work_items {
 525                let available = max_tokens.saturating_sub(total_tokens);
 526                let mut file_cost = work.header_cost;
 527
 528                let mut count = 0;
 529                for i in 0..work.excerpt_indices.len() {
 530                    if file_cost + work.excerpt_costs[i] > available {
 531                        break;
 532                    }
 533                    file_cost += work.excerpt_costs[i];
 534                    count += 1;
 535                }
 536
 537                if count > 0 {
 538                    total_tokens += work.header_cost;
 539                    file_included[work.file_idx] = true;
 540                    for (i, &eidx) in work.excerpt_indices.iter().take(count).enumerate() {
 541                        included[work.file_idx][eidx] = true;
 542                        total_tokens += work.excerpt_costs[i];
 543                    }
 544                }
 545            }
 546            break 'outer;
 547        }
 548    }
 549
 550    // Determine file rendering order: by the best (lowest) order of any
 551    // included excerpt, breaking ties by original file index.
 552    let mut file_order: Vec<(usize, usize)> = Vec::new();
 553    for (file_idx, file) in related_files.iter().enumerate() {
 554        if !file_included[file_idx] {
 555            continue;
 556        }
 557        let best_order = file
 558            .excerpts
 559            .iter()
 560            .enumerate()
 561            .filter(|(eidx, _)| included[file_idx][*eidx])
 562            .map(|(_, e)| e.order)
 563            .min()
 564            .unwrap_or(usize::MAX);
 565        file_order.push((file_idx, best_order));
 566    }
 567    file_order.sort_by_key(|&(file_idx, best_order)| (best_order, file_idx));
 568
 569    // Render included files and excerpts in positional order within each file.
 570    let mut result = String::new();
 571    for &(file_idx, _) in &file_order {
 572        let file = &related_files[file_idx];
 573        result.push_str(&file_headers[file_idx]);
 574        for (eidx, excerpt) in file.excerpts.iter().enumerate() {
 575            if !included[file_idx][eidx] {
 576                continue;
 577            }
 578            result.push_str(&excerpt.text);
 579            if !result.ends_with('\n') {
 580                result.push('\n');
 581            }
 582            if excerpt.row_range.end < file.max_row {
 583                result.push_str("...\n");
 584            }
 585        }
 586    }
 587
 588    result
 589}
 590
 591pub fn write_related_files(
 592    prompt: &mut String,
 593    related_files: &[RelatedFile],
 594) -> Vec<Range<usize>> {
 595    let mut ranges = Vec::new();
 596    for file in related_files {
 597        let start = prompt.len();
 598        let path_str = file.path.to_string_lossy();
 599        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 600        for excerpt in &file.excerpts {
 601            prompt.push_str(&excerpt.text);
 602            if !prompt.ends_with('\n') {
 603                prompt.push('\n');
 604            }
 605            if excerpt.row_range.end < file.max_row {
 606                prompt.push_str("...\n");
 607            }
 608        }
 609        let end = prompt.len();
 610        ranges.push(start..end);
 611    }
 612    ranges
 613}
 614
 615mod v0112_middle_at_end {
 616    use super::*;
 617
 618    pub fn write_cursor_excerpt_section(
 619        prompt: &mut String,
 620        path: &Path,
 621        context: &str,
 622        editable_range: &Range<usize>,
 623        cursor_offset: usize,
 624    ) {
 625        let path_str = path.to_string_lossy();
 626        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 627
 628        prompt.push_str("<|fim_prefix|>\n");
 629        prompt.push_str(&context[..editable_range.start]);
 630
 631        prompt.push_str("<|fim_suffix|>\n");
 632        prompt.push_str(&context[editable_range.end..]);
 633        if !prompt.ends_with('\n') {
 634            prompt.push('\n');
 635        }
 636
 637        prompt.push_str("<|fim_middle|>current\n");
 638        prompt.push_str(&context[editable_range.start..cursor_offset]);
 639        prompt.push_str(CURSOR_MARKER);
 640        prompt.push_str(&context[cursor_offset..editable_range.end]);
 641        if !prompt.ends_with('\n') {
 642            prompt.push('\n');
 643        }
 644
 645        prompt.push_str("<|fim_middle|>updated\n");
 646    }
 647}
 648
 649mod v0113_ordered {
 650    use super::*;
 651
 652    pub fn write_cursor_excerpt_section(
 653        prompt: &mut String,
 654        path: &Path,
 655        context: &str,
 656        editable_range: &Range<usize>,
 657        cursor_offset: usize,
 658    ) {
 659        let path_str = path.to_string_lossy();
 660        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 661
 662        prompt.push_str("<|fim_prefix|>\n");
 663        prompt.push_str(&context[..editable_range.start]);
 664        if !prompt.ends_with('\n') {
 665            prompt.push('\n');
 666        }
 667
 668        prompt.push_str("<|fim_middle|>current\n");
 669        prompt.push_str(&context[editable_range.start..cursor_offset]);
 670        prompt.push_str(CURSOR_MARKER);
 671        prompt.push_str(&context[cursor_offset..editable_range.end]);
 672        if !prompt.ends_with('\n') {
 673            prompt.push('\n');
 674        }
 675
 676        prompt.push_str("<|fim_suffix|>\n");
 677        prompt.push_str(&context[editable_range.end..]);
 678        if !prompt.ends_with('\n') {
 679            prompt.push('\n');
 680        }
 681
 682        prompt.push_str("<|fim_middle|>updated\n");
 683    }
 684}
 685
 686pub mod v0120_git_merge_markers {
 687    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 688    //!
 689    //! Example prompt:
 690    //!
 691    //! <|file_sep|>path/to/target_file.py
 692    //! <|fim_prefix|>
 693    //! code before editable region
 694    //! <|fim_suffix|>
 695    //! code after editable region
 696    //! <|fim_middle|>
 697    //! <<<<<<< CURRENT
 698    //! code that
 699    //! needs to<|user_cursor|>
 700    //! be rewritten
 701    //! =======
 702    //!
 703    //! Expected output (should be generated by the model):
 704    //!
 705    //! updated
 706    //! code with
 707    //! changes applied
 708    //! >>>>>>> UPDATED
 709
 710    use super::*;
 711
 712    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 713    pub const SEPARATOR: &str = "=======\n";
 714    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 715
 716    pub fn special_tokens() -> &'static [&'static str] {
 717        &[
 718            "<|fim_prefix|>",
 719            "<|fim_suffix|>",
 720            "<|fim_middle|>",
 721            "<|file_sep|>",
 722            START_MARKER,
 723            SEPARATOR,
 724            END_MARKER,
 725            CURSOR_MARKER,
 726        ]
 727    }
 728
 729    pub fn write_cursor_excerpt_section(
 730        prompt: &mut String,
 731        path: &Path,
 732        context: &str,
 733        editable_range: &Range<usize>,
 734        cursor_offset: usize,
 735    ) {
 736        let path_str = path.to_string_lossy();
 737        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 738
 739        prompt.push_str("<|fim_prefix|>");
 740        prompt.push_str(&context[..editable_range.start]);
 741
 742        prompt.push_str("<|fim_suffix|>");
 743        prompt.push_str(&context[editable_range.end..]);
 744        if !prompt.ends_with('\n') {
 745            prompt.push('\n');
 746        }
 747
 748        prompt.push_str("<|fim_middle|>");
 749        prompt.push_str(START_MARKER);
 750        prompt.push_str(&context[editable_range.start..cursor_offset]);
 751        prompt.push_str(CURSOR_MARKER);
 752        prompt.push_str(&context[cursor_offset..editable_range.end]);
 753        if !prompt.ends_with('\n') {
 754            prompt.push('\n');
 755        }
 756        prompt.push_str(SEPARATOR);
 757    }
 758}
 759
 760pub mod v0131_git_merge_markers_prefix {
 761    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 762    //!
 763    //! Example prompt:
 764    //!
 765    //! <|file_sep|>path/to/target_file.py
 766    //! <|fim_prefix|>
 767    //! code before editable region
 768    //! <<<<<<< CURRENT
 769    //! code that
 770    //! needs to<|user_cursor|>
 771    //! be rewritten
 772    //! =======
 773    //! <|fim_suffix|>
 774    //! code after editable region
 775    //! <|fim_middle|>
 776    //!
 777    //! Expected output (should be generated by the model):
 778    //!
 779    //! updated
 780    //! code with
 781    //! changes applied
 782    //! >>>>>>> UPDATED
 783
 784    use super::*;
 785
 786    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 787    pub const SEPARATOR: &str = "=======\n";
 788    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 789
 790    pub fn special_tokens() -> &'static [&'static str] {
 791        &[
 792            "<|fim_prefix|>",
 793            "<|fim_suffix|>",
 794            "<|fim_middle|>",
 795            "<|file_sep|>",
 796            START_MARKER,
 797            SEPARATOR,
 798            END_MARKER,
 799            CURSOR_MARKER,
 800        ]
 801    }
 802
 803    pub fn write_cursor_excerpt_section(
 804        prompt: &mut String,
 805        path: &Path,
 806        context: &str,
 807        editable_range: &Range<usize>,
 808        cursor_offset: usize,
 809    ) {
 810        let path_str = path.to_string_lossy();
 811        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 812
 813        prompt.push_str("<|fim_prefix|>");
 814        prompt.push_str(&context[..editable_range.start]);
 815        prompt.push_str(START_MARKER);
 816        prompt.push_str(&context[editable_range.start..cursor_offset]);
 817        prompt.push_str(CURSOR_MARKER);
 818        prompt.push_str(&context[cursor_offset..editable_range.end]);
 819        if !prompt.ends_with('\n') {
 820            prompt.push('\n');
 821        }
 822        prompt.push_str(SEPARATOR);
 823
 824        prompt.push_str("<|fim_suffix|>");
 825        prompt.push_str(&context[editable_range.end..]);
 826        if !prompt.ends_with('\n') {
 827            prompt.push('\n');
 828        }
 829
 830        prompt.push_str("<|fim_middle|>");
 831    }
 832}
 833
 834pub mod v0211_prefill {
 835    use super::*;
 836
 837    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
 838        let editable_region = &context[editable_range.start..editable_range.end];
 839
 840        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
 841        let prefill_len = editable_region.floor_char_boundary(prefill_len);
 842
 843        // Find a token boundary to avoid splitting tokens in the prefill.
 844        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
 845        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
 846        // the \n and consume any consecutive \n characters after it.
 847        let prefill = &editable_region[..prefill_len];
 848        match prefill.rfind('\n') {
 849            Some(pos) => {
 850                let mut end = pos + 1;
 851                while end < editable_region.len()
 852                    && editable_region.as_bytes().get(end) == Some(&b'\n')
 853                {
 854                    end += 1;
 855                }
 856                editable_region[..end].to_string()
 857            }
 858            // No newline found. Fall back to splitting before the last space
 859            // (word-level boundary)
 860            None => match prefill.rfind(' ') {
 861                Some(pos) => prefill[..pos].to_string(),
 862                None => prefill.to_string(),
 863            },
 864        }
 865    }
 866}
 867
 868pub mod seed_coder {
 869    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
 870    //!
 871    //! Seed-Coder uses different FIM tokens and order than Qwen:
 872    //! - SPM order: suffix comes FIRST, then prefix, then middle
 873    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
 874    //! - File markers: StarCoder-style `<filename>path` (single token + path)
 875    //!
 876    //! All context (related files, edit history) goes in the PREFIX section.
 877    //! The suffix contains only code after the editable region.
 878    //!
 879    //! Example prompt:
 880    //!
 881    //! <[fim-suffix]>
 882    //! code after editable region
 883    //! <[fim-prefix]><filename>related/file.py
 884    //! related file content
 885    //!
 886    //! <filename>edit_history
 887    //! --- a/some_file.py
 888    //! +++ b/some_file.py
 889    //! -old
 890    //! +new
 891    //!
 892    //! <filename>path/to/target_file.py
 893    //! code before editable region
 894    //! <<<<<<< CURRENT
 895    //! code that
 896    //! needs to<|user_cursor|>
 897    //! be rewritten
 898    //! =======
 899    //! <[fim-middle]>
 900    //!
 901    //! Expected output (model generates):
 902    //!
 903    //! updated
 904    //! code with
 905    //! changes applied
 906    //! >>>>>>> UPDATED
 907
 908    use super::*;
 909
 910    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
 911    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
 912    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
 913    pub const FILE_MARKER: &str = "<filename>";
 914
 915    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 916    pub const SEPARATOR: &str = "=======\n";
 917    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 918
 919    pub fn special_tokens() -> &'static [&'static str] {
 920        &[
 921            FIM_SUFFIX,
 922            FIM_PREFIX,
 923            FIM_MIDDLE,
 924            FILE_MARKER,
 925            START_MARKER,
 926            SEPARATOR,
 927            END_MARKER,
 928            CURSOR_MARKER,
 929        ]
 930    }
 931
 932    pub fn format_prompt_with_budget(
 933        path: &Path,
 934        context: &str,
 935        editable_range: &Range<usize>,
 936        cursor_offset: usize,
 937        events: &[Arc<Event>],
 938        related_files: &[RelatedFile],
 939        max_tokens: usize,
 940    ) -> String {
 941        let suffix_section = build_suffix_section(context, editable_range);
 942        let cursor_prefix_section =
 943            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
 944
 945        let suffix_tokens = estimate_tokens(suffix_section.len());
 946        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
 947        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
 948
 949        let edit_history_section = super::format_edit_history_within_budget(
 950            events,
 951            FILE_MARKER,
 952            "edit_history",
 953            budget_after_cursor,
 954        );
 955        let edit_history_tokens = estimate_tokens(edit_history_section.len());
 956        let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 957
 958        let related_files_section = super::format_related_files_within_budget(
 959            related_files,
 960            FILE_MARKER,
 961            budget_after_edit_history,
 962        );
 963
 964        let mut prompt = String::new();
 965        prompt.push_str(&suffix_section);
 966        prompt.push_str(FIM_PREFIX);
 967        prompt.push_str(&related_files_section);
 968        if !related_files_section.is_empty() {
 969            prompt.push('\n');
 970        }
 971        prompt.push_str(&edit_history_section);
 972        if !edit_history_section.is_empty() {
 973            prompt.push('\n');
 974        }
 975        prompt.push_str(&cursor_prefix_section);
 976        prompt.push_str(FIM_MIDDLE);
 977        prompt
 978    }
 979
 980    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
 981        let mut section = String::new();
 982        section.push_str(FIM_SUFFIX);
 983        section.push_str(&context[editable_range.end..]);
 984        if !section.ends_with('\n') {
 985            section.push('\n');
 986        }
 987        section
 988    }
 989
 990    fn build_cursor_prefix_section(
 991        path: &Path,
 992        context: &str,
 993        editable_range: &Range<usize>,
 994        cursor_offset: usize,
 995    ) -> String {
 996        let mut section = String::new();
 997        let path_str = path.to_string_lossy();
 998        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
 999
1000        section.push_str(&context[..editable_range.start]);
1001        section.push_str(START_MARKER);
1002        section.push_str(&context[editable_range.start..cursor_offset]);
1003        section.push_str(CURSOR_MARKER);
1004        section.push_str(&context[cursor_offset..editable_range.end]);
1005        if !section.ends_with('\n') {
1006            section.push('\n');
1007        }
1008        section.push_str(SEPARATOR);
1009        section
1010    }
1011}
1012
1013/// The zeta1 prompt format
1014pub mod zeta1 {
1015    use super::*;
1016    use std::fmt::Write;
1017
1018    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
1019    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
1020    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
1021    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
1022
1023    const INSTRUCTION_HEADER: &str = concat!(
1024        "### Instruction:\n",
1025        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1026        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1027        "into account the cursor location.\n\n",
1028        "### User Edits:\n\n"
1029    );
1030    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
1031    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
1032
1033    /// Formats a complete zeta1 prompt from the input events and excerpt.
1034    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
1035        let mut prompt = String::with_capacity(
1036            INSTRUCTION_HEADER.len()
1037                + input_events.len()
1038                + EXCERPT_HEADER.len()
1039                + input_excerpt.len()
1040                + RESPONSE_HEADER.len(),
1041        );
1042        prompt.push_str(INSTRUCTION_HEADER);
1043        prompt.push_str(input_events);
1044        prompt.push_str(EXCERPT_HEADER);
1045        prompt.push_str(input_excerpt);
1046        prompt.push_str(RESPONSE_HEADER);
1047        prompt
1048    }
1049
1050    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
1051    /// editable and context byte-offset ranges within `cursor_excerpt`.
1052    pub fn format_zeta1_from_input(
1053        input: &ZetaPromptInput,
1054        editable_range: Range<usize>,
1055        context_range: Range<usize>,
1056    ) -> String {
1057        let events = format_zeta1_events(&input.events);
1058        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
1059        format_zeta1_prompt(&events, &excerpt)
1060    }
1061
1062    /// Formats events in zeta1 style (oldest first).
1063    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
1064        let mut result = String::new();
1065        for event in events {
1066            let event_string = format_zeta1_event(event);
1067            if event_string.is_empty() {
1068                continue;
1069            }
1070            if !result.is_empty() {
1071                result.push_str("\n\n");
1072            }
1073            result.push_str(&event_string);
1074        }
1075        result
1076    }
1077
1078    fn format_zeta1_event(event: &Event) -> String {
1079        match event {
1080            Event::BufferChange {
1081                path,
1082                old_path,
1083                diff,
1084                ..
1085            } => {
1086                let mut prompt = String::new();
1087                if old_path != path {
1088                    writeln!(
1089                        prompt,
1090                        "User renamed {} to {}\n",
1091                        old_path.display(),
1092                        path.display()
1093                    )
1094                    .ok();
1095                }
1096                if !diff.is_empty() {
1097                    write!(
1098                        prompt,
1099                        "User edited {}:\n```diff\n{}\n```",
1100                        path.display(),
1101                        diff
1102                    )
1103                    .ok();
1104                }
1105                prompt
1106            }
1107        }
1108    }
1109
1110    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
1111    /// within `cursor_excerpt`.
1112    fn format_zeta1_excerpt(
1113        input: &ZetaPromptInput,
1114        editable_range: Range<usize>,
1115        context_range: Range<usize>,
1116    ) -> String {
1117        let path_str = input.cursor_path.to_string_lossy();
1118        let excerpt = &*input.cursor_excerpt;
1119        let cursor_offset = input.cursor_offset_in_excerpt;
1120
1121        let mut prompt = String::new();
1122        writeln!(&mut prompt, "```{path_str}").ok();
1123
1124        let starts_at_file_beginning =
1125            input.excerpt_start_row == Some(0) && context_range.start == 0;
1126        if starts_at_file_beginning {
1127            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
1128        }
1129
1130        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
1131
1132        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
1133        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
1134        prompt.push_str(CURSOR_MARKER);
1135        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
1136        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
1137
1138        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
1139        write!(prompt, "\n```").ok();
1140
1141        prompt
1142    }
1143
1144    /// Cleans zeta1 model output by extracting content between editable region
1145    /// markers and converting the zeta1 cursor marker to the universal one.
1146    /// Returns `None` if the output doesn't contain the expected markers.
1147    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
1148        let content = output.replace(CURSOR_MARKER, "");
1149
1150        let content_start = content
1151            .find(EDITABLE_REGION_START_MARKER)
1152            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
1153            .map(|pos| {
1154                if content.as_bytes().get(pos) == Some(&b'\n') {
1155                    pos + 1
1156                } else {
1157                    pos
1158                }
1159            })
1160            .unwrap_or(0);
1161
1162        let content_end = content
1163            .find(EDITABLE_REGION_END_MARKER)
1164            .map(|pos| {
1165                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
1166                    pos - 1
1167                } else {
1168                    pos
1169                }
1170            })
1171            .unwrap_or(content.len());
1172
1173        if content_start > content_end {
1174            return Some(String::new());
1175        }
1176
1177        let extracted = &content[content_start..content_end];
1178
1179        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
1180            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
1181            let text_before_cursor = text_before_cursor
1182                .find(EDITABLE_REGION_START_MARKER)
1183                .map(|pos| {
1184                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
1185                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
1186                        after_marker + 1
1187                    } else {
1188                        after_marker
1189                    }
1190                })
1191                .unwrap_or(0);
1192            let offset_in_extracted = zeta1_cursor_pos
1193                .saturating_sub(text_before_cursor)
1194                .min(extracted.len());
1195            offset_in_extracted
1196        });
1197
1198        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
1199        if let Some(offset) = cursor_offset {
1200            result.push_str(&extracted[..offset]);
1201            result.push_str(super::CURSOR_MARKER);
1202            result.push_str(&extracted[offset..]);
1203        } else {
1204            result.push_str(extracted);
1205        }
1206
1207        Some(result)
1208    }
1209}
1210
1211#[cfg(test)]
1212mod tests {
1213    use super::*;
1214    use indoc::indoc;
1215
1216    fn make_input(
1217        cursor_excerpt: &str,
1218        editable_range: Range<usize>,
1219        cursor_offset: usize,
1220        events: Vec<Event>,
1221        related_files: Vec<RelatedFile>,
1222    ) -> ZetaPromptInput {
1223        ZetaPromptInput {
1224            cursor_path: Path::new("test.rs").into(),
1225            cursor_excerpt: cursor_excerpt.into(),
1226            editable_range_in_excerpt: editable_range,
1227            cursor_offset_in_excerpt: cursor_offset,
1228            excerpt_start_row: None,
1229            events: events.into_iter().map(Arc::new).collect(),
1230            related_files,
1231            excerpt_ranges: None,
1232            preferred_model: None,
1233            in_open_source_repo: false,
1234            can_collect_data: false,
1235        }
1236    }
1237
1238    fn make_event(path: &str, diff: &str) -> Event {
1239        Event::BufferChange {
1240            path: Path::new(path).into(),
1241            old_path: Path::new(path).into(),
1242            diff: diff.to_string(),
1243            predicted: false,
1244            in_open_source_repo: false,
1245        }
1246    }
1247
1248    fn make_related_file(path: &str, content: &str) -> RelatedFile {
1249        RelatedFile {
1250            path: Path::new(path).into(),
1251            max_row: content.lines().count() as u32,
1252            excerpts: vec![RelatedExcerpt {
1253                row_range: 0..content.lines().count() as u32,
1254                text: content.into(),
1255                order: 0,
1256            }],
1257            in_open_source_repo: false,
1258        }
1259    }
1260
1261    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1262        format_zeta_prompt_with_budget(input, ZetaFormat::V0114180EditableRegion, max_tokens)
1263    }
1264
1265    #[test]
1266    fn test_no_truncation_when_within_budget() {
1267        let input = make_input(
1268            "prefix\neditable\nsuffix",
1269            7..15,
1270            10,
1271            vec![make_event("a.rs", "-old\n+new\n")],
1272            vec![make_related_file("related.rs", "fn helper() {}\n")],
1273        );
1274
1275        assert_eq!(
1276            format_with_budget(&input, 10000),
1277            indoc! {r#"
1278                <|file_sep|>related.rs
1279                fn helper() {}
1280                <|file_sep|>edit history
1281                --- a/a.rs
1282                +++ b/a.rs
1283                -old
1284                +new
1285                <|file_sep|>test.rs
1286                <|fim_prefix|>
1287                prefix
1288                <|fim_middle|>current
1289                edi<|user_cursor|>table
1290                <|fim_suffix|>
1291
1292                suffix
1293                <|fim_middle|>updated
1294            "#}
1295        );
1296    }
1297
1298    #[test]
1299    fn test_truncation_drops_edit_history_when_budget_tight() {
1300        let input = make_input(
1301            "code",
1302            0..4,
1303            2,
1304            vec![make_event("a.rs", "-x\n+y\n")],
1305            vec![
1306                make_related_file("r1.rs", "a\n"),
1307                make_related_file("r2.rs", "b\n"),
1308            ],
1309        );
1310
1311        assert_eq!(
1312            format_with_budget(&input, 10000),
1313            indoc! {r#"
1314                <|file_sep|>r1.rs
1315                a
1316                <|file_sep|>r2.rs
1317                b
1318                <|file_sep|>edit history
1319                --- a/a.rs
1320                +++ b/a.rs
1321                -x
1322                +y
1323                <|file_sep|>test.rs
1324                <|fim_prefix|>
1325                <|fim_middle|>current
1326                co<|user_cursor|>de
1327                <|fim_suffix|>
1328                <|fim_middle|>updated
1329            "#}
1330        );
1331
1332        assert_eq!(
1333            format_with_budget(&input, 50),
1334            indoc! {r#"
1335                <|file_sep|>r1.rs
1336                a
1337                <|file_sep|>r2.rs
1338                b
1339                <|file_sep|>test.rs
1340                <|fim_prefix|>
1341                <|fim_middle|>current
1342                co<|user_cursor|>de
1343                <|fim_suffix|>
1344                <|fim_middle|>updated
1345            "#}
1346        );
1347    }
1348
1349    #[test]
1350    fn test_truncation_includes_partial_excerpts() {
1351        let input = make_input(
1352            "x",
1353            0..1,
1354            0,
1355            vec![],
1356            vec![RelatedFile {
1357                path: Path::new("big.rs").into(),
1358                max_row: 30,
1359                in_open_source_repo: false,
1360                excerpts: vec![
1361                    RelatedExcerpt {
1362                        row_range: 0..10,
1363                        text: "first excerpt\n".into(),
1364                        order: 0,
1365                    },
1366                    RelatedExcerpt {
1367                        row_range: 10..20,
1368                        text: "second excerpt\n".into(),
1369                        order: 0,
1370                    },
1371                    RelatedExcerpt {
1372                        row_range: 20..30,
1373                        text: "third excerpt\n".into(),
1374                        order: 0,
1375                    },
1376                ],
1377            }],
1378        );
1379
1380        assert_eq!(
1381            format_with_budget(&input, 10000),
1382            indoc! {r#"
1383                <|file_sep|>big.rs
1384                first excerpt
1385                ...
1386                second excerpt
1387                ...
1388                third excerpt
1389                <|file_sep|>test.rs
1390                <|fim_prefix|>
1391                <|fim_middle|>current
1392                <|user_cursor|>x
1393                <|fim_suffix|>
1394                <|fim_middle|>updated
1395            "#}
1396        );
1397
1398        assert_eq!(
1399            format_with_budget(&input, 50),
1400            indoc! {r#"
1401                <|file_sep|>big.rs
1402                first excerpt
1403                ...
1404                <|file_sep|>test.rs
1405                <|fim_prefix|>
1406                <|fim_middle|>current
1407                <|user_cursor|>x
1408                <|fim_suffix|>
1409                <|fim_middle|>updated
1410            "#}
1411        );
1412    }
1413
1414    #[test]
1415    fn test_truncation_prioritizes_lower_order_excerpts() {
1416        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
1417        // With tight budget, only the lower-order excerpt from file_b should be included.
1418        let input = make_input(
1419            "x",
1420            0..1,
1421            0,
1422            vec![],
1423            vec![
1424                RelatedFile {
1425                    path: Path::new("file_a.rs").into(),
1426                    max_row: 10,
1427                    in_open_source_repo: false,
1428                    excerpts: vec![RelatedExcerpt {
1429                        row_range: 0..10,
1430                        text: "low priority content\n".into(),
1431                        order: 5,
1432                    }],
1433                },
1434                RelatedFile {
1435                    path: Path::new("file_b.rs").into(),
1436                    max_row: 10,
1437                    in_open_source_repo: false,
1438                    excerpts: vec![RelatedExcerpt {
1439                        row_range: 0..10,
1440                        text: "high priority content\n".into(),
1441                        order: 1,
1442                    }],
1443                },
1444            ],
1445        );
1446
1447        // With large budget, both files included; file_b (order 1) renders before file_a (order 5).
1448        assert_eq!(
1449            format_with_budget(&input, 10000),
1450            indoc! {r#"
1451                <|file_sep|>file_b.rs
1452                high priority content
1453                <|file_sep|>file_a.rs
1454                low priority content
1455                <|file_sep|>test.rs
1456                <|fim_prefix|>
1457                <|fim_middle|>current
1458                <|user_cursor|>x
1459                <|fim_suffix|>
1460                <|fim_middle|>updated
1461            "#}
1462        );
1463
1464        // With tight budget, only file_b (lower order) fits.
1465        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
1466        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
1467        // file_a would need another 14 tokens, which doesn't fit.
1468        assert_eq!(
1469            format_with_budget(&input, 52),
1470            indoc! {r#"
1471                <|file_sep|>file_b.rs
1472                high priority content
1473                <|file_sep|>test.rs
1474                <|fim_prefix|>
1475                <|fim_middle|>current
1476                <|user_cursor|>x
1477                <|fim_suffix|>
1478                <|fim_middle|>updated
1479            "#}
1480        );
1481    }
1482
1483    #[test]
1484    fn test_truncation_drops_high_order_excerpts_within_file() {
1485        // A single file has excerpts at order 1 and order 3. With a tight budget,
1486        // only the order-1 excerpts are included while the order-3 excerpt is
1487        // dropped — even though they belong to the same file. This also preserves
1488        // the parent invariant: parent outline items have order ≤ their best
1489        // child, so they're always included when any child is.
1490        let input = make_input(
1491            "x",
1492            0..1,
1493            0,
1494            vec![],
1495            vec![RelatedFile {
1496                path: Path::new("mod.rs").into(),
1497                max_row: 30,
1498                in_open_source_repo: false,
1499                excerpts: vec![
1500                    RelatedExcerpt {
1501                        row_range: 0..5,
1502                        text: "mod header\n".into(),
1503                        order: 1,
1504                    },
1505                    RelatedExcerpt {
1506                        row_range: 5..15,
1507                        text: "important fn\n".into(),
1508                        order: 1,
1509                    },
1510                    RelatedExcerpt {
1511                        row_range: 15..30,
1512                        text: "less important fn\n".into(),
1513                        order: 3,
1514                    },
1515                ],
1516            }],
1517        );
1518
1519        // With large budget, all three excerpts included.
1520        assert_eq!(
1521            format_with_budget(&input, 10000),
1522            indoc! {r#"
1523                <|file_sep|>mod.rs
1524                mod header
1525                ...
1526                important fn
1527                ...
1528                less important fn
1529                <|file_sep|>test.rs
1530                <|fim_prefix|>
1531                <|fim_middle|>current
1532                <|user_cursor|>x
1533                <|fim_suffix|>
1534                <|fim_middle|>updated
1535            "#}
1536        );
1537
1538        // With tight budget, only order<=1 excerpts included (header + important fn).
1539        assert_eq!(
1540            format_with_budget(&input, 55),
1541            indoc! {r#"
1542                <|file_sep|>mod.rs
1543                mod header
1544                ...
1545                important fn
1546                ...
1547                <|file_sep|>test.rs
1548                <|fim_prefix|>
1549                <|fim_middle|>current
1550                <|user_cursor|>x
1551                <|fim_suffix|>
1552                <|fim_middle|>updated
1553            "#}
1554        );
1555    }
1556
1557    #[test]
1558    fn test_truncation_drops_older_events_first() {
1559        let input = make_input(
1560            "x",
1561            0..1,
1562            0,
1563            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
1564            vec![],
1565        );
1566
1567        assert_eq!(
1568            format_with_budget(&input, 10000),
1569            indoc! {r#"
1570                <|file_sep|>edit history
1571                --- a/old.rs
1572                +++ b/old.rs
1573                -1
1574                --- a/new.rs
1575                +++ b/new.rs
1576                -2
1577                <|file_sep|>test.rs
1578                <|fim_prefix|>
1579                <|fim_middle|>current
1580                <|user_cursor|>x
1581                <|fim_suffix|>
1582                <|fim_middle|>updated
1583            "#}
1584        );
1585
1586        assert_eq!(
1587            format_with_budget(&input, 55),
1588            indoc! {r#"
1589                <|file_sep|>edit history
1590                --- a/new.rs
1591                +++ b/new.rs
1592                -2
1593                <|file_sep|>test.rs
1594                <|fim_prefix|>
1595                <|fim_middle|>current
1596                <|user_cursor|>x
1597                <|fim_suffix|>
1598                <|fim_middle|>updated
1599            "#}
1600        );
1601    }
1602
1603    #[test]
1604    fn test_cursor_excerpt_always_included_with_minimal_budget() {
1605        let input = make_input(
1606            "fn main() {}",
1607            0..12,
1608            3,
1609            vec![make_event("a.rs", "-old\n+new\n")],
1610            vec![make_related_file("related.rs", "helper\n")],
1611        );
1612
1613        assert_eq!(
1614            format_with_budget(&input, 30),
1615            indoc! {r#"
1616                <|file_sep|>test.rs
1617                <|fim_prefix|>
1618                <|fim_middle|>current
1619                fn <|user_cursor|>main() {}
1620                <|fim_suffix|>
1621                <|fim_middle|>updated
1622            "#}
1623        );
1624    }
1625
1626    fn format_seed_coder(input: &ZetaPromptInput) -> String {
1627        format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, 10000)
1628    }
1629
1630    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1631        format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, max_tokens)
1632    }
1633
1634    #[test]
1635    fn test_seed_coder_basic_format() {
1636        let input = make_input(
1637            "prefix\neditable\nsuffix",
1638            7..15,
1639            10,
1640            vec![make_event("a.rs", "-old\n+new\n")],
1641            vec![make_related_file("related.rs", "fn helper() {}\n")],
1642        );
1643
1644        assert_eq!(
1645            format_seed_coder(&input),
1646            indoc! {r#"
1647                <[fim-suffix]>
1648                suffix
1649                <[fim-prefix]><filename>related.rs
1650                fn helper() {}
1651
1652                <filename>edit_history
1653                --- a/a.rs
1654                +++ b/a.rs
1655                -old
1656                +new
1657
1658                <filename>test.rs
1659                prefix
1660                <<<<<<< CURRENT
1661                edi<|user_cursor|>table
1662                =======
1663                <[fim-middle]>"#}
1664        );
1665    }
1666
1667    #[test]
1668    fn test_seed_coder_no_context() {
1669        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
1670
1671        assert_eq!(
1672            format_seed_coder(&input),
1673            indoc! {r#"
1674                <[fim-suffix]>
1675                after
1676                <[fim-prefix]><filename>test.rs
1677                before
1678                <<<<<<< CURRENT
1679                mid<|user_cursor|>dle
1680                =======
1681                <[fim-middle]>"#}
1682        );
1683    }
1684
1685    #[test]
1686    fn test_seed_coder_truncation_drops_context() {
1687        let input = make_input(
1688            "code",
1689            0..4,
1690            2,
1691            vec![make_event("a.rs", "-x\n+y\n")],
1692            vec![make_related_file("r1.rs", "content\n")],
1693        );
1694
1695        // With large budget, everything is included
1696        assert_eq!(
1697            format_seed_coder(&input),
1698            indoc! {r#"
1699                <[fim-suffix]>
1700                <[fim-prefix]><filename>r1.rs
1701                content
1702
1703                <filename>edit_history
1704                --- a/a.rs
1705                +++ b/a.rs
1706                -x
1707                +y
1708
1709                <filename>test.rs
1710                <<<<<<< CURRENT
1711                co<|user_cursor|>de
1712                =======
1713                <[fim-middle]>"#}
1714        );
1715
1716        // With tight budget, context is dropped but cursor section remains
1717        assert_eq!(
1718            format_seed_coder_with_budget(&input, 30),
1719            indoc! {r#"
1720                <[fim-suffix]>
1721                <[fim-prefix]><filename>test.rs
1722                <<<<<<< CURRENT
1723                co<|user_cursor|>de
1724                =======
1725                <[fim-middle]>"#}
1726        );
1727    }
1728
1729    #[test]
1730    fn test_seed_coder_truncation_prioritizes_lower_order() {
1731        let input = make_input(
1732            "code",
1733            0..4,
1734            2,
1735            vec![],
1736            vec![
1737                RelatedFile {
1738                    path: Path::new("low_prio.rs").into(),
1739                    max_row: 5,
1740                    in_open_source_repo: false,
1741                    excerpts: vec![RelatedExcerpt {
1742                        row_range: 0..5,
1743                        text: "low prio\n".into(),
1744                        order: 10,
1745                    }],
1746                },
1747                RelatedFile {
1748                    path: Path::new("high_prio.rs").into(),
1749                    max_row: 5,
1750                    in_open_source_repo: false,
1751                    excerpts: vec![RelatedExcerpt {
1752                        row_range: 0..5,
1753                        text: "high prio\n".into(),
1754                        order: 1,
1755                    }],
1756                },
1757            ],
1758        );
1759
1760        // With large budget, both included; high_prio first due to lower order.
1761        assert_eq!(
1762            format_seed_coder(&input),
1763            indoc! {r#"
1764                <[fim-suffix]>
1765                <[fim-prefix]><filename>high_prio.rs
1766                high prio
1767                <filename>low_prio.rs
1768                low prio
1769
1770                <filename>test.rs
1771                <<<<<<< CURRENT
1772                co<|user_cursor|>de
1773                =======
1774                <[fim-middle]>"#}
1775        );
1776
1777        // With tight budget, only high_prio included.
1778        // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
1779        // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
1780        assert_eq!(
1781            format_seed_coder_with_budget(&input, 44),
1782            indoc! {r#"
1783                <[fim-suffix]>
1784                <[fim-prefix]><filename>high_prio.rs
1785                high prio
1786
1787                <filename>test.rs
1788                <<<<<<< CURRENT
1789                co<|user_cursor|>de
1790                =======
1791                <[fim-middle]>"#}
1792        );
1793    }
1794
1795    #[test]
1796    fn test_seed_coder_clean_output() {
1797        let output_with_marker = "new code\n>>>>>>> UPDATED\n";
1798        let output_without_marker = "new code\n";
1799
1800        assert_eq!(
1801            clean_zeta2_model_output(output_with_marker, ZetaFormat::V0211SeedCoder),
1802            "new code\n"
1803        );
1804        assert_eq!(
1805            clean_zeta2_model_output(output_without_marker, ZetaFormat::V0211SeedCoder),
1806            "new code\n"
1807        );
1808    }
1809
1810    #[test]
1811    fn test_format_zeta1_from_input_basic() {
1812        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
1813        let input = ZetaPromptInput {
1814            cursor_path: Path::new("src/main.rs").into(),
1815            cursor_excerpt: excerpt.into(),
1816            editable_range_in_excerpt: 15..41,
1817            cursor_offset_in_excerpt: 30,
1818            excerpt_start_row: Some(0),
1819            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
1820            related_files: vec![],
1821            excerpt_ranges: None,
1822            preferred_model: None,
1823            in_open_source_repo: false,
1824            can_collect_data: false,
1825        };
1826
1827        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
1828
1829        assert_eq!(
1830            prompt,
1831            concat!(
1832                "### Instruction:\n",
1833                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1834                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1835                "into account the cursor location.\n",
1836                "\n",
1837                "### User Edits:\n",
1838                "\n",
1839                "User edited other.rs:\n",
1840                "```diff\n",
1841                "-old\n",
1842                "+new\n",
1843                "\n",
1844                "```\n",
1845                "\n",
1846                "### User Excerpt:\n",
1847                "\n",
1848                "```src/main.rs\n",
1849                "<|start_of_file|>\n",
1850                "fn before() {}\n",
1851                "<|editable_region_start|>\n",
1852                "fn foo() {\n",
1853                "    <|user_cursor_is_here|>let x = 1;\n",
1854                "\n",
1855                "<|editable_region_end|>}\n",
1856                "fn after() {}\n",
1857                "\n",
1858                "```\n",
1859                "\n",
1860                "### Response:\n",
1861            ),
1862        );
1863    }
1864
1865    #[test]
1866    fn test_format_zeta1_from_input_no_start_of_file() {
1867        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
1868        let input = ZetaPromptInput {
1869            cursor_path: Path::new("src/main.rs").into(),
1870            cursor_excerpt: excerpt.into(),
1871            editable_range_in_excerpt: 0..28,
1872            cursor_offset_in_excerpt: 15,
1873            excerpt_start_row: Some(10),
1874            events: vec![],
1875            related_files: vec![],
1876            excerpt_ranges: None,
1877            preferred_model: None,
1878            in_open_source_repo: false,
1879            can_collect_data: false,
1880        };
1881
1882        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
1883
1884        assert_eq!(
1885            prompt,
1886            concat!(
1887                "### Instruction:\n",
1888                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1889                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1890                "into account the cursor location.\n",
1891                "\n",
1892                "### User Edits:\n",
1893                "\n",
1894                "\n",
1895                "\n",
1896                "### User Excerpt:\n",
1897                "\n",
1898                "```src/main.rs\n",
1899                "<|editable_region_start|>\n",
1900                "fn foo() {\n",
1901                "    <|user_cursor_is_here|>let x = 1;\n",
1902                "}\n",
1903                "\n",
1904                "<|editable_region_end|>\n",
1905                "```\n",
1906                "\n",
1907                "### Response:\n",
1908            ),
1909        );
1910    }
1911
1912    #[test]
1913    fn test_format_zeta1_from_input_with_sub_ranges() {
1914        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
1915        let editable_range = 10..37;
1916        let context_range = 0..excerpt.len();
1917
1918        let input = ZetaPromptInput {
1919            cursor_path: Path::new("test.rs").into(),
1920            cursor_excerpt: excerpt.into(),
1921            editable_range_in_excerpt: editable_range.clone(),
1922            cursor_offset_in_excerpt: 25,
1923            excerpt_start_row: Some(0),
1924            events: vec![],
1925            related_files: vec![],
1926            excerpt_ranges: None,
1927            preferred_model: None,
1928            in_open_source_repo: false,
1929            can_collect_data: false,
1930        };
1931
1932        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
1933
1934        assert_eq!(
1935            prompt,
1936            concat!(
1937                "### Instruction:\n",
1938                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1939                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1940                "into account the cursor location.\n",
1941                "\n",
1942                "### User Edits:\n",
1943                "\n",
1944                "\n",
1945                "\n",
1946                "### User Excerpt:\n",
1947                "\n",
1948                "```test.rs\n",
1949                "<|start_of_file|>\n",
1950                "// prefix\n",
1951                "<|editable_region_start|>\n",
1952                "fn foo() {\n",
1953                "    <|user_cursor_is_here|>let x = 1;\n",
1954                "}\n",
1955                "<|editable_region_end|>\n",
1956                "// suffix\n",
1957                "\n",
1958                "```\n",
1959                "\n",
1960                "### Response:\n",
1961            ),
1962        );
1963    }
1964
1965    #[test]
1966    fn test_clean_zeta1_model_output_basic() {
1967        let output = indoc! {"
1968            <|editable_region_start|>
1969            fn main() {
1970                println!(\"hello\");
1971            }
1972            <|editable_region_end|>
1973        "};
1974
1975        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1976        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
1977    }
1978
1979    #[test]
1980    fn test_clean_zeta1_model_output_with_cursor() {
1981        let output = indoc! {"
1982            <|editable_region_start|>
1983            fn main() {
1984                <|user_cursor_is_here|>println!(\"hello\");
1985            }
1986            <|editable_region_end|>
1987        "};
1988
1989        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1990        assert_eq!(
1991            cleaned,
1992            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
1993        );
1994    }
1995
1996    #[test]
1997    fn test_clean_zeta1_model_output_no_markers() {
1998        let output = "fn main() {}\n";
1999        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
2000        assert_eq!(cleaned, "fn main() {}\n");
2001    }
2002
2003    #[test]
2004    fn test_clean_zeta1_model_output_empty_region() {
2005        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
2006        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
2007        assert_eq!(cleaned, "");
2008    }
2009}