zeta_prompt.rs

   1use anyhow::Result;
   2use serde::{Deserialize, Serialize};
   3use std::fmt::Write;
   4use std::ops::Range;
   5use std::path::Path;
   6use std::sync::Arc;
   7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
   8
   9pub const CURSOR_MARKER: &str = "<|user_cursor|>";
  10pub const MAX_PROMPT_TOKENS: usize = 4096;
  11
  12/// Use up to this amount of the editable region for prefill.
  13/// Larger values may result in more robust generation, but
  14/// this region becomes non-editable.
  15pub const PREFILL_RATIO: f64 = 0.1; // 10%
  16
  17fn estimate_tokens(bytes: usize) -> usize {
  18    bytes / 3
  19}
  20
  21/// The client's preferred edit prediction model. The server may override this.
  22#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
  23pub enum EditPredictionModelKind {
  24    Zeta1,
  25    Zeta2,
  26}
  27
  28/// Pre-computed byte offset ranges within `cursor_excerpt` for different
  29/// editable and context token budgets. Allows the server to select the
  30/// appropriate ranges for whichever model it uses.
  31#[derive(Clone, Debug, Serialize, Deserialize)]
  32pub struct ExcerptRanges {
  33    /// Editable region computed with a 150-token budget.
  34    pub editable_150: Range<usize>,
  35    /// Editable region computed with a 180-token budget.
  36    pub editable_180: Range<usize>,
  37    /// Editable region computed with a 350-token budget.
  38    pub editable_350: Range<usize>,
  39    /// Context boundary when using editable_150 with 350 tokens of additional context.
  40    pub editable_150_context_350: Range<usize>,
  41    /// Context boundary when using editable_180 with 350 tokens of additional context.
  42    pub editable_180_context_350: Range<usize>,
  43    /// Context boundary when using editable_350 with 150 tokens of additional context.
  44    pub editable_350_context_150: Range<usize>,
  45}
  46
  47#[derive(Clone, Debug, Serialize, Deserialize)]
  48pub struct ZetaPromptInput {
  49    pub cursor_path: Arc<Path>,
  50    pub cursor_excerpt: Arc<str>,
  51    pub editable_range_in_excerpt: Range<usize>,
  52    pub cursor_offset_in_excerpt: usize,
  53    #[serde(default, skip_serializing_if = "Option::is_none")]
  54    pub excerpt_start_row: Option<u32>,
  55    pub events: Vec<Arc<Event>>,
  56    pub related_files: Vec<RelatedFile>,
  57    /// When set, the excerpt was computed with a larger budget (~512 tokens)
  58    /// and these ranges let the server select model-appropriate subsets.
  59    /// When absent, the excerpt IS the context region and
  60    /// `editable_range_in_excerpt` is the only editable range.
  61    #[serde(default, skip_serializing_if = "Option::is_none")]
  62    pub excerpt_ranges: Option<ExcerptRanges>,
  63    /// Client's preferred model. The server may override.
  64    #[serde(default, skip_serializing_if = "Option::is_none")]
  65    pub preferred_model: Option<EditPredictionModelKind>,
  66    #[serde(default)]
  67    pub in_open_source_repo: bool,
  68    #[serde(default)]
  69    pub can_collect_data: bool,
  70}
  71
  72#[derive(
  73    Default,
  74    Clone,
  75    Copy,
  76    Debug,
  77    PartialEq,
  78    Eq,
  79    Hash,
  80    EnumIter,
  81    IntoStaticStr,
  82    Serialize,
  83    Deserialize,
  84)]
  85#[allow(non_camel_case_types)]
  86pub enum ZetaFormat {
  87    V0112MiddleAtEnd,
  88    V0113Ordered,
  89    #[default]
  90    V0114180EditableRegion,
  91    V0120GitMergeMarkers,
  92    V0131GitMergeMarkersPrefix,
  93    V0211Prefill,
  94    V0211SeedCoder,
  95}
  96
  97impl std::fmt::Display for ZetaFormat {
  98    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  99        write!(f, "{}", <&'static str>::from(self))
 100    }
 101}
 102
 103impl ZetaFormat {
 104    pub fn parse(format_name: &str) -> Result<Self> {
 105        let mut results = ZetaFormat::iter().filter(|version| {
 106            <&'static str>::from(version)
 107                .to_lowercase()
 108                .contains(&format_name.to_lowercase())
 109        });
 110        let Some(result) = results.next() else {
 111            anyhow::bail!(
 112                "`{format_name}` did not match any of:\n{}",
 113                Self::options_as_string()
 114            );
 115        };
 116        if results.next().is_some() {
 117            anyhow::bail!(
 118                "`{format_name}` matched more than one of:\n{}",
 119                Self::options_as_string()
 120            );
 121        }
 122        Ok(result)
 123    }
 124
 125    pub fn options_as_string() -> String {
 126        ZetaFormat::iter()
 127            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 128            .collect::<Vec<_>>()
 129            .concat()
 130    }
 131}
 132
 133#[derive(Clone, Debug, Serialize, Deserialize)]
 134#[serde(tag = "event")]
 135pub enum Event {
 136    BufferChange {
 137        path: Arc<Path>,
 138        old_path: Arc<Path>,
 139        diff: String,
 140        predicted: bool,
 141        in_open_source_repo: bool,
 142    },
 143}
 144
 145impl Event {
 146    pub fn in_open_source_repo(&self) -> bool {
 147        match self {
 148            Event::BufferChange {
 149                in_open_source_repo,
 150                ..
 151            } => *in_open_source_repo,
 152        }
 153    }
 154}
 155
 156pub fn write_event(prompt: &mut String, event: &Event) {
 157    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 158        for component in path.components() {
 159            prompt.push('/');
 160            write!(prompt, "{}", component.as_os_str().display()).ok();
 161        }
 162    }
 163    match event {
 164        Event::BufferChange {
 165            path,
 166            old_path,
 167            diff,
 168            predicted,
 169            in_open_source_repo: _,
 170        } => {
 171            if *predicted {
 172                prompt.push_str("// User accepted prediction:\n");
 173            }
 174            prompt.push_str("--- a");
 175            write_path_as_unix_str(prompt, old_path.as_ref());
 176            prompt.push_str("\n+++ b");
 177            write_path_as_unix_str(prompt, path.as_ref());
 178            prompt.push('\n');
 179            prompt.push_str(diff);
 180        }
 181    }
 182}
 183
 184#[derive(Clone, Debug, Serialize, Deserialize)]
 185pub struct RelatedFile {
 186    pub path: Arc<Path>,
 187    pub max_row: u32,
 188    pub excerpts: Vec<RelatedExcerpt>,
 189    #[serde(default)]
 190    pub in_open_source_repo: bool,
 191}
 192
 193#[derive(Clone, Debug, Serialize, Deserialize)]
 194pub struct RelatedExcerpt {
 195    pub row_range: Range<u32>,
 196    pub text: Arc<str>,
 197}
 198
 199pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 200    format_zeta_prompt_with_budget(input, format, MAX_PROMPT_TOKENS)
 201}
 202
 203/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
 204pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
 205    match format {
 206        ZetaFormat::V0120GitMergeMarkers => output
 207            .strip_suffix(v0120_git_merge_markers::END_MARKER)
 208            .unwrap_or(output),
 209        ZetaFormat::V0131GitMergeMarkersPrefix => output
 210            .strip_suffix(v0131_git_merge_markers_prefix::END_MARKER)
 211            .unwrap_or(output),
 212        ZetaFormat::V0211SeedCoder => output
 213            .strip_suffix(seed_coder::END_MARKER)
 214            .unwrap_or(output),
 215        _ => output,
 216    }
 217}
 218
 219fn resolve_cursor_region(
 220    input: &ZetaPromptInput,
 221    format: ZetaFormat,
 222) -> (&str, Range<usize>, usize) {
 223    let Some(ranges) = &input.excerpt_ranges else {
 224        return (
 225            &input.cursor_excerpt,
 226            input.editable_range_in_excerpt.clone(),
 227            input.cursor_offset_in_excerpt,
 228        );
 229    };
 230
 231    let (editable_range, context_range) = match format {
 232        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 233            ranges.editable_150.clone(),
 234            ranges.editable_150_context_350.clone(),
 235        ),
 236        ZetaFormat::V0114180EditableRegion
 237        | ZetaFormat::V0120GitMergeMarkers
 238        | ZetaFormat::V0131GitMergeMarkersPrefix
 239        | ZetaFormat::V0211Prefill
 240        | ZetaFormat::V0211SeedCoder => (
 241            ranges.editable_180.clone(),
 242            ranges.editable_180_context_350.clone(),
 243        ),
 244    };
 245
 246    let context_start = context_range.start;
 247    let context_text = &input.cursor_excerpt[context_range];
 248    let adjusted_editable =
 249        (editable_range.start - context_start)..(editable_range.end - context_start);
 250    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 251
 252    (context_text, adjusted_editable, adjusted_cursor)
 253}
 254
 255fn format_zeta_prompt_with_budget(
 256    input: &ZetaPromptInput,
 257    format: ZetaFormat,
 258    max_tokens: usize,
 259) -> String {
 260    let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
 261    let path = &*input.cursor_path;
 262
 263    let mut cursor_section = String::new();
 264    match format {
 265        ZetaFormat::V0112MiddleAtEnd => {
 266            v0112_middle_at_end::write_cursor_excerpt_section(
 267                &mut cursor_section,
 268                path,
 269                context,
 270                &editable_range,
 271                cursor_offset,
 272            );
 273        }
 274        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 275            v0113_ordered::write_cursor_excerpt_section(
 276                &mut cursor_section,
 277                path,
 278                context,
 279                &editable_range,
 280                cursor_offset,
 281            )
 282        }
 283        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 284            &mut cursor_section,
 285            path,
 286            context,
 287            &editable_range,
 288            cursor_offset,
 289        ),
 290        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 291            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 292                &mut cursor_section,
 293                path,
 294                context,
 295                &editable_range,
 296                cursor_offset,
 297            )
 298        }
 299        ZetaFormat::V0211SeedCoder => {
 300            return seed_coder::format_prompt_with_budget(
 301                path,
 302                context,
 303                &editable_range,
 304                cursor_offset,
 305                &input.events,
 306                &input.related_files,
 307                max_tokens,
 308            );
 309        }
 310    }
 311
 312    let cursor_tokens = estimate_tokens(cursor_section.len());
 313    let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
 314
 315    let edit_history_section = format_edit_history_within_budget(
 316        &input.events,
 317        "<|file_sep|>",
 318        "edit history",
 319        budget_after_cursor,
 320    );
 321    let edit_history_tokens = estimate_tokens(edit_history_section.len());
 322    let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 323
 324    let related_files_section = format_related_files_within_budget(
 325        &input.related_files,
 326        "<|file_sep|>",
 327        budget_after_edit_history,
 328    );
 329
 330    let mut prompt = String::new();
 331    prompt.push_str(&related_files_section);
 332    prompt.push_str(&edit_history_section);
 333    prompt.push_str(&cursor_section);
 334    prompt
 335}
 336
 337pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 338    match format {
 339        ZetaFormat::V0112MiddleAtEnd
 340        | ZetaFormat::V0113Ordered
 341        | ZetaFormat::V0114180EditableRegion
 342        | ZetaFormat::V0120GitMergeMarkers
 343        | ZetaFormat::V0131GitMergeMarkersPrefix
 344        | ZetaFormat::V0211SeedCoder => String::new(),
 345        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(input),
 346    }
 347}
 348
 349fn format_edit_history_within_budget(
 350    events: &[Arc<Event>],
 351    file_marker: &str,
 352    edit_history_name: &str,
 353    max_tokens: usize,
 354) -> String {
 355    let header = format!("{}{}\n", file_marker, edit_history_name);
 356    let header_tokens = estimate_tokens(header.len());
 357    if header_tokens >= max_tokens {
 358        return String::new();
 359    }
 360
 361    let mut event_strings: Vec<String> = Vec::new();
 362    let mut total_tokens = header_tokens;
 363
 364    for event in events.iter().rev() {
 365        let mut event_str = String::new();
 366        write_event(&mut event_str, event);
 367        let event_tokens = estimate_tokens(event_str.len());
 368
 369        if total_tokens + event_tokens > max_tokens {
 370            break;
 371        }
 372        total_tokens += event_tokens;
 373        event_strings.push(event_str);
 374    }
 375
 376    if event_strings.is_empty() {
 377        return String::new();
 378    }
 379
 380    let mut result = header;
 381    for event_str in event_strings.iter().rev() {
 382        result.push_str(event_str);
 383    }
 384    result
 385}
 386
 387fn format_related_files_within_budget(
 388    related_files: &[RelatedFile],
 389    file_marker: &str,
 390    max_tokens: usize,
 391) -> String {
 392    let mut result = String::new();
 393    let mut total_tokens = 0;
 394
 395    for file in related_files {
 396        let path_str = file.path.to_string_lossy();
 397        let header = format!("{}{}\n", file_marker, path_str);
 398        let header_tokens = estimate_tokens(header.len());
 399
 400        if total_tokens + header_tokens > max_tokens {
 401            break;
 402        }
 403
 404        let mut file_tokens = header_tokens;
 405        let mut excerpts_to_include = 0;
 406
 407        for excerpt in &file.excerpts {
 408            let needs_newline = !excerpt.text.ends_with('\n');
 409            let needs_ellipsis = excerpt.row_range.end < file.max_row;
 410            let excerpt_len = excerpt.text.len()
 411                + if needs_newline { "\n".len() } else { 0 }
 412                + if needs_ellipsis { "...\n".len() } else { 0 };
 413
 414            let excerpt_tokens = estimate_tokens(excerpt_len);
 415            if total_tokens + file_tokens + excerpt_tokens > max_tokens {
 416                break;
 417            }
 418            file_tokens += excerpt_tokens;
 419            excerpts_to_include += 1;
 420        }
 421
 422        if excerpts_to_include > 0 {
 423            total_tokens += file_tokens;
 424            result.push_str(&header);
 425            for excerpt in file.excerpts.iter().take(excerpts_to_include) {
 426                result.push_str(&excerpt.text);
 427                if !result.ends_with('\n') {
 428                    result.push('\n');
 429                }
 430                if excerpt.row_range.end < file.max_row {
 431                    result.push_str("...\n");
 432                }
 433            }
 434        }
 435    }
 436
 437    result
 438}
 439
 440pub fn write_related_files(
 441    prompt: &mut String,
 442    related_files: &[RelatedFile],
 443) -> Vec<Range<usize>> {
 444    let mut ranges = Vec::new();
 445    for file in related_files {
 446        let start = prompt.len();
 447        let path_str = file.path.to_string_lossy();
 448        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 449        for excerpt in &file.excerpts {
 450            prompt.push_str(&excerpt.text);
 451            if !prompt.ends_with('\n') {
 452                prompt.push('\n');
 453            }
 454            if excerpt.row_range.end < file.max_row {
 455                prompt.push_str("...\n");
 456            }
 457        }
 458        let end = prompt.len();
 459        ranges.push(start..end);
 460    }
 461    ranges
 462}
 463
 464mod v0112_middle_at_end {
 465    use super::*;
 466
 467    pub fn write_cursor_excerpt_section(
 468        prompt: &mut String,
 469        path: &Path,
 470        context: &str,
 471        editable_range: &Range<usize>,
 472        cursor_offset: usize,
 473    ) {
 474        let path_str = path.to_string_lossy();
 475        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 476
 477        prompt.push_str("<|fim_prefix|>\n");
 478        prompt.push_str(&context[..editable_range.start]);
 479
 480        prompt.push_str("<|fim_suffix|>\n");
 481        prompt.push_str(&context[editable_range.end..]);
 482        if !prompt.ends_with('\n') {
 483            prompt.push('\n');
 484        }
 485
 486        prompt.push_str("<|fim_middle|>current\n");
 487        prompt.push_str(&context[editable_range.start..cursor_offset]);
 488        prompt.push_str(CURSOR_MARKER);
 489        prompt.push_str(&context[cursor_offset..editable_range.end]);
 490        if !prompt.ends_with('\n') {
 491            prompt.push('\n');
 492        }
 493
 494        prompt.push_str("<|fim_middle|>updated\n");
 495    }
 496}
 497
 498mod v0113_ordered {
 499    use super::*;
 500
 501    pub fn write_cursor_excerpt_section(
 502        prompt: &mut String,
 503        path: &Path,
 504        context: &str,
 505        editable_range: &Range<usize>,
 506        cursor_offset: usize,
 507    ) {
 508        let path_str = path.to_string_lossy();
 509        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 510
 511        prompt.push_str("<|fim_prefix|>\n");
 512        prompt.push_str(&context[..editable_range.start]);
 513        if !prompt.ends_with('\n') {
 514            prompt.push('\n');
 515        }
 516
 517        prompt.push_str("<|fim_middle|>current\n");
 518        prompt.push_str(&context[editable_range.start..cursor_offset]);
 519        prompt.push_str(CURSOR_MARKER);
 520        prompt.push_str(&context[cursor_offset..editable_range.end]);
 521        if !prompt.ends_with('\n') {
 522            prompt.push('\n');
 523        }
 524
 525        prompt.push_str("<|fim_suffix|>\n");
 526        prompt.push_str(&context[editable_range.end..]);
 527        if !prompt.ends_with('\n') {
 528            prompt.push('\n');
 529        }
 530
 531        prompt.push_str("<|fim_middle|>updated\n");
 532    }
 533}
 534
 535pub mod v0120_git_merge_markers {
 536    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 537    //!
 538    //! Example prompt:
 539    //!
 540    //! <|file_sep|>path/to/target_file.py
 541    //! <|fim_prefix|>
 542    //! code before editable region
 543    //! <|fim_suffix|>
 544    //! code after editable region
 545    //! <|fim_middle|>
 546    //! <<<<<<< CURRENT
 547    //! code that
 548    //! needs to<|user_cursor|>
 549    //! be rewritten
 550    //! =======
 551    //!
 552    //! Expected output (should be generated by the model):
 553    //!
 554    //! updated
 555    //! code with
 556    //! changes applied
 557    //! >>>>>>> UPDATED
 558
 559    use super::*;
 560
 561    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 562    pub const SEPARATOR: &str = "=======\n";
 563    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 564
 565    pub fn write_cursor_excerpt_section(
 566        prompt: &mut String,
 567        path: &Path,
 568        context: &str,
 569        editable_range: &Range<usize>,
 570        cursor_offset: usize,
 571    ) {
 572        let path_str = path.to_string_lossy();
 573        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 574
 575        prompt.push_str("<|fim_prefix|>");
 576        prompt.push_str(&context[..editable_range.start]);
 577
 578        prompt.push_str("<|fim_suffix|>");
 579        prompt.push_str(&context[editable_range.end..]);
 580        if !prompt.ends_with('\n') {
 581            prompt.push('\n');
 582        }
 583
 584        prompt.push_str("<|fim_middle|>");
 585        prompt.push_str(START_MARKER);
 586        prompt.push_str(&context[editable_range.start..cursor_offset]);
 587        prompt.push_str(CURSOR_MARKER);
 588        prompt.push_str(&context[cursor_offset..editable_range.end]);
 589        if !prompt.ends_with('\n') {
 590            prompt.push('\n');
 591        }
 592        prompt.push_str(SEPARATOR);
 593    }
 594}
 595
 596pub mod v0131_git_merge_markers_prefix {
 597    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 598    //!
 599    //! Example prompt:
 600    //!
 601    //! <|file_sep|>path/to/target_file.py
 602    //! <|fim_prefix|>
 603    //! code before editable region
 604    //! <<<<<<< CURRENT
 605    //! code that
 606    //! needs to<|user_cursor|>
 607    //! be rewritten
 608    //! =======
 609    //! <|fim_suffix|>
 610    //! code after editable region
 611    //! <|fim_middle|>
 612    //!
 613    //! Expected output (should be generated by the model):
 614    //!
 615    //! updated
 616    //! code with
 617    //! changes applied
 618    //! >>>>>>> UPDATED
 619
 620    use super::*;
 621
 622    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 623    pub const SEPARATOR: &str = "=======\n";
 624    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 625
 626    pub fn write_cursor_excerpt_section(
 627        prompt: &mut String,
 628        path: &Path,
 629        context: &str,
 630        editable_range: &Range<usize>,
 631        cursor_offset: usize,
 632    ) {
 633        let path_str = path.to_string_lossy();
 634        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 635
 636        prompt.push_str("<|fim_prefix|>");
 637        prompt.push_str(&context[..editable_range.start]);
 638        prompt.push_str(START_MARKER);
 639        prompt.push_str(&context[editable_range.start..cursor_offset]);
 640        prompt.push_str(CURSOR_MARKER);
 641        prompt.push_str(&context[cursor_offset..editable_range.end]);
 642        if !prompt.ends_with('\n') {
 643            prompt.push('\n');
 644        }
 645        prompt.push_str(SEPARATOR);
 646
 647        prompt.push_str("<|fim_suffix|>");
 648        prompt.push_str(&context[editable_range.end..]);
 649        if !prompt.ends_with('\n') {
 650            prompt.push('\n');
 651        }
 652
 653        prompt.push_str("<|fim_middle|>");
 654    }
 655}
 656
 657pub mod v0211_prefill {
 658    use super::*;
 659
 660    pub fn get_prefill(input: &ZetaPromptInput) -> String {
 661        let editable_region = &input.cursor_excerpt
 662            [input.editable_range_in_excerpt.start..input.editable_range_in_excerpt.end];
 663
 664        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
 665        let prefill_len = editable_region.floor_char_boundary(prefill_len);
 666
 667        // Find a token boundary to avoid splitting tokens in the prefill.
 668        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
 669        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
 670        // the \n and consume any consecutive \n characters after it.
 671        let prefill = &editable_region[..prefill_len];
 672        match prefill.rfind('\n') {
 673            Some(pos) => {
 674                let mut end = pos + 1;
 675                while end < editable_region.len()
 676                    && editable_region.as_bytes().get(end) == Some(&b'\n')
 677                {
 678                    end += 1;
 679                }
 680                editable_region[..end].to_string()
 681            }
 682            // No newline found. Fall back to splitting before the last space
 683            // (word-level boundary)
 684            None => match prefill.rfind(' ') {
 685                Some(pos) => prefill[..pos].to_string(),
 686                None => prefill.to_string(),
 687            },
 688        }
 689    }
 690}
 691
 692pub mod seed_coder {
 693    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
 694    //!
 695    //! Seed-Coder uses different FIM tokens and order than Qwen:
 696    //! - SPM order: suffix comes FIRST, then prefix, then middle
 697    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
 698    //! - File markers: StarCoder-style `<filename>path` (single token + path)
 699    //!
 700    //! All context (related files, edit history) goes in the PREFIX section.
 701    //! The suffix contains only code after the editable region.
 702    //!
 703    //! Example prompt:
 704    //!
 705    //! <[fim-suffix]>
 706    //! code after editable region
 707    //! <[fim-prefix]><filename>related/file.py
 708    //! related file content
 709    //!
 710    //! <filename>edit_history
 711    //! --- a/some_file.py
 712    //! +++ b/some_file.py
 713    //! -old
 714    //! +new
 715    //!
 716    //! <filename>path/to/target_file.py
 717    //! code before editable region
 718    //! <<<<<<< CURRENT
 719    //! code that
 720    //! needs to<|user_cursor|>
 721    //! be rewritten
 722    //! =======
 723    //! <[fim-middle]>
 724    //!
 725    //! Expected output (model generates):
 726    //!
 727    //! updated
 728    //! code with
 729    //! changes applied
 730    //! >>>>>>> UPDATED
 731
 732    use super::*;
 733
 734    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
 735    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
 736    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
 737    pub const FILE_MARKER: &str = "<filename>";
 738
 739    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 740    pub const SEPARATOR: &str = "=======\n";
 741    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 742
 743    pub fn format_prompt_with_budget(
 744        path: &Path,
 745        context: &str,
 746        editable_range: &Range<usize>,
 747        cursor_offset: usize,
 748        events: &[Arc<Event>],
 749        related_files: &[RelatedFile],
 750        max_tokens: usize,
 751    ) -> String {
 752        let suffix_section = build_suffix_section(context, editable_range);
 753        let cursor_prefix_section =
 754            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
 755
 756        let suffix_tokens = estimate_tokens(suffix_section.len());
 757        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
 758        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
 759
 760        let edit_history_section = super::format_edit_history_within_budget(
 761            events,
 762            FILE_MARKER,
 763            "edit_history",
 764            budget_after_cursor,
 765        );
 766        let edit_history_tokens = estimate_tokens(edit_history_section.len());
 767        let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 768
 769        let related_files_section = super::format_related_files_within_budget(
 770            related_files,
 771            FILE_MARKER,
 772            budget_after_edit_history,
 773        );
 774
 775        let mut prompt = String::new();
 776        prompt.push_str(&suffix_section);
 777        prompt.push_str(FIM_PREFIX);
 778        prompt.push_str(&related_files_section);
 779        if !related_files_section.is_empty() {
 780            prompt.push('\n');
 781        }
 782        prompt.push_str(&edit_history_section);
 783        if !edit_history_section.is_empty() {
 784            prompt.push('\n');
 785        }
 786        prompt.push_str(&cursor_prefix_section);
 787        prompt.push_str(FIM_MIDDLE);
 788        prompt
 789    }
 790
 791    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
 792        let mut section = String::new();
 793        section.push_str(FIM_SUFFIX);
 794        section.push_str(&context[editable_range.end..]);
 795        if !section.ends_with('\n') {
 796            section.push('\n');
 797        }
 798        section
 799    }
 800
 801    fn build_cursor_prefix_section(
 802        path: &Path,
 803        context: &str,
 804        editable_range: &Range<usize>,
 805        cursor_offset: usize,
 806    ) -> String {
 807        let mut section = String::new();
 808        let path_str = path.to_string_lossy();
 809        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
 810
 811        section.push_str(&context[..editable_range.start]);
 812        section.push_str(START_MARKER);
 813        section.push_str(&context[editable_range.start..cursor_offset]);
 814        section.push_str(CURSOR_MARKER);
 815        section.push_str(&context[cursor_offset..editable_range.end]);
 816        if !section.ends_with('\n') {
 817            section.push('\n');
 818        }
 819        section.push_str(SEPARATOR);
 820        section
 821    }
 822}
 823
 824/// The zeta1 prompt format
 825pub mod zeta1 {
 826    use super::*;
 827    use std::fmt::Write;
 828
 829    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
 830    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
 831    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
 832    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
 833
 834    const INSTRUCTION_HEADER: &str = concat!(
 835        "### Instruction:\n",
 836        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
 837        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
 838        "into account the cursor location.\n\n",
 839        "### User Edits:\n\n"
 840    );
 841    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
 842    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
 843
 844    /// Formats a complete zeta1 prompt from the input events and excerpt.
 845    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
 846        let mut prompt = String::with_capacity(
 847            INSTRUCTION_HEADER.len()
 848                + input_events.len()
 849                + EXCERPT_HEADER.len()
 850                + input_excerpt.len()
 851                + RESPONSE_HEADER.len(),
 852        );
 853        prompt.push_str(INSTRUCTION_HEADER);
 854        prompt.push_str(input_events);
 855        prompt.push_str(EXCERPT_HEADER);
 856        prompt.push_str(input_excerpt);
 857        prompt.push_str(RESPONSE_HEADER);
 858        prompt
 859    }
 860
 861    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
 862    /// editable and context byte-offset ranges within `cursor_excerpt`.
 863    pub fn format_zeta1_from_input(
 864        input: &ZetaPromptInput,
 865        editable_range: Range<usize>,
 866        context_range: Range<usize>,
 867    ) -> String {
 868        let events = format_zeta1_events(&input.events);
 869        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
 870        format_zeta1_prompt(&events, &excerpt)
 871    }
 872
 873    /// Formats events in zeta1 style (oldest first).
 874    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
 875        let mut result = String::new();
 876        for event in events {
 877            let event_string = format_zeta1_event(event);
 878            if event_string.is_empty() {
 879                continue;
 880            }
 881            if !result.is_empty() {
 882                result.push_str("\n\n");
 883            }
 884            result.push_str(&event_string);
 885        }
 886        result
 887    }
 888
 889    fn format_zeta1_event(event: &Event) -> String {
 890        match event {
 891            Event::BufferChange {
 892                path,
 893                old_path,
 894                diff,
 895                ..
 896            } => {
 897                let mut prompt = String::new();
 898                if old_path != path {
 899                    writeln!(
 900                        prompt,
 901                        "User renamed {} to {}\n",
 902                        old_path.display(),
 903                        path.display()
 904                    )
 905                    .ok();
 906                }
 907                if !diff.is_empty() {
 908                    write!(
 909                        prompt,
 910                        "User edited {}:\n```diff\n{}\n```",
 911                        path.display(),
 912                        diff
 913                    )
 914                    .ok();
 915                }
 916                prompt
 917            }
 918        }
 919    }
 920
 921    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
 922    /// within `cursor_excerpt`.
 923    fn format_zeta1_excerpt(
 924        input: &ZetaPromptInput,
 925        editable_range: Range<usize>,
 926        context_range: Range<usize>,
 927    ) -> String {
 928        let path_str = input.cursor_path.to_string_lossy();
 929        let excerpt = &*input.cursor_excerpt;
 930        let cursor_offset = input.cursor_offset_in_excerpt;
 931
 932        let mut prompt = String::new();
 933        writeln!(&mut prompt, "```{path_str}").ok();
 934
 935        let starts_at_file_beginning =
 936            input.excerpt_start_row == Some(0) && context_range.start == 0;
 937        if starts_at_file_beginning {
 938            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
 939        }
 940
 941        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
 942
 943        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
 944        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
 945        prompt.push_str(CURSOR_MARKER);
 946        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
 947        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
 948
 949        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
 950        write!(prompt, "\n```").ok();
 951
 952        prompt
 953    }
 954
 955    /// Cleans zeta1 model output by extracting content between editable region
 956    /// markers and converting the zeta1 cursor marker to the universal one.
 957    /// Returns `None` if the output doesn't contain the expected markers.
 958    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
 959        let content = output.replace(CURSOR_MARKER, "");
 960
 961        let content_start = content
 962            .find(EDITABLE_REGION_START_MARKER)
 963            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
 964            .map(|pos| {
 965                if content.as_bytes().get(pos) == Some(&b'\n') {
 966                    pos + 1
 967                } else {
 968                    pos
 969                }
 970            })
 971            .unwrap_or(0);
 972
 973        let content_end = content
 974            .find(EDITABLE_REGION_END_MARKER)
 975            .map(|pos| {
 976                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
 977                    pos - 1
 978                } else {
 979                    pos
 980                }
 981            })
 982            .unwrap_or(content.len());
 983
 984        if content_start > content_end {
 985            return Some(String::new());
 986        }
 987
 988        let extracted = &content[content_start..content_end];
 989
 990        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
 991            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
 992            let text_before_cursor = text_before_cursor
 993                .find(EDITABLE_REGION_START_MARKER)
 994                .map(|pos| {
 995                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
 996                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
 997                        after_marker + 1
 998                    } else {
 999                        after_marker
1000                    }
1001                })
1002                .unwrap_or(0);
1003            let offset_in_extracted = zeta1_cursor_pos
1004                .saturating_sub(text_before_cursor)
1005                .min(extracted.len());
1006            offset_in_extracted
1007        });
1008
1009        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
1010        if let Some(offset) = cursor_offset {
1011            result.push_str(&extracted[..offset]);
1012            result.push_str(super::CURSOR_MARKER);
1013            result.push_str(&extracted[offset..]);
1014        } else {
1015            result.push_str(extracted);
1016        }
1017
1018        Some(result)
1019    }
1020}
1021
1022#[cfg(test)]
1023mod tests {
1024    use super::*;
1025    use indoc::indoc;
1026
1027    fn make_input(
1028        cursor_excerpt: &str,
1029        editable_range: Range<usize>,
1030        cursor_offset: usize,
1031        events: Vec<Event>,
1032        related_files: Vec<RelatedFile>,
1033    ) -> ZetaPromptInput {
1034        ZetaPromptInput {
1035            cursor_path: Path::new("test.rs").into(),
1036            cursor_excerpt: cursor_excerpt.into(),
1037            editable_range_in_excerpt: editable_range,
1038            cursor_offset_in_excerpt: cursor_offset,
1039            excerpt_start_row: None,
1040            events: events.into_iter().map(Arc::new).collect(),
1041            related_files,
1042            excerpt_ranges: None,
1043            preferred_model: None,
1044            in_open_source_repo: false,
1045            can_collect_data: false,
1046        }
1047    }
1048
1049    fn make_event(path: &str, diff: &str) -> Event {
1050        Event::BufferChange {
1051            path: Path::new(path).into(),
1052            old_path: Path::new(path).into(),
1053            diff: diff.to_string(),
1054            predicted: false,
1055            in_open_source_repo: false,
1056        }
1057    }
1058
1059    fn make_related_file(path: &str, content: &str) -> RelatedFile {
1060        RelatedFile {
1061            path: Path::new(path).into(),
1062            max_row: content.lines().count() as u32,
1063            excerpts: vec![RelatedExcerpt {
1064                row_range: 0..content.lines().count() as u32,
1065                text: content.into(),
1066            }],
1067            in_open_source_repo: false,
1068        }
1069    }
1070
1071    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1072        format_zeta_prompt_with_budget(input, ZetaFormat::V0114180EditableRegion, max_tokens)
1073    }
1074
1075    #[test]
1076    fn test_no_truncation_when_within_budget() {
1077        let input = make_input(
1078            "prefix\neditable\nsuffix",
1079            7..15,
1080            10,
1081            vec![make_event("a.rs", "-old\n+new\n")],
1082            vec![make_related_file("related.rs", "fn helper() {}\n")],
1083        );
1084
1085        assert_eq!(
1086            format_with_budget(&input, 10000),
1087            indoc! {r#"
1088                <|file_sep|>related.rs
1089                fn helper() {}
1090                <|file_sep|>edit history
1091                --- a/a.rs
1092                +++ b/a.rs
1093                -old
1094                +new
1095                <|file_sep|>test.rs
1096                <|fim_prefix|>
1097                prefix
1098                <|fim_middle|>current
1099                edi<|user_cursor|>table
1100                <|fim_suffix|>
1101
1102                suffix
1103                <|fim_middle|>updated
1104            "#}
1105        );
1106    }
1107
1108    #[test]
1109    fn test_truncation_drops_edit_history_when_budget_tight() {
1110        let input = make_input(
1111            "code",
1112            0..4,
1113            2,
1114            vec![make_event("a.rs", "-x\n+y\n")],
1115            vec![
1116                make_related_file("r1.rs", "a\n"),
1117                make_related_file("r2.rs", "b\n"),
1118            ],
1119        );
1120
1121        assert_eq!(
1122            format_with_budget(&input, 10000),
1123            indoc! {r#"
1124                <|file_sep|>r1.rs
1125                a
1126                <|file_sep|>r2.rs
1127                b
1128                <|file_sep|>edit history
1129                --- a/a.rs
1130                +++ b/a.rs
1131                -x
1132                +y
1133                <|file_sep|>test.rs
1134                <|fim_prefix|>
1135                <|fim_middle|>current
1136                co<|user_cursor|>de
1137                <|fim_suffix|>
1138                <|fim_middle|>updated
1139            "#}
1140        );
1141
1142        assert_eq!(
1143            format_with_budget(&input, 50),
1144            indoc! {r#"
1145                <|file_sep|>r1.rs
1146                a
1147                <|file_sep|>r2.rs
1148                b
1149                <|file_sep|>test.rs
1150                <|fim_prefix|>
1151                <|fim_middle|>current
1152                co<|user_cursor|>de
1153                <|fim_suffix|>
1154                <|fim_middle|>updated
1155            "#}
1156        );
1157    }
1158
1159    #[test]
1160    fn test_truncation_includes_partial_excerpts() {
1161        let input = make_input(
1162            "x",
1163            0..1,
1164            0,
1165            vec![],
1166            vec![RelatedFile {
1167                path: Path::new("big.rs").into(),
1168                max_row: 30,
1169                in_open_source_repo: false,
1170                excerpts: vec![
1171                    RelatedExcerpt {
1172                        row_range: 0..10,
1173                        text: "first excerpt\n".into(),
1174                    },
1175                    RelatedExcerpt {
1176                        row_range: 10..20,
1177                        text: "second excerpt\n".into(),
1178                    },
1179                    RelatedExcerpt {
1180                        row_range: 20..30,
1181                        text: "third excerpt\n".into(),
1182                    },
1183                ],
1184            }],
1185        );
1186
1187        assert_eq!(
1188            format_with_budget(&input, 10000),
1189            indoc! {r#"
1190                <|file_sep|>big.rs
1191                first excerpt
1192                ...
1193                second excerpt
1194                ...
1195                third excerpt
1196                <|file_sep|>test.rs
1197                <|fim_prefix|>
1198                <|fim_middle|>current
1199                <|user_cursor|>x
1200                <|fim_suffix|>
1201                <|fim_middle|>updated
1202            "#}
1203        );
1204
1205        assert_eq!(
1206            format_with_budget(&input, 50),
1207            indoc! {r#"
1208                <|file_sep|>big.rs
1209                first excerpt
1210                ...
1211                <|file_sep|>test.rs
1212                <|fim_prefix|>
1213                <|fim_middle|>current
1214                <|user_cursor|>x
1215                <|fim_suffix|>
1216                <|fim_middle|>updated
1217            "#}
1218        );
1219    }
1220
1221    #[test]
1222    fn test_truncation_drops_older_events_first() {
1223        let input = make_input(
1224            "x",
1225            0..1,
1226            0,
1227            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
1228            vec![],
1229        );
1230
1231        assert_eq!(
1232            format_with_budget(&input, 10000),
1233            indoc! {r#"
1234                <|file_sep|>edit history
1235                --- a/old.rs
1236                +++ b/old.rs
1237                -1
1238                --- a/new.rs
1239                +++ b/new.rs
1240                -2
1241                <|file_sep|>test.rs
1242                <|fim_prefix|>
1243                <|fim_middle|>current
1244                <|user_cursor|>x
1245                <|fim_suffix|>
1246                <|fim_middle|>updated
1247            "#}
1248        );
1249
1250        assert_eq!(
1251            format_with_budget(&input, 55),
1252            indoc! {r#"
1253                <|file_sep|>edit history
1254                --- a/new.rs
1255                +++ b/new.rs
1256                -2
1257                <|file_sep|>test.rs
1258                <|fim_prefix|>
1259                <|fim_middle|>current
1260                <|user_cursor|>x
1261                <|fim_suffix|>
1262                <|fim_middle|>updated
1263            "#}
1264        );
1265    }
1266
1267    #[test]
1268    fn test_cursor_excerpt_always_included_with_minimal_budget() {
1269        let input = make_input(
1270            "fn main() {}",
1271            0..12,
1272            3,
1273            vec![make_event("a.rs", "-old\n+new\n")],
1274            vec![make_related_file("related.rs", "helper\n")],
1275        );
1276
1277        assert_eq!(
1278            format_with_budget(&input, 30),
1279            indoc! {r#"
1280                <|file_sep|>test.rs
1281                <|fim_prefix|>
1282                <|fim_middle|>current
1283                fn <|user_cursor|>main() {}
1284                <|fim_suffix|>
1285                <|fim_middle|>updated
1286            "#}
1287        );
1288    }
1289
1290    fn format_seed_coder(input: &ZetaPromptInput) -> String {
1291        format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, 10000)
1292    }
1293
1294    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
1295        format_zeta_prompt_with_budget(input, ZetaFormat::V0211SeedCoder, max_tokens)
1296    }
1297
1298    #[test]
1299    fn test_seed_coder_basic_format() {
1300        let input = make_input(
1301            "prefix\neditable\nsuffix",
1302            7..15,
1303            10,
1304            vec![make_event("a.rs", "-old\n+new\n")],
1305            vec![make_related_file("related.rs", "fn helper() {}\n")],
1306        );
1307
1308        assert_eq!(
1309            format_seed_coder(&input),
1310            indoc! {r#"
1311                <[fim-suffix]>
1312                suffix
1313                <[fim-prefix]><filename>related.rs
1314                fn helper() {}
1315
1316                <filename>edit_history
1317                --- a/a.rs
1318                +++ b/a.rs
1319                -old
1320                +new
1321
1322                <filename>test.rs
1323                prefix
1324                <<<<<<< CURRENT
1325                edi<|user_cursor|>table
1326                =======
1327                <[fim-middle]>"#}
1328        );
1329    }
1330
1331    #[test]
1332    fn test_seed_coder_no_context() {
1333        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
1334
1335        assert_eq!(
1336            format_seed_coder(&input),
1337            indoc! {r#"
1338                <[fim-suffix]>
1339                after
1340                <[fim-prefix]><filename>test.rs
1341                before
1342                <<<<<<< CURRENT
1343                mid<|user_cursor|>dle
1344                =======
1345                <[fim-middle]>"#}
1346        );
1347    }
1348
1349    #[test]
1350    fn test_seed_coder_truncation_drops_context() {
1351        let input = make_input(
1352            "code",
1353            0..4,
1354            2,
1355            vec![make_event("a.rs", "-x\n+y\n")],
1356            vec![make_related_file("r1.rs", "content\n")],
1357        );
1358
1359        // With large budget, everything is included
1360        assert_eq!(
1361            format_seed_coder(&input),
1362            indoc! {r#"
1363                <[fim-suffix]>
1364                <[fim-prefix]><filename>r1.rs
1365                content
1366
1367                <filename>edit_history
1368                --- a/a.rs
1369                +++ b/a.rs
1370                -x
1371                +y
1372
1373                <filename>test.rs
1374                <<<<<<< CURRENT
1375                co<|user_cursor|>de
1376                =======
1377                <[fim-middle]>"#}
1378        );
1379
1380        // With tight budget, context is dropped but cursor section remains
1381        assert_eq!(
1382            format_seed_coder_with_budget(&input, 30),
1383            indoc! {r#"
1384                <[fim-suffix]>
1385                <[fim-prefix]><filename>test.rs
1386                <<<<<<< CURRENT
1387                co<|user_cursor|>de
1388                =======
1389                <[fim-middle]>"#}
1390        );
1391    }
1392
1393    #[test]
1394    fn test_seed_coder_clean_output() {
1395        let output_with_marker = "new code\n>>>>>>> UPDATED\n";
1396        let output_without_marker = "new code\n";
1397
1398        assert_eq!(
1399            clean_zeta2_model_output(output_with_marker, ZetaFormat::V0211SeedCoder),
1400            "new code\n"
1401        );
1402        assert_eq!(
1403            clean_zeta2_model_output(output_without_marker, ZetaFormat::V0211SeedCoder),
1404            "new code\n"
1405        );
1406    }
1407
1408    #[test]
1409    fn test_format_zeta1_from_input_basic() {
1410        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
1411        let input = ZetaPromptInput {
1412            cursor_path: Path::new("src/main.rs").into(),
1413            cursor_excerpt: excerpt.into(),
1414            editable_range_in_excerpt: 15..41,
1415            cursor_offset_in_excerpt: 30,
1416            excerpt_start_row: Some(0),
1417            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
1418            related_files: vec![],
1419            excerpt_ranges: None,
1420            preferred_model: None,
1421            in_open_source_repo: false,
1422            can_collect_data: false,
1423        };
1424
1425        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
1426
1427        assert_eq!(
1428            prompt,
1429            concat!(
1430                "### Instruction:\n",
1431                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1432                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1433                "into account the cursor location.\n",
1434                "\n",
1435                "### User Edits:\n",
1436                "\n",
1437                "User edited other.rs:\n",
1438                "```diff\n",
1439                "-old\n",
1440                "+new\n",
1441                "\n",
1442                "```\n",
1443                "\n",
1444                "### User Excerpt:\n",
1445                "\n",
1446                "```src/main.rs\n",
1447                "<|start_of_file|>\n",
1448                "fn before() {}\n",
1449                "<|editable_region_start|>\n",
1450                "fn foo() {\n",
1451                "    <|user_cursor_is_here|>let x = 1;\n",
1452                "\n",
1453                "<|editable_region_end|>}\n",
1454                "fn after() {}\n",
1455                "\n",
1456                "```\n",
1457                "\n",
1458                "### Response:\n",
1459            ),
1460        );
1461    }
1462
1463    #[test]
1464    fn test_format_zeta1_from_input_no_start_of_file() {
1465        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
1466        let input = ZetaPromptInput {
1467            cursor_path: Path::new("src/main.rs").into(),
1468            cursor_excerpt: excerpt.into(),
1469            editable_range_in_excerpt: 0..28,
1470            cursor_offset_in_excerpt: 15,
1471            excerpt_start_row: Some(10),
1472            events: vec![],
1473            related_files: vec![],
1474            excerpt_ranges: None,
1475            preferred_model: None,
1476            in_open_source_repo: false,
1477            can_collect_data: false,
1478        };
1479
1480        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
1481
1482        assert_eq!(
1483            prompt,
1484            concat!(
1485                "### Instruction:\n",
1486                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1487                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1488                "into account the cursor location.\n",
1489                "\n",
1490                "### User Edits:\n",
1491                "\n",
1492                "\n",
1493                "\n",
1494                "### User Excerpt:\n",
1495                "\n",
1496                "```src/main.rs\n",
1497                "<|editable_region_start|>\n",
1498                "fn foo() {\n",
1499                "    <|user_cursor_is_here|>let x = 1;\n",
1500                "}\n",
1501                "\n",
1502                "<|editable_region_end|>\n",
1503                "```\n",
1504                "\n",
1505                "### Response:\n",
1506            ),
1507        );
1508    }
1509
1510    #[test]
1511    fn test_format_zeta1_from_input_with_sub_ranges() {
1512        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
1513        let editable_range = 10..37;
1514        let context_range = 0..excerpt.len();
1515
1516        let input = ZetaPromptInput {
1517            cursor_path: Path::new("test.rs").into(),
1518            cursor_excerpt: excerpt.into(),
1519            editable_range_in_excerpt: editable_range.clone(),
1520            cursor_offset_in_excerpt: 25,
1521            excerpt_start_row: Some(0),
1522            events: vec![],
1523            related_files: vec![],
1524            excerpt_ranges: None,
1525            preferred_model: None,
1526            in_open_source_repo: false,
1527            can_collect_data: false,
1528        };
1529
1530        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
1531
1532        assert_eq!(
1533            prompt,
1534            concat!(
1535                "### Instruction:\n",
1536                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
1537                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
1538                "into account the cursor location.\n",
1539                "\n",
1540                "### User Edits:\n",
1541                "\n",
1542                "\n",
1543                "\n",
1544                "### User Excerpt:\n",
1545                "\n",
1546                "```test.rs\n",
1547                "<|start_of_file|>\n",
1548                "// prefix\n",
1549                "<|editable_region_start|>\n",
1550                "fn foo() {\n",
1551                "    <|user_cursor_is_here|>let x = 1;\n",
1552                "}\n",
1553                "<|editable_region_end|>\n",
1554                "// suffix\n",
1555                "\n",
1556                "```\n",
1557                "\n",
1558                "### Response:\n",
1559            ),
1560        );
1561    }
1562
1563    #[test]
1564    fn test_clean_zeta1_model_output_basic() {
1565        let output = indoc! {"
1566            <|editable_region_start|>
1567            fn main() {
1568                println!(\"hello\");
1569            }
1570            <|editable_region_end|>
1571        "};
1572
1573        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1574        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
1575    }
1576
1577    #[test]
1578    fn test_clean_zeta1_model_output_with_cursor() {
1579        let output = indoc! {"
1580            <|editable_region_start|>
1581            fn main() {
1582                <|user_cursor_is_here|>println!(\"hello\");
1583            }
1584            <|editable_region_end|>
1585        "};
1586
1587        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1588        assert_eq!(
1589            cleaned,
1590            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
1591        );
1592    }
1593
1594    #[test]
1595    fn test_clean_zeta1_model_output_no_markers() {
1596        let output = "fn main() {}\n";
1597        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1598        assert_eq!(cleaned, "fn main() {}\n");
1599    }
1600
1601    #[test]
1602    fn test_clean_zeta1_model_output_empty_region() {
1603        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
1604        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
1605        assert_eq!(cleaned, "");
1606    }
1607}