zeta_prompt.rs

   1pub mod excerpt_ranges;
   2pub mod multi_region;
   3
   4use anyhow::{Result, anyhow};
   5use serde::{Deserialize, Serialize};
   6use std::fmt::Write;
   7use std::ops::Range;
   8use std::path::Path;
   9use std::sync::Arc;
  10use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
  11
  12pub use crate::excerpt_ranges::{
  13    ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
  14};
  15
/// Marker string for the cursor position. It is handed to the multi-region
/// editable-region writers and listed among the special tokens of the
/// multi-region formats.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes to
/// `format_prompt_with_budget_for_format`.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  23
  24fn estimate_tokens(bytes: usize) -> usize {
  25    bytes / 3
  26}
  27
  28/// Leave some slack to avoid overflow.
  29fn apply_prompt_budget_margin(max_tokens: usize) -> usize {
  30    (max_tokens as f64 * 0.9).floor() as usize
  31}
  32
/// Input from which `format_zeta_prompt` builds a prompt and against which
/// `parse_zeta2_model_output` interprets model output.
///
/// Fields marked `#[serde(default)]` may be absent when deserializing.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path handed to the cursor-excerpt writers; related files with this same
    /// path have their redundant excerpts filtered out.
    pub cursor_path: Arc<Path>,
    /// Text that is scanned for special tokens and used to convert offset
    /// ranges into row ranges.
    pub cursor_excerpt: Arc<str>,
    /// Presumably the byte offset of the cursor within `cursor_excerpt`
    /// (see `syntax_ranges`) — TODO confirm against `resolve_cursor_region`.
    pub cursor_offset_in_excerpt: usize,
    /// When present, added to row ranges computed inside `cursor_excerpt`
    /// before they are compared with related-excerpt row ranges. When absent,
    /// related files are used unfiltered.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Events passed on to the edit-history / FIM prompt assembly.
    pub events: Vec<Arc<Event>>,
    /// Related files for the prompt; `None` is treated as an empty list.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    /// Omitted from the serialized form when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    // NOTE(review): the three fields below are not read in the visible part of
    // this file; their consumers live elsewhere.
    #[serde(default)]
    pub in_open_source_repo: bool,
    #[serde(default)]
    pub can_collect_data: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
  63
/// Identifies a prompt/output format version.
///
/// The variant name is the format's string form (via `IntoStaticStr`), and
/// `ZetaFormat::parse` matches user input against those names. The default
/// format is `V0131GitMergeMarkersPrefix`.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Needed because `v0226Hashline` starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
    /// Multi-block marker spans with NO_EDITS sentinel.
    V0306SeedMultiRegions,
    /// Byte-exact marker spans; all intermediate markers emitted; repeated marker means no-edit.
    V0316SeedMultiRegions,
    /// V0316 with larger block sizes.
    V0318SeedMultiRegions,
    /// V0316, but marker numbers are relative to the cursor block (e.g. -1, -0, +1).
    V0317SeedMultiRegions,
}
  99
 100impl std::fmt::Display for ZetaFormat {
 101    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 102        write!(f, "{}", <&'static str>::from(self))
 103    }
 104}
 105
 106impl ZetaFormat {
 107    pub fn parse(format_name: &str) -> Result<Self> {
 108        let mut results = ZetaFormat::iter().filter(|version| {
 109            <&'static str>::from(version)
 110                .to_lowercase()
 111                .contains(&format_name.to_lowercase())
 112        });
 113        let Some(result) = results.next() else {
 114            anyhow::bail!(
 115                "`{format_name}` did not match any of:\n{}",
 116                Self::options_as_string()
 117            );
 118        };
 119        if results.next().is_some() {
 120            anyhow::bail!(
 121                "`{format_name}` matched more than one of:\n{}",
 122                Self::options_as_string()
 123            );
 124        }
 125        Ok(result)
 126    }
 127
 128    pub fn options_as_string() -> String {
 129        ZetaFormat::iter()
 130            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 131            .collect::<Vec<_>>()
 132            .concat()
 133    }
 134}
 135
/// An event included in the prompt's edit history.
///
/// Serialized with the variant name stored under an `"event"` key.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, rendered by `write_event` as a unified-diff
    /// style header followed by `diff`.
    BufferChange {
        /// Written after `+++ b` in the rendered header.
        path: Arc<Path>,
        /// Written after `--- a` in the rendered header.
        old_path: Arc<Path>,
        /// Appended verbatim after the header.
        diff: String,
        /// When true, the rendered event is prefixed with
        /// `// User accepted prediction:`.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; not rendered.
        in_open_source_repo: bool,
    },
}
 147
 148impl Event {
 149    pub fn in_open_source_repo(&self) -> bool {
 150        match self {
 151            Event::BufferChange {
 152                in_open_source_repo,
 153                ..
 154            } => *in_open_source_repo,
 155        }
 156    }
 157}
 158
 159pub fn write_event(prompt: &mut String, event: &Event) {
 160    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 161        for component in path.components() {
 162            prompt.push('/');
 163            write!(prompt, "{}", component.as_os_str().display()).ok();
 164        }
 165    }
 166    match event {
 167        Event::BufferChange {
 168            path,
 169            old_path,
 170            diff,
 171            predicted,
 172            in_open_source_repo: _,
 173        } => {
 174            if *predicted {
 175                prompt.push_str("// User accepted prediction:\n");
 176            }
 177            prompt.push_str("--- a");
 178            write_path_as_unix_str(prompt, old_path.as_ref());
 179            prompt.push_str("\n+++ b");
 180            write_path_as_unix_str(prompt, path.as_ref());
 181            prompt.push('\n');
 182            prompt.push_str(diff);
 183        }
 184    }
 185}
 186
/// A diagnostic carried in `ZetaPromptInput::active_buffer_diagnostics`.
// NOTE(review): no code in the visible part of this file reads these fields;
// the field meanings below are taken from their names and types only.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    pub severity: Option<i32>,
    pub message: String,
    pub snippet: String,
    // Presumably the buffer rows covered by `snippet` — TODO confirm.
    pub snippet_buffer_row_range: Range<u32>,
    // Presumably a range inside `snippet` — TODO confirm.
    pub diagnostic_range_in_snippet: Range<usize>,
}
 195
/// A file whose excerpts may be included in the prompt alongside the cursor
/// excerpt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Compared with the cursor path when filtering redundant excerpts.
    pub path: Arc<Path>,
    pub max_row: u32,
    /// Excerpts of this file; `filter_redundant_excerpts` drops files whose
    /// excerpt list is empty.
    pub excerpts: Vec<RelatedExcerpt>,
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 204
/// One excerpt of a `RelatedFile`.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by the excerpt; compared against the cursor excerpt's row
    /// range in `filter_redundant_excerpts`.
    pub row_range: Range<u32>,
    pub text: Arc<str>,
    /// Defaults to 0 when absent from the serialized form.
    #[serde(default)]
    pub order: usize,
}
 212
 213pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 214    special_tokens_for_format(format)
 215        .iter()
 216        .any(|token| input.cursor_excerpt.contains(token))
 217}
 218
/// Formats a prompt for `input` in the given `format`, using
/// `MAX_PROMPT_TOKENS` as the token budget.
///
/// Returns whatever `format_prompt_with_budget_for_format` returns for that
/// budget, including `None`.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> Option<String> {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
 222
 223pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 224    match format {
 225        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
 226        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
 227        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
 228        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 229        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
 230        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
 231        ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 232        ZetaFormat::v0226Hashline => hashline::special_tokens(),
 233        ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
 234        ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
 235        ZetaFormat::V0316SeedMultiRegions => {
 236            static TOKENS: &[&str] = &[
 237                seed_coder::FIM_SUFFIX,
 238                seed_coder::FIM_PREFIX,
 239                seed_coder::FIM_MIDDLE,
 240                seed_coder::FILE_MARKER,
 241                multi_region::V0316_END_MARKER,
 242                CURSOR_MARKER,
 243                multi_region::MARKER_TAG_PREFIX,
 244            ];
 245            TOKENS
 246        }
 247        ZetaFormat::V0318SeedMultiRegions => {
 248            static TOKENS: &[&str] = &[
 249                seed_coder::FIM_SUFFIX,
 250                seed_coder::FIM_PREFIX,
 251                seed_coder::FIM_MIDDLE,
 252                seed_coder::FILE_MARKER,
 253                multi_region::V0318_END_MARKER,
 254                CURSOR_MARKER,
 255                multi_region::MARKER_TAG_PREFIX,
 256            ];
 257            TOKENS
 258        }
 259        ZetaFormat::V0317SeedMultiRegions => {
 260            static TOKENS: &[&str] = &[
 261                seed_coder::FIM_SUFFIX,
 262                seed_coder::FIM_PREFIX,
 263                seed_coder::FIM_MIDDLE,
 264                seed_coder::FILE_MARKER,
 265                multi_region::V0317_END_MARKER,
 266                CURSOR_MARKER,
 267                multi_region::RELATIVE_MARKER_TAG_PREFIX,
 268            ];
 269            TOKENS
 270        }
 271        ZetaFormat::V0306SeedMultiRegions => {
 272            static TOKENS: &[&str] = &[
 273                seed_coder::FIM_SUFFIX,
 274                seed_coder::FIM_PREFIX,
 275                seed_coder::FIM_MIDDLE,
 276                seed_coder::FILE_MARKER,
 277                seed_coder::START_MARKER,
 278                seed_coder::SEPARATOR,
 279                seed_coder::END_MARKER,
 280                CURSOR_MARKER,
 281                multi_region::MARKER_TAG_PREFIX,
 282            ];
 283            TOKENS
 284        }
 285    }
 286}
 287
 288/// Returns the (editable_token_limit, context_token_limit) for a given format.
 289pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
 290    match format {
 291        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
 292        ZetaFormat::V0114180EditableRegion => (180, 350),
 293        ZetaFormat::V0120GitMergeMarkers
 294        | ZetaFormat::V0131GitMergeMarkersPrefix
 295        | ZetaFormat::V0211Prefill
 296        | ZetaFormat::V0211SeedCoder
 297        | ZetaFormat::v0226Hashline
 298        | ZetaFormat::V0306SeedMultiRegions
 299        | ZetaFormat::V0316SeedMultiRegions
 300        | ZetaFormat::V0318SeedMultiRegions
 301        | ZetaFormat::V0317SeedMultiRegions
 302        | ZetaFormat::V0304SeedNoEdits => (350, 150),
 303        ZetaFormat::V0304VariableEdit => (1024, 0),
 304    }
 305}
 306
 307pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 308    match format {
 309        ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
 310        ZetaFormat::V0112MiddleAtEnd
 311        | ZetaFormat::V0113Ordered
 312        | ZetaFormat::V0114180EditableRegion
 313        | ZetaFormat::V0120GitMergeMarkers
 314        | ZetaFormat::V0131GitMergeMarkersPrefix
 315        | ZetaFormat::V0211Prefill
 316        | ZetaFormat::V0211SeedCoder
 317        | ZetaFormat::V0304VariableEdit
 318        | ZetaFormat::V0306SeedMultiRegions
 319        | ZetaFormat::V0304SeedNoEdits => &[],
 320        ZetaFormat::V0316SeedMultiRegions => &[multi_region::V0316_END_MARKER],
 321        ZetaFormat::V0318SeedMultiRegions => &[multi_region::V0318_END_MARKER],
 322        ZetaFormat::V0317SeedMultiRegions => &[multi_region::V0317_END_MARKER],
 323    }
 324}
 325
 326pub fn excerpt_ranges_for_format(
 327    format: ZetaFormat,
 328    ranges: &ExcerptRanges,
 329) -> (Range<usize>, Range<usize>) {
 330    match format {
 331        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 332            ranges.editable_150.clone(),
 333            ranges.editable_150_context_350.clone(),
 334        ),
 335        ZetaFormat::V0114180EditableRegion => (
 336            ranges.editable_180.clone(),
 337            ranges.editable_180_context_350.clone(),
 338        ),
 339        ZetaFormat::V0120GitMergeMarkers
 340        | ZetaFormat::V0131GitMergeMarkersPrefix
 341        | ZetaFormat::V0211Prefill
 342        | ZetaFormat::V0211SeedCoder
 343        | ZetaFormat::v0226Hashline
 344        | ZetaFormat::V0304SeedNoEdits
 345        | ZetaFormat::V0306SeedMultiRegions
 346        | ZetaFormat::V0316SeedMultiRegions
 347        | ZetaFormat::V0318SeedMultiRegions
 348        | ZetaFormat::V0317SeedMultiRegions => (
 349            ranges.editable_350.clone(),
 350            ranges.editable_350_context_150.clone(),
 351        ),
 352        ZetaFormat::V0304VariableEdit => {
 353            let context = ranges
 354                .editable_350_context_1024
 355                .clone()
 356                .or(ranges.editable_350_context_512.clone())
 357                .unwrap_or_else(|| ranges.editable_350_context_150.clone());
 358            (context.clone(), context)
 359        }
 360    }
 361}
 362
 363pub fn write_cursor_excerpt_section_for_format(
 364    format: ZetaFormat,
 365    prompt: &mut String,
 366    path: &Path,
 367    context: &str,
 368    editable_range: &Range<usize>,
 369    cursor_offset: usize,
 370) {
 371    match format {
 372        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
 373            prompt,
 374            path,
 375            context,
 376            editable_range,
 377            cursor_offset,
 378        ),
 379        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 380            v0113_ordered::write_cursor_excerpt_section(
 381                prompt,
 382                path,
 383                context,
 384                editable_range,
 385                cursor_offset,
 386            )
 387        }
 388        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 389            prompt,
 390            path,
 391            context,
 392            editable_range,
 393            cursor_offset,
 394        ),
 395        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 396            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 397                prompt,
 398                path,
 399                context,
 400                editable_range,
 401                cursor_offset,
 402            )
 403        }
 404        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
 405            seed_coder::write_cursor_excerpt_section(
 406                prompt,
 407                path,
 408                context,
 409                editable_range,
 410                cursor_offset,
 411            )
 412        }
 413        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
 414            prompt,
 415            path,
 416            context,
 417            editable_range,
 418            cursor_offset,
 419        ),
 420        ZetaFormat::V0304VariableEdit => {
 421            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
 422        }
 423        ZetaFormat::V0306SeedMultiRegions => {
 424            prompt.push_str(&build_v0306_cursor_prefix(
 425                path,
 426                context,
 427                editable_range,
 428                cursor_offset,
 429            ));
 430        }
 431        ZetaFormat::V0316SeedMultiRegions => {
 432            prompt.push_str(&build_v0316_cursor_prefix(
 433                path,
 434                context,
 435                editable_range,
 436                cursor_offset,
 437            ));
 438        }
 439        ZetaFormat::V0318SeedMultiRegions => {
 440            prompt.push_str(&build_v0318_cursor_prefix(
 441                path,
 442                context,
 443                editable_range,
 444                cursor_offset,
 445            ));
 446        }
 447        ZetaFormat::V0317SeedMultiRegions => {
 448            prompt.push_str(&build_v0317_cursor_prefix(
 449                path,
 450                context,
 451                editable_range,
 452                cursor_offset,
 453            ));
 454        }
 455    }
 456}
 457
 458fn build_v0306_cursor_prefix(
 459    path: &Path,
 460    context: &str,
 461    editable_range: &Range<usize>,
 462    cursor_offset: usize,
 463) -> String {
 464    let mut section = String::new();
 465    let path_str = path.to_string_lossy();
 466    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 467
 468    section.push_str(&context[..editable_range.start]);
 469    section.push_str(seed_coder::START_MARKER);
 470
 471    let editable_text = &context[editable_range.clone()];
 472    let cursor_in_editable = cursor_offset - editable_range.start;
 473    multi_region::write_editable_with_markers(
 474        &mut section,
 475        editable_text,
 476        cursor_in_editable,
 477        CURSOR_MARKER,
 478    );
 479
 480    if !section.ends_with('\n') {
 481        section.push('\n');
 482    }
 483    section.push_str(seed_coder::SEPARATOR);
 484    section
 485}
 486
 487fn build_v0316_cursor_prefix(
 488    path: &Path,
 489    context: &str,
 490    editable_range: &Range<usize>,
 491    cursor_offset: usize,
 492) -> String {
 493    let mut section = String::new();
 494    let path_str = path.to_string_lossy();
 495    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 496
 497    section.push_str(&context[..editable_range.start]);
 498
 499    let editable_text = &context[editable_range.clone()];
 500    let cursor_in_editable = cursor_offset - editable_range.start;
 501    multi_region::write_editable_with_markers_v0316(
 502        &mut section,
 503        editable_text,
 504        cursor_in_editable,
 505        CURSOR_MARKER,
 506    );
 507
 508    if !section.ends_with('\n') {
 509        section.push('\n');
 510    }
 511    section
 512}
 513
 514fn build_v0318_cursor_prefix(
 515    path: &Path,
 516    context: &str,
 517    editable_range: &Range<usize>,
 518    cursor_offset: usize,
 519) -> String {
 520    let mut section = String::new();
 521    let path_str = path.to_string_lossy();
 522    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 523
 524    section.push_str(&context[..editable_range.start]);
 525
 526    let editable_text = &context[editable_range.clone()];
 527    let cursor_in_editable = cursor_offset - editable_range.start;
 528    multi_region::write_editable_with_markers_v0318(
 529        &mut section,
 530        editable_text,
 531        cursor_in_editable,
 532        CURSOR_MARKER,
 533    );
 534
 535    if !section.ends_with('\n') {
 536        section.push('\n');
 537    }
 538    section
 539}
 540
 541fn build_v0317_cursor_prefix(
 542    path: &Path,
 543    context: &str,
 544    editable_range: &Range<usize>,
 545    cursor_offset: usize,
 546) -> String {
 547    let mut section = String::new();
 548    let path_str = path.to_string_lossy();
 549    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 550
 551    section.push_str(&context[..editable_range.start]);
 552
 553    let editable_text = &context[editable_range.clone()];
 554    let cursor_in_editable = cursor_offset - editable_range.start;
 555    multi_region::write_editable_with_markers_v0317(
 556        &mut section,
 557        editable_text,
 558        cursor_in_editable,
 559        CURSOR_MARKER,
 560    );
 561
 562    if !section.ends_with('\n') {
 563        section.push('\n');
 564    }
 565    section
 566}
 567
 568fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
 569    let start_row = text[0..range.start].matches('\n').count() as u32;
 570    let mut end_row = start_row + text[range.clone()].matches('\n').count() as u32;
 571    if !text[..range.end].ends_with('\n') {
 572        end_row += 1;
 573    }
 574    return start_row..end_row;
 575}
 576
 577pub fn format_prompt_with_budget_for_format(
 578    input: &ZetaPromptInput,
 579    format: ZetaFormat,
 580    max_tokens: usize,
 581) -> Option<String> {
 582    let (context, editable_range, context_range, cursor_offset) =
 583        resolve_cursor_region(input, format);
 584    let path = &*input.cursor_path;
 585
 586    let empty_files = Vec::new();
 587    let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
 588    let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
 589        let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
 590        let row_range = relative_row_range.start + cursor_excerpt_start_row
 591            ..relative_row_range.end + cursor_excerpt_start_row;
 592        &filter_redundant_excerpts(
 593            input_related_files.to_vec(),
 594            input.cursor_path.as_ref(),
 595            row_range,
 596        )
 597    } else {
 598        input_related_files
 599    };
 600
 601    let prompt = match format {
 602        ZetaFormat::V0211SeedCoder
 603        | ZetaFormat::V0304SeedNoEdits
 604        | ZetaFormat::V0306SeedMultiRegions
 605        | ZetaFormat::V0316SeedMultiRegions
 606        | ZetaFormat::V0318SeedMultiRegions
 607        | ZetaFormat::V0317SeedMultiRegions => {
 608            let mut cursor_section = String::new();
 609            write_cursor_excerpt_section_for_format(
 610                format,
 611                &mut cursor_section,
 612                path,
 613                context,
 614                &editable_range,
 615                cursor_offset,
 616            );
 617
 618            let budget_with_margin = apply_prompt_budget_margin(max_tokens);
 619            seed_coder::assemble_fim_prompt(
 620                context,
 621                &editable_range,
 622                &cursor_section,
 623                &input.events,
 624                related_files,
 625                budget_with_margin,
 626            )
 627        }
 628        _ => {
 629            let mut cursor_section = String::new();
 630            write_cursor_excerpt_section_for_format(
 631                format,
 632                &mut cursor_section,
 633                path,
 634                context,
 635                &editable_range,
 636                cursor_offset,
 637            );
 638
 639            let mut remaining_budget = apply_prompt_budget_margin(max_tokens);
 640            let cursor_tokens = estimate_tokens(cursor_section.len());
 641            remaining_budget = remaining_budget.saturating_sub(cursor_tokens);
 642
 643            let edit_history_section = format_edit_history_within_budget(
 644                &input.events,
 645                "<|file_sep|>",
 646                "edit history",
 647                remaining_budget,
 648                max_edit_event_count_for_format(&format),
 649            );
 650            let edit_history_tokens = estimate_tokens(edit_history_section.len());
 651            remaining_budget = remaining_budget.saturating_sub(edit_history_tokens);
 652
 653            let related_files_section = format_related_files_within_budget(
 654                &related_files,
 655                "<|file_sep|>",
 656                "",
 657                remaining_budget,
 658            );
 659
 660            let mut prompt = String::new();
 661            prompt.push_str(&related_files_section);
 662            prompt.push_str(&edit_history_section);
 663            prompt.push_str(&cursor_section);
 664            prompt
 665        }
 666    };
 667    let prompt_tokens = estimate_tokens(prompt.len());
 668    if prompt_tokens > max_tokens {
 669        return None;
 670    }
 671    return Some(prompt);
 672}
 673
 674pub fn filter_redundant_excerpts(
 675    mut related_files: Vec<RelatedFile>,
 676    cursor_path: &Path,
 677    cursor_row_range: Range<u32>,
 678) -> Vec<RelatedFile> {
 679    for file in &mut related_files {
 680        if file.path.as_ref() == cursor_path {
 681            file.excerpts.retain(|excerpt| {
 682                excerpt.row_range.start < cursor_row_range.start
 683                    || excerpt.row_range.end > cursor_row_range.end
 684            });
 685        }
 686    }
 687    related_files.retain(|file| !file.excerpts.is_empty());
 688    related_files
 689}
 690
 691pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
 692    match format {
 693        ZetaFormat::V0112MiddleAtEnd
 694        | ZetaFormat::V0113Ordered
 695        | ZetaFormat::V0114180EditableRegion
 696        | ZetaFormat::V0120GitMergeMarkers
 697        | ZetaFormat::V0131GitMergeMarkersPrefix
 698        | ZetaFormat::V0211Prefill
 699        | ZetaFormat::V0211SeedCoder
 700        | ZetaFormat::v0226Hashline
 701        | ZetaFormat::V0304SeedNoEdits
 702        | ZetaFormat::V0304VariableEdit
 703        | ZetaFormat::V0306SeedMultiRegions
 704        | ZetaFormat::V0316SeedMultiRegions
 705        | ZetaFormat::V0318SeedMultiRegions
 706        | ZetaFormat::V0317SeedMultiRegions => 6,
 707    }
 708}
 709
 710pub fn get_prefill_for_format(
 711    format: ZetaFormat,
 712    context: &str,
 713    editable_range: &Range<usize>,
 714) -> String {
 715    match format {
 716        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
 717        ZetaFormat::V0112MiddleAtEnd
 718        | ZetaFormat::V0113Ordered
 719        | ZetaFormat::V0114180EditableRegion
 720        | ZetaFormat::V0120GitMergeMarkers
 721        | ZetaFormat::V0131GitMergeMarkersPrefix
 722        | ZetaFormat::V0211SeedCoder
 723        | ZetaFormat::v0226Hashline
 724        | ZetaFormat::V0304VariableEdit => String::new(),
 725        ZetaFormat::V0304SeedNoEdits
 726        | ZetaFormat::V0306SeedMultiRegions
 727        | ZetaFormat::V0316SeedMultiRegions
 728        | ZetaFormat::V0318SeedMultiRegions
 729        | ZetaFormat::V0317SeedMultiRegions => String::new(),
 730    }
 731}
 732
 733pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
 734    match format {
 735        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
 736        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
 737        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
 738        ZetaFormat::V0211SeedCoder
 739        | ZetaFormat::V0304SeedNoEdits
 740        | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
 741        ZetaFormat::V0316SeedMultiRegions => Some(multi_region::V0316_END_MARKER),
 742        ZetaFormat::V0318SeedMultiRegions => Some(multi_region::V0318_END_MARKER),
 743        ZetaFormat::V0317SeedMultiRegions => Some(multi_region::V0317_END_MARKER),
 744        ZetaFormat::V0112MiddleAtEnd
 745        | ZetaFormat::V0113Ordered
 746        | ZetaFormat::V0114180EditableRegion
 747        | ZetaFormat::v0226Hashline
 748        | ZetaFormat::V0304VariableEdit => None,
 749    }
 750}
 751
 752pub fn encode_patch_as_output_for_format(
 753    format: ZetaFormat,
 754    old_editable_region: &str,
 755    patch: &str,
 756    cursor_offset: Option<usize>,
 757) -> Result<Option<String>> {
 758    match format {
 759        ZetaFormat::v0226Hashline => {
 760            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
 761        }
 762        ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
 763            old_editable_region,
 764            patch,
 765            cursor_offset,
 766        )
 767        .map(Some),
 768        ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
 769            Ok(seed_coder::no_edits(patch))
 770        }
 771        ZetaFormat::V0316SeedMultiRegions => {
 772            let empty_patch = patch.lines().count() <= 3;
 773            if empty_patch {
 774                let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
 775                let marker_num =
 776                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
 777                let tag = multi_region::marker_tag(marker_num);
 778                Ok(Some(format!(
 779                    "{tag}{tag}{}",
 780                    multi_region::V0316_END_MARKER
 781                )))
 782            } else {
 783                Ok(None)
 784            }
 785        }
 786        ZetaFormat::V0318SeedMultiRegions => {
 787            let empty_patch = patch.lines().count() <= 3;
 788            if empty_patch {
 789                let marker_offsets =
 790                    multi_region::compute_marker_offsets_v0318(old_editable_region);
 791                let marker_num =
 792                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
 793                let tag = multi_region::marker_tag(marker_num);
 794                Ok(Some(format!(
 795                    "{tag}{tag}{}",
 796                    multi_region::V0318_END_MARKER
 797                )))
 798            } else {
 799                Ok(None)
 800            }
 801        }
 802        ZetaFormat::V0317SeedMultiRegions => {
 803            let empty_patch = patch.lines().count() <= 3;
 804            if empty_patch {
 805                let tag = multi_region::marker_tag_relative(0);
 806                Ok(Some(format!(
 807                    "{tag}{tag}{}",
 808                    multi_region::V0317_END_MARKER
 809                )))
 810            } else {
 811                Ok(None)
 812            }
 813        }
 814        _ => Ok(None),
 815    }
 816}
 817
/// Result of `parse_zeta2_model_output`: replacement text together with the
/// part of the cursor excerpt it replaces.
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
}
 824
 825/// Parse model output for the given zeta format
 826pub fn parse_zeta2_model_output(
 827    output: &str,
 828    format: ZetaFormat,
 829    prompt_inputs: &ZetaPromptInput,
 830) -> Result<ParsedOutput> {
 831    let output = match output_end_marker_for_format(format) {
 832        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
 833        None => output,
 834    };
 835
 836    let (context, editable_range_in_context, context_range, cursor_offset) =
 837        resolve_cursor_region(prompt_inputs, format);
 838    let context_start = context_range.start;
 839    let old_editable_region = &context[editable_range_in_context.clone()];
 840    let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range_in_context.start);
 841
 842    let (range_in_context, output) = match format {
 843        ZetaFormat::v0226Hashline => (
 844            editable_range_in_context,
 845            if hashline::output_has_edit_commands(output) {
 846                hashline::apply_edit_commands(old_editable_region, output)
 847            } else {
 848                output.to_string()
 849            },
 850        ),
 851        ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
 852        ZetaFormat::V0304SeedNoEdits => (
 853            editable_range_in_context,
 854            if output.starts_with(seed_coder::NO_EDITS) {
 855                old_editable_region.to_string()
 856            } else {
 857                output.to_string()
 858            },
 859        ),
 860        ZetaFormat::V0306SeedMultiRegions => (
 861            editable_range_in_context,
 862            if output.starts_with(seed_coder::NO_EDITS) {
 863                old_editable_region.to_string()
 864            } else {
 865                multi_region::apply_marker_span(old_editable_region, output)?
 866            },
 867        ),
 868        ZetaFormat::V0316SeedMultiRegions => (
 869            editable_range_in_context,
 870            multi_region::apply_marker_span_v0316(old_editable_region, output)?,
 871        ),
 872        ZetaFormat::V0318SeedMultiRegions => (
 873            editable_range_in_context,
 874            multi_region::apply_marker_span_v0318(old_editable_region, output)?,
 875        ),
 876        ZetaFormat::V0317SeedMultiRegions => (
 877            editable_range_in_context,
 878            multi_region::apply_marker_span_v0317(
 879                old_editable_region,
 880                output,
 881                Some(cursor_offset_in_editable),
 882            )?,
 883        ),
 884        _ => (editable_range_in_context, output.to_string()),
 885    };
 886
 887    let range_in_excerpt =
 888        range_in_context.start + context_start..range_in_context.end + context_start;
 889
 890    Ok(ParsedOutput {
 891        new_editable_region: output,
 892        range_in_excerpt,
 893    })
 894}
 895
/// Returns the pair of ranges selected from `ranges` for `format`.
///
/// This is a public wrapper that forwards directly to
/// `excerpt_ranges_for_format`. `resolve_cursor_region` destructures the
/// result as `(editable_range, context_range)`.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
 902
 903pub fn resolve_cursor_region(
 904    input: &ZetaPromptInput,
 905    format: ZetaFormat,
 906) -> (&str, Range<usize>, Range<usize>, usize) {
 907    let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges {
 908        let (editable_tokens, context_tokens) = token_limits_for_format(format);
 909        compute_editable_and_context_ranges(
 910            &input.cursor_excerpt,
 911            input.cursor_offset_in_excerpt,
 912            syntax_ranges,
 913            editable_tokens,
 914            context_tokens,
 915        )
 916    } else {
 917        excerpt_range_for_format(format, &input.excerpt_ranges)
 918    };
 919    let context_start = context_range.start;
 920    let context_text = &input.cursor_excerpt[context_range.clone()];
 921    let adjusted_editable =
 922        (editable_range.start - context_start)..(editable_range.end - context_start);
 923    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 924
 925    (
 926        context_text,
 927        adjusted_editable,
 928        context_range,
 929        adjusted_cursor,
 930    )
 931}
 932
/// Computes the prefill string for `input` under `format`.
///
/// Resolves the context text and editable range via `resolve_cursor_region`
/// and forwards them to `get_prefill_for_format`.
pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
    get_prefill_for_format(format, context, &editable_range)
}
 937
 938fn format_edit_history_within_budget(
 939    events: &[Arc<Event>],
 940    file_marker: &str,
 941    edit_history_name: &str,
 942    max_tokens: usize,
 943    max_edit_event_count: usize,
 944) -> String {
 945    let header = format!("{}{}\n", file_marker, edit_history_name);
 946    let header_tokens = estimate_tokens(header.len());
 947    if header_tokens >= max_tokens {
 948        return String::new();
 949    }
 950
 951    let mut event_strings: Vec<String> = Vec::new();
 952    let mut total_tokens = header_tokens;
 953
 954    for event in events.iter().rev().take(max_edit_event_count) {
 955        let mut event_str = String::new();
 956        write_event(&mut event_str, event);
 957        let event_tokens = estimate_tokens(event_str.len());
 958
 959        if total_tokens + event_tokens > max_tokens {
 960            break;
 961        }
 962        total_tokens += event_tokens;
 963        event_strings.push(event_str);
 964    }
 965
 966    if event_strings.is_empty() {
 967        return String::new();
 968    }
 969
 970    let mut result = header;
 971    for event_str in event_strings.iter().rev() {
 972        result.push_str(event_str);
 973    }
 974    result
 975}
 976
 977fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
 978    let needs_newline = !excerpt.text.ends_with('\n');
 979    let needs_ellipsis = excerpt.row_range.end < file_max_row;
 980    let len = excerpt.text.len()
 981        + if needs_newline { "\n".len() } else { 0 }
 982        + if needs_ellipsis { "...\n".len() } else { 0 };
 983    estimate_tokens(len)
 984}
 985
 986pub fn format_related_files_within_budget(
 987    related_files: &[RelatedFile],
 988    file_prefix: &str,
 989    file_suffix: &str,
 990    max_tokens: usize,
 991) -> String {
 992    struct ExcerptCandidate {
 993        file_ix: usize,
 994        excerpt_ix: usize,
 995        order: usize,
 996    }
 997
 998    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
 999        .iter()
1000        .enumerate()
1001        .flat_map(|(file_ix, file)| {
1002            file.excerpts
1003                .iter()
1004                .enumerate()
1005                .map(move |(excerpt_ix, e)| ExcerptCandidate {
1006                    file_ix,
1007                    excerpt_ix,
1008                    order: e.order,
1009                })
1010        })
1011        .collect();
1012
1013    // Pre-compute file header strings and their token costs.
1014    let file_headers: Vec<String> = related_files
1015        .iter()
1016        .map(|file| {
1017            let path_str = file.path.to_string_lossy();
1018            format!("{}{}\n", file_prefix, path_str)
1019        })
1020        .collect();
1021
1022    // Sort the excerpts by their order and determine how many fit within the budget.
1023    let mut total_tokens = 0;
1024    let mut included_excerpt_count = 0_usize;
1025    let mut included_file_indices = vec![false; related_files.len()];
1026    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
1027    for candidate in &excerpt_candidates {
1028        let file = &related_files[candidate.file_ix];
1029        let excerpt = &file.excerpts[candidate.excerpt_ix];
1030        let file_already_included = included_file_indices[candidate.file_ix];
1031        let header_cost = if file_already_included {
1032            0
1033        } else {
1034            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
1035        };
1036        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
1037        if total_tokens + header_cost + excerpt_cost > max_tokens {
1038            break;
1039        }
1040        total_tokens += header_cost + excerpt_cost;
1041        if !file_already_included {
1042            included_file_indices[candidate.file_ix] = true;
1043        }
1044        included_excerpt_count += 1;
1045    }
1046
1047    excerpt_candidates.truncate(included_excerpt_count);
1048    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
1049
1050    // Render all of the files that fit within the token budget, in the original order.
1051    let mut result = String::new();
1052    let mut last_file_ix = None;
1053    for candidate in &excerpt_candidates {
1054        if last_file_ix != Some(candidate.file_ix) {
1055            if last_file_ix.is_some() {
1056                result.push_str(file_suffix);
1057            }
1058            result.push_str(&file_headers[candidate.file_ix]);
1059            last_file_ix = Some(candidate.file_ix);
1060        }
1061        let file = &related_files[candidate.file_ix];
1062        let excerpt = &file.excerpts[candidate.excerpt_ix];
1063        result.push_str(&excerpt.text);
1064        if !result.ends_with('\n') {
1065            result.push('\n');
1066        }
1067        if excerpt.row_range.end < file.max_row {
1068            result.push_str("...\n");
1069        }
1070    }
1071
1072    result
1073}
1074
1075pub fn write_related_files(
1076    prompt: &mut String,
1077    related_files: &[RelatedFile],
1078) -> Vec<Range<usize>> {
1079    let mut ranges = Vec::new();
1080    for file in related_files {
1081        let start = prompt.len();
1082        let path_str = file.path.to_string_lossy();
1083        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1084        for excerpt in &file.excerpts {
1085            prompt.push_str(&excerpt.text);
1086            if !prompt.ends_with('\n') {
1087                prompt.push('\n');
1088            }
1089            if excerpt.row_range.end < file.max_row {
1090                prompt.push_str("...\n");
1091            }
1092        }
1093        let end = prompt.len();
1094        ranges.push(start..end);
1095    }
1096    ranges
1097}
1098
1099mod v0112_middle_at_end {
1100    use super::*;
1101
1102    pub fn special_tokens() -> &'static [&'static str] {
1103        &[
1104            "<|fim_prefix|>",
1105            "<|fim_suffix|>",
1106            "<|fim_middle|>",
1107            "<|file_sep|>",
1108            CURSOR_MARKER,
1109        ]
1110    }
1111
1112    pub fn write_cursor_excerpt_section(
1113        prompt: &mut String,
1114        path: &Path,
1115        context: &str,
1116        editable_range: &Range<usize>,
1117        cursor_offset: usize,
1118    ) {
1119        let path_str = path.to_string_lossy();
1120        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1121
1122        prompt.push_str("<|fim_prefix|>\n");
1123        prompt.push_str(&context[..editable_range.start]);
1124
1125        prompt.push_str("<|fim_suffix|>\n");
1126        prompt.push_str(&context[editable_range.end..]);
1127        if !prompt.ends_with('\n') {
1128            prompt.push('\n');
1129        }
1130
1131        prompt.push_str("<|fim_middle|>current\n");
1132        prompt.push_str(&context[editable_range.start..cursor_offset]);
1133        prompt.push_str(CURSOR_MARKER);
1134        prompt.push_str(&context[cursor_offset..editable_range.end]);
1135        if !prompt.ends_with('\n') {
1136            prompt.push('\n');
1137        }
1138
1139        prompt.push_str("<|fim_middle|>updated\n");
1140    }
1141}
1142
1143mod v0113_ordered {
1144    use super::*;
1145
1146    pub fn special_tokens() -> &'static [&'static str] {
1147        &[
1148            "<|fim_prefix|>",
1149            "<|fim_suffix|>",
1150            "<|fim_middle|>",
1151            "<|file_sep|>",
1152            CURSOR_MARKER,
1153        ]
1154    }
1155
1156    pub fn write_cursor_excerpt_section(
1157        prompt: &mut String,
1158        path: &Path,
1159        context: &str,
1160        editable_range: &Range<usize>,
1161        cursor_offset: usize,
1162    ) {
1163        let path_str = path.to_string_lossy();
1164        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1165
1166        prompt.push_str("<|fim_prefix|>\n");
1167        prompt.push_str(&context[..editable_range.start]);
1168        if !prompt.ends_with('\n') {
1169            prompt.push('\n');
1170        }
1171
1172        prompt.push_str("<|fim_middle|>current\n");
1173        prompt.push_str(&context[editable_range.start..cursor_offset]);
1174        prompt.push_str(CURSOR_MARKER);
1175        prompt.push_str(&context[cursor_offset..editable_range.end]);
1176        if !prompt.ends_with('\n') {
1177            prompt.push('\n');
1178        }
1179
1180        prompt.push_str("<|fim_suffix|>\n");
1181        prompt.push_str(&context[editable_range.end..]);
1182        if !prompt.ends_with('\n') {
1183            prompt.push('\n');
1184        }
1185
1186        prompt.push_str("<|fim_middle|>updated\n");
1187    }
1188}
1189
mod v0114180_editable_region {
    use super::*;

    /// Marker strings used by this prompt format; identical to the set
    /// returned by `v0113_ordered::special_tokens`.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
1197
1198pub mod v0120_git_merge_markers {
1199    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1200    //!
1201    //! Example prompt:
1202    //!
1203    //! <|file_sep|>path/to/target_file.py
1204    //! <|fim_prefix|>
1205    //! code before editable region
1206    //! <|fim_suffix|>
1207    //! code after editable region
1208    //! <|fim_middle|>
1209    //! <<<<<<< CURRENT
1210    //! code that
1211    //! needs to<|user_cursor|>
1212    //! be rewritten
1213    //! =======
1214    //!
1215    //! Expected output (should be generated by the model):
1216    //!
1217    //! updated
1218    //! code with
1219    //! changes applied
1220    //! >>>>>>> UPDATED
1221
1222    use super::*;
1223
1224    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1225    pub const SEPARATOR: &str = "=======\n";
1226    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1227
1228    pub fn special_tokens() -> &'static [&'static str] {
1229        &[
1230            "<|fim_prefix|>",
1231            "<|fim_suffix|>",
1232            "<|fim_middle|>",
1233            "<|file_sep|>",
1234            START_MARKER,
1235            SEPARATOR,
1236            END_MARKER,
1237            CURSOR_MARKER,
1238        ]
1239    }
1240
1241    pub fn write_cursor_excerpt_section(
1242        prompt: &mut String,
1243        path: &Path,
1244        context: &str,
1245        editable_range: &Range<usize>,
1246        cursor_offset: usize,
1247    ) {
1248        let path_str = path.to_string_lossy();
1249        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1250
1251        prompt.push_str("<|fim_prefix|>");
1252        prompt.push_str(&context[..editable_range.start]);
1253
1254        prompt.push_str("<|fim_suffix|>");
1255        prompt.push_str(&context[editable_range.end..]);
1256        if !prompt.ends_with('\n') {
1257            prompt.push('\n');
1258        }
1259
1260        prompt.push_str("<|fim_middle|>");
1261        prompt.push_str(START_MARKER);
1262        prompt.push_str(&context[editable_range.start..cursor_offset]);
1263        prompt.push_str(CURSOR_MARKER);
1264        prompt.push_str(&context[cursor_offset..editable_range.end]);
1265        if !prompt.ends_with('\n') {
1266            prompt.push('\n');
1267        }
1268        prompt.push_str(SEPARATOR);
1269    }
1270}
1271
1272pub mod v0131_git_merge_markers_prefix {
1273    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1274    //!
1275    //! Example prompt:
1276    //!
1277    //! <|file_sep|>path/to/target_file.py
1278    //! <|fim_prefix|>
1279    //! code before editable region
1280    //! <<<<<<< CURRENT
1281    //! code that
1282    //! needs to<|user_cursor|>
1283    //! be rewritten
1284    //! =======
1285    //! <|fim_suffix|>
1286    //! code after editable region
1287    //! <|fim_middle|>
1288    //!
1289    //! Expected output (should be generated by the model):
1290    //!
1291    //! updated
1292    //! code with
1293    //! changes applied
1294    //! >>>>>>> UPDATED
1295
1296    use super::*;
1297
1298    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1299    pub const SEPARATOR: &str = "=======\n";
1300    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1301
1302    pub fn special_tokens() -> &'static [&'static str] {
1303        &[
1304            "<|fim_prefix|>",
1305            "<|fim_suffix|>",
1306            "<|fim_middle|>",
1307            "<|file_sep|>",
1308            START_MARKER,
1309            SEPARATOR,
1310            END_MARKER,
1311            CURSOR_MARKER,
1312        ]
1313    }
1314
1315    pub fn write_cursor_excerpt_section(
1316        prompt: &mut String,
1317        path: &Path,
1318        context: &str,
1319        editable_range: &Range<usize>,
1320        cursor_offset: usize,
1321    ) {
1322        let path_str = path.to_string_lossy();
1323        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1324
1325        prompt.push_str("<|fim_prefix|>");
1326        prompt.push_str(&context[..editable_range.start]);
1327        prompt.push_str(START_MARKER);
1328        prompt.push_str(&context[editable_range.start..cursor_offset]);
1329        prompt.push_str(CURSOR_MARKER);
1330        prompt.push_str(&context[cursor_offset..editable_range.end]);
1331        if !prompt.ends_with('\n') {
1332            prompt.push('\n');
1333        }
1334        prompt.push_str(SEPARATOR);
1335
1336        prompt.push_str("<|fim_suffix|>");
1337        prompt.push_str(&context[editable_range.end..]);
1338        if !prompt.ends_with('\n') {
1339            prompt.push('\n');
1340        }
1341
1342        prompt.push_str("<|fim_middle|>");
1343    }
1344}
1345
1346pub mod v0211_prefill {
1347    use super::*;
1348
1349    pub fn special_tokens() -> &'static [&'static str] {
1350        v0131_git_merge_markers_prefix::special_tokens()
1351    }
1352
1353    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1354        let editable_region = &context[editable_range.start..editable_range.end];
1355
1356        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1357        let prefill_len = editable_region.floor_char_boundary(prefill_len);
1358
1359        // Find a token boundary to avoid splitting tokens in the prefill.
1360        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1361        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1362        // the \n and consume any consecutive \n characters after it.
1363        let prefill = &editable_region[..prefill_len];
1364        match prefill.rfind('\n') {
1365            Some(pos) => {
1366                let mut end = pos + 1;
1367                while end < editable_region.len()
1368                    && editable_region.as_bytes().get(end) == Some(&b'\n')
1369                {
1370                    end += 1;
1371                }
1372                editable_region[..end].to_string()
1373            }
1374            // No newline found. Fall back to splitting before the last space
1375            // (word-level boundary)
1376            None => match prefill.rfind(' ') {
1377                Some(pos) => prefill[..pos].to_string(),
1378                None => prefill.to_string(),
1379            },
1380        }
1381    }
1382}
1383
1384pub mod hashline {
1385
1386    use std::fmt::Display;
1387
1388    pub const END_MARKER: &str = "<|fim_middle|>updated";
1389    pub const START_MARKER: &str = "<|fim_middle|>current";
1390
1391    use super::*;
1392
1393    const SET_COMMAND_MARKER: &str = "<|set|>";
1394    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
1395    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1396
1397    pub fn special_tokens() -> &'static [&'static str] {
1398        return &[
1399            SET_COMMAND_MARKER,
1400            "<|set_range|>",
1401            INSERT_COMMAND_MARKER,
1402            NO_EDITS_COMMAND_MARKER,
1403            CURSOR_MARKER,
1404            "<|file_sep|>",
1405            "<|fim_prefix|>",
1406            "<|fim_suffix|>",
1407            "<|fim_middle|>",
1408        ];
1409    }
1410
1411    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1412    #[derive(Debug, Clone, PartialEq, Eq)]
1413    struct LineRef {
1414        index: usize,
1415        hash: u8,
1416    }
1417
1418    impl Display for LineRef {
1419        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1420            write!(f, "{}:{:02x}", self.index, self.hash)
1421        }
1422    }
1423
1424    pub fn hash_line(line: &[u8]) -> u8 {
1425        let mut h: u8 = 0;
1426        for &byte in line {
1427            h = h.wrapping_add(byte);
1428        }
1429        return h;
1430    }
1431
1432    /// Write the hashline-encoded editable region into `out`. Each line of
1433    /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1434    /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1435    /// to the start of `editable_text`).
1436    pub fn write_hashline_editable_region(
1437        out: &mut String,
1438        editable_text: &str,
1439        cursor_offset_in_editable: usize,
1440    ) {
1441        let mut offset = 0;
1442        for (i, line) in editable_text.lines().enumerate() {
1443            let (head, cursor, tail) = if cursor_offset_in_editable > offset
1444                && cursor_offset_in_editable < offset + line.len()
1445            {
1446                (
1447                    &line[..cursor_offset_in_editable - offset],
1448                    CURSOR_MARKER,
1449                    &line[cursor_offset_in_editable - offset..],
1450                )
1451            } else {
1452                (line, "", "")
1453            };
1454            write!(
1455                out,
1456                "\n{}|{head}{cursor}{tail}",
1457                LineRef {
1458                    index: i,
1459                    hash: hash_line(line.as_bytes())
1460                }
1461            )
1462            .unwrap();
1463            offset += line.len() + 1;
1464        }
1465    }
1466
1467    pub fn write_cursor_excerpt_section(
1468        prompt: &mut String,
1469        path: &Path,
1470        context: &str,
1471        editable_range: &Range<usize>,
1472        cursor_offset: usize,
1473    ) {
1474        let path_str = path.to_string_lossy();
1475        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1476
1477        prompt.push_str("<|fim_prefix|>\n");
1478        prompt.push_str(&context[..editable_range.start]);
1479        prompt.push_str(START_MARKER);
1480
1481        let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1482        let editable_region = &context[editable_range.clone()];
1483        write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1484
1485        if !prompt.ends_with('\n') {
1486            prompt.push('\n');
1487        }
1488
1489        prompt.push_str("<|fim_suffix|>\n");
1490        prompt.push_str(&context[editable_range.end..]);
1491        if !prompt.ends_with('\n') {
1492            prompt.push('\n');
1493        }
1494
1495        prompt.push_str(END_MARKER);
1496        prompt.push('\n');
1497    }
1498
    /// A single edit command parsed from the model output.
    ///
    /// The `content` of each command borrows from the model output string it
    /// was parsed from.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            /// First line of the replaced range.
            start: LineRef,
            /// Last line of the replaced range (inclusive).
            end: LineRef,
            /// Replacement text for the whole range.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            /// Line after which the content is inserted; `None` means before
            /// the first line.
            after: Option<LineRef>,
            /// Text to insert.
            content: &'a str,
        },
    }
1516
1517    /// Parse a line reference like `3:c3` into a `LineRef`.
1518    fn parse_line_ref(s: &str) -> Option<LineRef> {
1519        let (idx_str, hash_str) = s.split_once(':')?;
1520        let index = idx_str.parse::<usize>().ok()?;
1521        let hash = u8::from_str_radix(hash_str, 16).ok()?;
1522        Some(LineRef { index, hash })
1523    }
1524
    /// Parse the model output into a list of `EditCommand`s.
    ///
    /// The output is scanned line by line. A line whose trimmed text starts
    /// with `<|set|>` or `<|insert|>` opens a command; the rest of that line
    /// is the command's specifier. The command's content is all following
    /// text up to (not including) the next line that opens a command, or the
    /// end of the output. Lines that appear before any command are skipped.
    ///
    /// - `<|set|>` with a `start-end` specifier yields a range `Set`; with a
    ///   single line reference it yields a `Set` whose start and end are
    ///   equal. A `<|set|>` whose specifier cannot be parsed is dropped.
    /// - `<|insert|>` always yields an `Insert`; when its specifier cannot be
    ///   parsed, `after` is `None`.
    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
        let mut commands = Vec::new();
        // Byte offset of the start of the line currently being examined.
        let mut offset = 0usize;

        while offset < model_output.len() {
            // Isolate the current line (without its newline) and compute where
            // the next line begins.
            let next_nl = model_output[offset..]
                .find('\n')
                .map(|i| offset + i)
                .unwrap_or(model_output.len());
            let line = &model_output[offset..next_nl];
            let line_end = if next_nl < model_output.len() {
                next_nl + 1
            } else {
                next_nl
            };

            let trimmed = line.trim();
            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
                (true, spec)
            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
                (false, spec)
            } else {
                // Not a command header: skip this line.
                offset = line_end;
                continue;
            };

            // Consume body lines until the next command header or the end of
            // the output. `content_end` tracks the end of the last body line
            // (including its newline, if any).
            let mut content_end = line_end;
            let mut scan = line_end;

            while scan < model_output.len() {
                let body_nl = model_output[scan..]
                    .find('\n')
                    .map(|i| scan + i)
                    .unwrap_or(model_output.len());
                let body_line = &model_output[scan..body_nl];
                if body_line.trim().starts_with(SET_COMMAND_MARKER)
                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
                {
                    break;
                }
                scan = if body_nl < model_output.len() {
                    body_nl + 1
                } else {
                    body_nl
                };
                content_end = scan;
            }

            let content = &model_output[line_end..content_end];

            if is_set {
                if let Some((start_str, end_str)) = specifier.split_once('-') {
                    // Range form: both endpoints must parse, otherwise the
                    // command is silently dropped.
                    if let (Some(start), Some(end)) =
                        (parse_line_ref(start_str), parse_line_ref(end_str))
                    {
                        commands.push(EditCommand::Set {
                            start,
                            end,
                            content,
                        });
                    }
                } else if let Some(target) = parse_line_ref(specifier) {
                    // Single-line form: start and end refer to the same line.
                    commands.push(EditCommand::Set {
                        start: target.clone(),
                        end: target,
                        content,
                    });
                }
            } else {
                // An unparsable specifier results in `after == None`.
                let after = parse_line_ref(specifier);
                commands.push(EditCommand::Insert { after, content });
            }

            // Resume at the next command header (or the end of the output).
            offset = scan;
        }

        commands
    }
1604
1605    /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1606    /// (as opposed to being a plain full-replacement output).
1607    /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1608    /// editable region, returning the plain text content.
1609    pub fn strip_hashline_prefixes(region: &str) -> String {
1610        let mut decoded: String = region
1611            .lines()
1612            .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1613            .collect::<Vec<_>>()
1614            .join("\n");
1615        if region.ends_with('\n') {
1616            decoded.push('\n');
1617        }
1618        decoded
1619    }
1620
1621    pub fn output_has_edit_commands(model_output: &str) -> bool {
1622        model_output.contains(SET_COMMAND_MARKER)
1623            || model_output.contains(INSERT_COMMAND_MARKER)
1624            || model_output.contains(NO_EDITS_COMMAND_MARKER)
1625    }
1626
    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
    /// original editable region text.
    ///
    /// `editable_region` is the original text of the editable region (without hash
    /// prefixes). `model_output` is the raw model response containing edit commands.
    ///
    /// Returns the full replacement text for the editable region.
    ///
    /// Behavior details:
    /// - If the output (ignoring leading whitespace) starts with the no-edits
    ///   marker, the editable region is returned unchanged.
    /// - A command is ignored when a referenced line index is out of range or
    ///   its hash does not match the hash of the original line. A set command
    ///   is also ignored when its start index is greater than its end index.
    /// - When several set commands start at the same line, the last one wins.
    /// - Set or insert commands that target a line inside a replaced range
    ///   (other than insertions after the range's last line) are not emitted.
    /// - An insert without a valid line reference is placed before the first
    ///   line.
    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
        if model_output
            .trim_start()
            .starts_with(NO_EDITS_COMMAND_MARKER)
        {
            return editable_region.to_string();
        }

        // Hashes of the original lines, used to validate line references.
        let original_lines: Vec<&str> = editable_region.lines().collect();
        let old_hashes: Vec<u8> = original_lines
            .iter()
            .map(|line| hash_line(line.as_bytes()))
            .collect();

        let commands = parse_edit_commands(model_output);

        // For set operations: indexed by start line → Some((end line index, content))
        // For insert operations: indexed by line index → vec of content to insert after
        // Insert-before-first is tracked separately.
        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
        let mut insert_before_first: Vec<&str> = Vec::new();
        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];

        for command in &commands {
            match command {
                EditCommand::Set {
                    start,
                    end,
                    content,
                } => {
                    // Accept only in-range, correctly ordered references whose
                    // hashes match the original lines.
                    if start.index < old_hashes.len()
                        && end.index < old_hashes.len()
                        && start.index <= end.index
                        && old_hashes[start.index] == start.hash
                        && old_hashes[end.index] == end.hash
                    {
                        // A later command with the same start overwrites an
                        // earlier one.
                        set_ops[start.index] = Some((end.index, *content));
                    }
                }
                EditCommand::Insert { after, content } => match after {
                    None => insert_before_first.push(*content),
                    Some(line_ref) => {
                        if line_ref.index < old_hashes.len()
                            && old_hashes[line_ref.index] == line_ref.hash
                        {
                            insert_after[line_ref.index].push(*content);
                        }
                    }
                },
            }
        }

        let mut result = String::new();

        // Emit any insertions before the first line
        for content in &insert_before_first {
            result.push_str(content);
            if !content.ends_with('\n') {
                result.push('\n');
            }
        }

        // Walk the original lines, substituting replaced ranges and weaving
        // in insertions.
        let mut i = 0;
        while i < original_lines.len() {
            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
                // Replace lines i..=end_index with the replacement content
                result.push_str(replacement);
                // An empty replacement deletes the range without leaving a
                // blank line behind.
                if !replacement.is_empty() && !replacement.ends_with('\n') {
                    result.push('\n');
                }
                // Emit any insertions after the end of this set range
                if *end_index < insert_after.len() {
                    for content in &insert_after[*end_index] {
                        result.push_str(content);
                        if !content.ends_with('\n') {
                            result.push('\n');
                        }
                    }
                }
                // Skip past the replaced range; commands anchored inside it
                // are never visited.
                i = end_index + 1;
            } else {
                // Keep the original line
                result.push_str(original_lines[i]);
                result.push('\n');
                // Emit any insertions after this line
                for content in &insert_after[i] {
                    result.push_str(content);
                    if !content.ends_with('\n') {
                        result.push('\n');
                    }
                }
                i += 1;
            }
        }

        // Preserve trailing newline behavior: if the original ended with a
        // newline the result already has one; if it didn't, trim the extra one
        // we added.
        if !editable_region.ends_with('\n') && result.ends_with('\n') {
            result.pop();
        }

        result
    }
1738
1739    /// Convert a unified diff patch into hashline edit commands.
1740    ///
1741    /// Parses the unified diff `patch` directly to determine which lines of
1742    /// `old_text` are deleted/replaced and what new lines are added, then emits
1743    /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1744    /// `{index}:{hash}` identifiers.
1745    ///
1746    /// `cursor_offset` is an optional byte offset into the first hunk's new
1747    /// text (context + additions) where the cursor marker should be placed.
1748    pub fn patch_to_edit_commands(
1749        old_text: &str,
1750        patch: &str,
1751        cursor_offset: Option<usize>,
1752    ) -> Result<String> {
1753        let old_lines: Vec<&str> = old_text.lines().collect();
1754        let old_hashes: Vec<u8> = old_lines
1755            .iter()
1756            .map(|line| hash_line(line.as_bytes()))
1757            .collect();
1758
1759        let mut result = String::new();
1760        let mut first_hunk = true;
1761
1762        struct Hunk<'a> {
1763            line_range: Range<usize>,
1764            new_text_lines: Vec<&'a str>,
1765            cursor_line_offset_in_new_text: Option<(usize, usize)>,
1766        }
1767
1768        // Parse the patch line by line. We only care about hunk headers,
1769        // context, deletions, and additions.
1770        let mut old_line_index: usize = 0;
1771        let mut current_hunk: Option<Hunk> = None;
1772        // Byte offset tracking within the hunk's new text for cursor placement.
1773        let mut new_text_byte_offset: usize = 0;
1774        // The line index of the last old line seen before/in the current hunk
1775        // (used for insert-after reference).
1776        let mut last_old_line_before_hunk: Option<usize> = None;
1777
1778        fn flush_hunk(
1779            hunk: Hunk,
1780            last_old_line: Option<usize>,
1781            result: &mut String,
1782            old_hashes: &[u8],
1783        ) {
1784            if hunk.line_range.is_empty() {
1785                // Pure insertion — reference the old line to insert after when in bounds.
1786                if let Some(after) = last_old_line
1787                    && let Some(&hash) = old_hashes.get(after)
1788                {
1789                    write!(
1790                        result,
1791                        "{INSERT_COMMAND_MARKER}{}\n",
1792                        LineRef { index: after, hash }
1793                    )
1794                    .unwrap();
1795                } else {
1796                    result.push_str(INSERT_COMMAND_MARKER);
1797                    result.push('\n');
1798                }
1799            } else {
1800                let start = hunk.line_range.start;
1801                let end_exclusive = hunk.line_range.end;
1802                let deleted_line_count = end_exclusive.saturating_sub(start);
1803
1804                if deleted_line_count == 1 {
1805                    if let Some(&hash) = old_hashes.get(start) {
1806                        write!(
1807                            result,
1808                            "{SET_COMMAND_MARKER}{}\n",
1809                            LineRef { index: start, hash }
1810                        )
1811                        .unwrap();
1812                    } else {
1813                        result.push_str(SET_COMMAND_MARKER);
1814                        result.push('\n');
1815                    }
1816                } else {
1817                    let end_inclusive = end_exclusive - 1;
1818                    match (
1819                        old_hashes.get(start).copied(),
1820                        old_hashes.get(end_inclusive).copied(),
1821                    ) {
1822                        (Some(start_hash), Some(end_hash)) => {
1823                            write!(
1824                                result,
1825                                "{SET_COMMAND_MARKER}{}-{}\n",
1826                                LineRef {
1827                                    index: start,
1828                                    hash: start_hash
1829                                },
1830                                LineRef {
1831                                    index: end_inclusive,
1832                                    hash: end_hash
1833                                }
1834                            )
1835                            .unwrap();
1836                        }
1837                        _ => {
1838                            result.push_str(SET_COMMAND_MARKER);
1839                            result.push('\n');
1840                        }
1841                    }
1842                }
1843            }
1844            for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1845                if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1846                    && line_offset == cursor_line_offset
1847                {
1848                    result.push_str(&line[..char_offset]);
1849                    result.push_str(CURSOR_MARKER);
1850                    result.push_str(&line[char_offset..]);
1851                    continue;
1852                }
1853
1854                result.push_str(line);
1855            }
1856        }
1857
1858        for raw_line in patch.split_inclusive('\n') {
1859            if raw_line.starts_with("@@") {
1860                // Flush any pending change hunk from a previous patch hunk.
1861                if let Some(hunk) = current_hunk.take() {
1862                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1863                }
1864
1865                // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1866                // We intentionally do not trust old_start as a direct local index into `old_text`,
1867                // because some patches are produced against a larger file region and carry
1868                // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1869                if first_hunk {
1870                    new_text_byte_offset = 0;
1871                    first_hunk = false;
1872                }
1873                continue;
1874            }
1875
1876            if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1877                continue;
1878            }
1879            if raw_line.starts_with("\\ No newline") {
1880                continue;
1881            }
1882
1883            if raw_line.starts_with('-') {
1884                // Extend or start a change hunk with this deleted old line.
1885                match &mut current_hunk {
1886                    Some(Hunk {
1887                        line_range: range, ..
1888                    }) => range.end = old_line_index + 1,
1889                    None => {
1890                        current_hunk = Some(Hunk {
1891                            line_range: old_line_index..old_line_index + 1,
1892                            new_text_lines: Vec::new(),
1893                            cursor_line_offset_in_new_text: None,
1894                        });
1895                    }
1896                }
1897                old_line_index += 1;
1898            } else if let Some(added_content) = raw_line.strip_prefix('+') {
1899                // Place cursor marker if cursor_offset falls within this line.
1900                let mut cursor_line_offset = None;
1901                if let Some(cursor_off) = cursor_offset
1902                    && (first_hunk
1903                        || cursor_off >= new_text_byte_offset
1904                            && cursor_off <= new_text_byte_offset + added_content.len())
1905                {
1906                    let line_offset = added_content.floor_char_boundary(
1907                        cursor_off
1908                            .saturating_sub(new_text_byte_offset)
1909                            .min(added_content.len()),
1910                    );
1911                    cursor_line_offset = Some(line_offset);
1912                }
1913
1914                new_text_byte_offset += added_content.len();
1915
1916                let hunk = current_hunk.get_or_insert(Hunk {
1917                    line_range: old_line_index..old_line_index,
1918                    new_text_lines: vec![],
1919                    cursor_line_offset_in_new_text: None,
1920                });
1921                hunk.new_text_lines.push(added_content);
1922                hunk.cursor_line_offset_in_new_text = cursor_line_offset
1923                    .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1924            } else {
1925                // Context line (starts with ' ' or is empty).
1926                if let Some(hunk) = current_hunk.take() {
1927                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1928                }
1929                last_old_line_before_hunk = Some(old_line_index);
1930                old_line_index += 1;
1931                let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1932                new_text_byte_offset += content.len();
1933            }
1934        }
1935
1936        // Flush final group.
1937        if let Some(hunk) = current_hunk.take() {
1938            flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1939        }
1940
1941        // Trim a single trailing newline.
1942        if result.ends_with('\n') {
1943            result.pop();
1944        }
1945
1946        if result.is_empty() {
1947            return Ok(NO_EDITS_COMMAND_MARKER.to_string());
1948        }
1949
1950        Ok(result)
1951    }
1952
1953    #[cfg(test)]
1954    mod tests {
1955        use super::*;
1956        use indoc::indoc;
1957
1958        #[test]
1959        fn test_format_cursor_region() {
1960            struct Case {
1961                name: &'static str,
1962                context: &'static str,
1963                editable_range: Range<usize>,
1964                cursor_offset: usize,
1965                expected: &'static str,
1966            }
1967
1968            let cases = [
1969                Case {
1970                    name: "basic_cursor_placement",
1971                    context: "hello world\n",
1972                    editable_range: 0..12,
1973                    cursor_offset: 5,
1974                    expected: indoc! {"
1975                    <|file_sep|>test.rs
1976                    <|fim_prefix|>
1977                    <|fim_middle|>current
1978                    0:5c|hello<|user_cursor|> world
1979                    <|fim_suffix|>
1980                    <|fim_middle|>updated
1981                    "},
1982                },
1983                Case {
1984                    name: "multiline_cursor_on_second_line",
1985                    context: "aaa\nbbb\nccc\n",
1986                    editable_range: 0..12,
1987                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
1988                    expected: indoc! {"
1989                    <|file_sep|>test.rs
1990                    <|fim_prefix|>
1991                    <|fim_middle|>current
1992                    0:23|aaa
1993                    1:26|b<|user_cursor|>bb
1994                    2:29|ccc
1995                    <|fim_suffix|>
1996                    <|fim_middle|>updated
1997                    "},
1998                },
1999                Case {
2000                    name: "no_trailing_newline_in_context",
2001                    context: "line1\nline2",
2002                    editable_range: 0..11,
2003                    cursor_offset: 3,
2004                    expected: indoc! {"
2005                    <|file_sep|>test.rs
2006                    <|fim_prefix|>
2007                    <|fim_middle|>current
2008                    0:d9|lin<|user_cursor|>e1
2009                    1:da|line2
2010                    <|fim_suffix|>
2011                    <|fim_middle|>updated
2012                    "},
2013                },
2014                Case {
2015                    name: "leading_newline_in_editable_region",
2016                    context: "\nabc\n",
2017                    editable_range: 0..5,
2018                    cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
2019                    expected: indoc! {"
2020                    <|file_sep|>test.rs
2021                    <|fim_prefix|>
2022                    <|fim_middle|>current
2023                    0:00|
2024                    1:26|a<|user_cursor|>bc
2025                    <|fim_suffix|>
2026                    <|fim_middle|>updated
2027                    "},
2028                },
2029                Case {
2030                    name: "with_suffix",
2031                    context: "abc\ndef",
2032                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
2033                    cursor_offset: 2,
2034                    expected: indoc! {"
2035                    <|file_sep|>test.rs
2036                    <|fim_prefix|>
2037                    <|fim_middle|>current
2038                    0:26|ab<|user_cursor|>c
2039                    <|fim_suffix|>
2040                    def
2041                    <|fim_middle|>updated
2042                    "},
2043                },
2044                Case {
2045                    name: "unicode_two_byte_chars",
2046                    context: "héllo\n",
2047                    editable_range: 0..7,
2048                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
2049                    expected: indoc! {"
2050                    <|file_sep|>test.rs
2051                    <|fim_prefix|>
2052                    <|fim_middle|>current
2053                    0:1b|hé<|user_cursor|>llo
2054                    <|fim_suffix|>
2055                    <|fim_middle|>updated
2056                    "},
2057                },
2058                Case {
2059                    name: "unicode_three_byte_chars",
2060                    context: "日本語\n",
2061                    editable_range: 0..10,
2062                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
2063                    expected: indoc! {"
2064                    <|file_sep|>test.rs
2065                    <|fim_prefix|>
2066                    <|fim_middle|>current
2067                    0:80|日本<|user_cursor|>語
2068                    <|fim_suffix|>
2069                    <|fim_middle|>updated
2070                    "},
2071                },
2072                Case {
2073                    name: "unicode_four_byte_chars",
2074                    context: "a🌍b\n",
2075                    editable_range: 0..7,
2076                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
2077                    expected: indoc! {"
2078                    <|file_sep|>test.rs
2079                    <|fim_prefix|>
2080                    <|fim_middle|>current
2081                    0:6b|a🌍<|user_cursor|>b
2082                    <|fim_suffix|>
2083                    <|fim_middle|>updated
2084                    "},
2085                },
2086                Case {
2087                    name: "cursor_at_start_of_region_not_placed",
2088                    context: "abc\n",
2089                    editable_range: 0..4,
2090                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
2091                    expected: indoc! {"
2092                    <|file_sep|>test.rs
2093                    <|fim_prefix|>
2094                    <|fim_middle|>current
2095                    0:26|abc
2096                    <|fim_suffix|>
2097                    <|fim_middle|>updated
2098                    "},
2099                },
2100                Case {
2101                    name: "cursor_at_end_of_line_not_placed",
2102                    context: "abc\ndef\n",
2103                    editable_range: 0..8,
2104                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
2105                    expected: indoc! {"
2106                    <|file_sep|>test.rs
2107                    <|fim_prefix|>
2108                    <|fim_middle|>current
2109                    0:26|abc
2110                    1:2f|def
2111                    <|fim_suffix|>
2112                    <|fim_middle|>updated
2113                    "},
2114                },
2115                Case {
2116                    name: "cursor_offset_relative_to_context_not_editable_region",
2117                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
2118                    // write_cursor_excerpt_section must subtract it before comparing against
2119                    // per-line offsets within the editable region.
2120                    context: "pre\naaa\nbbb\nsuf\n",
2121                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
2122                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
2123                    expected: indoc! {"
2124                    <|file_sep|>test.rs
2125                    <|fim_prefix|>
2126                    pre
2127                    <|fim_middle|>current
2128                    0:23|aaa
2129                    1:26|b<|user_cursor|>bb
2130                    <|fim_suffix|>
2131                    suf
2132                    <|fim_middle|>updated
2133                    "},
2134                },
2135            ];
2136
2137            for case in &cases {
2138                let mut prompt = String::new();
2139                hashline::write_cursor_excerpt_section(
2140                    &mut prompt,
2141                    Path::new("test.rs"),
2142                    case.context,
2143                    &case.editable_range,
2144                    case.cursor_offset,
2145                );
2146                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
2147            }
2148        }
2149
2150        #[test]
2151        fn test_apply_edit_commands() {
2152            struct Case {
2153                name: &'static str,
2154                original: &'static str,
2155                model_output: &'static str,
2156                expected: &'static str,
2157            }
2158
2159            let cases = vec![
2160                Case {
2161                    name: "set_single_line",
2162                    original: indoc! {"
2163                    let mut total = 0;
2164                    for product in products {
2165                        total += ;
2166                    }
2167                    total
2168                "},
2169                    model_output: indoc! {"
2170                    <|set|>2:87
2171                        total += product.price;
2172                "},
2173                    expected: indoc! {"
2174                    let mut total = 0;
2175                    for product in products {
2176                        total += product.price;
2177                    }
2178                    total
2179                "},
2180                },
2181                Case {
2182                    name: "set_range",
2183                    original: indoc! {"
2184                    fn foo() {
2185                        let x = 1;
2186                        let y = 2;
2187                        let z = 3;
2188                    }
2189                "},
2190                    model_output: indoc! {"
2191                    <|set|>1:46-3:4a
2192                        let sum = 6;
2193                "},
2194                    expected: indoc! {"
2195                    fn foo() {
2196                        let sum = 6;
2197                    }
2198                "},
2199                },
2200                Case {
2201                    name: "insert_after_line",
2202                    original: indoc! {"
2203                    fn main() {
2204                        let x = 1;
2205                    }
2206                "},
2207                    model_output: indoc! {"
2208                    <|insert|>1:46
2209                        let y = 2;
2210                "},
2211                    expected: indoc! {"
2212                    fn main() {
2213                        let x = 1;
2214                        let y = 2;
2215                    }
2216                "},
2217                },
2218                Case {
2219                    name: "insert_before_first",
2220                    original: indoc! {"
2221                    let x = 1;
2222                    let y = 2;
2223                "},
2224                    model_output: indoc! {"
2225                    <|insert|>
2226                    use std::io;
2227                "},
2228                    expected: indoc! {"
2229                    use std::io;
2230                    let x = 1;
2231                    let y = 2;
2232                "},
2233                },
2234                Case {
2235                    name: "set_with_cursor_marker",
2236                    original: indoc! {"
2237                    fn main() {
2238                        println!();
2239                    }
2240                "},
2241                    model_output: indoc! {"
2242                    <|set|>1:34
2243                        eprintln!(\"<|user_cursor|>\");
2244                "},
2245                    expected: indoc! {"
2246                    fn main() {
2247                        eprintln!(\"<|user_cursor|>\");
2248                    }
2249                "},
2250                },
2251                Case {
2252                    name: "multiple_set_commands",
2253                    original: indoc! {"
2254                    aaa
2255                    bbb
2256                    ccc
2257                    ddd
2258                "},
2259                    model_output: indoc! {"
2260                    <|set|>0:23
2261                    AAA
2262                    <|set|>2:29
2263                    CCC
2264                "},
2265                    expected: indoc! {"
2266                    AAA
2267                    bbb
2268                    CCC
2269                    ddd
2270                "},
2271                },
2272                Case {
2273                    name: "set_range_multiline_replacement",
2274                    original: indoc! {"
2275                    fn handle_submit() {
2276                    }
2277
2278                    fn handle_keystroke() {
2279                "},
2280                    model_output: indoc! {"
2281                    <|set|>0:3f-1:7d
2282                    fn handle_submit(modal_state: &mut ModalState) {
2283                        <|user_cursor|>
2284                    }
2285                "},
2286                    expected: indoc! {"
2287                    fn handle_submit(modal_state: &mut ModalState) {
2288                        <|user_cursor|>
2289                    }
2290
2291                    fn handle_keystroke() {
2292                "},
2293                },
2294                Case {
2295                    name: "no_edit_commands_returns_original",
2296                    original: indoc! {"
2297                    hello
2298                    world
2299                "},
2300                    model_output: "some random text with no commands",
2301                    expected: indoc! {"
2302                    hello
2303                    world
2304                "},
2305                },
2306                Case {
2307                    name: "no_edits_command_returns_original",
2308                    original: indoc! {"
2309                    hello
2310                    world
2311                "},
2312                    model_output: "<|no_edits|>",
2313                    expected: indoc! {"
2314                    hello
2315                    world
2316                "},
2317                },
2318                Case {
2319                    name: "wrong_hash_set_ignored",
2320                    original: indoc! {"
2321                    aaa
2322                    bbb
2323                "},
2324                    model_output: indoc! {"
2325                    <|set|>0:ff
2326                    ZZZ
2327                "},
2328                    expected: indoc! {"
2329                    aaa
2330                    bbb
2331                "},
2332                },
2333                Case {
2334                    name: "insert_and_set_combined",
2335                    original: indoc! {"
2336                    alpha
2337                    beta
2338                    gamma
2339                "},
2340                    model_output: indoc! {"
2341                    <|set|>0:06
2342                    ALPHA
2343                    <|insert|>1:9c
2344                    beta_extra
2345                "},
2346                    expected: indoc! {"
2347                    ALPHA
2348                    beta
2349                    beta_extra
2350                    gamma
2351                "},
2352                },
2353                Case {
2354                    name: "no_trailing_newline_preserved",
2355                    original: "hello\nworld",
2356                    model_output: indoc! {"
2357                    <|set|>0:14
2358                    HELLO
2359                "},
2360                    expected: "HELLO\nworld",
2361                },
2362                Case {
2363                    name: "set_range_hash_mismatch_in_end_bound",
2364                    original: indoc! {"
2365                    one
2366                    two
2367                    three
2368                "},
2369                    model_output: indoc! {"
2370                    <|set|>0:42-2:ff
2371                    ONE_TWO_THREE
2372                "},
2373                    expected: indoc! {"
2374                    one
2375                    two
2376                    three
2377                "},
2378                },
2379                Case {
2380                    name: "set_range_start_greater_than_end_ignored",
2381                    original: indoc! {"
2382                    a
2383                    b
2384                    c
2385                "},
2386                    model_output: indoc! {"
2387                    <|set|>2:63-1:62
2388                    X
2389                "},
2390                    expected: indoc! {"
2391                    a
2392                    b
2393                    c
2394                "},
2395                },
2396                Case {
2397                    name: "insert_out_of_bounds_ignored",
2398                    original: indoc! {"
2399                    x
2400                    y
2401                "},
2402                    model_output: indoc! {"
2403                    <|insert|>99:aa
2404                    z
2405                "},
2406                    expected: indoc! {"
2407                    x
2408                    y
2409                "},
2410                },
2411                Case {
2412                    name: "set_out_of_bounds_ignored",
2413                    original: indoc! {"
2414                    x
2415                    y
2416                "},
2417                    model_output: indoc! {"
2418                    <|set|>99:aa
2419                    z
2420                "},
2421                    expected: indoc! {"
2422                    x
2423                    y
2424                "},
2425                },
2426                Case {
2427                    name: "malformed_set_command_ignored",
2428                    original: indoc! {"
2429                    alpha
2430                    beta
2431                "},
2432                    model_output: indoc! {"
2433                    <|set|>not-a-line-ref
2434                    UPDATED
2435                "},
2436                    expected: indoc! {"
2437                    alpha
2438                    beta
2439                "},
2440                },
2441                Case {
2442                    name: "malformed_insert_hash_treated_as_before_first",
2443                    original: indoc! {"
2444                    alpha
2445                    beta
2446                "},
2447                    model_output: indoc! {"
2448                    <|insert|>1:nothex
2449                    preamble
2450                "},
2451                    expected: indoc! {"
2452                    preamble
2453                    alpha
2454                    beta
2455                "},
2456                },
2457                Case {
2458                    name: "set_then_insert_same_target_orders_insert_after_replacement",
2459                    original: indoc! {"
2460                    cat
2461                    dog
2462                "},
2463                    model_output: indoc! {"
2464                    <|set|>0:38
2465                    CAT
2466                    <|insert|>0:38
2467                    TAIL
2468                "},
2469                    expected: indoc! {"
2470                    CAT
2471                    TAIL
2472                    dog
2473                "},
2474                },
2475                Case {
2476                    name: "overlapping_set_ranges_last_wins",
2477                    original: indoc! {"
2478                    a
2479                    b
2480                    c
2481                    d
2482                "},
2483                    model_output: indoc! {"
2484                    <|set|>0:61-2:63
2485                    FIRST
2486                    <|set|>1:62-3:64
2487                    SECOND
2488                "},
2489                    expected: indoc! {"
2490                    FIRST
2491                    d
2492                "},
2493                },
2494                Case {
2495                    name: "insert_before_first_and_after_line",
2496                    original: indoc! {"
2497                        a
2498                        b
2499                    "},
2500                    model_output: indoc! {"
2501                        <|insert|>
2502                        HEAD
2503                        <|insert|>0:61
2504                        MID
2505                    "},
2506                    expected: indoc! {"
2507                        HEAD
2508                        a
2509                        MID
2510                        b
2511                    "},
2512                },
2513            ];
2514
2515            for case in &cases {
2516                let result = hashline::apply_edit_commands(case.original, &case.model_output);
2517                assert_eq!(result, case.expected, "failed case: {}", case.name);
2518            }
2519        }
2520
2521        #[test]
2522        fn test_output_has_edit_commands() {
2523            assert!(hashline::output_has_edit_commands(&format!(
2524                "{}0:ab\nnew",
2525                SET_COMMAND_MARKER
2526            )));
2527            assert!(hashline::output_has_edit_commands(&format!(
2528                "{}0:ab\nnew",
2529                INSERT_COMMAND_MARKER
2530            )));
2531            assert!(hashline::output_has_edit_commands(&format!(
2532                "some text\n{}1:cd\nstuff",
2533                SET_COMMAND_MARKER
2534            )));
2535            assert!(!hashline::output_has_edit_commands("just plain text"));
2536            assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2537            assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2538        }
2539
2540        // ---- hashline::patch_to_edit_commands round-trip tests ----
2541
2542        #[test]
2543        fn test_patch_to_edit_commands() {
2544            struct Case {
2545                name: &'static str,
2546                old: &'static str,
2547                patch: &'static str,
2548                expected_new: &'static str,
2549            }
2550
2551            let cases = [
2552                Case {
2553                    name: "single_line_replacement",
2554                    old: indoc! {"
2555                    let mut total = 0;
2556                    for product in products {
2557                        total += ;
2558                    }
2559                    total
2560                "},
2561                    patch: indoc! {"
2562                    @@ -1,5 +1,5 @@
2563                     let mut total = 0;
2564                     for product in products {
2565                    -    total += ;
2566                    +    total += product.price;
2567                     }
2568                     total
2569                "},
2570                    expected_new: indoc! {"
2571                    let mut total = 0;
2572                    for product in products {
2573                        total += product.price;
2574                    }
2575                    total
2576                "},
2577                },
2578                Case {
2579                    name: "multiline_replacement",
2580                    old: indoc! {"
2581                    fn foo() {
2582                        let x = 1;
2583                        let y = 2;
2584                        let z = 3;
2585                    }
2586                "},
2587                    patch: indoc! {"
2588                    @@ -1,5 +1,3 @@
2589                     fn foo() {
2590                    -    let x = 1;
2591                    -    let y = 2;
2592                    -    let z = 3;
2593                    +    let sum = 1 + 2 + 3;
2594                     }
2595                "},
2596                    expected_new: indoc! {"
2597                    fn foo() {
2598                        let sum = 1 + 2 + 3;
2599                    }
2600                "},
2601                },
2602                Case {
2603                    name: "insertion",
2604                    old: indoc! {"
2605                    fn main() {
2606                        let x = 1;
2607                    }
2608                "},
2609                    patch: indoc! {"
2610                    @@ -1,3 +1,4 @@
2611                     fn main() {
2612                         let x = 1;
2613                    +    let y = 2;
2614                     }
2615                "},
2616                    expected_new: indoc! {"
2617                    fn main() {
2618                        let x = 1;
2619                        let y = 2;
2620                    }
2621                "},
2622                },
2623                Case {
2624                    name: "insertion_before_first",
2625                    old: indoc! {"
2626                    let x = 1;
2627                    let y = 2;
2628                "},
2629                    patch: indoc! {"
2630                    @@ -1,2 +1,3 @@
2631                    +use std::io;
2632                     let x = 1;
2633                     let y = 2;
2634                "},
2635                    expected_new: indoc! {"
2636                    use std::io;
2637                    let x = 1;
2638                    let y = 2;
2639                "},
2640                },
2641                Case {
2642                    name: "deletion",
2643                    old: indoc! {"
2644                    aaa
2645                    bbb
2646                    ccc
2647                    ddd
2648                "},
2649                    patch: indoc! {"
2650                    @@ -1,4 +1,2 @@
2651                     aaa
2652                    -bbb
2653                    -ccc
2654                     ddd
2655                "},
2656                    expected_new: indoc! {"
2657                    aaa
2658                    ddd
2659                "},
2660                },
2661                Case {
2662                    name: "multiple_changes",
2663                    old: indoc! {"
2664                    alpha
2665                    beta
2666                    gamma
2667                    delta
2668                    epsilon
2669                "},
2670                    patch: indoc! {"
2671                    @@ -1,5 +1,5 @@
2672                    -alpha
2673                    +ALPHA
2674                     beta
2675                     gamma
2676                    -delta
2677                    +DELTA
2678                     epsilon
2679                "},
2680                    expected_new: indoc! {"
2681                    ALPHA
2682                    beta
2683                    gamma
2684                    DELTA
2685                    epsilon
2686                "},
2687                },
2688                Case {
2689                    name: "replace_with_insertion",
2690                    old: indoc! {r#"
2691                    fn handle() {
2692                        modal_state.close();
2693                        modal_state.dismiss();
2694                "#},
2695                    patch: indoc! {r#"
2696                    @@ -1,3 +1,4 @@
2697                     fn handle() {
2698                         modal_state.close();
2699                    +    eprintln!("");
2700                         modal_state.dismiss();
2701                "#},
2702                    expected_new: indoc! {r#"
2703                    fn handle() {
2704                        modal_state.close();
2705                        eprintln!("");
2706                        modal_state.dismiss();
2707                "#},
2708                },
2709                Case {
2710                    name: "complete_replacement",
2711                    old: indoc! {"
2712                    aaa
2713                    bbb
2714                    ccc
2715                "},
2716                    patch: indoc! {"
2717                    @@ -1,3 +1,3 @@
2718                    -aaa
2719                    -bbb
2720                    -ccc
2721                    +xxx
2722                    +yyy
2723                    +zzz
2724                "},
2725                    expected_new: indoc! {"
2726                    xxx
2727                    yyy
2728                    zzz
2729                "},
2730                },
2731                Case {
2732                    name: "add_function_body",
2733                    old: indoc! {"
2734                    fn foo() {
2735                        modal_state.dismiss();
2736                    }
2737
2738                    fn
2739
2740                    fn handle_keystroke() {
2741                "},
2742                    patch: indoc! {"
2743                    @@ -1,6 +1,8 @@
2744                     fn foo() {
2745                         modal_state.dismiss();
2746                     }
2747
2748                    -fn
2749                    +fn handle_submit() {
2750                    +    todo()
2751                    +}
2752
2753                     fn handle_keystroke() {
2754                "},
2755                    expected_new: indoc! {"
2756                    fn foo() {
2757                        modal_state.dismiss();
2758                    }
2759
2760                    fn handle_submit() {
2761                        todo()
2762                    }
2763
2764                    fn handle_keystroke() {
2765                "},
2766                },
2767                Case {
2768                    name: "with_cursor_offset",
2769                    old: indoc! {r#"
2770                    fn main() {
2771                        println!();
2772                    }
2773                "#},
2774                    patch: indoc! {r#"
2775                        @@ -1,3 +1,3 @@
2776                        fn main() {
2777                        -    println!();
2778                        +    eprintln!("");
2779                        }
2780                    "#},
2781                    expected_new: indoc! {r#"
2782                        fn main() {
2783                            eprintln!("<|user_cursor|>");
2784                        }
2785                    "#},
2786                },
2787                Case {
2788                    name: "non_local_hunk_header_pure_insertion_repro",
2789                    old: indoc! {"
2790                        aaa
2791                        bbb
2792                    "},
2793                    patch: indoc! {"
2794                        @@ -20,2 +20,3 @@
2795                        aaa
2796                        +xxx
2797                        bbb
2798                    "},
2799                    expected_new: indoc! {"
2800                        aaa
2801                        xxx
2802                        bbb
2803                    "},
2804                },
2805                Case {
2806                    name: "empty_patch_produces_no_edits_marker",
2807                    old: indoc! {"
2808                        aaa
2809                        bbb
2810                    "},
2811                    patch: "@@ -20,2 +20,3 @@\n",
2812                    expected_new: indoc! {"
2813                        aaa
2814                        bbb
2815                    "},
2816                },
2817            ];
2818
2819            for case in &cases {
2820                // The cursor_offset for patch_to_edit_commands is relative to
2821                // the first hunk's new text (context + additions). We compute
2822                // it by finding where the marker sits in the expected output
2823                // (which mirrors the new text of the hunk).
2824                let cursor_offset = case.expected_new.find(CURSOR_MARKER);
2825
2826                let commands =
2827                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
2828                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
2829
2830                assert!(
2831                    hashline::output_has_edit_commands(&commands),
2832                    "case {}: expected edit commands, got: {commands:?}",
2833                    case.name,
2834                );
2835
2836                let applied = hashline::apply_edit_commands(case.old, &commands);
2837                assert_eq!(applied, case.expected_new, "case {}", case.name);
2838            }
2839        }
2840    }
2841}
2842
2843pub mod seed_coder {
2844    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2845    //!
2846    //! Seed-Coder uses different FIM tokens and order than Qwen:
2847    //! - SPM order: suffix comes FIRST, then prefix, then middle
2848    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2849    //! - File markers: StarCoder-style `<filename>path` (single token + path)
2850    //!
2851    //! All context (related files, edit history) goes in the PREFIX section.
2852    //! The suffix contains only code after the editable region.
2853    //!
2854    //! Example prompt:
2855    //!
2856    //! <[fim-suffix]>
2857    //! code after editable region
2858    //! <[fim-prefix]><filename>related/file.py
2859    //! related file content
2860    //!
2861    //! <filename>edit_history
2862    //! --- a/some_file.py
2863    //! +++ b/some_file.py
2864    //! -old
2865    //! +new
2866    //!
2867    //! <filename>path/to/target_file.py
2868    //! code before editable region
2869    //! <<<<<<< CURRENT
2870    //! code that
2871    //! needs to<|user_cursor|>
2872    //! be rewritten
2873    //! =======
2874    //! <[fim-middle]>
2875    //!
2876    //! Expected output (model generates):
2877    //!
2878    //! updated
2879    //! code with
2880    //! changes applied
2881    //! >>>>>>> UPDATED
2882
2883    use super::*;
2884
    /// Token that opens the suffix section, which is written first in the prompt.
    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
    /// Token that opens the prefix section, written right after the suffix section.
    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
    /// Token appended at the very end of the prompt.
    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
    /// Marker written immediately before a file path.
    pub const FILE_MARKER: &str = "<filename>";

    /// Line written before the editable region of the cursor file.
    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
    /// Line written after the editable region of the cursor file.
    pub const SEPARATOR: &str = "=======\n";
    /// Line that terminates the model's output.
    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

    /// Output line used, followed by [`END_MARKER`], to express "no changes".
    pub const NO_EDITS: &str = "NO_EDITS\n";
2895
2896    pub fn special_tokens() -> &'static [&'static str] {
2897        &[
2898            FIM_SUFFIX,
2899            FIM_PREFIX,
2900            FIM_MIDDLE,
2901            FILE_MARKER,
2902            START_MARKER,
2903            SEPARATOR,
2904            END_MARKER,
2905            CURSOR_MARKER,
2906        ]
2907    }
2908
2909    pub fn write_cursor_excerpt_section(
2910        prompt: &mut String,
2911        path: &Path,
2912        context: &str,
2913        editable_range: &Range<usize>,
2914        cursor_offset: usize,
2915    ) {
2916        let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2917        prompt.push_str(&section);
2918    }
2919
2920    pub fn format_prompt_with_budget(
2921        path: &Path,
2922        context: &str,
2923        editable_range: &Range<usize>,
2924        cursor_offset: usize,
2925        events: &[Arc<Event>],
2926        related_files: &[RelatedFile],
2927        max_tokens: usize,
2928    ) -> String {
2929        let cursor_prefix_section =
2930            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2931        assemble_fim_prompt(
2932            context,
2933            editable_range,
2934            &cursor_prefix_section,
2935            events,
2936            related_files,
2937            max_tokens,
2938        )
2939    }
2940
2941    pub fn assemble_fim_prompt(
2942        context: &str,
2943        editable_range: &Range<usize>,
2944        cursor_prefix_section: &str,
2945        events: &[Arc<Event>],
2946        related_files: &[RelatedFile],
2947        max_tokens: usize,
2948    ) -> String {
2949        let suffix_section = build_suffix_section(context, editable_range);
2950
2951        let suffix_tokens = estimate_tokens(suffix_section.len() + FIM_PREFIX.len());
2952        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len() + FIM_MIDDLE.len());
2953        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2954
2955        let edit_history_section = super::format_edit_history_within_budget(
2956            events,
2957            FILE_MARKER,
2958            "edit_history",
2959            budget_after_cursor,
2960            max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
2961        );
2962        let edit_history_tokens = estimate_tokens(edit_history_section.len() + "\n".len());
2963        let budget_after_edit_history =
2964            budget_after_cursor.saturating_sub(edit_history_tokens + "\n".len());
2965
2966        let related_files_section = super::format_related_files_within_budget(
2967            related_files,
2968            FILE_MARKER,
2969            "",
2970            budget_after_edit_history,
2971        );
2972
2973        let mut prompt = String::new();
2974        prompt.push_str(&suffix_section);
2975        prompt.push_str(FIM_PREFIX);
2976        prompt.push_str(&related_files_section);
2977        if !related_files_section.is_empty() {
2978            prompt.push('\n');
2979        }
2980        prompt.push_str(&edit_history_section);
2981        if !edit_history_section.is_empty() {
2982            prompt.push('\n');
2983        }
2984        prompt.push_str(cursor_prefix_section);
2985        prompt.push_str(FIM_MIDDLE);
2986
2987        prompt
2988    }
2989
2990    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2991        let mut section = String::new();
2992        section.push_str(FIM_SUFFIX);
2993        section.push_str(&context[editable_range.end..]);
2994        if !section.ends_with('\n') {
2995            section.push('\n');
2996        }
2997        section
2998    }
2999
3000    fn build_cursor_prefix_section(
3001        path: &Path,
3002        context: &str,
3003        editable_range: &Range<usize>,
3004        cursor_offset: usize,
3005    ) -> String {
3006        let mut section = String::new();
3007        let path_str = path.to_string_lossy();
3008        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
3009
3010        section.push_str(&context[..editable_range.start]);
3011        section.push_str(START_MARKER);
3012        section.push_str(&context[editable_range.start..cursor_offset]);
3013        section.push_str(CURSOR_MARKER);
3014        section.push_str(&context[cursor_offset..editable_range.end]);
3015        if !section.ends_with('\n') {
3016            section.push('\n');
3017        }
3018        section.push_str(SEPARATOR);
3019        section
3020    }
3021
3022    /// Format patch as containing no changes if it's empty; otherwise return None.
3023    pub(crate) fn no_edits(patch: &str) -> Option<String> {
3024        // Count lines in the patch
3025        let empty_patch = patch.lines().count() <= 3;
3026        if empty_patch {
3027            Some(format!("{NO_EDITS}{END_MARKER}"))
3028        } else {
3029            None
3030        }
3031    }
3032}
3033
3034pub mod v0304_variable_edit {
3035    //! A prompt format with no fixed editable region. The entire context is shown
3036    //! to the model, and it chooses which text to replace by outputting surrounding
3037    //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
3038    //! text.
3039    //!
3040    //! Example prompt:
3041    //!
3042    //! <|file_sep|>path/to/file.py
3043    //! zero
3044    //! one
3045    //! two
3046    //! three<|user_cursor|>
3047    //! four
3048    //! five
3049    //! <|fim_prefix|>
    //!
3051    //! Expected output (model generates):
3052    //!
3053    //! two
3054    //! <|fim_middle|>
3055    //! THREE
3056    //! <|fim_suffix|>
3057    //! four
3058    //!
3059    //! The output means: find "two\n...\nfour" in the context, and replace
3060    //! everything between "two\n" and "four" with "THREE\n".
3061
3062    use super::*;
3063
3064    pub fn special_tokens() -> &'static [&'static str] {
3065        &[
3066            "<|fim_prefix|>",
3067            "<|fim_suffix|>",
3068            "<|fim_middle|>",
3069            "<|file_sep|>",
3070            CURSOR_MARKER,
3071        ]
3072    }
3073
3074    pub fn write_cursor_excerpt_section(
3075        prompt: &mut String,
3076        path: &Path,
3077        context: &str,
3078        cursor_offset: usize,
3079    ) {
3080        let path_str = path.to_string_lossy();
3081        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
3082
3083        prompt.push_str(&context[..cursor_offset]);
3084        prompt.push_str(CURSOR_MARKER);
3085        prompt.push_str(&context[cursor_offset..]);
3086        if !prompt.ends_with('\n') {
3087            prompt.push('\n');
3088        }
3089        prompt.push_str("<|fim_prefix|>\n")
3090    }
3091
3092    /// Apply a variable-edit model output to the original context text.
3093    ///
3094    /// The model output has the form:
3095    ///
3096    /// - prefix context lines
3097    /// - `<|fim_middle|>`
3098    /// - new text
3099    /// - `<|fim_suffix|>`
3100    /// - suffix context lines
3101    ///
3102    /// We locate the prefix/suffix context lines in the original text and replace
3103    /// everything between them with the new text.
3104    pub fn apply_variable_edit(
3105        context: &str,
3106        model_output: &str,
3107    ) -> Result<(Range<usize>, String)> {
3108        let (prefix_context, rest) = model_output
3109            .split_once("<|fim_middle|>\n")
3110            .or_else(|| model_output.split_once("<|fim_middle|>"))
3111            .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
3112
3113        let (new_text, suffix_context) = rest
3114            .split_once("<|fim_suffix|>\n")
3115            .or_else(|| rest.split_once("<|fim_suffix|>"))
3116            .unwrap_or((rest, ""));
3117
3118        let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
3119            suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
3120        } else {
3121            suffix_context
3122        };
3123
3124        let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
3125            .ok_or_else(|| anyhow!("could not locate prefix lines"))?
3126            + prefix_context.len();
3127        let suffix_offset = if suffix_context.is_empty() {
3128            context.len()
3129        } else {
3130            find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
3131                .ok_or_else(|| anyhow!("could not locate suffix lines"))?
3132                + prefix_offset
3133        };
3134
3135        let edit_range = prefix_offset..suffix_offset;
3136        return Ok((edit_range, new_text.to_string()));
3137    }
3138
3139    fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
3140        if needle.is_empty() {
3141            return Some(0);
3142        }
3143
3144        haystack.match_indices(needle).find_map(|(offset, _)| {
3145            let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
3146            matched_line_start.then_some(offset)
3147        })
3148    }
3149
3150    /// Convert a unified diff patch into the variable-edit output format.
3151    ///
3152    /// Parses `patch` as a unified diff against `old_text` and produces model
3153    /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
3154    /// delimiters. The diff is resolved by content matching rather than line
3155    /// numbers.
3156    pub fn patch_to_variable_edit_output(
3157        old_text: &str,
3158        patch: &str,
3159        cursor_offset: Option<usize>,
3160    ) -> Result<String> {
3161        // Parse the unified diff into hunks. Each hunk has an `old_context`
3162        // string (context + deleted lines interleaved in order) and a list of
3163        // edits expressed as byte ranges within that context plus replacement
3164        // text.
3165        let hunks = parse_hunks(patch);
3166        if hunks.is_empty() {
3167            return Ok(String::new());
3168        }
3169
3170        // Apply each hunk by finding its old_context in the text and
3171        // performing the edits. We search forward from where the previous
3172        // hunk ended so that hunks are applied in order.
3173        let mut new_text = old_text.to_string();
3174        let mut search_from: usize = 0;
3175        let mut first_hunk_pos: Option<usize> = None;
3176
3177        for hunk in &hunks {
3178            let context_pos = new_text[search_from..]
3179                .find(&hunk.old_context)
3180                .map(|pos| pos + search_from)
3181                .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
3182
3183            if first_hunk_pos.is_none() {
3184                first_hunk_pos = Some(context_pos);
3185            }
3186
3187            // Apply edits in reverse order so byte offsets remain valid.
3188            for edit in hunk.edits.iter().rev() {
3189                let abs_start = context_pos + edit.range.start;
3190                let abs_end = context_pos + edit.range.end;
3191                new_text.replace_range(abs_start..abs_end, &edit.text);
3192            }
3193
3194            // Advance past this hunk's region in the (now modified) text.
3195            let new_region_len: usize =
3196                hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
3197                    len + edit.text.len() - (edit.range.end - edit.range.start)
3198                });
3199            search_from = context_pos + new_region_len;
3200        }
3201
3202        // Now we have old_text and new_text. Find the changed line range by
3203        // comparing them.
3204        let old_lines: Vec<&str> = old_text.lines().collect();
3205        let new_lines: Vec<&str> = new_text.lines().collect();
3206
3207        // Find first differing line.
3208        let first_changed_row = old_lines
3209            .iter()
3210            .zip(new_lines.iter())
3211            .position(|(a, b)| a != b)
3212            .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
3213
3214        // Find last differing line (from the end).
3215        let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
3216        let common_suffix = old_lines
3217            .iter()
3218            .rev()
3219            .zip(new_lines.iter().rev())
3220            .take(max_suffix)
3221            .take_while(|(a, b)| a == b)
3222            .count();
3223
3224        let old_end = old_lines.len() - common_suffix;
3225        let new_end = new_lines.len() - common_suffix;
3226
3227        if first_changed_row == old_end && first_changed_row == new_end {
3228            return Ok(String::new());
3229        }
3230
3231        // Build the replacement text from new_lines[first_diff..new_end].
3232        let mut merged_new_text = String::new();
3233        for line in &new_lines[first_changed_row..new_end] {
3234            merged_new_text.push_str(line);
3235            merged_new_text.push('\n');
3236        }
3237
3238        // cursor_offset is relative to the first hunk's new content in
3239        // new_text. Translate it to an offset within merged_new_text, which
3240        // only contains lines first_diff..new_end of new_text.
3241        if let Some(hunk_offset) = cursor_offset {
3242            let hunk_start = first_hunk_pos.unwrap_or(0);
3243            let absolute_pos = hunk_start + hunk_offset;
3244
3245            // Byte offset where first_diff starts in new_text.
3246            let merged_start: usize = new_lines[..first_changed_row]
3247                .iter()
3248                .map(|line| line.len() + 1)
3249                .sum();
3250
3251            if absolute_pos >= merged_start {
3252                let relative_offset = absolute_pos - merged_start;
3253                if relative_offset <= merged_new_text.len() {
3254                    merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
3255                }
3256            }
3257        }
3258
3259        // Build output with 2 lines of context above and below.
3260        let context_lines_count = 2;
3261        let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
3262        let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
3263
3264        fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
3265            let pattern = &lines[line_range];
3266            let pattern_len = pattern.len();
3267
3268            let mut count = 0;
3269            for offset in 0..=lines.len() - pattern_len {
3270                if &lines[offset..offset + pattern_len] == pattern {
3271                    count += 1;
3272                }
3273            }
3274            count
3275        }
3276
3277        // Expand prefix and suffix until they are unique
3278        while prefix_start > 0 {
3279            if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
3280                prefix_start -= 1;
3281            } else {
3282                break;
3283            }
3284        }
3285        while suffix_end < old_lines.len() {
3286            if count_matches(old_end..suffix_end, &old_lines) > 1 {
3287                suffix_end += 1;
3288            } else {
3289                break;
3290            }
3291        }
3292
3293        let mut output = String::new();
3294        for line in &old_lines[prefix_start..first_changed_row] {
3295            output.push_str(line);
3296            output.push('\n');
3297        }
3298        output.push_str("<|fim_middle|>\n");
3299        output.push_str(&merged_new_text);
3300        output.push_str("<|fim_suffix|>\n");
3301        for line in &old_lines[old_end..suffix_end] {
3302            output.push_str(line);
3303            output.push('\n');
3304        }
3305
3306        Ok(output)
3307    }
3308
3309    struct ParsedHunk {
3310        old_context: String,
3311        edits: Vec<ParsedEdit>,
3312    }
3313
3314    struct ParsedEdit {
3315        range: Range<usize>,
3316        text: String,
3317    }
3318
3319    /// Parse a unified diff into content-based hunks. Each hunk contains an
3320    /// `old_context` string (context lines + deleted lines, which together
3321    /// form the text that should be found in the original) and a list of edits
3322    /// expressed as byte ranges within that context.
3323    fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
3324        let mut hunks = Vec::new();
3325        let mut current: Option<ParsedHunk> = None;
3326
3327        for line in patch.lines() {
3328            if line.starts_with("@@") {
3329                if let Some(hunk) = current.take() {
3330                    if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3331                        hunks.push(hunk);
3332                    }
3333                }
3334                current = Some(ParsedHunk {
3335                    old_context: String::new(),
3336                    edits: Vec::new(),
3337                });
3338            } else if line.starts_with("---") || line.starts_with("+++") {
3339                continue;
3340            } else if let Some(hunk) = &mut current {
3341                if let Some(added) = line.strip_prefix('+') {
3342                    let pos = hunk.old_context.len();
3343                    if let Some(last_edit) = hunk.edits.last_mut() {
3344                        if last_edit.range.end == pos {
3345                            writeln!(&mut last_edit.text, "{added}").ok();
3346                            continue;
3347                        }
3348                    }
3349                    hunk.edits.push(ParsedEdit {
3350                        range: pos..pos,
3351                        text: format!("{added}\n"),
3352                    });
3353                } else if let Some(removed) = line.strip_prefix('-') {
3354                    let start = hunk.old_context.len();
3355                    writeln!(&mut hunk.old_context, "{removed}").ok();
3356                    let end = hunk.old_context.len();
3357                    if let Some(last_edit) = hunk.edits.last_mut() {
3358                        if last_edit.range.end == start {
3359                            last_edit.range.end = end;
3360                            continue;
3361                        }
3362                    }
3363                    hunk.edits.push(ParsedEdit {
3364                        range: start..end,
3365                        text: String::new(),
3366                    });
3367                } else {
3368                    let ctx = line.strip_prefix(' ').unwrap_or(line);
3369                    writeln!(&mut hunk.old_context, "{ctx}").ok();
3370                }
3371            }
3372        }
3373
3374        if let Some(hunk) = current {
3375            if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3376                hunks.push(hunk);
3377            }
3378        }
3379
3380        hunks
3381    }
3382
3383    #[cfg(test)]
3384    mod tests {
3385        use super::*;
3386        use indoc::indoc;
3387
3388        #[test]
3389        fn test_apply_variable_edit() {
3390            struct Case {
3391                name: &'static str,
3392                original: &'static str,
3393                model_output: &'static str,
3394                expected: &'static str,
3395            }
3396
3397            let cases = [
3398                Case {
3399                    name: "simple_single_line_replacement",
3400                    original: indoc! {"
3401                        zero
3402                        one
3403                        two
3404                        three
3405                        four
3406                        five
3407                    "},
3408                    model_output: indoc! {"
3409                        two
3410                        <|fim_middle|>
3411                        THREE
3412                        <|fim_suffix|>
3413                        four
3414                    "},
3415                    expected: indoc! {"
3416                        zero
3417                        one
3418                        two
3419                        THREE
3420                        four
3421                        five
3422                    "},
3423                },
3424                Case {
3425                    name: "multi_line_replacement",
3426                    original: indoc! {"
3427                        a
3428                        b
3429                        c
3430                        d
3431                        e
3432                    "},
3433                    model_output: indoc! {"
3434                        a
3435                        <|fim_middle|>
3436                        B
3437                        C
3438                        D
3439                        <|fim_suffix|>
3440                        e
3441                    "},
3442                    expected: indoc! {"
3443                        a
3444                        B
3445                        C
3446                        D
3447                        e
3448                    "},
3449                },
3450                Case {
3451                    name: "insertion_between_existing_lines",
3452                    original: indoc! {"
3453                        a
3454                        b
3455                        c
3456                    "},
3457                    model_output: indoc! {"
3458                        a
3459                        <|fim_middle|>
3460                        X
3461                        <|fim_suffix|>
3462                        b
3463                    "},
3464                    expected: indoc! {"
3465                        a
3466                        X
3467                        b
3468                        c
3469                    "},
3470                },
3471                Case {
3472                    name: "deletion",
3473                    original: indoc! {"
3474                        a
3475                        b
3476                        c
3477                        d
3478                    "},
3479                    model_output: indoc! {"
3480                        a
3481                        <|fim_middle|>
3482                        <|fim_suffix|>
3483                        c
3484                    "},
3485                    expected: indoc! {"
3486                        a
3487                        c
3488                        d
3489                    "},
3490                },
3491                Case {
3492                    name: "replacement_at_start_no_prefix_context",
3493                    original: indoc! {"
3494                        a
3495                        b
3496                        c
3497                    "},
3498                    model_output: indoc! {"
3499                        <|fim_middle|>
3500                        X
3501                        <|fim_suffix|>
3502                        b
3503                    "},
3504                    expected: indoc! {"
3505                        X
3506                        b
3507                        c
3508                    "},
3509                },
3510                Case {
3511                    name: "replacement_at_end_no_suffix_context",
3512                    original: indoc! {"
3513                        a
3514                        b
3515                        c
3516                    "},
3517                    model_output: indoc! {"
3518                        b
3519                        <|fim_middle|>
3520                        Z
3521                        <|fim_suffix|>
3522                    "},
3523                    expected: indoc! {"
3524                        a
3525                        b
3526                        Z
3527                    "},
3528                },
3529                Case {
3530                    name: "context_with_trailing_newline_is_preserved",
3531                    original: indoc! {"
3532                        a
3533                        b
3534                        c
3535                    "},
3536                    model_output: indoc! {"
3537                        a
3538                        <|fim_middle|>
3539                        B
3540                        <|fim_suffix|>
3541                        c
3542                    "},
3543                    expected: indoc! {"
3544                        a
3545                        B
3546                        c
3547                    "},
3548                },
3549                Case {
3550                    name: "cursor_marker_passes_through_untouched",
3551                    original: indoc! {"
3552                        a
3553                        b
3554                        c
3555                    "},
3556                    model_output: indoc! {"
3557                        a
3558                        <|fim_middle|>
3559                        B<|user_cursor|>B
3560                        <|fim_suffix|>
3561                        c
3562                    "},
3563                    expected: indoc! {"
3564                        a
3565                        B<|user_cursor|>B
3566                        c
3567                    "},
3568                },
3569                Case {
3570                    name: "multiple_prefix_context_lines",
3571                    original: indoc! {"
3572                        a
3573                        b
3574                        c
3575                        d
3576                        e
3577                    "},
3578                    model_output: indoc! {"
3579                        b
3580                        c
3581                        <|fim_middle|>
3582                        D
3583                        <|fim_suffix|>
3584                        e
3585                    "},
3586                    expected: indoc! {"
3587                        a
3588                        b
3589                        c
3590                        D
3591                        e
3592                    "},
3593                },
3594            ];
3595
3596            for case in cases {
3597                let (edit_range, replacement) =
3598                    apply_variable_edit(case.original, case.model_output).unwrap();
3599                let mut edited = case.original.to_string();
3600                edited.replace_range(edit_range, &replacement);
3601                assert_eq!(edited, case.expected, "{}", case.name);
3602            }
3603        }
3604
3605        #[test]
3606        fn test_patch_to_variable_edit() {
3607            struct Case {
3608                name: &'static str,
3609                old: &'static str,
3610                patch: &'static str,
3611                cursor_offset: Option<usize>,
3612                expected_variable_edit: &'static str,
3613                expected_after_apply: &'static str,
3614            }
3615
3616            let cases = [
3617                Case {
3618                    name: "simple_replacement",
3619                    old: indoc! {"
3620                        zero
3621                        one
3622                        two
3623                        three
3624                        four
3625                        five
3626                    "},
3627                    patch: indoc! {"
3628                        @@ -3,3 +3,3 @@
3629                         two
3630                        -three
3631                        +THREE
3632                         four
3633                    "},
3634                    cursor_offset: None,
3635                    expected_variable_edit: indoc! {"
3636                        one
3637                        two
3638                        <|fim_middle|>
3639                        THREE
3640                        <|fim_suffix|>
3641                        four
3642                        five
3643                    "},
3644                    expected_after_apply: indoc! {"
3645                        zero
3646                        one
3647                        two
3648                        THREE
3649                        four
3650                        five
3651                    "},
3652                },
3653                Case {
3654                    name: "insertion",
3655                    old: indoc! {"
3656                        a
3657                        b
3658                        c
3659                        d
3660                        e
3661                    "},
3662                    patch: indoc! {"
3663                        @@ -2,0 +3,1 @@
3664                         b
3665                        +X
3666                         c
3667                    "},
3668                    cursor_offset: None,
3669                    expected_variable_edit: indoc! {"
3670                        a
3671                        b
3672                        <|fim_middle|>
3673                        X
3674                        <|fim_suffix|>
3675                        c
3676                        d
3677                    "},
3678                    expected_after_apply: indoc! {"
3679                        a
3680                        b
3681                        X
3682                        c
3683                        d
3684                        e
3685                    "},
3686                },
3687                Case {
3688                    name: "deletion",
3689                    old: indoc! {"
3690                        a
3691                        b
3692                        c
3693                        d
3694                        e
3695                    "},
3696                    patch: indoc! {"
3697                        @@ -2,3 +2,2 @@
3698                         b
3699                        -c
3700                         d
3701                    "},
3702                    cursor_offset: None,
3703                    expected_variable_edit: indoc! {"
3704                        a
3705                        b
3706                        <|fim_middle|>
3707                        <|fim_suffix|>
3708                        d
3709                        e
3710                    "},
3711                    expected_after_apply: indoc! {"
3712                        a
3713                        b
3714                        d
3715                        e
3716                    "},
3717                },
3718                Case {
3719                    name: "edit_near_start",
3720                    old: indoc! {"
3721                        first
3722                        second
3723                        third
3724                        fourth
3725                    "},
3726                    patch: indoc! {"
3727                        @@ -1,1 +1,1 @@
3728                        -first
3729                        +FIRST
3730                    "},
3731                    cursor_offset: None,
3732                    expected_variable_edit: indoc! {"
3733                        <|fim_middle|>
3734                        FIRST
3735                        <|fim_suffix|>
3736                        second
3737                        third
3738                    "},
3739                    expected_after_apply: indoc! {"
3740                        FIRST
3741                        second
3742                        third
3743                        fourth
3744                    "},
3745                },
3746                Case {
3747                    name: "edit_near_end",
3748                    old: indoc! {"
3749                        first
3750                        second
3751                        third
3752                        fourth
3753                    "},
3754                    patch: indoc! {"
3755                        @@ -4,1 +4,1 @@
3756                        -fourth
3757                        +FOURTH
3758                    "},
3759                    cursor_offset: None,
3760                    expected_variable_edit: indoc! {"
3761                        second
3762                        third
3763                        <|fim_middle|>
3764                        FOURTH
3765                        <|fim_suffix|>
3766                    "},
3767                    expected_after_apply: indoc! {"
3768                        first
3769                        second
3770                        third
3771                        FOURTH
3772                    "},
3773                },
3774                Case {
3775                    name: "cursor_at_start_of_replacement",
3776                    old: indoc! {"
3777                        zero
3778                        one
3779                        two
3780                        three
3781                        four
3782                        five
3783                    "},
3784                    patch: indoc! {"
3785                        @@ -3,3 +3,3 @@
3786                         two
3787                        -three
3788                        +THREE
3789                         four
3790                    "},
3791                    cursor_offset: Some(4),
3792                    expected_variable_edit: indoc! {"
3793                        one
3794                        two
3795                        <|fim_middle|>
3796                        <|user_cursor|>THREE
3797                        <|fim_suffix|>
3798                        four
3799                        five
3800                    "},
3801                    expected_after_apply: indoc! {"
3802                        zero
3803                        one
3804                        two
3805                        <|user_cursor|>THREE
3806                        four
3807                        five
3808                    "},
3809                },
3810                Case {
3811                    name: "cursor_in_middle_of_replacement",
3812                    old: indoc! {"
3813                        zero
3814                        one
3815                        two
3816                        three
3817                        four
3818                        five
3819                    "},
3820                    patch: indoc! {"
3821                        @@ -3,3 +3,3 @@
3822                         two
3823                        -three
3824                        +THREE
3825                         four
3826                    "},
3827                    cursor_offset: Some(6),
3828                    expected_variable_edit: indoc! {"
3829                        one
3830                        two
3831                        <|fim_middle|>
3832                        TH<|user_cursor|>REE
3833                        <|fim_suffix|>
3834                        four
3835                        five
3836                    "},
3837                    expected_after_apply: indoc! {"
3838                        zero
3839                        one
3840                        two
3841                        TH<|user_cursor|>REE
3842                        four
3843                        five
3844                    "},
3845                },
3846                Case {
3847                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3848                    old: indoc! {"
3849                        one
3850                        a
3851                        b
3852                        c
3853                        d
3854                        two
3855                        a
3856                        b
3857                        c
3858                        d
3859                        three
3860                        a
3861                        b
3862                        c
3863                        d
3864                        four
3865                    "},
3866                    patch: indoc! {"
3867                        @@ -4,5 +4,5 @@
3868                         two
3869                         a
3870                         b
3871                        -c
3872                        +C
3873                         d
3874                         three
3875                    "},
3876                    cursor_offset: None,
3877                    expected_variable_edit: indoc! {"
3878                        two
3879                        a
3880                        b
3881                        <|fim_middle|>
3882                        C
3883                        <|fim_suffix|>
3884                        d
3885                        three
3886                    "},
3887                    expected_after_apply: indoc! {"
3888                        one
3889                        a
3890                        b
3891                        c
3892                        d
3893                        two
3894                        a
3895                        b
3896                        C
3897                        d
3898                        three
3899                        a
3900                        b
3901                        c
3902                        d
3903                        four
3904                    "},
3905                },
3906                Case {
3907                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3908                    old: indoc! {"
3909                        {
3910                            {
3911                                one();
3912                            }
3913                        }
3914                        {
3915                            {
3916                                two();
3917                            }
3918                        }
3919                        {
3920                            {
3921                                three();
3922                            }
3923                        }
3924                        {
3925                            {
3926                                four();
3927                            }
3928                        }
3929                    "},
3930                    patch: indoc! {"
3931                        @@ -4,5 +4,5 @@
3932                             {
3933                        -        two();
3934                        +        TWO();
3935                             }
3936                    "},
3937                    cursor_offset: None,
3938                    expected_variable_edit: indoc! {"
3939                                one();
3940                            }
3941                        }
3942                        {
3943                            {
3944                        <|fim_middle|>
3945                                TWO();
3946                        <|fim_suffix|>
3947                            }
3948                        }
3949                        {
3950                            {
3951                                three();
3952                    "},
3953                    expected_after_apply: indoc! {"
3954                        {
3955                            {
3956                                one();
3957                            }
3958                        }
3959                        {
3960                            {
3961                                TWO();
3962                            }
3963                        }
3964                        {
3965                            {
3966                                three();
3967                            }
3968                        }
3969                        {
3970                            {
3971                                four();
3972                            }
3973                        }
3974                    "},
3975                },
3976            ];
3977
3978            for case in cases {
3979                let output =
3980                    patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
3981                        .unwrap_or_else(|error| {
3982                            panic!("failed converting patch for {}: {error}", case.name)
3983                        });
3984                assert_eq!(
3985                    output, case.expected_variable_edit,
3986                    "patch->variable_edit mismatch for {}",
3987                    case.name
3988                );
3989
3990                let (edit_range, replacement) = apply_variable_edit(case.old, &output)
3991                    .unwrap_or_else(|error| {
3992                        panic!("failed applying variable_edit for {}: {error}", case.name)
3993                    });
3994                let mut edited_by_variable_edit = case.old.to_string();
3995                edited_by_variable_edit.replace_range(edit_range, &replacement);
3996                assert_eq!(
3997                    edited_by_variable_edit, case.expected_after_apply,
3998                    "variable_edit apply mismatch for {}",
3999                    case.name
4000                );
4001
4002                let (expected_edit_range, expected_replacement) =
4003                    apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
4004                        |error| {
4005                            panic!(
4006                                "failed applying expected variable_edit for {}: {error}",
4007                                case.name
4008                            )
4009                        },
4010                    );
4011                let mut edited_by_expected_variable_edit = case.old.to_string();
4012                edited_by_expected_variable_edit
4013                    .replace_range(expected_edit_range, &expected_replacement);
4014                assert_eq!(
4015                    edited_by_expected_variable_edit, case.expected_after_apply,
4016                    "expected variable_edit apply mismatch for {}",
4017                    case.name
4018                );
4019            }
4020        }
4021
4022        #[test]
4023        fn test_write_cursor_excerpt_section() {
4024            let path = Path::new("test.rs");
4025            let context = "fn main() {\n    hello();\n}\n";
4026            let cursor_offset = 17;
4027            let mut prompt = String::new();
4028            write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
4029            assert_eq!(
4030                prompt,
4031                "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
4032            );
4033        }
4034    }
4035}
4036
4037/// The zeta1 prompt format
4038pub mod zeta1 {
4039    use super::*;
4040    use std::fmt::Write;
4041
    /// Cursor marker used by the zeta1 format. It differs from the crate-level
    /// `CURSOR_MARKER` (`<|user_cursor|>`), which this constant shadows inside
    /// this module.
    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
    /// Line written after the opening code fence when the excerpt starts at
    /// row 0 and the context range starts at offset 0.
    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
    /// Line written immediately before the editable region of the excerpt.
    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
    /// Marker written on its own line immediately after the editable region.
    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";

    /// Opening of every zeta1 prompt: the task instruction followed by the
    /// heading under which the user's edit events are listed.
    const INSTRUCTION_HEADER: &str = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n\n",
        "### User Edits:\n\n"
    );
    /// Heading placed between the edit events and the excerpt.
    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
    /// Heading that ends the prompt.
    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
4056
4057    /// Formats a complete zeta1 prompt from the input events and excerpt.
4058    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
4059        let mut prompt = String::with_capacity(
4060            INSTRUCTION_HEADER.len()
4061                + input_events.len()
4062                + EXCERPT_HEADER.len()
4063                + input_excerpt.len()
4064                + RESPONSE_HEADER.len(),
4065        );
4066        prompt.push_str(INSTRUCTION_HEADER);
4067        prompt.push_str(input_events);
4068        prompt.push_str(EXCERPT_HEADER);
4069        prompt.push_str(input_excerpt);
4070        prompt.push_str(RESPONSE_HEADER);
4071        prompt
4072    }
4073
4074    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
4075    /// editable and context byte-offset ranges within `cursor_excerpt`.
4076    pub fn format_zeta1_from_input(
4077        input: &ZetaPromptInput,
4078        editable_range: Range<usize>,
4079        context_range: Range<usize>,
4080    ) -> String {
4081        let events = format_zeta1_events(&input.events);
4082        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
4083        format_zeta1_prompt(&events, &excerpt)
4084    }
4085
4086    /// Formats events in zeta1 style (oldest first).
4087    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
4088        let mut result = String::new();
4089        for event in
4090            events
4091                .iter()
4092                .skip(events.len().saturating_sub(max_edit_event_count_for_format(
4093                    &ZetaFormat::V0114180EditableRegion,
4094                )))
4095        {
4096            let event_string = format_zeta1_event(event);
4097            if event_string.is_empty() {
4098                continue;
4099            }
4100            if !result.is_empty() {
4101                result.push_str("\n\n");
4102            }
4103            result.push_str(&event_string);
4104        }
4105        result
4106    }
4107
4108    fn format_zeta1_event(event: &Event) -> String {
4109        match event {
4110            Event::BufferChange {
4111                path,
4112                old_path,
4113                diff,
4114                ..
4115            } => {
4116                let mut prompt = String::new();
4117                if old_path != path {
4118                    writeln!(
4119                        prompt,
4120                        "User renamed {} to {}\n",
4121                        old_path.display(),
4122                        path.display()
4123                    )
4124                    .ok();
4125                }
4126                if !diff.is_empty() {
4127                    write!(
4128                        prompt,
4129                        "User edited {}:\n```diff\n{}\n```",
4130                        path.display(),
4131                        diff
4132                    )
4133                    .ok();
4134                }
4135                prompt
4136            }
4137        }
4138    }
4139
4140    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
4141    /// within `cursor_excerpt`.
4142    fn format_zeta1_excerpt(
4143        input: &ZetaPromptInput,
4144        editable_range: Range<usize>,
4145        context_range: Range<usize>,
4146    ) -> String {
4147        let path_str = input.cursor_path.to_string_lossy();
4148        let excerpt = &*input.cursor_excerpt;
4149        let cursor_offset = input.cursor_offset_in_excerpt;
4150
4151        let mut prompt = String::new();
4152        writeln!(&mut prompt, "```{path_str}").ok();
4153
4154        let starts_at_file_beginning =
4155            input.excerpt_start_row == Some(0) && context_range.start == 0;
4156        if starts_at_file_beginning {
4157            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
4158        }
4159
4160        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
4161
4162        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
4163        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
4164        prompt.push_str(CURSOR_MARKER);
4165        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
4166        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
4167
4168        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
4169        write!(prompt, "\n```").ok();
4170
4171        prompt
4172    }
4173
4174    /// Cleans zeta1 model output by extracting content between editable region
4175    /// markers and converting the zeta1 cursor marker to the universal one.
4176    /// Returns `None` if the output doesn't contain the expected markers.
4177    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
4178        let content = output.replace(CURSOR_MARKER, "");
4179
4180        let content_start = content
4181            .find(EDITABLE_REGION_START_MARKER)
4182            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
4183            .map(|pos| {
4184                if content.as_bytes().get(pos) == Some(&b'\n') {
4185                    pos + 1
4186                } else {
4187                    pos
4188                }
4189            })
4190            .unwrap_or(0);
4191
4192        let content_end = content
4193            .find(EDITABLE_REGION_END_MARKER)
4194            .map(|pos| {
4195                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
4196                    pos - 1
4197                } else {
4198                    pos
4199                }
4200            })
4201            .unwrap_or(content.len());
4202
4203        if content_start > content_end {
4204            return Some(String::new());
4205        }
4206
4207        let extracted = &content[content_start..content_end];
4208
4209        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
4210            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
4211            let text_before_cursor = text_before_cursor
4212                .find(EDITABLE_REGION_START_MARKER)
4213                .map(|pos| {
4214                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
4215                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
4216                        after_marker + 1
4217                    } else {
4218                        after_marker
4219                    }
4220                })
4221                .unwrap_or(0);
4222            let offset_in_extracted = zeta1_cursor_pos
4223                .saturating_sub(text_before_cursor)
4224                .min(extracted.len());
4225            offset_in_extracted
4226        });
4227
4228        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
4229        if let Some(offset) = cursor_offset {
4230            result.push_str(&extracted[..offset]);
4231            result.push_str(super::CURSOR_MARKER);
4232            result.push_str(&extracted[offset..]);
4233        } else {
4234            result.push_str(extracted);
4235        }
4236
4237        Some(result)
4238    }
4239}
4240
4241#[cfg(test)]
4242mod tests {
4243    use super::*;
4244    use indoc::indoc;
4245
4246    fn make_input(
4247        cursor_excerpt: &str,
4248        editable_range: Range<usize>,
4249        cursor_offset: usize,
4250        events: Vec<Event>,
4251        related_files: Vec<RelatedFile>,
4252    ) -> ZetaPromptInput {
4253        let context_range = 0..cursor_excerpt.len();
4254        ZetaPromptInput {
4255            cursor_path: Path::new("test.rs").into(),
4256            cursor_excerpt: cursor_excerpt.into(),
4257            cursor_offset_in_excerpt: cursor_offset,
4258            excerpt_start_row: None,
4259            events: events.into_iter().map(Arc::new).collect(),
4260            related_files: Some(related_files),
4261            active_buffer_diagnostics: vec![],
4262            excerpt_ranges: ExcerptRanges {
4263                editable_150: editable_range.clone(),
4264                editable_180: editable_range.clone(),
4265                editable_350: editable_range,
4266                editable_150_context_350: context_range.clone(),
4267                editable_180_context_350: context_range.clone(),
4268                editable_350_context_150: context_range,
4269                ..Default::default()
4270            },
4271            syntax_ranges: None,
4272            experiment: None,
4273            in_open_source_repo: false,
4274            can_collect_data: false,
4275            repo_url: None,
4276        }
4277    }
4278
4279    fn make_input_with_context_range(
4280        excerpt: &str,
4281        editable_range: Range<usize>,
4282        context_range: Range<usize>,
4283        cursor_offset: usize,
4284    ) -> ZetaPromptInput {
4285        ZetaPromptInput {
4286            cursor_path: Path::new("test.rs").into(),
4287            cursor_excerpt: excerpt.into(),
4288            cursor_offset_in_excerpt: cursor_offset,
4289            excerpt_start_row: None,
4290            events: vec![],
4291            related_files: Some(vec![]),
4292            active_buffer_diagnostics: vec![],
4293            excerpt_ranges: ExcerptRanges {
4294                editable_150: editable_range.clone(),
4295                editable_180: editable_range.clone(),
4296                editable_350: editable_range,
4297                editable_150_context_350: context_range.clone(),
4298                editable_180_context_350: context_range.clone(),
4299                editable_350_context_150: context_range,
4300                ..Default::default()
4301            },
4302            syntax_ranges: None,
4303            experiment: None,
4304            in_open_source_repo: false,
4305            can_collect_data: false,
4306            repo_url: None,
4307        }
4308    }
4309
4310    fn make_event(path: &str, diff: &str) -> Event {
4311        Event::BufferChange {
4312            path: Path::new(path).into(),
4313            old_path: Path::new(path).into(),
4314            diff: diff.to_string(),
4315            predicted: false,
4316            in_open_source_repo: false,
4317        }
4318    }
4319
4320    fn make_related_file(path: &str, content: &str) -> RelatedFile {
4321        RelatedFile {
4322            path: Path::new(path).into(),
4323            max_row: content.lines().count() as u32,
4324            excerpts: vec![RelatedExcerpt {
4325                row_range: 0..content.lines().count() as u32,
4326                text: content.into(),
4327                order: 0,
4328            }],
4329            in_open_source_repo: false,
4330        }
4331    }
4332
4333    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> Option<String> {
4334        format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4335    }
4336
4337    fn budget_with_margin(requested_tokens: usize) -> usize {
4338        ((requested_tokens as f64) / 0.9).ceil() as usize
4339    }
4340
4341    #[test]
4342    fn test_no_truncation_when_within_budget() {
4343        let input = make_input(
4344            "prefix\neditable\nsuffix",
4345            7..15,
4346            10,
4347            vec![make_event("a.rs", "-old\n+new\n")],
4348            vec![make_related_file("related.rs", "fn helper() {}\n")],
4349        );
4350
4351        assert_eq!(
4352            format_with_budget(&input, 10000).unwrap(),
4353            indoc! {r#"
4354                <|file_sep|>related.rs
4355                fn helper() {}
4356                <|file_sep|>edit history
4357                --- a/a.rs
4358                +++ b/a.rs
4359                -old
4360                +new
4361                <|file_sep|>test.rs
4362                <|fim_prefix|>
4363                prefix
4364                <|fim_middle|>current
4365                edi<|user_cursor|>table
4366                <|fim_suffix|>
4367
4368                suffix
4369                <|fim_middle|>updated
4370            "#}
4371            .to_string()
4372        );
4373    }
4374
4375    #[test]
4376    fn test_truncation_drops_edit_history_when_budget_tight() {
4377        let input = make_input(
4378            "code",
4379            0..4,
4380            2,
4381            vec![make_event("a.rs", "-x\n+y\n")],
4382            vec![
4383                make_related_file("r1.rs", "aaaaaaa\n"),
4384                make_related_file("r2.rs", "bbbbbbb\n"),
4385            ],
4386        );
4387
4388        assert_eq!(
4389            format_with_budget(&input, 10000).unwrap(),
4390            indoc! {r#"
4391                <|file_sep|>r1.rs
4392                aaaaaaa
4393                <|file_sep|>r2.rs
4394                bbbbbbb
4395                <|file_sep|>edit history
4396                --- a/a.rs
4397                +++ b/a.rs
4398                -x
4399                +y
4400                <|file_sep|>test.rs
4401                <|fim_prefix|>
4402                <|fim_middle|>current
4403                co<|user_cursor|>de
4404                <|fim_suffix|>
4405                <|fim_middle|>updated
4406            "#}
4407            .to_string()
4408        );
4409
4410        assert_eq!(
4411            format_with_budget(&input, budget_with_margin(55)),
4412            Some(
4413                indoc! {r#"
4414                <|file_sep|>edit history
4415                --- a/a.rs
4416                +++ b/a.rs
4417                -x
4418                +y
4419                <|file_sep|>test.rs
4420                <|fim_prefix|>
4421                <|fim_middle|>current
4422                co<|user_cursor|>de
4423                <|fim_suffix|>
4424                <|fim_middle|>updated
4425            "#}
4426                .to_string()
4427            )
4428        );
4429    }
4430
4431    #[test]
4432    fn test_truncation_includes_partial_excerpts() {
4433        let input = make_input(
4434            "x",
4435            0..1,
4436            0,
4437            vec![],
4438            vec![RelatedFile {
4439                path: Path::new("big.rs").into(),
4440                max_row: 30,
4441                in_open_source_repo: false,
4442                excerpts: vec![
4443                    RelatedExcerpt {
4444                        row_range: 0..10,
4445                        text: "first excerpt\n".into(),
4446                        order: 0,
4447                    },
4448                    RelatedExcerpt {
4449                        row_range: 10..20,
4450                        text: "second excerpt\n".into(),
4451                        order: 0,
4452                    },
4453                    RelatedExcerpt {
4454                        row_range: 20..30,
4455                        text: "third excerpt\n".into(),
4456                        order: 0,
4457                    },
4458                ],
4459            }],
4460        );
4461
4462        assert_eq!(
4463            format_with_budget(&input, 10000).unwrap(),
4464            indoc! {r#"
4465                <|file_sep|>big.rs
4466                first excerpt
4467                ...
4468                second excerpt
4469                ...
4470                third excerpt
4471                <|file_sep|>test.rs
4472                <|fim_prefix|>
4473                <|fim_middle|>current
4474                <|user_cursor|>x
4475                <|fim_suffix|>
4476                <|fim_middle|>updated
4477            "#}
4478            .to_string()
4479        );
4480
4481        assert_eq!(
4482            format_with_budget(&input, budget_with_margin(50)).unwrap(),
4483            indoc! {r#"
4484                <|file_sep|>big.rs
4485                first excerpt
4486                ...
4487                <|file_sep|>test.rs
4488                <|fim_prefix|>
4489                <|fim_middle|>current
4490                <|user_cursor|>x
4491                <|fim_suffix|>
4492                <|fim_middle|>updated
4493            "#}
4494            .to_string()
4495        );
4496    }
4497
4498    #[test]
4499    fn test_truncation_prioritizes_lower_order_excerpts() {
4500        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
4501        // With tight budget, only the lower-order excerpt from file_b should be included.
4502        let input = make_input(
4503            "x",
4504            0..1,
4505            0,
4506            vec![],
4507            vec![
4508                RelatedFile {
4509                    path: Path::new("file_a.rs").into(),
4510                    max_row: 10,
4511                    in_open_source_repo: false,
4512                    excerpts: vec![RelatedExcerpt {
4513                        row_range: 0..10,
4514                        text: "low priority content\n".into(),
4515                        order: 5,
4516                    }],
4517                },
4518                RelatedFile {
4519                    path: Path::new("file_b.rs").into(),
4520                    max_row: 10,
4521                    in_open_source_repo: false,
4522                    excerpts: vec![RelatedExcerpt {
4523                        row_range: 0..10,
4524                        text: "high priority content\n".into(),
4525                        order: 1,
4526                    }],
4527                },
4528            ],
4529        );
4530
4531        // With large budget, both files included; rendered in stable lexicographic order.
4532        assert_eq!(
4533            format_with_budget(&input, 10000).unwrap(),
4534            indoc! {r#"
4535                <|file_sep|>file_a.rs
4536                low priority content
4537                <|file_sep|>file_b.rs
4538                high priority content
4539                <|file_sep|>test.rs
4540                <|fim_prefix|>
4541                <|fim_middle|>current
4542                <|user_cursor|>x
4543                <|fim_suffix|>
4544                <|fim_middle|>updated
4545            "#}
4546            .to_string()
4547        );
4548
4549        // With tight budget, only file_b (lower order) fits.
4550        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
4551        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
4552        // file_a would need another 14 tokens, which doesn't fit.
4553        assert_eq!(
4554            format_with_budget(&input, budget_with_margin(52)).unwrap(),
4555            indoc! {r#"
4556                <|file_sep|>file_b.rs
4557                high priority content
4558                <|file_sep|>test.rs
4559                <|fim_prefix|>
4560                <|fim_middle|>current
4561                <|user_cursor|>x
4562                <|fim_suffix|>
4563                <|fim_middle|>updated
4564            "#}
4565            .to_string()
4566        );
4567    }
4568
4569    #[test]
4570    fn test_truncation_drops_high_order_excerpts_within_file() {
4571        // A single file has excerpts at order 1 and order 3. With a tight budget,
4572        // only the order-1 excerpts are included while the order-3 excerpt is
4573        // dropped — even though they belong to the same file. This also preserves
4574        // the parent invariant: parent outline items have order ≤ their best
4575        // child, so they're always included when any child is.
4576        let input = make_input(
4577            "x",
4578            0..1,
4579            0,
4580            vec![],
4581            vec![RelatedFile {
4582                path: Path::new("mod.rs").into(),
4583                max_row: 30,
4584                in_open_source_repo: false,
4585                excerpts: vec![
4586                    RelatedExcerpt {
4587                        row_range: 0..5,
4588                        text: "mod header\n".into(),
4589                        order: 1,
4590                    },
4591                    RelatedExcerpt {
4592                        row_range: 5..15,
4593                        text: "important fn\n".into(),
4594                        order: 1,
4595                    },
4596                    RelatedExcerpt {
4597                        row_range: 15..30,
4598                        text: "less important fn\n".into(),
4599                        order: 3,
4600                    },
4601                ],
4602            }],
4603        );
4604
4605        // With large budget, all three excerpts included.
4606        assert_eq!(
4607            format_with_budget(&input, 10000).unwrap(),
4608            indoc! {r#"
4609                <|file_sep|>mod.rs
4610                mod header
4611                ...
4612                important fn
4613                ...
4614                less important fn
4615                <|file_sep|>test.rs
4616                <|fim_prefix|>
4617                <|fim_middle|>current
4618                <|user_cursor|>x
4619                <|fim_suffix|>
4620                <|fim_middle|>updated
4621            "#}
4622            .to_string()
4623        );
4624
4625        // With tight budget, only order<=1 excerpts included (header + important fn).
4626        assert_eq!(
4627            format_with_budget(&input, budget_with_margin(55)).unwrap(),
4628            indoc! {r#"
4629                <|file_sep|>mod.rs
4630                mod header
4631                ...
4632                important fn
4633                ...
4634                <|file_sep|>test.rs
4635                <|fim_prefix|>
4636                <|fim_middle|>current
4637                <|user_cursor|>x
4638                <|fim_suffix|>
4639                <|fim_middle|>updated
4640            "#}
4641            .to_string()
4642        );
4643    }
4644
4645    #[test]
4646    fn test_truncation_drops_older_events_first() {
4647        let input = make_input(
4648            "x",
4649            0..1,
4650            0,
4651            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
4652            vec![],
4653        );
4654
4655        assert_eq!(
4656            format_with_budget(&input, 10000).unwrap(),
4657            indoc! {r#"
4658                <|file_sep|>edit history
4659                --- a/old.rs
4660                +++ b/old.rs
4661                -1
4662                --- a/new.rs
4663                +++ b/new.rs
4664                -2
4665                <|file_sep|>test.rs
4666                <|fim_prefix|>
4667                <|fim_middle|>current
4668                <|user_cursor|>x
4669                <|fim_suffix|>
4670                <|fim_middle|>updated
4671            "#}
4672            .to_string()
4673        );
4674
4675        assert_eq!(
4676            format_with_budget(&input, 60).unwrap(),
4677            indoc! {r#"
4678                <|file_sep|>edit history
4679                --- a/new.rs
4680                +++ b/new.rs
4681                -2
4682                <|file_sep|>test.rs
4683                <|fim_prefix|>
4684                <|fim_middle|>current
4685                <|user_cursor|>x
4686                <|fim_suffix|>
4687                <|fim_middle|>updated
4688            "#}
4689            .to_string()
4690        );
4691    }
4692
4693    #[test]
4694    fn test_cursor_excerpt_always_included_with_minimal_budget() {
4695        let input = make_input(
4696            "fn main() {}",
4697            0..12,
4698            3,
4699            vec![make_event("a.rs", "-old\n+new\n")],
4700            vec![make_related_file("related.rs", "helper\n")],
4701        );
4702
4703        assert!(format_with_budget(&input, 30).is_none())
4704    }
4705
4706    #[track_caller]
4707    fn format_seed_coder(input: &ZetaPromptInput) -> String {
4708        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4709            .expect("seed coder prompt formatting should succeed")
4710    }
4711
4712    #[track_caller]
4713    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4714        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4715            .expect("seed coder prompt formatting should succeed")
4716    }
4717
4718    #[test]
4719    fn test_seed_coder_basic_format() {
4720        let input = make_input(
4721            "prefix\neditable\nsuffix",
4722            7..15,
4723            10,
4724            vec![make_event("a.rs", "-old\n+new\n")],
4725            vec![make_related_file("related.rs", "fn helper() {}\n")],
4726        );
4727
4728        assert_eq!(
4729            format_seed_coder(&input),
4730            indoc! {r#"
4731                <[fim-suffix]>
4732                suffix
4733                <[fim-prefix]><filename>related.rs
4734                fn helper() {}
4735
4736                <filename>edit_history
4737                --- a/a.rs
4738                +++ b/a.rs
4739                -old
4740                +new
4741
4742                <filename>test.rs
4743                prefix
4744                <<<<<<< CURRENT
4745                edi<|user_cursor|>table
4746                =======
4747                <[fim-middle]>"#}
4748        );
4749    }
4750
4751    #[test]
4752    fn test_v0317_formats_prompt_with_many_related_files() {
4753        let related_files = (0..900)
4754            .map(|index| {
4755                make_related_file(
4756                    &format!("related_{index}.rs"),
4757                    "fn helper() {\n    let value = 1;\n}\n",
4758                )
4759            })
4760            .collect();
4761
4762        let input = make_input(
4763            "code",
4764            0..4,
4765            2,
4766            vec![make_event("a.rs", "-x\n+y\n")],
4767            related_files,
4768        );
4769
4770        let prompt =
4771            format_prompt_with_budget_for_format(&input, ZetaFormat::V0317SeedMultiRegions, 4096);
4772
4773        assert!(prompt.is_some());
4774        let prompt = prompt.expect("v0317 should produce a prompt under high related-file count");
4775        assert!(prompt.contains("test.rs"));
4776        assert!(prompt.contains(CURSOR_MARKER));
4777    }
4778
4779    #[test]
4780    fn test_seed_coder_no_context() {
4781        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
4782
4783        assert_eq!(
4784            format_seed_coder(&input),
4785            indoc! {r#"
4786                <[fim-suffix]>
4787                after
4788                <[fim-prefix]><filename>test.rs
4789                before
4790                <<<<<<< CURRENT
4791                mid<|user_cursor|>dle
4792                =======
4793                <[fim-middle]>"#}
4794        );
4795    }
4796
4797    #[test]
4798    fn test_seed_coder_truncation_drops_context() {
4799        let input = make_input(
4800            "code",
4801            0..4,
4802            2,
4803            vec![make_event("a.rs", "-x\n+y\n")],
4804            vec![make_related_file("r1.rs", "content\n")],
4805        );
4806
4807        // With large budget, everything is included
4808        assert_eq!(
4809            format_seed_coder(&input),
4810            indoc! {r#"
4811                <[fim-suffix]>
4812                <[fim-prefix]><filename>r1.rs
4813                content
4814
4815                <filename>edit_history
4816                --- a/a.rs
4817                +++ b/a.rs
4818                -x
4819                +y
4820
4821                <filename>test.rs
4822                <<<<<<< CURRENT
4823                co<|user_cursor|>de
4824                =======
4825                <[fim-middle]>"#}
4826        );
4827
4828        assert_eq!(
4829            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 24),
4830            None
4831        );
4832
4833        assert_eq!(
4834            format_seed_coder_with_budget(&input, 40),
4835            indoc! {r#"
4836                <[fim-suffix]>
4837                <[fim-prefix]><filename>test.rs
4838                <<<<<<< CURRENT
4839                co<|user_cursor|>de
4840                =======
4841                <[fim-middle]>"#
4842            }
4843        )
4844    }
4845
4846    #[test]
4847    fn test_seed_coder_truncation_prioritizes_lower_order() {
4848        let input = make_input(
4849            "code",
4850            0..4,
4851            2,
4852            vec![],
4853            vec![
4854                RelatedFile {
4855                    path: Path::new("low_prio.rs").into(),
4856                    max_row: 5,
4857                    in_open_source_repo: false,
4858                    excerpts: vec![RelatedExcerpt {
4859                        row_range: 0..5,
4860                        text: "low prio\n".into(),
4861                        order: 10,
4862                    }],
4863                },
4864                RelatedFile {
4865                    path: Path::new("high_prio.rs").into(),
4866                    max_row: 5,
4867                    in_open_source_repo: false,
4868                    excerpts: vec![RelatedExcerpt {
4869                        row_range: 0..5,
4870                        text: "high prio\n".into(),
4871                        order: 1,
4872                    }],
4873                },
4874            ],
4875        );
4876
4877        // With large budget, both included; rendered in stable lexicographic order.
4878        assert_eq!(
4879            format_seed_coder(&input),
4880            indoc! {r#"
4881                <[fim-suffix]>
4882                <[fim-prefix]><filename>low_prio.rs
4883                low prio
4884                <filename>high_prio.rs
4885                high prio
4886
4887                <filename>test.rs
4888                <<<<<<< CURRENT
4889                co<|user_cursor|>de
4890                =======
4891                <[fim-middle]>"#}
4892        );
4893
4894        // With tight budget under the generic heuristic, context is dropped but the
4895        // minimal cursor section still fits.
4896        assert_eq!(
4897            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 44),
4898            Some(
4899                indoc! {r#"
4900                    <[fim-suffix]>
4901                    <[fim-prefix]><filename>test.rs
4902                    <<<<<<< CURRENT
4903                    co<|user_cursor|>de
4904                    =======
4905                    <[fim-middle]>"#}
4906                .to_string()
4907            )
4908        );
4909    }
4910
4911    #[test]
4912    fn test_format_zeta1_from_input_basic() {
4913        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
4914        let input = ZetaPromptInput {
4915            cursor_path: Path::new("src/main.rs").into(),
4916            cursor_excerpt: excerpt.into(),
4917            cursor_offset_in_excerpt: 30,
4918            excerpt_start_row: Some(0),
4919            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
4920            related_files: Some(vec![]),
4921            active_buffer_diagnostics: vec![],
4922            excerpt_ranges: ExcerptRanges {
4923                editable_150: 15..41,
4924                editable_180: 15..41,
4925                editable_350: 15..41,
4926                editable_150_context_350: 0..excerpt.len(),
4927                editable_180_context_350: 0..excerpt.len(),
4928                editable_350_context_150: 0..excerpt.len(),
4929                ..Default::default()
4930            },
4931            syntax_ranges: None,
4932            experiment: None,
4933            in_open_source_repo: false,
4934            can_collect_data: false,
4935            repo_url: None,
4936        };
4937
4938        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
4939
4940        assert_eq!(
4941            prompt,
4942            concat!(
4943                "### Instruction:\n",
4944                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4945                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4946                "into account the cursor location.\n",
4947                "\n",
4948                "### User Edits:\n",
4949                "\n",
4950                "User edited other.rs:\n",
4951                "```diff\n",
4952                "-old\n",
4953                "+new\n",
4954                "\n",
4955                "```\n",
4956                "\n",
4957                "### User Excerpt:\n",
4958                "\n",
4959                "```src/main.rs\n",
4960                "<|start_of_file|>\n",
4961                "fn before() {}\n",
4962                "<|editable_region_start|>\n",
4963                "fn foo() {\n",
4964                "    <|user_cursor_is_here|>let x = 1;\n",
4965                "\n",
4966                "<|editable_region_end|>}\n",
4967                "fn after() {}\n",
4968                "\n",
4969                "```\n",
4970                "\n",
4971                "### Response:\n",
4972            ),
4973        );
4974    }
4975
4976    #[test]
4977    fn test_format_zeta1_from_input_no_start_of_file() {
4978        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
4979        let input = ZetaPromptInput {
4980            cursor_path: Path::new("src/main.rs").into(),
4981            cursor_excerpt: excerpt.into(),
4982            cursor_offset_in_excerpt: 15,
4983            excerpt_start_row: Some(10),
4984            events: vec![],
4985            related_files: Some(vec![]),
4986            active_buffer_diagnostics: vec![],
4987            excerpt_ranges: ExcerptRanges {
4988                editable_150: 0..28,
4989                editable_180: 0..28,
4990                editable_350: 0..28,
4991                editable_150_context_350: 0..28,
4992                editable_180_context_350: 0..28,
4993                editable_350_context_150: 0..28,
4994                ..Default::default()
4995            },
4996            syntax_ranges: None,
4997            experiment: None,
4998            in_open_source_repo: false,
4999            can_collect_data: false,
5000            repo_url: None,
5001        };
5002
5003        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
5004
5005        assert_eq!(
5006            prompt,
5007            concat!(
5008                "### Instruction:\n",
5009                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5010                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5011                "into account the cursor location.\n",
5012                "\n",
5013                "### User Edits:\n",
5014                "\n",
5015                "\n",
5016                "\n",
5017                "### User Excerpt:\n",
5018                "\n",
5019                "```src/main.rs\n",
5020                "<|editable_region_start|>\n",
5021                "fn foo() {\n",
5022                "    <|user_cursor_is_here|>let x = 1;\n",
5023                "}\n",
5024                "\n",
5025                "<|editable_region_end|>\n",
5026                "```\n",
5027                "\n",
5028                "### Response:\n",
5029            ),
5030        );
5031    }
5032
5033    #[test]
5034    fn test_format_zeta1_from_input_with_sub_ranges() {
5035        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
5036        let editable_range = 10..37;
5037        let context_range = 0..excerpt.len();
5038
5039        let input = ZetaPromptInput {
5040            cursor_path: Path::new("test.rs").into(),
5041            cursor_excerpt: excerpt.into(),
5042            cursor_offset_in_excerpt: 25,
5043            excerpt_start_row: Some(0),
5044            events: vec![],
5045            related_files: Some(vec![]),
5046            active_buffer_diagnostics: vec![],
5047            excerpt_ranges: ExcerptRanges {
5048                editable_150: editable_range.clone(),
5049                editable_180: editable_range.clone(),
5050                editable_350: editable_range.clone(),
5051                editable_150_context_350: context_range.clone(),
5052                editable_180_context_350: context_range.clone(),
5053                editable_350_context_150: context_range.clone(),
5054                ..Default::default()
5055            },
5056            syntax_ranges: None,
5057            experiment: None,
5058            in_open_source_repo: false,
5059            can_collect_data: false,
5060            repo_url: None,
5061        };
5062
5063        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
5064
5065        assert_eq!(
5066            prompt,
5067            concat!(
5068                "### Instruction:\n",
5069                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5070                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5071                "into account the cursor location.\n",
5072                "\n",
5073                "### User Edits:\n",
5074                "\n",
5075                "\n",
5076                "\n",
5077                "### User Excerpt:\n",
5078                "\n",
5079                "```test.rs\n",
5080                "<|start_of_file|>\n",
5081                "// prefix\n",
5082                "<|editable_region_start|>\n",
5083                "fn foo() {\n",
5084                "    <|user_cursor_is_here|>let x = 1;\n",
5085                "}\n",
5086                "<|editable_region_end|>\n",
5087                "// suffix\n",
5088                "\n",
5089                "```\n",
5090                "\n",
5091                "### Response:\n",
5092            ),
5093        );
5094    }
5095
5096    #[test]
5097    fn test_max_event_count() {
5098        fn make_numbered_event(index: usize) -> Event {
5099            return make_event(
5100                &format!("event-{index}.rs"),
5101                &format!("-old-{index}\n+new-{index}\n"),
5102            );
5103        }
5104        let input = make_input(
5105            "x",
5106            0..1,
5107            0,
5108            (0..3).map(make_numbered_event).collect(),
5109            vec![],
5110        );
5111
5112        let edit_history_section = format_edit_history_within_budget(
5113            &input.events,
5114            "<|file_sep|>",
5115            "edit history",
5116            usize::MAX,
5117            5,
5118        );
5119
5120        assert_eq!(
5121            &edit_history_section,
5122            indoc!(
5123                "
5124                <|file_sep|>edit history
5125                --- a/event-0.rs
5126                +++ b/event-0.rs
5127                -old-0
5128                +new-0
5129                --- a/event-1.rs
5130                +++ b/event-1.rs
5131                -old-1
5132                +new-1
5133                --- a/event-2.rs
5134                +++ b/event-2.rs
5135                -old-2
5136                +new-2
5137            "
5138            )
5139        );
5140
5141        let edit_history_section = format_edit_history_within_budget(
5142            &input.events,
5143            "<|file_sep|>",
5144            "edit history",
5145            usize::MAX,
5146            2,
5147        );
5148
5149        assert_eq!(
5150            &edit_history_section,
5151            indoc!(
5152                "
5153                <|file_sep|>edit history
5154                --- a/event-1.rs
5155                +++ b/event-1.rs
5156                -old-1
5157                +new-1
5158                --- a/event-2.rs
5159                +++ b/event-2.rs
5160                -old-2
5161                +new-2
5162            "
5163            )
5164        );
5165
5166        let edit_history_section = format_edit_history_within_budget(
5167            &input.events,
5168            "<|file_sep|>",
5169            "edit history",
5170            usize::MAX,
5171            0,
5172        );
5173
5174        assert_eq!(&edit_history_section, "");
5175    }
5176
5177    #[test]
5178    fn test_clean_zeta1_model_output_basic() {
5179        let output = indoc! {"
5180            <|editable_region_start|>
5181            fn main() {
5182                println!(\"hello\");
5183            }
5184            <|editable_region_end|>
5185        "};
5186
5187        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5188        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
5189    }
5190
5191    #[test]
5192    fn test_clean_zeta1_model_output_with_cursor() {
5193        let output = indoc! {"
5194            <|editable_region_start|>
5195            fn main() {
5196                <|user_cursor_is_here|>println!(\"hello\");
5197            }
5198            <|editable_region_end|>
5199        "};
5200
5201        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5202        assert_eq!(
5203            cleaned,
5204            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
5205        );
5206    }
5207
5208    #[test]
5209    fn test_clean_zeta1_model_output_no_markers() {
5210        let output = "fn main() {}\n";
5211        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5212        assert_eq!(cleaned, "fn main() {}\n");
5213    }
5214
5215    #[test]
5216    fn test_clean_zeta1_model_output_empty_region() {
5217        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
5218        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5219        assert_eq!(cleaned, "");
5220    }
5221
5222    fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
5223        let mut result = excerpt.to_string();
5224        result.replace_range(
5225            parsed_output.range_in_excerpt.clone(),
5226            &parsed_output.new_editable_region,
5227        );
5228        result
5229    }
5230
5231    #[test]
5232    fn test_parse_zeta2_model_output() {
5233        let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
5234        let context_start = excerpt.find("ctx start").unwrap();
5235        let context_end = excerpt.find("after ctx").unwrap();
5236        let editable_start = excerpt.find("editable old").unwrap();
5237        let editable_end = editable_start + "editable old\n".len();
5238        let input = make_input_with_context_range(
5239            excerpt,
5240            editable_start..editable_end,
5241            context_start..context_end,
5242            editable_start,
5243        );
5244
5245        let output = parse_zeta2_model_output(
5246            "editable new\n>>>>>>> UPDATED\n",
5247            ZetaFormat::V0131GitMergeMarkersPrefix,
5248            &input,
5249        )
5250        .unwrap();
5251
5252        assert_eq!(
5253            apply_edit(excerpt, &output),
5254            "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
5255        );
5256    }
5257
5258    #[test]
5259    fn test_parse_zeta2_model_output_identity() {
5260        let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
5261        let editable_start = excerpt.find("bbb").unwrap();
5262        let editable_end = excerpt.find("ddd").unwrap();
5263        let input = make_input_with_context_range(
5264            excerpt,
5265            editable_start..editable_end,
5266            0..excerpt.len(),
5267            editable_start,
5268        );
5269
5270        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5271        let output =
5272            parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
5273
5274        assert_eq!(apply_edit(excerpt, &output), excerpt);
5275    }
5276
5277    #[test]
5278    fn test_parse_zeta2_model_output_strips_end_marker() {
5279        let excerpt = "hello\nworld\n";
5280        let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
5281
5282        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5283        let output1 =
5284            parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
5285        let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();
5286
5287        assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
5288        assert_eq!(apply_edit(excerpt, &output1), "new content\n");
5289    }
5290}