zeta_prompt.rs

   1pub mod excerpt_ranges;
   2pub mod multi_region;
   3
   4use anyhow::{Result, anyhow};
   5use serde::{Deserialize, Serialize};
   6use std::fmt::Write;
   7use std::ops::Range;
   8use std::path::Path;
   9use std::sync::Arc;
  10use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
  11
  12pub use crate::excerpt_ranges::{
  13    ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
  14};
  15
  16pub const CURSOR_MARKER: &str = "<|user_cursor|>";
  17pub const MAX_PROMPT_TOKENS: usize = 4096;
  18
  19/// Use up to this amount of the editable region for prefill.
  20/// Larger values may result in more robust generation, but
  21/// this region becomes non-editable.
  22pub const PREFILL_RATIO: f64 = 0.1; // 10%
  23
  24fn estimate_tokens(bytes: usize) -> usize {
  25    bytes / 3
  26}
  27
  28/// Leave some slack to avoid overflow.
  29fn apply_prompt_budget_margin(max_tokens: usize) -> usize {
  30    (max_tokens as f64 * 0.9).floor() as usize
  31}
  32
  33#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
  34pub struct ZetaPromptInput {
  35    pub cursor_path: Arc<Path>,
  36    pub cursor_excerpt: Arc<str>,
  37    pub cursor_offset_in_excerpt: usize,
  38    #[serde(default, skip_serializing_if = "Option::is_none")]
  39    pub excerpt_start_row: Option<u32>,
  40    pub events: Vec<Arc<Event>>,
  41    #[serde(default)]
  42    pub related_files: Option<Vec<RelatedFile>>,
  43    #[serde(default, skip_serializing_if = "Vec::is_empty")]
  44    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
  45    /// These ranges let the server select model-appropriate subsets.
  46    pub excerpt_ranges: ExcerptRanges,
  47    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
  48    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
  49    /// When present, the server uses these to compute editable/context ranges
  50    /// instead of `excerpt_ranges`.
  51    #[serde(default, skip_serializing_if = "Option::is_none")]
  52    pub syntax_ranges: Option<Vec<Range<usize>>>,
  53    /// The name of the edit prediction model experiment to use.
  54    #[serde(default, skip_serializing_if = "Option::is_none")]
  55    pub experiment: Option<String>,
  56    #[serde(default)]
  57    pub in_open_source_repo: bool,
  58    #[serde(default)]
  59    pub can_collect_data: bool,
  60    #[serde(default, skip_serializing_if = "Option::is_none")]
  61    pub repo_url: Option<String>,
  62}
  63
  64#[derive(
  65    Default,
  66    Clone,
  67    Copy,
  68    Debug,
  69    PartialEq,
  70    Eq,
  71    Hash,
  72    EnumIter,
  73    IntoStaticStr,
  74    Serialize,
  75    Deserialize,
  76)]
  77#[allow(non_camel_case_types)]
  78pub enum ZetaFormat {
  79    V0112MiddleAtEnd,
  80    V0113Ordered,
  81    V0114180EditableRegion,
  82    V0120GitMergeMarkers,
  83    #[default]
  84    V0131GitMergeMarkersPrefix,
  85    V0211Prefill,
  86    V0211SeedCoder,
  87    v0226Hashline,
  88    V0304VariableEdit,
  89    V0304SeedNoEdits,
  90    /// Multi-block marker spans with NO_EDITS sentinel.
  91    V0306SeedMultiRegions,
  92    /// Byte-exact marker spans; all intermediate markers emitted; repeated marker means no-edit.
  93    V0316SeedMultiRegions,
  94    /// V0316, but marker numbers are relative to the cursor block (e.g. -1, -0, +1).
  95    V0317SeedMultiRegions,
  96}
  97
  98impl std::fmt::Display for ZetaFormat {
  99    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 100        write!(f, "{}", <&'static str>::from(self))
 101    }
 102}
 103
 104impl ZetaFormat {
 105    pub fn parse(format_name: &str) -> Result<Self> {
 106        let mut results = ZetaFormat::iter().filter(|version| {
 107            <&'static str>::from(version)
 108                .to_lowercase()
 109                .contains(&format_name.to_lowercase())
 110        });
 111        let Some(result) = results.next() else {
 112            anyhow::bail!(
 113                "`{format_name}` did not match any of:\n{}",
 114                Self::options_as_string()
 115            );
 116        };
 117        if results.next().is_some() {
 118            anyhow::bail!(
 119                "`{format_name}` matched more than one of:\n{}",
 120                Self::options_as_string()
 121            );
 122        }
 123        Ok(result)
 124    }
 125
 126    pub fn options_as_string() -> String {
 127        ZetaFormat::iter()
 128            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 129            .collect::<Vec<_>>()
 130            .concat()
 131    }
 132}
 133
 134#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
 135#[serde(tag = "event")]
 136pub enum Event {
 137    BufferChange {
 138        path: Arc<Path>,
 139        old_path: Arc<Path>,
 140        diff: String,
 141        predicted: bool,
 142        in_open_source_repo: bool,
 143    },
 144}
 145
 146impl Event {
 147    pub fn in_open_source_repo(&self) -> bool {
 148        match self {
 149            Event::BufferChange {
 150                in_open_source_repo,
 151                ..
 152            } => *in_open_source_repo,
 153        }
 154    }
 155}
 156
 157pub fn write_event(prompt: &mut String, event: &Event) {
 158    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 159        for component in path.components() {
 160            prompt.push('/');
 161            write!(prompt, "{}", component.as_os_str().display()).ok();
 162        }
 163    }
 164    match event {
 165        Event::BufferChange {
 166            path,
 167            old_path,
 168            diff,
 169            predicted,
 170            in_open_source_repo: _,
 171        } => {
 172            if *predicted {
 173                prompt.push_str("// User accepted prediction:\n");
 174            }
 175            prompt.push_str("--- a");
 176            write_path_as_unix_str(prompt, old_path.as_ref());
 177            prompt.push_str("\n+++ b");
 178            write_path_as_unix_str(prompt, path.as_ref());
 179            prompt.push('\n');
 180            prompt.push_str(diff);
 181        }
 182    }
 183}
 184
 185#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
 186pub struct ActiveBufferDiagnostic {
 187    pub severity: Option<i32>,
 188    pub message: String,
 189    pub snippet: String,
 190    pub snippet_buffer_row_range: Range<u32>,
 191    pub diagnostic_range_in_snippet: Range<usize>,
 192}
 193
 194#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
 195pub struct RelatedFile {
 196    pub path: Arc<Path>,
 197    pub max_row: u32,
 198    pub excerpts: Vec<RelatedExcerpt>,
 199    #[serde(default)]
 200    pub in_open_source_repo: bool,
 201}
 202
 203#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
 204pub struct RelatedExcerpt {
 205    pub row_range: Range<u32>,
 206    pub text: Arc<str>,
 207    #[serde(default)]
 208    pub order: usize,
 209}
 210
 211pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 212    special_tokens_for_format(format)
 213        .iter()
 214        .any(|token| input.cursor_excerpt.contains(token))
 215}
 216
 217pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> Option<String> {
 218    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
 219}
 220
 221pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 222    match format {
 223        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
 224        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
 225        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
 226        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 227        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
 228        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
 229        ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 230        ZetaFormat::v0226Hashline => hashline::special_tokens(),
 231        ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
 232        ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
 233        ZetaFormat::V0316SeedMultiRegions => {
 234            static TOKENS: &[&str] = &[
 235                seed_coder::FIM_SUFFIX,
 236                seed_coder::FIM_PREFIX,
 237                seed_coder::FIM_MIDDLE,
 238                seed_coder::FILE_MARKER,
 239                multi_region::V0316_END_MARKER,
 240                CURSOR_MARKER,
 241                multi_region::MARKER_TAG_PREFIX,
 242            ];
 243            TOKENS
 244        }
 245        ZetaFormat::V0317SeedMultiRegions => {
 246            static TOKENS: &[&str] = &[
 247                seed_coder::FIM_SUFFIX,
 248                seed_coder::FIM_PREFIX,
 249                seed_coder::FIM_MIDDLE,
 250                seed_coder::FILE_MARKER,
 251                multi_region::V0317_END_MARKER,
 252                CURSOR_MARKER,
 253                multi_region::RELATIVE_MARKER_TAG_PREFIX,
 254            ];
 255            TOKENS
 256        }
 257        ZetaFormat::V0306SeedMultiRegions => {
 258            static TOKENS: &[&str] = &[
 259                seed_coder::FIM_SUFFIX,
 260                seed_coder::FIM_PREFIX,
 261                seed_coder::FIM_MIDDLE,
 262                seed_coder::FILE_MARKER,
 263                seed_coder::START_MARKER,
 264                seed_coder::SEPARATOR,
 265                seed_coder::END_MARKER,
 266                CURSOR_MARKER,
 267                multi_region::MARKER_TAG_PREFIX,
 268            ];
 269            TOKENS
 270        }
 271    }
 272}
 273
 274/// Returns the (editable_token_limit, context_token_limit) for a given format.
 275pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
 276    match format {
 277        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
 278        ZetaFormat::V0114180EditableRegion => (180, 350),
 279        ZetaFormat::V0120GitMergeMarkers
 280        | ZetaFormat::V0131GitMergeMarkersPrefix
 281        | ZetaFormat::V0211Prefill
 282        | ZetaFormat::V0211SeedCoder
 283        | ZetaFormat::v0226Hashline
 284        | ZetaFormat::V0306SeedMultiRegions
 285        | ZetaFormat::V0316SeedMultiRegions
 286        | ZetaFormat::V0317SeedMultiRegions
 287        | ZetaFormat::V0304SeedNoEdits => (350, 150),
 288        ZetaFormat::V0304VariableEdit => (1024, 0),
 289    }
 290}
 291
 292pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 293    match format {
 294        ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
 295        ZetaFormat::V0112MiddleAtEnd
 296        | ZetaFormat::V0113Ordered
 297        | ZetaFormat::V0114180EditableRegion
 298        | ZetaFormat::V0120GitMergeMarkers
 299        | ZetaFormat::V0131GitMergeMarkersPrefix
 300        | ZetaFormat::V0211Prefill
 301        | ZetaFormat::V0211SeedCoder
 302        | ZetaFormat::V0304VariableEdit
 303        | ZetaFormat::V0306SeedMultiRegions
 304        | ZetaFormat::V0304SeedNoEdits => &[],
 305        ZetaFormat::V0316SeedMultiRegions => &[multi_region::V0316_END_MARKER],
 306        ZetaFormat::V0317SeedMultiRegions => &[multi_region::V0317_END_MARKER],
 307    }
 308}
 309
 310pub fn excerpt_ranges_for_format(
 311    format: ZetaFormat,
 312    ranges: &ExcerptRanges,
 313) -> (Range<usize>, Range<usize>) {
 314    match format {
 315        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 316            ranges.editable_150.clone(),
 317            ranges.editable_150_context_350.clone(),
 318        ),
 319        ZetaFormat::V0114180EditableRegion => (
 320            ranges.editable_180.clone(),
 321            ranges.editable_180_context_350.clone(),
 322        ),
 323        ZetaFormat::V0120GitMergeMarkers
 324        | ZetaFormat::V0131GitMergeMarkersPrefix
 325        | ZetaFormat::V0211Prefill
 326        | ZetaFormat::V0211SeedCoder
 327        | ZetaFormat::v0226Hashline
 328        | ZetaFormat::V0304SeedNoEdits
 329        | ZetaFormat::V0306SeedMultiRegions
 330        | ZetaFormat::V0316SeedMultiRegions
 331        | ZetaFormat::V0317SeedMultiRegions => (
 332            ranges.editable_350.clone(),
 333            ranges.editable_350_context_150.clone(),
 334        ),
 335        ZetaFormat::V0304VariableEdit => {
 336            let context = ranges
 337                .editable_350_context_1024
 338                .clone()
 339                .or(ranges.editable_350_context_512.clone())
 340                .unwrap_or_else(|| ranges.editable_350_context_150.clone());
 341            (context.clone(), context)
 342        }
 343    }
 344}
 345
 346pub fn write_cursor_excerpt_section_for_format(
 347    format: ZetaFormat,
 348    prompt: &mut String,
 349    path: &Path,
 350    context: &str,
 351    editable_range: &Range<usize>,
 352    cursor_offset: usize,
 353) {
 354    match format {
 355        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
 356            prompt,
 357            path,
 358            context,
 359            editable_range,
 360            cursor_offset,
 361        ),
 362        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 363            v0113_ordered::write_cursor_excerpt_section(
 364                prompt,
 365                path,
 366                context,
 367                editable_range,
 368                cursor_offset,
 369            )
 370        }
 371        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 372            prompt,
 373            path,
 374            context,
 375            editable_range,
 376            cursor_offset,
 377        ),
 378        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 379            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 380                prompt,
 381                path,
 382                context,
 383                editable_range,
 384                cursor_offset,
 385            )
 386        }
 387        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
 388            seed_coder::write_cursor_excerpt_section(
 389                prompt,
 390                path,
 391                context,
 392                editable_range,
 393                cursor_offset,
 394            )
 395        }
 396        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
 397            prompt,
 398            path,
 399            context,
 400            editable_range,
 401            cursor_offset,
 402        ),
 403        ZetaFormat::V0304VariableEdit => {
 404            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
 405        }
 406        ZetaFormat::V0306SeedMultiRegions => {
 407            prompt.push_str(&build_v0306_cursor_prefix(
 408                path,
 409                context,
 410                editable_range,
 411                cursor_offset,
 412            ));
 413        }
 414        ZetaFormat::V0316SeedMultiRegions => {
 415            prompt.push_str(&build_v0316_cursor_prefix(
 416                path,
 417                context,
 418                editable_range,
 419                cursor_offset,
 420            ));
 421        }
 422        ZetaFormat::V0317SeedMultiRegions => {
 423            prompt.push_str(&build_v0317_cursor_prefix(
 424                path,
 425                context,
 426                editable_range,
 427                cursor_offset,
 428            ));
 429        }
 430    }
 431}
 432
 433fn build_v0306_cursor_prefix(
 434    path: &Path,
 435    context: &str,
 436    editable_range: &Range<usize>,
 437    cursor_offset: usize,
 438) -> String {
 439    let mut section = String::new();
 440    let path_str = path.to_string_lossy();
 441    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 442
 443    section.push_str(&context[..editable_range.start]);
 444    section.push_str(seed_coder::START_MARKER);
 445
 446    let editable_text = &context[editable_range.clone()];
 447    let cursor_in_editable = cursor_offset - editable_range.start;
 448    multi_region::write_editable_with_markers(
 449        &mut section,
 450        editable_text,
 451        cursor_in_editable,
 452        CURSOR_MARKER,
 453    );
 454
 455    if !section.ends_with('\n') {
 456        section.push('\n');
 457    }
 458    section.push_str(seed_coder::SEPARATOR);
 459    section
 460}
 461
 462fn build_v0316_cursor_prefix(
 463    path: &Path,
 464    context: &str,
 465    editable_range: &Range<usize>,
 466    cursor_offset: usize,
 467) -> String {
 468    let mut section = String::new();
 469    let path_str = path.to_string_lossy();
 470    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 471
 472    section.push_str(&context[..editable_range.start]);
 473
 474    let editable_text = &context[editable_range.clone()];
 475    let cursor_in_editable = cursor_offset - editable_range.start;
 476    multi_region::write_editable_with_markers_v0316(
 477        &mut section,
 478        editable_text,
 479        cursor_in_editable,
 480        CURSOR_MARKER,
 481    );
 482
 483    if !section.ends_with('\n') {
 484        section.push('\n');
 485    }
 486    section
 487}
 488
 489fn build_v0317_cursor_prefix(
 490    path: &Path,
 491    context: &str,
 492    editable_range: &Range<usize>,
 493    cursor_offset: usize,
 494) -> String {
 495    let mut section = String::new();
 496    let path_str = path.to_string_lossy();
 497    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 498
 499    section.push_str(&context[..editable_range.start]);
 500
 501    let editable_text = &context[editable_range.clone()];
 502    let cursor_in_editable = cursor_offset - editable_range.start;
 503    multi_region::write_editable_with_markers_v0317(
 504        &mut section,
 505        editable_text,
 506        cursor_in_editable,
 507        CURSOR_MARKER,
 508    );
 509
 510    if !section.ends_with('\n') {
 511        section.push('\n');
 512    }
 513    section
 514}
 515
 516fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
 517    let start_row = text[0..range.start].matches('\n').count() as u32;
 518    let mut end_row = start_row + text[range.clone()].matches('\n').count() as u32;
 519    if !text[..range.end].ends_with('\n') {
 520        end_row += 1;
 521    }
 522    return start_row..end_row;
 523}
 524
 525pub fn format_prompt_with_budget_for_format(
 526    input: &ZetaPromptInput,
 527    format: ZetaFormat,
 528    max_tokens: usize,
 529) -> Option<String> {
 530    let (context, editable_range, context_range, cursor_offset) =
 531        resolve_cursor_region(input, format);
 532    let path = &*input.cursor_path;
 533
 534    let empty_files = Vec::new();
 535    let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
 536    let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
 537        let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
 538        let row_range = relative_row_range.start + cursor_excerpt_start_row
 539            ..relative_row_range.end + cursor_excerpt_start_row;
 540        &filter_redundant_excerpts(
 541            input_related_files.to_vec(),
 542            input.cursor_path.as_ref(),
 543            row_range,
 544        )
 545    } else {
 546        input_related_files
 547    };
 548
 549    let prompt = match format {
 550        ZetaFormat::V0211SeedCoder
 551        | ZetaFormat::V0304SeedNoEdits
 552        | ZetaFormat::V0306SeedMultiRegions
 553        | ZetaFormat::V0316SeedMultiRegions
 554        | ZetaFormat::V0317SeedMultiRegions => {
 555            let mut cursor_section = String::new();
 556            write_cursor_excerpt_section_for_format(
 557                format,
 558                &mut cursor_section,
 559                path,
 560                context,
 561                &editable_range,
 562                cursor_offset,
 563            );
 564
 565            let budget_with_margin = apply_prompt_budget_margin(max_tokens);
 566            seed_coder::assemble_fim_prompt(
 567                context,
 568                &editable_range,
 569                &cursor_section,
 570                &input.events,
 571                related_files,
 572                budget_with_margin,
 573            )
 574        }
 575        _ => {
 576            let mut cursor_section = String::new();
 577            write_cursor_excerpt_section_for_format(
 578                format,
 579                &mut cursor_section,
 580                path,
 581                context,
 582                &editable_range,
 583                cursor_offset,
 584            );
 585
 586            let mut remaining_budget = apply_prompt_budget_margin(max_tokens);
 587            let cursor_tokens = estimate_tokens(cursor_section.len());
 588            remaining_budget = remaining_budget.saturating_sub(cursor_tokens);
 589
 590            let edit_history_section = format_edit_history_within_budget(
 591                &input.events,
 592                "<|file_sep|>",
 593                "edit history",
 594                remaining_budget,
 595                max_edit_event_count_for_format(&format),
 596            );
 597            let edit_history_tokens = estimate_tokens(edit_history_section.len());
 598            remaining_budget = remaining_budget.saturating_sub(edit_history_tokens);
 599
 600            let related_files_section = format_related_files_within_budget(
 601                &related_files,
 602                "<|file_sep|>",
 603                "",
 604                remaining_budget,
 605            );
 606
 607            let mut prompt = String::new();
 608            prompt.push_str(&related_files_section);
 609            prompt.push_str(&edit_history_section);
 610            prompt.push_str(&cursor_section);
 611            prompt
 612        }
 613    };
 614    let prompt_tokens = estimate_tokens(prompt.len());
 615    if prompt_tokens > max_tokens {
 616        return None;
 617    }
 618    return Some(prompt);
 619}
 620
 621pub fn filter_redundant_excerpts(
 622    mut related_files: Vec<RelatedFile>,
 623    cursor_path: &Path,
 624    cursor_row_range: Range<u32>,
 625) -> Vec<RelatedFile> {
 626    for file in &mut related_files {
 627        if file.path.as_ref() == cursor_path {
 628            file.excerpts.retain(|excerpt| {
 629                excerpt.row_range.start < cursor_row_range.start
 630                    || excerpt.row_range.end > cursor_row_range.end
 631            });
 632        }
 633    }
 634    related_files.retain(|file| !file.excerpts.is_empty());
 635    related_files
 636}
 637
 638pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
 639    match format {
 640        ZetaFormat::V0112MiddleAtEnd
 641        | ZetaFormat::V0113Ordered
 642        | ZetaFormat::V0114180EditableRegion
 643        | ZetaFormat::V0120GitMergeMarkers
 644        | ZetaFormat::V0131GitMergeMarkersPrefix
 645        | ZetaFormat::V0211Prefill
 646        | ZetaFormat::V0211SeedCoder
 647        | ZetaFormat::v0226Hashline
 648        | ZetaFormat::V0304SeedNoEdits
 649        | ZetaFormat::V0304VariableEdit
 650        | ZetaFormat::V0306SeedMultiRegions
 651        | ZetaFormat::V0316SeedMultiRegions
 652        | ZetaFormat::V0317SeedMultiRegions => 6,
 653    }
 654}
 655
 656pub fn get_prefill_for_format(
 657    format: ZetaFormat,
 658    context: &str,
 659    editable_range: &Range<usize>,
 660) -> String {
 661    match format {
 662        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
 663        ZetaFormat::V0112MiddleAtEnd
 664        | ZetaFormat::V0113Ordered
 665        | ZetaFormat::V0114180EditableRegion
 666        | ZetaFormat::V0120GitMergeMarkers
 667        | ZetaFormat::V0131GitMergeMarkersPrefix
 668        | ZetaFormat::V0211SeedCoder
 669        | ZetaFormat::v0226Hashline
 670        | ZetaFormat::V0304VariableEdit => String::new(),
 671        ZetaFormat::V0304SeedNoEdits
 672        | ZetaFormat::V0306SeedMultiRegions
 673        | ZetaFormat::V0316SeedMultiRegions
 674        | ZetaFormat::V0317SeedMultiRegions => String::new(),
 675    }
 676}
 677
 678pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
 679    match format {
 680        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
 681        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
 682        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
 683        ZetaFormat::V0211SeedCoder
 684        | ZetaFormat::V0304SeedNoEdits
 685        | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
 686        ZetaFormat::V0316SeedMultiRegions => Some(multi_region::V0316_END_MARKER),
 687        ZetaFormat::V0317SeedMultiRegions => Some(multi_region::V0317_END_MARKER),
 688        ZetaFormat::V0112MiddleAtEnd
 689        | ZetaFormat::V0113Ordered
 690        | ZetaFormat::V0114180EditableRegion
 691        | ZetaFormat::v0226Hashline
 692        | ZetaFormat::V0304VariableEdit => None,
 693    }
 694}
 695
 696pub fn encode_patch_as_output_for_format(
 697    format: ZetaFormat,
 698    old_editable_region: &str,
 699    patch: &str,
 700    cursor_offset: Option<usize>,
 701) -> Result<Option<String>> {
 702    match format {
 703        ZetaFormat::v0226Hashline => {
 704            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
 705        }
 706        ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
 707            old_editable_region,
 708            patch,
 709            cursor_offset,
 710        )
 711        .map(Some),
 712        ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
 713            Ok(seed_coder::no_edits(patch))
 714        }
 715        ZetaFormat::V0316SeedMultiRegions => {
 716            let empty_patch = patch.lines().count() <= 3;
 717            if empty_patch {
 718                let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
 719                let marker_num =
 720                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
 721                let tag = multi_region::marker_tag(marker_num);
 722                Ok(Some(format!(
 723                    "{tag}{tag}{}",
 724                    multi_region::V0316_END_MARKER
 725                )))
 726            } else {
 727                Ok(None)
 728            }
 729        }
 730        ZetaFormat::V0317SeedMultiRegions => {
 731            let empty_patch = patch.lines().count() <= 3;
 732            if empty_patch {
 733                let tag = multi_region::marker_tag_relative(0);
 734                Ok(Some(format!(
 735                    "{tag}{tag}{}",
 736                    multi_region::V0317_END_MARKER
 737                )))
 738            } else {
 739                Ok(None)
 740            }
 741        }
 742        _ => Ok(None),
 743    }
 744}
 745
 746pub struct ParsedOutput {
 747    /// Text that should replace the editable region
 748    pub new_editable_region: String,
 749    /// The byte range within `cursor_excerpt` that this replacement applies to
 750    pub range_in_excerpt: Range<usize>,
 751}
 752
 753/// Parse model output for the given zeta format
 754pub fn parse_zeta2_model_output(
 755    output: &str,
 756    format: ZetaFormat,
 757    prompt_inputs: &ZetaPromptInput,
 758) -> Result<ParsedOutput> {
 759    let output = match output_end_marker_for_format(format) {
 760        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
 761        None => output,
 762    };
 763
 764    let (context, editable_range_in_context, context_range, cursor_offset) =
 765        resolve_cursor_region(prompt_inputs, format);
 766    let context_start = context_range.start;
 767    let old_editable_region = &context[editable_range_in_context.clone()];
 768    let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range_in_context.start);
 769
 770    let (range_in_context, output) = match format {
 771        ZetaFormat::v0226Hashline => (
 772            editable_range_in_context,
 773            if hashline::output_has_edit_commands(output) {
 774                hashline::apply_edit_commands(old_editable_region, output)
 775            } else {
 776                output.to_string()
 777            },
 778        ),
 779        ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
 780        ZetaFormat::V0304SeedNoEdits => (
 781            editable_range_in_context,
 782            if output.starts_with(seed_coder::NO_EDITS) {
 783                old_editable_region.to_string()
 784            } else {
 785                output.to_string()
 786            },
 787        ),
 788        ZetaFormat::V0306SeedMultiRegions => (
 789            editable_range_in_context,
 790            if output.starts_with(seed_coder::NO_EDITS) {
 791                old_editable_region.to_string()
 792            } else {
 793                multi_region::apply_marker_span(old_editable_region, output)?
 794            },
 795        ),
 796        ZetaFormat::V0316SeedMultiRegions => (
 797            editable_range_in_context,
 798            multi_region::apply_marker_span_v0316(old_editable_region, output)?,
 799        ),
 800        ZetaFormat::V0317SeedMultiRegions => (
 801            editable_range_in_context,
 802            multi_region::apply_marker_span_v0317(
 803                old_editable_region,
 804                output,
 805                Some(cursor_offset_in_editable),
 806            )?,
 807        ),
 808        _ => (editable_range_in_context, output.to_string()),
 809    };
 810
 811    let range_in_excerpt =
 812        range_in_context.start + context_start..range_in_context.end + context_start;
 813
 814    Ok(ParsedOutput {
 815        new_editable_region: output,
 816        range_in_excerpt,
 817    })
 818}
 819
 820pub fn excerpt_range_for_format(
 821    format: ZetaFormat,
 822    ranges: &ExcerptRanges,
 823) -> (Range<usize>, Range<usize>) {
 824    excerpt_ranges_for_format(format, ranges)
 825}
 826
 827pub fn resolve_cursor_region(
 828    input: &ZetaPromptInput,
 829    format: ZetaFormat,
 830) -> (&str, Range<usize>, Range<usize>, usize) {
 831    let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges {
 832        let (editable_tokens, context_tokens) = token_limits_for_format(format);
 833        compute_editable_and_context_ranges(
 834            &input.cursor_excerpt,
 835            input.cursor_offset_in_excerpt,
 836            syntax_ranges,
 837            editable_tokens,
 838            context_tokens,
 839        )
 840    } else {
 841        excerpt_range_for_format(format, &input.excerpt_ranges)
 842    };
 843    let context_start = context_range.start;
 844    let context_text = &input.cursor_excerpt[context_range.clone()];
 845    let adjusted_editable =
 846        (editable_range.start - context_start)..(editable_range.end - context_start);
 847    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 848
 849    (
 850        context_text,
 851        adjusted_editable,
 852        context_range,
 853        adjusted_cursor,
 854    )
 855}
 856
 857pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 858    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
 859    get_prefill_for_format(format, context, &editable_range)
 860}
 861
 862fn format_edit_history_within_budget(
 863    events: &[Arc<Event>],
 864    file_marker: &str,
 865    edit_history_name: &str,
 866    max_tokens: usize,
 867    max_edit_event_count: usize,
 868) -> String {
 869    let header = format!("{}{}\n", file_marker, edit_history_name);
 870    let header_tokens = estimate_tokens(header.len());
 871    if header_tokens >= max_tokens {
 872        return String::new();
 873    }
 874
 875    let mut event_strings: Vec<String> = Vec::new();
 876    let mut total_tokens = header_tokens;
 877
 878    for event in events.iter().rev().take(max_edit_event_count) {
 879        let mut event_str = String::new();
 880        write_event(&mut event_str, event);
 881        let event_tokens = estimate_tokens(event_str.len());
 882
 883        if total_tokens + event_tokens > max_tokens {
 884            break;
 885        }
 886        total_tokens += event_tokens;
 887        event_strings.push(event_str);
 888    }
 889
 890    if event_strings.is_empty() {
 891        return String::new();
 892    }
 893
 894    let mut result = header;
 895    for event_str in event_strings.iter().rev() {
 896        result.push_str(event_str);
 897    }
 898    result
 899}
 900
 901fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
 902    let needs_newline = !excerpt.text.ends_with('\n');
 903    let needs_ellipsis = excerpt.row_range.end < file_max_row;
 904    let len = excerpt.text.len()
 905        + if needs_newline { "\n".len() } else { 0 }
 906        + if needs_ellipsis { "...\n".len() } else { 0 };
 907    estimate_tokens(len)
 908}
 909
 910pub fn format_related_files_within_budget(
 911    related_files: &[RelatedFile],
 912    file_prefix: &str,
 913    file_suffix: &str,
 914    max_tokens: usize,
 915) -> String {
 916    struct ExcerptCandidate {
 917        file_ix: usize,
 918        excerpt_ix: usize,
 919        order: usize,
 920    }
 921
 922    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
 923        .iter()
 924        .enumerate()
 925        .flat_map(|(file_ix, file)| {
 926            file.excerpts
 927                .iter()
 928                .enumerate()
 929                .map(move |(excerpt_ix, e)| ExcerptCandidate {
 930                    file_ix,
 931                    excerpt_ix,
 932                    order: e.order,
 933                })
 934        })
 935        .collect();
 936
 937    // Pre-compute file header strings and their token costs.
 938    let file_headers: Vec<String> = related_files
 939        .iter()
 940        .map(|file| {
 941            let path_str = file.path.to_string_lossy();
 942            format!("{}{}\n", file_prefix, path_str)
 943        })
 944        .collect();
 945
 946    // Sort the excerpts by their order and determine how many fit within the budget.
 947    let mut total_tokens = 0;
 948    let mut included_excerpt_count = 0_usize;
 949    let mut included_file_indices = vec![false; related_files.len()];
 950    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
 951    for candidate in &excerpt_candidates {
 952        let file = &related_files[candidate.file_ix];
 953        let excerpt = &file.excerpts[candidate.excerpt_ix];
 954        let file_already_included = included_file_indices[candidate.file_ix];
 955        let header_cost = if file_already_included {
 956            0
 957        } else {
 958            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
 959        };
 960        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
 961        if total_tokens + header_cost + excerpt_cost > max_tokens {
 962            break;
 963        }
 964        total_tokens += header_cost + excerpt_cost;
 965        if !file_already_included {
 966            included_file_indices[candidate.file_ix] = true;
 967        }
 968        included_excerpt_count += 1;
 969    }
 970
 971    excerpt_candidates.truncate(included_excerpt_count);
 972    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
 973
 974    // Render all of the files that fit within the token budget, in the original order.
 975    let mut result = String::new();
 976    let mut last_file_ix = None;
 977    for candidate in &excerpt_candidates {
 978        if last_file_ix != Some(candidate.file_ix) {
 979            if last_file_ix.is_some() {
 980                result.push_str(file_suffix);
 981            }
 982            result.push_str(&file_headers[candidate.file_ix]);
 983            last_file_ix = Some(candidate.file_ix);
 984        }
 985        let file = &related_files[candidate.file_ix];
 986        let excerpt = &file.excerpts[candidate.excerpt_ix];
 987        result.push_str(&excerpt.text);
 988        if !result.ends_with('\n') {
 989            result.push('\n');
 990        }
 991        if excerpt.row_range.end < file.max_row {
 992            result.push_str("...\n");
 993        }
 994    }
 995
 996    result
 997}
 998
 999pub fn write_related_files(
1000    prompt: &mut String,
1001    related_files: &[RelatedFile],
1002) -> Vec<Range<usize>> {
1003    let mut ranges = Vec::new();
1004    for file in related_files {
1005        let start = prompt.len();
1006        let path_str = file.path.to_string_lossy();
1007        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1008        for excerpt in &file.excerpts {
1009            prompt.push_str(&excerpt.text);
1010            if !prompt.ends_with('\n') {
1011                prompt.push('\n');
1012            }
1013            if excerpt.row_range.end < file.max_row {
1014                prompt.push_str("...\n");
1015            }
1016        }
1017        let end = prompt.len();
1018        ranges.push(start..end);
1019    }
1020    ranges
1021}
1022
1023mod v0112_middle_at_end {
1024    use super::*;
1025
1026    pub fn special_tokens() -> &'static [&'static str] {
1027        &[
1028            "<|fim_prefix|>",
1029            "<|fim_suffix|>",
1030            "<|fim_middle|>",
1031            "<|file_sep|>",
1032            CURSOR_MARKER,
1033        ]
1034    }
1035
1036    pub fn write_cursor_excerpt_section(
1037        prompt: &mut String,
1038        path: &Path,
1039        context: &str,
1040        editable_range: &Range<usize>,
1041        cursor_offset: usize,
1042    ) {
1043        let path_str = path.to_string_lossy();
1044        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1045
1046        prompt.push_str("<|fim_prefix|>\n");
1047        prompt.push_str(&context[..editable_range.start]);
1048
1049        prompt.push_str("<|fim_suffix|>\n");
1050        prompt.push_str(&context[editable_range.end..]);
1051        if !prompt.ends_with('\n') {
1052            prompt.push('\n');
1053        }
1054
1055        prompt.push_str("<|fim_middle|>current\n");
1056        prompt.push_str(&context[editable_range.start..cursor_offset]);
1057        prompt.push_str(CURSOR_MARKER);
1058        prompt.push_str(&context[cursor_offset..editable_range.end]);
1059        if !prompt.ends_with('\n') {
1060            prompt.push('\n');
1061        }
1062
1063        prompt.push_str("<|fim_middle|>updated\n");
1064    }
1065}
1066
1067mod v0113_ordered {
1068    use super::*;
1069
1070    pub fn special_tokens() -> &'static [&'static str] {
1071        &[
1072            "<|fim_prefix|>",
1073            "<|fim_suffix|>",
1074            "<|fim_middle|>",
1075            "<|file_sep|>",
1076            CURSOR_MARKER,
1077        ]
1078    }
1079
1080    pub fn write_cursor_excerpt_section(
1081        prompt: &mut String,
1082        path: &Path,
1083        context: &str,
1084        editable_range: &Range<usize>,
1085        cursor_offset: usize,
1086    ) {
1087        let path_str = path.to_string_lossy();
1088        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1089
1090        prompt.push_str("<|fim_prefix|>\n");
1091        prompt.push_str(&context[..editable_range.start]);
1092        if !prompt.ends_with('\n') {
1093            prompt.push('\n');
1094        }
1095
1096        prompt.push_str("<|fim_middle|>current\n");
1097        prompt.push_str(&context[editable_range.start..cursor_offset]);
1098        prompt.push_str(CURSOR_MARKER);
1099        prompt.push_str(&context[cursor_offset..editable_range.end]);
1100        if !prompt.ends_with('\n') {
1101            prompt.push('\n');
1102        }
1103
1104        prompt.push_str("<|fim_suffix|>\n");
1105        prompt.push_str(&context[editable_range.end..]);
1106        if !prompt.ends_with('\n') {
1107            prompt.push('\n');
1108        }
1109
1110        prompt.push_str("<|fim_middle|>updated\n");
1111    }
1112}
1113
1114mod v0114180_editable_region {
1115    use super::*;
1116
1117    pub fn special_tokens() -> &'static [&'static str] {
1118        v0113_ordered::special_tokens()
1119    }
1120}
1121
1122pub mod v0120_git_merge_markers {
1123    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1124    //!
1125    //! Example prompt:
1126    //!
1127    //! <|file_sep|>path/to/target_file.py
1128    //! <|fim_prefix|>
1129    //! code before editable region
1130    //! <|fim_suffix|>
1131    //! code after editable region
1132    //! <|fim_middle|>
1133    //! <<<<<<< CURRENT
1134    //! code that
1135    //! needs to<|user_cursor|>
1136    //! be rewritten
1137    //! =======
1138    //!
1139    //! Expected output (should be generated by the model):
1140    //!
1141    //! updated
1142    //! code with
1143    //! changes applied
1144    //! >>>>>>> UPDATED
1145
1146    use super::*;
1147
1148    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1149    pub const SEPARATOR: &str = "=======\n";
1150    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1151
1152    pub fn special_tokens() -> &'static [&'static str] {
1153        &[
1154            "<|fim_prefix|>",
1155            "<|fim_suffix|>",
1156            "<|fim_middle|>",
1157            "<|file_sep|>",
1158            START_MARKER,
1159            SEPARATOR,
1160            END_MARKER,
1161            CURSOR_MARKER,
1162        ]
1163    }
1164
1165    pub fn write_cursor_excerpt_section(
1166        prompt: &mut String,
1167        path: &Path,
1168        context: &str,
1169        editable_range: &Range<usize>,
1170        cursor_offset: usize,
1171    ) {
1172        let path_str = path.to_string_lossy();
1173        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1174
1175        prompt.push_str("<|fim_prefix|>");
1176        prompt.push_str(&context[..editable_range.start]);
1177
1178        prompt.push_str("<|fim_suffix|>");
1179        prompt.push_str(&context[editable_range.end..]);
1180        if !prompt.ends_with('\n') {
1181            prompt.push('\n');
1182        }
1183
1184        prompt.push_str("<|fim_middle|>");
1185        prompt.push_str(START_MARKER);
1186        prompt.push_str(&context[editable_range.start..cursor_offset]);
1187        prompt.push_str(CURSOR_MARKER);
1188        prompt.push_str(&context[cursor_offset..editable_range.end]);
1189        if !prompt.ends_with('\n') {
1190            prompt.push('\n');
1191        }
1192        prompt.push_str(SEPARATOR);
1193    }
1194}
1195
1196pub mod v0131_git_merge_markers_prefix {
1197    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1198    //!
1199    //! Example prompt:
1200    //!
1201    //! <|file_sep|>path/to/target_file.py
1202    //! <|fim_prefix|>
1203    //! code before editable region
1204    //! <<<<<<< CURRENT
1205    //! code that
1206    //! needs to<|user_cursor|>
1207    //! be rewritten
1208    //! =======
1209    //! <|fim_suffix|>
1210    //! code after editable region
1211    //! <|fim_middle|>
1212    //!
1213    //! Expected output (should be generated by the model):
1214    //!
1215    //! updated
1216    //! code with
1217    //! changes applied
1218    //! >>>>>>> UPDATED
1219
1220    use super::*;
1221
1222    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1223    pub const SEPARATOR: &str = "=======\n";
1224    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1225
1226    pub fn special_tokens() -> &'static [&'static str] {
1227        &[
1228            "<|fim_prefix|>",
1229            "<|fim_suffix|>",
1230            "<|fim_middle|>",
1231            "<|file_sep|>",
1232            START_MARKER,
1233            SEPARATOR,
1234            END_MARKER,
1235            CURSOR_MARKER,
1236        ]
1237    }
1238
1239    pub fn write_cursor_excerpt_section(
1240        prompt: &mut String,
1241        path: &Path,
1242        context: &str,
1243        editable_range: &Range<usize>,
1244        cursor_offset: usize,
1245    ) {
1246        let path_str = path.to_string_lossy();
1247        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1248
1249        prompt.push_str("<|fim_prefix|>");
1250        prompt.push_str(&context[..editable_range.start]);
1251        prompt.push_str(START_MARKER);
1252        prompt.push_str(&context[editable_range.start..cursor_offset]);
1253        prompt.push_str(CURSOR_MARKER);
1254        prompt.push_str(&context[cursor_offset..editable_range.end]);
1255        if !prompt.ends_with('\n') {
1256            prompt.push('\n');
1257        }
1258        prompt.push_str(SEPARATOR);
1259
1260        prompt.push_str("<|fim_suffix|>");
1261        prompt.push_str(&context[editable_range.end..]);
1262        if !prompt.ends_with('\n') {
1263            prompt.push('\n');
1264        }
1265
1266        prompt.push_str("<|fim_middle|>");
1267    }
1268}
1269
1270pub mod v0211_prefill {
1271    use super::*;
1272
1273    pub fn special_tokens() -> &'static [&'static str] {
1274        v0131_git_merge_markers_prefix::special_tokens()
1275    }
1276
1277    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1278        let editable_region = &context[editable_range.start..editable_range.end];
1279
1280        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1281        let prefill_len = editable_region.floor_char_boundary(prefill_len);
1282
1283        // Find a token boundary to avoid splitting tokens in the prefill.
1284        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1285        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1286        // the \n and consume any consecutive \n characters after it.
1287        let prefill = &editable_region[..prefill_len];
1288        match prefill.rfind('\n') {
1289            Some(pos) => {
1290                let mut end = pos + 1;
1291                while end < editable_region.len()
1292                    && editable_region.as_bytes().get(end) == Some(&b'\n')
1293                {
1294                    end += 1;
1295                }
1296                editable_region[..end].to_string()
1297            }
1298            // No newline found. Fall back to splitting before the last space
1299            // (word-level boundary)
1300            None => match prefill.rfind(' ') {
1301                Some(pos) => prefill[..pos].to_string(),
1302                None => prefill.to_string(),
1303            },
1304        }
1305    }
1306}
1307
1308pub mod hashline {
1309
1310    use std::fmt::Display;
1311
1312    pub const END_MARKER: &str = "<|fim_middle|>updated";
1313    pub const START_MARKER: &str = "<|fim_middle|>current";
1314
1315    use super::*;
1316
1317    const SET_COMMAND_MARKER: &str = "<|set|>";
1318    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
1319    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1320
1321    pub fn special_tokens() -> &'static [&'static str] {
1322        return &[
1323            SET_COMMAND_MARKER,
1324            "<|set_range|>",
1325            INSERT_COMMAND_MARKER,
1326            NO_EDITS_COMMAND_MARKER,
1327            CURSOR_MARKER,
1328            "<|file_sep|>",
1329            "<|fim_prefix|>",
1330            "<|fim_suffix|>",
1331            "<|fim_middle|>",
1332        ];
1333    }
1334
1335    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1336    #[derive(Debug, Clone, PartialEq, Eq)]
1337    struct LineRef {
1338        index: usize,
1339        hash: u8,
1340    }
1341
1342    impl Display for LineRef {
1343        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1344            write!(f, "{}:{:02x}", self.index, self.hash)
1345        }
1346    }
1347
1348    pub fn hash_line(line: &[u8]) -> u8 {
1349        let mut h: u8 = 0;
1350        for &byte in line {
1351            h = h.wrapping_add(byte);
1352        }
1353        return h;
1354    }
1355
1356    /// Write the hashline-encoded editable region into `out`. Each line of
1357    /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1358    /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1359    /// to the start of `editable_text`).
1360    pub fn write_hashline_editable_region(
1361        out: &mut String,
1362        editable_text: &str,
1363        cursor_offset_in_editable: usize,
1364    ) {
1365        let mut offset = 0;
1366        for (i, line) in editable_text.lines().enumerate() {
1367            let (head, cursor, tail) = if cursor_offset_in_editable > offset
1368                && cursor_offset_in_editable < offset + line.len()
1369            {
1370                (
1371                    &line[..cursor_offset_in_editable - offset],
1372                    CURSOR_MARKER,
1373                    &line[cursor_offset_in_editable - offset..],
1374                )
1375            } else {
1376                (line, "", "")
1377            };
1378            write!(
1379                out,
1380                "\n{}|{head}{cursor}{tail}",
1381                LineRef {
1382                    index: i,
1383                    hash: hash_line(line.as_bytes())
1384                }
1385            )
1386            .unwrap();
1387            offset += line.len() + 1;
1388        }
1389    }
1390
1391    pub fn write_cursor_excerpt_section(
1392        prompt: &mut String,
1393        path: &Path,
1394        context: &str,
1395        editable_range: &Range<usize>,
1396        cursor_offset: usize,
1397    ) {
1398        let path_str = path.to_string_lossy();
1399        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1400
1401        prompt.push_str("<|fim_prefix|>\n");
1402        prompt.push_str(&context[..editable_range.start]);
1403        prompt.push_str(START_MARKER);
1404
1405        let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1406        let editable_region = &context[editable_range.clone()];
1407        write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1408
1409        if !prompt.ends_with('\n') {
1410            prompt.push('\n');
1411        }
1412
1413        prompt.push_str("<|fim_suffix|>\n");
1414        prompt.push_str(&context[editable_range.end..]);
1415        if !prompt.ends_with('\n') {
1416            prompt.push('\n');
1417        }
1418
1419        prompt.push_str(END_MARKER);
1420        prompt.push('\n');
1421    }
1422
1423    /// A single edit command parsed from the model output.
1424    #[derive(Debug)]
1425    enum EditCommand<'a> {
1426        /// Replace a range of lines (inclusive on both ends). Single-line set is
1427        /// represented by `start == end`.
1428        Set {
1429            start: LineRef,
1430            end: LineRef,
1431            content: &'a str,
1432        },
1433        /// Insert new lines after the given line, or before the first line if
1434        /// `after` is `None`.
1435        Insert {
1436            after: Option<LineRef>,
1437            content: &'a str,
1438        },
1439    }
1440
1441    /// Parse a line reference like `3:c3` into a `LineRef`.
1442    fn parse_line_ref(s: &str) -> Option<LineRef> {
1443        let (idx_str, hash_str) = s.split_once(':')?;
1444        let index = idx_str.parse::<usize>().ok()?;
1445        let hash = u8::from_str_radix(hash_str, 16).ok()?;
1446        Some(LineRef { index, hash })
1447    }
1448
1449    /// Parse the model output into a list of `EditCommand`s.
1450    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1451        let mut commands = Vec::new();
1452        let mut offset = 0usize;
1453
1454        while offset < model_output.len() {
1455            let next_nl = model_output[offset..]
1456                .find('\n')
1457                .map(|i| offset + i)
1458                .unwrap_or(model_output.len());
1459            let line = &model_output[offset..next_nl];
1460            let line_end = if next_nl < model_output.len() {
1461                next_nl + 1
1462            } else {
1463                next_nl
1464            };
1465
1466            let trimmed = line.trim();
1467            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1468                (true, spec)
1469            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1470                (false, spec)
1471            } else {
1472                offset = line_end;
1473                continue;
1474            };
1475
1476            let mut content_end = line_end;
1477            let mut scan = line_end;
1478
1479            while scan < model_output.len() {
1480                let body_nl = model_output[scan..]
1481                    .find('\n')
1482                    .map(|i| scan + i)
1483                    .unwrap_or(model_output.len());
1484                let body_line = &model_output[scan..body_nl];
1485                if body_line.trim().starts_with(SET_COMMAND_MARKER)
1486                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1487                {
1488                    break;
1489                }
1490                scan = if body_nl < model_output.len() {
1491                    body_nl + 1
1492                } else {
1493                    body_nl
1494                };
1495                content_end = scan;
1496            }
1497
1498            let content = &model_output[line_end..content_end];
1499
1500            if is_set {
1501                if let Some((start_str, end_str)) = specifier.split_once('-') {
1502                    if let (Some(start), Some(end)) =
1503                        (parse_line_ref(start_str), parse_line_ref(end_str))
1504                    {
1505                        commands.push(EditCommand::Set {
1506                            start,
1507                            end,
1508                            content,
1509                        });
1510                    }
1511                } else if let Some(target) = parse_line_ref(specifier) {
1512                    commands.push(EditCommand::Set {
1513                        start: target.clone(),
1514                        end: target,
1515                        content,
1516                    });
1517                }
1518            } else {
1519                let after = parse_line_ref(specifier);
1520                commands.push(EditCommand::Insert { after, content });
1521            }
1522
1523            offset = scan;
1524        }
1525
1526        commands
1527    }
1528
1529    /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1530    /// (as opposed to being a plain full-replacement output).
1531    /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1532    /// editable region, returning the plain text content.
1533    pub fn strip_hashline_prefixes(region: &str) -> String {
1534        let mut decoded: String = region
1535            .lines()
1536            .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1537            .collect::<Vec<_>>()
1538            .join("\n");
1539        if region.ends_with('\n') {
1540            decoded.push('\n');
1541        }
1542        decoded
1543    }
1544
1545    pub fn output_has_edit_commands(model_output: &str) -> bool {
1546        model_output.contains(SET_COMMAND_MARKER)
1547            || model_output.contains(INSERT_COMMAND_MARKER)
1548            || model_output.contains(NO_EDITS_COMMAND_MARKER)
1549    }
1550
1551    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1552    /// original editable region text.
1553    ///
1554    /// `editable_region` is the original text of the editable region (without hash
1555    /// prefixes). `model_output` is the raw model response containing edit commands.
1556    ///
1557    /// Returns the full replacement text for the editable region.
1558    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1559        if model_output
1560            .trim_start()
1561            .starts_with(NO_EDITS_COMMAND_MARKER)
1562        {
1563            return editable_region.to_string();
1564        }
1565
1566        let original_lines: Vec<&str> = editable_region.lines().collect();
1567        let old_hashes: Vec<u8> = original_lines
1568            .iter()
1569            .map(|line| hash_line(line.as_bytes()))
1570            .collect();
1571
1572        let commands = parse_edit_commands(model_output);
1573
1574        // For set operations: indexed by start line → Some((end line index, content))
1575        // For insert operations: indexed by line index → vec of content to insert after
1576        // Insert-before-first is tracked separately.
1577        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1578        let mut insert_before_first: Vec<&str> = Vec::new();
1579        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1580
1581        for command in &commands {
1582            match command {
1583                EditCommand::Set {
1584                    start,
1585                    end,
1586                    content,
1587                } => {
1588                    if start.index < old_hashes.len()
1589                        && end.index < old_hashes.len()
1590                        && start.index <= end.index
1591                        && old_hashes[start.index] == start.hash
1592                        && old_hashes[end.index] == end.hash
1593                    {
1594                        set_ops[start.index] = Some((end.index, *content));
1595                    }
1596                }
1597                EditCommand::Insert { after, content } => match after {
1598                    None => insert_before_first.push(*content),
1599                    Some(line_ref) => {
1600                        if line_ref.index < old_hashes.len()
1601                            && old_hashes[line_ref.index] == line_ref.hash
1602                        {
1603                            insert_after[line_ref.index].push(*content);
1604                        }
1605                    }
1606                },
1607            }
1608        }
1609
1610        let mut result = String::new();
1611
1612        // Emit any insertions before the first line
1613        for content in &insert_before_first {
1614            result.push_str(content);
1615            if !content.ends_with('\n') {
1616                result.push('\n');
1617            }
1618        }
1619
1620        let mut i = 0;
1621        while i < original_lines.len() {
1622            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1623                // Replace lines i..=end_index with the replacement content
1624                result.push_str(replacement);
1625                if !replacement.is_empty() && !replacement.ends_with('\n') {
1626                    result.push('\n');
1627                }
1628                // Emit any insertions after the end of this set range
1629                if *end_index < insert_after.len() {
1630                    for content in &insert_after[*end_index] {
1631                        result.push_str(content);
1632                        if !content.ends_with('\n') {
1633                            result.push('\n');
1634                        }
1635                    }
1636                }
1637                i = end_index + 1;
1638            } else {
1639                // Keep the original line
1640                result.push_str(original_lines[i]);
1641                result.push('\n');
1642                // Emit any insertions after this line
1643                for content in &insert_after[i] {
1644                    result.push_str(content);
1645                    if !content.ends_with('\n') {
1646                        result.push('\n');
1647                    }
1648                }
1649                i += 1;
1650            }
1651        }
1652
1653        // Preserve trailing newline behavior: if the original ended with a
1654        // newline the result already has one; if it didn't, trim the extra one
1655        // we added.
1656        if !editable_region.ends_with('\n') && result.ends_with('\n') {
1657            result.pop();
1658        }
1659
1660        result
1661    }
1662
1663    /// Convert a unified diff patch into hashline edit commands.
1664    ///
1665    /// Parses the unified diff `patch` directly to determine which lines of
1666    /// `old_text` are deleted/replaced and what new lines are added, then emits
1667    /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1668    /// `{index}:{hash}` identifiers.
1669    ///
1670    /// `cursor_offset` is an optional byte offset into the first hunk's new
1671    /// text (context + additions) where the cursor marker should be placed.
1672    pub fn patch_to_edit_commands(
1673        old_text: &str,
1674        patch: &str,
1675        cursor_offset: Option<usize>,
1676    ) -> Result<String> {
1677        let old_lines: Vec<&str> = old_text.lines().collect();
1678        let old_hashes: Vec<u8> = old_lines
1679            .iter()
1680            .map(|line| hash_line(line.as_bytes()))
1681            .collect();
1682
1683        let mut result = String::new();
1684        let mut first_hunk = true;
1685
1686        struct Hunk<'a> {
1687            line_range: Range<usize>,
1688            new_text_lines: Vec<&'a str>,
1689            cursor_line_offset_in_new_text: Option<(usize, usize)>,
1690        }
1691
1692        // Parse the patch line by line. We only care about hunk headers,
1693        // context, deletions, and additions.
1694        let mut old_line_index: usize = 0;
1695        let mut current_hunk: Option<Hunk> = None;
1696        // Byte offset tracking within the hunk's new text for cursor placement.
1697        let mut new_text_byte_offset: usize = 0;
1698        // The line index of the last old line seen before/in the current hunk
1699        // (used for insert-after reference).
1700        let mut last_old_line_before_hunk: Option<usize> = None;
1701
1702        fn flush_hunk(
1703            hunk: Hunk,
1704            last_old_line: Option<usize>,
1705            result: &mut String,
1706            old_hashes: &[u8],
1707        ) {
1708            if hunk.line_range.is_empty() {
1709                // Pure insertion — reference the old line to insert after when in bounds.
1710                if let Some(after) = last_old_line
1711                    && let Some(&hash) = old_hashes.get(after)
1712                {
1713                    write!(
1714                        result,
1715                        "{INSERT_COMMAND_MARKER}{}\n",
1716                        LineRef { index: after, hash }
1717                    )
1718                    .unwrap();
1719                } else {
1720                    result.push_str(INSERT_COMMAND_MARKER);
1721                    result.push('\n');
1722                }
1723            } else {
1724                let start = hunk.line_range.start;
1725                let end_exclusive = hunk.line_range.end;
1726                let deleted_line_count = end_exclusive.saturating_sub(start);
1727
1728                if deleted_line_count == 1 {
1729                    if let Some(&hash) = old_hashes.get(start) {
1730                        write!(
1731                            result,
1732                            "{SET_COMMAND_MARKER}{}\n",
1733                            LineRef { index: start, hash }
1734                        )
1735                        .unwrap();
1736                    } else {
1737                        result.push_str(SET_COMMAND_MARKER);
1738                        result.push('\n');
1739                    }
1740                } else {
1741                    let end_inclusive = end_exclusive - 1;
1742                    match (
1743                        old_hashes.get(start).copied(),
1744                        old_hashes.get(end_inclusive).copied(),
1745                    ) {
1746                        (Some(start_hash), Some(end_hash)) => {
1747                            write!(
1748                                result,
1749                                "{SET_COMMAND_MARKER}{}-{}\n",
1750                                LineRef {
1751                                    index: start,
1752                                    hash: start_hash
1753                                },
1754                                LineRef {
1755                                    index: end_inclusive,
1756                                    hash: end_hash
1757                                }
1758                            )
1759                            .unwrap();
1760                        }
1761                        _ => {
1762                            result.push_str(SET_COMMAND_MARKER);
1763                            result.push('\n');
1764                        }
1765                    }
1766                }
1767            }
1768            for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1769                if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1770                    && line_offset == cursor_line_offset
1771                {
1772                    result.push_str(&line[..char_offset]);
1773                    result.push_str(CURSOR_MARKER);
1774                    result.push_str(&line[char_offset..]);
1775                    continue;
1776                }
1777
1778                result.push_str(line);
1779            }
1780        }
1781
1782        for raw_line in patch.split_inclusive('\n') {
1783            if raw_line.starts_with("@@") {
1784                // Flush any pending change hunk from a previous patch hunk.
1785                if let Some(hunk) = current_hunk.take() {
1786                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1787                }
1788
1789                // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1790                // We intentionally do not trust old_start as a direct local index into `old_text`,
1791                // because some patches are produced against a larger file region and carry
1792                // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1793                if first_hunk {
1794                    new_text_byte_offset = 0;
1795                    first_hunk = false;
1796                }
1797                continue;
1798            }
1799
1800            if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1801                continue;
1802            }
1803            if raw_line.starts_with("\\ No newline") {
1804                continue;
1805            }
1806
1807            if raw_line.starts_with('-') {
1808                // Extend or start a change hunk with this deleted old line.
1809                match &mut current_hunk {
1810                    Some(Hunk {
1811                        line_range: range, ..
1812                    }) => range.end = old_line_index + 1,
1813                    None => {
1814                        current_hunk = Some(Hunk {
1815                            line_range: old_line_index..old_line_index + 1,
1816                            new_text_lines: Vec::new(),
1817                            cursor_line_offset_in_new_text: None,
1818                        });
1819                    }
1820                }
1821                old_line_index += 1;
1822            } else if let Some(added_content) = raw_line.strip_prefix('+') {
1823                // Place cursor marker if cursor_offset falls within this line.
1824                let mut cursor_line_offset = None;
1825                if let Some(cursor_off) = cursor_offset
1826                    && (first_hunk
1827                        || cursor_off >= new_text_byte_offset
1828                            && cursor_off <= new_text_byte_offset + added_content.len())
1829                {
1830                    let line_offset = added_content.floor_char_boundary(
1831                        cursor_off
1832                            .saturating_sub(new_text_byte_offset)
1833                            .min(added_content.len()),
1834                    );
1835                    cursor_line_offset = Some(line_offset);
1836                }
1837
1838                new_text_byte_offset += added_content.len();
1839
1840                let hunk = current_hunk.get_or_insert(Hunk {
1841                    line_range: old_line_index..old_line_index,
1842                    new_text_lines: vec![],
1843                    cursor_line_offset_in_new_text: None,
1844                });
1845                hunk.new_text_lines.push(added_content);
1846                hunk.cursor_line_offset_in_new_text = cursor_line_offset
1847                    .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1848            } else {
1849                // Context line (starts with ' ' or is empty).
1850                if let Some(hunk) = current_hunk.take() {
1851                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1852                }
1853                last_old_line_before_hunk = Some(old_line_index);
1854                old_line_index += 1;
1855                let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1856                new_text_byte_offset += content.len();
1857            }
1858        }
1859
1860        // Flush final group.
1861        if let Some(hunk) = current_hunk.take() {
1862            flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1863        }
1864
1865        // Trim a single trailing newline.
1866        if result.ends_with('\n') {
1867            result.pop();
1868        }
1869
1870        if result.is_empty() {
1871            return Ok(NO_EDITS_COMMAND_MARKER.to_string());
1872        }
1873
1874        Ok(result)
1875    }
1876
1877    #[cfg(test)]
1878    mod tests {
1879        use super::*;
1880        use indoc::indoc;
1881
1882        #[test]
1883        fn test_format_cursor_region() {
1884            struct Case {
1885                name: &'static str,
1886                context: &'static str,
1887                editable_range: Range<usize>,
1888                cursor_offset: usize,
1889                expected: &'static str,
1890            }
1891
1892            let cases = [
1893                Case {
1894                    name: "basic_cursor_placement",
1895                    context: "hello world\n",
1896                    editable_range: 0..12,
1897                    cursor_offset: 5,
1898                    expected: indoc! {"
1899                    <|file_sep|>test.rs
1900                    <|fim_prefix|>
1901                    <|fim_middle|>current
1902                    0:5c|hello<|user_cursor|> world
1903                    <|fim_suffix|>
1904                    <|fim_middle|>updated
1905                    "},
1906                },
1907                Case {
1908                    name: "multiline_cursor_on_second_line",
1909                    context: "aaa\nbbb\nccc\n",
1910                    editable_range: 0..12,
1911                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
1912                    expected: indoc! {"
1913                    <|file_sep|>test.rs
1914                    <|fim_prefix|>
1915                    <|fim_middle|>current
1916                    0:23|aaa
1917                    1:26|b<|user_cursor|>bb
1918                    2:29|ccc
1919                    <|fim_suffix|>
1920                    <|fim_middle|>updated
1921                    "},
1922                },
1923                Case {
1924                    name: "no_trailing_newline_in_context",
1925                    context: "line1\nline2",
1926                    editable_range: 0..11,
1927                    cursor_offset: 3,
1928                    expected: indoc! {"
1929                    <|file_sep|>test.rs
1930                    <|fim_prefix|>
1931                    <|fim_middle|>current
1932                    0:d9|lin<|user_cursor|>e1
1933                    1:da|line2
1934                    <|fim_suffix|>
1935                    <|fim_middle|>updated
1936                    "},
1937                },
1938                Case {
1939                    name: "leading_newline_in_editable_region",
1940                    context: "\nabc\n",
1941                    editable_range: 0..5,
1942                    cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
1943                    expected: indoc! {"
1944                    <|file_sep|>test.rs
1945                    <|fim_prefix|>
1946                    <|fim_middle|>current
1947                    0:00|
1948                    1:26|a<|user_cursor|>bc
1949                    <|fim_suffix|>
1950                    <|fim_middle|>updated
1951                    "},
1952                },
1953                Case {
1954                    name: "with_suffix",
1955                    context: "abc\ndef",
1956                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
1957                    cursor_offset: 2,
1958                    expected: indoc! {"
1959                    <|file_sep|>test.rs
1960                    <|fim_prefix|>
1961                    <|fim_middle|>current
1962                    0:26|ab<|user_cursor|>c
1963                    <|fim_suffix|>
1964                    def
1965                    <|fim_middle|>updated
1966                    "},
1967                },
1968                Case {
1969                    name: "unicode_two_byte_chars",
1970                    context: "héllo\n",
1971                    editable_range: 0..7,
1972                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
1973                    expected: indoc! {"
1974                    <|file_sep|>test.rs
1975                    <|fim_prefix|>
1976                    <|fim_middle|>current
1977                    0:1b|hé<|user_cursor|>llo
1978                    <|fim_suffix|>
1979                    <|fim_middle|>updated
1980                    "},
1981                },
1982                Case {
1983                    name: "unicode_three_byte_chars",
1984                    context: "日本語\n",
1985                    editable_range: 0..10,
1986                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
1987                    expected: indoc! {"
1988                    <|file_sep|>test.rs
1989                    <|fim_prefix|>
1990                    <|fim_middle|>current
1991                    0:80|日本<|user_cursor|>語
1992                    <|fim_suffix|>
1993                    <|fim_middle|>updated
1994                    "},
1995                },
1996                Case {
1997                    name: "unicode_four_byte_chars",
1998                    context: "a🌍b\n",
1999                    editable_range: 0..7,
2000                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
2001                    expected: indoc! {"
2002                    <|file_sep|>test.rs
2003                    <|fim_prefix|>
2004                    <|fim_middle|>current
2005                    0:6b|a🌍<|user_cursor|>b
2006                    <|fim_suffix|>
2007                    <|fim_middle|>updated
2008                    "},
2009                },
2010                Case {
2011                    name: "cursor_at_start_of_region_not_placed",
2012                    context: "abc\n",
2013                    editable_range: 0..4,
2014                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
2015                    expected: indoc! {"
2016                    <|file_sep|>test.rs
2017                    <|fim_prefix|>
2018                    <|fim_middle|>current
2019                    0:26|abc
2020                    <|fim_suffix|>
2021                    <|fim_middle|>updated
2022                    "},
2023                },
2024                Case {
2025                    name: "cursor_at_end_of_line_not_placed",
2026                    context: "abc\ndef\n",
2027                    editable_range: 0..8,
2028                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
2029                    expected: indoc! {"
2030                    <|file_sep|>test.rs
2031                    <|fim_prefix|>
2032                    <|fim_middle|>current
2033                    0:26|abc
2034                    1:2f|def
2035                    <|fim_suffix|>
2036                    <|fim_middle|>updated
2037                    "},
2038                },
2039                Case {
2040                    name: "cursor_offset_relative_to_context_not_editable_region",
2041                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
2042                    // write_cursor_excerpt_section must subtract it before comparing against
2043                    // per-line offsets within the editable region.
2044                    context: "pre\naaa\nbbb\nsuf\n",
2045                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
2046                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
2047                    expected: indoc! {"
2048                    <|file_sep|>test.rs
2049                    <|fim_prefix|>
2050                    pre
2051                    <|fim_middle|>current
2052                    0:23|aaa
2053                    1:26|b<|user_cursor|>bb
2054                    <|fim_suffix|>
2055                    suf
2056                    <|fim_middle|>updated
2057                    "},
2058                },
2059            ];
2060
2061            for case in &cases {
2062                let mut prompt = String::new();
2063                hashline::write_cursor_excerpt_section(
2064                    &mut prompt,
2065                    Path::new("test.rs"),
2066                    case.context,
2067                    &case.editable_range,
2068                    case.cursor_offset,
2069                );
2070                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
2071            }
2072        }
2073
2074        #[test]
2075        fn test_apply_edit_commands() {
2076            struct Case {
2077                name: &'static str,
2078                original: &'static str,
2079                model_output: &'static str,
2080                expected: &'static str,
2081            }
2082
2083            let cases = vec![
2084                Case {
2085                    name: "set_single_line",
2086                    original: indoc! {"
2087                    let mut total = 0;
2088                    for product in products {
2089                        total += ;
2090                    }
2091                    total
2092                "},
2093                    model_output: indoc! {"
2094                    <|set|>2:87
2095                        total += product.price;
2096                "},
2097                    expected: indoc! {"
2098                    let mut total = 0;
2099                    for product in products {
2100                        total += product.price;
2101                    }
2102                    total
2103                "},
2104                },
2105                Case {
2106                    name: "set_range",
2107                    original: indoc! {"
2108                    fn foo() {
2109                        let x = 1;
2110                        let y = 2;
2111                        let z = 3;
2112                    }
2113                "},
2114                    model_output: indoc! {"
2115                    <|set|>1:46-3:4a
2116                        let sum = 6;
2117                "},
2118                    expected: indoc! {"
2119                    fn foo() {
2120                        let sum = 6;
2121                    }
2122                "},
2123                },
2124                Case {
2125                    name: "insert_after_line",
2126                    original: indoc! {"
2127                    fn main() {
2128                        let x = 1;
2129                    }
2130                "},
2131                    model_output: indoc! {"
2132                    <|insert|>1:46
2133                        let y = 2;
2134                "},
2135                    expected: indoc! {"
2136                    fn main() {
2137                        let x = 1;
2138                        let y = 2;
2139                    }
2140                "},
2141                },
2142                Case {
2143                    name: "insert_before_first",
2144                    original: indoc! {"
2145                    let x = 1;
2146                    let y = 2;
2147                "},
2148                    model_output: indoc! {"
2149                    <|insert|>
2150                    use std::io;
2151                "},
2152                    expected: indoc! {"
2153                    use std::io;
2154                    let x = 1;
2155                    let y = 2;
2156                "},
2157                },
2158                Case {
2159                    name: "set_with_cursor_marker",
2160                    original: indoc! {"
2161                    fn main() {
2162                        println!();
2163                    }
2164                "},
2165                    model_output: indoc! {"
2166                    <|set|>1:34
2167                        eprintln!(\"<|user_cursor|>\");
2168                "},
2169                    expected: indoc! {"
2170                    fn main() {
2171                        eprintln!(\"<|user_cursor|>\");
2172                    }
2173                "},
2174                },
2175                Case {
2176                    name: "multiple_set_commands",
2177                    original: indoc! {"
2178                    aaa
2179                    bbb
2180                    ccc
2181                    ddd
2182                "},
2183                    model_output: indoc! {"
2184                    <|set|>0:23
2185                    AAA
2186                    <|set|>2:29
2187                    CCC
2188                "},
2189                    expected: indoc! {"
2190                    AAA
2191                    bbb
2192                    CCC
2193                    ddd
2194                "},
2195                },
2196                Case {
2197                    name: "set_range_multiline_replacement",
2198                    original: indoc! {"
2199                    fn handle_submit() {
2200                    }
2201
2202                    fn handle_keystroke() {
2203                "},
2204                    model_output: indoc! {"
2205                    <|set|>0:3f-1:7d
2206                    fn handle_submit(modal_state: &mut ModalState) {
2207                        <|user_cursor|>
2208                    }
2209                "},
2210                    expected: indoc! {"
2211                    fn handle_submit(modal_state: &mut ModalState) {
2212                        <|user_cursor|>
2213                    }
2214
2215                    fn handle_keystroke() {
2216                "},
2217                },
2218                Case {
2219                    name: "no_edit_commands_returns_original",
2220                    original: indoc! {"
2221                    hello
2222                    world
2223                "},
2224                    model_output: "some random text with no commands",
2225                    expected: indoc! {"
2226                    hello
2227                    world
2228                "},
2229                },
2230                Case {
2231                    name: "no_edits_command_returns_original",
2232                    original: indoc! {"
2233                    hello
2234                    world
2235                "},
2236                    model_output: "<|no_edits|>",
2237                    expected: indoc! {"
2238                    hello
2239                    world
2240                "},
2241                },
2242                Case {
2243                    name: "wrong_hash_set_ignored",
2244                    original: indoc! {"
2245                    aaa
2246                    bbb
2247                "},
2248                    model_output: indoc! {"
2249                    <|set|>0:ff
2250                    ZZZ
2251                "},
2252                    expected: indoc! {"
2253                    aaa
2254                    bbb
2255                "},
2256                },
2257                Case {
2258                    name: "insert_and_set_combined",
2259                    original: indoc! {"
2260                    alpha
2261                    beta
2262                    gamma
2263                "},
2264                    model_output: indoc! {"
2265                    <|set|>0:06
2266                    ALPHA
2267                    <|insert|>1:9c
2268                    beta_extra
2269                "},
2270                    expected: indoc! {"
2271                    ALPHA
2272                    beta
2273                    beta_extra
2274                    gamma
2275                "},
2276                },
2277                Case {
2278                    name: "no_trailing_newline_preserved",
2279                    original: "hello\nworld",
2280                    model_output: indoc! {"
2281                    <|set|>0:14
2282                    HELLO
2283                "},
2284                    expected: "HELLO\nworld",
2285                },
2286                Case {
2287                    name: "set_range_hash_mismatch_in_end_bound",
2288                    original: indoc! {"
2289                    one
2290                    two
2291                    three
2292                "},
2293                    model_output: indoc! {"
2294                    <|set|>0:42-2:ff
2295                    ONE_TWO_THREE
2296                "},
2297                    expected: indoc! {"
2298                    one
2299                    two
2300                    three
2301                "},
2302                },
2303                Case {
2304                    name: "set_range_start_greater_than_end_ignored",
2305                    original: indoc! {"
2306                    a
2307                    b
2308                    c
2309                "},
2310                    model_output: indoc! {"
2311                    <|set|>2:63-1:62
2312                    X
2313                "},
2314                    expected: indoc! {"
2315                    a
2316                    b
2317                    c
2318                "},
2319                },
2320                Case {
2321                    name: "insert_out_of_bounds_ignored",
2322                    original: indoc! {"
2323                    x
2324                    y
2325                "},
2326                    model_output: indoc! {"
2327                    <|insert|>99:aa
2328                    z
2329                "},
2330                    expected: indoc! {"
2331                    x
2332                    y
2333                "},
2334                },
2335                Case {
2336                    name: "set_out_of_bounds_ignored",
2337                    original: indoc! {"
2338                    x
2339                    y
2340                "},
2341                    model_output: indoc! {"
2342                    <|set|>99:aa
2343                    z
2344                "},
2345                    expected: indoc! {"
2346                    x
2347                    y
2348                "},
2349                },
2350                Case {
2351                    name: "malformed_set_command_ignored",
2352                    original: indoc! {"
2353                    alpha
2354                    beta
2355                "},
2356                    model_output: indoc! {"
2357                    <|set|>not-a-line-ref
2358                    UPDATED
2359                "},
2360                    expected: indoc! {"
2361                    alpha
2362                    beta
2363                "},
2364                },
2365                Case {
2366                    name: "malformed_insert_hash_treated_as_before_first",
2367                    original: indoc! {"
2368                    alpha
2369                    beta
2370                "},
2371                    model_output: indoc! {"
2372                    <|insert|>1:nothex
2373                    preamble
2374                "},
2375                    expected: indoc! {"
2376                    preamble
2377                    alpha
2378                    beta
2379                "},
2380                },
2381                Case {
2382                    name: "set_then_insert_same_target_orders_insert_after_replacement",
2383                    original: indoc! {"
2384                    cat
2385                    dog
2386                "},
2387                    model_output: indoc! {"
2388                    <|set|>0:38
2389                    CAT
2390                    <|insert|>0:38
2391                    TAIL
2392                "},
2393                    expected: indoc! {"
2394                    CAT
2395                    TAIL
2396                    dog
2397                "},
2398                },
2399                Case {
2400                    name: "overlapping_set_ranges_last_wins",
2401                    original: indoc! {"
2402                    a
2403                    b
2404                    c
2405                    d
2406                "},
2407                    model_output: indoc! {"
2408                    <|set|>0:61-2:63
2409                    FIRST
2410                    <|set|>1:62-3:64
2411                    SECOND
2412                "},
2413                    expected: indoc! {"
2414                    FIRST
2415                    d
2416                "},
2417                },
2418                Case {
2419                    name: "insert_before_first_and_after_line",
2420                    original: indoc! {"
2421                        a
2422                        b
2423                    "},
2424                    model_output: indoc! {"
2425                        <|insert|>
2426                        HEAD
2427                        <|insert|>0:61
2428                        MID
2429                    "},
2430                    expected: indoc! {"
2431                        HEAD
2432                        a
2433                        MID
2434                        b
2435                    "},
2436                },
2437            ];
2438
2439            for case in &cases {
2440                let result = hashline::apply_edit_commands(case.original, &case.model_output);
2441                assert_eq!(result, case.expected, "failed case: {}", case.name);
2442            }
2443        }
2444
2445        #[test]
2446        fn test_output_has_edit_commands() {
2447            assert!(hashline::output_has_edit_commands(&format!(
2448                "{}0:ab\nnew",
2449                SET_COMMAND_MARKER
2450            )));
2451            assert!(hashline::output_has_edit_commands(&format!(
2452                "{}0:ab\nnew",
2453                INSERT_COMMAND_MARKER
2454            )));
2455            assert!(hashline::output_has_edit_commands(&format!(
2456                "some text\n{}1:cd\nstuff",
2457                SET_COMMAND_MARKER
2458            )));
2459            assert!(!hashline::output_has_edit_commands("just plain text"));
2460            assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2461            assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2462        }
2463
2464        // ---- hashline::patch_to_edit_commands round-trip tests ----
2465
2466        #[test]
2467        fn test_patch_to_edit_commands() {
2468            struct Case {
2469                name: &'static str,
2470                old: &'static str,
2471                patch: &'static str,
2472                expected_new: &'static str,
2473            }
2474
2475            let cases = [
2476                Case {
2477                    name: "single_line_replacement",
2478                    old: indoc! {"
2479                    let mut total = 0;
2480                    for product in products {
2481                        total += ;
2482                    }
2483                    total
2484                "},
2485                    patch: indoc! {"
2486                    @@ -1,5 +1,5 @@
2487                     let mut total = 0;
2488                     for product in products {
2489                    -    total += ;
2490                    +    total += product.price;
2491                     }
2492                     total
2493                "},
2494                    expected_new: indoc! {"
2495                    let mut total = 0;
2496                    for product in products {
2497                        total += product.price;
2498                    }
2499                    total
2500                "},
2501                },
2502                Case {
2503                    name: "multiline_replacement",
2504                    old: indoc! {"
2505                    fn foo() {
2506                        let x = 1;
2507                        let y = 2;
2508                        let z = 3;
2509                    }
2510                "},
2511                    patch: indoc! {"
2512                    @@ -1,5 +1,3 @@
2513                     fn foo() {
2514                    -    let x = 1;
2515                    -    let y = 2;
2516                    -    let z = 3;
2517                    +    let sum = 1 + 2 + 3;
2518                     }
2519                "},
2520                    expected_new: indoc! {"
2521                    fn foo() {
2522                        let sum = 1 + 2 + 3;
2523                    }
2524                "},
2525                },
2526                Case {
2527                    name: "insertion",
2528                    old: indoc! {"
2529                    fn main() {
2530                        let x = 1;
2531                    }
2532                "},
2533                    patch: indoc! {"
2534                    @@ -1,3 +1,4 @@
2535                     fn main() {
2536                         let x = 1;
2537                    +    let y = 2;
2538                     }
2539                "},
2540                    expected_new: indoc! {"
2541                    fn main() {
2542                        let x = 1;
2543                        let y = 2;
2544                    }
2545                "},
2546                },
2547                Case {
2548                    name: "insertion_before_first",
2549                    old: indoc! {"
2550                    let x = 1;
2551                    let y = 2;
2552                "},
2553                    patch: indoc! {"
2554                    @@ -1,2 +1,3 @@
2555                    +use std::io;
2556                     let x = 1;
2557                     let y = 2;
2558                "},
2559                    expected_new: indoc! {"
2560                    use std::io;
2561                    let x = 1;
2562                    let y = 2;
2563                "},
2564                },
2565                Case {
2566                    name: "deletion",
2567                    old: indoc! {"
2568                    aaa
2569                    bbb
2570                    ccc
2571                    ddd
2572                "},
2573                    patch: indoc! {"
2574                    @@ -1,4 +1,2 @@
2575                     aaa
2576                    -bbb
2577                    -ccc
2578                     ddd
2579                "},
2580                    expected_new: indoc! {"
2581                    aaa
2582                    ddd
2583                "},
2584                },
2585                Case {
2586                    name: "multiple_changes",
2587                    old: indoc! {"
2588                    alpha
2589                    beta
2590                    gamma
2591                    delta
2592                    epsilon
2593                "},
2594                    patch: indoc! {"
2595                    @@ -1,5 +1,5 @@
2596                    -alpha
2597                    +ALPHA
2598                     beta
2599                     gamma
2600                    -delta
2601                    +DELTA
2602                     epsilon
2603                "},
2604                    expected_new: indoc! {"
2605                    ALPHA
2606                    beta
2607                    gamma
2608                    DELTA
2609                    epsilon
2610                "},
2611                },
2612                Case {
2613                    name: "replace_with_insertion",
2614                    old: indoc! {r#"
2615                    fn handle() {
2616                        modal_state.close();
2617                        modal_state.dismiss();
2618                "#},
2619                    patch: indoc! {r#"
2620                    @@ -1,3 +1,4 @@
2621                     fn handle() {
2622                         modal_state.close();
2623                    +    eprintln!("");
2624                         modal_state.dismiss();
2625                "#},
2626                    expected_new: indoc! {r#"
2627                    fn handle() {
2628                        modal_state.close();
2629                        eprintln!("");
2630                        modal_state.dismiss();
2631                "#},
2632                },
2633                Case {
2634                    name: "complete_replacement",
2635                    old: indoc! {"
2636                    aaa
2637                    bbb
2638                    ccc
2639                "},
2640                    patch: indoc! {"
2641                    @@ -1,3 +1,3 @@
2642                    -aaa
2643                    -bbb
2644                    -ccc
2645                    +xxx
2646                    +yyy
2647                    +zzz
2648                "},
2649                    expected_new: indoc! {"
2650                    xxx
2651                    yyy
2652                    zzz
2653                "},
2654                },
2655                Case {
2656                    name: "add_function_body",
2657                    old: indoc! {"
2658                    fn foo() {
2659                        modal_state.dismiss();
2660                    }
2661
2662                    fn
2663
2664                    fn handle_keystroke() {
2665                "},
2666                    patch: indoc! {"
2667                    @@ -1,6 +1,8 @@
2668                     fn foo() {
2669                         modal_state.dismiss();
2670                     }
2671
2672                    -fn
2673                    +fn handle_submit() {
2674                    +    todo()
2675                    +}
2676
2677                     fn handle_keystroke() {
2678                "},
2679                    expected_new: indoc! {"
2680                    fn foo() {
2681                        modal_state.dismiss();
2682                    }
2683
2684                    fn handle_submit() {
2685                        todo()
2686                    }
2687
2688                    fn handle_keystroke() {
2689                "},
2690                },
2691                Case {
2692                    name: "with_cursor_offset",
2693                    old: indoc! {r#"
2694                    fn main() {
2695                        println!();
2696                    }
2697                "#},
2698                    patch: indoc! {r#"
2699                        @@ -1,3 +1,3 @@
2700                        fn main() {
2701                        -    println!();
2702                        +    eprintln!("");
2703                        }
2704                    "#},
2705                    expected_new: indoc! {r#"
2706                        fn main() {
2707                            eprintln!("<|user_cursor|>");
2708                        }
2709                    "#},
2710                },
2711                Case {
2712                    name: "non_local_hunk_header_pure_insertion_repro",
2713                    old: indoc! {"
2714                        aaa
2715                        bbb
2716                    "},
2717                    patch: indoc! {"
2718                        @@ -20,2 +20,3 @@
2719                        aaa
2720                        +xxx
2721                        bbb
2722                    "},
2723                    expected_new: indoc! {"
2724                        aaa
2725                        xxx
2726                        bbb
2727                    "},
2728                },
2729                Case {
2730                    name: "empty_patch_produces_no_edits_marker",
2731                    old: indoc! {"
2732                        aaa
2733                        bbb
2734                    "},
2735                    patch: "@@ -20,2 +20,3 @@\n",
2736                    expected_new: indoc! {"
2737                        aaa
2738                        bbb
2739                    "},
2740                },
2741            ];
2742
2743            for case in &cases {
2744                // The cursor_offset for patch_to_edit_commands is relative to
2745                // the first hunk's new text (context + additions). We compute
2746                // it by finding where the marker sits in the expected output
2747                // (which mirrors the new text of the hunk).
2748                let cursor_offset = case.expected_new.find(CURSOR_MARKER);
2749
2750                let commands =
2751                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
2752                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
2753
2754                assert!(
2755                    hashline::output_has_edit_commands(&commands),
2756                    "case {}: expected edit commands, got: {commands:?}",
2757                    case.name,
2758                );
2759
2760                let applied = hashline::apply_edit_commands(case.old, &commands);
2761                assert_eq!(applied, case.expected_new, "case {}", case.name);
2762            }
2763        }
2764    }
2765}
2766
2767pub mod seed_coder {
2768    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2769    //!
2770    //! Seed-Coder uses different FIM tokens and order than Qwen:
2771    //! - SPM order: suffix comes FIRST, then prefix, then middle
2772    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2773    //! - File markers: StarCoder-style `<filename>path` (single token + path)
2774    //!
2775    //! All context (related files, edit history) goes in the PREFIX section.
2776    //! The suffix contains only code after the editable region.
2777    //!
2778    //! Example prompt:
2779    //!
2780    //! <[fim-suffix]>
2781    //! code after editable region
2782    //! <[fim-prefix]><filename>related/file.py
2783    //! related file content
2784    //!
2785    //! <filename>edit_history
2786    //! --- a/some_file.py
2787    //! +++ b/some_file.py
2788    //! -old
2789    //! +new
2790    //!
2791    //! <filename>path/to/target_file.py
2792    //! code before editable region
2793    //! <<<<<<< CURRENT
2794    //! code that
2795    //! needs to<|user_cursor|>
2796    //! be rewritten
2797    //! =======
2798    //! <[fim-middle]>
2799    //!
2800    //! Expected output (model generates):
2801    //!
2802    //! updated
2803    //! code with
2804    //! changes applied
2805    //! >>>>>>> UPDATED
2806
2807    use super::*;
2808
    /// Token that opens the suffix section, which is written first in the prompt.
    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
    /// Token that opens the prefix section (related files, edit history, target file).
    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
    /// Token that terminates the prompt; the model's output follows it.
    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
    /// Marker that introduces each file-like section; it is followed by a path
    /// (or by `edit_history` for the edit-history section).
    pub const FILE_MARKER: &str = "<filename>";

    /// Line written immediately before the editable region.
    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
    /// Line written after the editable region, right before `FIM_MIDDLE`.
    pub const SEPARATOR: &str = "=======\n";
    /// Line that ends the expected model output.
    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

    /// Output body emitted by `no_edits` to signal that nothing should change.
    pub const NO_EDITS: &str = "NO_EDITS\n";
2819
2820    pub fn special_tokens() -> &'static [&'static str] {
2821        &[
2822            FIM_SUFFIX,
2823            FIM_PREFIX,
2824            FIM_MIDDLE,
2825            FILE_MARKER,
2826            START_MARKER,
2827            SEPARATOR,
2828            END_MARKER,
2829            CURSOR_MARKER,
2830        ]
2831    }
2832
2833    pub fn write_cursor_excerpt_section(
2834        prompt: &mut String,
2835        path: &Path,
2836        context: &str,
2837        editable_range: &Range<usize>,
2838        cursor_offset: usize,
2839    ) {
2840        let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2841        prompt.push_str(&section);
2842    }
2843
2844    pub fn format_prompt_with_budget(
2845        path: &Path,
2846        context: &str,
2847        editable_range: &Range<usize>,
2848        cursor_offset: usize,
2849        events: &[Arc<Event>],
2850        related_files: &[RelatedFile],
2851        max_tokens: usize,
2852    ) -> String {
2853        let cursor_prefix_section =
2854            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2855        assemble_fim_prompt(
2856            context,
2857            editable_range,
2858            &cursor_prefix_section,
2859            events,
2860            related_files,
2861            max_tokens,
2862        )
2863    }
2864
2865    pub fn assemble_fim_prompt(
2866        context: &str,
2867        editable_range: &Range<usize>,
2868        cursor_prefix_section: &str,
2869        events: &[Arc<Event>],
2870        related_files: &[RelatedFile],
2871        max_tokens: usize,
2872    ) -> String {
2873        let suffix_section = build_suffix_section(context, editable_range);
2874
2875        let suffix_tokens = estimate_tokens(suffix_section.len() + FIM_PREFIX.len());
2876        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len() + FIM_MIDDLE.len());
2877        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2878
2879        let edit_history_section = super::format_edit_history_within_budget(
2880            events,
2881            FILE_MARKER,
2882            "edit_history",
2883            budget_after_cursor,
2884            max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
2885        );
2886        let edit_history_tokens = estimate_tokens(edit_history_section.len() + "\n".len());
2887        let budget_after_edit_history =
2888            budget_after_cursor.saturating_sub(edit_history_tokens + "\n".len());
2889
2890        let related_files_section = super::format_related_files_within_budget(
2891            related_files,
2892            FILE_MARKER,
2893            "",
2894            budget_after_edit_history,
2895        );
2896
2897        let mut prompt = String::new();
2898        prompt.push_str(&suffix_section);
2899        prompt.push_str(FIM_PREFIX);
2900        prompt.push_str(&related_files_section);
2901        if !related_files_section.is_empty() {
2902            prompt.push('\n');
2903        }
2904        prompt.push_str(&edit_history_section);
2905        if !edit_history_section.is_empty() {
2906            prompt.push('\n');
2907        }
2908        prompt.push_str(cursor_prefix_section);
2909        prompt.push_str(FIM_MIDDLE);
2910
2911        prompt
2912    }
2913
2914    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2915        let mut section = String::new();
2916        section.push_str(FIM_SUFFIX);
2917        section.push_str(&context[editable_range.end..]);
2918        if !section.ends_with('\n') {
2919            section.push('\n');
2920        }
2921        section
2922    }
2923
2924    fn build_cursor_prefix_section(
2925        path: &Path,
2926        context: &str,
2927        editable_range: &Range<usize>,
2928        cursor_offset: usize,
2929    ) -> String {
2930        let mut section = String::new();
2931        let path_str = path.to_string_lossy();
2932        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2933
2934        section.push_str(&context[..editable_range.start]);
2935        section.push_str(START_MARKER);
2936        section.push_str(&context[editable_range.start..cursor_offset]);
2937        section.push_str(CURSOR_MARKER);
2938        section.push_str(&context[cursor_offset..editable_range.end]);
2939        if !section.ends_with('\n') {
2940            section.push('\n');
2941        }
2942        section.push_str(SEPARATOR);
2943        section
2944    }
2945
2946    /// Format patch as containing no changes if it's empty; otherwise return None.
2947    pub(crate) fn no_edits(patch: &str) -> Option<String> {
2948        // Count lines in the patch
2949        let empty_patch = patch.lines().count() <= 3;
2950        if empty_patch {
2951            Some(format!("{NO_EDITS}{END_MARKER}"))
2952        } else {
2953            None
2954        }
2955    }
2956}
2957
2958pub mod v0304_variable_edit {
2959    //! A prompt format with no fixed editable region. The entire context is shown
2960    //! to the model, and it chooses which text to replace by outputting surrounding
2961    //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
2962    //! text.
2963    //!
2964    //! Example prompt:
2965    //!
2966    //! <|file_sep|>path/to/file.py
2967    //! zero
2968    //! one
2969    //! two
2970    //! three<|user_cursor|>
2971    //! four
2972    //! five
2973    //! <|fim_prefix|>
    //!
2975    //! Expected output (model generates):
2976    //!
2977    //! two
2978    //! <|fim_middle|>
2979    //! THREE
2980    //! <|fim_suffix|>
2981    //! four
2982    //!
2983    //! The output means: find "two\n...\nfour" in the context, and replace
2984    //! everything between "two\n" and "four" with "THREE\n".
2985
2986    use super::*;
2987
2988    pub fn special_tokens() -> &'static [&'static str] {
2989        &[
2990            "<|fim_prefix|>",
2991            "<|fim_suffix|>",
2992            "<|fim_middle|>",
2993            "<|file_sep|>",
2994            CURSOR_MARKER,
2995        ]
2996    }
2997
2998    pub fn write_cursor_excerpt_section(
2999        prompt: &mut String,
3000        path: &Path,
3001        context: &str,
3002        cursor_offset: usize,
3003    ) {
3004        let path_str = path.to_string_lossy();
3005        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
3006
3007        prompt.push_str(&context[..cursor_offset]);
3008        prompt.push_str(CURSOR_MARKER);
3009        prompt.push_str(&context[cursor_offset..]);
3010        if !prompt.ends_with('\n') {
3011            prompt.push('\n');
3012        }
3013        prompt.push_str("<|fim_prefix|>\n")
3014    }
3015
3016    /// Apply a variable-edit model output to the original context text.
3017    ///
3018    /// The model output has the form:
3019    ///
3020    /// - prefix context lines
3021    /// - `<|fim_middle|>`
3022    /// - new text
3023    /// - `<|fim_suffix|>`
3024    /// - suffix context lines
3025    ///
3026    /// We locate the prefix/suffix context lines in the original text and replace
3027    /// everything between them with the new text.
3028    pub fn apply_variable_edit(
3029        context: &str,
3030        model_output: &str,
3031    ) -> Result<(Range<usize>, String)> {
3032        let (prefix_context, rest) = model_output
3033            .split_once("<|fim_middle|>\n")
3034            .or_else(|| model_output.split_once("<|fim_middle|>"))
3035            .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
3036
3037        let (new_text, suffix_context) = rest
3038            .split_once("<|fim_suffix|>\n")
3039            .or_else(|| rest.split_once("<|fim_suffix|>"))
3040            .unwrap_or((rest, ""));
3041
3042        let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
3043            suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
3044        } else {
3045            suffix_context
3046        };
3047
3048        let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
3049            .ok_or_else(|| anyhow!("could not locate prefix lines"))?
3050            + prefix_context.len();
3051        let suffix_offset = if suffix_context.is_empty() {
3052            context.len()
3053        } else {
3054            find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
3055                .ok_or_else(|| anyhow!("could not locate suffix lines"))?
3056                + prefix_offset
3057        };
3058
3059        let edit_range = prefix_offset..suffix_offset;
3060        return Ok((edit_range, new_text.to_string()));
3061    }
3062
3063    fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
3064        if needle.is_empty() {
3065            return Some(0);
3066        }
3067
3068        haystack.match_indices(needle).find_map(|(offset, _)| {
3069            let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
3070            matched_line_start.then_some(offset)
3071        })
3072    }
3073
3074    /// Convert a unified diff patch into the variable-edit output format.
3075    ///
3076    /// Parses `patch` as a unified diff against `old_text` and produces model
3077    /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
3078    /// delimiters. The diff is resolved by content matching rather than line
3079    /// numbers.
3080    pub fn patch_to_variable_edit_output(
3081        old_text: &str,
3082        patch: &str,
3083        cursor_offset: Option<usize>,
3084    ) -> Result<String> {
3085        // Parse the unified diff into hunks. Each hunk has an `old_context`
3086        // string (context + deleted lines interleaved in order) and a list of
3087        // edits expressed as byte ranges within that context plus replacement
3088        // text.
3089        let hunks = parse_hunks(patch);
3090        if hunks.is_empty() {
3091            return Ok(String::new());
3092        }
3093
3094        // Apply each hunk by finding its old_context in the text and
3095        // performing the edits. We search forward from where the previous
3096        // hunk ended so that hunks are applied in order.
3097        let mut new_text = old_text.to_string();
3098        let mut search_from: usize = 0;
3099        let mut first_hunk_pos: Option<usize> = None;
3100
3101        for hunk in &hunks {
3102            let context_pos = new_text[search_from..]
3103                .find(&hunk.old_context)
3104                .map(|pos| pos + search_from)
3105                .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
3106
3107            if first_hunk_pos.is_none() {
3108                first_hunk_pos = Some(context_pos);
3109            }
3110
3111            // Apply edits in reverse order so byte offsets remain valid.
3112            for edit in hunk.edits.iter().rev() {
3113                let abs_start = context_pos + edit.range.start;
3114                let abs_end = context_pos + edit.range.end;
3115                new_text.replace_range(abs_start..abs_end, &edit.text);
3116            }
3117
3118            // Advance past this hunk's region in the (now modified) text.
3119            let new_region_len: usize =
3120                hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
3121                    len + edit.text.len() - (edit.range.end - edit.range.start)
3122                });
3123            search_from = context_pos + new_region_len;
3124        }
3125
3126        // Now we have old_text and new_text. Find the changed line range by
3127        // comparing them.
3128        let old_lines: Vec<&str> = old_text.lines().collect();
3129        let new_lines: Vec<&str> = new_text.lines().collect();
3130
3131        // Find first differing line.
3132        let first_changed_row = old_lines
3133            .iter()
3134            .zip(new_lines.iter())
3135            .position(|(a, b)| a != b)
3136            .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
3137
3138        // Find last differing line (from the end).
3139        let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
3140        let common_suffix = old_lines
3141            .iter()
3142            .rev()
3143            .zip(new_lines.iter().rev())
3144            .take(max_suffix)
3145            .take_while(|(a, b)| a == b)
3146            .count();
3147
3148        let old_end = old_lines.len() - common_suffix;
3149        let new_end = new_lines.len() - common_suffix;
3150
3151        if first_changed_row == old_end && first_changed_row == new_end {
3152            return Ok(String::new());
3153        }
3154
3155        // Build the replacement text from new_lines[first_diff..new_end].
3156        let mut merged_new_text = String::new();
3157        for line in &new_lines[first_changed_row..new_end] {
3158            merged_new_text.push_str(line);
3159            merged_new_text.push('\n');
3160        }
3161
3162        // cursor_offset is relative to the first hunk's new content in
3163        // new_text. Translate it to an offset within merged_new_text, which
3164        // only contains lines first_diff..new_end of new_text.
3165        if let Some(hunk_offset) = cursor_offset {
3166            let hunk_start = first_hunk_pos.unwrap_or(0);
3167            let absolute_pos = hunk_start + hunk_offset;
3168
3169            // Byte offset where first_diff starts in new_text.
3170            let merged_start: usize = new_lines[..first_changed_row]
3171                .iter()
3172                .map(|line| line.len() + 1)
3173                .sum();
3174
3175            if absolute_pos >= merged_start {
3176                let relative_offset = absolute_pos - merged_start;
3177                if relative_offset <= merged_new_text.len() {
3178                    merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
3179                }
3180            }
3181        }
3182
3183        // Build output with 2 lines of context above and below.
3184        let context_lines_count = 2;
3185        let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
3186        let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
3187
3188        fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
3189            let pattern = &lines[line_range];
3190            let pattern_len = pattern.len();
3191
3192            let mut count = 0;
3193            for offset in 0..=lines.len() - pattern_len {
3194                if &lines[offset..offset + pattern_len] == pattern {
3195                    count += 1;
3196                }
3197            }
3198            count
3199        }
3200
3201        // Expand prefix and suffix until they are unique
3202        while prefix_start > 0 {
3203            if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
3204                prefix_start -= 1;
3205            } else {
3206                break;
3207            }
3208        }
3209        while suffix_end < old_lines.len() {
3210            if count_matches(old_end..suffix_end, &old_lines) > 1 {
3211                suffix_end += 1;
3212            } else {
3213                break;
3214            }
3215        }
3216
3217        let mut output = String::new();
3218        for line in &old_lines[prefix_start..first_changed_row] {
3219            output.push_str(line);
3220            output.push('\n');
3221        }
3222        output.push_str("<|fim_middle|>\n");
3223        output.push_str(&merged_new_text);
3224        output.push_str("<|fim_suffix|>\n");
3225        for line in &old_lines[old_end..suffix_end] {
3226            output.push_str(line);
3227            output.push('\n');
3228        }
3229
3230        Ok(output)
3231    }
3232
    /// One hunk of a unified diff, described by content rather than line numbers.
    struct ParsedHunk {
        /// The hunk's context and removed lines in patch order, each terminated
        /// by `'\n'`.
        old_context: String,
        /// Edits to apply to `old_context`, in the order they appear in the hunk.
        edits: Vec<ParsedEdit>,
    }
3237
    /// A single replacement inside a `ParsedHunk`.
    struct ParsedEdit {
        /// Byte range within the hunk's `old_context` that is removed; empty for
        /// a pure insertion.
        range: Range<usize>,
        /// Text inserted in place of `range`; empty for a pure deletion.
        text: String,
    }
3242
3243    /// Parse a unified diff into content-based hunks. Each hunk contains an
3244    /// `old_context` string (context lines + deleted lines, which together
3245    /// form the text that should be found in the original) and a list of edits
3246    /// expressed as byte ranges within that context.
3247    fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
3248        let mut hunks = Vec::new();
3249        let mut current: Option<ParsedHunk> = None;
3250
3251        for line in patch.lines() {
3252            if line.starts_with("@@") {
3253                if let Some(hunk) = current.take() {
3254                    if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3255                        hunks.push(hunk);
3256                    }
3257                }
3258                current = Some(ParsedHunk {
3259                    old_context: String::new(),
3260                    edits: Vec::new(),
3261                });
3262            } else if line.starts_with("---") || line.starts_with("+++") {
3263                continue;
3264            } else if let Some(hunk) = &mut current {
3265                if let Some(added) = line.strip_prefix('+') {
3266                    let pos = hunk.old_context.len();
3267                    if let Some(last_edit) = hunk.edits.last_mut() {
3268                        if last_edit.range.end == pos {
3269                            writeln!(&mut last_edit.text, "{added}").ok();
3270                            continue;
3271                        }
3272                    }
3273                    hunk.edits.push(ParsedEdit {
3274                        range: pos..pos,
3275                        text: format!("{added}\n"),
3276                    });
3277                } else if let Some(removed) = line.strip_prefix('-') {
3278                    let start = hunk.old_context.len();
3279                    writeln!(&mut hunk.old_context, "{removed}").ok();
3280                    let end = hunk.old_context.len();
3281                    if let Some(last_edit) = hunk.edits.last_mut() {
3282                        if last_edit.range.end == start {
3283                            last_edit.range.end = end;
3284                            continue;
3285                        }
3286                    }
3287                    hunk.edits.push(ParsedEdit {
3288                        range: start..end,
3289                        text: String::new(),
3290                    });
3291                } else {
3292                    let ctx = line.strip_prefix(' ').unwrap_or(line);
3293                    writeln!(&mut hunk.old_context, "{ctx}").ok();
3294                }
3295            }
3296        }
3297
3298        if let Some(hunk) = current {
3299            if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3300                hunks.push(hunk);
3301            }
3302        }
3303
3304        hunks
3305    }
3306
3307    #[cfg(test)]
3308    mod tests {
3309        use super::*;
3310        use indoc::indoc;
3311
3312        #[test]
3313        fn test_apply_variable_edit() {
3314            struct Case {
3315                name: &'static str,
3316                original: &'static str,
3317                model_output: &'static str,
3318                expected: &'static str,
3319            }
3320
3321            let cases = [
3322                Case {
3323                    name: "simple_single_line_replacement",
3324                    original: indoc! {"
3325                        zero
3326                        one
3327                        two
3328                        three
3329                        four
3330                        five
3331                    "},
3332                    model_output: indoc! {"
3333                        two
3334                        <|fim_middle|>
3335                        THREE
3336                        <|fim_suffix|>
3337                        four
3338                    "},
3339                    expected: indoc! {"
3340                        zero
3341                        one
3342                        two
3343                        THREE
3344                        four
3345                        five
3346                    "},
3347                },
3348                Case {
3349                    name: "multi_line_replacement",
3350                    original: indoc! {"
3351                        a
3352                        b
3353                        c
3354                        d
3355                        e
3356                    "},
3357                    model_output: indoc! {"
3358                        a
3359                        <|fim_middle|>
3360                        B
3361                        C
3362                        D
3363                        <|fim_suffix|>
3364                        e
3365                    "},
3366                    expected: indoc! {"
3367                        a
3368                        B
3369                        C
3370                        D
3371                        e
3372                    "},
3373                },
3374                Case {
3375                    name: "insertion_between_existing_lines",
3376                    original: indoc! {"
3377                        a
3378                        b
3379                        c
3380                    "},
3381                    model_output: indoc! {"
3382                        a
3383                        <|fim_middle|>
3384                        X
3385                        <|fim_suffix|>
3386                        b
3387                    "},
3388                    expected: indoc! {"
3389                        a
3390                        X
3391                        b
3392                        c
3393                    "},
3394                },
3395                Case {
3396                    name: "deletion",
3397                    original: indoc! {"
3398                        a
3399                        b
3400                        c
3401                        d
3402                    "},
3403                    model_output: indoc! {"
3404                        a
3405                        <|fim_middle|>
3406                        <|fim_suffix|>
3407                        c
3408                    "},
3409                    expected: indoc! {"
3410                        a
3411                        c
3412                        d
3413                    "},
3414                },
3415                Case {
3416                    name: "replacement_at_start_no_prefix_context",
3417                    original: indoc! {"
3418                        a
3419                        b
3420                        c
3421                    "},
3422                    model_output: indoc! {"
3423                        <|fim_middle|>
3424                        X
3425                        <|fim_suffix|>
3426                        b
3427                    "},
3428                    expected: indoc! {"
3429                        X
3430                        b
3431                        c
3432                    "},
3433                },
3434                Case {
3435                    name: "replacement_at_end_no_suffix_context",
3436                    original: indoc! {"
3437                        a
3438                        b
3439                        c
3440                    "},
3441                    model_output: indoc! {"
3442                        b
3443                        <|fim_middle|>
3444                        Z
3445                        <|fim_suffix|>
3446                    "},
3447                    expected: indoc! {"
3448                        a
3449                        b
3450                        Z
3451                    "},
3452                },
3453                Case {
3454                    name: "context_with_trailing_newline_is_preserved",
3455                    original: indoc! {"
3456                        a
3457                        b
3458                        c
3459                    "},
3460                    model_output: indoc! {"
3461                        a
3462                        <|fim_middle|>
3463                        B
3464                        <|fim_suffix|>
3465                        c
3466                    "},
3467                    expected: indoc! {"
3468                        a
3469                        B
3470                        c
3471                    "},
3472                },
3473                Case {
3474                    name: "cursor_marker_passes_through_untouched",
3475                    original: indoc! {"
3476                        a
3477                        b
3478                        c
3479                    "},
3480                    model_output: indoc! {"
3481                        a
3482                        <|fim_middle|>
3483                        B<|user_cursor|>B
3484                        <|fim_suffix|>
3485                        c
3486                    "},
3487                    expected: indoc! {"
3488                        a
3489                        B<|user_cursor|>B
3490                        c
3491                    "},
3492                },
3493                Case {
3494                    name: "multiple_prefix_context_lines",
3495                    original: indoc! {"
3496                        a
3497                        b
3498                        c
3499                        d
3500                        e
3501                    "},
3502                    model_output: indoc! {"
3503                        b
3504                        c
3505                        <|fim_middle|>
3506                        D
3507                        <|fim_suffix|>
3508                        e
3509                    "},
3510                    expected: indoc! {"
3511                        a
3512                        b
3513                        c
3514                        D
3515                        e
3516                    "},
3517                },
3518            ];
3519
3520            for case in cases {
3521                let (edit_range, replacement) =
3522                    apply_variable_edit(case.original, case.model_output).unwrap();
3523                let mut edited = case.original.to_string();
3524                edited.replace_range(edit_range, &replacement);
3525                assert_eq!(edited, case.expected, "{}", case.name);
3526            }
3527        }
3528
3529        #[test]
3530        fn test_patch_to_variable_edit() {
3531            struct Case {
3532                name: &'static str,
3533                old: &'static str,
3534                patch: &'static str,
3535                cursor_offset: Option<usize>,
3536                expected_variable_edit: &'static str,
3537                expected_after_apply: &'static str,
3538            }
3539
3540            let cases = [
3541                Case {
3542                    name: "simple_replacement",
3543                    old: indoc! {"
3544                        zero
3545                        one
3546                        two
3547                        three
3548                        four
3549                        five
3550                    "},
3551                    patch: indoc! {"
3552                        @@ -3,3 +3,3 @@
3553                         two
3554                        -three
3555                        +THREE
3556                         four
3557                    "},
3558                    cursor_offset: None,
3559                    expected_variable_edit: indoc! {"
3560                        one
3561                        two
3562                        <|fim_middle|>
3563                        THREE
3564                        <|fim_suffix|>
3565                        four
3566                        five
3567                    "},
3568                    expected_after_apply: indoc! {"
3569                        zero
3570                        one
3571                        two
3572                        THREE
3573                        four
3574                        five
3575                    "},
3576                },
3577                Case {
3578                    name: "insertion",
3579                    old: indoc! {"
3580                        a
3581                        b
3582                        c
3583                        d
3584                        e
3585                    "},
3586                    patch: indoc! {"
3587                        @@ -2,0 +3,1 @@
3588                         b
3589                        +X
3590                         c
3591                    "},
3592                    cursor_offset: None,
3593                    expected_variable_edit: indoc! {"
3594                        a
3595                        b
3596                        <|fim_middle|>
3597                        X
3598                        <|fim_suffix|>
3599                        c
3600                        d
3601                    "},
3602                    expected_after_apply: indoc! {"
3603                        a
3604                        b
3605                        X
3606                        c
3607                        d
3608                        e
3609                    "},
3610                },
3611                Case {
3612                    name: "deletion",
3613                    old: indoc! {"
3614                        a
3615                        b
3616                        c
3617                        d
3618                        e
3619                    "},
3620                    patch: indoc! {"
3621                        @@ -2,3 +2,2 @@
3622                         b
3623                        -c
3624                         d
3625                    "},
3626                    cursor_offset: None,
3627                    expected_variable_edit: indoc! {"
3628                        a
3629                        b
3630                        <|fim_middle|>
3631                        <|fim_suffix|>
3632                        d
3633                        e
3634                    "},
3635                    expected_after_apply: indoc! {"
3636                        a
3637                        b
3638                        d
3639                        e
3640                    "},
3641                },
3642                Case {
3643                    name: "edit_near_start",
3644                    old: indoc! {"
3645                        first
3646                        second
3647                        third
3648                        fourth
3649                    "},
3650                    patch: indoc! {"
3651                        @@ -1,1 +1,1 @@
3652                        -first
3653                        +FIRST
3654                    "},
3655                    cursor_offset: None,
3656                    expected_variable_edit: indoc! {"
3657                        <|fim_middle|>
3658                        FIRST
3659                        <|fim_suffix|>
3660                        second
3661                        third
3662                    "},
3663                    expected_after_apply: indoc! {"
3664                        FIRST
3665                        second
3666                        third
3667                        fourth
3668                    "},
3669                },
3670                Case {
3671                    name: "edit_near_end",
3672                    old: indoc! {"
3673                        first
3674                        second
3675                        third
3676                        fourth
3677                    "},
3678                    patch: indoc! {"
3679                        @@ -4,1 +4,1 @@
3680                        -fourth
3681                        +FOURTH
3682                    "},
3683                    cursor_offset: None,
3684                    expected_variable_edit: indoc! {"
3685                        second
3686                        third
3687                        <|fim_middle|>
3688                        FOURTH
3689                        <|fim_suffix|>
3690                    "},
3691                    expected_after_apply: indoc! {"
3692                        first
3693                        second
3694                        third
3695                        FOURTH
3696                    "},
3697                },
3698                Case {
3699                    name: "cursor_at_start_of_replacement",
3700                    old: indoc! {"
3701                        zero
3702                        one
3703                        two
3704                        three
3705                        four
3706                        five
3707                    "},
3708                    patch: indoc! {"
3709                        @@ -3,3 +3,3 @@
3710                         two
3711                        -three
3712                        +THREE
3713                         four
3714                    "},
3715                    cursor_offset: Some(4),
3716                    expected_variable_edit: indoc! {"
3717                        one
3718                        two
3719                        <|fim_middle|>
3720                        <|user_cursor|>THREE
3721                        <|fim_suffix|>
3722                        four
3723                        five
3724                    "},
3725                    expected_after_apply: indoc! {"
3726                        zero
3727                        one
3728                        two
3729                        <|user_cursor|>THREE
3730                        four
3731                        five
3732                    "},
3733                },
3734                Case {
3735                    name: "cursor_in_middle_of_replacement",
3736                    old: indoc! {"
3737                        zero
3738                        one
3739                        two
3740                        three
3741                        four
3742                        five
3743                    "},
3744                    patch: indoc! {"
3745                        @@ -3,3 +3,3 @@
3746                         two
3747                        -three
3748                        +THREE
3749                         four
3750                    "},
3751                    cursor_offset: Some(6),
3752                    expected_variable_edit: indoc! {"
3753                        one
3754                        two
3755                        <|fim_middle|>
3756                        TH<|user_cursor|>REE
3757                        <|fim_suffix|>
3758                        four
3759                        five
3760                    "},
3761                    expected_after_apply: indoc! {"
3762                        zero
3763                        one
3764                        two
3765                        TH<|user_cursor|>REE
3766                        four
3767                        five
3768                    "},
3769                },
3770                Case {
3771                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3772                    old: indoc! {"
3773                        one
3774                        a
3775                        b
3776                        c
3777                        d
3778                        two
3779                        a
3780                        b
3781                        c
3782                        d
3783                        three
3784                        a
3785                        b
3786                        c
3787                        d
3788                        four
3789                    "},
3790                    patch: indoc! {"
3791                        @@ -4,5 +4,5 @@
3792                         two
3793                         a
3794                         b
3795                        -c
3796                        +C
3797                         d
3798                         three
3799                    "},
3800                    cursor_offset: None,
3801                    expected_variable_edit: indoc! {"
3802                        two
3803                        a
3804                        b
3805                        <|fim_middle|>
3806                        C
3807                        <|fim_suffix|>
3808                        d
3809                        three
3810                    "},
3811                    expected_after_apply: indoc! {"
3812                        one
3813                        a
3814                        b
3815                        c
3816                        d
3817                        two
3818                        a
3819                        b
3820                        C
3821                        d
3822                        three
3823                        a
3824                        b
3825                        c
3826                        d
3827                        four
3828                    "},
3829                },
3830                Case {
3831                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3832                    old: indoc! {"
3833                        {
3834                            {
3835                                one();
3836                            }
3837                        }
3838                        {
3839                            {
3840                                two();
3841                            }
3842                        }
3843                        {
3844                            {
3845                                three();
3846                            }
3847                        }
3848                        {
3849                            {
3850                                four();
3851                            }
3852                        }
3853                    "},
3854                    patch: indoc! {"
3855                        @@ -4,5 +4,5 @@
3856                             {
3857                        -        two();
3858                        +        TWO();
3859                             }
3860                    "},
3861                    cursor_offset: None,
3862                    expected_variable_edit: indoc! {"
3863                                one();
3864                            }
3865                        }
3866                        {
3867                            {
3868                        <|fim_middle|>
3869                                TWO();
3870                        <|fim_suffix|>
3871                            }
3872                        }
3873                        {
3874                            {
3875                                three();
3876                    "},
3877                    expected_after_apply: indoc! {"
3878                        {
3879                            {
3880                                one();
3881                            }
3882                        }
3883                        {
3884                            {
3885                                TWO();
3886                            }
3887                        }
3888                        {
3889                            {
3890                                three();
3891                            }
3892                        }
3893                        {
3894                            {
3895                                four();
3896                            }
3897                        }
3898                    "},
3899                },
3900            ];
3901
3902            for case in cases {
3903                let output =
3904                    patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
3905                        .unwrap_or_else(|error| {
3906                            panic!("failed converting patch for {}: {error}", case.name)
3907                        });
3908                assert_eq!(
3909                    output, case.expected_variable_edit,
3910                    "patch->variable_edit mismatch for {}",
3911                    case.name
3912                );
3913
3914                let (edit_range, replacement) = apply_variable_edit(case.old, &output)
3915                    .unwrap_or_else(|error| {
3916                        panic!("failed applying variable_edit for {}: {error}", case.name)
3917                    });
3918                let mut edited_by_variable_edit = case.old.to_string();
3919                edited_by_variable_edit.replace_range(edit_range, &replacement);
3920                assert_eq!(
3921                    edited_by_variable_edit, case.expected_after_apply,
3922                    "variable_edit apply mismatch for {}",
3923                    case.name
3924                );
3925
3926                let (expected_edit_range, expected_replacement) =
3927                    apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
3928                        |error| {
3929                            panic!(
3930                                "failed applying expected variable_edit for {}: {error}",
3931                                case.name
3932                            )
3933                        },
3934                    );
3935                let mut edited_by_expected_variable_edit = case.old.to_string();
3936                edited_by_expected_variable_edit
3937                    .replace_range(expected_edit_range, &expected_replacement);
3938                assert_eq!(
3939                    edited_by_expected_variable_edit, case.expected_after_apply,
3940                    "expected variable_edit apply mismatch for {}",
3941                    case.name
3942                );
3943            }
3944        }
3945
3946        #[test]
3947        fn test_write_cursor_excerpt_section() {
3948            let path = Path::new("test.rs");
3949            let context = "fn main() {\n    hello();\n}\n";
3950            let cursor_offset = 17;
3951            let mut prompt = String::new();
3952            write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
3953            assert_eq!(
3954                prompt,
3955                "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
3956            );
3957        }
3958    }
3959}
3960
3961/// The zeta1 prompt format
3962pub mod zeta1 {
3963    use super::*;
3964    use std::fmt::Write;
3965
    /// Marker the zeta1 format places at the cursor position inside the
    /// editable region (distinct from the crate-level `CURSOR_MARKER`).
    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
    /// Marker line emitted before the excerpt when it starts at the beginning
    /// of the file.
    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
    /// Marker line that opens the editable region of the excerpt.
    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
    /// Marker that closes the editable region of the excerpt.
    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";

    /// Fixed preamble of every zeta1 prompt; ends with the "User Edits"
    /// heading so the formatted events can be appended directly after it.
    const INSTRUCTION_HEADER: &str = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n\n",
        "### User Edits:\n\n"
    );
    /// Heading inserted between the formatted events and the excerpt.
    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
    /// Heading that terminates the prompt, after which the model responds.
    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
3980
3981    /// Formats a complete zeta1 prompt from the input events and excerpt.
3982    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
3983        let mut prompt = String::with_capacity(
3984            INSTRUCTION_HEADER.len()
3985                + input_events.len()
3986                + EXCERPT_HEADER.len()
3987                + input_excerpt.len()
3988                + RESPONSE_HEADER.len(),
3989        );
3990        prompt.push_str(INSTRUCTION_HEADER);
3991        prompt.push_str(input_events);
3992        prompt.push_str(EXCERPT_HEADER);
3993        prompt.push_str(input_excerpt);
3994        prompt.push_str(RESPONSE_HEADER);
3995        prompt
3996    }
3997
3998    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
3999    /// editable and context byte-offset ranges within `cursor_excerpt`.
4000    pub fn format_zeta1_from_input(
4001        input: &ZetaPromptInput,
4002        editable_range: Range<usize>,
4003        context_range: Range<usize>,
4004    ) -> String {
4005        let events = format_zeta1_events(&input.events);
4006        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
4007        format_zeta1_prompt(&events, &excerpt)
4008    }
4009
4010    /// Formats events in zeta1 style (oldest first).
4011    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
4012        let mut result = String::new();
4013        for event in
4014            events
4015                .iter()
4016                .skip(events.len().saturating_sub(max_edit_event_count_for_format(
4017                    &ZetaFormat::V0114180EditableRegion,
4018                )))
4019        {
4020            let event_string = format_zeta1_event(event);
4021            if event_string.is_empty() {
4022                continue;
4023            }
4024            if !result.is_empty() {
4025                result.push_str("\n\n");
4026            }
4027            result.push_str(&event_string);
4028        }
4029        result
4030    }
4031
4032    fn format_zeta1_event(event: &Event) -> String {
4033        match event {
4034            Event::BufferChange {
4035                path,
4036                old_path,
4037                diff,
4038                ..
4039            } => {
4040                let mut prompt = String::new();
4041                if old_path != path {
4042                    writeln!(
4043                        prompt,
4044                        "User renamed {} to {}\n",
4045                        old_path.display(),
4046                        path.display()
4047                    )
4048                    .ok();
4049                }
4050                if !diff.is_empty() {
4051                    write!(
4052                        prompt,
4053                        "User edited {}:\n```diff\n{}\n```",
4054                        path.display(),
4055                        diff
4056                    )
4057                    .ok();
4058                }
4059                prompt
4060            }
4061        }
4062    }
4063
4064    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
4065    /// within `cursor_excerpt`.
4066    fn format_zeta1_excerpt(
4067        input: &ZetaPromptInput,
4068        editable_range: Range<usize>,
4069        context_range: Range<usize>,
4070    ) -> String {
4071        let path_str = input.cursor_path.to_string_lossy();
4072        let excerpt = &*input.cursor_excerpt;
4073        let cursor_offset = input.cursor_offset_in_excerpt;
4074
4075        let mut prompt = String::new();
4076        writeln!(&mut prompt, "```{path_str}").ok();
4077
4078        let starts_at_file_beginning =
4079            input.excerpt_start_row == Some(0) && context_range.start == 0;
4080        if starts_at_file_beginning {
4081            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
4082        }
4083
4084        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
4085
4086        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
4087        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
4088        prompt.push_str(CURSOR_MARKER);
4089        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
4090        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
4091
4092        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
4093        write!(prompt, "\n```").ok();
4094
4095        prompt
4096    }
4097
4098    /// Cleans zeta1 model output by extracting content between editable region
4099    /// markers and converting the zeta1 cursor marker to the universal one.
4100    /// Returns `None` if the output doesn't contain the expected markers.
4101    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
4102        let content = output.replace(CURSOR_MARKER, "");
4103
4104        let content_start = content
4105            .find(EDITABLE_REGION_START_MARKER)
4106            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
4107            .map(|pos| {
4108                if content.as_bytes().get(pos) == Some(&b'\n') {
4109                    pos + 1
4110                } else {
4111                    pos
4112                }
4113            })
4114            .unwrap_or(0);
4115
4116        let content_end = content
4117            .find(EDITABLE_REGION_END_MARKER)
4118            .map(|pos| {
4119                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
4120                    pos - 1
4121                } else {
4122                    pos
4123                }
4124            })
4125            .unwrap_or(content.len());
4126
4127        if content_start > content_end {
4128            return Some(String::new());
4129        }
4130
4131        let extracted = &content[content_start..content_end];
4132
4133        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
4134            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
4135            let text_before_cursor = text_before_cursor
4136                .find(EDITABLE_REGION_START_MARKER)
4137                .map(|pos| {
4138                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
4139                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
4140                        after_marker + 1
4141                    } else {
4142                        after_marker
4143                    }
4144                })
4145                .unwrap_or(0);
4146            let offset_in_extracted = zeta1_cursor_pos
4147                .saturating_sub(text_before_cursor)
4148                .min(extracted.len());
4149            offset_in_extracted
4150        });
4151
4152        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
4153        if let Some(offset) = cursor_offset {
4154            result.push_str(&extracted[..offset]);
4155            result.push_str(super::CURSOR_MARKER);
4156            result.push_str(&extracted[offset..]);
4157        } else {
4158            result.push_str(extracted);
4159        }
4160
4161        Some(result)
4162    }
4163}
4164
4165#[cfg(test)]
4166mod tests {
4167    use super::*;
4168    use indoc::indoc;
4169
4170    fn make_input(
4171        cursor_excerpt: &str,
4172        editable_range: Range<usize>,
4173        cursor_offset: usize,
4174        events: Vec<Event>,
4175        related_files: Vec<RelatedFile>,
4176    ) -> ZetaPromptInput {
4177        let context_range = 0..cursor_excerpt.len();
4178        ZetaPromptInput {
4179            cursor_path: Path::new("test.rs").into(),
4180            cursor_excerpt: cursor_excerpt.into(),
4181            cursor_offset_in_excerpt: cursor_offset,
4182            excerpt_start_row: None,
4183            events: events.into_iter().map(Arc::new).collect(),
4184            related_files: Some(related_files),
4185            active_buffer_diagnostics: vec![],
4186            excerpt_ranges: ExcerptRanges {
4187                editable_150: editable_range.clone(),
4188                editable_180: editable_range.clone(),
4189                editable_350: editable_range,
4190                editable_150_context_350: context_range.clone(),
4191                editable_180_context_350: context_range.clone(),
4192                editable_350_context_150: context_range,
4193                ..Default::default()
4194            },
4195            syntax_ranges: None,
4196            experiment: None,
4197            in_open_source_repo: false,
4198            can_collect_data: false,
4199            repo_url: None,
4200        }
4201    }
4202
4203    fn make_input_with_context_range(
4204        excerpt: &str,
4205        editable_range: Range<usize>,
4206        context_range: Range<usize>,
4207        cursor_offset: usize,
4208    ) -> ZetaPromptInput {
4209        ZetaPromptInput {
4210            cursor_path: Path::new("test.rs").into(),
4211            cursor_excerpt: excerpt.into(),
4212            cursor_offset_in_excerpt: cursor_offset,
4213            excerpt_start_row: None,
4214            events: vec![],
4215            related_files: Some(vec![]),
4216            active_buffer_diagnostics: vec![],
4217            excerpt_ranges: ExcerptRanges {
4218                editable_150: editable_range.clone(),
4219                editable_180: editable_range.clone(),
4220                editable_350: editable_range,
4221                editable_150_context_350: context_range.clone(),
4222                editable_180_context_350: context_range.clone(),
4223                editable_350_context_150: context_range,
4224                ..Default::default()
4225            },
4226            syntax_ranges: None,
4227            experiment: None,
4228            in_open_source_repo: false,
4229            can_collect_data: false,
4230            repo_url: None,
4231        }
4232    }
4233
4234    fn make_event(path: &str, diff: &str) -> Event {
4235        Event::BufferChange {
4236            path: Path::new(path).into(),
4237            old_path: Path::new(path).into(),
4238            diff: diff.to_string(),
4239            predicted: false,
4240            in_open_source_repo: false,
4241        }
4242    }
4243
4244    fn make_related_file(path: &str, content: &str) -> RelatedFile {
4245        RelatedFile {
4246            path: Path::new(path).into(),
4247            max_row: content.lines().count() as u32,
4248            excerpts: vec![RelatedExcerpt {
4249                row_range: 0..content.lines().count() as u32,
4250                text: content.into(),
4251                order: 0,
4252            }],
4253            in_open_source_repo: false,
4254        }
4255    }
4256
4257    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> Option<String> {
4258        format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4259    }
4260
/// Divides `requested_tokens` by 0.9 and rounds up to a whole token count.
fn budget_with_margin(requested_tokens: usize) -> usize {
    let scaled = requested_tokens as f64 / 0.9;
    scaled.ceil() as usize
}
4264
/// With a budget of 10000 the related file, the edit history and the cursor
/// excerpt are all rendered.
#[test]
fn test_no_truncation_when_within_budget() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    let expected = indoc! {r#"
        <|file_sep|>related.rs
        fn helper() {}
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new
        <|file_sep|>test.rs
        <|fim_prefix|>
        prefix
        <|fim_middle|>current
        edi<|user_cursor|>table
        <|fim_suffix|>

        suffix
        <|fim_middle|>updated
    "#}
    .to_string();

    let prompt = format_with_budget(&input, 10000).unwrap();
    assert_eq!(prompt, expected);
}
4298
/// Compares the prompt for a generous budget against the prompt for
/// `budget_with_margin(55)`.
///
/// NOTE(review): despite the test name, the tight-budget expectation keeps the
/// edit history and omits the two related files.
#[test]
fn test_truncation_drops_edit_history_when_budget_tight() {
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let related_files = vec![
        make_related_file("r1.rs", "aaaaaaa\n"),
        make_related_file("r2.rs", "bbbbbbb\n"),
    ];
    let input = make_input("code", 0..4, 2, events, related_files);

    // Generous budget: nothing is left out.
    let expected_full = indoc! {r#"
        <|file_sep|>r1.rs
        aaaaaaa
        <|file_sep|>r2.rs
        bbbbbbb
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#}
    .to_string();
    assert_eq!(format_with_budget(&input, 10000).unwrap(), expected_full);

    // Tight budget: only the edit history and the cursor section remain.
    let expected_tight = indoc! {r#"
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#}
    .to_string();
    let tight_prompt = format_with_budget(&input, budget_with_margin(55));
    assert_eq!(tight_prompt, Some(expected_tight));
}
4354
/// A related file with three order-0 excerpts is rendered in full under a
/// generous budget; under `budget_with_margin(50)` only its first excerpt
/// (followed by `...`) is expected.
#[test]
fn test_truncation_includes_partial_excerpts() {
    let excerpt = |rows: Range<u32>, text: &str| RelatedExcerpt {
        row_range: rows,
        text: text.into(),
        order: 0,
    };
    let big_file = RelatedFile {
        path: Path::new("big.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![
            excerpt(0..10, "first excerpt\n"),
            excerpt(10..20, "second excerpt\n"),
            excerpt(20..30, "third excerpt\n"),
        ],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![big_file]);

    let expected_full = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        second excerpt
        ...
        third excerpt
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#}
    .to_string();
    assert_eq!(format_with_budget(&input, 10000).unwrap(), expected_full);

    let expected_partial = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#}
    .to_string();
    let partial_prompt = format_with_budget(&input, budget_with_margin(50)).unwrap();
    assert_eq!(partial_prompt, expected_partial);
}
4421
/// Two related files: file_a's only excerpt has order 5, file_b's has order 1.
/// Under a tight budget only file_b's lower-order excerpt is expected.
#[test]
fn test_truncation_prioritizes_lower_order_excerpts() {
    let single_excerpt_file = |path: &str, text: &str, order| RelatedFile {
        path: Path::new(path).into(),
        max_row: 10,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..10,
            text: text.into(),
            order,
        }],
    };
    let related_files = vec![
        single_excerpt_file("file_a.rs", "low priority content\n", 5),
        single_excerpt_file("file_b.rs", "high priority content\n", 1),
    ];
    let input = make_input("x", 0..1, 0, vec![], related_files);

    // With large budget, both files are rendered, file_a.rs before file_b.rs.
    let expected_full = indoc! {r#"
        <|file_sep|>file_a.rs
        low priority content
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#}
    .to_string();
    assert_eq!(format_with_budget(&input, 10000).unwrap(), expected_full);

    // With tight budget, only file_b (lower order) fits.
    // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
    // file_b header (7) + excerpt (7) = 14 tokens, which fits.
    // file_a would need another 14 tokens, which doesn't fit.
    let expected_tight = indoc! {r#"
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#}
    .to_string();
    let tight_prompt = format_with_budget(&input, budget_with_margin(52)).unwrap();
    assert_eq!(tight_prompt, expected_tight);
}
4492
/// One file carries two order-1 excerpts and one order-3 excerpt. Under a
/// tight budget the expectation keeps the order-1 excerpts and drops the
/// order-3 one, even though all three belong to the same file.
#[test]
fn test_truncation_drops_high_order_excerpts_within_file() {
    // This also preserves the parent invariant: parent outline items have
    // order ≤ their best child, so they're always included when any child is.
    let excerpt = |rows: Range<u32>, text: &str, order| RelatedExcerpt {
        row_range: rows,
        text: text.into(),
        order,
    };
    let module_file = RelatedFile {
        path: Path::new("mod.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![
            excerpt(0..5, "mod header\n", 1),
            excerpt(5..15, "important fn\n", 1),
            excerpt(15..30, "less important fn\n", 3),
        ],
    };
    let input = make_input("x", 0..1, 0, vec![], vec![module_file]);

    // With large budget, all three excerpts included.
    let expected_full = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        less important fn
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#}
    .to_string();
    assert_eq!(format_with_budget(&input, 10000).unwrap(), expected_full);

    // With tight budget, only order<=1 excerpts included (header + important fn).
    let expected_tight = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#}
    .to_string();
    let tight_prompt = format_with_budget(&input, budget_with_margin(55)).unwrap();
    assert_eq!(tight_prompt, expected_tight);
}
4568
/// Two events are supplied, `old.rs` first and `new.rs` second. With a budget
/// of 60 the expected edit history contains only the `new.rs` event.
#[test]
fn test_truncation_drops_older_events_first() {
    let events = vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")];
    let input = make_input("x", 0..1, 0, events, vec![]);

    let expected_full = indoc! {r#"
        <|file_sep|>edit history
        --- a/old.rs
        +++ b/old.rs
        -1
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#}
    .to_string();
    assert_eq!(format_with_budget(&input, 10000).unwrap(), expected_full);

    let expected_tight = indoc! {r#"
        <|file_sep|>edit history
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#}
    .to_string();
    let tight_prompt = format_with_budget(&input, 60).unwrap();
    assert_eq!(tight_prompt, expected_tight);
}
4616
/// Formatting with a budget of 30 is expected to yield no prompt at all.
///
/// NOTE(review): the test name suggests the cursor excerpt is always emitted,
/// but the assertion checks for `None` — presumably the cursor section alone
/// does not fit in 30 tokens; confirm the name still matches the intent.
#[test]
fn test_cursor_excerpt_always_included_with_minimal_budget() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "helper\n")];
    let input = make_input("fn main() {}", 0..12, 3, events, related_files);

    let prompt = format_with_budget(&input, 30);
    assert!(prompt.is_none());
}
4629
4630    #[track_caller]
4631    fn format_seed_coder(input: &ZetaPromptInput) -> String {
4632        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4633            .expect("seed coder prompt formatting should succeed")
4634    }
4635
4636    #[track_caller]
4637    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4638        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4639            .expect("seed coder prompt formatting should succeed")
4640    }
4641
/// Pins the seed-coder layout for an input with one related file and one
/// edit event: suffix first, then context, then the cursor file.
#[test]
fn test_seed_coder_basic_format() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    let expected = indoc! {r#"
        <[fim-suffix]>
        suffix
        <[fim-prefix]><filename>related.rs
        fn helper() {}

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new

        <filename>test.rs
        prefix
        <<<<<<< CURRENT
        edi<|user_cursor|>table
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&input), expected);
}
4674
/// With 900 related files and a budget of 4096, `V0317SeedMultiRegions` must
/// still produce a prompt that mentions `test.rs` and contains the cursor marker.
#[test]
fn test_v0317_formats_prompt_with_many_related_files() {
    let mut related_files = Vec::with_capacity(900);
    for index in 0..900 {
        related_files.push(make_related_file(
            &format!("related_{index}.rs"),
            "fn helper() {\n    let value = 1;\n}\n",
        ));
    }

    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let input = make_input("code", 0..4, 2, events, related_files);

    let formatted =
        format_prompt_with_budget_for_format(&input, ZetaFormat::V0317SeedMultiRegions, 4096);
    assert!(formatted.is_some());

    let prompt = formatted.expect("v0317 should produce a prompt under high related-file count");
    assert!(prompt.contains("test.rs"));
    assert!(prompt.contains(CURSOR_MARKER));
}
4702
/// With no events and no related files, the seed-coder prompt holds only the
/// suffix and the cursor file section.
#[test]
fn test_seed_coder_no_context() {
    let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);

    let expected = indoc! {r#"
        <[fim-suffix]>
        after
        <[fim-prefix]><filename>test.rs
        before
        <<<<<<< CURRENT
        mid<|user_cursor|>dle
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&input), expected);
}
4720
/// Checks three budgets for the seed-coder format: the default one renders
/// everything, 24 yields `None`, and 40 renders only the cursor file section.
#[test]
fn test_seed_coder_truncation_drops_context() {
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let related_files = vec![make_related_file("r1.rs", "content\n")];
    let input = make_input("code", 0..4, 2, events, related_files);

    // With large budget, everything is included
    let expected_full = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>r1.rs
        content

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&input), expected_full);

    // A budget of 24 produces no prompt at all.
    let too_small = format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 24);
    assert_eq!(too_small, None);

    // A budget of 40 keeps only the cursor file.
    let expected_minimal = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder_with_budget(&input, 40), expected_minimal);
}
4769
/// Two related files with orders 10 and 1 are rendered under the default
/// budget; with a budget of 44 the expectation contains only the cursor file.
#[test]
fn test_seed_coder_truncation_prioritizes_lower_order() {
    let single_excerpt_file = |path: &str, text: &str, order| RelatedFile {
        path: Path::new(path).into(),
        max_row: 5,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..5,
            text: text.into(),
            order,
        }],
    };
    let related_files = vec![
        single_excerpt_file("low_prio.rs", "low prio\n", 10),
        single_excerpt_file("high_prio.rs", "high prio\n", 1),
    ];
    let input = make_input("code", 0..4, 2, vec![], related_files);

    // With large budget, both are included; the expected output lists them in
    // the order they were supplied (low_prio.rs first), not sorted by name.
    let expected_full = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>low_prio.rs
        low prio
        <filename>high_prio.rs
        high prio

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&input), expected_full);

    // With tight budget under the generic heuristic, context is dropped but the
    // minimal cursor section still fits.
    let expected_tight = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#}
    .to_string();
    let tight_prompt = format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 44);
    assert_eq!(tight_prompt, Some(expected_tight));
}
4834
/// Pins the zeta1 prompt for an excerpt starting at row 0 with one edit event,
/// an editable range of 15..41 and the whole excerpt as context.
#[test]
fn test_format_zeta1_from_input_basic() {
    let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
    let editable = 15..41;
    let context = 0..excerpt.len();

    let excerpt_ranges = ExcerptRanges {
        editable_150: editable.clone(),
        editable_180: editable.clone(),
        editable_350: editable.clone(),
        editable_150_context_350: context.clone(),
        editable_180_context_350: context.clone(),
        editable_350_context_150: context.clone(),
        ..Default::default()
    };
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 30,
        excerpt_start_row: Some(0),
        events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
        related_files: Some(vec![]),
        active_buffer_diagnostics: vec![],
        excerpt_ranges,
        syntax_ranges: None,
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let expected = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n",
        "\n",
        "### User Edits:\n",
        "\n",
        "User edited other.rs:\n",
        "```diff\n",
        "-old\n",
        "+new\n",
        "\n",
        "```\n",
        "\n",
        "### User Excerpt:\n",
        "\n",
        "```src/main.rs\n",
        "<|start_of_file|>\n",
        "fn before() {}\n",
        "<|editable_region_start|>\n",
        "fn foo() {\n",
        "    <|user_cursor_is_here|>let x = 1;\n",
        "\n",
        "<|editable_region_end|>}\n",
        "fn after() {}\n",
        "\n",
        "```\n",
        "\n",
        "### Response:\n",
    );

    let prompt = zeta1::format_zeta1_from_input(&input, editable, context);
    assert_eq!(prompt, expected);
}
4899
/// Pins the zeta1 prompt for an excerpt whose start row is 10: the expected
/// output has no `<|start_of_file|>` line.
#[test]
fn test_format_zeta1_from_input_no_start_of_file() {
    let excerpt = "fn foo() {\n    let x = 1;\n}\n";
    // Every editable and context slot uses the same 0..28 range.
    let whole_excerpt = 0..28;

    let excerpt_ranges = ExcerptRanges {
        editable_150: whole_excerpt.clone(),
        editable_180: whole_excerpt.clone(),
        editable_350: whole_excerpt.clone(),
        editable_150_context_350: whole_excerpt.clone(),
        editable_180_context_350: whole_excerpt.clone(),
        editable_350_context_150: whole_excerpt.clone(),
        ..Default::default()
    };
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 15,
        excerpt_start_row: Some(10),
        events: vec![],
        related_files: Some(vec![]),
        active_buffer_diagnostics: vec![],
        excerpt_ranges,
        syntax_ranges: None,
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let expected = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n",
        "\n",
        "### User Edits:\n",
        "\n",
        "\n",
        "\n",
        "### User Excerpt:\n",
        "\n",
        "```src/main.rs\n",
        "<|editable_region_start|>\n",
        "fn foo() {\n",
        "    <|user_cursor_is_here|>let x = 1;\n",
        "}\n",
        "\n",
        "<|editable_region_end|>\n",
        "```\n",
        "\n",
        "### Response:\n",
    );

    let prompt = zeta1::format_zeta1_from_input(&input, whole_excerpt.clone(), whole_excerpt);
    assert_eq!(prompt, expected);
}
4956
/// Pins the zeta1 prompt when the editable range (10..37) is a strict subset
/// of the context range, which spans the whole excerpt.
#[test]
fn test_format_zeta1_from_input_with_sub_ranges() {
    let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
    let editable_range = 10..37;
    let context_range = 0..excerpt.len();

    let excerpt_ranges = ExcerptRanges {
        editable_150: editable_range.clone(),
        editable_180: editable_range.clone(),
        editable_350: editable_range.clone(),
        editable_150_context_350: context_range.clone(),
        editable_180_context_350: context_range.clone(),
        editable_350_context_150: context_range.clone(),
        ..Default::default()
    };
    let input = ZetaPromptInput {
        cursor_path: Path::new("test.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 25,
        excerpt_start_row: Some(0),
        events: vec![],
        related_files: Some(vec![]),
        active_buffer_diagnostics: vec![],
        excerpt_ranges,
        syntax_ranges: None,
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let expected = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n",
        "\n",
        "### User Edits:\n",
        "\n",
        "\n",
        "\n",
        "### User Excerpt:\n",
        "\n",
        "```test.rs\n",
        "<|start_of_file|>\n",
        "// prefix\n",
        "<|editable_region_start|>\n",
        "fn foo() {\n",
        "    <|user_cursor_is_here|>let x = 1;\n",
        "}\n",
        "<|editable_region_end|>\n",
        "// suffix\n",
        "\n",
        "```\n",
        "\n",
        "### Response:\n",
    );

    let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
    assert_eq!(prompt, expected);
}
5019
/// Exercises the last argument of `format_edit_history_within_budget` with
/// three events and an unlimited (`usize::MAX`) token budget.
///
/// A limit of 5 is expected to render all three events, a limit of 2 only the
/// last two (`event-1`, `event-2`), and a limit of 0 an empty string.
#[test]
fn test_max_event_count() {
    fn make_numbered_event(index: usize) -> Event {
        make_event(
            &format!("event-{index}.rs"),
            &format!("-old-{index}\n+new-{index}\n"),
        )
    }

    let input = make_input(
        "x",
        0..1,
        0,
        (0..3).map(make_numbered_event).collect(),
        vec![],
    );

    // Only the final argument varies between the three scenarios below.
    let history_limited_to = |max_event_count| {
        format_edit_history_within_budget(
            &input.events,
            "<|file_sep|>",
            "edit history",
            usize::MAX,
            max_event_count,
        )
    };

    // Limit above the number of events: everything is rendered.
    let edit_history_section = history_limited_to(5);
    assert_eq!(
        &edit_history_section,
        indoc!(
            "
            <|file_sep|>edit history
            --- a/event-0.rs
            +++ b/event-0.rs
            -old-0
            +new-0
            --- a/event-1.rs
            +++ b/event-1.rs
            -old-1
            +new-1
            --- a/event-2.rs
            +++ b/event-2.rs
            -old-2
            +new-2
        "
        )
    );

    // Limit of two: the first event is left out.
    let edit_history_section = history_limited_to(2);
    assert_eq!(
        &edit_history_section,
        indoc!(
            "
            <|file_sep|>edit history
            --- a/event-1.rs
            +++ b/event-1.rs
            -old-1
            +new-1
            --- a/event-2.rs
            +++ b/event-2.rs
            -old-2
            +new-2
        "
        )
    );

    // Limit of zero: no section at all.
    let edit_history_section = history_limited_to(0);
    assert_eq!(&edit_history_section, "");
}
5100
/// Output wrapped in editable-region markers is reduced to the text between
/// them, without the trailing newline.
#[test]
fn test_clean_zeta1_model_output_basic() {
    let model_output = indoc! {"
        <|editable_region_start|>
        fn main() {
            println!(\"hello\");
        }
        <|editable_region_end|>
    "};
    let expected = "fn main() {\n    println!(\"hello\");\n}";

    let cleaned = zeta1::clean_zeta1_model_output(model_output).unwrap();
    assert_eq!(cleaned, expected);
}
5114
/// A `<|user_cursor_is_here|>` marker inside the editable region comes back
/// as `<|user_cursor|>` in the cleaned text.
#[test]
fn test_clean_zeta1_model_output_with_cursor() {
    let model_output = indoc! {"
        <|editable_region_start|>
        fn main() {
            <|user_cursor_is_here|>println!(\"hello\");
        }
        <|editable_region_end|>
    "};
    let expected = "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}";

    let cleaned = zeta1::clean_zeta1_model_output(model_output).unwrap();
    assert_eq!(cleaned, expected);
}
5131
/// Output without any region markers is returned unchanged.
#[test]
fn test_clean_zeta1_model_output_no_markers() {
    let model_output = "fn main() {}\n";
    let cleaned = zeta1::clean_zeta1_model_output(model_output).unwrap();
    assert_eq!(cleaned, "fn main() {}\n");
}
5138
/// An editable region with nothing between its markers cleans to an empty string.
#[test]
fn test_clean_zeta1_model_output_empty_region() {
    let model_output = "<|editable_region_start|>\n<|editable_region_end|>\n";
    let cleaned = zeta1::clean_zeta1_model_output(model_output).unwrap();
    assert_eq!(cleaned, "");
}
5145
5146    fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
5147        let mut result = excerpt.to_string();
5148        result.replace_range(
5149            parsed_output.range_in_excerpt.clone(),
5150            &parsed_output.new_editable_region,
5151        );
5152        result
5153    }
5154
5155    #[test]
5156    fn test_parse_zeta2_model_output() {
5157        let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
5158        let context_start = excerpt.find("ctx start").unwrap();
5159        let context_end = excerpt.find("after ctx").unwrap();
5160        let editable_start = excerpt.find("editable old").unwrap();
5161        let editable_end = editable_start + "editable old\n".len();
5162        let input = make_input_with_context_range(
5163            excerpt,
5164            editable_start..editable_end,
5165            context_start..context_end,
5166            editable_start,
5167        );
5168
5169        let output = parse_zeta2_model_output(
5170            "editable new\n>>>>>>> UPDATED\n",
5171            ZetaFormat::V0131GitMergeMarkersPrefix,
5172            &input,
5173        )
5174        .unwrap();
5175
5176        assert_eq!(
5177            apply_edit(excerpt, &output),
5178            "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
5179        );
5180    }
5181
5182    #[test]
5183    fn test_parse_zeta2_model_output_identity() {
5184        let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
5185        let editable_start = excerpt.find("bbb").unwrap();
5186        let editable_end = excerpt.find("ddd").unwrap();
5187        let input = make_input_with_context_range(
5188            excerpt,
5189            editable_start..editable_end,
5190            0..excerpt.len(),
5191            editable_start,
5192        );
5193
5194        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5195        let output =
5196            parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
5197
5198        assert_eq!(apply_edit(excerpt, &output), excerpt);
5199    }
5200
5201    #[test]
5202    fn test_parse_zeta2_model_output_strips_end_marker() {
5203        let excerpt = "hello\nworld\n";
5204        let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
5205
5206        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5207        let output1 =
5208            parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
5209        let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();
5210
5211        assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
5212        assert_eq!(apply_edit(excerpt, &output1), "new content\n");
5213    }
5214}