zeta_prompt.rs

   1pub mod excerpt_ranges;
   2pub mod multi_region;
   3pub mod udiff;
   4
   5use anyhow::{Result, anyhow};
   6use serde::{Deserialize, Serialize};
   7use std::fmt::Write;
   8use std::ops::Range;
   9use std::path::Path;
  10use std::sync::Arc;
  11use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
  12
  13pub use crate::excerpt_ranges::{
  14    ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
  15};
  16
/// Special token representing the user's cursor. It is passed to the
/// multi-region marker writers in this file and is listed among the special
/// tokens of every multi-region format.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` uses when formatting a prompt.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  24
  25fn estimate_tokens(bytes: usize) -> usize {
  26    bytes / 3
  27}
  28
  29/// Leave some slack to avoid overflow.
  30fn apply_prompt_budget_margin(max_tokens: usize) -> usize {
  31    (max_tokens as f64 * 0.9).floor() as usize
  32}
  33
/// Everything needed to format a prompt: the excerpt around the cursor plus
/// auxiliary context such as edit events and related files.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the cursor's file. Related files with the same path have the
    /// excerpts that fall inside the cursor's context rows filtered out.
    pub cursor_path: Arc<Path>,
    /// Text of the excerpt that the offsets and ranges below index into.
    pub cursor_excerpt: Arc<str>,
    /// Offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Row offset added to rows computed within `cursor_excerpt` before they
    /// are compared with related-file excerpt rows. When absent, related
    /// files are used unfiltered.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events used to build the prompt's edit history.
    pub events: Vec<Arc<Event>>,
    /// Related files to include in the prompt; `None` is treated as an empty
    /// list when formatting.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    /// Diagnostics for the active buffer; left out of serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    /// NOTE(review): presumably whether the cursor's file belongs to an open
    /// source repository — confirm with the code that fills this in.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// NOTE(review): presumably whether this input may be collected as data —
    /// confirm with the code that fills this in.
    #[serde(default)]
    pub can_collect_data: bool,
    /// Optional repository URL; left out of serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
  64
/// Identifies a prompt format version. The per-format functions in this file
/// (special tokens, token limits, stop tokens, end markers, ...) dispatch on
/// this enum. `V0131GitMergeMarkersPrefix` is the default.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// `v0226Hashline` starts with a lowercase letter, which the naming lint
// would otherwise flag.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
    /// Multi-block marker spans with NO_EDITS sentinel.
    V0306SeedMultiRegions,
    /// Byte-exact marker spans; all intermediate markers emitted; repeated marker means no-edit.
    V0316SeedMultiRegions,
    /// V0316 with larger block sizes.
    V0318SeedMultiRegions,
    /// V0316, but marker numbers are relative to the cursor block (e.g. -1, -0, +1).
    V0317SeedMultiRegions,
}
 100
 101impl std::fmt::Display for ZetaFormat {
 102    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 103        write!(f, "{}", <&'static str>::from(self))
 104    }
 105}
 106
 107impl ZetaFormat {
 108    pub fn parse(format_name: &str) -> Result<Self> {
 109        let lower = format_name.to_lowercase();
 110
 111        // Exact case-insensitive match takes priority, bypassing ambiguity checks.
 112        for variant in ZetaFormat::iter() {
 113            if <&'static str>::from(&variant).to_lowercase() == lower {
 114                return Ok(variant);
 115            }
 116        }
 117
 118        let mut results = ZetaFormat::iter().filter(|version| {
 119            <&'static str>::from(version)
 120                .to_lowercase()
 121                .contains(&lower)
 122        });
 123        let Some(result) = results.next() else {
 124            anyhow::bail!(
 125                "`{format_name}` did not match any of:\n{}",
 126                Self::options_as_string()
 127            );
 128        };
 129        if results.next().is_some() {
 130            anyhow::bail!(
 131                "`{format_name}` matched more than one of:\n{}",
 132                Self::options_as_string()
 133            );
 134        }
 135        Ok(result)
 136    }
 137
 138    pub fn options_as_string() -> String {
 139        ZetaFormat::iter()
 140            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 141            .collect::<Vec<_>>()
 142            .concat()
 143    }
 144}
 145
/// An event that can be written into a prompt via `write_event`.
/// Serialized with the variant name stored under the `event` key.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A buffer change described by a diff.
    BufferChange {
        /// Path written on the `+++ b` line by `write_event`.
        path: Arc<Path>,
        /// Path written on the `--- a` line by `write_event`.
        old_path: Arc<Path>,
        /// Diff text appended after the two path lines.
        diff: String,
        /// When true, `write_event` prefixes the entry with
        /// `// User accepted prediction:`.
        predicted: bool,
        /// NOTE(review): presumably whether the changed file belongs to an
        /// open source repository — confirm with the producer of this event.
        in_open_source_repo: bool,
    },
}
 157
 158impl Event {
 159    pub fn in_open_source_repo(&self) -> bool {
 160        match self {
 161            Event::BufferChange {
 162                in_open_source_repo,
 163                ..
 164            } => *in_open_source_repo,
 165        }
 166    }
 167}
 168
 169pub fn write_event(prompt: &mut String, event: &Event) {
 170    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 171        for component in path.components() {
 172            prompt.push('/');
 173            write!(prompt, "{}", component.as_os_str().display()).ok();
 174        }
 175    }
 176    match event {
 177        Event::BufferChange {
 178            path,
 179            old_path,
 180            diff,
 181            predicted,
 182            in_open_source_repo: _,
 183        } => {
 184            if *predicted {
 185                prompt.push_str("// User accepted prediction:\n");
 186            }
 187            prompt.push_str("--- a");
 188            write_path_as_unix_str(prompt, old_path.as_ref());
 189            prompt.push_str("\n+++ b");
 190            write_path_as_unix_str(prompt, path.as_ref());
 191            prompt.push('\n');
 192            prompt.push_str(diff);
 193        }
 194    }
 195}
 196
/// A diagnostic attached to the active buffer, together with a text snippet.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    /// Optional severity code. NOTE(review): the numeric scale is not
    /// defined here — confirm against the producer.
    pub severity: Option<i32>,
    /// The diagnostic's message text.
    pub message: String,
    /// Snippet text. NOTE(review): presumably the buffer text around the
    /// diagnostic — confirm against the producer.
    pub snippet: String,
    /// NOTE(review): presumably the buffer rows that `snippet` covers — confirm.
    pub snippet_buffer_row_range: Range<u32>,
    /// NOTE(review): presumably the diagnostic's offset range within
    /// `snippet` — confirm.
    pub diagnostic_range_in_snippet: Range<usize>,
}
 205
/// A file related to the cursor location, with the excerpts selected from it.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path of the file; compared with the cursor path by
    /// `filter_redundant_excerpts`.
    pub path: Arc<Path>,
    /// NOTE(review): presumably the last row of the file — confirm against
    /// the producer.
    pub max_row: u32,
    /// Excerpts taken from this file. `filter_redundant_excerpts` drops files
    /// whose excerpt list ends up empty.
    pub excerpts: Vec<RelatedExcerpt>,
    /// Defaults to `false` when absent from the serialized form.
    /// NOTE(review): presumably whether the file belongs to an open source
    /// repository — confirm.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 214
/// A span of text taken from a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file covered by this excerpt; compared with the cursor's
    /// row range by `filter_redundant_excerpts`.
    pub row_range: Range<u32>,
    /// The excerpt's text.
    pub text: Arc<str>,
    /// Defaults to `0` when absent from the serialized form.
    /// NOTE(review): presumably an ordering key for excerpts — confirm how
    /// consumers use it.
    #[serde(default)]
    pub order: usize,
}
 222
 223pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 224    special_tokens_for_format(format).iter().any(|token| {
 225        if let Some(line_token) = token.strip_suffix('\n') {
 226            input.cursor_excerpt.lines().any(|line| line == line_token)
 227        } else {
 228            input.cursor_excerpt.contains(token)
 229        }
 230    })
 231}
 232
 233pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> Option<String> {
 234    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
 235}
 236
 237pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 238    match format {
 239        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
 240        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
 241        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
 242        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 243        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
 244        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
 245        ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 246        ZetaFormat::v0226Hashline => hashline::special_tokens(),
 247        ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
 248        ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
 249        ZetaFormat::V0316SeedMultiRegions => {
 250            static TOKENS: &[&str] = &[
 251                seed_coder::FIM_SUFFIX,
 252                seed_coder::FIM_PREFIX,
 253                seed_coder::FIM_MIDDLE,
 254                seed_coder::FILE_MARKER,
 255                multi_region::V0316_END_MARKER,
 256                CURSOR_MARKER,
 257                multi_region::MARKER_TAG_PREFIX,
 258            ];
 259            TOKENS
 260        }
 261        ZetaFormat::V0318SeedMultiRegions => {
 262            static TOKENS: &[&str] = &[
 263                seed_coder::FIM_SUFFIX,
 264                seed_coder::FIM_PREFIX,
 265                seed_coder::FIM_MIDDLE,
 266                seed_coder::FILE_MARKER,
 267                multi_region::V0318_END_MARKER,
 268                CURSOR_MARKER,
 269                multi_region::MARKER_TAG_PREFIX,
 270            ];
 271            TOKENS
 272        }
 273        ZetaFormat::V0317SeedMultiRegions => {
 274            static TOKENS: &[&str] = &[
 275                seed_coder::FIM_SUFFIX,
 276                seed_coder::FIM_PREFIX,
 277                seed_coder::FIM_MIDDLE,
 278                seed_coder::FILE_MARKER,
 279                multi_region::V0317_END_MARKER,
 280                CURSOR_MARKER,
 281                multi_region::RELATIVE_MARKER_TAG_PREFIX,
 282            ];
 283            TOKENS
 284        }
 285        ZetaFormat::V0306SeedMultiRegions => {
 286            static TOKENS: &[&str] = &[
 287                seed_coder::FIM_SUFFIX,
 288                seed_coder::FIM_PREFIX,
 289                seed_coder::FIM_MIDDLE,
 290                seed_coder::FILE_MARKER,
 291                seed_coder::START_MARKER,
 292                seed_coder::SEPARATOR,
 293                seed_coder::END_MARKER,
 294                CURSOR_MARKER,
 295                multi_region::MARKER_TAG_PREFIX,
 296            ];
 297            TOKENS
 298        }
 299    }
 300}
 301
 302/// Returns the (editable_token_limit, context_token_limit) for a given format.
 303pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
 304    match format {
 305        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
 306        ZetaFormat::V0114180EditableRegion => (180, 350),
 307        ZetaFormat::V0120GitMergeMarkers
 308        | ZetaFormat::V0131GitMergeMarkersPrefix
 309        | ZetaFormat::V0211Prefill
 310        | ZetaFormat::V0211SeedCoder
 311        | ZetaFormat::v0226Hashline
 312        | ZetaFormat::V0306SeedMultiRegions
 313        | ZetaFormat::V0316SeedMultiRegions
 314        | ZetaFormat::V0318SeedMultiRegions
 315        | ZetaFormat::V0317SeedMultiRegions
 316        | ZetaFormat::V0304SeedNoEdits => (350, 150),
 317        ZetaFormat::V0304VariableEdit => (1024, 0),
 318    }
 319}
 320
 321pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 322    match format {
 323        ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
 324        ZetaFormat::V0112MiddleAtEnd
 325        | ZetaFormat::V0113Ordered
 326        | ZetaFormat::V0114180EditableRegion
 327        | ZetaFormat::V0120GitMergeMarkers
 328        | ZetaFormat::V0131GitMergeMarkersPrefix
 329        | ZetaFormat::V0211Prefill
 330        | ZetaFormat::V0211SeedCoder
 331        | ZetaFormat::V0304VariableEdit
 332        | ZetaFormat::V0306SeedMultiRegions
 333        | ZetaFormat::V0304SeedNoEdits => &[],
 334        ZetaFormat::V0316SeedMultiRegions => &[multi_region::V0316_END_MARKER],
 335        ZetaFormat::V0318SeedMultiRegions => &[multi_region::V0318_END_MARKER],
 336        ZetaFormat::V0317SeedMultiRegions => &[multi_region::V0317_END_MARKER],
 337    }
 338}
 339
 340pub fn excerpt_ranges_for_format(
 341    format: ZetaFormat,
 342    ranges: &ExcerptRanges,
 343) -> (Range<usize>, Range<usize>) {
 344    match format {
 345        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 346            ranges.editable_150.clone(),
 347            ranges.editable_150_context_350.clone(),
 348        ),
 349        ZetaFormat::V0114180EditableRegion => (
 350            ranges.editable_180.clone(),
 351            ranges.editable_180_context_350.clone(),
 352        ),
 353        ZetaFormat::V0120GitMergeMarkers
 354        | ZetaFormat::V0131GitMergeMarkersPrefix
 355        | ZetaFormat::V0211Prefill
 356        | ZetaFormat::V0211SeedCoder
 357        | ZetaFormat::v0226Hashline
 358        | ZetaFormat::V0304SeedNoEdits
 359        | ZetaFormat::V0306SeedMultiRegions
 360        | ZetaFormat::V0316SeedMultiRegions
 361        | ZetaFormat::V0318SeedMultiRegions
 362        | ZetaFormat::V0317SeedMultiRegions => (
 363            ranges.editable_350.clone(),
 364            ranges.editable_350_context_150.clone(),
 365        ),
 366        ZetaFormat::V0304VariableEdit => {
 367            let context = ranges
 368                .editable_350_context_1024
 369                .clone()
 370                .or(ranges.editable_350_context_512.clone())
 371                .unwrap_or_else(|| ranges.editable_350_context_150.clone());
 372            (context.clone(), context)
 373        }
 374    }
 375}
 376
 377pub fn write_cursor_excerpt_section_for_format(
 378    format: ZetaFormat,
 379    prompt: &mut String,
 380    path: &Path,
 381    context: &str,
 382    editable_range: &Range<usize>,
 383    cursor_offset: usize,
 384) {
 385    match format {
 386        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
 387            prompt,
 388            path,
 389            context,
 390            editable_range,
 391            cursor_offset,
 392        ),
 393        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 394            v0113_ordered::write_cursor_excerpt_section(
 395                prompt,
 396                path,
 397                context,
 398                editable_range,
 399                cursor_offset,
 400            )
 401        }
 402        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 403            prompt,
 404            path,
 405            context,
 406            editable_range,
 407            cursor_offset,
 408        ),
 409        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 410            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 411                prompt,
 412                path,
 413                context,
 414                editable_range,
 415                cursor_offset,
 416            )
 417        }
 418        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
 419            seed_coder::write_cursor_excerpt_section(
 420                prompt,
 421                path,
 422                context,
 423                editable_range,
 424                cursor_offset,
 425            )
 426        }
 427        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
 428            prompt,
 429            path,
 430            context,
 431            editable_range,
 432            cursor_offset,
 433        ),
 434        ZetaFormat::V0304VariableEdit => {
 435            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
 436        }
 437        ZetaFormat::V0306SeedMultiRegions => {
 438            prompt.push_str(&build_v0306_cursor_prefix(
 439                path,
 440                context,
 441                editable_range,
 442                cursor_offset,
 443            ));
 444        }
 445        ZetaFormat::V0316SeedMultiRegions => {
 446            prompt.push_str(&build_v0316_cursor_prefix(
 447                path,
 448                context,
 449                editable_range,
 450                cursor_offset,
 451            ));
 452        }
 453        ZetaFormat::V0318SeedMultiRegions => {
 454            prompt.push_str(&build_v0318_cursor_prefix(
 455                path,
 456                context,
 457                editable_range,
 458                cursor_offset,
 459            ));
 460        }
 461        ZetaFormat::V0317SeedMultiRegions => {
 462            prompt.push_str(&build_v0317_cursor_prefix(
 463                path,
 464                context,
 465                editable_range,
 466                cursor_offset,
 467            ));
 468        }
 469    }
 470}
 471
 472fn build_v0306_cursor_prefix(
 473    path: &Path,
 474    context: &str,
 475    editable_range: &Range<usize>,
 476    cursor_offset: usize,
 477) -> String {
 478    let mut section = String::new();
 479    let path_str = path.to_string_lossy();
 480    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 481
 482    section.push_str(&context[..editable_range.start]);
 483    section.push_str(seed_coder::START_MARKER);
 484
 485    let editable_text = &context[editable_range.clone()];
 486    let cursor_in_editable = cursor_offset - editable_range.start;
 487    multi_region::write_editable_with_markers(
 488        &mut section,
 489        editable_text,
 490        cursor_in_editable,
 491        CURSOR_MARKER,
 492    );
 493
 494    if !section.ends_with('\n') {
 495        section.push('\n');
 496    }
 497    section.push_str(seed_coder::SEPARATOR);
 498    section
 499}
 500
 501fn build_v0316_cursor_prefix(
 502    path: &Path,
 503    context: &str,
 504    editable_range: &Range<usize>,
 505    cursor_offset: usize,
 506) -> String {
 507    let mut section = String::new();
 508    let path_str = path.to_string_lossy();
 509    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 510
 511    section.push_str(&context[..editable_range.start]);
 512
 513    let editable_text = &context[editable_range.clone()];
 514    let cursor_in_editable = cursor_offset - editable_range.start;
 515    multi_region::write_editable_with_markers_v0316(
 516        &mut section,
 517        editable_text,
 518        cursor_in_editable,
 519        CURSOR_MARKER,
 520    );
 521
 522    if !section.ends_with('\n') {
 523        section.push('\n');
 524    }
 525    section
 526}
 527
 528fn build_v0318_cursor_prefix(
 529    path: &Path,
 530    context: &str,
 531    editable_range: &Range<usize>,
 532    cursor_offset: usize,
 533) -> String {
 534    let mut section = String::new();
 535    let path_str = path.to_string_lossy();
 536    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 537
 538    section.push_str(&context[..editable_range.start]);
 539
 540    let editable_text = &context[editable_range.clone()];
 541    let cursor_in_editable = cursor_offset - editable_range.start;
 542    multi_region::write_editable_with_markers_v0318(
 543        &mut section,
 544        editable_text,
 545        cursor_in_editable,
 546        CURSOR_MARKER,
 547    );
 548
 549    if !section.ends_with('\n') {
 550        section.push('\n');
 551    }
 552    section
 553}
 554
 555fn build_v0317_cursor_prefix(
 556    path: &Path,
 557    context: &str,
 558    editable_range: &Range<usize>,
 559    cursor_offset: usize,
 560) -> String {
 561    let mut section = String::new();
 562    let path_str = path.to_string_lossy();
 563    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 564
 565    section.push_str(&context[..editable_range.start]);
 566
 567    let editable_text = &context[editable_range.clone()];
 568    let cursor_in_editable = cursor_offset - editable_range.start;
 569    multi_region::write_editable_with_markers_v0317(
 570        &mut section,
 571        editable_text,
 572        cursor_in_editable,
 573        CURSOR_MARKER,
 574    );
 575
 576    if !section.ends_with('\n') {
 577        section.push('\n');
 578    }
 579    section
 580}
 581
 582fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
 583    let start_row = text[0..range.start].matches('\n').count() as u32;
 584    let mut end_row = start_row + text[range.clone()].matches('\n').count() as u32;
 585    if !text[..range.end].ends_with('\n') {
 586        end_row += 1;
 587    }
 588    return start_row..end_row;
 589}
 590
 591pub fn format_prompt_with_budget_for_format(
 592    input: &ZetaPromptInput,
 593    format: ZetaFormat,
 594    max_tokens: usize,
 595) -> Option<String> {
 596    let (context, editable_range, context_range, cursor_offset) =
 597        resolve_cursor_region(input, format);
 598    let path = &*input.cursor_path;
 599
 600    let empty_files = Vec::new();
 601    let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
 602    let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
 603        let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
 604        let row_range = relative_row_range.start + cursor_excerpt_start_row
 605            ..relative_row_range.end + cursor_excerpt_start_row;
 606        &filter_redundant_excerpts(
 607            input_related_files.to_vec(),
 608            input.cursor_path.as_ref(),
 609            row_range,
 610        )
 611    } else {
 612        input_related_files
 613    };
 614
 615    let prompt = match format {
 616        ZetaFormat::V0211SeedCoder
 617        | ZetaFormat::V0304SeedNoEdits
 618        | ZetaFormat::V0306SeedMultiRegions
 619        | ZetaFormat::V0316SeedMultiRegions
 620        | ZetaFormat::V0318SeedMultiRegions
 621        | ZetaFormat::V0317SeedMultiRegions => {
 622            let mut cursor_section = String::new();
 623            write_cursor_excerpt_section_for_format(
 624                format,
 625                &mut cursor_section,
 626                path,
 627                context,
 628                &editable_range,
 629                cursor_offset,
 630            );
 631
 632            let budget_with_margin = apply_prompt_budget_margin(max_tokens);
 633            seed_coder::assemble_fim_prompt(
 634                context,
 635                &editable_range,
 636                &cursor_section,
 637                &input.events,
 638                related_files,
 639                budget_with_margin,
 640            )
 641        }
 642        _ => {
 643            let mut cursor_section = String::new();
 644            write_cursor_excerpt_section_for_format(
 645                format,
 646                &mut cursor_section,
 647                path,
 648                context,
 649                &editable_range,
 650                cursor_offset,
 651            );
 652
 653            let mut remaining_budget = apply_prompt_budget_margin(max_tokens);
 654            let cursor_tokens = estimate_tokens(cursor_section.len());
 655            remaining_budget = remaining_budget.saturating_sub(cursor_tokens);
 656
 657            let edit_history_section = format_edit_history_within_budget(
 658                &input.events,
 659                "<|file_sep|>",
 660                "edit history",
 661                remaining_budget,
 662                max_edit_event_count_for_format(&format),
 663            );
 664            let edit_history_tokens = estimate_tokens(edit_history_section.len());
 665            remaining_budget = remaining_budget.saturating_sub(edit_history_tokens);
 666
 667            let related_files_section = format_related_files_within_budget(
 668                &related_files,
 669                "<|file_sep|>",
 670                "",
 671                remaining_budget,
 672            );
 673
 674            let mut prompt = String::new();
 675            prompt.push_str(&related_files_section);
 676            prompt.push_str(&edit_history_section);
 677            prompt.push_str(&cursor_section);
 678            prompt
 679        }
 680    };
 681    let prompt_tokens = estimate_tokens(prompt.len());
 682    if prompt_tokens > max_tokens {
 683        return None;
 684    }
 685    return Some(prompt);
 686}
 687
 688pub fn filter_redundant_excerpts(
 689    mut related_files: Vec<RelatedFile>,
 690    cursor_path: &Path,
 691    cursor_row_range: Range<u32>,
 692) -> Vec<RelatedFile> {
 693    for file in &mut related_files {
 694        if file.path.as_ref() == cursor_path {
 695            file.excerpts.retain(|excerpt| {
 696                excerpt.row_range.start < cursor_row_range.start
 697                    || excerpt.row_range.end > cursor_row_range.end
 698            });
 699        }
 700    }
 701    related_files.retain(|file| !file.excerpts.is_empty());
 702    related_files
 703}
 704
 705pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
 706    match format {
 707        ZetaFormat::V0112MiddleAtEnd
 708        | ZetaFormat::V0113Ordered
 709        | ZetaFormat::V0114180EditableRegion
 710        | ZetaFormat::V0120GitMergeMarkers
 711        | ZetaFormat::V0131GitMergeMarkersPrefix
 712        | ZetaFormat::V0211Prefill
 713        | ZetaFormat::V0211SeedCoder
 714        | ZetaFormat::v0226Hashline
 715        | ZetaFormat::V0304SeedNoEdits
 716        | ZetaFormat::V0304VariableEdit
 717        | ZetaFormat::V0306SeedMultiRegions
 718        | ZetaFormat::V0316SeedMultiRegions
 719        | ZetaFormat::V0318SeedMultiRegions
 720        | ZetaFormat::V0317SeedMultiRegions => 6,
 721    }
 722}
 723
 724pub fn get_prefill_for_format(
 725    format: ZetaFormat,
 726    context: &str,
 727    editable_range: &Range<usize>,
 728) -> String {
 729    match format {
 730        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
 731        ZetaFormat::V0112MiddleAtEnd
 732        | ZetaFormat::V0113Ordered
 733        | ZetaFormat::V0114180EditableRegion
 734        | ZetaFormat::V0120GitMergeMarkers
 735        | ZetaFormat::V0131GitMergeMarkersPrefix
 736        | ZetaFormat::V0211SeedCoder
 737        | ZetaFormat::v0226Hashline
 738        | ZetaFormat::V0304VariableEdit => String::new(),
 739        ZetaFormat::V0304SeedNoEdits
 740        | ZetaFormat::V0306SeedMultiRegions
 741        | ZetaFormat::V0316SeedMultiRegions
 742        | ZetaFormat::V0318SeedMultiRegions
 743        | ZetaFormat::V0317SeedMultiRegions => String::new(),
 744    }
 745}
 746
 747pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
 748    match format {
 749        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
 750        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
 751        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
 752        ZetaFormat::V0211SeedCoder
 753        | ZetaFormat::V0304SeedNoEdits
 754        | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
 755        ZetaFormat::V0316SeedMultiRegions => Some(multi_region::V0316_END_MARKER),
 756        ZetaFormat::V0318SeedMultiRegions => Some(multi_region::V0318_END_MARKER),
 757        ZetaFormat::V0317SeedMultiRegions => Some(multi_region::V0317_END_MARKER),
 758        ZetaFormat::V0112MiddleAtEnd
 759        | ZetaFormat::V0113Ordered
 760        | ZetaFormat::V0114180EditableRegion
 761        | ZetaFormat::v0226Hashline
 762        | ZetaFormat::V0304VariableEdit => None,
 763    }
 764}
 765
 766pub fn encode_patch_as_output_for_format(
 767    format: ZetaFormat,
 768    old_editable_region: &str,
 769    patch: &str,
 770    cursor_offset: Option<usize>,
 771) -> Result<Option<String>> {
 772    match format {
 773        ZetaFormat::v0226Hashline => {
 774            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
 775        }
 776        ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
 777            old_editable_region,
 778            patch,
 779            cursor_offset,
 780        )
 781        .map(Some),
 782        ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
 783            Ok(seed_coder::no_edits(patch))
 784        }
 785        ZetaFormat::V0316SeedMultiRegions => {
 786            let empty_patch = patch.lines().count() <= 3;
 787            if empty_patch {
 788                let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
 789                let marker_num =
 790                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
 791                let tag = multi_region::marker_tag(marker_num);
 792                Ok(Some(format!(
 793                    "{tag}{tag}{}",
 794                    multi_region::V0316_END_MARKER
 795                )))
 796            } else {
 797                Ok(None)
 798            }
 799        }
 800        ZetaFormat::V0318SeedMultiRegions => {
 801            let empty_patch = patch.lines().count() <= 3;
 802            if empty_patch {
 803                let marker_offsets =
 804                    multi_region::compute_marker_offsets_v0318(old_editable_region);
 805                let marker_num =
 806                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
 807                let tag = multi_region::marker_tag(marker_num);
 808                Ok(Some(format!(
 809                    "{tag}{tag}{}",
 810                    multi_region::V0318_END_MARKER
 811                )))
 812            } else {
 813                Ok(None)
 814            }
 815        }
 816        ZetaFormat::V0317SeedMultiRegions => {
 817            let empty_patch = patch.lines().count() <= 3;
 818            if empty_patch {
 819                let tag = multi_region::marker_tag_relative(0);
 820                Ok(Some(format!(
 821                    "{tag}{tag}{}",
 822                    multi_region::V0317_END_MARKER
 823                )))
 824            } else {
 825                Ok(None)
 826            }
 827        }
 828        _ => Ok(None),
 829    }
 830}
 831
 832/// Given a `ZetaPromptInput`, a format, and a patch (with cursor already
 833/// extracted), produce the expected model output string for training.
 834pub fn format_expected_output(
 835    input: &ZetaPromptInput,
 836    format: ZetaFormat,
 837    patch: &str,
 838    cursor_offset: Option<usize>,
 839) -> Result<String> {
 840    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
 841    let mut old_editable = context[editable_range].to_string();
 842    if !old_editable.is_empty() && !old_editable.ends_with('\n') {
 843        old_editable.push('\n');
 844    }
 845
 846    // Formats with their own output encoding (hashline, variable-edit,
 847    // multi-region empty patches) are handled here.
 848    if let Some(output) =
 849        encode_patch_as_output_for_format(format, &old_editable, patch, cursor_offset)?
 850    {
 851        return Ok(output);
 852    }
 853
 854    let empty_patch = patch.lines().count() <= 3;
 855
 856    match format {
 857        // Multi-region formats: non-empty patches need diff application
 858        // then marker-span encoding.
 859        ZetaFormat::V0316SeedMultiRegions => {
 860            let (new_editable, first_hunk_offset) =
 861                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
 862            let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
 863            multi_region::encode_from_old_and_new_v0316(
 864                &old_editable,
 865                &new_editable,
 866                cursor_in_new,
 867                CURSOR_MARKER,
 868                multi_region::V0316_END_MARKER,
 869            )
 870        }
 871        ZetaFormat::V0318SeedMultiRegions => {
 872            let (new_editable, first_hunk_offset) =
 873                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
 874            let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
 875            multi_region::encode_from_old_and_new_v0318(
 876                &old_editable,
 877                &new_editable,
 878                cursor_in_new,
 879                CURSOR_MARKER,
 880                multi_region::V0318_END_MARKER,
 881            )
 882        }
 883        ZetaFormat::V0317SeedMultiRegions => {
 884            let (new_editable, first_hunk_offset) =
 885                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
 886            let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
 887            multi_region::encode_from_old_and_new_v0317(
 888                &old_editable,
 889                &new_editable,
 890                cursor_in_new,
 891                CURSOR_MARKER,
 892                multi_region::V0317_END_MARKER,
 893            )
 894        }
 895        // V0131-style formats and fallback: produce new editable text with
 896        // cursor marker inserted, followed by the end marker.
 897        _ => {
 898            let (mut result, first_hunk_offset) = if empty_patch {
 899                (old_editable.clone(), None)
 900            } else {
 901                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?
 902            };
 903
 904            if let Some(cursor) = cursor_offset {
 905                let hunk_start = if !empty_patch {
 906                    first_hunk_offset.unwrap_or(0)
 907                } else {
 908                    0
 909                };
 910                let offset = (hunk_start + cursor).min(result.len());
 911                result.insert_str(offset, CURSOR_MARKER);
 912            }
 913
 914            if !result.is_empty() && !result.ends_with('\n') {
 915                result.push('\n');
 916            }
 917
 918            if let Some(end_marker) = output_end_marker_for_format(format) {
 919                result.push_str(end_marker);
 920            }
 921
 922            Ok(result)
 923        }
 924    }
 925}
 926
 927/// Compute the cursor position within the new text after diff application.
 928fn cursor_in_new_text(
 929    cursor_offset: Option<usize>,
 930    first_hunk_offset: Option<usize>,
 931    new_text: &str,
 932) -> Option<usize> {
 933    cursor_offset.map(|cursor| {
 934        let hunk_start = first_hunk_offset.unwrap_or(0);
 935        (hunk_start + cursor).min(new_text.len())
 936    })
 937}
 938
/// The result of parsing a model response: the replacement text for the
/// editable region, where it applies in the excerpt, and the predicted cursor.
///
/// `parsed_output_from_editable_region` builds this value; it removes the
/// first `CURSOR_MARKER` from the text and records the offset where it stood.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
    /// Byte offset of the cursor marker within `new_editable_region`, if present
    pub cursor_offset_in_new_editable_region: Option<usize>,
}
 948
/// A predicted cursor location, expressed in several coordinate systems.
///
/// The field descriptions below reflect how
/// `cursor_position_from_parsed_output` populates this struct.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct CursorPosition {
    /// Lossy string form of the prompt input's `cursor_path`.
    pub path: String,
    /// Number of `'\n'` characters in the excerpt before the editable region
    /// plus those in the new text before the cursor (so zero-based).
    pub row: usize,
    /// Byte distance from the start of the cursor's line to the cursor.
    pub column: usize,
    /// Start of the replaced range in the excerpt plus the cursor offset
    /// within the new editable text.
    pub offset: usize,
    /// The cursor's byte offset within the new editable text (despite the
    /// name, this is not the offset of the editable region itself).
    pub editable_region_offset: usize,
}
 957
 958pub fn parsed_output_from_editable_region(
 959    range_in_excerpt: Range<usize>,
 960    mut new_editable_region: String,
 961) -> ParsedOutput {
 962    let cursor_offset_in_new_editable_region = new_editable_region.find(CURSOR_MARKER);
 963    if let Some(offset) = cursor_offset_in_new_editable_region {
 964        new_editable_region.replace_range(offset..offset + CURSOR_MARKER.len(), "");
 965    }
 966
 967    ParsedOutput {
 968        new_editable_region,
 969        range_in_excerpt,
 970        cursor_offset_in_new_editable_region,
 971    }
 972}
 973
 974/// Parse model output for the given zeta format
 975pub fn parse_zeta2_model_output(
 976    output: &str,
 977    format: ZetaFormat,
 978    prompt_inputs: &ZetaPromptInput,
 979) -> Result<ParsedOutput> {
 980    let output = match output_end_marker_for_format(format) {
 981        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
 982        None => output,
 983    };
 984
 985    let (context, editable_range_in_context, context_range, cursor_offset) =
 986        resolve_cursor_region(prompt_inputs, format);
 987    let context_start = context_range.start;
 988    let old_editable_region = &context[editable_range_in_context.clone()];
 989    let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range_in_context.start);
 990
 991    let (range_in_context, output) = match format {
 992        ZetaFormat::v0226Hashline => (
 993            editable_range_in_context,
 994            if hashline::output_has_edit_commands(output) {
 995                hashline::apply_edit_commands(old_editable_region, output)
 996            } else {
 997                output.to_string()
 998            },
 999        ),
1000        ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
1001        ZetaFormat::V0304SeedNoEdits => (
1002            editable_range_in_context,
1003            if output.starts_with(seed_coder::NO_EDITS) {
1004                old_editable_region.to_string()
1005            } else {
1006                output.to_string()
1007            },
1008        ),
1009        ZetaFormat::V0306SeedMultiRegions => (
1010            editable_range_in_context,
1011            if output.starts_with(seed_coder::NO_EDITS) {
1012                old_editable_region.to_string()
1013            } else {
1014                multi_region::apply_marker_span(old_editable_region, output)?
1015            },
1016        ),
1017        ZetaFormat::V0316SeedMultiRegions => (
1018            editable_range_in_context,
1019            multi_region::apply_marker_span_v0316(old_editable_region, output)?,
1020        ),
1021        ZetaFormat::V0318SeedMultiRegions => (
1022            editable_range_in_context,
1023            multi_region::apply_marker_span_v0318(old_editable_region, output)?,
1024        ),
1025        ZetaFormat::V0317SeedMultiRegions => (
1026            editable_range_in_context,
1027            multi_region::apply_marker_span_v0317(
1028                old_editable_region,
1029                output,
1030                Some(cursor_offset_in_editable),
1031            )?,
1032        ),
1033        _ => (editable_range_in_context, output.to_string()),
1034    };
1035
1036    let range_in_excerpt =
1037        range_in_context.start + context_start..range_in_context.end + context_start;
1038
1039    Ok(parsed_output_from_editable_region(range_in_excerpt, output))
1040}
1041
1042pub fn parse_zeta2_model_output_as_patch(
1043    output: &str,
1044    format: ZetaFormat,
1045    prompt_inputs: &ZetaPromptInput,
1046) -> Result<String> {
1047    let parsed = parse_zeta2_model_output(output, format, prompt_inputs)?;
1048    parsed_output_to_patch(prompt_inputs, parsed)
1049}
1050
1051pub fn cursor_position_from_parsed_output(
1052    prompt_inputs: &ZetaPromptInput,
1053    parsed: &ParsedOutput,
1054) -> Option<CursorPosition> {
1055    let cursor_offset = parsed.cursor_offset_in_new_editable_region?;
1056    let editable_region_offset = parsed.range_in_excerpt.start;
1057    let excerpt = prompt_inputs.cursor_excerpt.as_ref();
1058
1059    let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
1060
1061    let new_editable_region = &parsed.new_editable_region;
1062    let prefix_end = cursor_offset.min(new_editable_region.len());
1063    let new_region_prefix = &new_editable_region[..prefix_end];
1064
1065    let row = editable_region_start_line + new_region_prefix.matches('\n').count();
1066
1067    let column = match new_region_prefix.rfind('\n') {
1068        Some(last_newline) => cursor_offset - last_newline - 1,
1069        None => {
1070            let content_prefix = &excerpt[..editable_region_offset];
1071            let content_column = match content_prefix.rfind('\n') {
1072                Some(last_newline) => editable_region_offset - last_newline - 1,
1073                None => editable_region_offset,
1074            };
1075            content_column + cursor_offset
1076        }
1077    };
1078
1079    Some(CursorPosition {
1080        path: prompt_inputs.cursor_path.to_string_lossy().into_owned(),
1081        row,
1082        column,
1083        offset: editable_region_offset + cursor_offset,
1084        editable_region_offset: cursor_offset,
1085    })
1086}
1087
1088pub fn parsed_output_to_patch(
1089    prompt_inputs: &ZetaPromptInput,
1090    parsed: ParsedOutput,
1091) -> Result<String> {
1092    let range_in_excerpt = parsed.range_in_excerpt;
1093    let excerpt = prompt_inputs.cursor_excerpt.as_ref();
1094    let old_text = excerpt[range_in_excerpt.clone()].to_string();
1095    let mut new_text = parsed.new_editable_region;
1096
1097    let mut old_text_normalized = old_text;
1098    if !new_text.is_empty() && !new_text.ends_with('\n') {
1099        new_text.push('\n');
1100    }
1101    if !old_text_normalized.is_empty() && !old_text_normalized.ends_with('\n') {
1102        old_text_normalized.push('\n');
1103    }
1104
1105    let editable_region_offset = range_in_excerpt.start;
1106    let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count() as u32;
1107    let editable_region_lines = old_text_normalized.lines().count() as u32;
1108
1109    let diff = udiff::unified_diff_with_context(
1110        &old_text_normalized,
1111        &new_text,
1112        editable_region_start_line,
1113        editable_region_start_line,
1114        editable_region_lines,
1115    );
1116
1117    let path = prompt_inputs
1118        .cursor_path
1119        .to_string_lossy()
1120        .trim_start_matches('/')
1121        .to_string();
1122    let formatted_diff = format!("--- a/{path}\n+++ b/{path}\n{diff}");
1123
1124    Ok(udiff::encode_cursor_in_patch(
1125        &formatted_diff,
1126        parsed.cursor_offset_in_new_editable_region,
1127    ))
1128}
1129
/// Returns the pair of ranges that `excerpt_ranges_for_format` selects from
/// `ranges` for `format`.
///
/// `resolve_cursor_region` destructures the pair as
/// `(editable_range, context_range)`.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
1136
1137pub fn resolve_cursor_region(
1138    input: &ZetaPromptInput,
1139    format: ZetaFormat,
1140) -> (&str, Range<usize>, Range<usize>, usize) {
1141    let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges {
1142        let (editable_tokens, context_tokens) = token_limits_for_format(format);
1143        compute_editable_and_context_ranges(
1144            &input.cursor_excerpt,
1145            input.cursor_offset_in_excerpt,
1146            syntax_ranges,
1147            editable_tokens,
1148            context_tokens,
1149        )
1150    } else {
1151        excerpt_range_for_format(format, &input.excerpt_ranges)
1152    };
1153    let context_start = context_range.start;
1154    let context_text = &input.cursor_excerpt[context_range.clone()];
1155    let adjusted_editable =
1156        (editable_range.start - context_start)..(editable_range.end - context_start);
1157    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
1158
1159    (
1160        context_text,
1161        adjusted_editable,
1162        context_range,
1163        adjusted_cursor,
1164    )
1165}
1166
1167pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
1168    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
1169    get_prefill_for_format(format, context, &editable_range)
1170}
1171
1172fn format_edit_history_within_budget(
1173    events: &[Arc<Event>],
1174    file_marker: &str,
1175    edit_history_name: &str,
1176    max_tokens: usize,
1177    max_edit_event_count: usize,
1178) -> String {
1179    let header = format!("{}{}\n", file_marker, edit_history_name);
1180    let header_tokens = estimate_tokens(header.len());
1181    if header_tokens >= max_tokens {
1182        return String::new();
1183    }
1184
1185    let mut event_strings: Vec<String> = Vec::new();
1186    let mut total_tokens = header_tokens;
1187
1188    for event in events.iter().rev().take(max_edit_event_count) {
1189        let mut event_str = String::new();
1190        write_event(&mut event_str, event);
1191        let event_tokens = estimate_tokens(event_str.len());
1192
1193        if total_tokens + event_tokens > max_tokens {
1194            break;
1195        }
1196        total_tokens += event_tokens;
1197        event_strings.push(event_str);
1198    }
1199
1200    if event_strings.is_empty() {
1201        return String::new();
1202    }
1203
1204    let mut result = header;
1205    for event_str in event_strings.iter().rev() {
1206        result.push_str(event_str);
1207    }
1208    result
1209}
1210
1211fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
1212    let needs_newline = !excerpt.text.ends_with('\n');
1213    let needs_ellipsis = excerpt.row_range.end < file_max_row;
1214    let len = excerpt.text.len()
1215        + if needs_newline { "\n".len() } else { 0 }
1216        + if needs_ellipsis { "...\n".len() } else { 0 };
1217    estimate_tokens(len)
1218}
1219
1220pub fn format_related_files_within_budget(
1221    related_files: &[RelatedFile],
1222    file_prefix: &str,
1223    file_suffix: &str,
1224    max_tokens: usize,
1225) -> String {
1226    struct ExcerptCandidate {
1227        file_ix: usize,
1228        excerpt_ix: usize,
1229        order: usize,
1230    }
1231
1232    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
1233        .iter()
1234        .enumerate()
1235        .flat_map(|(file_ix, file)| {
1236            file.excerpts
1237                .iter()
1238                .enumerate()
1239                .map(move |(excerpt_ix, e)| ExcerptCandidate {
1240                    file_ix,
1241                    excerpt_ix,
1242                    order: e.order,
1243                })
1244        })
1245        .collect();
1246
1247    // Pre-compute file header strings and their token costs.
1248    let file_headers: Vec<String> = related_files
1249        .iter()
1250        .map(|file| {
1251            let path_str = file.path.to_string_lossy();
1252            format!("{}{}\n", file_prefix, path_str)
1253        })
1254        .collect();
1255
1256    // Sort the excerpts by their order and determine how many fit within the budget.
1257    let mut total_tokens = 0;
1258    let mut included_excerpt_count = 0_usize;
1259    let mut included_file_indices = vec![false; related_files.len()];
1260    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
1261    for candidate in &excerpt_candidates {
1262        let file = &related_files[candidate.file_ix];
1263        let excerpt = &file.excerpts[candidate.excerpt_ix];
1264        let file_already_included = included_file_indices[candidate.file_ix];
1265        let header_cost = if file_already_included {
1266            0
1267        } else {
1268            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
1269        };
1270        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
1271        if total_tokens + header_cost + excerpt_cost > max_tokens {
1272            break;
1273        }
1274        total_tokens += header_cost + excerpt_cost;
1275        if !file_already_included {
1276            included_file_indices[candidate.file_ix] = true;
1277        }
1278        included_excerpt_count += 1;
1279    }
1280
1281    excerpt_candidates.truncate(included_excerpt_count);
1282    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
1283
1284    // Render all of the files that fit within the token budget, in the original order.
1285    let mut result = String::new();
1286    let mut last_file_ix = None;
1287    for candidate in &excerpt_candidates {
1288        if last_file_ix != Some(candidate.file_ix) {
1289            if last_file_ix.is_some() {
1290                result.push_str(file_suffix);
1291            }
1292            result.push_str(&file_headers[candidate.file_ix]);
1293            last_file_ix = Some(candidate.file_ix);
1294        }
1295        let file = &related_files[candidate.file_ix];
1296        let excerpt = &file.excerpts[candidate.excerpt_ix];
1297        result.push_str(&excerpt.text);
1298        if !result.ends_with('\n') {
1299            result.push('\n');
1300        }
1301        if excerpt.row_range.end < file.max_row {
1302            result.push_str("...\n");
1303        }
1304    }
1305
1306    result
1307}
1308
1309pub fn write_related_files(
1310    prompt: &mut String,
1311    related_files: &[RelatedFile],
1312) -> Vec<Range<usize>> {
1313    let mut ranges = Vec::new();
1314    for file in related_files {
1315        let start = prompt.len();
1316        let path_str = file.path.to_string_lossy();
1317        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1318        for excerpt in &file.excerpts {
1319            prompt.push_str(&excerpt.text);
1320            if !prompt.ends_with('\n') {
1321                prompt.push('\n');
1322            }
1323            if excerpt.row_range.end < file.max_row {
1324                prompt.push_str("...\n");
1325            }
1326        }
1327        let end = prompt.len();
1328        ranges.push(start..end);
1329    }
1330    ranges
1331}
1332
1333mod v0112_middle_at_end {
1334    use super::*;
1335
1336    pub fn special_tokens() -> &'static [&'static str] {
1337        &[
1338            "<|fim_prefix|>",
1339            "<|fim_suffix|>",
1340            "<|fim_middle|>",
1341            "<|file_sep|>",
1342            CURSOR_MARKER,
1343        ]
1344    }
1345
1346    pub fn write_cursor_excerpt_section(
1347        prompt: &mut String,
1348        path: &Path,
1349        context: &str,
1350        editable_range: &Range<usize>,
1351        cursor_offset: usize,
1352    ) {
1353        let path_str = path.to_string_lossy();
1354        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1355
1356        prompt.push_str("<|fim_prefix|>\n");
1357        prompt.push_str(&context[..editable_range.start]);
1358
1359        prompt.push_str("<|fim_suffix|>\n");
1360        prompt.push_str(&context[editable_range.end..]);
1361        if !prompt.ends_with('\n') {
1362            prompt.push('\n');
1363        }
1364
1365        prompt.push_str("<|fim_middle|>current\n");
1366        prompt.push_str(&context[editable_range.start..cursor_offset]);
1367        prompt.push_str(CURSOR_MARKER);
1368        prompt.push_str(&context[cursor_offset..editable_range.end]);
1369        if !prompt.ends_with('\n') {
1370            prompt.push('\n');
1371        }
1372
1373        prompt.push_str("<|fim_middle|>updated\n");
1374    }
1375}
1376
1377mod v0113_ordered {
1378    use super::*;
1379
1380    pub fn special_tokens() -> &'static [&'static str] {
1381        &[
1382            "<|fim_prefix|>",
1383            "<|fim_suffix|>",
1384            "<|fim_middle|>",
1385            "<|file_sep|>",
1386            CURSOR_MARKER,
1387        ]
1388    }
1389
1390    pub fn write_cursor_excerpt_section(
1391        prompt: &mut String,
1392        path: &Path,
1393        context: &str,
1394        editable_range: &Range<usize>,
1395        cursor_offset: usize,
1396    ) {
1397        let path_str = path.to_string_lossy();
1398        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1399
1400        prompt.push_str("<|fim_prefix|>\n");
1401        prompt.push_str(&context[..editable_range.start]);
1402        if !prompt.ends_with('\n') {
1403            prompt.push('\n');
1404        }
1405
1406        prompt.push_str("<|fim_middle|>current\n");
1407        prompt.push_str(&context[editable_range.start..cursor_offset]);
1408        prompt.push_str(CURSOR_MARKER);
1409        prompt.push_str(&context[cursor_offset..editable_range.end]);
1410        if !prompt.ends_with('\n') {
1411            prompt.push('\n');
1412        }
1413
1414        prompt.push_str("<|fim_suffix|>\n");
1415        prompt.push_str(&context[editable_range.end..]);
1416        if !prompt.ends_with('\n') {
1417            prompt.push('\n');
1418        }
1419
1420        prompt.push_str("<|fim_middle|>updated\n");
1421    }
1422}
1423
mod v0114180_editable_region {
    use super::*;

    /// Special tokens for this format; delegates to `v0113_ordered`, so the
    /// two formats share the same token list.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
1431
1432pub mod v0120_git_merge_markers {
1433    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1434    //!
1435    //! Example prompt:
1436    //!
1437    //! <|file_sep|>path/to/target_file.py
1438    //! <|fim_prefix|>
1439    //! code before editable region
1440    //! <|fim_suffix|>
1441    //! code after editable region
1442    //! <|fim_middle|>
1443    //! <<<<<<< CURRENT
1444    //! code that
1445    //! needs to<|user_cursor|>
1446    //! be rewritten
1447    //! =======
1448    //!
1449    //! Expected output (should be generated by the model):
1450    //!
1451    //! updated
1452    //! code with
1453    //! changes applied
1454    //! >>>>>>> UPDATED
1455
1456    use super::*;
1457
1458    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1459    pub const SEPARATOR: &str = "=======\n";
1460    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1461
1462    pub fn special_tokens() -> &'static [&'static str] {
1463        &[
1464            "<|fim_prefix|>",
1465            "<|fim_suffix|>",
1466            "<|fim_middle|>",
1467            "<|file_sep|>",
1468            START_MARKER,
1469            SEPARATOR,
1470            END_MARKER,
1471            CURSOR_MARKER,
1472        ]
1473    }
1474
1475    pub fn write_cursor_excerpt_section(
1476        prompt: &mut String,
1477        path: &Path,
1478        context: &str,
1479        editable_range: &Range<usize>,
1480        cursor_offset: usize,
1481    ) {
1482        let path_str = path.to_string_lossy();
1483        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1484
1485        prompt.push_str("<|fim_prefix|>");
1486        prompt.push_str(&context[..editable_range.start]);
1487
1488        prompt.push_str("<|fim_suffix|>");
1489        prompt.push_str(&context[editable_range.end..]);
1490        if !prompt.ends_with('\n') {
1491            prompt.push('\n');
1492        }
1493
1494        prompt.push_str("<|fim_middle|>");
1495        prompt.push_str(START_MARKER);
1496        prompt.push_str(&context[editable_range.start..cursor_offset]);
1497        prompt.push_str(CURSOR_MARKER);
1498        prompt.push_str(&context[cursor_offset..editable_range.end]);
1499        if !prompt.ends_with('\n') {
1500            prompt.push('\n');
1501        }
1502        prompt.push_str(SEPARATOR);
1503    }
1504}
1505
1506pub mod v0131_git_merge_markers_prefix {
1507    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1508    //!
1509    //! Example prompt:
1510    //!
1511    //! <|file_sep|>path/to/target_file.py
1512    //! <|fim_prefix|>
1513    //! code before editable region
1514    //! <<<<<<< CURRENT
1515    //! code that
1516    //! needs to<|user_cursor|>
1517    //! be rewritten
1518    //! =======
1519    //! <|fim_suffix|>
1520    //! code after editable region
1521    //! <|fim_middle|>
1522    //!
1523    //! Expected output (should be generated by the model):
1524    //!
1525    //! updated
1526    //! code with
1527    //! changes applied
1528    //! >>>>>>> UPDATED
1529
1530    use super::*;
1531
1532    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1533    pub const SEPARATOR: &str = "=======\n";
1534    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1535
1536    pub fn special_tokens() -> &'static [&'static str] {
1537        &[
1538            "<|fim_prefix|>",
1539            "<|fim_suffix|>",
1540            "<|fim_middle|>",
1541            "<|file_sep|>",
1542            START_MARKER,
1543            SEPARATOR,
1544            END_MARKER,
1545            CURSOR_MARKER,
1546        ]
1547    }
1548
1549    pub fn write_cursor_excerpt_section(
1550        prompt: &mut String,
1551        path: &Path,
1552        context: &str,
1553        editable_range: &Range<usize>,
1554        cursor_offset: usize,
1555    ) {
1556        let path_str = path.to_string_lossy();
1557        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1558
1559        prompt.push_str("<|fim_prefix|>");
1560        prompt.push_str(&context[..editable_range.start]);
1561        prompt.push_str(START_MARKER);
1562        prompt.push_str(&context[editable_range.start..cursor_offset]);
1563        prompt.push_str(CURSOR_MARKER);
1564        prompt.push_str(&context[cursor_offset..editable_range.end]);
1565        if !prompt.ends_with('\n') {
1566            prompt.push('\n');
1567        }
1568        prompt.push_str(SEPARATOR);
1569
1570        prompt.push_str("<|fim_suffix|>");
1571        prompt.push_str(&context[editable_range.end..]);
1572        if !prompt.ends_with('\n') {
1573            prompt.push('\n');
1574        }
1575
1576        prompt.push_str("<|fim_middle|>");
1577    }
1578}
1579
1580pub mod v0211_prefill {
1581    use super::*;
1582
1583    pub fn special_tokens() -> &'static [&'static str] {
1584        v0131_git_merge_markers_prefix::special_tokens()
1585    }
1586
1587    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1588        let editable_region = &context[editable_range.start..editable_range.end];
1589
1590        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1591        let prefill_len = editable_region.floor_char_boundary(prefill_len);
1592
1593        // Find a token boundary to avoid splitting tokens in the prefill.
1594        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1595        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1596        // the \n and consume any consecutive \n characters after it.
1597        let prefill = &editable_region[..prefill_len];
1598        match prefill.rfind('\n') {
1599            Some(pos) => {
1600                let mut end = pos + 1;
1601                while end < editable_region.len()
1602                    && editable_region.as_bytes().get(end) == Some(&b'\n')
1603                {
1604                    end += 1;
1605                }
1606                editable_region[..end].to_string()
1607            }
1608            // No newline found. Fall back to splitting before the last space
1609            // (word-level boundary)
1610            None => match prefill.rfind(' ') {
1611                Some(pos) => prefill[..pos].to_string(),
1612                None => prefill.to_string(),
1613            },
1614        }
1615    }
1616}
1617
1618pub mod hashline {
1619
1620    use std::fmt::Display;
1621
    /// Line written at the very end of the cursor excerpt section by
    /// `write_cursor_excerpt_section`.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Written immediately before the hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";

    use super::*;

    /// Prefix that `parse_edit_commands` recognizes as a set command.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix that `parse_edit_commands` recognizes as an insert command.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
    /// Listed among the special tokens; presumably the output used when no
    /// edit is needed (its handling is not visible in this part of the file).
    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1630
1631    pub fn special_tokens() -> &'static [&'static str] {
1632        return &[
1633            SET_COMMAND_MARKER,
1634            "<|set_range|>",
1635            INSERT_COMMAND_MARKER,
1636            NO_EDITS_COMMAND_MARKER,
1637            CURSOR_MARKER,
1638            "<|file_sep|>",
1639            "<|fim_prefix|>",
1640            "<|fim_suffix|>",
1641            "<|fim_middle|>",
1642        ];
1643    }
1644
    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
    ///
    /// Its `Display` form is `{index}:{hash}`, with the hash printed as two
    /// lowercase hex digits.
    #[derive(Debug, Clone, PartialEq, Eq)]
    struct LineRef {
        /// Index of the line within the editable text (the `enumerate` index
        /// used when the region is written, so it starts at 0).
        index: usize,
        /// Hash of the line's bytes, as produced by `hash_line`.
        hash: u8,
    }
1651
1652    impl Display for LineRef {
1653        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1654            write!(f, "{}:{:02x}", self.index, self.hash)
1655        }
1656    }
1657
1658    pub fn hash_line(line: &[u8]) -> u8 {
1659        let mut h: u8 = 0;
1660        for &byte in line {
1661            h = h.wrapping_add(byte);
1662        }
1663        return h;
1664    }
1665
1666    /// Write the hashline-encoded editable region into `out`. Each line of
1667    /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1668    /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1669    /// to the start of `editable_text`).
1670    pub fn write_hashline_editable_region(
1671        out: &mut String,
1672        editable_text: &str,
1673        cursor_offset_in_editable: usize,
1674    ) {
1675        let mut offset = 0;
1676        for (i, line) in editable_text.lines().enumerate() {
1677            let (head, cursor, tail) = if cursor_offset_in_editable > offset
1678                && cursor_offset_in_editable < offset + line.len()
1679            {
1680                (
1681                    &line[..cursor_offset_in_editable - offset],
1682                    CURSOR_MARKER,
1683                    &line[cursor_offset_in_editable - offset..],
1684                )
1685            } else {
1686                (line, "", "")
1687            };
1688            write!(
1689                out,
1690                "\n{}|{head}{cursor}{tail}",
1691                LineRef {
1692                    index: i,
1693                    hash: hash_line(line.as_bytes())
1694                }
1695            )
1696            .unwrap();
1697            offset += line.len() + 1;
1698        }
1699    }
1700
1701    pub fn write_cursor_excerpt_section(
1702        prompt: &mut String,
1703        path: &Path,
1704        context: &str,
1705        editable_range: &Range<usize>,
1706        cursor_offset: usize,
1707    ) {
1708        let path_str = path.to_string_lossy();
1709        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1710
1711        prompt.push_str("<|fim_prefix|>\n");
1712        prompt.push_str(&context[..editable_range.start]);
1713        prompt.push_str(START_MARKER);
1714
1715        let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1716        let editable_region = &context[editable_range.clone()];
1717        write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1718
1719        if !prompt.ends_with('\n') {
1720            prompt.push('\n');
1721        }
1722
1723        prompt.push_str("<|fim_suffix|>\n");
1724        prompt.push_str(&context[editable_range.end..]);
1725        if !prompt.ends_with('\n') {
1726            prompt.push('\n');
1727        }
1728
1729        prompt.push_str(END_MARKER);
1730        prompt.push('\n');
1731    }
1732
    /// A single edit command parsed from the model output.
    ///
    /// The `content` of each command borrows from the model output string it
    /// was parsed from.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            /// First line of the replaced range.
            start: LineRef,
            /// Last line of the replaced range (inclusive).
            end: LineRef,
            /// Replacement text for the range.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            /// Line after which the content is inserted, if any.
            after: Option<LineRef>,
            /// Text to insert.
            content: &'a str,
        },
    }
1750
1751    /// Parse a line reference like `3:c3` into a `LineRef`.
1752    fn parse_line_ref(s: &str) -> Option<LineRef> {
1753        let (idx_str, hash_str) = s.split_once(':')?;
1754        let index = idx_str.parse::<usize>().ok()?;
1755        let hash = u8::from_str_radix(hash_str, 16).ok()?;
1756        Some(LineRef { index, hash })
1757    }
1758
1759    /// Parse the model output into a list of `EditCommand`s.
1760    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1761        let mut commands = Vec::new();
1762        let mut offset = 0usize;
1763
1764        while offset < model_output.len() {
1765            let next_nl = model_output[offset..]
1766                .find('\n')
1767                .map(|i| offset + i)
1768                .unwrap_or(model_output.len());
1769            let line = &model_output[offset..next_nl];
1770            let line_end = if next_nl < model_output.len() {
1771                next_nl + 1
1772            } else {
1773                next_nl
1774            };
1775
1776            let trimmed = line.trim();
1777            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1778                (true, spec)
1779            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1780                (false, spec)
1781            } else {
1782                offset = line_end;
1783                continue;
1784            };
1785
1786            let mut content_end = line_end;
1787            let mut scan = line_end;
1788
1789            while scan < model_output.len() {
1790                let body_nl = model_output[scan..]
1791                    .find('\n')
1792                    .map(|i| scan + i)
1793                    .unwrap_or(model_output.len());
1794                let body_line = &model_output[scan..body_nl];
1795                if body_line.trim().starts_with(SET_COMMAND_MARKER)
1796                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1797                {
1798                    break;
1799                }
1800                scan = if body_nl < model_output.len() {
1801                    body_nl + 1
1802                } else {
1803                    body_nl
1804                };
1805                content_end = scan;
1806            }
1807
1808            let content = &model_output[line_end..content_end];
1809
1810            if is_set {
1811                if let Some((start_str, end_str)) = specifier.split_once('-') {
1812                    if let (Some(start), Some(end)) =
1813                        (parse_line_ref(start_str), parse_line_ref(end_str))
1814                    {
1815                        commands.push(EditCommand::Set {
1816                            start,
1817                            end,
1818                            content,
1819                        });
1820                    }
1821                } else if let Some(target) = parse_line_ref(specifier) {
1822                    commands.push(EditCommand::Set {
1823                        start: target.clone(),
1824                        end: target,
1825                        content,
1826                    });
1827                }
1828            } else {
1829                let after = parse_line_ref(specifier);
1830                commands.push(EditCommand::Insert { after, content });
1831            }
1832
1833            offset = scan;
1834        }
1835
1836        commands
1837    }
1838
1839    /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1840    /// (as opposed to being a plain full-replacement output).
1841    /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1842    /// editable region, returning the plain text content.
1843    pub fn strip_hashline_prefixes(region: &str) -> String {
1844        let mut decoded: String = region
1845            .lines()
1846            .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1847            .collect::<Vec<_>>()
1848            .join("\n");
1849        if region.ends_with('\n') {
1850            decoded.push('\n');
1851        }
1852        decoded
1853    }
1854
1855    pub fn output_has_edit_commands(model_output: &str) -> bool {
1856        model_output.contains(SET_COMMAND_MARKER)
1857            || model_output.contains(INSERT_COMMAND_MARKER)
1858            || model_output.contains(NO_EDITS_COMMAND_MARKER)
1859    }
1860
1861    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1862    /// original editable region text.
1863    ///
1864    /// `editable_region` is the original text of the editable region (without hash
1865    /// prefixes). `model_output` is the raw model response containing edit commands.
1866    ///
1867    /// Returns the full replacement text for the editable region.
1868    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1869        if model_output
1870            .trim_start()
1871            .starts_with(NO_EDITS_COMMAND_MARKER)
1872        {
1873            return editable_region.to_string();
1874        }
1875
1876        let original_lines: Vec<&str> = editable_region.lines().collect();
1877        let old_hashes: Vec<u8> = original_lines
1878            .iter()
1879            .map(|line| hash_line(line.as_bytes()))
1880            .collect();
1881
1882        let commands = parse_edit_commands(model_output);
1883
1884        // For set operations: indexed by start line → Some((end line index, content))
1885        // For insert operations: indexed by line index → vec of content to insert after
1886        // Insert-before-first is tracked separately.
1887        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1888        let mut insert_before_first: Vec<&str> = Vec::new();
1889        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1890
1891        for command in &commands {
1892            match command {
1893                EditCommand::Set {
1894                    start,
1895                    end,
1896                    content,
1897                } => {
1898                    if start.index < old_hashes.len()
1899                        && end.index < old_hashes.len()
1900                        && start.index <= end.index
1901                        && old_hashes[start.index] == start.hash
1902                        && old_hashes[end.index] == end.hash
1903                    {
1904                        set_ops[start.index] = Some((end.index, *content));
1905                    }
1906                }
1907                EditCommand::Insert { after, content } => match after {
1908                    None => insert_before_first.push(*content),
1909                    Some(line_ref) => {
1910                        if line_ref.index < old_hashes.len()
1911                            && old_hashes[line_ref.index] == line_ref.hash
1912                        {
1913                            insert_after[line_ref.index].push(*content);
1914                        }
1915                    }
1916                },
1917            }
1918        }
1919
1920        let mut result = String::new();
1921
1922        // Emit any insertions before the first line
1923        for content in &insert_before_first {
1924            result.push_str(content);
1925            if !content.ends_with('\n') {
1926                result.push('\n');
1927            }
1928        }
1929
1930        let mut i = 0;
1931        while i < original_lines.len() {
1932            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1933                // Replace lines i..=end_index with the replacement content
1934                result.push_str(replacement);
1935                if !replacement.is_empty() && !replacement.ends_with('\n') {
1936                    result.push('\n');
1937                }
1938                // Emit any insertions after the end of this set range
1939                if *end_index < insert_after.len() {
1940                    for content in &insert_after[*end_index] {
1941                        result.push_str(content);
1942                        if !content.ends_with('\n') {
1943                            result.push('\n');
1944                        }
1945                    }
1946                }
1947                i = end_index + 1;
1948            } else {
1949                // Keep the original line
1950                result.push_str(original_lines[i]);
1951                result.push('\n');
1952                // Emit any insertions after this line
1953                for content in &insert_after[i] {
1954                    result.push_str(content);
1955                    if !content.ends_with('\n') {
1956                        result.push('\n');
1957                    }
1958                }
1959                i += 1;
1960            }
1961        }
1962
1963        // Preserve trailing newline behavior: if the original ended with a
1964        // newline the result already has one; if it didn't, trim the extra one
1965        // we added.
1966        if !editable_region.ends_with('\n') && result.ends_with('\n') {
1967            result.pop();
1968        }
1969
1970        result
1971    }
1972
1973    /// Convert a unified diff patch into hashline edit commands.
1974    ///
1975    /// Parses the unified diff `patch` directly to determine which lines of
1976    /// `old_text` are deleted/replaced and what new lines are added, then emits
1977    /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1978    /// `{index}:{hash}` identifiers.
1979    ///
1980    /// `cursor_offset` is an optional byte offset into the first hunk's new
1981    /// text (context + additions) where the cursor marker should be placed.
1982    pub fn patch_to_edit_commands(
1983        old_text: &str,
1984        patch: &str,
1985        cursor_offset: Option<usize>,
1986    ) -> Result<String> {
1987        let old_lines: Vec<&str> = old_text.lines().collect();
1988        let old_hashes: Vec<u8> = old_lines
1989            .iter()
1990            .map(|line| hash_line(line.as_bytes()))
1991            .collect();
1992
1993        let mut result = String::new();
1994        let mut first_hunk = true;
1995
1996        struct Hunk<'a> {
1997            line_range: Range<usize>,
1998            new_text_lines: Vec<&'a str>,
1999            cursor_line_offset_in_new_text: Option<(usize, usize)>,
2000        }
2001
2002        // Parse the patch line by line. We only care about hunk headers,
2003        // context, deletions, and additions.
2004        let mut old_line_index: usize = 0;
2005        let mut current_hunk: Option<Hunk> = None;
2006        // Byte offset tracking within the hunk's new text for cursor placement.
2007        let mut new_text_byte_offset: usize = 0;
2008        // The line index of the last old line seen before/in the current hunk
2009        // (used for insert-after reference).
2010        let mut last_old_line_before_hunk: Option<usize> = None;
2011
2012        fn flush_hunk(
2013            hunk: Hunk,
2014            last_old_line: Option<usize>,
2015            result: &mut String,
2016            old_hashes: &[u8],
2017        ) {
2018            if hunk.line_range.is_empty() {
2019                // Pure insertion — reference the old line to insert after when in bounds.
2020                if let Some(after) = last_old_line
2021                    && let Some(&hash) = old_hashes.get(after)
2022                {
2023                    write!(
2024                        result,
2025                        "{INSERT_COMMAND_MARKER}{}\n",
2026                        LineRef { index: after, hash }
2027                    )
2028                    .unwrap();
2029                } else {
2030                    result.push_str(INSERT_COMMAND_MARKER);
2031                    result.push('\n');
2032                }
2033            } else {
2034                let start = hunk.line_range.start;
2035                let end_exclusive = hunk.line_range.end;
2036                let deleted_line_count = end_exclusive.saturating_sub(start);
2037
2038                if deleted_line_count == 1 {
2039                    if let Some(&hash) = old_hashes.get(start) {
2040                        write!(
2041                            result,
2042                            "{SET_COMMAND_MARKER}{}\n",
2043                            LineRef { index: start, hash }
2044                        )
2045                        .unwrap();
2046                    } else {
2047                        result.push_str(SET_COMMAND_MARKER);
2048                        result.push('\n');
2049                    }
2050                } else {
2051                    let end_inclusive = end_exclusive - 1;
2052                    match (
2053                        old_hashes.get(start).copied(),
2054                        old_hashes.get(end_inclusive).copied(),
2055                    ) {
2056                        (Some(start_hash), Some(end_hash)) => {
2057                            write!(
2058                                result,
2059                                "{SET_COMMAND_MARKER}{}-{}\n",
2060                                LineRef {
2061                                    index: start,
2062                                    hash: start_hash
2063                                },
2064                                LineRef {
2065                                    index: end_inclusive,
2066                                    hash: end_hash
2067                                }
2068                            )
2069                            .unwrap();
2070                        }
2071                        _ => {
2072                            result.push_str(SET_COMMAND_MARKER);
2073                            result.push('\n');
2074                        }
2075                    }
2076                }
2077            }
2078            for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
2079                if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
2080                    && line_offset == cursor_line_offset
2081                {
2082                    result.push_str(&line[..char_offset]);
2083                    result.push_str(CURSOR_MARKER);
2084                    result.push_str(&line[char_offset..]);
2085                    continue;
2086                }
2087
2088                result.push_str(line);
2089            }
2090        }
2091
2092        for raw_line in patch.split_inclusive('\n') {
2093            if raw_line.starts_with("@@") {
2094                // Flush any pending change hunk from a previous patch hunk.
2095                if let Some(hunk) = current_hunk.take() {
2096                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2097                }
2098
2099                // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
2100                // We intentionally do not trust old_start as a direct local index into `old_text`,
2101                // because some patches are produced against a larger file region and carry
2102                // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
2103                if first_hunk {
2104                    new_text_byte_offset = 0;
2105                    first_hunk = false;
2106                }
2107                continue;
2108            }
2109
2110            if raw_line.starts_with("---") || raw_line.starts_with("+++") {
2111                continue;
2112            }
2113            if raw_line.starts_with("\\ No newline") {
2114                continue;
2115            }
2116
2117            if raw_line.starts_with('-') {
2118                // Extend or start a change hunk with this deleted old line.
2119                match &mut current_hunk {
2120                    Some(Hunk {
2121                        line_range: range, ..
2122                    }) => range.end = old_line_index + 1,
2123                    None => {
2124                        current_hunk = Some(Hunk {
2125                            line_range: old_line_index..old_line_index + 1,
2126                            new_text_lines: Vec::new(),
2127                            cursor_line_offset_in_new_text: None,
2128                        });
2129                    }
2130                }
2131                old_line_index += 1;
2132            } else if let Some(added_content) = raw_line.strip_prefix('+') {
2133                // Place cursor marker if cursor_offset falls within this line.
2134                let mut cursor_line_offset = None;
2135                if let Some(cursor_off) = cursor_offset
2136                    && (first_hunk
2137                        || cursor_off >= new_text_byte_offset
2138                            && cursor_off <= new_text_byte_offset + added_content.len())
2139                {
2140                    let line_offset = added_content.floor_char_boundary(
2141                        cursor_off
2142                            .saturating_sub(new_text_byte_offset)
2143                            .min(added_content.len()),
2144                    );
2145                    cursor_line_offset = Some(line_offset);
2146                }
2147
2148                new_text_byte_offset += added_content.len();
2149
2150                let hunk = current_hunk.get_or_insert(Hunk {
2151                    line_range: old_line_index..old_line_index,
2152                    new_text_lines: vec![],
2153                    cursor_line_offset_in_new_text: None,
2154                });
2155                hunk.new_text_lines.push(added_content);
2156                hunk.cursor_line_offset_in_new_text = cursor_line_offset
2157                    .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
2158            } else {
2159                // Context line (starts with ' ' or is empty).
2160                if let Some(hunk) = current_hunk.take() {
2161                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2162                }
2163                last_old_line_before_hunk = Some(old_line_index);
2164                old_line_index += 1;
2165                let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
2166                new_text_byte_offset += content.len();
2167            }
2168        }
2169
2170        // Flush final group.
2171        if let Some(hunk) = current_hunk.take() {
2172            flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2173        }
2174
2175        // Trim a single trailing newline.
2176        if result.ends_with('\n') {
2177            result.pop();
2178        }
2179
2180        if result.is_empty() {
2181            return Ok(NO_EDITS_COMMAND_MARKER.to_string());
2182        }
2183
2184        Ok(result)
2185    }
2186
2187    #[cfg(test)]
2188    mod tests {
2189        use super::*;
2190        use indoc::indoc;
2191
        /// Table-driven check of `hashline::write_cursor_excerpt_section`.
        ///
        /// Each case renders the cursor-excerpt section for the path `test.rs` into
        /// an empty prompt and compares the result byte-for-byte with `expected`.
        #[test]
        fn test_format_cursor_region() {
            struct Case {
                // Label reported in the assertion message when the case fails.
                name: &'static str,
                // Full excerpt text passed as `context`.
                context: &'static str,
                // Byte range of `context` that forms the editable region.
                editable_range: Range<usize>,
                // Cursor position as a byte offset into `context`.
                cursor_offset: usize,
                // Exact prompt text the call is expected to produce.
                expected: &'static str,
            }

            let cases = [
                Case {
                    name: "basic_cursor_placement",
                    context: "hello world\n",
                    editable_range: 0..12,
                    cursor_offset: 5,
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:5c|hello<|user_cursor|> world
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "multiline_cursor_on_second_line",
                    context: "aaa\nbbb\nccc\n",
                    editable_range: 0..12,
                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:23|aaa
                    1:26|b<|user_cursor|>bb
                    2:29|ccc
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "no_trailing_newline_in_context",
                    context: "line1\nline2",
                    editable_range: 0..11,
                    cursor_offset: 3,
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:d9|lin<|user_cursor|>e1
                    1:da|line2
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "leading_newline_in_editable_region",
                    context: "\nabc\n",
                    editable_range: 0..5,
                    cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:00|
                    1:26|a<|user_cursor|>bc
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "with_suffix",
                    context: "abc\ndef",
                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
                    cursor_offset: 2,
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:26|ab<|user_cursor|>c
                    <|fim_suffix|>
                    def
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "unicode_two_byte_chars",
                    context: "héllo\n",
                    editable_range: 0..7,
                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:1b|hé<|user_cursor|>llo
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "unicode_three_byte_chars",
                    context: "日本語\n",
                    editable_range: 0..10,
                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:80|日本<|user_cursor|>語
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "unicode_four_byte_chars",
                    context: "a🌍b\n",
                    editable_range: 0..7,
                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:6b|a🌍<|user_cursor|>b
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "cursor_at_start_of_region_not_placed",
                    context: "abc\n",
                    editable_range: 0..4,
                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:26|abc
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "cursor_at_end_of_line_not_placed",
                    context: "abc\ndef\n",
                    editable_range: 0..8,
                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:26|abc
                    1:2f|def
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "cursor_offset_relative_to_context_not_editable_region",
                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
                    // write_cursor_excerpt_section must subtract it before comparing against
                    // per-line offsets within the editable region.
                    context: "pre\naaa\nbbb\nsuf\n",
                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    pre
                    <|fim_middle|>current
                    0:23|aaa
                    1:26|b<|user_cursor|>bb
                    <|fim_suffix|>
                    suf
                    <|fim_middle|>updated
                    "},
                },
            ];

            // Every case starts from an empty prompt so cases cannot affect each other.
            for case in &cases {
                let mut prompt = String::new();
                hashline::write_cursor_excerpt_section(
                    &mut prompt,
                    Path::new("test.rs"),
                    case.context,
                    &case.editable_range,
                    case.cursor_offset,
                );
                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
            }
        }
2383
2384        #[test]
2385        fn test_apply_edit_commands() {
2386            struct Case {
2387                name: &'static str,
2388                original: &'static str,
2389                model_output: &'static str,
2390                expected: &'static str,
2391            }
2392
2393            let cases = vec![
2394                Case {
2395                    name: "set_single_line",
2396                    original: indoc! {"
2397                    let mut total = 0;
2398                    for product in products {
2399                        total += ;
2400                    }
2401                    total
2402                "},
2403                    model_output: indoc! {"
2404                    <|set|>2:87
2405                        total += product.price;
2406                "},
2407                    expected: indoc! {"
2408                    let mut total = 0;
2409                    for product in products {
2410                        total += product.price;
2411                    }
2412                    total
2413                "},
2414                },
2415                Case {
2416                    name: "set_range",
2417                    original: indoc! {"
2418                    fn foo() {
2419                        let x = 1;
2420                        let y = 2;
2421                        let z = 3;
2422                    }
2423                "},
2424                    model_output: indoc! {"
2425                    <|set|>1:46-3:4a
2426                        let sum = 6;
2427                "},
2428                    expected: indoc! {"
2429                    fn foo() {
2430                        let sum = 6;
2431                    }
2432                "},
2433                },
2434                Case {
2435                    name: "insert_after_line",
2436                    original: indoc! {"
2437                    fn main() {
2438                        let x = 1;
2439                    }
2440                "},
2441                    model_output: indoc! {"
2442                    <|insert|>1:46
2443                        let y = 2;
2444                "},
2445                    expected: indoc! {"
2446                    fn main() {
2447                        let x = 1;
2448                        let y = 2;
2449                    }
2450                "},
2451                },
2452                Case {
2453                    name: "insert_before_first",
2454                    original: indoc! {"
2455                    let x = 1;
2456                    let y = 2;
2457                "},
2458                    model_output: indoc! {"
2459                    <|insert|>
2460                    use std::io;
2461                "},
2462                    expected: indoc! {"
2463                    use std::io;
2464                    let x = 1;
2465                    let y = 2;
2466                "},
2467                },
2468                Case {
2469                    name: "set_with_cursor_marker",
2470                    original: indoc! {"
2471                    fn main() {
2472                        println!();
2473                    }
2474                "},
2475                    model_output: indoc! {"
2476                    <|set|>1:34
2477                        eprintln!(\"<|user_cursor|>\");
2478                "},
2479                    expected: indoc! {"
2480                    fn main() {
2481                        eprintln!(\"<|user_cursor|>\");
2482                    }
2483                "},
2484                },
2485                Case {
2486                    name: "multiple_set_commands",
2487                    original: indoc! {"
2488                    aaa
2489                    bbb
2490                    ccc
2491                    ddd
2492                "},
2493                    model_output: indoc! {"
2494                    <|set|>0:23
2495                    AAA
2496                    <|set|>2:29
2497                    CCC
2498                "},
2499                    expected: indoc! {"
2500                    AAA
2501                    bbb
2502                    CCC
2503                    ddd
2504                "},
2505                },
2506                Case {
2507                    name: "set_range_multiline_replacement",
2508                    original: indoc! {"
2509                    fn handle_submit() {
2510                    }
2511
2512                    fn handle_keystroke() {
2513                "},
2514                    model_output: indoc! {"
2515                    <|set|>0:3f-1:7d
2516                    fn handle_submit(modal_state: &mut ModalState) {
2517                        <|user_cursor|>
2518                    }
2519                "},
2520                    expected: indoc! {"
2521                    fn handle_submit(modal_state: &mut ModalState) {
2522                        <|user_cursor|>
2523                    }
2524
2525                    fn handle_keystroke() {
2526                "},
2527                },
2528                Case {
2529                    name: "no_edit_commands_returns_original",
2530                    original: indoc! {"
2531                    hello
2532                    world
2533                "},
2534                    model_output: "some random text with no commands",
2535                    expected: indoc! {"
2536                    hello
2537                    world
2538                "},
2539                },
2540                Case {
2541                    name: "no_edits_command_returns_original",
2542                    original: indoc! {"
2543                    hello
2544                    world
2545                "},
2546                    model_output: "<|no_edits|>",
2547                    expected: indoc! {"
2548                    hello
2549                    world
2550                "},
2551                },
2552                Case {
2553                    name: "wrong_hash_set_ignored",
2554                    original: indoc! {"
2555                    aaa
2556                    bbb
2557                "},
2558                    model_output: indoc! {"
2559                    <|set|>0:ff
2560                    ZZZ
2561                "},
2562                    expected: indoc! {"
2563                    aaa
2564                    bbb
2565                "},
2566                },
2567                Case {
2568                    name: "insert_and_set_combined",
2569                    original: indoc! {"
2570                    alpha
2571                    beta
2572                    gamma
2573                "},
2574                    model_output: indoc! {"
2575                    <|set|>0:06
2576                    ALPHA
2577                    <|insert|>1:9c
2578                    beta_extra
2579                "},
2580                    expected: indoc! {"
2581                    ALPHA
2582                    beta
2583                    beta_extra
2584                    gamma
2585                "},
2586                },
2587                Case {
2588                    name: "no_trailing_newline_preserved",
2589                    original: "hello\nworld",
2590                    model_output: indoc! {"
2591                    <|set|>0:14
2592                    HELLO
2593                "},
2594                    expected: "HELLO\nworld",
2595                },
2596                Case {
2597                    name: "set_range_hash_mismatch_in_end_bound",
2598                    original: indoc! {"
2599                    one
2600                    two
2601                    three
2602                "},
2603                    model_output: indoc! {"
2604                    <|set|>0:42-2:ff
2605                    ONE_TWO_THREE
2606                "},
2607                    expected: indoc! {"
2608                    one
2609                    two
2610                    three
2611                "},
2612                },
2613                Case {
2614                    name: "set_range_start_greater_than_end_ignored",
2615                    original: indoc! {"
2616                    a
2617                    b
2618                    c
2619                "},
2620                    model_output: indoc! {"
2621                    <|set|>2:63-1:62
2622                    X
2623                "},
2624                    expected: indoc! {"
2625                    a
2626                    b
2627                    c
2628                "},
2629                },
2630                Case {
2631                    name: "insert_out_of_bounds_ignored",
2632                    original: indoc! {"
2633                    x
2634                    y
2635                "},
2636                    model_output: indoc! {"
2637                    <|insert|>99:aa
2638                    z
2639                "},
2640                    expected: indoc! {"
2641                    x
2642                    y
2643                "},
2644                },
2645                Case {
2646                    name: "set_out_of_bounds_ignored",
2647                    original: indoc! {"
2648                    x
2649                    y
2650                "},
2651                    model_output: indoc! {"
2652                    <|set|>99:aa
2653                    z
2654                "},
2655                    expected: indoc! {"
2656                    x
2657                    y
2658                "},
2659                },
2660                Case {
2661                    name: "malformed_set_command_ignored",
2662                    original: indoc! {"
2663                    alpha
2664                    beta
2665                "},
2666                    model_output: indoc! {"
2667                    <|set|>not-a-line-ref
2668                    UPDATED
2669                "},
2670                    expected: indoc! {"
2671                    alpha
2672                    beta
2673                "},
2674                },
2675                Case {
2676                    name: "malformed_insert_hash_treated_as_before_first",
2677                    original: indoc! {"
2678                    alpha
2679                    beta
2680                "},
2681                    model_output: indoc! {"
2682                    <|insert|>1:nothex
2683                    preamble
2684                "},
2685                    expected: indoc! {"
2686                    preamble
2687                    alpha
2688                    beta
2689                "},
2690                },
2691                Case {
2692                    name: "set_then_insert_same_target_orders_insert_after_replacement",
2693                    original: indoc! {"
2694                    cat
2695                    dog
2696                "},
2697                    model_output: indoc! {"
2698                    <|set|>0:38
2699                    CAT
2700                    <|insert|>0:38
2701                    TAIL
2702                "},
2703                    expected: indoc! {"
2704                    CAT
2705                    TAIL
2706                    dog
2707                "},
2708                },
2709                Case {
2710                    name: "overlapping_set_ranges_last_wins",
2711                    original: indoc! {"
2712                    a
2713                    b
2714                    c
2715                    d
2716                "},
2717                    model_output: indoc! {"
2718                    <|set|>0:61-2:63
2719                    FIRST
2720                    <|set|>1:62-3:64
2721                    SECOND
2722                "},
2723                    expected: indoc! {"
2724                    FIRST
2725                    d
2726                "},
2727                },
2728                Case {
2729                    name: "insert_before_first_and_after_line",
2730                    original: indoc! {"
2731                        a
2732                        b
2733                    "},
2734                    model_output: indoc! {"
2735                        <|insert|>
2736                        HEAD
2737                        <|insert|>0:61
2738                        MID
2739                    "},
2740                    expected: indoc! {"
2741                        HEAD
2742                        a
2743                        MID
2744                        b
2745                    "},
2746                },
2747            ];
2748
2749            for case in &cases {
2750                let result = hashline::apply_edit_commands(case.original, &case.model_output);
2751                assert_eq!(result, case.expected, "failed case: {}", case.name);
2752            }
2753        }
2754
2755        #[test]
2756        fn test_output_has_edit_commands() {
2757            assert!(hashline::output_has_edit_commands(&format!(
2758                "{}0:ab\nnew",
2759                SET_COMMAND_MARKER
2760            )));
2761            assert!(hashline::output_has_edit_commands(&format!(
2762                "{}0:ab\nnew",
2763                INSERT_COMMAND_MARKER
2764            )));
2765            assert!(hashline::output_has_edit_commands(&format!(
2766                "some text\n{}1:cd\nstuff",
2767                SET_COMMAND_MARKER
2768            )));
2769            assert!(!hashline::output_has_edit_commands("just plain text"));
2770            assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2771            assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2772        }
2773
2774        // ---- hashline::patch_to_edit_commands round-trip tests ----
2775
        /// Table-driven round-trip test: every case's unified diff is converted
        /// to edit commands with `hashline::patch_to_edit_commands`, and those
        /// commands are then applied to the old text with
        /// `hashline::apply_edit_commands`; the result must equal `expected_new`.
        #[test]
        fn test_patch_to_edit_commands() {
            struct Case {
                // Label used in assertion/panic messages to identify the case.
                name: &'static str,
                // Text the patch is resolved against and the commands are applied to.
                old: &'static str,
                // Unified-diff text handed to `patch_to_edit_commands`.
                patch: &'static str,
                // Expected text after applying the generated commands to `old`.
                // If it contains `CURSOR_MARKER`, the marker's byte position is
                // passed as the cursor offset (see the loop below).
                expected_new: &'static str,
            }

            let cases = [
                Case {
                    name: "single_line_replacement",
                    old: indoc! {"
                    let mut total = 0;
                    for product in products {
                        total += ;
                    }
                    total
                "},
                    patch: indoc! {"
                    @@ -1,5 +1,5 @@
                     let mut total = 0;
                     for product in products {
                    -    total += ;
                    +    total += product.price;
                     }
                     total
                "},
                    expected_new: indoc! {"
                    let mut total = 0;
                    for product in products {
                        total += product.price;
                    }
                    total
                "},
                },
                Case {
                    name: "multiline_replacement",
                    old: indoc! {"
                    fn foo() {
                        let x = 1;
                        let y = 2;
                        let z = 3;
                    }
                "},
                    patch: indoc! {"
                    @@ -1,5 +1,3 @@
                     fn foo() {
                    -    let x = 1;
                    -    let y = 2;
                    -    let z = 3;
                    +    let sum = 1 + 2 + 3;
                     }
                "},
                    expected_new: indoc! {"
                    fn foo() {
                        let sum = 1 + 2 + 3;
                    }
                "},
                },
                Case {
                    name: "insertion",
                    old: indoc! {"
                    fn main() {
                        let x = 1;
                    }
                "},
                    patch: indoc! {"
                    @@ -1,3 +1,4 @@
                     fn main() {
                         let x = 1;
                    +    let y = 2;
                     }
                "},
                    expected_new: indoc! {"
                    fn main() {
                        let x = 1;
                        let y = 2;
                    }
                "},
                },
                Case {
                    name: "insertion_before_first",
                    old: indoc! {"
                    let x = 1;
                    let y = 2;
                "},
                    patch: indoc! {"
                    @@ -1,2 +1,3 @@
                    +use std::io;
                     let x = 1;
                     let y = 2;
                "},
                    expected_new: indoc! {"
                    use std::io;
                    let x = 1;
                    let y = 2;
                "},
                },
                Case {
                    name: "deletion",
                    old: indoc! {"
                    aaa
                    bbb
                    ccc
                    ddd
                "},
                    patch: indoc! {"
                    @@ -1,4 +1,2 @@
                     aaa
                    -bbb
                    -ccc
                     ddd
                "},
                    expected_new: indoc! {"
                    aaa
                    ddd
                "},
                },
                Case {
                    name: "multiple_changes",
                    old: indoc! {"
                    alpha
                    beta
                    gamma
                    delta
                    epsilon
                "},
                    patch: indoc! {"
                    @@ -1,5 +1,5 @@
                    -alpha
                    +ALPHA
                     beta
                     gamma
                    -delta
                    +DELTA
                     epsilon
                "},
                    expected_new: indoc! {"
                    ALPHA
                    beta
                    gamma
                    DELTA
                    epsilon
                "},
                },
                Case {
                    name: "replace_with_insertion",
                    old: indoc! {r#"
                    fn handle() {
                        modal_state.close();
                        modal_state.dismiss();
                "#},
                    patch: indoc! {r#"
                    @@ -1,3 +1,4 @@
                     fn handle() {
                         modal_state.close();
                    +    eprintln!("");
                         modal_state.dismiss();
                "#},
                    expected_new: indoc! {r#"
                    fn handle() {
                        modal_state.close();
                        eprintln!("");
                        modal_state.dismiss();
                "#},
                },
                Case {
                    name: "complete_replacement",
                    old: indoc! {"
                    aaa
                    bbb
                    ccc
                "},
                    patch: indoc! {"
                    @@ -1,3 +1,3 @@
                    -aaa
                    -bbb
                    -ccc
                    +xxx
                    +yyy
                    +zzz
                "},
                    expected_new: indoc! {"
                    xxx
                    yyy
                    zzz
                "},
                },
                Case {
                    name: "add_function_body",
                    old: indoc! {"
                    fn foo() {
                        modal_state.dismiss();
                    }

                    fn

                    fn handle_keystroke() {
                "},
                    patch: indoc! {"
                    @@ -1,6 +1,8 @@
                     fn foo() {
                         modal_state.dismiss();
                     }

                    -fn
                    +fn handle_submit() {
                    +    todo()
                    +}

                     fn handle_keystroke() {
                "},
                    expected_new: indoc! {"
                    fn foo() {
                        modal_state.dismiss();
                    }

                    fn handle_submit() {
                        todo()
                    }

                    fn handle_keystroke() {
                "},
                },
                Case {
                    name: "with_cursor_offset",
                    old: indoc! {r#"
                    fn main() {
                        println!();
                    }
                "#},
                    patch: indoc! {r#"
                        @@ -1,3 +1,3 @@
                        fn main() {
                        -    println!();
                        +    eprintln!("");
                        }
                    "#},
                    expected_new: indoc! {r#"
                        fn main() {
                            eprintln!("<|user_cursor|>");
                        }
                    "#},
                },
                Case {
                    name: "non_local_hunk_header_pure_insertion_repro",
                    old: indoc! {"
                        aaa
                        bbb
                    "},
                    patch: indoc! {"
                        @@ -20,2 +20,3 @@
                        aaa
                        +xxx
                        bbb
                    "},
                    expected_new: indoc! {"
                        aaa
                        xxx
                        bbb
                    "},
                },
                Case {
                    name: "empty_patch_produces_no_edits_marker",
                    old: indoc! {"
                        aaa
                        bbb
                    "},
                    patch: "@@ -20,2 +20,3 @@\n",
                    expected_new: indoc! {"
                        aaa
                        bbb
                    "},
                },
            ];

            for case in &cases {
                // The cursor_offset for patch_to_edit_commands is relative to
                // the first hunk's new text (context + additions). We compute
                // it by finding where the marker sits in the expected output
                // (which mirrors the new text of the hunk).
                let cursor_offset = case.expected_new.find(CURSOR_MARKER);

                // Conversion must succeed for every case; a failure names the case.
                let commands =
                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));

                // The generated output must be recognized as edit commands.
                assert!(
                    hashline::output_has_edit_commands(&commands),
                    "case {}: expected edit commands, got: {commands:?}",
                    case.name,
                );

                // Round trip: applying the commands to `old` must give `expected_new`.
                let applied = hashline::apply_edit_commands(case.old, &commands);
                assert_eq!(applied, case.expected_new, "case {}", case.name);
            }
        }
3074    }
3075}
3076
3077pub mod seed_coder {
3078    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
3079    //!
3080    //! Seed-Coder uses different FIM tokens and order than Qwen:
3081    //! - SPM order: suffix comes FIRST, then prefix, then middle
3082    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
3083    //! - File markers: StarCoder-style `<filename>path` (single token + path)
3084    //!
3085    //! All context (related files, edit history) goes in the PREFIX section.
3086    //! The suffix contains only code after the editable region.
3087    //!
3088    //! Example prompt:
3089    //!
3090    //! <[fim-suffix]>
3091    //! code after editable region
3092    //! <[fim-prefix]><filename>related/file.py
3093    //! related file content
3094    //!
3095    //! <filename>edit_history
3096    //! --- a/some_file.py
3097    //! +++ b/some_file.py
3098    //! -old
3099    //! +new
3100    //!
3101    //! <filename>path/to/target_file.py
3102    //! code before editable region
3103    //! <<<<<<< CURRENT
3104    //! code that
3105    //! needs to<|user_cursor|>
3106    //! be rewritten
3107    //! =======
3108    //! <[fim-middle]>
3109    //!
3110    //! Expected output (model generates):
3111    //!
3112    //! updated
3113    //! code with
3114    //! changes applied
3115    //! >>>>>>> UPDATED
3116
3117    use super::*;
3118
3119    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
3120    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
3121    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
3122    pub const FILE_MARKER: &str = "<filename>";
3123
3124    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
3125    pub const SEPARATOR: &str = "=======\n";
3126    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
3127
3128    pub const NO_EDITS: &str = "NO_EDITS\n";
3129
3130    pub fn special_tokens() -> &'static [&'static str] {
3131        &[
3132            FIM_SUFFIX,
3133            FIM_PREFIX,
3134            FIM_MIDDLE,
3135            FILE_MARKER,
3136            START_MARKER,
3137            SEPARATOR,
3138            END_MARKER,
3139            CURSOR_MARKER,
3140        ]
3141    }
3142
3143    pub fn write_cursor_excerpt_section(
3144        prompt: &mut String,
3145        path: &Path,
3146        context: &str,
3147        editable_range: &Range<usize>,
3148        cursor_offset: usize,
3149    ) {
3150        let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3151        prompt.push_str(&section);
3152    }
3153
3154    pub fn format_prompt_with_budget(
3155        path: &Path,
3156        context: &str,
3157        editable_range: &Range<usize>,
3158        cursor_offset: usize,
3159        events: &[Arc<Event>],
3160        related_files: &[RelatedFile],
3161        max_tokens: usize,
3162    ) -> String {
3163        let cursor_prefix_section =
3164            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3165        assemble_fim_prompt(
3166            context,
3167            editable_range,
3168            &cursor_prefix_section,
3169            events,
3170            related_files,
3171            max_tokens,
3172        )
3173    }
3174
3175    pub fn assemble_fim_prompt(
3176        context: &str,
3177        editable_range: &Range<usize>,
3178        cursor_prefix_section: &str,
3179        events: &[Arc<Event>],
3180        related_files: &[RelatedFile],
3181        max_tokens: usize,
3182    ) -> String {
3183        let suffix_section = build_suffix_section(context, editable_range);
3184
3185        let suffix_tokens = estimate_tokens(suffix_section.len() + FIM_PREFIX.len());
3186        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len() + FIM_MIDDLE.len());
3187        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
3188
3189        let edit_history_section = super::format_edit_history_within_budget(
3190            events,
3191            FILE_MARKER,
3192            "edit_history",
3193            budget_after_cursor,
3194            max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
3195        );
3196        let edit_history_tokens = estimate_tokens(edit_history_section.len() + "\n".len());
3197        let budget_after_edit_history =
3198            budget_after_cursor.saturating_sub(edit_history_tokens + "\n".len());
3199
3200        let related_files_section = super::format_related_files_within_budget(
3201            related_files,
3202            FILE_MARKER,
3203            "",
3204            budget_after_edit_history,
3205        );
3206
3207        let mut prompt = String::new();
3208        prompt.push_str(&suffix_section);
3209        prompt.push_str(FIM_PREFIX);
3210        prompt.push_str(&related_files_section);
3211        if !related_files_section.is_empty() {
3212            prompt.push('\n');
3213        }
3214        prompt.push_str(&edit_history_section);
3215        if !edit_history_section.is_empty() {
3216            prompt.push('\n');
3217        }
3218        prompt.push_str(cursor_prefix_section);
3219        prompt.push_str(FIM_MIDDLE);
3220
3221        prompt
3222    }
3223
3224    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
3225        let mut section = String::new();
3226        section.push_str(FIM_SUFFIX);
3227        section.push_str(&context[editable_range.end..]);
3228        if !section.ends_with('\n') {
3229            section.push('\n');
3230        }
3231        section
3232    }
3233
3234    fn build_cursor_prefix_section(
3235        path: &Path,
3236        context: &str,
3237        editable_range: &Range<usize>,
3238        cursor_offset: usize,
3239    ) -> String {
3240        let mut section = String::new();
3241        let path_str = path.to_string_lossy();
3242        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
3243
3244        section.push_str(&context[..editable_range.start]);
3245        section.push_str(START_MARKER);
3246        section.push_str(&context[editable_range.start..cursor_offset]);
3247        section.push_str(CURSOR_MARKER);
3248        section.push_str(&context[cursor_offset..editable_range.end]);
3249        if !section.ends_with('\n') {
3250            section.push('\n');
3251        }
3252        section.push_str(SEPARATOR);
3253        section
3254    }
3255
3256    /// Format patch as containing no changes if it's empty; otherwise return None.
3257    pub(crate) fn no_edits(patch: &str) -> Option<String> {
3258        // Count lines in the patch
3259        let empty_patch = patch.lines().count() <= 3;
3260        if empty_patch {
3261            Some(format!("{NO_EDITS}{END_MARKER}"))
3262        } else {
3263            None
3264        }
3265    }
3266}
3267
3268pub mod v0304_variable_edit {
3269    //! A prompt format with no fixed editable region. The entire context is shown
3270    //! to the model, and it chooses which text to replace by outputting surrounding
3271    //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
3272    //! text.
3273    //!
3274    //! Example prompt:
3275    //!
3276    //! <|file_sep|>path/to/file.py
3277    //! zero
3278    //! one
3279    //! two
3280    //! three<|user_cursor|>
3281    //! four
3282    //! five
3283    //! <|fim_prefix|>
    //!
3285    //! Expected output (model generates):
3286    //!
3287    //! two
3288    //! <|fim_middle|>
3289    //! THREE
3290    //! <|fim_suffix|>
3291    //! four
3292    //!
3293    //! The output means: find "two\n...\nfour" in the context, and replace
3294    //! everything between "two\n" and "four" with "THREE\n".
3295
3296    use super::*;
3297
3298    pub fn special_tokens() -> &'static [&'static str] {
3299        &[
3300            "<|fim_prefix|>",
3301            "<|fim_suffix|>",
3302            "<|fim_middle|>",
3303            "<|file_sep|>",
3304            CURSOR_MARKER,
3305        ]
3306    }
3307
3308    pub fn write_cursor_excerpt_section(
3309        prompt: &mut String,
3310        path: &Path,
3311        context: &str,
3312        cursor_offset: usize,
3313    ) {
3314        let path_str = path.to_string_lossy();
3315        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
3316
3317        prompt.push_str(&context[..cursor_offset]);
3318        prompt.push_str(CURSOR_MARKER);
3319        prompt.push_str(&context[cursor_offset..]);
3320        if !prompt.ends_with('\n') {
3321            prompt.push('\n');
3322        }
3323        prompt.push_str("<|fim_prefix|>\n")
3324    }
3325
3326    /// Apply a variable-edit model output to the original context text.
3327    ///
3328    /// The model output has the form:
3329    ///
3330    /// - prefix context lines
3331    /// - `<|fim_middle|>`
3332    /// - new text
3333    /// - `<|fim_suffix|>`
3334    /// - suffix context lines
3335    ///
3336    /// We locate the prefix/suffix context lines in the original text and replace
3337    /// everything between them with the new text.
3338    pub fn apply_variable_edit(
3339        context: &str,
3340        model_output: &str,
3341    ) -> Result<(Range<usize>, String)> {
3342        let (prefix_context, rest) = model_output
3343            .split_once("<|fim_middle|>\n")
3344            .or_else(|| model_output.split_once("<|fim_middle|>"))
3345            .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
3346
3347        let (new_text, suffix_context) = rest
3348            .split_once("<|fim_suffix|>\n")
3349            .or_else(|| rest.split_once("<|fim_suffix|>"))
3350            .unwrap_or((rest, ""));
3351
3352        let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
3353            suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
3354        } else {
3355            suffix_context
3356        };
3357
3358        let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
3359            .ok_or_else(|| anyhow!("could not locate prefix lines"))?
3360            + prefix_context.len();
3361        let suffix_offset = if suffix_context.is_empty() {
3362            context.len()
3363        } else {
3364            find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
3365                .ok_or_else(|| anyhow!("could not locate suffix lines"))?
3366                + prefix_offset
3367        };
3368
3369        let edit_range = prefix_offset..suffix_offset;
3370        return Ok((edit_range, new_text.to_string()));
3371    }
3372
3373    fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
3374        if needle.is_empty() {
3375            return Some(0);
3376        }
3377
3378        haystack.match_indices(needle).find_map(|(offset, _)| {
3379            let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
3380            matched_line_start.then_some(offset)
3381        })
3382    }
3383
3384    /// Convert a unified diff patch into the variable-edit output format.
3385    ///
3386    /// Parses `patch` as a unified diff against `old_text` and produces model
3387    /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
3388    /// delimiters. The diff is resolved by content matching rather than line
3389    /// numbers.
3390    pub fn patch_to_variable_edit_output(
3391        old_text: &str,
3392        patch: &str,
3393        cursor_offset: Option<usize>,
3394    ) -> Result<String> {
3395        // Parse the unified diff into hunks. Each hunk has an `old_context`
3396        // string (context + deleted lines interleaved in order) and a list of
3397        // edits expressed as byte ranges within that context plus replacement
3398        // text.
3399        let hunks = parse_hunks(patch);
3400        if hunks.is_empty() {
3401            return Ok(String::new());
3402        }
3403
3404        // Apply each hunk by finding its old_context in the text and
3405        // performing the edits. We search forward from where the previous
3406        // hunk ended so that hunks are applied in order.
3407        let mut new_text = old_text.to_string();
3408        let mut search_from: usize = 0;
3409        let mut first_hunk_pos: Option<usize> = None;
3410
3411        for hunk in &hunks {
3412            let context_pos = new_text[search_from..]
3413                .find(&hunk.old_context)
3414                .map(|pos| pos + search_from)
3415                .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
3416
3417            if first_hunk_pos.is_none() {
3418                first_hunk_pos = Some(context_pos);
3419            }
3420
3421            // Apply edits in reverse order so byte offsets remain valid.
3422            for edit in hunk.edits.iter().rev() {
3423                let abs_start = context_pos + edit.range.start;
3424                let abs_end = context_pos + edit.range.end;
3425                new_text.replace_range(abs_start..abs_end, &edit.text);
3426            }
3427
3428            // Advance past this hunk's region in the (now modified) text.
3429            let new_region_len: usize =
3430                hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
3431                    len + edit.text.len() - (edit.range.end - edit.range.start)
3432                });
3433            search_from = context_pos + new_region_len;
3434        }
3435
3436        // Now we have old_text and new_text. Find the changed line range by
3437        // comparing them.
3438        let old_lines: Vec<&str> = old_text.lines().collect();
3439        let new_lines: Vec<&str> = new_text.lines().collect();
3440
3441        // Find first differing line.
3442        let first_changed_row = old_lines
3443            .iter()
3444            .zip(new_lines.iter())
3445            .position(|(a, b)| a != b)
3446            .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
3447
3448        // Find last differing line (from the end).
3449        let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
3450        let common_suffix = old_lines
3451            .iter()
3452            .rev()
3453            .zip(new_lines.iter().rev())
3454            .take(max_suffix)
3455            .take_while(|(a, b)| a == b)
3456            .count();
3457
3458        let old_end = old_lines.len() - common_suffix;
3459        let new_end = new_lines.len() - common_suffix;
3460
3461        if first_changed_row == old_end && first_changed_row == new_end {
3462            return Ok(String::new());
3463        }
3464
3465        // Build the replacement text from new_lines[first_diff..new_end].
3466        let mut merged_new_text = String::new();
3467        for line in &new_lines[first_changed_row..new_end] {
3468            merged_new_text.push_str(line);
3469            merged_new_text.push('\n');
3470        }
3471
3472        // cursor_offset is relative to the first hunk's new content in
3473        // new_text. Translate it to an offset within merged_new_text, which
3474        // only contains lines first_diff..new_end of new_text.
3475        if let Some(hunk_offset) = cursor_offset {
3476            let hunk_start = first_hunk_pos.unwrap_or(0);
3477            let absolute_pos = hunk_start + hunk_offset;
3478
3479            // Byte offset where first_diff starts in new_text.
3480            let merged_start: usize = new_lines[..first_changed_row]
3481                .iter()
3482                .map(|line| line.len() + 1)
3483                .sum();
3484
3485            if absolute_pos >= merged_start {
3486                let relative_offset = absolute_pos - merged_start;
3487                if relative_offset <= merged_new_text.len() {
3488                    merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
3489                }
3490            }
3491        }
3492
3493        // Build output with 2 lines of context above and below.
3494        let context_lines_count = 2;
3495        let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
3496        let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
3497
3498        fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
3499            let pattern = &lines[line_range];
3500            let pattern_len = pattern.len();
3501
3502            let mut count = 0;
3503            for offset in 0..=lines.len() - pattern_len {
3504                if &lines[offset..offset + pattern_len] == pattern {
3505                    count += 1;
3506                }
3507            }
3508            count
3509        }
3510
3511        // Expand prefix and suffix until they are unique
3512        while prefix_start > 0 {
3513            if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
3514                prefix_start -= 1;
3515            } else {
3516                break;
3517            }
3518        }
3519        while suffix_end < old_lines.len() {
3520            if count_matches(old_end..suffix_end, &old_lines) > 1 {
3521                suffix_end += 1;
3522            } else {
3523                break;
3524            }
3525        }
3526
3527        let mut output = String::new();
3528        for line in &old_lines[prefix_start..first_changed_row] {
3529            output.push_str(line);
3530            output.push('\n');
3531        }
3532        output.push_str("<|fim_middle|>\n");
3533        output.push_str(&merged_new_text);
3534        output.push_str("<|fim_suffix|>\n");
3535        for line in &old_lines[old_end..suffix_end] {
3536            output.push_str(line);
3537            output.push('\n');
3538        }
3539
3540        Ok(output)
3541    }
3542
    /// One hunk of a unified diff, expressed by content rather than by line
    /// numbers.
    struct ParsedHunk {
        /// The hunk's context lines and removed lines in patch order, each
        /// terminated by `\n`. This is the text that is searched for in the
        /// document being patched.
        old_context: String,
        /// Edits to perform inside `old_context`, in the order they appear in
        /// the patch.
        edits: Vec<ParsedEdit>,
    }
3547
    /// A single replacement inside a hunk.
    struct ParsedEdit {
        /// Byte range within the owning hunk's `old_context` that is
        /// replaced; empty for a pure insertion.
        range: Range<usize>,
        /// Replacement text made of the added lines, each terminated by
        /// `\n`; empty for a pure deletion.
        text: String,
    }
3552
3553    /// Parse a unified diff into content-based hunks. Each hunk contains an
3554    /// `old_context` string (context lines + deleted lines, which together
3555    /// form the text that should be found in the original) and a list of edits
3556    /// expressed as byte ranges within that context.
3557    fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
3558        let mut hunks = Vec::new();
3559        let mut current: Option<ParsedHunk> = None;
3560
3561        for line in patch.lines() {
3562            if line.starts_with("@@") {
3563                if let Some(hunk) = current.take() {
3564                    if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3565                        hunks.push(hunk);
3566                    }
3567                }
3568                current = Some(ParsedHunk {
3569                    old_context: String::new(),
3570                    edits: Vec::new(),
3571                });
3572            } else if line.starts_with("---") || line.starts_with("+++") {
3573                continue;
3574            } else if let Some(hunk) = &mut current {
3575                if let Some(added) = line.strip_prefix('+') {
3576                    let pos = hunk.old_context.len();
3577                    if let Some(last_edit) = hunk.edits.last_mut() {
3578                        if last_edit.range.end == pos {
3579                            writeln!(&mut last_edit.text, "{added}").ok();
3580                            continue;
3581                        }
3582                    }
3583                    hunk.edits.push(ParsedEdit {
3584                        range: pos..pos,
3585                        text: format!("{added}\n"),
3586                    });
3587                } else if let Some(removed) = line.strip_prefix('-') {
3588                    let start = hunk.old_context.len();
3589                    writeln!(&mut hunk.old_context, "{removed}").ok();
3590                    let end = hunk.old_context.len();
3591                    if let Some(last_edit) = hunk.edits.last_mut() {
3592                        if last_edit.range.end == start {
3593                            last_edit.range.end = end;
3594                            continue;
3595                        }
3596                    }
3597                    hunk.edits.push(ParsedEdit {
3598                        range: start..end,
3599                        text: String::new(),
3600                    });
3601                } else {
3602                    let ctx = line.strip_prefix(' ').unwrap_or(line);
3603                    writeln!(&mut hunk.old_context, "{ctx}").ok();
3604                }
3605            }
3606        }
3607
3608        if let Some(hunk) = current {
3609            if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3610                hunks.push(hunk);
3611            }
3612        }
3613
3614        hunks
3615    }
3616
3617    #[cfg(test)]
3618    mod tests {
3619        use super::*;
3620        use indoc::indoc;
3621
3622        #[test]
3623        fn test_apply_variable_edit() {
3624            struct Case {
3625                name: &'static str,
3626                original: &'static str,
3627                model_output: &'static str,
3628                expected: &'static str,
3629            }
3630
3631            let cases = [
3632                Case {
3633                    name: "simple_single_line_replacement",
3634                    original: indoc! {"
3635                        zero
3636                        one
3637                        two
3638                        three
3639                        four
3640                        five
3641                    "},
3642                    model_output: indoc! {"
3643                        two
3644                        <|fim_middle|>
3645                        THREE
3646                        <|fim_suffix|>
3647                        four
3648                    "},
3649                    expected: indoc! {"
3650                        zero
3651                        one
3652                        two
3653                        THREE
3654                        four
3655                        five
3656                    "},
3657                },
3658                Case {
3659                    name: "multi_line_replacement",
3660                    original: indoc! {"
3661                        a
3662                        b
3663                        c
3664                        d
3665                        e
3666                    "},
3667                    model_output: indoc! {"
3668                        a
3669                        <|fim_middle|>
3670                        B
3671                        C
3672                        D
3673                        <|fim_suffix|>
3674                        e
3675                    "},
3676                    expected: indoc! {"
3677                        a
3678                        B
3679                        C
3680                        D
3681                        e
3682                    "},
3683                },
3684                Case {
3685                    name: "insertion_between_existing_lines",
3686                    original: indoc! {"
3687                        a
3688                        b
3689                        c
3690                    "},
3691                    model_output: indoc! {"
3692                        a
3693                        <|fim_middle|>
3694                        X
3695                        <|fim_suffix|>
3696                        b
3697                    "},
3698                    expected: indoc! {"
3699                        a
3700                        X
3701                        b
3702                        c
3703                    "},
3704                },
3705                Case {
3706                    name: "deletion",
3707                    original: indoc! {"
3708                        a
3709                        b
3710                        c
3711                        d
3712                    "},
3713                    model_output: indoc! {"
3714                        a
3715                        <|fim_middle|>
3716                        <|fim_suffix|>
3717                        c
3718                    "},
3719                    expected: indoc! {"
3720                        a
3721                        c
3722                        d
3723                    "},
3724                },
3725                Case {
3726                    name: "replacement_at_start_no_prefix_context",
3727                    original: indoc! {"
3728                        a
3729                        b
3730                        c
3731                    "},
3732                    model_output: indoc! {"
3733                        <|fim_middle|>
3734                        X
3735                        <|fim_suffix|>
3736                        b
3737                    "},
3738                    expected: indoc! {"
3739                        X
3740                        b
3741                        c
3742                    "},
3743                },
3744                Case {
3745                    name: "replacement_at_end_no_suffix_context",
3746                    original: indoc! {"
3747                        a
3748                        b
3749                        c
3750                    "},
3751                    model_output: indoc! {"
3752                        b
3753                        <|fim_middle|>
3754                        Z
3755                        <|fim_suffix|>
3756                    "},
3757                    expected: indoc! {"
3758                        a
3759                        b
3760                        Z
3761                    "},
3762                },
3763                Case {
3764                    name: "context_with_trailing_newline_is_preserved",
3765                    original: indoc! {"
3766                        a
3767                        b
3768                        c
3769                    "},
3770                    model_output: indoc! {"
3771                        a
3772                        <|fim_middle|>
3773                        B
3774                        <|fim_suffix|>
3775                        c
3776                    "},
3777                    expected: indoc! {"
3778                        a
3779                        B
3780                        c
3781                    "},
3782                },
3783                Case {
3784                    name: "cursor_marker_passes_through_untouched",
3785                    original: indoc! {"
3786                        a
3787                        b
3788                        c
3789                    "},
3790                    model_output: indoc! {"
3791                        a
3792                        <|fim_middle|>
3793                        B<|user_cursor|>B
3794                        <|fim_suffix|>
3795                        c
3796                    "},
3797                    expected: indoc! {"
3798                        a
3799                        B<|user_cursor|>B
3800                        c
3801                    "},
3802                },
3803                Case {
3804                    name: "multiple_prefix_context_lines",
3805                    original: indoc! {"
3806                        a
3807                        b
3808                        c
3809                        d
3810                        e
3811                    "},
3812                    model_output: indoc! {"
3813                        b
3814                        c
3815                        <|fim_middle|>
3816                        D
3817                        <|fim_suffix|>
3818                        e
3819                    "},
3820                    expected: indoc! {"
3821                        a
3822                        b
3823                        c
3824                        D
3825                        e
3826                    "},
3827                },
3828            ];
3829
3830            for case in cases {
3831                let (edit_range, replacement) =
3832                    apply_variable_edit(case.original, case.model_output).unwrap();
3833                let mut edited = case.original.to_string();
3834                edited.replace_range(edit_range, &replacement);
3835                assert_eq!(edited, case.expected, "{}", case.name);
3836            }
3837        }
3838
3839        #[test]
3840        fn test_patch_to_variable_edit() {
3841            struct Case {
3842                name: &'static str,
3843                old: &'static str,
3844                patch: &'static str,
3845                cursor_offset: Option<usize>,
3846                expected_variable_edit: &'static str,
3847                expected_after_apply: &'static str,
3848            }
3849
3850            let cases = [
3851                Case {
3852                    name: "simple_replacement",
3853                    old: indoc! {"
3854                        zero
3855                        one
3856                        two
3857                        three
3858                        four
3859                        five
3860                    "},
3861                    patch: indoc! {"
3862                        @@ -3,3 +3,3 @@
3863                         two
3864                        -three
3865                        +THREE
3866                         four
3867                    "},
3868                    cursor_offset: None,
3869                    expected_variable_edit: indoc! {"
3870                        one
3871                        two
3872                        <|fim_middle|>
3873                        THREE
3874                        <|fim_suffix|>
3875                        four
3876                        five
3877                    "},
3878                    expected_after_apply: indoc! {"
3879                        zero
3880                        one
3881                        two
3882                        THREE
3883                        four
3884                        five
3885                    "},
3886                },
3887                Case {
3888                    name: "insertion",
3889                    old: indoc! {"
3890                        a
3891                        b
3892                        c
3893                        d
3894                        e
3895                    "},
3896                    patch: indoc! {"
3897                        @@ -2,0 +3,1 @@
3898                         b
3899                        +X
3900                         c
3901                    "},
3902                    cursor_offset: None,
3903                    expected_variable_edit: indoc! {"
3904                        a
3905                        b
3906                        <|fim_middle|>
3907                        X
3908                        <|fim_suffix|>
3909                        c
3910                        d
3911                    "},
3912                    expected_after_apply: indoc! {"
3913                        a
3914                        b
3915                        X
3916                        c
3917                        d
3918                        e
3919                    "},
3920                },
3921                Case {
3922                    name: "deletion",
3923                    old: indoc! {"
3924                        a
3925                        b
3926                        c
3927                        d
3928                        e
3929                    "},
3930                    patch: indoc! {"
3931                        @@ -2,3 +2,2 @@
3932                         b
3933                        -c
3934                         d
3935                    "},
3936                    cursor_offset: None,
3937                    expected_variable_edit: indoc! {"
3938                        a
3939                        b
3940                        <|fim_middle|>
3941                        <|fim_suffix|>
3942                        d
3943                        e
3944                    "},
3945                    expected_after_apply: indoc! {"
3946                        a
3947                        b
3948                        d
3949                        e
3950                    "},
3951                },
3952                Case {
3953                    name: "edit_near_start",
3954                    old: indoc! {"
3955                        first
3956                        second
3957                        third
3958                        fourth
3959                    "},
3960                    patch: indoc! {"
3961                        @@ -1,1 +1,1 @@
3962                        -first
3963                        +FIRST
3964                    "},
3965                    cursor_offset: None,
3966                    expected_variable_edit: indoc! {"
3967                        <|fim_middle|>
3968                        FIRST
3969                        <|fim_suffix|>
3970                        second
3971                        third
3972                    "},
3973                    expected_after_apply: indoc! {"
3974                        FIRST
3975                        second
3976                        third
3977                        fourth
3978                    "},
3979                },
3980                Case {
3981                    name: "edit_near_end",
3982                    old: indoc! {"
3983                        first
3984                        second
3985                        third
3986                        fourth
3987                    "},
3988                    patch: indoc! {"
3989                        @@ -4,1 +4,1 @@
3990                        -fourth
3991                        +FOURTH
3992                    "},
3993                    cursor_offset: None,
3994                    expected_variable_edit: indoc! {"
3995                        second
3996                        third
3997                        <|fim_middle|>
3998                        FOURTH
3999                        <|fim_suffix|>
4000                    "},
4001                    expected_after_apply: indoc! {"
4002                        first
4003                        second
4004                        third
4005                        FOURTH
4006                    "},
4007                },
4008                Case {
4009                    name: "cursor_at_start_of_replacement",
4010                    old: indoc! {"
4011                        zero
4012                        one
4013                        two
4014                        three
4015                        four
4016                        five
4017                    "},
4018                    patch: indoc! {"
4019                        @@ -3,3 +3,3 @@
4020                         two
4021                        -three
4022                        +THREE
4023                         four
4024                    "},
4025                    cursor_offset: Some(4),
4026                    expected_variable_edit: indoc! {"
4027                        one
4028                        two
4029                        <|fim_middle|>
4030                        <|user_cursor|>THREE
4031                        <|fim_suffix|>
4032                        four
4033                        five
4034                    "},
4035                    expected_after_apply: indoc! {"
4036                        zero
4037                        one
4038                        two
4039                        <|user_cursor|>THREE
4040                        four
4041                        five
4042                    "},
4043                },
4044                Case {
4045                    name: "cursor_in_middle_of_replacement",
4046                    old: indoc! {"
4047                        zero
4048                        one
4049                        two
4050                        three
4051                        four
4052                        five
4053                    "},
4054                    patch: indoc! {"
4055                        @@ -3,3 +3,3 @@
4056                         two
4057                        -three
4058                        +THREE
4059                         four
4060                    "},
4061                    cursor_offset: Some(6),
4062                    expected_variable_edit: indoc! {"
4063                        one
4064                        two
4065                        <|fim_middle|>
4066                        TH<|user_cursor|>REE
4067                        <|fim_suffix|>
4068                        four
4069                        five
4070                    "},
4071                    expected_after_apply: indoc! {"
4072                        zero
4073                        one
4074                        two
4075                        TH<|user_cursor|>REE
4076                        four
4077                        five
4078                    "},
4079                },
4080                Case {
4081                    name: "expands_context_when_two_lines_not_unique_before_and_after",
4082                    old: indoc! {"
4083                        one
4084                        a
4085                        b
4086                        c
4087                        d
4088                        two
4089                        a
4090                        b
4091                        c
4092                        d
4093                        three
4094                        a
4095                        b
4096                        c
4097                        d
4098                        four
4099                    "},
4100                    patch: indoc! {"
4101                        @@ -4,5 +4,5 @@
4102                         two
4103                         a
4104                         b
4105                        -c
4106                        +C
4107                         d
4108                         three
4109                    "},
4110                    cursor_offset: None,
4111                    expected_variable_edit: indoc! {"
4112                        two
4113                        a
4114                        b
4115                        <|fim_middle|>
4116                        C
4117                        <|fim_suffix|>
4118                        d
4119                        three
4120                    "},
4121                    expected_after_apply: indoc! {"
4122                        one
4123                        a
4124                        b
4125                        c
4126                        d
4127                        two
4128                        a
4129                        b
4130                        C
4131                        d
4132                        three
4133                        a
4134                        b
4135                        c
4136                        d
4137                        four
4138                    "},
4139                },
4140                Case {
4141                    name: "expands_context_when_two_lines_not_unique_before_and_after",
4142                    old: indoc! {"
4143                        {
4144                            {
4145                                one();
4146                            }
4147                        }
4148                        {
4149                            {
4150                                two();
4151                            }
4152                        }
4153                        {
4154                            {
4155                                three();
4156                            }
4157                        }
4158                        {
4159                            {
4160                                four();
4161                            }
4162                        }
4163                    "},
4164                    patch: indoc! {"
4165                        @@ -4,5 +4,5 @@
4166                             {
4167                        -        two();
4168                        +        TWO();
4169                             }
4170                    "},
4171                    cursor_offset: None,
4172                    expected_variable_edit: indoc! {"
4173                                one();
4174                            }
4175                        }
4176                        {
4177                            {
4178                        <|fim_middle|>
4179                                TWO();
4180                        <|fim_suffix|>
4181                            }
4182                        }
4183                        {
4184                            {
4185                                three();
4186                    "},
4187                    expected_after_apply: indoc! {"
4188                        {
4189                            {
4190                                one();
4191                            }
4192                        }
4193                        {
4194                            {
4195                                TWO();
4196                            }
4197                        }
4198                        {
4199                            {
4200                                three();
4201                            }
4202                        }
4203                        {
4204                            {
4205                                four();
4206                            }
4207                        }
4208                    "},
4209                },
4210            ];
4211
4212            for case in cases {
4213                let output =
4214                    patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
4215                        .unwrap_or_else(|error| {
4216                            panic!("failed converting patch for {}: {error}", case.name)
4217                        });
4218                assert_eq!(
4219                    output, case.expected_variable_edit,
4220                    "patch->variable_edit mismatch for {}",
4221                    case.name
4222                );
4223
4224                let (edit_range, replacement) = apply_variable_edit(case.old, &output)
4225                    .unwrap_or_else(|error| {
4226                        panic!("failed applying variable_edit for {}: {error}", case.name)
4227                    });
4228                let mut edited_by_variable_edit = case.old.to_string();
4229                edited_by_variable_edit.replace_range(edit_range, &replacement);
4230                assert_eq!(
4231                    edited_by_variable_edit, case.expected_after_apply,
4232                    "variable_edit apply mismatch for {}",
4233                    case.name
4234                );
4235
4236                let (expected_edit_range, expected_replacement) =
4237                    apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
4238                        |error| {
4239                            panic!(
4240                                "failed applying expected variable_edit for {}: {error}",
4241                                case.name
4242                            )
4243                        },
4244                    );
4245                let mut edited_by_expected_variable_edit = case.old.to_string();
4246                edited_by_expected_variable_edit
4247                    .replace_range(expected_edit_range, &expected_replacement);
4248                assert_eq!(
4249                    edited_by_expected_variable_edit, case.expected_after_apply,
4250                    "expected variable_edit apply mismatch for {}",
4251                    case.name
4252                );
4253            }
4254        }
4255
4256        #[test]
4257        fn test_write_cursor_excerpt_section() {
4258            let path = Path::new("test.rs");
4259            let context = "fn main() {\n    hello();\n}\n";
4260            let cursor_offset = 17;
4261            let mut prompt = String::new();
4262            write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
4263            assert_eq!(
4264                prompt,
4265                "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
4266            );
4267        }
4268    }
4269}
4270
4271/// The zeta1 prompt format
4272pub mod zeta1 {
4273    use super::*;
4274    use std::fmt::Write;
4275
4276    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
4277    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
4278    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
4279    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
4280
4281    const INSTRUCTION_HEADER: &str = concat!(
4282        "### Instruction:\n",
4283        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4284        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4285        "into account the cursor location.\n\n",
4286        "### User Edits:\n\n"
4287    );
4288    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
4289    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
4290
4291    /// Formats a complete zeta1 prompt from the input events and excerpt.
4292    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
4293        let mut prompt = String::with_capacity(
4294            INSTRUCTION_HEADER.len()
4295                + input_events.len()
4296                + EXCERPT_HEADER.len()
4297                + input_excerpt.len()
4298                + RESPONSE_HEADER.len(),
4299        );
4300        prompt.push_str(INSTRUCTION_HEADER);
4301        prompt.push_str(input_events);
4302        prompt.push_str(EXCERPT_HEADER);
4303        prompt.push_str(input_excerpt);
4304        prompt.push_str(RESPONSE_HEADER);
4305        prompt
4306    }
4307
4308    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
4309    /// editable and context byte-offset ranges within `cursor_excerpt`.
4310    pub fn format_zeta1_from_input(
4311        input: &ZetaPromptInput,
4312        editable_range: Range<usize>,
4313        context_range: Range<usize>,
4314    ) -> String {
4315        let events = format_zeta1_events(&input.events);
4316        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
4317        format_zeta1_prompt(&events, &excerpt)
4318    }
4319
4320    /// Formats events in zeta1 style (oldest first).
4321    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
4322        let mut result = String::new();
4323        for event in
4324            events
4325                .iter()
4326                .skip(events.len().saturating_sub(max_edit_event_count_for_format(
4327                    &ZetaFormat::V0114180EditableRegion,
4328                )))
4329        {
4330            let event_string = format_zeta1_event(event);
4331            if event_string.is_empty() {
4332                continue;
4333            }
4334            if !result.is_empty() {
4335                result.push_str("\n\n");
4336            }
4337            result.push_str(&event_string);
4338        }
4339        result
4340    }
4341
4342    fn format_zeta1_event(event: &Event) -> String {
4343        match event {
4344            Event::BufferChange {
4345                path,
4346                old_path,
4347                diff,
4348                ..
4349            } => {
4350                let mut prompt = String::new();
4351                if old_path != path {
4352                    writeln!(
4353                        prompt,
4354                        "User renamed {} to {}\n",
4355                        old_path.display(),
4356                        path.display()
4357                    )
4358                    .ok();
4359                }
4360                if !diff.is_empty() {
4361                    write!(
4362                        prompt,
4363                        "User edited {}:\n```diff\n{}\n```",
4364                        path.display(),
4365                        diff
4366                    )
4367                    .ok();
4368                }
4369                prompt
4370            }
4371        }
4372    }
4373
4374    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
4375    /// within `cursor_excerpt`.
4376    fn format_zeta1_excerpt(
4377        input: &ZetaPromptInput,
4378        editable_range: Range<usize>,
4379        context_range: Range<usize>,
4380    ) -> String {
4381        let path_str = input.cursor_path.to_string_lossy();
4382        let excerpt = &*input.cursor_excerpt;
4383        let cursor_offset = input.cursor_offset_in_excerpt;
4384
4385        let mut prompt = String::new();
4386        writeln!(&mut prompt, "```{path_str}").ok();
4387
4388        let starts_at_file_beginning =
4389            input.excerpt_start_row == Some(0) && context_range.start == 0;
4390        if starts_at_file_beginning {
4391            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
4392        }
4393
4394        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
4395
4396        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
4397        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
4398        prompt.push_str(CURSOR_MARKER);
4399        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
4400        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
4401
4402        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
4403        write!(prompt, "\n```").ok();
4404
4405        prompt
4406    }
4407
4408    /// Cleans zeta1 model output by extracting content between editable region
4409    /// markers and converting the zeta1 cursor marker to the universal one.
4410    /// Returns `None` if the output doesn't contain the expected markers.
4411    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
4412        let content = output.replace(CURSOR_MARKER, "");
4413
4414        let content_start = content
4415            .find(EDITABLE_REGION_START_MARKER)
4416            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
4417            .map(|pos| {
4418                if content.as_bytes().get(pos) == Some(&b'\n') {
4419                    pos + 1
4420                } else {
4421                    pos
4422                }
4423            })
4424            .unwrap_or(0);
4425
4426        let content_end = content
4427            .find(EDITABLE_REGION_END_MARKER)
4428            .map(|pos| {
4429                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
4430                    pos - 1
4431                } else {
4432                    pos
4433                }
4434            })
4435            .unwrap_or(content.len());
4436
4437        if content_start > content_end {
4438            return Some(String::new());
4439        }
4440
4441        let extracted = &content[content_start..content_end];
4442
4443        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
4444            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
4445            let text_before_cursor = text_before_cursor
4446                .find(EDITABLE_REGION_START_MARKER)
4447                .map(|pos| {
4448                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
4449                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
4450                        after_marker + 1
4451                    } else {
4452                        after_marker
4453                    }
4454                })
4455                .unwrap_or(0);
4456            let offset_in_extracted = zeta1_cursor_pos
4457                .saturating_sub(text_before_cursor)
4458                .min(extracted.len());
4459            offset_in_extracted
4460        });
4461
4462        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
4463        if let Some(offset) = cursor_offset {
4464            result.push_str(&extracted[..offset]);
4465            result.push_str(super::CURSOR_MARKER);
4466            result.push_str(&extracted[offset..]);
4467        } else {
4468            result.push_str(extracted);
4469        }
4470
4471        Some(result)
4472    }
4473}
4474
4475#[cfg(test)]
4476mod tests {
4477    use super::*;
4478    use indoc::indoc;
4479
4480    fn make_input(
4481        cursor_excerpt: &str,
4482        editable_range: Range<usize>,
4483        cursor_offset: usize,
4484        events: Vec<Event>,
4485        related_files: Vec<RelatedFile>,
4486    ) -> ZetaPromptInput {
4487        let context_range = 0..cursor_excerpt.len();
4488        ZetaPromptInput {
4489            cursor_path: Path::new("test.rs").into(),
4490            cursor_excerpt: cursor_excerpt.into(),
4491            cursor_offset_in_excerpt: cursor_offset,
4492            excerpt_start_row: None,
4493            events: events.into_iter().map(Arc::new).collect(),
4494            related_files: Some(related_files),
4495            active_buffer_diagnostics: vec![],
4496            excerpt_ranges: ExcerptRanges {
4497                editable_150: editable_range.clone(),
4498                editable_180: editable_range.clone(),
4499                editable_350: editable_range,
4500                editable_150_context_350: context_range.clone(),
4501                editable_180_context_350: context_range.clone(),
4502                editable_350_context_150: context_range,
4503                ..Default::default()
4504            },
4505            syntax_ranges: None,
4506            experiment: None,
4507            in_open_source_repo: false,
4508            can_collect_data: false,
4509            repo_url: None,
4510        }
4511    }
4512
4513    fn make_input_with_context_range(
4514        excerpt: &str,
4515        editable_range: Range<usize>,
4516        context_range: Range<usize>,
4517        cursor_offset: usize,
4518    ) -> ZetaPromptInput {
4519        ZetaPromptInput {
4520            cursor_path: Path::new("test.rs").into(),
4521            cursor_excerpt: excerpt.into(),
4522            cursor_offset_in_excerpt: cursor_offset,
4523            excerpt_start_row: None,
4524            events: vec![],
4525            related_files: Some(vec![]),
4526            active_buffer_diagnostics: vec![],
4527            excerpt_ranges: ExcerptRanges {
4528                editable_150: editable_range.clone(),
4529                editable_180: editable_range.clone(),
4530                editable_350: editable_range,
4531                editable_150_context_350: context_range.clone(),
4532                editable_180_context_350: context_range.clone(),
4533                editable_350_context_150: context_range,
4534                ..Default::default()
4535            },
4536            syntax_ranges: None,
4537            experiment: None,
4538            in_open_source_repo: false,
4539            can_collect_data: false,
4540            repo_url: None,
4541        }
4542    }
4543
4544    fn make_event(path: &str, diff: &str) -> Event {
4545        Event::BufferChange {
4546            path: Path::new(path).into(),
4547            old_path: Path::new(path).into(),
4548            diff: diff.to_string(),
4549            predicted: false,
4550            in_open_source_repo: false,
4551        }
4552    }
4553
4554    fn make_related_file(path: &str, content: &str) -> RelatedFile {
4555        RelatedFile {
4556            path: Path::new(path).into(),
4557            max_row: content.lines().count() as u32,
4558            excerpts: vec![RelatedExcerpt {
4559                row_range: 0..content.lines().count() as u32,
4560                text: content.into(),
4561                order: 0,
4562            }],
4563            in_open_source_repo: false,
4564        }
4565    }
4566
4567    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> Option<String> {
4568        format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4569    }
4570
4571    fn budget_with_margin(requested_tokens: usize) -> usize {
4572        ((requested_tokens as f64) / 0.9).ceil() as usize
4573    }
4574
4575    #[test]
4576    fn test_no_truncation_when_within_budget() {
4577        let input = make_input(
4578            "prefix\neditable\nsuffix",
4579            7..15,
4580            10,
4581            vec![make_event("a.rs", "-old\n+new\n")],
4582            vec![make_related_file("related.rs", "fn helper() {}\n")],
4583        );
4584
4585        assert_eq!(
4586            format_with_budget(&input, 10000).unwrap(),
4587            indoc! {r#"
4588                <|file_sep|>related.rs
4589                fn helper() {}
4590                <|file_sep|>edit history
4591                --- a/a.rs
4592                +++ b/a.rs
4593                -old
4594                +new
4595                <|file_sep|>test.rs
4596                <|fim_prefix|>
4597                prefix
4598                <|fim_middle|>current
4599                edi<|user_cursor|>table
4600                <|fim_suffix|>
4601
4602                suffix
4603                <|fim_middle|>updated
4604            "#}
4605            .to_string()
4606        );
4607    }
4608
4609    #[test]
4610    fn test_truncation_drops_edit_history_when_budget_tight() {
4611        let input = make_input(
4612            "code",
4613            0..4,
4614            2,
4615            vec![make_event("a.rs", "-x\n+y\n")],
4616            vec![
4617                make_related_file("r1.rs", "aaaaaaa\n"),
4618                make_related_file("r2.rs", "bbbbbbb\n"),
4619            ],
4620        );
4621
4622        assert_eq!(
4623            format_with_budget(&input, 10000).unwrap(),
4624            indoc! {r#"
4625                <|file_sep|>r1.rs
4626                aaaaaaa
4627                <|file_sep|>r2.rs
4628                bbbbbbb
4629                <|file_sep|>edit history
4630                --- a/a.rs
4631                +++ b/a.rs
4632                -x
4633                +y
4634                <|file_sep|>test.rs
4635                <|fim_prefix|>
4636                <|fim_middle|>current
4637                co<|user_cursor|>de
4638                <|fim_suffix|>
4639                <|fim_middle|>updated
4640            "#}
4641            .to_string()
4642        );
4643
4644        assert_eq!(
4645            format_with_budget(&input, budget_with_margin(55)),
4646            Some(
4647                indoc! {r#"
4648                <|file_sep|>edit history
4649                --- a/a.rs
4650                +++ b/a.rs
4651                -x
4652                +y
4653                <|file_sep|>test.rs
4654                <|fim_prefix|>
4655                <|fim_middle|>current
4656                co<|user_cursor|>de
4657                <|fim_suffix|>
4658                <|fim_middle|>updated
4659            "#}
4660                .to_string()
4661            )
4662        );
4663    }
4664
4665    #[test]
4666    fn test_truncation_includes_partial_excerpts() {
4667        let input = make_input(
4668            "x",
4669            0..1,
4670            0,
4671            vec![],
4672            vec![RelatedFile {
4673                path: Path::new("big.rs").into(),
4674                max_row: 30,
4675                in_open_source_repo: false,
4676                excerpts: vec![
4677                    RelatedExcerpt {
4678                        row_range: 0..10,
4679                        text: "first excerpt\n".into(),
4680                        order: 0,
4681                    },
4682                    RelatedExcerpt {
4683                        row_range: 10..20,
4684                        text: "second excerpt\n".into(),
4685                        order: 0,
4686                    },
4687                    RelatedExcerpt {
4688                        row_range: 20..30,
4689                        text: "third excerpt\n".into(),
4690                        order: 0,
4691                    },
4692                ],
4693            }],
4694        );
4695
4696        assert_eq!(
4697            format_with_budget(&input, 10000).unwrap(),
4698            indoc! {r#"
4699                <|file_sep|>big.rs
4700                first excerpt
4701                ...
4702                second excerpt
4703                ...
4704                third excerpt
4705                <|file_sep|>test.rs
4706                <|fim_prefix|>
4707                <|fim_middle|>current
4708                <|user_cursor|>x
4709                <|fim_suffix|>
4710                <|fim_middle|>updated
4711            "#}
4712            .to_string()
4713        );
4714
4715        assert_eq!(
4716            format_with_budget(&input, budget_with_margin(50)).unwrap(),
4717            indoc! {r#"
4718                <|file_sep|>big.rs
4719                first excerpt
4720                ...
4721                <|file_sep|>test.rs
4722                <|fim_prefix|>
4723                <|fim_middle|>current
4724                <|user_cursor|>x
4725                <|fim_suffix|>
4726                <|fim_middle|>updated
4727            "#}
4728            .to_string()
4729        );
4730    }
4731
4732    #[test]
4733    fn test_truncation_prioritizes_lower_order_excerpts() {
4734        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
4735        // With tight budget, only the lower-order excerpt from file_b should be included.
4736        let input = make_input(
4737            "x",
4738            0..1,
4739            0,
4740            vec![],
4741            vec![
4742                RelatedFile {
4743                    path: Path::new("file_a.rs").into(),
4744                    max_row: 10,
4745                    in_open_source_repo: false,
4746                    excerpts: vec![RelatedExcerpt {
4747                        row_range: 0..10,
4748                        text: "low priority content\n".into(),
4749                        order: 5,
4750                    }],
4751                },
4752                RelatedFile {
4753                    path: Path::new("file_b.rs").into(),
4754                    max_row: 10,
4755                    in_open_source_repo: false,
4756                    excerpts: vec![RelatedExcerpt {
4757                        row_range: 0..10,
4758                        text: "high priority content\n".into(),
4759                        order: 1,
4760                    }],
4761                },
4762            ],
4763        );
4764
4765        // With large budget, both files included; rendered in stable lexicographic order.
4766        assert_eq!(
4767            format_with_budget(&input, 10000).unwrap(),
4768            indoc! {r#"
4769                <|file_sep|>file_a.rs
4770                low priority content
4771                <|file_sep|>file_b.rs
4772                high priority content
4773                <|file_sep|>test.rs
4774                <|fim_prefix|>
4775                <|fim_middle|>current
4776                <|user_cursor|>x
4777                <|fim_suffix|>
4778                <|fim_middle|>updated
4779            "#}
4780            .to_string()
4781        );
4782
4783        // With tight budget, only file_b (lower order) fits.
4784        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
4785        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
4786        // file_a would need another 14 tokens, which doesn't fit.
4787        assert_eq!(
4788            format_with_budget(&input, budget_with_margin(52)).unwrap(),
4789            indoc! {r#"
4790                <|file_sep|>file_b.rs
4791                high priority content
4792                <|file_sep|>test.rs
4793                <|fim_prefix|>
4794                <|fim_middle|>current
4795                <|user_cursor|>x
4796                <|fim_suffix|>
4797                <|fim_middle|>updated
4798            "#}
4799            .to_string()
4800        );
4801    }
4802
4803    #[test]
4804    fn test_truncation_drops_high_order_excerpts_within_file() {
4805        // A single file has excerpts at order 1 and order 3. With a tight budget,
4806        // only the order-1 excerpts are included while the order-3 excerpt is
4807        // dropped — even though they belong to the same file. This also preserves
4808        // the parent invariant: parent outline items have order ≤ their best
4809        // child, so they're always included when any child is.
4810        let input = make_input(
4811            "x",
4812            0..1,
4813            0,
4814            vec![],
4815            vec![RelatedFile {
4816                path: Path::new("mod.rs").into(),
4817                max_row: 30,
4818                in_open_source_repo: false,
4819                excerpts: vec![
4820                    RelatedExcerpt {
4821                        row_range: 0..5,
4822                        text: "mod header\n".into(),
4823                        order: 1,
4824                    },
4825                    RelatedExcerpt {
4826                        row_range: 5..15,
4827                        text: "important fn\n".into(),
4828                        order: 1,
4829                    },
4830                    RelatedExcerpt {
4831                        row_range: 15..30,
4832                        text: "less important fn\n".into(),
4833                        order: 3,
4834                    },
4835                ],
4836            }],
4837        );
4838
4839        // With large budget, all three excerpts included.
4840        assert_eq!(
4841            format_with_budget(&input, 10000).unwrap(),
4842            indoc! {r#"
4843                <|file_sep|>mod.rs
4844                mod header
4845                ...
4846                important fn
4847                ...
4848                less important fn
4849                <|file_sep|>test.rs
4850                <|fim_prefix|>
4851                <|fim_middle|>current
4852                <|user_cursor|>x
4853                <|fim_suffix|>
4854                <|fim_middle|>updated
4855            "#}
4856            .to_string()
4857        );
4858
4859        // With tight budget, only order<=1 excerpts included (header + important fn).
4860        assert_eq!(
4861            format_with_budget(&input, budget_with_margin(55)).unwrap(),
4862            indoc! {r#"
4863                <|file_sep|>mod.rs
4864                mod header
4865                ...
4866                important fn
4867                ...
4868                <|file_sep|>test.rs
4869                <|fim_prefix|>
4870                <|fim_middle|>current
4871                <|user_cursor|>x
4872                <|fim_suffix|>
4873                <|fim_middle|>updated
4874            "#}
4875            .to_string()
4876        );
4877    }
4878
4879    #[test]
4880    fn test_truncation_drops_older_events_first() {
4881        let input = make_input(
4882            "x",
4883            0..1,
4884            0,
4885            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
4886            vec![],
4887        );
4888
4889        assert_eq!(
4890            format_with_budget(&input, 10000).unwrap(),
4891            indoc! {r#"
4892                <|file_sep|>edit history
4893                --- a/old.rs
4894                +++ b/old.rs
4895                -1
4896                --- a/new.rs
4897                +++ b/new.rs
4898                -2
4899                <|file_sep|>test.rs
4900                <|fim_prefix|>
4901                <|fim_middle|>current
4902                <|user_cursor|>x
4903                <|fim_suffix|>
4904                <|fim_middle|>updated
4905            "#}
4906            .to_string()
4907        );
4908
4909        assert_eq!(
4910            format_with_budget(&input, 60).unwrap(),
4911            indoc! {r#"
4912                <|file_sep|>edit history
4913                --- a/new.rs
4914                +++ b/new.rs
4915                -2
4916                <|file_sep|>test.rs
4917                <|fim_prefix|>
4918                <|fim_middle|>current
4919                <|user_cursor|>x
4920                <|fim_suffix|>
4921                <|fim_middle|>updated
4922            "#}
4923            .to_string()
4924        );
4925    }
4926
4927    #[test]
4928    fn test_cursor_excerpt_always_included_with_minimal_budget() {
4929        let input = make_input(
4930            "fn main() {}",
4931            0..12,
4932            3,
4933            vec![make_event("a.rs", "-old\n+new\n")],
4934            vec![make_related_file("related.rs", "helper\n")],
4935        );
4936
4937        assert!(format_with_budget(&input, 30).is_none())
4938    }
4939
4940    #[track_caller]
4941    fn format_seed_coder(input: &ZetaPromptInput) -> String {
4942        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4943            .expect("seed coder prompt formatting should succeed")
4944    }
4945
4946    #[track_caller]
4947    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4948        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4949            .expect("seed coder prompt formatting should succeed")
4950    }
4951
4952    #[test]
4953    fn test_seed_coder_basic_format() {
4954        let input = make_input(
4955            "prefix\neditable\nsuffix",
4956            7..15,
4957            10,
4958            vec![make_event("a.rs", "-old\n+new\n")],
4959            vec![make_related_file("related.rs", "fn helper() {}\n")],
4960        );
4961
4962        assert_eq!(
4963            format_seed_coder(&input),
4964            indoc! {r#"
4965                <[fim-suffix]>
4966                suffix
4967                <[fim-prefix]><filename>related.rs
4968                fn helper() {}
4969
4970                <filename>edit_history
4971                --- a/a.rs
4972                +++ b/a.rs
4973                -old
4974                +new
4975
4976                <filename>test.rs
4977                prefix
4978                <<<<<<< CURRENT
4979                edi<|user_cursor|>table
4980                =======
4981                <[fim-middle]>"#}
4982        );
4983    }
4984
4985    #[test]
4986    fn test_v0317_formats_prompt_with_many_related_files() {
4987        let related_files = (0..900)
4988            .map(|index| {
4989                make_related_file(
4990                    &format!("related_{index}.rs"),
4991                    "fn helper() {\n    let value = 1;\n}\n",
4992                )
4993            })
4994            .collect();
4995
4996        let input = make_input(
4997            "code",
4998            0..4,
4999            2,
5000            vec![make_event("a.rs", "-x\n+y\n")],
5001            related_files,
5002        );
5003
5004        let prompt =
5005            format_prompt_with_budget_for_format(&input, ZetaFormat::V0317SeedMultiRegions, 4096);
5006
5007        assert!(prompt.is_some());
5008        let prompt = prompt.expect("v0317 should produce a prompt under high related-file count");
5009        assert!(prompt.contains("test.rs"));
5010        assert!(prompt.contains(CURSOR_MARKER));
5011    }
5012
5013    #[test]
5014    fn test_seed_coder_no_context() {
5015        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
5016
5017        assert_eq!(
5018            format_seed_coder(&input),
5019            indoc! {r#"
5020                <[fim-suffix]>
5021                after
5022                <[fim-prefix]><filename>test.rs
5023                before
5024                <<<<<<< CURRENT
5025                mid<|user_cursor|>dle
5026                =======
5027                <[fim-middle]>"#}
5028        );
5029    }
5030
5031    #[test]
5032    fn test_seed_coder_truncation_drops_context() {
5033        let input = make_input(
5034            "code",
5035            0..4,
5036            2,
5037            vec![make_event("a.rs", "-x\n+y\n")],
5038            vec![make_related_file("r1.rs", "content\n")],
5039        );
5040
5041        // With large budget, everything is included
5042        assert_eq!(
5043            format_seed_coder(&input),
5044            indoc! {r#"
5045                <[fim-suffix]>
5046                <[fim-prefix]><filename>r1.rs
5047                content
5048
5049                <filename>edit_history
5050                --- a/a.rs
5051                +++ b/a.rs
5052                -x
5053                +y
5054
5055                <filename>test.rs
5056                <<<<<<< CURRENT
5057                co<|user_cursor|>de
5058                =======
5059                <[fim-middle]>"#}
5060        );
5061
5062        assert_eq!(
5063            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 24),
5064            None
5065        );
5066
5067        assert_eq!(
5068            format_seed_coder_with_budget(&input, 40),
5069            indoc! {r#"
5070                <[fim-suffix]>
5071                <[fim-prefix]><filename>test.rs
5072                <<<<<<< CURRENT
5073                co<|user_cursor|>de
5074                =======
5075                <[fim-middle]>"#
5076            }
5077        )
5078    }
5079
5080    #[test]
5081    fn test_seed_coder_truncation_prioritizes_lower_order() {
5082        let input = make_input(
5083            "code",
5084            0..4,
5085            2,
5086            vec![],
5087            vec![
5088                RelatedFile {
5089                    path: Path::new("low_prio.rs").into(),
5090                    max_row: 5,
5091                    in_open_source_repo: false,
5092                    excerpts: vec![RelatedExcerpt {
5093                        row_range: 0..5,
5094                        text: "low prio\n".into(),
5095                        order: 10,
5096                    }],
5097                },
5098                RelatedFile {
5099                    path: Path::new("high_prio.rs").into(),
5100                    max_row: 5,
5101                    in_open_source_repo: false,
5102                    excerpts: vec![RelatedExcerpt {
5103                        row_range: 0..5,
5104                        text: "high prio\n".into(),
5105                        order: 1,
5106                    }],
5107                },
5108            ],
5109        );
5110
5111        // With large budget, both included; rendered in stable lexicographic order.
5112        assert_eq!(
5113            format_seed_coder(&input),
5114            indoc! {r#"
5115                <[fim-suffix]>
5116                <[fim-prefix]><filename>low_prio.rs
5117                low prio
5118                <filename>high_prio.rs
5119                high prio
5120
5121                <filename>test.rs
5122                <<<<<<< CURRENT
5123                co<|user_cursor|>de
5124                =======
5125                <[fim-middle]>"#}
5126        );
5127
5128        // With tight budget under the generic heuristic, context is dropped but the
5129        // minimal cursor section still fits.
5130        assert_eq!(
5131            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 44),
5132            Some(
5133                indoc! {r#"
5134                    <[fim-suffix]>
5135                    <[fim-prefix]><filename>test.rs
5136                    <<<<<<< CURRENT
5137                    co<|user_cursor|>de
5138                    =======
5139                    <[fim-middle]>"#}
5140                .to_string()
5141            )
5142        );
5143    }
5144
5145    #[test]
5146    fn test_format_zeta1_from_input_basic() {
5147        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
5148        let input = ZetaPromptInput {
5149            cursor_path: Path::new("src/main.rs").into(),
5150            cursor_excerpt: excerpt.into(),
5151            cursor_offset_in_excerpt: 30,
5152            excerpt_start_row: Some(0),
5153            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
5154            related_files: Some(vec![]),
5155            active_buffer_diagnostics: vec![],
5156            excerpt_ranges: ExcerptRanges {
5157                editable_150: 15..41,
5158                editable_180: 15..41,
5159                editable_350: 15..41,
5160                editable_150_context_350: 0..excerpt.len(),
5161                editable_180_context_350: 0..excerpt.len(),
5162                editable_350_context_150: 0..excerpt.len(),
5163                ..Default::default()
5164            },
5165            syntax_ranges: None,
5166            experiment: None,
5167            in_open_source_repo: false,
5168            can_collect_data: false,
5169            repo_url: None,
5170        };
5171
5172        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
5173
5174        assert_eq!(
5175            prompt,
5176            concat!(
5177                "### Instruction:\n",
5178                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5179                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5180                "into account the cursor location.\n",
5181                "\n",
5182                "### User Edits:\n",
5183                "\n",
5184                "User edited other.rs:\n",
5185                "```diff\n",
5186                "-old\n",
5187                "+new\n",
5188                "\n",
5189                "```\n",
5190                "\n",
5191                "### User Excerpt:\n",
5192                "\n",
5193                "```src/main.rs\n",
5194                "<|start_of_file|>\n",
5195                "fn before() {}\n",
5196                "<|editable_region_start|>\n",
5197                "fn foo() {\n",
5198                "    <|user_cursor_is_here|>let x = 1;\n",
5199                "\n",
5200                "<|editable_region_end|>}\n",
5201                "fn after() {}\n",
5202                "\n",
5203                "```\n",
5204                "\n",
5205                "### Response:\n",
5206            ),
5207        );
5208    }
5209
5210    #[test]
5211    fn test_format_zeta1_from_input_no_start_of_file() {
5212        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
5213        let input = ZetaPromptInput {
5214            cursor_path: Path::new("src/main.rs").into(),
5215            cursor_excerpt: excerpt.into(),
5216            cursor_offset_in_excerpt: 15,
5217            excerpt_start_row: Some(10),
5218            events: vec![],
5219            related_files: Some(vec![]),
5220            active_buffer_diagnostics: vec![],
5221            excerpt_ranges: ExcerptRanges {
5222                editable_150: 0..28,
5223                editable_180: 0..28,
5224                editable_350: 0..28,
5225                editable_150_context_350: 0..28,
5226                editable_180_context_350: 0..28,
5227                editable_350_context_150: 0..28,
5228                ..Default::default()
5229            },
5230            syntax_ranges: None,
5231            experiment: None,
5232            in_open_source_repo: false,
5233            can_collect_data: false,
5234            repo_url: None,
5235        };
5236
5237        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
5238
5239        assert_eq!(
5240            prompt,
5241            concat!(
5242                "### Instruction:\n",
5243                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5244                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5245                "into account the cursor location.\n",
5246                "\n",
5247                "### User Edits:\n",
5248                "\n",
5249                "\n",
5250                "\n",
5251                "### User Excerpt:\n",
5252                "\n",
5253                "```src/main.rs\n",
5254                "<|editable_region_start|>\n",
5255                "fn foo() {\n",
5256                "    <|user_cursor_is_here|>let x = 1;\n",
5257                "}\n",
5258                "\n",
5259                "<|editable_region_end|>\n",
5260                "```\n",
5261                "\n",
5262                "### Response:\n",
5263            ),
5264        );
5265    }
5266
5267    #[test]
5268    fn test_format_zeta1_from_input_with_sub_ranges() {
5269        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
5270        let editable_range = 10..37;
5271        let context_range = 0..excerpt.len();
5272
5273        let input = ZetaPromptInput {
5274            cursor_path: Path::new("test.rs").into(),
5275            cursor_excerpt: excerpt.into(),
5276            cursor_offset_in_excerpt: 25,
5277            excerpt_start_row: Some(0),
5278            events: vec![],
5279            related_files: Some(vec![]),
5280            active_buffer_diagnostics: vec![],
5281            excerpt_ranges: ExcerptRanges {
5282                editable_150: editable_range.clone(),
5283                editable_180: editable_range.clone(),
5284                editable_350: editable_range.clone(),
5285                editable_150_context_350: context_range.clone(),
5286                editable_180_context_350: context_range.clone(),
5287                editable_350_context_150: context_range.clone(),
5288                ..Default::default()
5289            },
5290            syntax_ranges: None,
5291            experiment: None,
5292            in_open_source_repo: false,
5293            can_collect_data: false,
5294            repo_url: None,
5295        };
5296
5297        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
5298
5299        assert_eq!(
5300            prompt,
5301            concat!(
5302                "### Instruction:\n",
5303                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5304                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5305                "into account the cursor location.\n",
5306                "\n",
5307                "### User Edits:\n",
5308                "\n",
5309                "\n",
5310                "\n",
5311                "### User Excerpt:\n",
5312                "\n",
5313                "```test.rs\n",
5314                "<|start_of_file|>\n",
5315                "// prefix\n",
5316                "<|editable_region_start|>\n",
5317                "fn foo() {\n",
5318                "    <|user_cursor_is_here|>let x = 1;\n",
5319                "}\n",
5320                "<|editable_region_end|>\n",
5321                "// suffix\n",
5322                "\n",
5323                "```\n",
5324                "\n",
5325                "### Response:\n",
5326            ),
5327        );
5328    }
5329
5330    #[test]
5331    fn test_max_event_count() {
5332        fn make_numbered_event(index: usize) -> Event {
5333            return make_event(
5334                &format!("event-{index}.rs"),
5335                &format!("-old-{index}\n+new-{index}\n"),
5336            );
5337        }
5338        let input = make_input(
5339            "x",
5340            0..1,
5341            0,
5342            (0..3).map(make_numbered_event).collect(),
5343            vec![],
5344        );
5345
5346        let edit_history_section = format_edit_history_within_budget(
5347            &input.events,
5348            "<|file_sep|>",
5349            "edit history",
5350            usize::MAX,
5351            5,
5352        );
5353
5354        assert_eq!(
5355            &edit_history_section,
5356            indoc!(
5357                "
5358                <|file_sep|>edit history
5359                --- a/event-0.rs
5360                +++ b/event-0.rs
5361                -old-0
5362                +new-0
5363                --- a/event-1.rs
5364                +++ b/event-1.rs
5365                -old-1
5366                +new-1
5367                --- a/event-2.rs
5368                +++ b/event-2.rs
5369                -old-2
5370                +new-2
5371            "
5372            )
5373        );
5374
5375        let edit_history_section = format_edit_history_within_budget(
5376            &input.events,
5377            "<|file_sep|>",
5378            "edit history",
5379            usize::MAX,
5380            2,
5381        );
5382
5383        assert_eq!(
5384            &edit_history_section,
5385            indoc!(
5386                "
5387                <|file_sep|>edit history
5388                --- a/event-1.rs
5389                +++ b/event-1.rs
5390                -old-1
5391                +new-1
5392                --- a/event-2.rs
5393                +++ b/event-2.rs
5394                -old-2
5395                +new-2
5396            "
5397            )
5398        );
5399
5400        let edit_history_section = format_edit_history_within_budget(
5401            &input.events,
5402            "<|file_sep|>",
5403            "edit history",
5404            usize::MAX,
5405            0,
5406        );
5407
5408        assert_eq!(&edit_history_section, "");
5409    }
5410
5411    #[test]
5412    fn test_clean_zeta1_model_output_basic() {
5413        let output = indoc! {"
5414            <|editable_region_start|>
5415            fn main() {
5416                println!(\"hello\");
5417            }
5418            <|editable_region_end|>
5419        "};
5420
5421        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5422        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
5423    }
5424
5425    #[test]
5426    fn test_clean_zeta1_model_output_with_cursor() {
5427        let output = indoc! {"
5428            <|editable_region_start|>
5429            fn main() {
5430                <|user_cursor_is_here|>println!(\"hello\");
5431            }
5432            <|editable_region_end|>
5433        "};
5434
5435        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5436        assert_eq!(
5437            cleaned,
5438            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
5439        );
5440    }
5441
5442    #[test]
5443    fn test_clean_zeta1_model_output_no_markers() {
5444        let output = "fn main() {}\n";
5445        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5446        assert_eq!(cleaned, "fn main() {}\n");
5447    }
5448
5449    #[test]
5450    fn test_clean_zeta1_model_output_empty_region() {
5451        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
5452        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5453        assert_eq!(cleaned, "");
5454    }
5455
5456    fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
5457        let mut result = excerpt.to_string();
5458        result.replace_range(
5459            parsed_output.range_in_excerpt.clone(),
5460            &parsed_output.new_editable_region,
5461        );
5462        result
5463    }
5464
5465    #[test]
5466    fn test_parse_zeta2_model_output() {
5467        let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
5468        let context_start = excerpt.find("ctx start").unwrap();
5469        let context_end = excerpt.find("after ctx").unwrap();
5470        let editable_start = excerpt.find("editable old").unwrap();
5471        let editable_end = editable_start + "editable old\n".len();
5472        let input = make_input_with_context_range(
5473            excerpt,
5474            editable_start..editable_end,
5475            context_start..context_end,
5476            editable_start,
5477        );
5478
5479        let output = parse_zeta2_model_output(
5480            "editable new\n>>>>>>> UPDATED\n",
5481            ZetaFormat::V0131GitMergeMarkersPrefix,
5482            &input,
5483        )
5484        .unwrap();
5485
5486        assert_eq!(
5487            apply_edit(excerpt, &output),
5488            "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
5489        );
5490    }
5491
5492    #[test]
5493    fn test_parse_zeta2_model_output_identity() {
5494        let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
5495        let editable_start = excerpt.find("bbb").unwrap();
5496        let editable_end = excerpt.find("ddd").unwrap();
5497        let input = make_input_with_context_range(
5498            excerpt,
5499            editable_start..editable_end,
5500            0..excerpt.len(),
5501            editable_start,
5502        );
5503
5504        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5505        let output =
5506            parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
5507
5508        assert_eq!(apply_edit(excerpt, &output), excerpt);
5509    }
5510
5511    #[test]
5512    fn test_parse_zeta2_model_output_strips_end_marker() {
5513        let excerpt = "hello\nworld\n";
5514        let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
5515
5516        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5517        let output1 =
5518            parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
5519        let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();
5520
5521        assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
5522        assert_eq!(apply_edit(excerpt, &output1), "new content\n");
5523    }
5524
5525    #[test]
5526    fn test_parsed_output_to_patch_round_trips_through_udiff_application() {
5527        let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
5528        let context_start = excerpt.find("ctx start").unwrap();
5529        let context_end = excerpt.find("after ctx").unwrap();
5530        let editable_start = excerpt.find("editable old").unwrap();
5531        let editable_end = editable_start + "editable old\n".len();
5532        let input = make_input_with_context_range(
5533            excerpt,
5534            editable_start..editable_end,
5535            context_start..context_end,
5536            editable_start,
5537        );
5538
5539        let parsed = parse_zeta2_model_output(
5540            "editable new\n>>>>>>> UPDATED\n",
5541            ZetaFormat::V0131GitMergeMarkersPrefix,
5542            &input,
5543        )
5544        .unwrap();
5545        let expected = apply_edit(excerpt, &parsed);
5546        let patch = parsed_output_to_patch(&input, parsed).unwrap();
5547        let patched = udiff::apply_diff_to_string(&patch, excerpt).unwrap();
5548
5549        assert_eq!(patched, expected);
5550    }
5551
5552    #[test]
5553    fn test_special_tokens_not_triggered_by_comment_separator() {
5554        // Regression test for https://github.com/zed-industries/zed/issues/52489
5555        let excerpt = "fn main() {\n    // =======\n    println!(\"hello\");\n}\n";
5556        let input = make_input(excerpt, 0..excerpt.len(), 0, vec![], vec![]);
5557        assert!(
5558            !prompt_input_contains_special_tokens(&input, ZetaFormat::V0131GitMergeMarkersPrefix),
5559            "comment containing ======= should not trigger special token detection"
5560        );
5561    }
5562}