zeta_prompt.rs

   1pub mod excerpt_ranges;
   2pub mod multi_region;
   3pub mod udiff;
   4
   5use anyhow::{Result, anyhow};
   6use serde::{Deserialize, Serialize};
   7use std::fmt::Write;
   8use std::ops::Range;
   9use std::path::Path;
  10use std::sync::Arc;
  11use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
  12
  13pub use crate::excerpt_ranges::{
  14    ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
  15};
  16
/// Marker string handed to the cursor-section writers together with the cursor offset;
/// it is also listed among the special tokens of the multi-region formats.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes to the prompt builder.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  24
  25fn estimate_tokens(bytes: usize) -> usize {
  26    bytes / 3
  27}
  28
  29/// Leave some slack to avoid overflow.
  30fn apply_prompt_budget_margin(max_tokens: usize) -> usize {
  31    (max_tokens as f64 * 0.9).floor() as usize
  32}
  33
/// Everything the prompt formatter needs for one prediction request:
/// the excerpt around the cursor plus edit history, related files and
/// request metadata.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor. Related files with the same
    /// path have their excerpts filtered against the cursor excerpt.
    pub cursor_path: Arc<Path>,
    /// Text excerpt surrounding the cursor; this is the text scanned for
    /// special tokens.
    pub cursor_excerpt: Arc<str>,
    /// Position of the cursor inside `cursor_excerpt`
    /// (presumably a byte offset, like `syntax_ranges` — TODO confirm).
    pub cursor_offset_in_excerpt: usize,
    /// Row offset added to excerpt-relative rows to obtain the row range used
    /// when filtering related-file excerpts. When `None`, no filtering is done.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events that are rendered into the prompt's edit-history section.
    pub events: Vec<Arc<Event>>,
    /// Related files to include as extra context; `None` is treated as empty.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    /// Diagnostics attached to the request; omitted from serialization when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    // NOTE(review): the three fields below are not read anywhere in the visible
    // part of this file; their exact semantics should be confirmed at the call sites.
    #[serde(default)]
    pub in_open_source_repo: bool,
    #[serde(default)]
    pub can_collect_data: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
  64
/// Identifies a versioned prompt/output format.
///
/// The variant name doubles as the textual name of the format: `Display`
/// prints it and `ZetaFormat::parse` matches against it. The default format
/// is `V0131GitMergeMarkersPrefix`.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Required because `v0226Hashline` starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
    /// Multi-block marker spans with NO_EDITS sentinel.
    V0306SeedMultiRegions,
    /// Byte-exact marker spans; all intermediate markers emitted; repeated marker means no-edit.
    V0316SeedMultiRegions,
    /// V0316 with larger block sizes.
    V0318SeedMultiRegions,
    /// V0316, but marker numbers are relative to the cursor block (e.g. -1, -0, +1).
    // NOTE: declared after V0318 even though its version number is lower.
    V0317SeedMultiRegions,
}
 100
 101impl std::fmt::Display for ZetaFormat {
 102    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 103        write!(f, "{}", <&'static str>::from(self))
 104    }
 105}
 106
 107impl ZetaFormat {
 108    pub fn parse(format_name: &str) -> Result<Self> {
 109        let mut results = ZetaFormat::iter().filter(|version| {
 110            <&'static str>::from(version)
 111                .to_lowercase()
 112                .contains(&format_name.to_lowercase())
 113        });
 114        let Some(result) = results.next() else {
 115            anyhow::bail!(
 116                "`{format_name}` did not match any of:\n{}",
 117                Self::options_as_string()
 118            );
 119        };
 120        if results.next().is_some() {
 121            anyhow::bail!(
 122                "`{format_name}` matched more than one of:\n{}",
 123                Self::options_as_string()
 124            );
 125        }
 126        Ok(result)
 127    }
 128
 129    pub fn options_as_string() -> String {
 130        ZetaFormat::iter()
 131            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 132            .collect::<Vec<_>>()
 133            .concat()
 134    }
 135}
 136
/// An entry of the user's edit history.
///
/// Serialized with an `"event"` tag field naming the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, carried as diff text.
    BufferChange {
        /// Path written after `+++ b` when the event is rendered.
        path: Arc<Path>,
        /// Path written after `--- a` when the event is rendered.
        old_path: Arc<Path>,
        /// Diff body appended verbatim after the two header lines.
        diff: String,
        /// When true, the rendered event is prefixed with a
        /// "User accepted prediction" comment line.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; not written into the prompt.
        in_open_source_repo: bool,
    },
}
 148
 149impl Event {
 150    pub fn in_open_source_repo(&self) -> bool {
 151        match self {
 152            Event::BufferChange {
 153                in_open_source_repo,
 154                ..
 155            } => *in_open_source_repo,
 156        }
 157    }
 158}
 159
 160pub fn write_event(prompt: &mut String, event: &Event) {
 161    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 162        for component in path.components() {
 163            prompt.push('/');
 164            write!(prompt, "{}", component.as_os_str().display()).ok();
 165        }
 166    }
 167    match event {
 168        Event::BufferChange {
 169            path,
 170            old_path,
 171            diff,
 172            predicted,
 173            in_open_source_repo: _,
 174        } => {
 175            if *predicted {
 176                prompt.push_str("// User accepted prediction:\n");
 177            }
 178            prompt.push_str("--- a");
 179            write_path_as_unix_str(prompt, old_path.as_ref());
 180            prompt.push_str("\n+++ b");
 181            write_path_as_unix_str(prompt, path.as_ref());
 182            prompt.push('\n');
 183            prompt.push_str(diff);
 184        }
 185    }
 186}
 187
/// A diagnostic sent along with the prompt input
/// (see `ZetaPromptInput::active_buffer_diagnostics`).
// NOTE(review): none of these fields are read in the visible part of this file;
// the per-field notes below are inferred from names and types — confirm with the producer.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    /// Optional numeric severity (encoding not visible here).
    pub severity: Option<i32>,
    /// Diagnostic message text.
    pub message: String,
    /// Source text snippet associated with the diagnostic.
    pub snippet: String,
    /// Presumably the buffer rows covered by `snippet` — TODO confirm.
    pub snippet_buffer_row_range: Range<u32>,
    /// Presumably the diagnostic's offset range inside `snippet` — TODO confirm.
    pub diagnostic_range_in_snippet: Range<usize>,
}
 196
/// A file supplying additional context excerpts for the prompt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path of the file; compared against the cursor path when redundant
    /// excerpts are filtered out.
    pub path: Arc<Path>,
    /// Presumably the last row of the file — TODO confirm (not read in the visible code).
    pub max_row: u32,
    /// Excerpts taken from this file. Files left with no excerpts are dropped
    /// by `filter_redundant_excerpts`.
    pub excerpts: Vec<RelatedExcerpt>,
    /// Defaults to `false` when absent from the serialized form.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 205
/// One excerpt of a `RelatedFile`.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by the excerpt; compared against the cursor excerpt's row
    /// range to detect excerpts that the cursor excerpt already contains.
    pub row_range: Range<u32>,
    /// Text of the excerpt.
    pub text: Arc<str>,
    /// Defaults to `0` when absent from the serialized form.
    // NOTE(review): presumably a priority/ordering key — confirm with the consumer.
    #[serde(default)]
    pub order: usize,
}
 213
 214pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 215    special_tokens_for_format(format).iter().any(|token| {
 216        if let Some(line_token) = token.strip_suffix('\n') {
 217            input.cursor_excerpt.lines().any(|line| line == line_token)
 218        } else {
 219            input.cursor_excerpt.contains(token)
 220        }
 221    })
 222}
 223
/// Formats `input` as a prompt in the given `format`, using `MAX_PROMPT_TOKENS`
/// as the token budget.
///
/// Returns `None` when `format_prompt_with_budget_for_format` cannot fit the
/// prompt into that budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> Option<String> {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
 227
 228pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 229    match format {
 230        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
 231        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
 232        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
 233        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 234        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
 235        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
 236        ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 237        ZetaFormat::v0226Hashline => hashline::special_tokens(),
 238        ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
 239        ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
 240        ZetaFormat::V0316SeedMultiRegions => {
 241            static TOKENS: &[&str] = &[
 242                seed_coder::FIM_SUFFIX,
 243                seed_coder::FIM_PREFIX,
 244                seed_coder::FIM_MIDDLE,
 245                seed_coder::FILE_MARKER,
 246                multi_region::V0316_END_MARKER,
 247                CURSOR_MARKER,
 248                multi_region::MARKER_TAG_PREFIX,
 249            ];
 250            TOKENS
 251        }
 252        ZetaFormat::V0318SeedMultiRegions => {
 253            static TOKENS: &[&str] = &[
 254                seed_coder::FIM_SUFFIX,
 255                seed_coder::FIM_PREFIX,
 256                seed_coder::FIM_MIDDLE,
 257                seed_coder::FILE_MARKER,
 258                multi_region::V0318_END_MARKER,
 259                CURSOR_MARKER,
 260                multi_region::MARKER_TAG_PREFIX,
 261            ];
 262            TOKENS
 263        }
 264        ZetaFormat::V0317SeedMultiRegions => {
 265            static TOKENS: &[&str] = &[
 266                seed_coder::FIM_SUFFIX,
 267                seed_coder::FIM_PREFIX,
 268                seed_coder::FIM_MIDDLE,
 269                seed_coder::FILE_MARKER,
 270                multi_region::V0317_END_MARKER,
 271                CURSOR_MARKER,
 272                multi_region::RELATIVE_MARKER_TAG_PREFIX,
 273            ];
 274            TOKENS
 275        }
 276        ZetaFormat::V0306SeedMultiRegions => {
 277            static TOKENS: &[&str] = &[
 278                seed_coder::FIM_SUFFIX,
 279                seed_coder::FIM_PREFIX,
 280                seed_coder::FIM_MIDDLE,
 281                seed_coder::FILE_MARKER,
 282                seed_coder::START_MARKER,
 283                seed_coder::SEPARATOR,
 284                seed_coder::END_MARKER,
 285                CURSOR_MARKER,
 286                multi_region::MARKER_TAG_PREFIX,
 287            ];
 288            TOKENS
 289        }
 290    }
 291}
 292
 293/// Returns the (editable_token_limit, context_token_limit) for a given format.
 294pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
 295    match format {
 296        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
 297        ZetaFormat::V0114180EditableRegion => (180, 350),
 298        ZetaFormat::V0120GitMergeMarkers
 299        | ZetaFormat::V0131GitMergeMarkersPrefix
 300        | ZetaFormat::V0211Prefill
 301        | ZetaFormat::V0211SeedCoder
 302        | ZetaFormat::v0226Hashline
 303        | ZetaFormat::V0306SeedMultiRegions
 304        | ZetaFormat::V0316SeedMultiRegions
 305        | ZetaFormat::V0318SeedMultiRegions
 306        | ZetaFormat::V0317SeedMultiRegions
 307        | ZetaFormat::V0304SeedNoEdits => (350, 150),
 308        ZetaFormat::V0304VariableEdit => (1024, 0),
 309    }
 310}
 311
 312pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 313    match format {
 314        ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
 315        ZetaFormat::V0112MiddleAtEnd
 316        | ZetaFormat::V0113Ordered
 317        | ZetaFormat::V0114180EditableRegion
 318        | ZetaFormat::V0120GitMergeMarkers
 319        | ZetaFormat::V0131GitMergeMarkersPrefix
 320        | ZetaFormat::V0211Prefill
 321        | ZetaFormat::V0211SeedCoder
 322        | ZetaFormat::V0304VariableEdit
 323        | ZetaFormat::V0306SeedMultiRegions
 324        | ZetaFormat::V0304SeedNoEdits => &[],
 325        ZetaFormat::V0316SeedMultiRegions => &[multi_region::V0316_END_MARKER],
 326        ZetaFormat::V0318SeedMultiRegions => &[multi_region::V0318_END_MARKER],
 327        ZetaFormat::V0317SeedMultiRegions => &[multi_region::V0317_END_MARKER],
 328    }
 329}
 330
 331pub fn excerpt_ranges_for_format(
 332    format: ZetaFormat,
 333    ranges: &ExcerptRanges,
 334) -> (Range<usize>, Range<usize>) {
 335    match format {
 336        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 337            ranges.editable_150.clone(),
 338            ranges.editable_150_context_350.clone(),
 339        ),
 340        ZetaFormat::V0114180EditableRegion => (
 341            ranges.editable_180.clone(),
 342            ranges.editable_180_context_350.clone(),
 343        ),
 344        ZetaFormat::V0120GitMergeMarkers
 345        | ZetaFormat::V0131GitMergeMarkersPrefix
 346        | ZetaFormat::V0211Prefill
 347        | ZetaFormat::V0211SeedCoder
 348        | ZetaFormat::v0226Hashline
 349        | ZetaFormat::V0304SeedNoEdits
 350        | ZetaFormat::V0306SeedMultiRegions
 351        | ZetaFormat::V0316SeedMultiRegions
 352        | ZetaFormat::V0318SeedMultiRegions
 353        | ZetaFormat::V0317SeedMultiRegions => (
 354            ranges.editable_350.clone(),
 355            ranges.editable_350_context_150.clone(),
 356        ),
 357        ZetaFormat::V0304VariableEdit => {
 358            let context = ranges
 359                .editable_350_context_1024
 360                .clone()
 361                .or(ranges.editable_350_context_512.clone())
 362                .unwrap_or_else(|| ranges.editable_350_context_150.clone());
 363            (context.clone(), context)
 364        }
 365    }
 366}
 367
 368pub fn write_cursor_excerpt_section_for_format(
 369    format: ZetaFormat,
 370    prompt: &mut String,
 371    path: &Path,
 372    context: &str,
 373    editable_range: &Range<usize>,
 374    cursor_offset: usize,
 375) {
 376    match format {
 377        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
 378            prompt,
 379            path,
 380            context,
 381            editable_range,
 382            cursor_offset,
 383        ),
 384        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 385            v0113_ordered::write_cursor_excerpt_section(
 386                prompt,
 387                path,
 388                context,
 389                editable_range,
 390                cursor_offset,
 391            )
 392        }
 393        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 394            prompt,
 395            path,
 396            context,
 397            editable_range,
 398            cursor_offset,
 399        ),
 400        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 401            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 402                prompt,
 403                path,
 404                context,
 405                editable_range,
 406                cursor_offset,
 407            )
 408        }
 409        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
 410            seed_coder::write_cursor_excerpt_section(
 411                prompt,
 412                path,
 413                context,
 414                editable_range,
 415                cursor_offset,
 416            )
 417        }
 418        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
 419            prompt,
 420            path,
 421            context,
 422            editable_range,
 423            cursor_offset,
 424        ),
 425        ZetaFormat::V0304VariableEdit => {
 426            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
 427        }
 428        ZetaFormat::V0306SeedMultiRegions => {
 429            prompt.push_str(&build_v0306_cursor_prefix(
 430                path,
 431                context,
 432                editable_range,
 433                cursor_offset,
 434            ));
 435        }
 436        ZetaFormat::V0316SeedMultiRegions => {
 437            prompt.push_str(&build_v0316_cursor_prefix(
 438                path,
 439                context,
 440                editable_range,
 441                cursor_offset,
 442            ));
 443        }
 444        ZetaFormat::V0318SeedMultiRegions => {
 445            prompt.push_str(&build_v0318_cursor_prefix(
 446                path,
 447                context,
 448                editable_range,
 449                cursor_offset,
 450            ));
 451        }
 452        ZetaFormat::V0317SeedMultiRegions => {
 453            prompt.push_str(&build_v0317_cursor_prefix(
 454                path,
 455                context,
 456                editable_range,
 457                cursor_offset,
 458            ));
 459        }
 460    }
 461}
 462
 463fn build_v0306_cursor_prefix(
 464    path: &Path,
 465    context: &str,
 466    editable_range: &Range<usize>,
 467    cursor_offset: usize,
 468) -> String {
 469    let mut section = String::new();
 470    let path_str = path.to_string_lossy();
 471    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 472
 473    section.push_str(&context[..editable_range.start]);
 474    section.push_str(seed_coder::START_MARKER);
 475
 476    let editable_text = &context[editable_range.clone()];
 477    let cursor_in_editable = cursor_offset - editable_range.start;
 478    multi_region::write_editable_with_markers(
 479        &mut section,
 480        editable_text,
 481        cursor_in_editable,
 482        CURSOR_MARKER,
 483    );
 484
 485    if !section.ends_with('\n') {
 486        section.push('\n');
 487    }
 488    section.push_str(seed_coder::SEPARATOR);
 489    section
 490}
 491
 492fn build_v0316_cursor_prefix(
 493    path: &Path,
 494    context: &str,
 495    editable_range: &Range<usize>,
 496    cursor_offset: usize,
 497) -> String {
 498    let mut section = String::new();
 499    let path_str = path.to_string_lossy();
 500    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 501
 502    section.push_str(&context[..editable_range.start]);
 503
 504    let editable_text = &context[editable_range.clone()];
 505    let cursor_in_editable = cursor_offset - editable_range.start;
 506    multi_region::write_editable_with_markers_v0316(
 507        &mut section,
 508        editable_text,
 509        cursor_in_editable,
 510        CURSOR_MARKER,
 511    );
 512
 513    if !section.ends_with('\n') {
 514        section.push('\n');
 515    }
 516    section
 517}
 518
 519fn build_v0318_cursor_prefix(
 520    path: &Path,
 521    context: &str,
 522    editable_range: &Range<usize>,
 523    cursor_offset: usize,
 524) -> String {
 525    let mut section = String::new();
 526    let path_str = path.to_string_lossy();
 527    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 528
 529    section.push_str(&context[..editable_range.start]);
 530
 531    let editable_text = &context[editable_range.clone()];
 532    let cursor_in_editable = cursor_offset - editable_range.start;
 533    multi_region::write_editable_with_markers_v0318(
 534        &mut section,
 535        editable_text,
 536        cursor_in_editable,
 537        CURSOR_MARKER,
 538    );
 539
 540    if !section.ends_with('\n') {
 541        section.push('\n');
 542    }
 543    section
 544}
 545
 546fn build_v0317_cursor_prefix(
 547    path: &Path,
 548    context: &str,
 549    editable_range: &Range<usize>,
 550    cursor_offset: usize,
 551) -> String {
 552    let mut section = String::new();
 553    let path_str = path.to_string_lossy();
 554    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 555
 556    section.push_str(&context[..editable_range.start]);
 557
 558    let editable_text = &context[editable_range.clone()];
 559    let cursor_in_editable = cursor_offset - editable_range.start;
 560    multi_region::write_editable_with_markers_v0317(
 561        &mut section,
 562        editable_text,
 563        cursor_in_editable,
 564        CURSOR_MARKER,
 565    );
 566
 567    if !section.ends_with('\n') {
 568        section.push('\n');
 569    }
 570    section
 571}
 572
 573fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
 574    let start_row = text[0..range.start].matches('\n').count() as u32;
 575    let mut end_row = start_row + text[range.clone()].matches('\n').count() as u32;
 576    if !text[..range.end].ends_with('\n') {
 577        end_row += 1;
 578    }
 579    return start_row..end_row;
 580}
 581
 582pub fn format_prompt_with_budget_for_format(
 583    input: &ZetaPromptInput,
 584    format: ZetaFormat,
 585    max_tokens: usize,
 586) -> Option<String> {
 587    let (context, editable_range, context_range, cursor_offset) =
 588        resolve_cursor_region(input, format);
 589    let path = &*input.cursor_path;
 590
 591    let empty_files = Vec::new();
 592    let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
 593    let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
 594        let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
 595        let row_range = relative_row_range.start + cursor_excerpt_start_row
 596            ..relative_row_range.end + cursor_excerpt_start_row;
 597        &filter_redundant_excerpts(
 598            input_related_files.to_vec(),
 599            input.cursor_path.as_ref(),
 600            row_range,
 601        )
 602    } else {
 603        input_related_files
 604    };
 605
 606    let prompt = match format {
 607        ZetaFormat::V0211SeedCoder
 608        | ZetaFormat::V0304SeedNoEdits
 609        | ZetaFormat::V0306SeedMultiRegions
 610        | ZetaFormat::V0316SeedMultiRegions
 611        | ZetaFormat::V0318SeedMultiRegions
 612        | ZetaFormat::V0317SeedMultiRegions => {
 613            let mut cursor_section = String::new();
 614            write_cursor_excerpt_section_for_format(
 615                format,
 616                &mut cursor_section,
 617                path,
 618                context,
 619                &editable_range,
 620                cursor_offset,
 621            );
 622
 623            let budget_with_margin = apply_prompt_budget_margin(max_tokens);
 624            seed_coder::assemble_fim_prompt(
 625                context,
 626                &editable_range,
 627                &cursor_section,
 628                &input.events,
 629                related_files,
 630                budget_with_margin,
 631            )
 632        }
 633        _ => {
 634            let mut cursor_section = String::new();
 635            write_cursor_excerpt_section_for_format(
 636                format,
 637                &mut cursor_section,
 638                path,
 639                context,
 640                &editable_range,
 641                cursor_offset,
 642            );
 643
 644            let mut remaining_budget = apply_prompt_budget_margin(max_tokens);
 645            let cursor_tokens = estimate_tokens(cursor_section.len());
 646            remaining_budget = remaining_budget.saturating_sub(cursor_tokens);
 647
 648            let edit_history_section = format_edit_history_within_budget(
 649                &input.events,
 650                "<|file_sep|>",
 651                "edit history",
 652                remaining_budget,
 653                max_edit_event_count_for_format(&format),
 654            );
 655            let edit_history_tokens = estimate_tokens(edit_history_section.len());
 656            remaining_budget = remaining_budget.saturating_sub(edit_history_tokens);
 657
 658            let related_files_section = format_related_files_within_budget(
 659                &related_files,
 660                "<|file_sep|>",
 661                "",
 662                remaining_budget,
 663            );
 664
 665            let mut prompt = String::new();
 666            prompt.push_str(&related_files_section);
 667            prompt.push_str(&edit_history_section);
 668            prompt.push_str(&cursor_section);
 669            prompt
 670        }
 671    };
 672    let prompt_tokens = estimate_tokens(prompt.len());
 673    if prompt_tokens > max_tokens {
 674        return None;
 675    }
 676    return Some(prompt);
 677}
 678
 679pub fn filter_redundant_excerpts(
 680    mut related_files: Vec<RelatedFile>,
 681    cursor_path: &Path,
 682    cursor_row_range: Range<u32>,
 683) -> Vec<RelatedFile> {
 684    for file in &mut related_files {
 685        if file.path.as_ref() == cursor_path {
 686            file.excerpts.retain(|excerpt| {
 687                excerpt.row_range.start < cursor_row_range.start
 688                    || excerpt.row_range.end > cursor_row_range.end
 689            });
 690        }
 691    }
 692    related_files.retain(|file| !file.excerpts.is_empty());
 693    related_files
 694}
 695
 696pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
 697    match format {
 698        ZetaFormat::V0112MiddleAtEnd
 699        | ZetaFormat::V0113Ordered
 700        | ZetaFormat::V0114180EditableRegion
 701        | ZetaFormat::V0120GitMergeMarkers
 702        | ZetaFormat::V0131GitMergeMarkersPrefix
 703        | ZetaFormat::V0211Prefill
 704        | ZetaFormat::V0211SeedCoder
 705        | ZetaFormat::v0226Hashline
 706        | ZetaFormat::V0304SeedNoEdits
 707        | ZetaFormat::V0304VariableEdit
 708        | ZetaFormat::V0306SeedMultiRegions
 709        | ZetaFormat::V0316SeedMultiRegions
 710        | ZetaFormat::V0318SeedMultiRegions
 711        | ZetaFormat::V0317SeedMultiRegions => 6,
 712    }
 713}
 714
 715pub fn get_prefill_for_format(
 716    format: ZetaFormat,
 717    context: &str,
 718    editable_range: &Range<usize>,
 719) -> String {
 720    match format {
 721        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
 722        ZetaFormat::V0112MiddleAtEnd
 723        | ZetaFormat::V0113Ordered
 724        | ZetaFormat::V0114180EditableRegion
 725        | ZetaFormat::V0120GitMergeMarkers
 726        | ZetaFormat::V0131GitMergeMarkersPrefix
 727        | ZetaFormat::V0211SeedCoder
 728        | ZetaFormat::v0226Hashline
 729        | ZetaFormat::V0304VariableEdit => String::new(),
 730        ZetaFormat::V0304SeedNoEdits
 731        | ZetaFormat::V0306SeedMultiRegions
 732        | ZetaFormat::V0316SeedMultiRegions
 733        | ZetaFormat::V0318SeedMultiRegions
 734        | ZetaFormat::V0317SeedMultiRegions => String::new(),
 735    }
 736}
 737
 738pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
 739    match format {
 740        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
 741        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
 742        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
 743        ZetaFormat::V0211SeedCoder
 744        | ZetaFormat::V0304SeedNoEdits
 745        | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
 746        ZetaFormat::V0316SeedMultiRegions => Some(multi_region::V0316_END_MARKER),
 747        ZetaFormat::V0318SeedMultiRegions => Some(multi_region::V0318_END_MARKER),
 748        ZetaFormat::V0317SeedMultiRegions => Some(multi_region::V0317_END_MARKER),
 749        ZetaFormat::V0112MiddleAtEnd
 750        | ZetaFormat::V0113Ordered
 751        | ZetaFormat::V0114180EditableRegion
 752        | ZetaFormat::v0226Hashline
 753        | ZetaFormat::V0304VariableEdit => None,
 754    }
 755}
 756
 757pub fn encode_patch_as_output_for_format(
 758    format: ZetaFormat,
 759    old_editable_region: &str,
 760    patch: &str,
 761    cursor_offset: Option<usize>,
 762) -> Result<Option<String>> {
 763    match format {
 764        ZetaFormat::v0226Hashline => {
 765            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
 766        }
 767        ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
 768            old_editable_region,
 769            patch,
 770            cursor_offset,
 771        )
 772        .map(Some),
 773        ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
 774            Ok(seed_coder::no_edits(patch))
 775        }
 776        ZetaFormat::V0316SeedMultiRegions => {
 777            let empty_patch = patch.lines().count() <= 3;
 778            if empty_patch {
 779                let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
 780                let marker_num =
 781                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
 782                let tag = multi_region::marker_tag(marker_num);
 783                Ok(Some(format!(
 784                    "{tag}{tag}{}",
 785                    multi_region::V0316_END_MARKER
 786                )))
 787            } else {
 788                Ok(None)
 789            }
 790        }
 791        ZetaFormat::V0318SeedMultiRegions => {
 792            let empty_patch = patch.lines().count() <= 3;
 793            if empty_patch {
 794                let marker_offsets =
 795                    multi_region::compute_marker_offsets_v0318(old_editable_region);
 796                let marker_num =
 797                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
 798                let tag = multi_region::marker_tag(marker_num);
 799                Ok(Some(format!(
 800                    "{tag}{tag}{}",
 801                    multi_region::V0318_END_MARKER
 802                )))
 803            } else {
 804                Ok(None)
 805            }
 806        }
 807        ZetaFormat::V0317SeedMultiRegions => {
 808            let empty_patch = patch.lines().count() <= 3;
 809            if empty_patch {
 810                let tag = multi_region::marker_tag_relative(0);
 811                Ok(Some(format!(
 812                    "{tag}{tag}{}",
 813                    multi_region::V0317_END_MARKER
 814                )))
 815            } else {
 816                Ok(None)
 817            }
 818        }
 819        _ => Ok(None),
 820    }
 821}
 822
 823/// Given a `ZetaPromptInput`, a format, and a patch (with cursor already
 824/// extracted), produce the expected model output string for training.
 825pub fn format_expected_output(
 826    input: &ZetaPromptInput,
 827    format: ZetaFormat,
 828    patch: &str,
 829    cursor_offset: Option<usize>,
 830) -> Result<String> {
 831    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
 832    let mut old_editable = context[editable_range].to_string();
 833    if !old_editable.is_empty() && !old_editable.ends_with('\n') {
 834        old_editable.push('\n');
 835    }
 836
 837    // Formats with their own output encoding (hashline, variable-edit,
 838    // multi-region empty patches) are handled here.
 839    if let Some(output) =
 840        encode_patch_as_output_for_format(format, &old_editable, patch, cursor_offset)?
 841    {
 842        return Ok(output);
 843    }
 844
 845    let empty_patch = patch.lines().count() <= 3;
 846
 847    match format {
 848        // Multi-region formats: non-empty patches need diff application
 849        // then marker-span encoding.
 850        ZetaFormat::V0316SeedMultiRegions => {
 851            let (new_editable, first_hunk_offset) =
 852                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
 853            let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
 854            multi_region::encode_from_old_and_new_v0316(
 855                &old_editable,
 856                &new_editable,
 857                cursor_in_new,
 858                CURSOR_MARKER,
 859                multi_region::V0316_END_MARKER,
 860            )
 861        }
 862        ZetaFormat::V0318SeedMultiRegions => {
 863            let (new_editable, first_hunk_offset) =
 864                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
 865            let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
 866            multi_region::encode_from_old_and_new_v0318(
 867                &old_editable,
 868                &new_editable,
 869                cursor_in_new,
 870                CURSOR_MARKER,
 871                multi_region::V0318_END_MARKER,
 872            )
 873        }
 874        ZetaFormat::V0317SeedMultiRegions => {
 875            let (new_editable, first_hunk_offset) =
 876                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
 877            let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
 878            multi_region::encode_from_old_and_new_v0317(
 879                &old_editable,
 880                &new_editable,
 881                cursor_in_new,
 882                CURSOR_MARKER,
 883                multi_region::V0317_END_MARKER,
 884            )
 885        }
 886        // V0131-style formats and fallback: produce new editable text with
 887        // cursor marker inserted, followed by the end marker.
 888        _ => {
 889            let (mut result, first_hunk_offset) = if empty_patch {
 890                (old_editable.clone(), None)
 891            } else {
 892                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?
 893            };
 894
 895            if let Some(cursor) = cursor_offset {
 896                let hunk_start = if !empty_patch {
 897                    first_hunk_offset.unwrap_or(0)
 898                } else {
 899                    0
 900                };
 901                let offset = (hunk_start + cursor).min(result.len());
 902                result.insert_str(offset, CURSOR_MARKER);
 903            }
 904
 905            if !result.is_empty() && !result.ends_with('\n') {
 906                result.push('\n');
 907            }
 908
 909            if let Some(end_marker) = output_end_marker_for_format(format) {
 910                result.push_str(end_marker);
 911            }
 912
 913            Ok(result)
 914        }
 915    }
 916}
 917
 918/// Compute the cursor position within the new text after diff application.
 919fn cursor_in_new_text(
 920    cursor_offset: Option<usize>,
 921    first_hunk_offset: Option<usize>,
 922    new_text: &str,
 923) -> Option<usize> {
 924    cursor_offset.map(|cursor| {
 925        let hunk_start = first_hunk_offset.unwrap_or(0);
 926        (hunk_start + cursor).min(new_text.len())
 927    })
 928}
 929
 930pub struct ParsedOutput {
 931    /// Text that should replace the editable region
 932    pub new_editable_region: String,
 933    /// The byte range within `cursor_excerpt` that this replacement applies to
 934    pub range_in_excerpt: Range<usize>,
 935}
 936
 937/// Parse model output for the given zeta format
 938pub fn parse_zeta2_model_output(
 939    output: &str,
 940    format: ZetaFormat,
 941    prompt_inputs: &ZetaPromptInput,
 942) -> Result<ParsedOutput> {
 943    let output = match output_end_marker_for_format(format) {
 944        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
 945        None => output,
 946    };
 947
 948    let (context, editable_range_in_context, context_range, cursor_offset) =
 949        resolve_cursor_region(prompt_inputs, format);
 950    let context_start = context_range.start;
 951    let old_editable_region = &context[editable_range_in_context.clone()];
 952    let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range_in_context.start);
 953
 954    let (range_in_context, output) = match format {
 955        ZetaFormat::v0226Hashline => (
 956            editable_range_in_context,
 957            if hashline::output_has_edit_commands(output) {
 958                hashline::apply_edit_commands(old_editable_region, output)
 959            } else {
 960                output.to_string()
 961            },
 962        ),
 963        ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
 964        ZetaFormat::V0304SeedNoEdits => (
 965            editable_range_in_context,
 966            if output.starts_with(seed_coder::NO_EDITS) {
 967                old_editable_region.to_string()
 968            } else {
 969                output.to_string()
 970            },
 971        ),
 972        ZetaFormat::V0306SeedMultiRegions => (
 973            editable_range_in_context,
 974            if output.starts_with(seed_coder::NO_EDITS) {
 975                old_editable_region.to_string()
 976            } else {
 977                multi_region::apply_marker_span(old_editable_region, output)?
 978            },
 979        ),
 980        ZetaFormat::V0316SeedMultiRegions => (
 981            editable_range_in_context,
 982            multi_region::apply_marker_span_v0316(old_editable_region, output)?,
 983        ),
 984        ZetaFormat::V0318SeedMultiRegions => (
 985            editable_range_in_context,
 986            multi_region::apply_marker_span_v0318(old_editable_region, output)?,
 987        ),
 988        ZetaFormat::V0317SeedMultiRegions => (
 989            editable_range_in_context,
 990            multi_region::apply_marker_span_v0317(
 991                old_editable_region,
 992                output,
 993                Some(cursor_offset_in_editable),
 994            )?,
 995        ),
 996        _ => (editable_range_in_context, output.to_string()),
 997    };
 998
 999    let range_in_excerpt =
1000        range_in_context.start + context_start..range_in_context.end + context_start;
1001
1002    Ok(ParsedOutput {
1003        new_editable_region: output,
1004        range_in_excerpt,
1005    })
1006}
1007
1008pub fn excerpt_range_for_format(
1009    format: ZetaFormat,
1010    ranges: &ExcerptRanges,
1011) -> (Range<usize>, Range<usize>) {
1012    excerpt_ranges_for_format(format, ranges)
1013}
1014
1015pub fn resolve_cursor_region(
1016    input: &ZetaPromptInput,
1017    format: ZetaFormat,
1018) -> (&str, Range<usize>, Range<usize>, usize) {
1019    let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges {
1020        let (editable_tokens, context_tokens) = token_limits_for_format(format);
1021        compute_editable_and_context_ranges(
1022            &input.cursor_excerpt,
1023            input.cursor_offset_in_excerpt,
1024            syntax_ranges,
1025            editable_tokens,
1026            context_tokens,
1027        )
1028    } else {
1029        excerpt_range_for_format(format, &input.excerpt_ranges)
1030    };
1031    let context_start = context_range.start;
1032    let context_text = &input.cursor_excerpt[context_range.clone()];
1033    let adjusted_editable =
1034        (editable_range.start - context_start)..(editable_range.end - context_start);
1035    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
1036
1037    (
1038        context_text,
1039        adjusted_editable,
1040        context_range,
1041        adjusted_cursor,
1042    )
1043}
1044
1045pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
1046    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
1047    get_prefill_for_format(format, context, &editable_range)
1048}
1049
1050fn format_edit_history_within_budget(
1051    events: &[Arc<Event>],
1052    file_marker: &str,
1053    edit_history_name: &str,
1054    max_tokens: usize,
1055    max_edit_event_count: usize,
1056) -> String {
1057    let header = format!("{}{}\n", file_marker, edit_history_name);
1058    let header_tokens = estimate_tokens(header.len());
1059    if header_tokens >= max_tokens {
1060        return String::new();
1061    }
1062
1063    let mut event_strings: Vec<String> = Vec::new();
1064    let mut total_tokens = header_tokens;
1065
1066    for event in events.iter().rev().take(max_edit_event_count) {
1067        let mut event_str = String::new();
1068        write_event(&mut event_str, event);
1069        let event_tokens = estimate_tokens(event_str.len());
1070
1071        if total_tokens + event_tokens > max_tokens {
1072            break;
1073        }
1074        total_tokens += event_tokens;
1075        event_strings.push(event_str);
1076    }
1077
1078    if event_strings.is_empty() {
1079        return String::new();
1080    }
1081
1082    let mut result = header;
1083    for event_str in event_strings.iter().rev() {
1084        result.push_str(event_str);
1085    }
1086    result
1087}
1088
1089fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
1090    let needs_newline = !excerpt.text.ends_with('\n');
1091    let needs_ellipsis = excerpt.row_range.end < file_max_row;
1092    let len = excerpt.text.len()
1093        + if needs_newline { "\n".len() } else { 0 }
1094        + if needs_ellipsis { "...\n".len() } else { 0 };
1095    estimate_tokens(len)
1096}
1097
1098pub fn format_related_files_within_budget(
1099    related_files: &[RelatedFile],
1100    file_prefix: &str,
1101    file_suffix: &str,
1102    max_tokens: usize,
1103) -> String {
1104    struct ExcerptCandidate {
1105        file_ix: usize,
1106        excerpt_ix: usize,
1107        order: usize,
1108    }
1109
1110    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
1111        .iter()
1112        .enumerate()
1113        .flat_map(|(file_ix, file)| {
1114            file.excerpts
1115                .iter()
1116                .enumerate()
1117                .map(move |(excerpt_ix, e)| ExcerptCandidate {
1118                    file_ix,
1119                    excerpt_ix,
1120                    order: e.order,
1121                })
1122        })
1123        .collect();
1124
1125    // Pre-compute file header strings and their token costs.
1126    let file_headers: Vec<String> = related_files
1127        .iter()
1128        .map(|file| {
1129            let path_str = file.path.to_string_lossy();
1130            format!("{}{}\n", file_prefix, path_str)
1131        })
1132        .collect();
1133
1134    // Sort the excerpts by their order and determine how many fit within the budget.
1135    let mut total_tokens = 0;
1136    let mut included_excerpt_count = 0_usize;
1137    let mut included_file_indices = vec![false; related_files.len()];
1138    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
1139    for candidate in &excerpt_candidates {
1140        let file = &related_files[candidate.file_ix];
1141        let excerpt = &file.excerpts[candidate.excerpt_ix];
1142        let file_already_included = included_file_indices[candidate.file_ix];
1143        let header_cost = if file_already_included {
1144            0
1145        } else {
1146            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
1147        };
1148        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
1149        if total_tokens + header_cost + excerpt_cost > max_tokens {
1150            break;
1151        }
1152        total_tokens += header_cost + excerpt_cost;
1153        if !file_already_included {
1154            included_file_indices[candidate.file_ix] = true;
1155        }
1156        included_excerpt_count += 1;
1157    }
1158
1159    excerpt_candidates.truncate(included_excerpt_count);
1160    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
1161
1162    // Render all of the files that fit within the token budget, in the original order.
1163    let mut result = String::new();
1164    let mut last_file_ix = None;
1165    for candidate in &excerpt_candidates {
1166        if last_file_ix != Some(candidate.file_ix) {
1167            if last_file_ix.is_some() {
1168                result.push_str(file_suffix);
1169            }
1170            result.push_str(&file_headers[candidate.file_ix]);
1171            last_file_ix = Some(candidate.file_ix);
1172        }
1173        let file = &related_files[candidate.file_ix];
1174        let excerpt = &file.excerpts[candidate.excerpt_ix];
1175        result.push_str(&excerpt.text);
1176        if !result.ends_with('\n') {
1177            result.push('\n');
1178        }
1179        if excerpt.row_range.end < file.max_row {
1180            result.push_str("...\n");
1181        }
1182    }
1183
1184    result
1185}
1186
1187pub fn write_related_files(
1188    prompt: &mut String,
1189    related_files: &[RelatedFile],
1190) -> Vec<Range<usize>> {
1191    let mut ranges = Vec::new();
1192    for file in related_files {
1193        let start = prompt.len();
1194        let path_str = file.path.to_string_lossy();
1195        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1196        for excerpt in &file.excerpts {
1197            prompt.push_str(&excerpt.text);
1198            if !prompt.ends_with('\n') {
1199                prompt.push('\n');
1200            }
1201            if excerpt.row_range.end < file.max_row {
1202                prompt.push_str("...\n");
1203            }
1204        }
1205        let end = prompt.len();
1206        ranges.push(start..end);
1207    }
1208    ranges
1209}
1210
1211mod v0112_middle_at_end {
1212    use super::*;
1213
1214    pub fn special_tokens() -> &'static [&'static str] {
1215        &[
1216            "<|fim_prefix|>",
1217            "<|fim_suffix|>",
1218            "<|fim_middle|>",
1219            "<|file_sep|>",
1220            CURSOR_MARKER,
1221        ]
1222    }
1223
1224    pub fn write_cursor_excerpt_section(
1225        prompt: &mut String,
1226        path: &Path,
1227        context: &str,
1228        editable_range: &Range<usize>,
1229        cursor_offset: usize,
1230    ) {
1231        let path_str = path.to_string_lossy();
1232        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1233
1234        prompt.push_str("<|fim_prefix|>\n");
1235        prompt.push_str(&context[..editable_range.start]);
1236
1237        prompt.push_str("<|fim_suffix|>\n");
1238        prompt.push_str(&context[editable_range.end..]);
1239        if !prompt.ends_with('\n') {
1240            prompt.push('\n');
1241        }
1242
1243        prompt.push_str("<|fim_middle|>current\n");
1244        prompt.push_str(&context[editable_range.start..cursor_offset]);
1245        prompt.push_str(CURSOR_MARKER);
1246        prompt.push_str(&context[cursor_offset..editable_range.end]);
1247        if !prompt.ends_with('\n') {
1248            prompt.push('\n');
1249        }
1250
1251        prompt.push_str("<|fim_middle|>updated\n");
1252    }
1253}
1254
1255mod v0113_ordered {
1256    use super::*;
1257
1258    pub fn special_tokens() -> &'static [&'static str] {
1259        &[
1260            "<|fim_prefix|>",
1261            "<|fim_suffix|>",
1262            "<|fim_middle|>",
1263            "<|file_sep|>",
1264            CURSOR_MARKER,
1265        ]
1266    }
1267
1268    pub fn write_cursor_excerpt_section(
1269        prompt: &mut String,
1270        path: &Path,
1271        context: &str,
1272        editable_range: &Range<usize>,
1273        cursor_offset: usize,
1274    ) {
1275        let path_str = path.to_string_lossy();
1276        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1277
1278        prompt.push_str("<|fim_prefix|>\n");
1279        prompt.push_str(&context[..editable_range.start]);
1280        if !prompt.ends_with('\n') {
1281            prompt.push('\n');
1282        }
1283
1284        prompt.push_str("<|fim_middle|>current\n");
1285        prompt.push_str(&context[editable_range.start..cursor_offset]);
1286        prompt.push_str(CURSOR_MARKER);
1287        prompt.push_str(&context[cursor_offset..editable_range.end]);
1288        if !prompt.ends_with('\n') {
1289            prompt.push('\n');
1290        }
1291
1292        prompt.push_str("<|fim_suffix|>\n");
1293        prompt.push_str(&context[editable_range.end..]);
1294        if !prompt.ends_with('\n') {
1295            prompt.push('\n');
1296        }
1297
1298        prompt.push_str("<|fim_middle|>updated\n");
1299    }
1300}
1301
1302mod v0114180_editable_region {
1303    use super::*;
1304
1305    pub fn special_tokens() -> &'static [&'static str] {
1306        v0113_ordered::special_tokens()
1307    }
1308}
1309
1310pub mod v0120_git_merge_markers {
1311    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1312    //!
1313    //! Example prompt:
1314    //!
1315    //! <|file_sep|>path/to/target_file.py
1316    //! <|fim_prefix|>
1317    //! code before editable region
1318    //! <|fim_suffix|>
1319    //! code after editable region
1320    //! <|fim_middle|>
1321    //! <<<<<<< CURRENT
1322    //! code that
1323    //! needs to<|user_cursor|>
1324    //! be rewritten
1325    //! =======
1326    //!
1327    //! Expected output (should be generated by the model):
1328    //!
1329    //! updated
1330    //! code with
1331    //! changes applied
1332    //! >>>>>>> UPDATED
1333
1334    use super::*;
1335
1336    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1337    pub const SEPARATOR: &str = "=======\n";
1338    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1339
1340    pub fn special_tokens() -> &'static [&'static str] {
1341        &[
1342            "<|fim_prefix|>",
1343            "<|fim_suffix|>",
1344            "<|fim_middle|>",
1345            "<|file_sep|>",
1346            START_MARKER,
1347            SEPARATOR,
1348            END_MARKER,
1349            CURSOR_MARKER,
1350        ]
1351    }
1352
1353    pub fn write_cursor_excerpt_section(
1354        prompt: &mut String,
1355        path: &Path,
1356        context: &str,
1357        editable_range: &Range<usize>,
1358        cursor_offset: usize,
1359    ) {
1360        let path_str = path.to_string_lossy();
1361        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1362
1363        prompt.push_str("<|fim_prefix|>");
1364        prompt.push_str(&context[..editable_range.start]);
1365
1366        prompt.push_str("<|fim_suffix|>");
1367        prompt.push_str(&context[editable_range.end..]);
1368        if !prompt.ends_with('\n') {
1369            prompt.push('\n');
1370        }
1371
1372        prompt.push_str("<|fim_middle|>");
1373        prompt.push_str(START_MARKER);
1374        prompt.push_str(&context[editable_range.start..cursor_offset]);
1375        prompt.push_str(CURSOR_MARKER);
1376        prompt.push_str(&context[cursor_offset..editable_range.end]);
1377        if !prompt.ends_with('\n') {
1378            prompt.push('\n');
1379        }
1380        prompt.push_str(SEPARATOR);
1381    }
1382}
1383
1384pub mod v0131_git_merge_markers_prefix {
1385    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1386    //!
1387    //! Example prompt:
1388    //!
1389    //! <|file_sep|>path/to/target_file.py
1390    //! <|fim_prefix|>
1391    //! code before editable region
1392    //! <<<<<<< CURRENT
1393    //! code that
1394    //! needs to<|user_cursor|>
1395    //! be rewritten
1396    //! =======
1397    //! <|fim_suffix|>
1398    //! code after editable region
1399    //! <|fim_middle|>
1400    //!
1401    //! Expected output (should be generated by the model):
1402    //!
1403    //! updated
1404    //! code with
1405    //! changes applied
1406    //! >>>>>>> UPDATED
1407
1408    use super::*;
1409
1410    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1411    pub const SEPARATOR: &str = "=======\n";
1412    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1413
1414    pub fn special_tokens() -> &'static [&'static str] {
1415        &[
1416            "<|fim_prefix|>",
1417            "<|fim_suffix|>",
1418            "<|fim_middle|>",
1419            "<|file_sep|>",
1420            START_MARKER,
1421            SEPARATOR,
1422            END_MARKER,
1423            CURSOR_MARKER,
1424        ]
1425    }
1426
1427    pub fn write_cursor_excerpt_section(
1428        prompt: &mut String,
1429        path: &Path,
1430        context: &str,
1431        editable_range: &Range<usize>,
1432        cursor_offset: usize,
1433    ) {
1434        let path_str = path.to_string_lossy();
1435        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1436
1437        prompt.push_str("<|fim_prefix|>");
1438        prompt.push_str(&context[..editable_range.start]);
1439        prompt.push_str(START_MARKER);
1440        prompt.push_str(&context[editable_range.start..cursor_offset]);
1441        prompt.push_str(CURSOR_MARKER);
1442        prompt.push_str(&context[cursor_offset..editable_range.end]);
1443        if !prompt.ends_with('\n') {
1444            prompt.push('\n');
1445        }
1446        prompt.push_str(SEPARATOR);
1447
1448        prompt.push_str("<|fim_suffix|>");
1449        prompt.push_str(&context[editable_range.end..]);
1450        if !prompt.ends_with('\n') {
1451            prompt.push('\n');
1452        }
1453
1454        prompt.push_str("<|fim_middle|>");
1455    }
1456}
1457
1458pub mod v0211_prefill {
1459    use super::*;
1460
1461    pub fn special_tokens() -> &'static [&'static str] {
1462        v0131_git_merge_markers_prefix::special_tokens()
1463    }
1464
1465    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1466        let editable_region = &context[editable_range.start..editable_range.end];
1467
1468        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1469        let prefill_len = editable_region.floor_char_boundary(prefill_len);
1470
1471        // Find a token boundary to avoid splitting tokens in the prefill.
1472        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1473        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1474        // the \n and consume any consecutive \n characters after it.
1475        let prefill = &editable_region[..prefill_len];
1476        match prefill.rfind('\n') {
1477            Some(pos) => {
1478                let mut end = pos + 1;
1479                while end < editable_region.len()
1480                    && editable_region.as_bytes().get(end) == Some(&b'\n')
1481                {
1482                    end += 1;
1483                }
1484                editable_region[..end].to_string()
1485            }
1486            // No newline found. Fall back to splitting before the last space
1487            // (word-level boundary)
1488            None => match prefill.rfind(' ') {
1489                Some(pos) => prefill[..pos].to_string(),
1490                None => prefill.to_string(),
1491            },
1492        }
1493    }
1494}
1495
1496pub mod hashline {
1497
1498    use std::fmt::Display;
1499
    /// Tag written on the last line of the hashline prompt, after the suffix
    /// section.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Tag written immediately before the hash-prefixed editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";

    use super::*;

    /// Prefix of a model-output command line that replaces one line or an
    /// inclusive range of lines.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a model-output command line that inserts new lines.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
    /// Marker with which the model signals that the editable region should be
    /// left unchanged.
    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1508
1509    pub fn special_tokens() -> &'static [&'static str] {
1510        return &[
1511            SET_COMMAND_MARKER,
1512            "<|set_range|>",
1513            INSERT_COMMAND_MARKER,
1514            NO_EDITS_COMMAND_MARKER,
1515            CURSOR_MARKER,
1516            "<|file_sep|>",
1517            "<|fim_prefix|>",
1518            "<|fim_suffix|>",
1519            "<|fim_middle|>",
1520        ];
1521    }
1522
1523    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1524    #[derive(Debug, Clone, PartialEq, Eq)]
1525    struct LineRef {
1526        index: usize,
1527        hash: u8,
1528    }
1529
1530    impl Display for LineRef {
1531        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1532            write!(f, "{}:{:02x}", self.index, self.hash)
1533        }
1534    }
1535
1536    pub fn hash_line(line: &[u8]) -> u8 {
1537        let mut h: u8 = 0;
1538        for &byte in line {
1539            h = h.wrapping_add(byte);
1540        }
1541        return h;
1542    }
1543
1544    /// Write the hashline-encoded editable region into `out`. Each line of
1545    /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1546    /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1547    /// to the start of `editable_text`).
1548    pub fn write_hashline_editable_region(
1549        out: &mut String,
1550        editable_text: &str,
1551        cursor_offset_in_editable: usize,
1552    ) {
1553        let mut offset = 0;
1554        for (i, line) in editable_text.lines().enumerate() {
1555            let (head, cursor, tail) = if cursor_offset_in_editable > offset
1556                && cursor_offset_in_editable < offset + line.len()
1557            {
1558                (
1559                    &line[..cursor_offset_in_editable - offset],
1560                    CURSOR_MARKER,
1561                    &line[cursor_offset_in_editable - offset..],
1562                )
1563            } else {
1564                (line, "", "")
1565            };
1566            write!(
1567                out,
1568                "\n{}|{head}{cursor}{tail}",
1569                LineRef {
1570                    index: i,
1571                    hash: hash_line(line.as_bytes())
1572                }
1573            )
1574            .unwrap();
1575            offset += line.len() + 1;
1576        }
1577    }
1578
1579    pub fn write_cursor_excerpt_section(
1580        prompt: &mut String,
1581        path: &Path,
1582        context: &str,
1583        editable_range: &Range<usize>,
1584        cursor_offset: usize,
1585    ) {
1586        let path_str = path.to_string_lossy();
1587        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1588
1589        prompt.push_str("<|fim_prefix|>\n");
1590        prompt.push_str(&context[..editable_range.start]);
1591        prompt.push_str(START_MARKER);
1592
1593        let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1594        let editable_region = &context[editable_range.clone()];
1595        write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1596
1597        if !prompt.ends_with('\n') {
1598            prompt.push('\n');
1599        }
1600
1601        prompt.push_str("<|fim_suffix|>\n");
1602        prompt.push_str(&context[editable_range.end..]);
1603        if !prompt.ends_with('\n') {
1604            prompt.push('\n');
1605        }
1606
1607        prompt.push_str(END_MARKER);
1608        prompt.push('\n');
1609    }
1610
    /// A single edit command parsed from the model output.
    ///
    /// `content` borrows from the model output and holds the raw text of the
    /// lines that follow the command line, up to the next command line.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            start: LineRef,
            end: LineRef,
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`. The parser also produces `None` when the line
        /// reference of an insert command cannot be parsed.
        Insert {
            after: Option<LineRef>,
            content: &'a str,
        },
    }
1628
1629    /// Parse a line reference like `3:c3` into a `LineRef`.
1630    fn parse_line_ref(s: &str) -> Option<LineRef> {
1631        let (idx_str, hash_str) = s.split_once(':')?;
1632        let index = idx_str.parse::<usize>().ok()?;
1633        let hash = u8::from_str_radix(hash_str, 16).ok()?;
1634        Some(LineRef { index, hash })
1635    }
1636
1637    /// Parse the model output into a list of `EditCommand`s.
1638    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1639        let mut commands = Vec::new();
1640        let mut offset = 0usize;
1641
1642        while offset < model_output.len() {
1643            let next_nl = model_output[offset..]
1644                .find('\n')
1645                .map(|i| offset + i)
1646                .unwrap_or(model_output.len());
1647            let line = &model_output[offset..next_nl];
1648            let line_end = if next_nl < model_output.len() {
1649                next_nl + 1
1650            } else {
1651                next_nl
1652            };
1653
1654            let trimmed = line.trim();
1655            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1656                (true, spec)
1657            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1658                (false, spec)
1659            } else {
1660                offset = line_end;
1661                continue;
1662            };
1663
1664            let mut content_end = line_end;
1665            let mut scan = line_end;
1666
1667            while scan < model_output.len() {
1668                let body_nl = model_output[scan..]
1669                    .find('\n')
1670                    .map(|i| scan + i)
1671                    .unwrap_or(model_output.len());
1672                let body_line = &model_output[scan..body_nl];
1673                if body_line.trim().starts_with(SET_COMMAND_MARKER)
1674                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1675                {
1676                    break;
1677                }
1678                scan = if body_nl < model_output.len() {
1679                    body_nl + 1
1680                } else {
1681                    body_nl
1682                };
1683                content_end = scan;
1684            }
1685
1686            let content = &model_output[line_end..content_end];
1687
1688            if is_set {
1689                if let Some((start_str, end_str)) = specifier.split_once('-') {
1690                    if let (Some(start), Some(end)) =
1691                        (parse_line_ref(start_str), parse_line_ref(end_str))
1692                    {
1693                        commands.push(EditCommand::Set {
1694                            start,
1695                            end,
1696                            content,
1697                        });
1698                    }
1699                } else if let Some(target) = parse_line_ref(specifier) {
1700                    commands.push(EditCommand::Set {
1701                        start: target.clone(),
1702                        end: target,
1703                        content,
1704                    });
1705                }
1706            } else {
1707                let after = parse_line_ref(specifier);
1708                commands.push(EditCommand::Insert { after, content });
1709            }
1710
1711            offset = scan;
1712        }
1713
1714        commands
1715    }
1716
1717    /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1718    /// (as opposed to being a plain full-replacement output).
1719    /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1720    /// editable region, returning the plain text content.
1721    pub fn strip_hashline_prefixes(region: &str) -> String {
1722        let mut decoded: String = region
1723            .lines()
1724            .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1725            .collect::<Vec<_>>()
1726            .join("\n");
1727        if region.ends_with('\n') {
1728            decoded.push('\n');
1729        }
1730        decoded
1731    }
1732
1733    pub fn output_has_edit_commands(model_output: &str) -> bool {
1734        model_output.contains(SET_COMMAND_MARKER)
1735            || model_output.contains(INSERT_COMMAND_MARKER)
1736            || model_output.contains(NO_EDITS_COMMAND_MARKER)
1737    }
1738
1739    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1740    /// original editable region text.
1741    ///
1742    /// `editable_region` is the original text of the editable region (without hash
1743    /// prefixes). `model_output` is the raw model response containing edit commands.
1744    ///
1745    /// Returns the full replacement text for the editable region.
1746    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1747        if model_output
1748            .trim_start()
1749            .starts_with(NO_EDITS_COMMAND_MARKER)
1750        {
1751            return editable_region.to_string();
1752        }
1753
1754        let original_lines: Vec<&str> = editable_region.lines().collect();
1755        let old_hashes: Vec<u8> = original_lines
1756            .iter()
1757            .map(|line| hash_line(line.as_bytes()))
1758            .collect();
1759
1760        let commands = parse_edit_commands(model_output);
1761
1762        // For set operations: indexed by start line → Some((end line index, content))
1763        // For insert operations: indexed by line index → vec of content to insert after
1764        // Insert-before-first is tracked separately.
1765        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1766        let mut insert_before_first: Vec<&str> = Vec::new();
1767        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1768
1769        for command in &commands {
1770            match command {
1771                EditCommand::Set {
1772                    start,
1773                    end,
1774                    content,
1775                } => {
1776                    if start.index < old_hashes.len()
1777                        && end.index < old_hashes.len()
1778                        && start.index <= end.index
1779                        && old_hashes[start.index] == start.hash
1780                        && old_hashes[end.index] == end.hash
1781                    {
1782                        set_ops[start.index] = Some((end.index, *content));
1783                    }
1784                }
1785                EditCommand::Insert { after, content } => match after {
1786                    None => insert_before_first.push(*content),
1787                    Some(line_ref) => {
1788                        if line_ref.index < old_hashes.len()
1789                            && old_hashes[line_ref.index] == line_ref.hash
1790                        {
1791                            insert_after[line_ref.index].push(*content);
1792                        }
1793                    }
1794                },
1795            }
1796        }
1797
1798        let mut result = String::new();
1799
1800        // Emit any insertions before the first line
1801        for content in &insert_before_first {
1802            result.push_str(content);
1803            if !content.ends_with('\n') {
1804                result.push('\n');
1805            }
1806        }
1807
1808        let mut i = 0;
1809        while i < original_lines.len() {
1810            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1811                // Replace lines i..=end_index with the replacement content
1812                result.push_str(replacement);
1813                if !replacement.is_empty() && !replacement.ends_with('\n') {
1814                    result.push('\n');
1815                }
1816                // Emit any insertions after the end of this set range
1817                if *end_index < insert_after.len() {
1818                    for content in &insert_after[*end_index] {
1819                        result.push_str(content);
1820                        if !content.ends_with('\n') {
1821                            result.push('\n');
1822                        }
1823                    }
1824                }
1825                i = end_index + 1;
1826            } else {
1827                // Keep the original line
1828                result.push_str(original_lines[i]);
1829                result.push('\n');
1830                // Emit any insertions after this line
1831                for content in &insert_after[i] {
1832                    result.push_str(content);
1833                    if !content.ends_with('\n') {
1834                        result.push('\n');
1835                    }
1836                }
1837                i += 1;
1838            }
1839        }
1840
1841        // Preserve trailing newline behavior: if the original ended with a
1842        // newline the result already has one; if it didn't, trim the extra one
1843        // we added.
1844        if !editable_region.ends_with('\n') && result.ends_with('\n') {
1845            result.pop();
1846        }
1847
1848        result
1849    }
1850
1851    /// Convert a unified diff patch into hashline edit commands.
1852    ///
1853    /// Parses the unified diff `patch` directly to determine which lines of
1854    /// `old_text` are deleted/replaced and what new lines are added, then emits
1855    /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1856    /// `{index}:{hash}` identifiers.
1857    ///
1858    /// `cursor_offset` is an optional byte offset into the first hunk's new
1859    /// text (context + additions) where the cursor marker should be placed.
1860    pub fn patch_to_edit_commands(
1861        old_text: &str,
1862        patch: &str,
1863        cursor_offset: Option<usize>,
1864    ) -> Result<String> {
1865        let old_lines: Vec<&str> = old_text.lines().collect();
1866        let old_hashes: Vec<u8> = old_lines
1867            .iter()
1868            .map(|line| hash_line(line.as_bytes()))
1869            .collect();
1870
1871        let mut result = String::new();
1872        let mut first_hunk = true;
1873
1874        struct Hunk<'a> {
1875            line_range: Range<usize>,
1876            new_text_lines: Vec<&'a str>,
1877            cursor_line_offset_in_new_text: Option<(usize, usize)>,
1878        }
1879
1880        // Parse the patch line by line. We only care about hunk headers,
1881        // context, deletions, and additions.
1882        let mut old_line_index: usize = 0;
1883        let mut current_hunk: Option<Hunk> = None;
1884        // Byte offset tracking within the hunk's new text for cursor placement.
1885        let mut new_text_byte_offset: usize = 0;
1886        // The line index of the last old line seen before/in the current hunk
1887        // (used for insert-after reference).
1888        let mut last_old_line_before_hunk: Option<usize> = None;
1889
1890        fn flush_hunk(
1891            hunk: Hunk,
1892            last_old_line: Option<usize>,
1893            result: &mut String,
1894            old_hashes: &[u8],
1895        ) {
1896            if hunk.line_range.is_empty() {
1897                // Pure insertion — reference the old line to insert after when in bounds.
1898                if let Some(after) = last_old_line
1899                    && let Some(&hash) = old_hashes.get(after)
1900                {
1901                    write!(
1902                        result,
1903                        "{INSERT_COMMAND_MARKER}{}\n",
1904                        LineRef { index: after, hash }
1905                    )
1906                    .unwrap();
1907                } else {
1908                    result.push_str(INSERT_COMMAND_MARKER);
1909                    result.push('\n');
1910                }
1911            } else {
1912                let start = hunk.line_range.start;
1913                let end_exclusive = hunk.line_range.end;
1914                let deleted_line_count = end_exclusive.saturating_sub(start);
1915
1916                if deleted_line_count == 1 {
1917                    if let Some(&hash) = old_hashes.get(start) {
1918                        write!(
1919                            result,
1920                            "{SET_COMMAND_MARKER}{}\n",
1921                            LineRef { index: start, hash }
1922                        )
1923                        .unwrap();
1924                    } else {
1925                        result.push_str(SET_COMMAND_MARKER);
1926                        result.push('\n');
1927                    }
1928                } else {
1929                    let end_inclusive = end_exclusive - 1;
1930                    match (
1931                        old_hashes.get(start).copied(),
1932                        old_hashes.get(end_inclusive).copied(),
1933                    ) {
1934                        (Some(start_hash), Some(end_hash)) => {
1935                            write!(
1936                                result,
1937                                "{SET_COMMAND_MARKER}{}-{}\n",
1938                                LineRef {
1939                                    index: start,
1940                                    hash: start_hash
1941                                },
1942                                LineRef {
1943                                    index: end_inclusive,
1944                                    hash: end_hash
1945                                }
1946                            )
1947                            .unwrap();
1948                        }
1949                        _ => {
1950                            result.push_str(SET_COMMAND_MARKER);
1951                            result.push('\n');
1952                        }
1953                    }
1954                }
1955            }
1956            for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1957                if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1958                    && line_offset == cursor_line_offset
1959                {
1960                    result.push_str(&line[..char_offset]);
1961                    result.push_str(CURSOR_MARKER);
1962                    result.push_str(&line[char_offset..]);
1963                    continue;
1964                }
1965
1966                result.push_str(line);
1967            }
1968        }
1969
1970        for raw_line in patch.split_inclusive('\n') {
1971            if raw_line.starts_with("@@") {
1972                // Flush any pending change hunk from a previous patch hunk.
1973                if let Some(hunk) = current_hunk.take() {
1974                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1975                }
1976
1977                // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1978                // We intentionally do not trust old_start as a direct local index into `old_text`,
1979                // because some patches are produced against a larger file region and carry
1980                // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1981                if first_hunk {
1982                    new_text_byte_offset = 0;
1983                    first_hunk = false;
1984                }
1985                continue;
1986            }
1987
1988            if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1989                continue;
1990            }
1991            if raw_line.starts_with("\\ No newline") {
1992                continue;
1993            }
1994
1995            if raw_line.starts_with('-') {
1996                // Extend or start a change hunk with this deleted old line.
1997                match &mut current_hunk {
1998                    Some(Hunk {
1999                        line_range: range, ..
2000                    }) => range.end = old_line_index + 1,
2001                    None => {
2002                        current_hunk = Some(Hunk {
2003                            line_range: old_line_index..old_line_index + 1,
2004                            new_text_lines: Vec::new(),
2005                            cursor_line_offset_in_new_text: None,
2006                        });
2007                    }
2008                }
2009                old_line_index += 1;
2010            } else if let Some(added_content) = raw_line.strip_prefix('+') {
2011                // Place cursor marker if cursor_offset falls within this line.
2012                let mut cursor_line_offset = None;
2013                if let Some(cursor_off) = cursor_offset
2014                    && (first_hunk
2015                        || cursor_off >= new_text_byte_offset
2016                            && cursor_off <= new_text_byte_offset + added_content.len())
2017                {
2018                    let line_offset = added_content.floor_char_boundary(
2019                        cursor_off
2020                            .saturating_sub(new_text_byte_offset)
2021                            .min(added_content.len()),
2022                    );
2023                    cursor_line_offset = Some(line_offset);
2024                }
2025
2026                new_text_byte_offset += added_content.len();
2027
2028                let hunk = current_hunk.get_or_insert(Hunk {
2029                    line_range: old_line_index..old_line_index,
2030                    new_text_lines: vec![],
2031                    cursor_line_offset_in_new_text: None,
2032                });
2033                hunk.new_text_lines.push(added_content);
2034                hunk.cursor_line_offset_in_new_text = cursor_line_offset
2035                    .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
2036            } else {
2037                // Context line (starts with ' ' or is empty).
2038                if let Some(hunk) = current_hunk.take() {
2039                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2040                }
2041                last_old_line_before_hunk = Some(old_line_index);
2042                old_line_index += 1;
2043                let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
2044                new_text_byte_offset += content.len();
2045            }
2046        }
2047
2048        // Flush final group.
2049        if let Some(hunk) = current_hunk.take() {
2050            flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2051        }
2052
2053        // Trim a single trailing newline.
2054        if result.ends_with('\n') {
2055            result.pop();
2056        }
2057
2058        if result.is_empty() {
2059            return Ok(NO_EDITS_COMMAND_MARKER.to_string());
2060        }
2061
2062        Ok(result)
2063    }
2064
2065    #[cfg(test)]
2066    mod tests {
2067        use super::*;
2068        use indoc::indoc;
2069
2070        #[test]
2071        fn test_format_cursor_region() {
2072            struct Case {
2073                name: &'static str,
2074                context: &'static str,
2075                editable_range: Range<usize>,
2076                cursor_offset: usize,
2077                expected: &'static str,
2078            }
2079
2080            let cases = [
2081                Case {
2082                    name: "basic_cursor_placement",
2083                    context: "hello world\n",
2084                    editable_range: 0..12,
2085                    cursor_offset: 5,
2086                    expected: indoc! {"
2087                    <|file_sep|>test.rs
2088                    <|fim_prefix|>
2089                    <|fim_middle|>current
2090                    0:5c|hello<|user_cursor|> world
2091                    <|fim_suffix|>
2092                    <|fim_middle|>updated
2093                    "},
2094                },
2095                Case {
2096                    name: "multiline_cursor_on_second_line",
2097                    context: "aaa\nbbb\nccc\n",
2098                    editable_range: 0..12,
2099                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
2100                    expected: indoc! {"
2101                    <|file_sep|>test.rs
2102                    <|fim_prefix|>
2103                    <|fim_middle|>current
2104                    0:23|aaa
2105                    1:26|b<|user_cursor|>bb
2106                    2:29|ccc
2107                    <|fim_suffix|>
2108                    <|fim_middle|>updated
2109                    "},
2110                },
2111                Case {
2112                    name: "no_trailing_newline_in_context",
2113                    context: "line1\nline2",
2114                    editable_range: 0..11,
2115                    cursor_offset: 3,
2116                    expected: indoc! {"
2117                    <|file_sep|>test.rs
2118                    <|fim_prefix|>
2119                    <|fim_middle|>current
2120                    0:d9|lin<|user_cursor|>e1
2121                    1:da|line2
2122                    <|fim_suffix|>
2123                    <|fim_middle|>updated
2124                    "},
2125                },
2126                Case {
2127                    name: "leading_newline_in_editable_region",
2128                    context: "\nabc\n",
2129                    editable_range: 0..5,
2130                    cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
2131                    expected: indoc! {"
2132                    <|file_sep|>test.rs
2133                    <|fim_prefix|>
2134                    <|fim_middle|>current
2135                    0:00|
2136                    1:26|a<|user_cursor|>bc
2137                    <|fim_suffix|>
2138                    <|fim_middle|>updated
2139                    "},
2140                },
2141                Case {
2142                    name: "with_suffix",
2143                    context: "abc\ndef",
2144                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
2145                    cursor_offset: 2,
2146                    expected: indoc! {"
2147                    <|file_sep|>test.rs
2148                    <|fim_prefix|>
2149                    <|fim_middle|>current
2150                    0:26|ab<|user_cursor|>c
2151                    <|fim_suffix|>
2152                    def
2153                    <|fim_middle|>updated
2154                    "},
2155                },
2156                Case {
2157                    name: "unicode_two_byte_chars",
2158                    context: "héllo\n",
2159                    editable_range: 0..7,
2160                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
2161                    expected: indoc! {"
2162                    <|file_sep|>test.rs
2163                    <|fim_prefix|>
2164                    <|fim_middle|>current
2165                    0:1b|hé<|user_cursor|>llo
2166                    <|fim_suffix|>
2167                    <|fim_middle|>updated
2168                    "},
2169                },
2170                Case {
2171                    name: "unicode_three_byte_chars",
2172                    context: "日本語\n",
2173                    editable_range: 0..10,
2174                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
2175                    expected: indoc! {"
2176                    <|file_sep|>test.rs
2177                    <|fim_prefix|>
2178                    <|fim_middle|>current
2179                    0:80|日本<|user_cursor|>語
2180                    <|fim_suffix|>
2181                    <|fim_middle|>updated
2182                    "},
2183                },
2184                Case {
2185                    name: "unicode_four_byte_chars",
2186                    context: "a🌍b\n",
2187                    editable_range: 0..7,
2188                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
2189                    expected: indoc! {"
2190                    <|file_sep|>test.rs
2191                    <|fim_prefix|>
2192                    <|fim_middle|>current
2193                    0:6b|a🌍<|user_cursor|>b
2194                    <|fim_suffix|>
2195                    <|fim_middle|>updated
2196                    "},
2197                },
2198                Case {
2199                    name: "cursor_at_start_of_region_not_placed",
2200                    context: "abc\n",
2201                    editable_range: 0..4,
2202                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
2203                    expected: indoc! {"
2204                    <|file_sep|>test.rs
2205                    <|fim_prefix|>
2206                    <|fim_middle|>current
2207                    0:26|abc
2208                    <|fim_suffix|>
2209                    <|fim_middle|>updated
2210                    "},
2211                },
2212                Case {
2213                    name: "cursor_at_end_of_line_not_placed",
2214                    context: "abc\ndef\n",
2215                    editable_range: 0..8,
2216                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
2217                    expected: indoc! {"
2218                    <|file_sep|>test.rs
2219                    <|fim_prefix|>
2220                    <|fim_middle|>current
2221                    0:26|abc
2222                    1:2f|def
2223                    <|fim_suffix|>
2224                    <|fim_middle|>updated
2225                    "},
2226                },
2227                Case {
2228                    name: "cursor_offset_relative_to_context_not_editable_region",
2229                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
2230                    // write_cursor_excerpt_section must subtract it before comparing against
2231                    // per-line offsets within the editable region.
2232                    context: "pre\naaa\nbbb\nsuf\n",
2233                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
2234                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
2235                    expected: indoc! {"
2236                    <|file_sep|>test.rs
2237                    <|fim_prefix|>
2238                    pre
2239                    <|fim_middle|>current
2240                    0:23|aaa
2241                    1:26|b<|user_cursor|>bb
2242                    <|fim_suffix|>
2243                    suf
2244                    <|fim_middle|>updated
2245                    "},
2246                },
2247            ];
2248
2249            for case in &cases {
2250                let mut prompt = String::new();
2251                hashline::write_cursor_excerpt_section(
2252                    &mut prompt,
2253                    Path::new("test.rs"),
2254                    case.context,
2255                    &case.editable_range,
2256                    case.cursor_offset,
2257                );
2258                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
2259            }
2260        }
2261
2262        #[test]
2263        fn test_apply_edit_commands() {
2264            struct Case {
2265                name: &'static str,
2266                original: &'static str,
2267                model_output: &'static str,
2268                expected: &'static str,
2269            }
2270
2271            let cases = vec![
2272                Case {
2273                    name: "set_single_line",
2274                    original: indoc! {"
2275                    let mut total = 0;
2276                    for product in products {
2277                        total += ;
2278                    }
2279                    total
2280                "},
2281                    model_output: indoc! {"
2282                    <|set|>2:87
2283                        total += product.price;
2284                "},
2285                    expected: indoc! {"
2286                    let mut total = 0;
2287                    for product in products {
2288                        total += product.price;
2289                    }
2290                    total
2291                "},
2292                },
2293                Case {
2294                    name: "set_range",
2295                    original: indoc! {"
2296                    fn foo() {
2297                        let x = 1;
2298                        let y = 2;
2299                        let z = 3;
2300                    }
2301                "},
2302                    model_output: indoc! {"
2303                    <|set|>1:46-3:4a
2304                        let sum = 6;
2305                "},
2306                    expected: indoc! {"
2307                    fn foo() {
2308                        let sum = 6;
2309                    }
2310                "},
2311                },
2312                Case {
2313                    name: "insert_after_line",
2314                    original: indoc! {"
2315                    fn main() {
2316                        let x = 1;
2317                    }
2318                "},
2319                    model_output: indoc! {"
2320                    <|insert|>1:46
2321                        let y = 2;
2322                "},
2323                    expected: indoc! {"
2324                    fn main() {
2325                        let x = 1;
2326                        let y = 2;
2327                    }
2328                "},
2329                },
2330                Case {
2331                    name: "insert_before_first",
2332                    original: indoc! {"
2333                    let x = 1;
2334                    let y = 2;
2335                "},
2336                    model_output: indoc! {"
2337                    <|insert|>
2338                    use std::io;
2339                "},
2340                    expected: indoc! {"
2341                    use std::io;
2342                    let x = 1;
2343                    let y = 2;
2344                "},
2345                },
2346                Case {
2347                    name: "set_with_cursor_marker",
2348                    original: indoc! {"
2349                    fn main() {
2350                        println!();
2351                    }
2352                "},
2353                    model_output: indoc! {"
2354                    <|set|>1:34
2355                        eprintln!(\"<|user_cursor|>\");
2356                "},
2357                    expected: indoc! {"
2358                    fn main() {
2359                        eprintln!(\"<|user_cursor|>\");
2360                    }
2361                "},
2362                },
2363                Case {
2364                    name: "multiple_set_commands",
2365                    original: indoc! {"
2366                    aaa
2367                    bbb
2368                    ccc
2369                    ddd
2370                "},
2371                    model_output: indoc! {"
2372                    <|set|>0:23
2373                    AAA
2374                    <|set|>2:29
2375                    CCC
2376                "},
2377                    expected: indoc! {"
2378                    AAA
2379                    bbb
2380                    CCC
2381                    ddd
2382                "},
2383                },
2384                Case {
2385                    name: "set_range_multiline_replacement",
2386                    original: indoc! {"
2387                    fn handle_submit() {
2388                    }
2389
2390                    fn handle_keystroke() {
2391                "},
2392                    model_output: indoc! {"
2393                    <|set|>0:3f-1:7d
2394                    fn handle_submit(modal_state: &mut ModalState) {
2395                        <|user_cursor|>
2396                    }
2397                "},
2398                    expected: indoc! {"
2399                    fn handle_submit(modal_state: &mut ModalState) {
2400                        <|user_cursor|>
2401                    }
2402
2403                    fn handle_keystroke() {
2404                "},
2405                },
2406                Case {
2407                    name: "no_edit_commands_returns_original",
2408                    original: indoc! {"
2409                    hello
2410                    world
2411                "},
2412                    model_output: "some random text with no commands",
2413                    expected: indoc! {"
2414                    hello
2415                    world
2416                "},
2417                },
2418                Case {
2419                    name: "no_edits_command_returns_original",
2420                    original: indoc! {"
2421                    hello
2422                    world
2423                "},
2424                    model_output: "<|no_edits|>",
2425                    expected: indoc! {"
2426                    hello
2427                    world
2428                "},
2429                },
2430                Case {
2431                    name: "wrong_hash_set_ignored",
2432                    original: indoc! {"
2433                    aaa
2434                    bbb
2435                "},
2436                    model_output: indoc! {"
2437                    <|set|>0:ff
2438                    ZZZ
2439                "},
2440                    expected: indoc! {"
2441                    aaa
2442                    bbb
2443                "},
2444                },
2445                Case {
2446                    name: "insert_and_set_combined",
2447                    original: indoc! {"
2448                    alpha
2449                    beta
2450                    gamma
2451                "},
2452                    model_output: indoc! {"
2453                    <|set|>0:06
2454                    ALPHA
2455                    <|insert|>1:9c
2456                    beta_extra
2457                "},
2458                    expected: indoc! {"
2459                    ALPHA
2460                    beta
2461                    beta_extra
2462                    gamma
2463                "},
2464                },
2465                Case {
2466                    name: "no_trailing_newline_preserved",
2467                    original: "hello\nworld",
2468                    model_output: indoc! {"
2469                    <|set|>0:14
2470                    HELLO
2471                "},
2472                    expected: "HELLO\nworld",
2473                },
2474                Case {
2475                    name: "set_range_hash_mismatch_in_end_bound",
2476                    original: indoc! {"
2477                    one
2478                    two
2479                    three
2480                "},
2481                    model_output: indoc! {"
2482                    <|set|>0:42-2:ff
2483                    ONE_TWO_THREE
2484                "},
2485                    expected: indoc! {"
2486                    one
2487                    two
2488                    three
2489                "},
2490                },
2491                Case {
2492                    name: "set_range_start_greater_than_end_ignored",
2493                    original: indoc! {"
2494                    a
2495                    b
2496                    c
2497                "},
2498                    model_output: indoc! {"
2499                    <|set|>2:63-1:62
2500                    X
2501                "},
2502                    expected: indoc! {"
2503                    a
2504                    b
2505                    c
2506                "},
2507                },
2508                Case {
2509                    name: "insert_out_of_bounds_ignored",
2510                    original: indoc! {"
2511                    x
2512                    y
2513                "},
2514                    model_output: indoc! {"
2515                    <|insert|>99:aa
2516                    z
2517                "},
2518                    expected: indoc! {"
2519                    x
2520                    y
2521                "},
2522                },
2523                Case {
2524                    name: "set_out_of_bounds_ignored",
2525                    original: indoc! {"
2526                    x
2527                    y
2528                "},
2529                    model_output: indoc! {"
2530                    <|set|>99:aa
2531                    z
2532                "},
2533                    expected: indoc! {"
2534                    x
2535                    y
2536                "},
2537                },
2538                Case {
2539                    name: "malformed_set_command_ignored",
2540                    original: indoc! {"
2541                    alpha
2542                    beta
2543                "},
2544                    model_output: indoc! {"
2545                    <|set|>not-a-line-ref
2546                    UPDATED
2547                "},
2548                    expected: indoc! {"
2549                    alpha
2550                    beta
2551                "},
2552                },
2553                Case {
2554                    name: "malformed_insert_hash_treated_as_before_first",
2555                    original: indoc! {"
2556                    alpha
2557                    beta
2558                "},
2559                    model_output: indoc! {"
2560                    <|insert|>1:nothex
2561                    preamble
2562                "},
2563                    expected: indoc! {"
2564                    preamble
2565                    alpha
2566                    beta
2567                "},
2568                },
2569                Case {
2570                    name: "set_then_insert_same_target_orders_insert_after_replacement",
2571                    original: indoc! {"
2572                    cat
2573                    dog
2574                "},
2575                    model_output: indoc! {"
2576                    <|set|>0:38
2577                    CAT
2578                    <|insert|>0:38
2579                    TAIL
2580                "},
2581                    expected: indoc! {"
2582                    CAT
2583                    TAIL
2584                    dog
2585                "},
2586                },
2587                Case {
2588                    name: "overlapping_set_ranges_last_wins",
2589                    original: indoc! {"
2590                    a
2591                    b
2592                    c
2593                    d
2594                "},
2595                    model_output: indoc! {"
2596                    <|set|>0:61-2:63
2597                    FIRST
2598                    <|set|>1:62-3:64
2599                    SECOND
2600                "},
2601                    expected: indoc! {"
2602                    FIRST
2603                    d
2604                "},
2605                },
2606                Case {
2607                    name: "insert_before_first_and_after_line",
2608                    original: indoc! {"
2609                        a
2610                        b
2611                    "},
2612                    model_output: indoc! {"
2613                        <|insert|>
2614                        HEAD
2615                        <|insert|>0:61
2616                        MID
2617                    "},
2618                    expected: indoc! {"
2619                        HEAD
2620                        a
2621                        MID
2622                        b
2623                    "},
2624                },
2625            ];
2626
2627            for case in &cases {
2628                let result = hashline::apply_edit_commands(case.original, &case.model_output);
2629                assert_eq!(result, case.expected, "failed case: {}", case.name);
2630            }
2631        }
2632
2633        #[test]
2634        fn test_output_has_edit_commands() {
2635            assert!(hashline::output_has_edit_commands(&format!(
2636                "{}0:ab\nnew",
2637                SET_COMMAND_MARKER
2638            )));
2639            assert!(hashline::output_has_edit_commands(&format!(
2640                "{}0:ab\nnew",
2641                INSERT_COMMAND_MARKER
2642            )));
2643            assert!(hashline::output_has_edit_commands(&format!(
2644                "some text\n{}1:cd\nstuff",
2645                SET_COMMAND_MARKER
2646            )));
2647            assert!(!hashline::output_has_edit_commands("just plain text"));
2648            assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2649            assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2650        }
2651
2652        // ---- hashline::patch_to_edit_commands round-trip tests ----
2653
        // Round-trip check: each unified diff in the table is converted to edit
        // commands with `hashline::patch_to_edit_commands`, the commands are
        // applied back to `old` with `hashline::apply_edit_commands`, and the
        // result must equal `expected_new`.
        #[test]
        fn test_patch_to_edit_commands() {
            // One table entry: the original text, the patch against it, and the
            // text expected after applying the generated commands.
            struct Case {
                name: &'static str,
                old: &'static str,
                patch: &'static str,
                expected_new: &'static str,
            }

            let cases = [
                Case {
                    name: "single_line_replacement",
                    old: indoc! {"
                    let mut total = 0;
                    for product in products {
                        total += ;
                    }
                    total
                "},
                    patch: indoc! {"
                    @@ -1,5 +1,5 @@
                     let mut total = 0;
                     for product in products {
                    -    total += ;
                    +    total += product.price;
                     }
                     total
                "},
                    expected_new: indoc! {"
                    let mut total = 0;
                    for product in products {
                        total += product.price;
                    }
                    total
                "},
                },
                Case {
                    name: "multiline_replacement",
                    old: indoc! {"
                    fn foo() {
                        let x = 1;
                        let y = 2;
                        let z = 3;
                    }
                "},
                    patch: indoc! {"
                    @@ -1,5 +1,3 @@
                     fn foo() {
                    -    let x = 1;
                    -    let y = 2;
                    -    let z = 3;
                    +    let sum = 1 + 2 + 3;
                     }
                "},
                    expected_new: indoc! {"
                    fn foo() {
                        let sum = 1 + 2 + 3;
                    }
                "},
                },
                Case {
                    name: "insertion",
                    old: indoc! {"
                    fn main() {
                        let x = 1;
                    }
                "},
                    patch: indoc! {"
                    @@ -1,3 +1,4 @@
                     fn main() {
                         let x = 1;
                    +    let y = 2;
                     }
                "},
                    expected_new: indoc! {"
                    fn main() {
                        let x = 1;
                        let y = 2;
                    }
                "},
                },
                Case {
                    name: "insertion_before_first",
                    old: indoc! {"
                    let x = 1;
                    let y = 2;
                "},
                    patch: indoc! {"
                    @@ -1,2 +1,3 @@
                    +use std::io;
                     let x = 1;
                     let y = 2;
                "},
                    expected_new: indoc! {"
                    use std::io;
                    let x = 1;
                    let y = 2;
                "},
                },
                Case {
                    name: "deletion",
                    old: indoc! {"
                    aaa
                    bbb
                    ccc
                    ddd
                "},
                    patch: indoc! {"
                    @@ -1,4 +1,2 @@
                     aaa
                    -bbb
                    -ccc
                     ddd
                "},
                    expected_new: indoc! {"
                    aaa
                    ddd
                "},
                },
                Case {
                    name: "multiple_changes",
                    old: indoc! {"
                    alpha
                    beta
                    gamma
                    delta
                    epsilon
                "},
                    patch: indoc! {"
                    @@ -1,5 +1,5 @@
                    -alpha
                    +ALPHA
                     beta
                     gamma
                    -delta
                    +DELTA
                     epsilon
                "},
                    expected_new: indoc! {"
                    ALPHA
                    beta
                    gamma
                    DELTA
                    epsilon
                "},
                },
                Case {
                    name: "replace_with_insertion",
                    old: indoc! {r#"
                    fn handle() {
                        modal_state.close();
                        modal_state.dismiss();
                "#},
                    patch: indoc! {r#"
                    @@ -1,3 +1,4 @@
                     fn handle() {
                         modal_state.close();
                    +    eprintln!("");
                         modal_state.dismiss();
                "#},
                    expected_new: indoc! {r#"
                    fn handle() {
                        modal_state.close();
                        eprintln!("");
                        modal_state.dismiss();
                "#},
                },
                Case {
                    name: "complete_replacement",
                    old: indoc! {"
                    aaa
                    bbb
                    ccc
                "},
                    patch: indoc! {"
                    @@ -1,3 +1,3 @@
                    -aaa
                    -bbb
                    -ccc
                    +xxx
                    +yyy
                    +zzz
                "},
                    expected_new: indoc! {"
                    xxx
                    yyy
                    zzz
                "},
                },
                Case {
                    name: "add_function_body",
                    old: indoc! {"
                    fn foo() {
                        modal_state.dismiss();
                    }

                    fn

                    fn handle_keystroke() {
                "},
                    patch: indoc! {"
                    @@ -1,6 +1,8 @@
                     fn foo() {
                         modal_state.dismiss();
                     }

                    -fn
                    +fn handle_submit() {
                    +    todo()
                    +}

                     fn handle_keystroke() {
                "},
                    expected_new: indoc! {"
                    fn foo() {
                        modal_state.dismiss();
                    }

                    fn handle_submit() {
                        todo()
                    }

                    fn handle_keystroke() {
                "},
                },
                // `expected_new` contains the cursor marker, so the generated
                // commands must place the marker at the derived cursor offset.
                Case {
                    name: "with_cursor_offset",
                    old: indoc! {r#"
                    fn main() {
                        println!();
                    }
                "#},
                    patch: indoc! {r#"
                        @@ -1,3 +1,3 @@
                        fn main() {
                        -    println!();
                        +    eprintln!("");
                        }
                    "#},
                    expected_new: indoc! {r#"
                        fn main() {
                            eprintln!("<|user_cursor|>");
                        }
                    "#},
                },
                // The hunk header points at line 20 although `old` has only two
                // lines; the insertion is still expected to be applied.
                Case {
                    name: "non_local_hunk_header_pure_insertion_repro",
                    old: indoc! {"
                        aaa
                        bbb
                    "},
                    patch: indoc! {"
                        @@ -20,2 +20,3 @@
                        aaa
                        +xxx
                        bbb
                    "},
                    expected_new: indoc! {"
                        aaa
                        xxx
                        bbb
                    "},
                },
                // A patch consisting only of a hunk header must leave the text
                // unchanged while still passing `output_has_edit_commands`.
                Case {
                    name: "empty_patch_produces_no_edits_marker",
                    old: indoc! {"
                        aaa
                        bbb
                    "},
                    patch: "@@ -20,2 +20,3 @@\n",
                    expected_new: indoc! {"
                        aaa
                        bbb
                    "},
                },
            ];

            for case in &cases {
                // The cursor_offset for patch_to_edit_commands is relative to
                // the first hunk's new text (context + additions). We compute
                // it by finding where the marker sits in the expected output
                // (which mirrors the new text of the hunk).
                let cursor_offset = case.expected_new.find(CURSOR_MARKER);

                let commands =
                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));

                // Every case, including the empty patch, must yield output that
                // is recognized as edit commands.
                assert!(
                    hashline::output_has_edit_commands(&commands),
                    "case {}: expected edit commands, got: {commands:?}",
                    case.name,
                );

                // Applying the generated commands must reproduce the expected text.
                let applied = hashline::apply_edit_commands(case.old, &commands);
                assert_eq!(applied, case.expected_new, "case {}", case.name);
            }
        }
2952    }
2953}
2954
2955pub mod seed_coder {
2956    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2957    //!
2958    //! Seed-Coder uses different FIM tokens and order than Qwen:
2959    //! - SPM order: suffix comes FIRST, then prefix, then middle
2960    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2961    //! - File markers: StarCoder-style `<filename>path` (single token + path)
2962    //!
2963    //! All context (related files, edit history) goes in the PREFIX section.
2964    //! The suffix contains only code after the editable region.
2965    //!
2966    //! Example prompt:
2967    //!
2968    //! <[fim-suffix]>
2969    //! code after editable region
2970    //! <[fim-prefix]><filename>related/file.py
2971    //! related file content
2972    //!
2973    //! <filename>edit_history
2974    //! --- a/some_file.py
2975    //! +++ b/some_file.py
2976    //! -old
2977    //! +new
2978    //!
2979    //! <filename>path/to/target_file.py
2980    //! code before editable region
2981    //! <<<<<<< CURRENT
2982    //! code that
2983    //! needs to<|user_cursor|>
2984    //! be rewritten
2985    //! =======
2986    //! <[fim-middle]>
2987    //!
2988    //! Expected output (model generates):
2989    //!
2990    //! updated
2991    //! code with
2992    //! changes applied
2993    //! >>>>>>> UPDATED
2994
2995    use super::*;
2996
    /// Token that opens the suffix section, which is written first in the prompt.
    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
    /// Token that opens the prefix section (related files, edit history, cursor excerpt).
    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
    /// Token written at the very end of the prompt.
    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
    /// Marker that introduces a file path (or the `edit_history` pseudo-file).
    pub const FILE_MARKER: &str = "<filename>";

    /// Line that opens the editable region inside the cursor excerpt.
    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
    /// Line that closes the editable region in the prompt.
    pub const SEPARATOR: &str = "=======\n";
    /// Line that terminates the expected model output.
    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

    /// Output line that, followed by `END_MARKER`, represents an empty patch.
    pub const NO_EDITS: &str = "NO_EDITS\n";
3007
3008    pub fn special_tokens() -> &'static [&'static str] {
3009        &[
3010            FIM_SUFFIX,
3011            FIM_PREFIX,
3012            FIM_MIDDLE,
3013            FILE_MARKER,
3014            START_MARKER,
3015            SEPARATOR,
3016            END_MARKER,
3017            CURSOR_MARKER,
3018        ]
3019    }
3020
3021    pub fn write_cursor_excerpt_section(
3022        prompt: &mut String,
3023        path: &Path,
3024        context: &str,
3025        editable_range: &Range<usize>,
3026        cursor_offset: usize,
3027    ) {
3028        let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3029        prompt.push_str(&section);
3030    }
3031
3032    pub fn format_prompt_with_budget(
3033        path: &Path,
3034        context: &str,
3035        editable_range: &Range<usize>,
3036        cursor_offset: usize,
3037        events: &[Arc<Event>],
3038        related_files: &[RelatedFile],
3039        max_tokens: usize,
3040    ) -> String {
3041        let cursor_prefix_section =
3042            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3043        assemble_fim_prompt(
3044            context,
3045            editable_range,
3046            &cursor_prefix_section,
3047            events,
3048            related_files,
3049            max_tokens,
3050        )
3051    }
3052
3053    pub fn assemble_fim_prompt(
3054        context: &str,
3055        editable_range: &Range<usize>,
3056        cursor_prefix_section: &str,
3057        events: &[Arc<Event>],
3058        related_files: &[RelatedFile],
3059        max_tokens: usize,
3060    ) -> String {
3061        let suffix_section = build_suffix_section(context, editable_range);
3062
3063        let suffix_tokens = estimate_tokens(suffix_section.len() + FIM_PREFIX.len());
3064        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len() + FIM_MIDDLE.len());
3065        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
3066
3067        let edit_history_section = super::format_edit_history_within_budget(
3068            events,
3069            FILE_MARKER,
3070            "edit_history",
3071            budget_after_cursor,
3072            max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
3073        );
3074        let edit_history_tokens = estimate_tokens(edit_history_section.len() + "\n".len());
3075        let budget_after_edit_history =
3076            budget_after_cursor.saturating_sub(edit_history_tokens + "\n".len());
3077
3078        let related_files_section = super::format_related_files_within_budget(
3079            related_files,
3080            FILE_MARKER,
3081            "",
3082            budget_after_edit_history,
3083        );
3084
3085        let mut prompt = String::new();
3086        prompt.push_str(&suffix_section);
3087        prompt.push_str(FIM_PREFIX);
3088        prompt.push_str(&related_files_section);
3089        if !related_files_section.is_empty() {
3090            prompt.push('\n');
3091        }
3092        prompt.push_str(&edit_history_section);
3093        if !edit_history_section.is_empty() {
3094            prompt.push('\n');
3095        }
3096        prompt.push_str(cursor_prefix_section);
3097        prompt.push_str(FIM_MIDDLE);
3098
3099        prompt
3100    }
3101
3102    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
3103        let mut section = String::new();
3104        section.push_str(FIM_SUFFIX);
3105        section.push_str(&context[editable_range.end..]);
3106        if !section.ends_with('\n') {
3107            section.push('\n');
3108        }
3109        section
3110    }
3111
3112    fn build_cursor_prefix_section(
3113        path: &Path,
3114        context: &str,
3115        editable_range: &Range<usize>,
3116        cursor_offset: usize,
3117    ) -> String {
3118        let mut section = String::new();
3119        let path_str = path.to_string_lossy();
3120        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
3121
3122        section.push_str(&context[..editable_range.start]);
3123        section.push_str(START_MARKER);
3124        section.push_str(&context[editable_range.start..cursor_offset]);
3125        section.push_str(CURSOR_MARKER);
3126        section.push_str(&context[cursor_offset..editable_range.end]);
3127        if !section.ends_with('\n') {
3128            section.push('\n');
3129        }
3130        section.push_str(SEPARATOR);
3131        section
3132    }
3133
3134    /// Format patch as containing no changes if it's empty; otherwise return None.
3135    pub(crate) fn no_edits(patch: &str) -> Option<String> {
3136        // Count lines in the patch
3137        let empty_patch = patch.lines().count() <= 3;
3138        if empty_patch {
3139            Some(format!("{NO_EDITS}{END_MARKER}"))
3140        } else {
3141            None
3142        }
3143    }
3144}
3145
3146pub mod v0304_variable_edit {
3147    //! A prompt format with no fixed editable region. The entire context is shown
3148    //! to the model, and it chooses which text to replace by outputting surrounding
3149    //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
3150    //! text.
3151    //!
3152    //! Example prompt:
3153    //!
3154    //! <|file_sep|>path/to/file.py
3155    //! zero
3156    //! one
3157    //! two
3158    //! three<|user_cursor|>
3159    //! four
3160    //! five
3161    //! <|fim_prefix|>
    //!
3163    //! Expected output (model generates):
3164    //!
3165    //! two
3166    //! <|fim_middle|>
3167    //! THREE
3168    //! <|fim_suffix|>
3169    //! four
3170    //!
3171    //! The output means: find "two\n...\nfour" in the context, and replace
3172    //! everything between "two\n" and "four" with "THREE\n".
3173
3174    use super::*;
3175
3176    pub fn special_tokens() -> &'static [&'static str] {
3177        &[
3178            "<|fim_prefix|>",
3179            "<|fim_suffix|>",
3180            "<|fim_middle|>",
3181            "<|file_sep|>",
3182            CURSOR_MARKER,
3183        ]
3184    }
3185
3186    pub fn write_cursor_excerpt_section(
3187        prompt: &mut String,
3188        path: &Path,
3189        context: &str,
3190        cursor_offset: usize,
3191    ) {
3192        let path_str = path.to_string_lossy();
3193        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
3194
3195        prompt.push_str(&context[..cursor_offset]);
3196        prompt.push_str(CURSOR_MARKER);
3197        prompt.push_str(&context[cursor_offset..]);
3198        if !prompt.ends_with('\n') {
3199            prompt.push('\n');
3200        }
3201        prompt.push_str("<|fim_prefix|>\n")
3202    }
3203
3204    /// Apply a variable-edit model output to the original context text.
3205    ///
3206    /// The model output has the form:
3207    ///
3208    /// - prefix context lines
3209    /// - `<|fim_middle|>`
3210    /// - new text
3211    /// - `<|fim_suffix|>`
3212    /// - suffix context lines
3213    ///
3214    /// We locate the prefix/suffix context lines in the original text and replace
3215    /// everything between them with the new text.
3216    pub fn apply_variable_edit(
3217        context: &str,
3218        model_output: &str,
3219    ) -> Result<(Range<usize>, String)> {
3220        let (prefix_context, rest) = model_output
3221            .split_once("<|fim_middle|>\n")
3222            .or_else(|| model_output.split_once("<|fim_middle|>"))
3223            .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
3224
3225        let (new_text, suffix_context) = rest
3226            .split_once("<|fim_suffix|>\n")
3227            .or_else(|| rest.split_once("<|fim_suffix|>"))
3228            .unwrap_or((rest, ""));
3229
3230        let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
3231            suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
3232        } else {
3233            suffix_context
3234        };
3235
3236        let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
3237            .ok_or_else(|| anyhow!("could not locate prefix lines"))?
3238            + prefix_context.len();
3239        let suffix_offset = if suffix_context.is_empty() {
3240            context.len()
3241        } else {
3242            find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
3243                .ok_or_else(|| anyhow!("could not locate suffix lines"))?
3244                + prefix_offset
3245        };
3246
3247        let edit_range = prefix_offset..suffix_offset;
3248        return Ok((edit_range, new_text.to_string()));
3249    }
3250
3251    fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
3252        if needle.is_empty() {
3253            return Some(0);
3254        }
3255
3256        haystack.match_indices(needle).find_map(|(offset, _)| {
3257            let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
3258            matched_line_start.then_some(offset)
3259        })
3260    }
3261
    /// Convert a unified diff patch into the variable-edit output format.
    ///
    /// Parses `patch` as a unified diff against `old_text` and produces model
    /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
    /// delimiters. The diff is resolved by content matching rather than line
    /// numbers.
    ///
    /// `cursor_offset`, when given, is treated as a byte offset relative to the
    /// position where the first hunk's context was found; if it falls inside
    /// the replacement text, `CURSOR_MARKER` is inserted there.
    ///
    /// Returns an empty string when the patch contains no hunks or when
    /// applying it changes no line.
    ///
    /// # Errors
    ///
    /// Fails when a hunk's old context cannot be found in the text at or after
    /// the end of the previously applied hunk.
    pub fn patch_to_variable_edit_output(
        old_text: &str,
        patch: &str,
        cursor_offset: Option<usize>,
    ) -> Result<String> {
        // Parse the unified diff into hunks. Each hunk has an `old_context`
        // string (context + deleted lines interleaved in order) and a list of
        // edits expressed as byte ranges within that context plus replacement
        // text.
        let hunks = parse_hunks(patch);
        if hunks.is_empty() {
            return Ok(String::new());
        }

        // Apply each hunk by finding its old_context in the text and
        // performing the edits. We search forward from where the previous
        // hunk ended so that hunks are applied in order.
        let mut new_text = old_text.to_string();
        let mut search_from: usize = 0;
        let mut first_hunk_pos: Option<usize> = None;

        for hunk in &hunks {
            let context_pos = new_text[search_from..]
                .find(&hunk.old_context)
                .map(|pos| pos + search_from)
                .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;

            // Remember where the first hunk landed; the cursor offset is
            // interpreted relative to this position below.
            if first_hunk_pos.is_none() {
                first_hunk_pos = Some(context_pos);
            }

            // Apply edits in reverse order so byte offsets remain valid.
            for edit in hunk.edits.iter().rev() {
                let abs_start = context_pos + edit.range.start;
                let abs_end = context_pos + edit.range.end;
                new_text.replace_range(abs_start..abs_end, &edit.text);
            }

            // Advance past this hunk's region in the (now modified) text.
            // The new length is the old context length adjusted by each
            // edit's size delta.
            let new_region_len: usize =
                hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
                    len + edit.text.len() - (edit.range.end - edit.range.start)
                });
            search_from = context_pos + new_region_len;
        }

        // Now we have old_text and new_text. Find the changed line range by
        // comparing them.
        let old_lines: Vec<&str> = old_text.lines().collect();
        let new_lines: Vec<&str> = new_text.lines().collect();

        // Find first differing line. If one text is a line-wise prefix of the
        // other, the change starts where the shorter one ends.
        let first_changed_row = old_lines
            .iter()
            .zip(new_lines.iter())
            .position(|(a, b)| a != b)
            .unwrap_or_else(|| old_lines.len().min(new_lines.len()));

        // Find last differing line (from the end). The common suffix is capped
        // so it never overlaps the common prefix.
        let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
        let common_suffix = old_lines
            .iter()
            .rev()
            .zip(new_lines.iter().rev())
            .take(max_suffix)
            .take_while(|(a, b)| a == b)
            .count();

        let old_end = old_lines.len() - common_suffix;
        let new_end = new_lines.len() - common_suffix;

        // Both changed ranges are empty: the patch did not alter any line.
        if first_changed_row == old_end && first_changed_row == new_end {
            return Ok(String::new());
        }

        // Build the replacement text from new_lines[first_changed_row..new_end].
        let mut merged_new_text = String::new();
        for line in &new_lines[first_changed_row..new_end] {
            merged_new_text.push_str(line);
            merged_new_text.push('\n');
        }

        // cursor_offset is relative to the first hunk's new content in
        // new_text. Translate it to an offset within merged_new_text, which
        // only contains lines first_changed_row..new_end of new_text.
        if let Some(hunk_offset) = cursor_offset {
            let hunk_start = first_hunk_pos.unwrap_or(0);
            let absolute_pos = hunk_start + hunk_offset;

            // Byte offset where first_changed_row starts in new_text. The `+ 1`
            // accounts for one line-terminator byte per line (assumes `\n`
            // line endings).
            let merged_start: usize = new_lines[..first_changed_row]
                .iter()
                .map(|line| line.len() + 1)
                .sum();

            // Only insert the marker when the cursor falls inside the
            // replacement text; otherwise it is silently dropped.
            if absolute_pos >= merged_start {
                let relative_offset = absolute_pos - merged_start;
                if relative_offset <= merged_new_text.len() {
                    merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
                }
            }
        }

        // Build output with 2 lines of context above and below.
        let context_lines_count = 2;
        let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
        let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());

        // Counts how many times the run of lines `lines[line_range]` occurs
        // anywhere in `lines` (including the run itself).
        fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
            let pattern = &lines[line_range];
            let pattern_len = pattern.len();

            let mut count = 0;
            for offset in 0..=lines.len() - pattern_len {
                if &lines[offset..offset + pattern_len] == pattern {
                    count += 1;
                }
            }
            count
        }

        // Expand prefix and suffix until they are unique
        // in the old text (or until the start/end of the text is reached).
        while prefix_start > 0 {
            if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
                prefix_start -= 1;
            } else {
                break;
            }
        }
        while suffix_end < old_lines.len() {
            if count_matches(old_end..suffix_end, &old_lines) > 1 {
                suffix_end += 1;
            } else {
                break;
            }
        }

        // Emit: prefix context lines, `<|fim_middle|>`, the replacement text,
        // `<|fim_suffix|>`, suffix context lines.
        let mut output = String::new();
        for line in &old_lines[prefix_start..first_changed_row] {
            output.push_str(line);
            output.push('\n');
        }
        output.push_str("<|fim_middle|>\n");
        output.push_str(&merged_new_text);
        output.push_str("<|fim_suffix|>\n");
        for line in &old_lines[old_end..suffix_end] {
            output.push_str(line);
            output.push('\n');
        }

        Ok(output)
    }
3420
    /// One hunk of a unified diff, as produced by `parse_hunks`.
    struct ParsedHunk {
        /// The hunk's context lines and removed lines, in patch order, each
        /// terminated by `\n`. Added lines are not part of this text.
        old_context: String,
        /// The changes the hunk makes, in the order they appear in the
        /// patch, expressed relative to `old_context`.
        edits: Vec<ParsedEdit>,
    }
3425
    /// A single replacement inside a `ParsedHunk`.
    struct ParsedEdit {
        /// Byte range within the hunk's `old_context` that is replaced.
        /// Empty (`pos..pos`) for a pure insertion.
        range: Range<usize>,
        /// Replacement text: the added lines, each terminated by `\n`.
        /// Empty for a pure deletion.
        text: String,
    }
3430
3431    /// Parse a unified diff into content-based hunks. Each hunk contains an
3432    /// `old_context` string (context lines + deleted lines, which together
3433    /// form the text that should be found in the original) and a list of edits
3434    /// expressed as byte ranges within that context.
3435    fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
3436        let mut hunks = Vec::new();
3437        let mut current: Option<ParsedHunk> = None;
3438
3439        for line in patch.lines() {
3440            if line.starts_with("@@") {
3441                if let Some(hunk) = current.take() {
3442                    if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3443                        hunks.push(hunk);
3444                    }
3445                }
3446                current = Some(ParsedHunk {
3447                    old_context: String::new(),
3448                    edits: Vec::new(),
3449                });
3450            } else if line.starts_with("---") || line.starts_with("+++") {
3451                continue;
3452            } else if let Some(hunk) = &mut current {
3453                if let Some(added) = line.strip_prefix('+') {
3454                    let pos = hunk.old_context.len();
3455                    if let Some(last_edit) = hunk.edits.last_mut() {
3456                        if last_edit.range.end == pos {
3457                            writeln!(&mut last_edit.text, "{added}").ok();
3458                            continue;
3459                        }
3460                    }
3461                    hunk.edits.push(ParsedEdit {
3462                        range: pos..pos,
3463                        text: format!("{added}\n"),
3464                    });
3465                } else if let Some(removed) = line.strip_prefix('-') {
3466                    let start = hunk.old_context.len();
3467                    writeln!(&mut hunk.old_context, "{removed}").ok();
3468                    let end = hunk.old_context.len();
3469                    if let Some(last_edit) = hunk.edits.last_mut() {
3470                        if last_edit.range.end == start {
3471                            last_edit.range.end = end;
3472                            continue;
3473                        }
3474                    }
3475                    hunk.edits.push(ParsedEdit {
3476                        range: start..end,
3477                        text: String::new(),
3478                    });
3479                } else {
3480                    let ctx = line.strip_prefix(' ').unwrap_or(line);
3481                    writeln!(&mut hunk.old_context, "{ctx}").ok();
3482                }
3483            }
3484        }
3485
3486        if let Some(hunk) = current {
3487            if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3488                hunks.push(hunk);
3489            }
3490        }
3491
3492        hunks
3493    }
3494
3495    #[cfg(test)]
3496    mod tests {
3497        use super::*;
3498        use indoc::indoc;
3499
        // Table-driven test for `apply_variable_edit`. Each case supplies the
        // original text and a model output made of optional prefix context,
        // `<|fim_middle|>`, the replacement lines, `<|fim_suffix|>`, and
        // optional suffix context. The edit returned for that output is
        // applied to the original and compared against `expected`.
        #[test]
        fn test_apply_variable_edit() {
            struct Case {
                // Label shown in the assertion message on failure.
                name: &'static str,
                // Text the edit is applied to.
                original: &'static str,
                // Variable-edit output to be interpreted.
                model_output: &'static str,
                // `original` after the edit has been applied.
                expected: &'static str,
            }

            let cases = [
                Case {
                    name: "simple_single_line_replacement",
                    original: indoc! {"
                        zero
                        one
                        two
                        three
                        four
                        five
                    "},
                    model_output: indoc! {"
                        two
                        <|fim_middle|>
                        THREE
                        <|fim_suffix|>
                        four
                    "},
                    expected: indoc! {"
                        zero
                        one
                        two
                        THREE
                        four
                        five
                    "},
                },
                Case {
                    name: "multi_line_replacement",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                        e
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B
                        C
                        D
                        <|fim_suffix|>
                        e
                    "},
                    expected: indoc! {"
                        a
                        B
                        C
                        D
                        e
                    "},
                },
                Case {
                    name: "insertion_between_existing_lines",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        X
                        <|fim_suffix|>
                        b
                    "},
                    expected: indoc! {"
                        a
                        X
                        b
                        c
                    "},
                },
                Case {
                    name: "deletion",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        c
                        d
                    "},
                },
                Case {
                    name: "replacement_at_start_no_prefix_context",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        <|fim_middle|>
                        X
                        <|fim_suffix|>
                        b
                    "},
                    expected: indoc! {"
                        X
                        b
                        c
                    "},
                },
                Case {
                    name: "replacement_at_end_no_suffix_context",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        b
                        <|fim_middle|>
                        Z
                        <|fim_suffix|>
                    "},
                    expected: indoc! {"
                        a
                        b
                        Z
                    "},
                },
                Case {
                    name: "context_with_trailing_newline_is_preserved",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        B
                        c
                    "},
                },
                Case {
                    name: "cursor_marker_passes_through_untouched",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B<|user_cursor|>B
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        B<|user_cursor|>B
                        c
                    "},
                },
                Case {
                    name: "multiple_prefix_context_lines",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                        e
                    "},
                    model_output: indoc! {"
                        b
                        c
                        <|fim_middle|>
                        D
                        <|fim_suffix|>
                        e
                    "},
                    expected: indoc! {"
                        a
                        b
                        c
                        D
                        e
                    "},
                },
            ];

            for case in cases {
                // The helper yields the range to replace and its new text;
                // apply that to a copy of the original.
                let (edit_range, replacement) =
                    apply_variable_edit(case.original, case.model_output).unwrap();
                let mut edited = case.original.to_string();
                edited.replace_range(edit_range, &replacement);
                assert_eq!(edited, case.expected, "{}", case.name);
            }
        }
3716
3717        #[test]
3718        fn test_patch_to_variable_edit() {
3719            struct Case {
3720                name: &'static str,
3721                old: &'static str,
3722                patch: &'static str,
3723                cursor_offset: Option<usize>,
3724                expected_variable_edit: &'static str,
3725                expected_after_apply: &'static str,
3726            }
3727
3728            let cases = [
3729                Case {
3730                    name: "simple_replacement",
3731                    old: indoc! {"
3732                        zero
3733                        one
3734                        two
3735                        three
3736                        four
3737                        five
3738                    "},
3739                    patch: indoc! {"
3740                        @@ -3,3 +3,3 @@
3741                         two
3742                        -three
3743                        +THREE
3744                         four
3745                    "},
3746                    cursor_offset: None,
3747                    expected_variable_edit: indoc! {"
3748                        one
3749                        two
3750                        <|fim_middle|>
3751                        THREE
3752                        <|fim_suffix|>
3753                        four
3754                        five
3755                    "},
3756                    expected_after_apply: indoc! {"
3757                        zero
3758                        one
3759                        two
3760                        THREE
3761                        four
3762                        five
3763                    "},
3764                },
3765                Case {
3766                    name: "insertion",
3767                    old: indoc! {"
3768                        a
3769                        b
3770                        c
3771                        d
3772                        e
3773                    "},
3774                    patch: indoc! {"
3775                        @@ -2,0 +3,1 @@
3776                         b
3777                        +X
3778                         c
3779                    "},
3780                    cursor_offset: None,
3781                    expected_variable_edit: indoc! {"
3782                        a
3783                        b
3784                        <|fim_middle|>
3785                        X
3786                        <|fim_suffix|>
3787                        c
3788                        d
3789                    "},
3790                    expected_after_apply: indoc! {"
3791                        a
3792                        b
3793                        X
3794                        c
3795                        d
3796                        e
3797                    "},
3798                },
3799                Case {
3800                    name: "deletion",
3801                    old: indoc! {"
3802                        a
3803                        b
3804                        c
3805                        d
3806                        e
3807                    "},
3808                    patch: indoc! {"
3809                        @@ -2,3 +2,2 @@
3810                         b
3811                        -c
3812                         d
3813                    "},
3814                    cursor_offset: None,
3815                    expected_variable_edit: indoc! {"
3816                        a
3817                        b
3818                        <|fim_middle|>
3819                        <|fim_suffix|>
3820                        d
3821                        e
3822                    "},
3823                    expected_after_apply: indoc! {"
3824                        a
3825                        b
3826                        d
3827                        e
3828                    "},
3829                },
3830                Case {
3831                    name: "edit_near_start",
3832                    old: indoc! {"
3833                        first
3834                        second
3835                        third
3836                        fourth
3837                    "},
3838                    patch: indoc! {"
3839                        @@ -1,1 +1,1 @@
3840                        -first
3841                        +FIRST
3842                    "},
3843                    cursor_offset: None,
3844                    expected_variable_edit: indoc! {"
3845                        <|fim_middle|>
3846                        FIRST
3847                        <|fim_suffix|>
3848                        second
3849                        third
3850                    "},
3851                    expected_after_apply: indoc! {"
3852                        FIRST
3853                        second
3854                        third
3855                        fourth
3856                    "},
3857                },
3858                Case {
3859                    name: "edit_near_end",
3860                    old: indoc! {"
3861                        first
3862                        second
3863                        third
3864                        fourth
3865                    "},
3866                    patch: indoc! {"
3867                        @@ -4,1 +4,1 @@
3868                        -fourth
3869                        +FOURTH
3870                    "},
3871                    cursor_offset: None,
3872                    expected_variable_edit: indoc! {"
3873                        second
3874                        third
3875                        <|fim_middle|>
3876                        FOURTH
3877                        <|fim_suffix|>
3878                    "},
3879                    expected_after_apply: indoc! {"
3880                        first
3881                        second
3882                        third
3883                        FOURTH
3884                    "},
3885                },
3886                Case {
3887                    name: "cursor_at_start_of_replacement",
3888                    old: indoc! {"
3889                        zero
3890                        one
3891                        two
3892                        three
3893                        four
3894                        five
3895                    "},
3896                    patch: indoc! {"
3897                        @@ -3,3 +3,3 @@
3898                         two
3899                        -three
3900                        +THREE
3901                         four
3902                    "},
3903                    cursor_offset: Some(4),
3904                    expected_variable_edit: indoc! {"
3905                        one
3906                        two
3907                        <|fim_middle|>
3908                        <|user_cursor|>THREE
3909                        <|fim_suffix|>
3910                        four
3911                        five
3912                    "},
3913                    expected_after_apply: indoc! {"
3914                        zero
3915                        one
3916                        two
3917                        <|user_cursor|>THREE
3918                        four
3919                        five
3920                    "},
3921                },
3922                Case {
3923                    name: "cursor_in_middle_of_replacement",
3924                    old: indoc! {"
3925                        zero
3926                        one
3927                        two
3928                        three
3929                        four
3930                        five
3931                    "},
3932                    patch: indoc! {"
3933                        @@ -3,3 +3,3 @@
3934                         two
3935                        -three
3936                        +THREE
3937                         four
3938                    "},
3939                    cursor_offset: Some(6),
3940                    expected_variable_edit: indoc! {"
3941                        one
3942                        two
3943                        <|fim_middle|>
3944                        TH<|user_cursor|>REE
3945                        <|fim_suffix|>
3946                        four
3947                        five
3948                    "},
3949                    expected_after_apply: indoc! {"
3950                        zero
3951                        one
3952                        two
3953                        TH<|user_cursor|>REE
3954                        four
3955                        five
3956                    "},
3957                },
3958                Case {
3959                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3960                    old: indoc! {"
3961                        one
3962                        a
3963                        b
3964                        c
3965                        d
3966                        two
3967                        a
3968                        b
3969                        c
3970                        d
3971                        three
3972                        a
3973                        b
3974                        c
3975                        d
3976                        four
3977                    "},
3978                    patch: indoc! {"
3979                        @@ -4,5 +4,5 @@
3980                         two
3981                         a
3982                         b
3983                        -c
3984                        +C
3985                         d
3986                         three
3987                    "},
3988                    cursor_offset: None,
3989                    expected_variable_edit: indoc! {"
3990                        two
3991                        a
3992                        b
3993                        <|fim_middle|>
3994                        C
3995                        <|fim_suffix|>
3996                        d
3997                        three
3998                    "},
3999                    expected_after_apply: indoc! {"
4000                        one
4001                        a
4002                        b
4003                        c
4004                        d
4005                        two
4006                        a
4007                        b
4008                        C
4009                        d
4010                        three
4011                        a
4012                        b
4013                        c
4014                        d
4015                        four
4016                    "},
4017                },
4018                Case {
4019                    name: "expands_context_when_two_lines_not_unique_before_and_after",
4020                    old: indoc! {"
4021                        {
4022                            {
4023                                one();
4024                            }
4025                        }
4026                        {
4027                            {
4028                                two();
4029                            }
4030                        }
4031                        {
4032                            {
4033                                three();
4034                            }
4035                        }
4036                        {
4037                            {
4038                                four();
4039                            }
4040                        }
4041                    "},
4042                    patch: indoc! {"
4043                        @@ -4,5 +4,5 @@
4044                             {
4045                        -        two();
4046                        +        TWO();
4047                             }
4048                    "},
4049                    cursor_offset: None,
4050                    expected_variable_edit: indoc! {"
4051                                one();
4052                            }
4053                        }
4054                        {
4055                            {
4056                        <|fim_middle|>
4057                                TWO();
4058                        <|fim_suffix|>
4059                            }
4060                        }
4061                        {
4062                            {
4063                                three();
4064                    "},
4065                    expected_after_apply: indoc! {"
4066                        {
4067                            {
4068                                one();
4069                            }
4070                        }
4071                        {
4072                            {
4073                                TWO();
4074                            }
4075                        }
4076                        {
4077                            {
4078                                three();
4079                            }
4080                        }
4081                        {
4082                            {
4083                                four();
4084                            }
4085                        }
4086                    "},
4087                },
4088            ];
4089
4090            for case in cases {
4091                let output =
4092                    patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
4093                        .unwrap_or_else(|error| {
4094                            panic!("failed converting patch for {}: {error}", case.name)
4095                        });
4096                assert_eq!(
4097                    output, case.expected_variable_edit,
4098                    "patch->variable_edit mismatch for {}",
4099                    case.name
4100                );
4101
4102                let (edit_range, replacement) = apply_variable_edit(case.old, &output)
4103                    .unwrap_or_else(|error| {
4104                        panic!("failed applying variable_edit for {}: {error}", case.name)
4105                    });
4106                let mut edited_by_variable_edit = case.old.to_string();
4107                edited_by_variable_edit.replace_range(edit_range, &replacement);
4108                assert_eq!(
4109                    edited_by_variable_edit, case.expected_after_apply,
4110                    "variable_edit apply mismatch for {}",
4111                    case.name
4112                );
4113
4114                let (expected_edit_range, expected_replacement) =
4115                    apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
4116                        |error| {
4117                            panic!(
4118                                "failed applying expected variable_edit for {}: {error}",
4119                                case.name
4120                            )
4121                        },
4122                    );
4123                let mut edited_by_expected_variable_edit = case.old.to_string();
4124                edited_by_expected_variable_edit
4125                    .replace_range(expected_edit_range, &expected_replacement);
4126                assert_eq!(
4127                    edited_by_expected_variable_edit, case.expected_after_apply,
4128                    "expected variable_edit apply mismatch for {}",
4129                    case.name
4130                );
4131            }
4132        }
4133
4134        #[test]
4135        fn test_write_cursor_excerpt_section() {
4136            let path = Path::new("test.rs");
4137            let context = "fn main() {\n    hello();\n}\n";
4138            let cursor_offset = 17;
4139            let mut prompt = String::new();
4140            write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
4141            assert_eq!(
4142                prompt,
4143                "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
4144            );
4145        }
4146    }
4147}
4148
4149/// The zeta1 prompt format
4150pub mod zeta1 {
4151    use super::*;
4152    use std::fmt::Write;
4153
    /// Marker written at the cursor position inside the editable region of a
    /// zeta1 excerpt. Note that it differs from the crate-level
    /// `CURSOR_MARKER` (`<|user_cursor|>`).
    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
    /// Marker written at the top of the excerpt when it starts at the
    /// beginning of the file.
    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
    /// Marker line written immediately before the editable region.
    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
    /// Marker line written immediately after the editable region.
    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";

    /// Opening of every zeta1 prompt: the task instruction followed by the
    /// heading under which the user's edit events are listed.
    const INSTRUCTION_HEADER: &str = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n\n",
        "### User Edits:\n\n"
    );
    /// Heading placed between the edit events and the excerpt.
    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
    /// Heading that ends the prompt.
    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
4168
4169    /// Formats a complete zeta1 prompt from the input events and excerpt.
4170    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
4171        let mut prompt = String::with_capacity(
4172            INSTRUCTION_HEADER.len()
4173                + input_events.len()
4174                + EXCERPT_HEADER.len()
4175                + input_excerpt.len()
4176                + RESPONSE_HEADER.len(),
4177        );
4178        prompt.push_str(INSTRUCTION_HEADER);
4179        prompt.push_str(input_events);
4180        prompt.push_str(EXCERPT_HEADER);
4181        prompt.push_str(input_excerpt);
4182        prompt.push_str(RESPONSE_HEADER);
4183        prompt
4184    }
4185
4186    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
4187    /// editable and context byte-offset ranges within `cursor_excerpt`.
4188    pub fn format_zeta1_from_input(
4189        input: &ZetaPromptInput,
4190        editable_range: Range<usize>,
4191        context_range: Range<usize>,
4192    ) -> String {
4193        let events = format_zeta1_events(&input.events);
4194        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
4195        format_zeta1_prompt(&events, &excerpt)
4196    }
4197
4198    /// Formats events in zeta1 style (oldest first).
4199    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
4200        let mut result = String::new();
4201        for event in
4202            events
4203                .iter()
4204                .skip(events.len().saturating_sub(max_edit_event_count_for_format(
4205                    &ZetaFormat::V0114180EditableRegion,
4206                )))
4207        {
4208            let event_string = format_zeta1_event(event);
4209            if event_string.is_empty() {
4210                continue;
4211            }
4212            if !result.is_empty() {
4213                result.push_str("\n\n");
4214            }
4215            result.push_str(&event_string);
4216        }
4217        result
4218    }
4219
4220    fn format_zeta1_event(event: &Event) -> String {
4221        match event {
4222            Event::BufferChange {
4223                path,
4224                old_path,
4225                diff,
4226                ..
4227            } => {
4228                let mut prompt = String::new();
4229                if old_path != path {
4230                    writeln!(
4231                        prompt,
4232                        "User renamed {} to {}\n",
4233                        old_path.display(),
4234                        path.display()
4235                    )
4236                    .ok();
4237                }
4238                if !diff.is_empty() {
4239                    write!(
4240                        prompt,
4241                        "User edited {}:\n```diff\n{}\n```",
4242                        path.display(),
4243                        diff
4244                    )
4245                    .ok();
4246                }
4247                prompt
4248            }
4249        }
4250    }
4251
4252    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
4253    /// within `cursor_excerpt`.
4254    fn format_zeta1_excerpt(
4255        input: &ZetaPromptInput,
4256        editable_range: Range<usize>,
4257        context_range: Range<usize>,
4258    ) -> String {
4259        let path_str = input.cursor_path.to_string_lossy();
4260        let excerpt = &*input.cursor_excerpt;
4261        let cursor_offset = input.cursor_offset_in_excerpt;
4262
4263        let mut prompt = String::new();
4264        writeln!(&mut prompt, "```{path_str}").ok();
4265
4266        let starts_at_file_beginning =
4267            input.excerpt_start_row == Some(0) && context_range.start == 0;
4268        if starts_at_file_beginning {
4269            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
4270        }
4271
4272        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
4273
4274        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
4275        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
4276        prompt.push_str(CURSOR_MARKER);
4277        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
4278        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
4279
4280        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
4281        write!(prompt, "\n```").ok();
4282
4283        prompt
4284    }
4285
4286    /// Cleans zeta1 model output by extracting content between editable region
4287    /// markers and converting the zeta1 cursor marker to the universal one.
4288    /// Returns `None` if the output doesn't contain the expected markers.
4289    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
4290        let content = output.replace(CURSOR_MARKER, "");
4291
4292        let content_start = content
4293            .find(EDITABLE_REGION_START_MARKER)
4294            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
4295            .map(|pos| {
4296                if content.as_bytes().get(pos) == Some(&b'\n') {
4297                    pos + 1
4298                } else {
4299                    pos
4300                }
4301            })
4302            .unwrap_or(0);
4303
4304        let content_end = content
4305            .find(EDITABLE_REGION_END_MARKER)
4306            .map(|pos| {
4307                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
4308                    pos - 1
4309                } else {
4310                    pos
4311                }
4312            })
4313            .unwrap_or(content.len());
4314
4315        if content_start > content_end {
4316            return Some(String::new());
4317        }
4318
4319        let extracted = &content[content_start..content_end];
4320
4321        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
4322            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
4323            let text_before_cursor = text_before_cursor
4324                .find(EDITABLE_REGION_START_MARKER)
4325                .map(|pos| {
4326                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
4327                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
4328                        after_marker + 1
4329                    } else {
4330                        after_marker
4331                    }
4332                })
4333                .unwrap_or(0);
4334            let offset_in_extracted = zeta1_cursor_pos
4335                .saturating_sub(text_before_cursor)
4336                .min(extracted.len());
4337            offset_in_extracted
4338        });
4339
4340        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
4341        if let Some(offset) = cursor_offset {
4342            result.push_str(&extracted[..offset]);
4343            result.push_str(super::CURSOR_MARKER);
4344            result.push_str(&extracted[offset..]);
4345        } else {
4346            result.push_str(extracted);
4347        }
4348
4349        Some(result)
4350    }
4351}
4352
4353#[cfg(test)]
4354mod tests {
4355    use super::*;
4356    use indoc::indoc;
4357
4358    fn make_input(
4359        cursor_excerpt: &str,
4360        editable_range: Range<usize>,
4361        cursor_offset: usize,
4362        events: Vec<Event>,
4363        related_files: Vec<RelatedFile>,
4364    ) -> ZetaPromptInput {
4365        let context_range = 0..cursor_excerpt.len();
4366        ZetaPromptInput {
4367            cursor_path: Path::new("test.rs").into(),
4368            cursor_excerpt: cursor_excerpt.into(),
4369            cursor_offset_in_excerpt: cursor_offset,
4370            excerpt_start_row: None,
4371            events: events.into_iter().map(Arc::new).collect(),
4372            related_files: Some(related_files),
4373            active_buffer_diagnostics: vec![],
4374            excerpt_ranges: ExcerptRanges {
4375                editable_150: editable_range.clone(),
4376                editable_180: editable_range.clone(),
4377                editable_350: editable_range,
4378                editable_150_context_350: context_range.clone(),
4379                editable_180_context_350: context_range.clone(),
4380                editable_350_context_150: context_range,
4381                ..Default::default()
4382            },
4383            syntax_ranges: None,
4384            experiment: None,
4385            in_open_source_repo: false,
4386            can_collect_data: false,
4387            repo_url: None,
4388        }
4389    }
4390
4391    fn make_input_with_context_range(
4392        excerpt: &str,
4393        editable_range: Range<usize>,
4394        context_range: Range<usize>,
4395        cursor_offset: usize,
4396    ) -> ZetaPromptInput {
4397        ZetaPromptInput {
4398            cursor_path: Path::new("test.rs").into(),
4399            cursor_excerpt: excerpt.into(),
4400            cursor_offset_in_excerpt: cursor_offset,
4401            excerpt_start_row: None,
4402            events: vec![],
4403            related_files: Some(vec![]),
4404            active_buffer_diagnostics: vec![],
4405            excerpt_ranges: ExcerptRanges {
4406                editable_150: editable_range.clone(),
4407                editable_180: editable_range.clone(),
4408                editable_350: editable_range,
4409                editable_150_context_350: context_range.clone(),
4410                editable_180_context_350: context_range.clone(),
4411                editable_350_context_150: context_range,
4412                ..Default::default()
4413            },
4414            syntax_ranges: None,
4415            experiment: None,
4416            in_open_source_repo: false,
4417            can_collect_data: false,
4418            repo_url: None,
4419        }
4420    }
4421
4422    fn make_event(path: &str, diff: &str) -> Event {
4423        Event::BufferChange {
4424            path: Path::new(path).into(),
4425            old_path: Path::new(path).into(),
4426            diff: diff.to_string(),
4427            predicted: false,
4428            in_open_source_repo: false,
4429        }
4430    }
4431
4432    fn make_related_file(path: &str, content: &str) -> RelatedFile {
4433        RelatedFile {
4434            path: Path::new(path).into(),
4435            max_row: content.lines().count() as u32,
4436            excerpts: vec![RelatedExcerpt {
4437                row_range: 0..content.lines().count() as u32,
4438                text: content.into(),
4439                order: 0,
4440            }],
4441            in_open_source_repo: false,
4442        }
4443    }
4444
4445    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> Option<String> {
4446        format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4447    }
4448
4449    fn budget_with_margin(requested_tokens: usize) -> usize {
4450        ((requested_tokens as f64) / 0.9).ceil() as usize
4451    }
4452
4453    #[test]
4454    fn test_no_truncation_when_within_budget() {
4455        let input = make_input(
4456            "prefix\neditable\nsuffix",
4457            7..15,
4458            10,
4459            vec![make_event("a.rs", "-old\n+new\n")],
4460            vec![make_related_file("related.rs", "fn helper() {}\n")],
4461        );
4462
4463        assert_eq!(
4464            format_with_budget(&input, 10000).unwrap(),
4465            indoc! {r#"
4466                <|file_sep|>related.rs
4467                fn helper() {}
4468                <|file_sep|>edit history
4469                --- a/a.rs
4470                +++ b/a.rs
4471                -old
4472                +new
4473                <|file_sep|>test.rs
4474                <|fim_prefix|>
4475                prefix
4476                <|fim_middle|>current
4477                edi<|user_cursor|>table
4478                <|fim_suffix|>
4479
4480                suffix
4481                <|fim_middle|>updated
4482            "#}
4483            .to_string()
4484        );
4485    }
4486
4487    #[test]
4488    fn test_truncation_drops_edit_history_when_budget_tight() {
4489        let input = make_input(
4490            "code",
4491            0..4,
4492            2,
4493            vec![make_event("a.rs", "-x\n+y\n")],
4494            vec![
4495                make_related_file("r1.rs", "aaaaaaa\n"),
4496                make_related_file("r2.rs", "bbbbbbb\n"),
4497            ],
4498        );
4499
4500        assert_eq!(
4501            format_with_budget(&input, 10000).unwrap(),
4502            indoc! {r#"
4503                <|file_sep|>r1.rs
4504                aaaaaaa
4505                <|file_sep|>r2.rs
4506                bbbbbbb
4507                <|file_sep|>edit history
4508                --- a/a.rs
4509                +++ b/a.rs
4510                -x
4511                +y
4512                <|file_sep|>test.rs
4513                <|fim_prefix|>
4514                <|fim_middle|>current
4515                co<|user_cursor|>de
4516                <|fim_suffix|>
4517                <|fim_middle|>updated
4518            "#}
4519            .to_string()
4520        );
4521
4522        assert_eq!(
4523            format_with_budget(&input, budget_with_margin(55)),
4524            Some(
4525                indoc! {r#"
4526                <|file_sep|>edit history
4527                --- a/a.rs
4528                +++ b/a.rs
4529                -x
4530                +y
4531                <|file_sep|>test.rs
4532                <|fim_prefix|>
4533                <|fim_middle|>current
4534                co<|user_cursor|>de
4535                <|fim_suffix|>
4536                <|fim_middle|>updated
4537            "#}
4538                .to_string()
4539            )
4540        );
4541    }
4542
4543    #[test]
4544    fn test_truncation_includes_partial_excerpts() {
4545        let input = make_input(
4546            "x",
4547            0..1,
4548            0,
4549            vec![],
4550            vec![RelatedFile {
4551                path: Path::new("big.rs").into(),
4552                max_row: 30,
4553                in_open_source_repo: false,
4554                excerpts: vec![
4555                    RelatedExcerpt {
4556                        row_range: 0..10,
4557                        text: "first excerpt\n".into(),
4558                        order: 0,
4559                    },
4560                    RelatedExcerpt {
4561                        row_range: 10..20,
4562                        text: "second excerpt\n".into(),
4563                        order: 0,
4564                    },
4565                    RelatedExcerpt {
4566                        row_range: 20..30,
4567                        text: "third excerpt\n".into(),
4568                        order: 0,
4569                    },
4570                ],
4571            }],
4572        );
4573
4574        assert_eq!(
4575            format_with_budget(&input, 10000).unwrap(),
4576            indoc! {r#"
4577                <|file_sep|>big.rs
4578                first excerpt
4579                ...
4580                second excerpt
4581                ...
4582                third excerpt
4583                <|file_sep|>test.rs
4584                <|fim_prefix|>
4585                <|fim_middle|>current
4586                <|user_cursor|>x
4587                <|fim_suffix|>
4588                <|fim_middle|>updated
4589            "#}
4590            .to_string()
4591        );
4592
4593        assert_eq!(
4594            format_with_budget(&input, budget_with_margin(50)).unwrap(),
4595            indoc! {r#"
4596                <|file_sep|>big.rs
4597                first excerpt
4598                ...
4599                <|file_sep|>test.rs
4600                <|fim_prefix|>
4601                <|fim_middle|>current
4602                <|user_cursor|>x
4603                <|fim_suffix|>
4604                <|fim_middle|>updated
4605            "#}
4606            .to_string()
4607        );
4608    }
4609
4610    #[test]
4611    fn test_truncation_prioritizes_lower_order_excerpts() {
4612        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
4613        // With tight budget, only the lower-order excerpt from file_b should be included.
4614        let input = make_input(
4615            "x",
4616            0..1,
4617            0,
4618            vec![],
4619            vec![
4620                RelatedFile {
4621                    path: Path::new("file_a.rs").into(),
4622                    max_row: 10,
4623                    in_open_source_repo: false,
4624                    excerpts: vec![RelatedExcerpt {
4625                        row_range: 0..10,
4626                        text: "low priority content\n".into(),
4627                        order: 5,
4628                    }],
4629                },
4630                RelatedFile {
4631                    path: Path::new("file_b.rs").into(),
4632                    max_row: 10,
4633                    in_open_source_repo: false,
4634                    excerpts: vec![RelatedExcerpt {
4635                        row_range: 0..10,
4636                        text: "high priority content\n".into(),
4637                        order: 1,
4638                    }],
4639                },
4640            ],
4641        );
4642
4643        // With large budget, both files included; rendered in stable lexicographic order.
4644        assert_eq!(
4645            format_with_budget(&input, 10000).unwrap(),
4646            indoc! {r#"
4647                <|file_sep|>file_a.rs
4648                low priority content
4649                <|file_sep|>file_b.rs
4650                high priority content
4651                <|file_sep|>test.rs
4652                <|fim_prefix|>
4653                <|fim_middle|>current
4654                <|user_cursor|>x
4655                <|fim_suffix|>
4656                <|fim_middle|>updated
4657            "#}
4658            .to_string()
4659        );
4660
4661        // With tight budget, only file_b (lower order) fits.
4662        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
4663        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
4664        // file_a would need another 14 tokens, which doesn't fit.
4665        assert_eq!(
4666            format_with_budget(&input, budget_with_margin(52)).unwrap(),
4667            indoc! {r#"
4668                <|file_sep|>file_b.rs
4669                high priority content
4670                <|file_sep|>test.rs
4671                <|fim_prefix|>
4672                <|fim_middle|>current
4673                <|user_cursor|>x
4674                <|fim_suffix|>
4675                <|fim_middle|>updated
4676            "#}
4677            .to_string()
4678        );
4679    }
4680
4681    #[test]
4682    fn test_truncation_drops_high_order_excerpts_within_file() {
4683        // A single file has excerpts at order 1 and order 3. With a tight budget,
4684        // only the order-1 excerpts are included while the order-3 excerpt is
4685        // dropped — even though they belong to the same file. This also preserves
4686        // the parent invariant: parent outline items have order ≤ their best
4687        // child, so they're always included when any child is.
4688        let input = make_input(
4689            "x",
4690            0..1,
4691            0,
4692            vec![],
4693            vec![RelatedFile {
4694                path: Path::new("mod.rs").into(),
4695                max_row: 30,
4696                in_open_source_repo: false,
4697                excerpts: vec![
4698                    RelatedExcerpt {
4699                        row_range: 0..5,
4700                        text: "mod header\n".into(),
4701                        order: 1,
4702                    },
4703                    RelatedExcerpt {
4704                        row_range: 5..15,
4705                        text: "important fn\n".into(),
4706                        order: 1,
4707                    },
4708                    RelatedExcerpt {
4709                        row_range: 15..30,
4710                        text: "less important fn\n".into(),
4711                        order: 3,
4712                    },
4713                ],
4714            }],
4715        );
4716
4717        // With large budget, all three excerpts included.
4718        assert_eq!(
4719            format_with_budget(&input, 10000).unwrap(),
4720            indoc! {r#"
4721                <|file_sep|>mod.rs
4722                mod header
4723                ...
4724                important fn
4725                ...
4726                less important fn
4727                <|file_sep|>test.rs
4728                <|fim_prefix|>
4729                <|fim_middle|>current
4730                <|user_cursor|>x
4731                <|fim_suffix|>
4732                <|fim_middle|>updated
4733            "#}
4734            .to_string()
4735        );
4736
4737        // With tight budget, only order<=1 excerpts included (header + important fn).
4738        assert_eq!(
4739            format_with_budget(&input, budget_with_margin(55)).unwrap(),
4740            indoc! {r#"
4741                <|file_sep|>mod.rs
4742                mod header
4743                ...
4744                important fn
4745                ...
4746                <|file_sep|>test.rs
4747                <|fim_prefix|>
4748                <|fim_middle|>current
4749                <|user_cursor|>x
4750                <|fim_suffix|>
4751                <|fim_middle|>updated
4752            "#}
4753            .to_string()
4754        );
4755    }
4756
4757    #[test]
4758    fn test_truncation_drops_older_events_first() {
4759        let input = make_input(
4760            "x",
4761            0..1,
4762            0,
4763            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
4764            vec![],
4765        );
4766
4767        assert_eq!(
4768            format_with_budget(&input, 10000).unwrap(),
4769            indoc! {r#"
4770                <|file_sep|>edit history
4771                --- a/old.rs
4772                +++ b/old.rs
4773                -1
4774                --- a/new.rs
4775                +++ b/new.rs
4776                -2
4777                <|file_sep|>test.rs
4778                <|fim_prefix|>
4779                <|fim_middle|>current
4780                <|user_cursor|>x
4781                <|fim_suffix|>
4782                <|fim_middle|>updated
4783            "#}
4784            .to_string()
4785        );
4786
4787        assert_eq!(
4788            format_with_budget(&input, 60).unwrap(),
4789            indoc! {r#"
4790                <|file_sep|>edit history
4791                --- a/new.rs
4792                +++ b/new.rs
4793                -2
4794                <|file_sep|>test.rs
4795                <|fim_prefix|>
4796                <|fim_middle|>current
4797                <|user_cursor|>x
4798                <|fim_suffix|>
4799                <|fim_middle|>updated
4800            "#}
4801            .to_string()
4802        );
4803    }
4804
4805    #[test]
4806    fn test_cursor_excerpt_always_included_with_minimal_budget() {
4807        let input = make_input(
4808            "fn main() {}",
4809            0..12,
4810            3,
4811            vec![make_event("a.rs", "-old\n+new\n")],
4812            vec![make_related_file("related.rs", "helper\n")],
4813        );
4814
4815        assert!(format_with_budget(&input, 30).is_none())
4816    }
4817
4818    #[track_caller]
4819    fn format_seed_coder(input: &ZetaPromptInput) -> String {
4820        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4821            .expect("seed coder prompt formatting should succeed")
4822    }
4823
4824    #[track_caller]
4825    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4826        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4827            .expect("seed coder prompt formatting should succeed")
4828    }
4829
4830    #[test]
4831    fn test_seed_coder_basic_format() {
4832        let input = make_input(
4833            "prefix\neditable\nsuffix",
4834            7..15,
4835            10,
4836            vec![make_event("a.rs", "-old\n+new\n")],
4837            vec![make_related_file("related.rs", "fn helper() {}\n")],
4838        );
4839
4840        assert_eq!(
4841            format_seed_coder(&input),
4842            indoc! {r#"
4843                <[fim-suffix]>
4844                suffix
4845                <[fim-prefix]><filename>related.rs
4846                fn helper() {}
4847
4848                <filename>edit_history
4849                --- a/a.rs
4850                +++ b/a.rs
4851                -old
4852                +new
4853
4854                <filename>test.rs
4855                prefix
4856                <<<<<<< CURRENT
4857                edi<|user_cursor|>table
4858                =======
4859                <[fim-middle]>"#}
4860        );
4861    }
4862
4863    #[test]
4864    fn test_v0317_formats_prompt_with_many_related_files() {
4865        let related_files = (0..900)
4866            .map(|index| {
4867                make_related_file(
4868                    &format!("related_{index}.rs"),
4869                    "fn helper() {\n    let value = 1;\n}\n",
4870                )
4871            })
4872            .collect();
4873
4874        let input = make_input(
4875            "code",
4876            0..4,
4877            2,
4878            vec![make_event("a.rs", "-x\n+y\n")],
4879            related_files,
4880        );
4881
4882        let prompt =
4883            format_prompt_with_budget_for_format(&input, ZetaFormat::V0317SeedMultiRegions, 4096);
4884
4885        assert!(prompt.is_some());
4886        let prompt = prompt.expect("v0317 should produce a prompt under high related-file count");
4887        assert!(prompt.contains("test.rs"));
4888        assert!(prompt.contains(CURSOR_MARKER));
4889    }
4890
4891    #[test]
4892    fn test_seed_coder_no_context() {
4893        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
4894
4895        assert_eq!(
4896            format_seed_coder(&input),
4897            indoc! {r#"
4898                <[fim-suffix]>
4899                after
4900                <[fim-prefix]><filename>test.rs
4901                before
4902                <<<<<<< CURRENT
4903                mid<|user_cursor|>dle
4904                =======
4905                <[fim-middle]>"#}
4906        );
4907    }
4908
4909    #[test]
4910    fn test_seed_coder_truncation_drops_context() {
4911        let input = make_input(
4912            "code",
4913            0..4,
4914            2,
4915            vec![make_event("a.rs", "-x\n+y\n")],
4916            vec![make_related_file("r1.rs", "content\n")],
4917        );
4918
4919        // With large budget, everything is included
4920        assert_eq!(
4921            format_seed_coder(&input),
4922            indoc! {r#"
4923                <[fim-suffix]>
4924                <[fim-prefix]><filename>r1.rs
4925                content
4926
4927                <filename>edit_history
4928                --- a/a.rs
4929                +++ b/a.rs
4930                -x
4931                +y
4932
4933                <filename>test.rs
4934                <<<<<<< CURRENT
4935                co<|user_cursor|>de
4936                =======
4937                <[fim-middle]>"#}
4938        );
4939
4940        assert_eq!(
4941            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 24),
4942            None
4943        );
4944
4945        assert_eq!(
4946            format_seed_coder_with_budget(&input, 40),
4947            indoc! {r#"
4948                <[fim-suffix]>
4949                <[fim-prefix]><filename>test.rs
4950                <<<<<<< CURRENT
4951                co<|user_cursor|>de
4952                =======
4953                <[fim-middle]>"#
4954            }
4955        )
4956    }
4957
4958    #[test]
4959    fn test_seed_coder_truncation_prioritizes_lower_order() {
4960        let input = make_input(
4961            "code",
4962            0..4,
4963            2,
4964            vec![],
4965            vec![
4966                RelatedFile {
4967                    path: Path::new("low_prio.rs").into(),
4968                    max_row: 5,
4969                    in_open_source_repo: false,
4970                    excerpts: vec![RelatedExcerpt {
4971                        row_range: 0..5,
4972                        text: "low prio\n".into(),
4973                        order: 10,
4974                    }],
4975                },
4976                RelatedFile {
4977                    path: Path::new("high_prio.rs").into(),
4978                    max_row: 5,
4979                    in_open_source_repo: false,
4980                    excerpts: vec![RelatedExcerpt {
4981                        row_range: 0..5,
4982                        text: "high prio\n".into(),
4983                        order: 1,
4984                    }],
4985                },
4986            ],
4987        );
4988
4989        // With large budget, both included; rendered in stable lexicographic order.
4990        assert_eq!(
4991            format_seed_coder(&input),
4992            indoc! {r#"
4993                <[fim-suffix]>
4994                <[fim-prefix]><filename>low_prio.rs
4995                low prio
4996                <filename>high_prio.rs
4997                high prio
4998
4999                <filename>test.rs
5000                <<<<<<< CURRENT
5001                co<|user_cursor|>de
5002                =======
5003                <[fim-middle]>"#}
5004        );
5005
5006        // With tight budget under the generic heuristic, context is dropped but the
5007        // minimal cursor section still fits.
5008        assert_eq!(
5009            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 44),
5010            Some(
5011                indoc! {r#"
5012                    <[fim-suffix]>
5013                    <[fim-prefix]><filename>test.rs
5014                    <<<<<<< CURRENT
5015                    co<|user_cursor|>de
5016                    =======
5017                    <[fim-middle]>"#}
5018                .to_string()
5019            )
5020        );
5021    }
5022
5023    #[test]
5024    fn test_format_zeta1_from_input_basic() {
5025        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
5026        let input = ZetaPromptInput {
5027            cursor_path: Path::new("src/main.rs").into(),
5028            cursor_excerpt: excerpt.into(),
5029            cursor_offset_in_excerpt: 30,
5030            excerpt_start_row: Some(0),
5031            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
5032            related_files: Some(vec![]),
5033            active_buffer_diagnostics: vec![],
5034            excerpt_ranges: ExcerptRanges {
5035                editable_150: 15..41,
5036                editable_180: 15..41,
5037                editable_350: 15..41,
5038                editable_150_context_350: 0..excerpt.len(),
5039                editable_180_context_350: 0..excerpt.len(),
5040                editable_350_context_150: 0..excerpt.len(),
5041                ..Default::default()
5042            },
5043            syntax_ranges: None,
5044            experiment: None,
5045            in_open_source_repo: false,
5046            can_collect_data: false,
5047            repo_url: None,
5048        };
5049
5050        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
5051
5052        assert_eq!(
5053            prompt,
5054            concat!(
5055                "### Instruction:\n",
5056                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5057                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5058                "into account the cursor location.\n",
5059                "\n",
5060                "### User Edits:\n",
5061                "\n",
5062                "User edited other.rs:\n",
5063                "```diff\n",
5064                "-old\n",
5065                "+new\n",
5066                "\n",
5067                "```\n",
5068                "\n",
5069                "### User Excerpt:\n",
5070                "\n",
5071                "```src/main.rs\n",
5072                "<|start_of_file|>\n",
5073                "fn before() {}\n",
5074                "<|editable_region_start|>\n",
5075                "fn foo() {\n",
5076                "    <|user_cursor_is_here|>let x = 1;\n",
5077                "\n",
5078                "<|editable_region_end|>}\n",
5079                "fn after() {}\n",
5080                "\n",
5081                "```\n",
5082                "\n",
5083                "### Response:\n",
5084            ),
5085        );
5086    }
5087
/// Renders a Zeta1 prompt for an excerpt whose `excerpt_start_row` is `Some(10)` and whose
/// editable and context ranges both cover the whole excerpt.
///
/// The expected prompt opens the fenced excerpt directly with `<|editable_region_start|>`;
/// no `<|start_of_file|>` line appears in it.
#[test]
fn test_format_zeta1_from_input_no_start_of_file() {
    let source_text = "fn foo() {\n    let x = 1;\n}\n";
    // One range (the full 28-byte excerpt) is used for every editable/context slot.
    let whole_excerpt = 0..28;

    let excerpt_ranges = ExcerptRanges {
        editable_150: whole_excerpt.clone(),
        editable_180: whole_excerpt.clone(),
        editable_350: whole_excerpt.clone(),
        editable_150_context_350: whole_excerpt.clone(),
        editable_180_context_350: whole_excerpt.clone(),
        editable_350_context_150: whole_excerpt.clone(),
        ..Default::default()
    };
    let input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: source_text.into(),
        // Byte 15 sits right after the four spaces that indent `let x = 1;`.
        cursor_offset_in_excerpt: 15,
        excerpt_start_row: Some(10),
        events: Vec::new(),
        related_files: Some(Vec::new()),
        active_buffer_diagnostics: Vec::new(),
        excerpt_ranges,
        syntax_ranges: None,
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let expected_prompt = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n",
        "\n",
        "### User Edits:\n",
        "\n",
        "\n",
        "\n",
        "### User Excerpt:\n",
        "\n",
        "```src/main.rs\n",
        "<|editable_region_start|>\n",
        "fn foo() {\n",
        "    <|user_cursor_is_here|>let x = 1;\n",
        "}\n",
        "\n",
        "<|editable_region_end|>\n",
        "```\n",
        "\n",
        "### Response:\n",
    );

    let rendered_prompt =
        zeta1::format_zeta1_from_input(&input, whole_excerpt.clone(), whole_excerpt);

    assert_eq!(rendered_prompt, expected_prompt);
}
5144
/// Renders a Zeta1 prompt where the editable range (`10..37`) is a strict sub-range of the
/// context range (the whole excerpt) and `excerpt_start_row` is `Some(0)`.
///
/// The expected prompt starts the excerpt with `<|start_of_file|>`, keeps the `// prefix`
/// and `// suffix` lines outside the editable-region markers, and places the cursor marker
/// in front of `let x = 1;`.
#[test]
fn test_format_zeta1_from_input_with_sub_ranges() {
    let source_text = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
    let editable_span = 10..37;
    let context_span = 0..source_text.len();

    let excerpt_ranges = ExcerptRanges {
        editable_150: editable_span.clone(),
        editable_180: editable_span.clone(),
        editable_350: editable_span.clone(),
        editable_150_context_350: context_span.clone(),
        editable_180_context_350: context_span.clone(),
        editable_350_context_150: context_span.clone(),
        ..Default::default()
    };
    let input = ZetaPromptInput {
        cursor_path: Path::new("test.rs").into(),
        cursor_excerpt: source_text.into(),
        // Byte 25 sits right after the four spaces that indent `let x = 1;`.
        cursor_offset_in_excerpt: 25,
        excerpt_start_row: Some(0),
        events: Vec::new(),
        related_files: Some(Vec::new()),
        active_buffer_diagnostics: Vec::new(),
        excerpt_ranges,
        syntax_ranges: None,
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let expected_prompt = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n",
        "\n",
        "### User Edits:\n",
        "\n",
        "\n",
        "\n",
        "### User Excerpt:\n",
        "\n",
        "```test.rs\n",
        "<|start_of_file|>\n",
        "// prefix\n",
        "<|editable_region_start|>\n",
        "fn foo() {\n",
        "    <|user_cursor_is_here|>let x = 1;\n",
        "}\n",
        "<|editable_region_end|>\n",
        "// suffix\n",
        "\n",
        "```\n",
        "\n",
        "### Response:\n",
    );

    let rendered_prompt = zeta1::format_zeta1_from_input(&input, editable_span, context_span);

    assert_eq!(rendered_prompt, expected_prompt);
}
5207
/// Exercises the final argument of `format_edit_history_within_budget` against an input
/// holding three events (`event-0.rs` .. `event-2.rs`).
///
/// Asserted behaviour:
/// * `5` renders all three events, in order;
/// * `2` renders only `event-1.rs` and `event-2.rs`;
/// * `0` renders an empty string (not even the header line).
#[test]
fn test_max_event_count() {
    /// Builds an event for `event-{index}.rs` whose diff swaps `old-{index}` for `new-{index}`.
    fn make_numbered_event(index: usize) -> Event {
        make_event(
            &format!("event-{index}.rs"),
            &format!("-old-{index}\n+new-{index}\n"),
        )
    }

    let input = make_input(
        "x",
        0..1,
        0,
        (0..3).map(make_numbered_event).collect(),
        vec![],
    );

    // All three scenarios share the header token, the title and a `usize::MAX` fourth
    // argument (presumably the token budget, so it never limits the output here);
    // only the event cap differs between calls.
    let render_history = |max_event_count| {
        format_edit_history_within_budget(
            &input.events,
            "<|file_sep|>",
            "edit history",
            usize::MAX,
            max_event_count,
        )
    };

    // A cap above the number of events keeps everything.
    let edit_history_section = render_history(5);
    assert_eq!(
        &edit_history_section,
        indoc! {"
            <|file_sep|>edit history
            --- a/event-0.rs
            +++ b/event-0.rs
            -old-0
            +new-0
            --- a/event-1.rs
            +++ b/event-1.rs
            -old-1
            +new-1
            --- a/event-2.rs
            +++ b/event-2.rs
            -old-2
            +new-2
        "}
    );

    // A cap of two drops the first event and keeps the last two.
    let edit_history_section = render_history(2);
    assert_eq!(
        &edit_history_section,
        indoc! {"
            <|file_sep|>edit history
            --- a/event-1.rs
            +++ b/event-1.rs
            -old-1
            +new-1
            --- a/event-2.rs
            +++ b/event-2.rs
            -old-2
            +new-2
        "}
    );

    // A cap of zero yields no section at all.
    let edit_history_section = render_history(0);
    assert_eq!(&edit_history_section, "");
}
5288
/// Feeds `zeta1::clean_zeta1_model_output` a response wrapped in editable-region markers
/// and expects back only the enclosed code, without the marker lines or the newline that
/// preceded the end marker.
#[test]
fn test_clean_zeta1_model_output_basic() {
    let model_response = indoc! {"
        <|editable_region_start|>
        fn main() {
            println!(\"hello\");
        }
        <|editable_region_end|>
    "};
    let expected = "fn main() {\n    println!(\"hello\");\n}";

    let cleaned_text = zeta1::clean_zeta1_model_output(model_response).unwrap();

    assert_eq!(cleaned_text, expected);
}
5302
/// Same shape as the basic cleaning test, but the response contains a
/// `<|user_cursor_is_here|>` marker; the cleaned text is expected to carry
/// `<|user_cursor|>` at that position instead.
#[test]
fn test_clean_zeta1_model_output_with_cursor() {
    let model_response = indoc! {"
        <|editable_region_start|>
        fn main() {
            <|user_cursor_is_here|>println!(\"hello\");
        }
        <|editable_region_end|>
    "};
    let expected = "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}";

    let cleaned_text = zeta1::clean_zeta1_model_output(model_response).unwrap();

    assert_eq!(cleaned_text, expected);
}
5319
/// A response without any editable-region markers is expected to come back unchanged,
/// trailing newline included.
#[test]
fn test_clean_zeta1_model_output_no_markers() {
    let model_response = "fn main() {}\n";
    let expected = "fn main() {}\n";

    let cleaned_text = zeta1::clean_zeta1_model_output(model_response).unwrap();

    assert_eq!(cleaned_text, expected);
}
5326
/// Start and end markers with nothing between them are expected to clean to an empty string.
#[test]
fn test_clean_zeta1_model_output_empty_region() {
    let model_response = "<|editable_region_start|>\n<|editable_region_end|>\n";
    let expected = "";

    let cleaned_text = zeta1::clean_zeta1_model_output(model_response).unwrap();

    assert_eq!(cleaned_text, expected);
}
5333
5334    fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
5335        let mut result = excerpt.to_string();
5336        result.replace_range(
5337            parsed_output.range_in_excerpt.clone(),
5338            &parsed_output.new_editable_region,
5339        );
5340        result
5341    }
5342
/// Parses a `V0131GitMergeMarkersPrefix` model response for an excerpt whose editable
/// range is the single line `editable old\n` and whose context range runs from
/// `ctx start` up to (not including) `after ctx`.
///
/// Applying the parsed edit to the excerpt must swap only that line for `editable new\n`.
#[test]
fn test_parse_zeta2_model_output() {
    let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
    // Byte offset of the first occurrence of `needle` in the excerpt.
    let offset_of = |needle: &str| excerpt.find(needle).unwrap();

    let context_range = offset_of("ctx start")..offset_of("after ctx");
    let editable_start = offset_of("editable old");
    let editable_range = editable_start..editable_start + "editable old\n".len();

    let input = make_input_with_context_range(
        excerpt,
        editable_range,
        context_range,
        editable_start,
    );

    let parsed = parse_zeta2_model_output(
        "editable new\n>>>>>>> UPDATED\n",
        ZetaFormat::V0131GitMergeMarkersPrefix,
        &input,
    )
    .unwrap();

    let edited = apply_edit(excerpt, &parsed);
    assert_eq!(
        edited,
        "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
    );
}
5369
/// When the model echoes the editable region (`bbb\nccc\n`) verbatim, applying the parsed
/// output must leave the excerpt exactly as it was.
#[test]
fn test_parse_zeta2_model_output_identity() {
    let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
    // The editable region covers the `bbb` and `ccc` lines and stops where `ddd` begins.
    let editable_start = excerpt.find("bbb").unwrap();
    let editable_range = editable_start..excerpt.find("ddd").unwrap();

    let input = make_input_with_context_range(
        excerpt,
        editable_range,
        0..excerpt.len(),
        editable_start,
    );

    let parsed = parse_zeta2_model_output(
        "bbb\nccc\n>>>>>>> UPDATED\n",
        ZetaFormat::V0131GitMergeMarkersPrefix,
        &input,
    )
    .unwrap();

    let edited = apply_edit(excerpt, &parsed);
    assert_eq!(edited, excerpt);
}
5388
/// A response that ends with the `>>>>>>> UPDATED` marker line and one that omits it must
/// produce the same edited excerpt, namely `new content\n`.
#[test]
fn test_parse_zeta2_model_output_strips_end_marker() {
    let excerpt = "hello\nworld\n";
    let whole_excerpt = 0..excerpt.len();
    let input = make_input_with_context_range(excerpt, whole_excerpt.clone(), whole_excerpt, 0);
    let format = ZetaFormat::V0131GitMergeMarkersPrefix;

    let parsed_with_marker =
        parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
    let parsed_without_marker =
        parse_zeta2_model_output("new content\n", format, &input).unwrap();

    let edited_with_marker = apply_edit(excerpt, &parsed_with_marker);
    let edited_without_marker = apply_edit(excerpt, &parsed_without_marker);

    assert_eq!(edited_with_marker, edited_without_marker);
    assert_eq!(edited_with_marker, "new content\n");
}
5402
/// Regression test for https://github.com/zed-industries/zed/issues/52489
///
/// Source code containing a `// =======` comment must not be reported as containing
/// special tokens for the `V0131GitMergeMarkersPrefix` format.
#[test]
fn test_special_tokens_not_triggered_by_comment_separator() {
    let excerpt = "fn main() {\n    // =======\n    println!(\"hello\");\n}\n";
    let input = make_input(excerpt, 0..excerpt.len(), 0, vec![], vec![]);

    let flagged =
        prompt_input_contains_special_tokens(&input, ZetaFormat::V0131GitMergeMarkersPrefix);

    assert!(
        !flagged,
        "comment containing ======= should not trigger special token detection"
    );
}
5413}