zeta_prompt.rs

   1pub mod excerpt_ranges;
   2pub mod multi_region;
   3
   4use anyhow::{Result, anyhow};
   5use serde::{Deserialize, Serialize};
   6use std::fmt::Write;
   7use std::ops::Range;
   8use std::path::Path;
   9use std::sync::Arc;
  10use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
  11
  12pub use crate::excerpt_ranges::{
  13    ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
  14};
  15
/// Marker text that denotes the user's cursor position. It is passed to the
/// multi-region excerpt writers and is listed among the special tokens of the
/// multi-region formats.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Default token budget used by `format_zeta_prompt`.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  23
  24fn estimate_tokens(bytes: usize) -> usize {
  25    bytes / 3
  26}
  27
  28/// Leave some slack to avoid overflow.
  29fn apply_prompt_budget_margin(max_tokens: usize) -> usize {
  30    (max_tokens as f64 * 0.9).floor() as usize
  31}
  32
/// Input from which a zeta prompt is formatted and against which model output
/// is later parsed (see `format_zeta_prompt` and `parse_zeta2_model_output`).
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path written into the cursor section of the prompt; related files with
    /// the same path have their redundant excerpts filtered out.
    pub cursor_path: Arc<Path>,
    /// Excerpt text around the cursor. This is the text scanned for special
    /// tokens by `prompt_input_contains_special_tokens`.
    pub cursor_excerpt: Arc<str>,
    /// Cursor position inside `cursor_excerpt`.
    /// NOTE(review): presumably a byte offset, like `syntax_ranges` — confirm.
    pub cursor_offset_in_excerpt: usize,
    /// When present, this row is added to the excerpt-relative row range of the
    /// context so that related-file excerpts already covered by it can be
    /// dropped. When absent, related files are used unfiltered.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Events fed into the edit-history portion of the prompt.
    pub events: Vec<Arc<Event>>,
    /// Related files to include in the prompt; `None` is treated as empty.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    /// Omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    /// Defaults to `false` when missing from serialized input.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// Defaults to `false` when missing from serialized input.
    #[serde(default)]
    pub can_collect_data: bool,
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
  63
/// Identifies a prompt/output format. Each variant selects its own special
/// tokens, token limits, stop tokens, excerpt ranges and output parsing in the
/// `*_for_format` functions of this module.
///
/// Variant names are exposed as strings through `IntoStaticStr` (used by
/// `Display` and `ZetaFormat::parse`), so renaming a variant changes its
/// textual name.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Required because `v0226Hashline` starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    /// The format returned by `ZetaFormat::default()`.
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
    /// Multi-block marker spans with NO_EDITS sentinel.
    V0306SeedMultiRegions,
    /// Byte-exact marker spans; all intermediate markers emitted; repeated marker means no-edit.
    V0316SeedMultiRegions,
    /// V0316 with larger block sizes.
    V0318SeedMultiRegions,
    /// V0316, but marker numbers are relative to the cursor block (e.g. -1, -0, +1).
    V0317SeedMultiRegions,
}
  99
 100impl std::fmt::Display for ZetaFormat {
 101    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 102        write!(f, "{}", <&'static str>::from(self))
 103    }
 104}
 105
 106impl ZetaFormat {
 107    pub fn parse(format_name: &str) -> Result<Self> {
 108        let mut results = ZetaFormat::iter().filter(|version| {
 109            <&'static str>::from(version)
 110                .to_lowercase()
 111                .contains(&format_name.to_lowercase())
 112        });
 113        let Some(result) = results.next() else {
 114            anyhow::bail!(
 115                "`{format_name}` did not match any of:\n{}",
 116                Self::options_as_string()
 117            );
 118        };
 119        if results.next().is_some() {
 120            anyhow::bail!(
 121                "`{format_name}` matched more than one of:\n{}",
 122                Self::options_as_string()
 123            );
 124        }
 125        Ok(result)
 126    }
 127
 128    pub fn options_as_string() -> String {
 129        ZetaFormat::iter()
 130            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 131            .collect::<Vec<_>>()
 132            .concat()
 133    }
 134}
 135
/// An event recorded for the prompt's edit history. Serialized with an
/// `"event"` tag field identifying the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, rendered by `write_event` as a diff with
    /// `--- a<old_path>` / `+++ b<path>` header lines.
    BufferChange {
        /// Path written on the `+++ b` header line.
        path: Arc<Path>,
        /// Path written on the `--- a` header line.
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the header lines.
        diff: String,
        /// When true, `write_event` prefixes the diff with a
        /// "User accepted prediction" comment line.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; not written to the prompt.
        in_open_source_repo: bool,
    },
}
 147
 148impl Event {
 149    pub fn in_open_source_repo(&self) -> bool {
 150        match self {
 151            Event::BufferChange {
 152                in_open_source_repo,
 153                ..
 154            } => *in_open_source_repo,
 155        }
 156    }
 157}
 158
 159pub fn write_event(prompt: &mut String, event: &Event) {
 160    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 161        for component in path.components() {
 162            prompt.push('/');
 163            write!(prompt, "{}", component.as_os_str().display()).ok();
 164        }
 165    }
 166    match event {
 167        Event::BufferChange {
 168            path,
 169            old_path,
 170            diff,
 171            predicted,
 172            in_open_source_repo: _,
 173        } => {
 174            if *predicted {
 175                prompt.push_str("// User accepted prediction:\n");
 176            }
 177            prompt.push_str("--- a");
 178            write_path_as_unix_str(prompt, old_path.as_ref());
 179            prompt.push_str("\n+++ b");
 180            write_path_as_unix_str(prompt, path.as_ref());
 181            prompt.push('\n');
 182            prompt.push_str(diff);
 183        }
 184    }
 185}
 186
/// A diagnostic attached to the active buffer, carried alongside the prompt
/// input in `ZetaPromptInput::active_buffer_diagnostics`.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    /// Optional numeric severity.
    /// NOTE(review): the meaning of the values is not defined here — confirm with the producer.
    pub severity: Option<i32>,
    /// Diagnostic message text.
    pub message: String,
    /// Snippet of text associated with the diagnostic.
    pub snippet: String,
    /// Row range of the snippet; presumably rows of the buffer — TODO confirm.
    pub snippet_buffer_row_range: Range<u32>,
    /// Range of the diagnostic inside `snippet`; presumably byte offsets — TODO confirm.
    pub diagnostic_range_in_snippet: Range<usize>,
}
 195
/// A file whose excerpts may be included in the prompt as related context.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path of the file; compared against the cursor path by
    /// `filter_redundant_excerpts`.
    pub path: Arc<Path>,
    /// Presumably the last row of the file — TODO confirm.
    pub max_row: u32,
    /// Excerpts taken from this file. `filter_redundant_excerpts` drops files
    /// left with no excerpts.
    pub excerpts: Vec<RelatedExcerpt>,
    /// Defaults to `false` when missing from serialized input.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 204
/// One excerpt of a `RelatedFile`.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by the excerpt; compared against the cursor's row range by
    /// `filter_redundant_excerpts`.
    pub row_range: Range<u32>,
    /// Text of the excerpt.
    pub text: Arc<str>,
    /// Defaults to `0` when missing from serialized input.
    /// NOTE(review): presumably a priority/ordering key — confirm with the consumer.
    #[serde(default)]
    pub order: usize,
}
 212
 213pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 214    special_tokens_for_format(format).iter().any(|token| {
 215        if let Some(line_token) = token.strip_suffix('\n') {
 216            input.cursor_excerpt.lines().any(|line| line == line_token)
 217        } else {
 218            input.cursor_excerpt.contains(token)
 219        }
 220    })
 221}
 222
 223pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> Option<String> {
 224    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
 225}
 226
 227pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 228    match format {
 229        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
 230        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
 231        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
 232        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 233        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
 234        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
 235        ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 236        ZetaFormat::v0226Hashline => hashline::special_tokens(),
 237        ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
 238        ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
 239        ZetaFormat::V0316SeedMultiRegions => {
 240            static TOKENS: &[&str] = &[
 241                seed_coder::FIM_SUFFIX,
 242                seed_coder::FIM_PREFIX,
 243                seed_coder::FIM_MIDDLE,
 244                seed_coder::FILE_MARKER,
 245                multi_region::V0316_END_MARKER,
 246                CURSOR_MARKER,
 247                multi_region::MARKER_TAG_PREFIX,
 248            ];
 249            TOKENS
 250        }
 251        ZetaFormat::V0318SeedMultiRegions => {
 252            static TOKENS: &[&str] = &[
 253                seed_coder::FIM_SUFFIX,
 254                seed_coder::FIM_PREFIX,
 255                seed_coder::FIM_MIDDLE,
 256                seed_coder::FILE_MARKER,
 257                multi_region::V0318_END_MARKER,
 258                CURSOR_MARKER,
 259                multi_region::MARKER_TAG_PREFIX,
 260            ];
 261            TOKENS
 262        }
 263        ZetaFormat::V0317SeedMultiRegions => {
 264            static TOKENS: &[&str] = &[
 265                seed_coder::FIM_SUFFIX,
 266                seed_coder::FIM_PREFIX,
 267                seed_coder::FIM_MIDDLE,
 268                seed_coder::FILE_MARKER,
 269                multi_region::V0317_END_MARKER,
 270                CURSOR_MARKER,
 271                multi_region::RELATIVE_MARKER_TAG_PREFIX,
 272            ];
 273            TOKENS
 274        }
 275        ZetaFormat::V0306SeedMultiRegions => {
 276            static TOKENS: &[&str] = &[
 277                seed_coder::FIM_SUFFIX,
 278                seed_coder::FIM_PREFIX,
 279                seed_coder::FIM_MIDDLE,
 280                seed_coder::FILE_MARKER,
 281                seed_coder::START_MARKER,
 282                seed_coder::SEPARATOR,
 283                seed_coder::END_MARKER,
 284                CURSOR_MARKER,
 285                multi_region::MARKER_TAG_PREFIX,
 286            ];
 287            TOKENS
 288        }
 289    }
 290}
 291
 292/// Returns the (editable_token_limit, context_token_limit) for a given format.
 293pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
 294    match format {
 295        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
 296        ZetaFormat::V0114180EditableRegion => (180, 350),
 297        ZetaFormat::V0120GitMergeMarkers
 298        | ZetaFormat::V0131GitMergeMarkersPrefix
 299        | ZetaFormat::V0211Prefill
 300        | ZetaFormat::V0211SeedCoder
 301        | ZetaFormat::v0226Hashline
 302        | ZetaFormat::V0306SeedMultiRegions
 303        | ZetaFormat::V0316SeedMultiRegions
 304        | ZetaFormat::V0318SeedMultiRegions
 305        | ZetaFormat::V0317SeedMultiRegions
 306        | ZetaFormat::V0304SeedNoEdits => (350, 150),
 307        ZetaFormat::V0304VariableEdit => (1024, 0),
 308    }
 309}
 310
 311pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 312    match format {
 313        ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
 314        ZetaFormat::V0112MiddleAtEnd
 315        | ZetaFormat::V0113Ordered
 316        | ZetaFormat::V0114180EditableRegion
 317        | ZetaFormat::V0120GitMergeMarkers
 318        | ZetaFormat::V0131GitMergeMarkersPrefix
 319        | ZetaFormat::V0211Prefill
 320        | ZetaFormat::V0211SeedCoder
 321        | ZetaFormat::V0304VariableEdit
 322        | ZetaFormat::V0306SeedMultiRegions
 323        | ZetaFormat::V0304SeedNoEdits => &[],
 324        ZetaFormat::V0316SeedMultiRegions => &[multi_region::V0316_END_MARKER],
 325        ZetaFormat::V0318SeedMultiRegions => &[multi_region::V0318_END_MARKER],
 326        ZetaFormat::V0317SeedMultiRegions => &[multi_region::V0317_END_MARKER],
 327    }
 328}
 329
 330pub fn excerpt_ranges_for_format(
 331    format: ZetaFormat,
 332    ranges: &ExcerptRanges,
 333) -> (Range<usize>, Range<usize>) {
 334    match format {
 335        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 336            ranges.editable_150.clone(),
 337            ranges.editable_150_context_350.clone(),
 338        ),
 339        ZetaFormat::V0114180EditableRegion => (
 340            ranges.editable_180.clone(),
 341            ranges.editable_180_context_350.clone(),
 342        ),
 343        ZetaFormat::V0120GitMergeMarkers
 344        | ZetaFormat::V0131GitMergeMarkersPrefix
 345        | ZetaFormat::V0211Prefill
 346        | ZetaFormat::V0211SeedCoder
 347        | ZetaFormat::v0226Hashline
 348        | ZetaFormat::V0304SeedNoEdits
 349        | ZetaFormat::V0306SeedMultiRegions
 350        | ZetaFormat::V0316SeedMultiRegions
 351        | ZetaFormat::V0318SeedMultiRegions
 352        | ZetaFormat::V0317SeedMultiRegions => (
 353            ranges.editable_350.clone(),
 354            ranges.editable_350_context_150.clone(),
 355        ),
 356        ZetaFormat::V0304VariableEdit => {
 357            let context = ranges
 358                .editable_350_context_1024
 359                .clone()
 360                .or(ranges.editable_350_context_512.clone())
 361                .unwrap_or_else(|| ranges.editable_350_context_150.clone());
 362            (context.clone(), context)
 363        }
 364    }
 365}
 366
 367pub fn write_cursor_excerpt_section_for_format(
 368    format: ZetaFormat,
 369    prompt: &mut String,
 370    path: &Path,
 371    context: &str,
 372    editable_range: &Range<usize>,
 373    cursor_offset: usize,
 374) {
 375    match format {
 376        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
 377            prompt,
 378            path,
 379            context,
 380            editable_range,
 381            cursor_offset,
 382        ),
 383        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 384            v0113_ordered::write_cursor_excerpt_section(
 385                prompt,
 386                path,
 387                context,
 388                editable_range,
 389                cursor_offset,
 390            )
 391        }
 392        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 393            prompt,
 394            path,
 395            context,
 396            editable_range,
 397            cursor_offset,
 398        ),
 399        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 400            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 401                prompt,
 402                path,
 403                context,
 404                editable_range,
 405                cursor_offset,
 406            )
 407        }
 408        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
 409            seed_coder::write_cursor_excerpt_section(
 410                prompt,
 411                path,
 412                context,
 413                editable_range,
 414                cursor_offset,
 415            )
 416        }
 417        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
 418            prompt,
 419            path,
 420            context,
 421            editable_range,
 422            cursor_offset,
 423        ),
 424        ZetaFormat::V0304VariableEdit => {
 425            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
 426        }
 427        ZetaFormat::V0306SeedMultiRegions => {
 428            prompt.push_str(&build_v0306_cursor_prefix(
 429                path,
 430                context,
 431                editable_range,
 432                cursor_offset,
 433            ));
 434        }
 435        ZetaFormat::V0316SeedMultiRegions => {
 436            prompt.push_str(&build_v0316_cursor_prefix(
 437                path,
 438                context,
 439                editable_range,
 440                cursor_offset,
 441            ));
 442        }
 443        ZetaFormat::V0318SeedMultiRegions => {
 444            prompt.push_str(&build_v0318_cursor_prefix(
 445                path,
 446                context,
 447                editable_range,
 448                cursor_offset,
 449            ));
 450        }
 451        ZetaFormat::V0317SeedMultiRegions => {
 452            prompt.push_str(&build_v0317_cursor_prefix(
 453                path,
 454                context,
 455                editable_range,
 456                cursor_offset,
 457            ));
 458        }
 459    }
 460}
 461
 462fn build_v0306_cursor_prefix(
 463    path: &Path,
 464    context: &str,
 465    editable_range: &Range<usize>,
 466    cursor_offset: usize,
 467) -> String {
 468    let mut section = String::new();
 469    let path_str = path.to_string_lossy();
 470    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 471
 472    section.push_str(&context[..editable_range.start]);
 473    section.push_str(seed_coder::START_MARKER);
 474
 475    let editable_text = &context[editable_range.clone()];
 476    let cursor_in_editable = cursor_offset - editable_range.start;
 477    multi_region::write_editable_with_markers(
 478        &mut section,
 479        editable_text,
 480        cursor_in_editable,
 481        CURSOR_MARKER,
 482    );
 483
 484    if !section.ends_with('\n') {
 485        section.push('\n');
 486    }
 487    section.push_str(seed_coder::SEPARATOR);
 488    section
 489}
 490
 491fn build_v0316_cursor_prefix(
 492    path: &Path,
 493    context: &str,
 494    editable_range: &Range<usize>,
 495    cursor_offset: usize,
 496) -> String {
 497    let mut section = String::new();
 498    let path_str = path.to_string_lossy();
 499    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 500
 501    section.push_str(&context[..editable_range.start]);
 502
 503    let editable_text = &context[editable_range.clone()];
 504    let cursor_in_editable = cursor_offset - editable_range.start;
 505    multi_region::write_editable_with_markers_v0316(
 506        &mut section,
 507        editable_text,
 508        cursor_in_editable,
 509        CURSOR_MARKER,
 510    );
 511
 512    if !section.ends_with('\n') {
 513        section.push('\n');
 514    }
 515    section
 516}
 517
 518fn build_v0318_cursor_prefix(
 519    path: &Path,
 520    context: &str,
 521    editable_range: &Range<usize>,
 522    cursor_offset: usize,
 523) -> String {
 524    let mut section = String::new();
 525    let path_str = path.to_string_lossy();
 526    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 527
 528    section.push_str(&context[..editable_range.start]);
 529
 530    let editable_text = &context[editable_range.clone()];
 531    let cursor_in_editable = cursor_offset - editable_range.start;
 532    multi_region::write_editable_with_markers_v0318(
 533        &mut section,
 534        editable_text,
 535        cursor_in_editable,
 536        CURSOR_MARKER,
 537    );
 538
 539    if !section.ends_with('\n') {
 540        section.push('\n');
 541    }
 542    section
 543}
 544
 545fn build_v0317_cursor_prefix(
 546    path: &Path,
 547    context: &str,
 548    editable_range: &Range<usize>,
 549    cursor_offset: usize,
 550) -> String {
 551    let mut section = String::new();
 552    let path_str = path.to_string_lossy();
 553    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 554
 555    section.push_str(&context[..editable_range.start]);
 556
 557    let editable_text = &context[editable_range.clone()];
 558    let cursor_in_editable = cursor_offset - editable_range.start;
 559    multi_region::write_editable_with_markers_v0317(
 560        &mut section,
 561        editable_text,
 562        cursor_in_editable,
 563        CURSOR_MARKER,
 564    );
 565
 566    if !section.ends_with('\n') {
 567        section.push('\n');
 568    }
 569    section
 570}
 571
 572fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
 573    let start_row = text[0..range.start].matches('\n').count() as u32;
 574    let mut end_row = start_row + text[range.clone()].matches('\n').count() as u32;
 575    if !text[..range.end].ends_with('\n') {
 576        end_row += 1;
 577    }
 578    return start_row..end_row;
 579}
 580
 581pub fn format_prompt_with_budget_for_format(
 582    input: &ZetaPromptInput,
 583    format: ZetaFormat,
 584    max_tokens: usize,
 585) -> Option<String> {
 586    let (context, editable_range, context_range, cursor_offset) =
 587        resolve_cursor_region(input, format);
 588    let path = &*input.cursor_path;
 589
 590    let empty_files = Vec::new();
 591    let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
 592    let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
 593        let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
 594        let row_range = relative_row_range.start + cursor_excerpt_start_row
 595            ..relative_row_range.end + cursor_excerpt_start_row;
 596        &filter_redundant_excerpts(
 597            input_related_files.to_vec(),
 598            input.cursor_path.as_ref(),
 599            row_range,
 600        )
 601    } else {
 602        input_related_files
 603    };
 604
 605    let prompt = match format {
 606        ZetaFormat::V0211SeedCoder
 607        | ZetaFormat::V0304SeedNoEdits
 608        | ZetaFormat::V0306SeedMultiRegions
 609        | ZetaFormat::V0316SeedMultiRegions
 610        | ZetaFormat::V0318SeedMultiRegions
 611        | ZetaFormat::V0317SeedMultiRegions => {
 612            let mut cursor_section = String::new();
 613            write_cursor_excerpt_section_for_format(
 614                format,
 615                &mut cursor_section,
 616                path,
 617                context,
 618                &editable_range,
 619                cursor_offset,
 620            );
 621
 622            let budget_with_margin = apply_prompt_budget_margin(max_tokens);
 623            seed_coder::assemble_fim_prompt(
 624                context,
 625                &editable_range,
 626                &cursor_section,
 627                &input.events,
 628                related_files,
 629                budget_with_margin,
 630            )
 631        }
 632        _ => {
 633            let mut cursor_section = String::new();
 634            write_cursor_excerpt_section_for_format(
 635                format,
 636                &mut cursor_section,
 637                path,
 638                context,
 639                &editable_range,
 640                cursor_offset,
 641            );
 642
 643            let mut remaining_budget = apply_prompt_budget_margin(max_tokens);
 644            let cursor_tokens = estimate_tokens(cursor_section.len());
 645            remaining_budget = remaining_budget.saturating_sub(cursor_tokens);
 646
 647            let edit_history_section = format_edit_history_within_budget(
 648                &input.events,
 649                "<|file_sep|>",
 650                "edit history",
 651                remaining_budget,
 652                max_edit_event_count_for_format(&format),
 653            );
 654            let edit_history_tokens = estimate_tokens(edit_history_section.len());
 655            remaining_budget = remaining_budget.saturating_sub(edit_history_tokens);
 656
 657            let related_files_section = format_related_files_within_budget(
 658                &related_files,
 659                "<|file_sep|>",
 660                "",
 661                remaining_budget,
 662            );
 663
 664            let mut prompt = String::new();
 665            prompt.push_str(&related_files_section);
 666            prompt.push_str(&edit_history_section);
 667            prompt.push_str(&cursor_section);
 668            prompt
 669        }
 670    };
 671    let prompt_tokens = estimate_tokens(prompt.len());
 672    if prompt_tokens > max_tokens {
 673        return None;
 674    }
 675    return Some(prompt);
 676}
 677
 678pub fn filter_redundant_excerpts(
 679    mut related_files: Vec<RelatedFile>,
 680    cursor_path: &Path,
 681    cursor_row_range: Range<u32>,
 682) -> Vec<RelatedFile> {
 683    for file in &mut related_files {
 684        if file.path.as_ref() == cursor_path {
 685            file.excerpts.retain(|excerpt| {
 686                excerpt.row_range.start < cursor_row_range.start
 687                    || excerpt.row_range.end > cursor_row_range.end
 688            });
 689        }
 690    }
 691    related_files.retain(|file| !file.excerpts.is_empty());
 692    related_files
 693}
 694
 695pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
 696    match format {
 697        ZetaFormat::V0112MiddleAtEnd
 698        | ZetaFormat::V0113Ordered
 699        | ZetaFormat::V0114180EditableRegion
 700        | ZetaFormat::V0120GitMergeMarkers
 701        | ZetaFormat::V0131GitMergeMarkersPrefix
 702        | ZetaFormat::V0211Prefill
 703        | ZetaFormat::V0211SeedCoder
 704        | ZetaFormat::v0226Hashline
 705        | ZetaFormat::V0304SeedNoEdits
 706        | ZetaFormat::V0304VariableEdit
 707        | ZetaFormat::V0306SeedMultiRegions
 708        | ZetaFormat::V0316SeedMultiRegions
 709        | ZetaFormat::V0318SeedMultiRegions
 710        | ZetaFormat::V0317SeedMultiRegions => 6,
 711    }
 712}
 713
 714pub fn get_prefill_for_format(
 715    format: ZetaFormat,
 716    context: &str,
 717    editable_range: &Range<usize>,
 718) -> String {
 719    match format {
 720        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
 721        ZetaFormat::V0112MiddleAtEnd
 722        | ZetaFormat::V0113Ordered
 723        | ZetaFormat::V0114180EditableRegion
 724        | ZetaFormat::V0120GitMergeMarkers
 725        | ZetaFormat::V0131GitMergeMarkersPrefix
 726        | ZetaFormat::V0211SeedCoder
 727        | ZetaFormat::v0226Hashline
 728        | ZetaFormat::V0304VariableEdit => String::new(),
 729        ZetaFormat::V0304SeedNoEdits
 730        | ZetaFormat::V0306SeedMultiRegions
 731        | ZetaFormat::V0316SeedMultiRegions
 732        | ZetaFormat::V0318SeedMultiRegions
 733        | ZetaFormat::V0317SeedMultiRegions => String::new(),
 734    }
 735}
 736
 737pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
 738    match format {
 739        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
 740        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
 741        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
 742        ZetaFormat::V0211SeedCoder
 743        | ZetaFormat::V0304SeedNoEdits
 744        | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
 745        ZetaFormat::V0316SeedMultiRegions => Some(multi_region::V0316_END_MARKER),
 746        ZetaFormat::V0318SeedMultiRegions => Some(multi_region::V0318_END_MARKER),
 747        ZetaFormat::V0317SeedMultiRegions => Some(multi_region::V0317_END_MARKER),
 748        ZetaFormat::V0112MiddleAtEnd
 749        | ZetaFormat::V0113Ordered
 750        | ZetaFormat::V0114180EditableRegion
 751        | ZetaFormat::v0226Hashline
 752        | ZetaFormat::V0304VariableEdit => None,
 753    }
 754}
 755
 756pub fn encode_patch_as_output_for_format(
 757    format: ZetaFormat,
 758    old_editable_region: &str,
 759    patch: &str,
 760    cursor_offset: Option<usize>,
 761) -> Result<Option<String>> {
 762    match format {
 763        ZetaFormat::v0226Hashline => {
 764            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
 765        }
 766        ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
 767            old_editable_region,
 768            patch,
 769            cursor_offset,
 770        )
 771        .map(Some),
 772        ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
 773            Ok(seed_coder::no_edits(patch))
 774        }
 775        ZetaFormat::V0316SeedMultiRegions => {
 776            let empty_patch = patch.lines().count() <= 3;
 777            if empty_patch {
 778                let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
 779                let marker_num =
 780                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
 781                let tag = multi_region::marker_tag(marker_num);
 782                Ok(Some(format!(
 783                    "{tag}{tag}{}",
 784                    multi_region::V0316_END_MARKER
 785                )))
 786            } else {
 787                Ok(None)
 788            }
 789        }
 790        ZetaFormat::V0318SeedMultiRegions => {
 791            let empty_patch = patch.lines().count() <= 3;
 792            if empty_patch {
 793                let marker_offsets =
 794                    multi_region::compute_marker_offsets_v0318(old_editable_region);
 795                let marker_num =
 796                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
 797                let tag = multi_region::marker_tag(marker_num);
 798                Ok(Some(format!(
 799                    "{tag}{tag}{}",
 800                    multi_region::V0318_END_MARKER
 801                )))
 802            } else {
 803                Ok(None)
 804            }
 805        }
 806        ZetaFormat::V0317SeedMultiRegions => {
 807            let empty_patch = patch.lines().count() <= 3;
 808            if empty_patch {
 809                let tag = multi_region::marker_tag_relative(0);
 810                Ok(Some(format!(
 811                    "{tag}{tag}{}",
 812                    multi_region::V0317_END_MARKER
 813                )))
 814            } else {
 815                Ok(None)
 816            }
 817        }
 818        _ => Ok(None),
 819    }
 820}
 821
/// Result of `parse_zeta2_model_output`: the replacement text together with
/// the range of the cursor excerpt it replaces.
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
}
 828
 829/// Parse model output for the given zeta format
 830pub fn parse_zeta2_model_output(
 831    output: &str,
 832    format: ZetaFormat,
 833    prompt_inputs: &ZetaPromptInput,
 834) -> Result<ParsedOutput> {
 835    let output = match output_end_marker_for_format(format) {
 836        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
 837        None => output,
 838    };
 839
 840    let (context, editable_range_in_context, context_range, cursor_offset) =
 841        resolve_cursor_region(prompt_inputs, format);
 842    let context_start = context_range.start;
 843    let old_editable_region = &context[editable_range_in_context.clone()];
 844    let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range_in_context.start);
 845
 846    let (range_in_context, output) = match format {
 847        ZetaFormat::v0226Hashline => (
 848            editable_range_in_context,
 849            if hashline::output_has_edit_commands(output) {
 850                hashline::apply_edit_commands(old_editable_region, output)
 851            } else {
 852                output.to_string()
 853            },
 854        ),
 855        ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
 856        ZetaFormat::V0304SeedNoEdits => (
 857            editable_range_in_context,
 858            if output.starts_with(seed_coder::NO_EDITS) {
 859                old_editable_region.to_string()
 860            } else {
 861                output.to_string()
 862            },
 863        ),
 864        ZetaFormat::V0306SeedMultiRegions => (
 865            editable_range_in_context,
 866            if output.starts_with(seed_coder::NO_EDITS) {
 867                old_editable_region.to_string()
 868            } else {
 869                multi_region::apply_marker_span(old_editable_region, output)?
 870            },
 871        ),
 872        ZetaFormat::V0316SeedMultiRegions => (
 873            editable_range_in_context,
 874            multi_region::apply_marker_span_v0316(old_editable_region, output)?,
 875        ),
 876        ZetaFormat::V0318SeedMultiRegions => (
 877            editable_range_in_context,
 878            multi_region::apply_marker_span_v0318(old_editable_region, output)?,
 879        ),
 880        ZetaFormat::V0317SeedMultiRegions => (
 881            editable_range_in_context,
 882            multi_region::apply_marker_span_v0317(
 883                old_editable_region,
 884                output,
 885                Some(cursor_offset_in_editable),
 886            )?,
 887        ),
 888        _ => (editable_range_in_context, output.to_string()),
 889    };
 890
 891    let range_in_excerpt =
 892        range_in_context.start + context_start..range_in_context.end + context_start;
 893
 894    Ok(ParsedOutput {
 895        new_editable_region: output,
 896        range_in_excerpt,
 897    })
 898}
 899
/// Public entry point that forwards to `excerpt_ranges_for_format` and returns
/// its result unchanged.
///
/// Callers in this file destructure the returned pair as
/// `(editable_range, context_range)`.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
 906
 907pub fn resolve_cursor_region(
 908    input: &ZetaPromptInput,
 909    format: ZetaFormat,
 910) -> (&str, Range<usize>, Range<usize>, usize) {
 911    let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges {
 912        let (editable_tokens, context_tokens) = token_limits_for_format(format);
 913        compute_editable_and_context_ranges(
 914            &input.cursor_excerpt,
 915            input.cursor_offset_in_excerpt,
 916            syntax_ranges,
 917            editable_tokens,
 918            context_tokens,
 919        )
 920    } else {
 921        excerpt_range_for_format(format, &input.excerpt_ranges)
 922    };
 923    let context_start = context_range.start;
 924    let context_text = &input.cursor_excerpt[context_range.clone()];
 925    let adjusted_editable =
 926        (editable_range.start - context_start)..(editable_range.end - context_start);
 927    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 928
 929    (
 930        context_text,
 931        adjusted_editable,
 932        context_range,
 933        adjusted_cursor,
 934    )
 935}
 936
/// Compute the prefill string for `input` under `format`.
///
/// Resolves the context text and editable range with [`resolve_cursor_region`]
/// and hands both to `get_prefill_for_format`; the context range and cursor
/// offset are not needed here and are discarded.
pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
    get_prefill_for_format(format, context, &editable_range)
}
 941
 942fn format_edit_history_within_budget(
 943    events: &[Arc<Event>],
 944    file_marker: &str,
 945    edit_history_name: &str,
 946    max_tokens: usize,
 947    max_edit_event_count: usize,
 948) -> String {
 949    let header = format!("{}{}\n", file_marker, edit_history_name);
 950    let header_tokens = estimate_tokens(header.len());
 951    if header_tokens >= max_tokens {
 952        return String::new();
 953    }
 954
 955    let mut event_strings: Vec<String> = Vec::new();
 956    let mut total_tokens = header_tokens;
 957
 958    for event in events.iter().rev().take(max_edit_event_count) {
 959        let mut event_str = String::new();
 960        write_event(&mut event_str, event);
 961        let event_tokens = estimate_tokens(event_str.len());
 962
 963        if total_tokens + event_tokens > max_tokens {
 964            break;
 965        }
 966        total_tokens += event_tokens;
 967        event_strings.push(event_str);
 968    }
 969
 970    if event_strings.is_empty() {
 971        return String::new();
 972    }
 973
 974    let mut result = header;
 975    for event_str in event_strings.iter().rev() {
 976        result.push_str(event_str);
 977    }
 978    result
 979}
 980
 981fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
 982    let needs_newline = !excerpt.text.ends_with('\n');
 983    let needs_ellipsis = excerpt.row_range.end < file_max_row;
 984    let len = excerpt.text.len()
 985        + if needs_newline { "\n".len() } else { 0 }
 986        + if needs_ellipsis { "...\n".len() } else { 0 };
 987    estimate_tokens(len)
 988}
 989
 990pub fn format_related_files_within_budget(
 991    related_files: &[RelatedFile],
 992    file_prefix: &str,
 993    file_suffix: &str,
 994    max_tokens: usize,
 995) -> String {
 996    struct ExcerptCandidate {
 997        file_ix: usize,
 998        excerpt_ix: usize,
 999        order: usize,
1000    }
1001
1002    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
1003        .iter()
1004        .enumerate()
1005        .flat_map(|(file_ix, file)| {
1006            file.excerpts
1007                .iter()
1008                .enumerate()
1009                .map(move |(excerpt_ix, e)| ExcerptCandidate {
1010                    file_ix,
1011                    excerpt_ix,
1012                    order: e.order,
1013                })
1014        })
1015        .collect();
1016
1017    // Pre-compute file header strings and their token costs.
1018    let file_headers: Vec<String> = related_files
1019        .iter()
1020        .map(|file| {
1021            let path_str = file.path.to_string_lossy();
1022            format!("{}{}\n", file_prefix, path_str)
1023        })
1024        .collect();
1025
1026    // Sort the excerpts by their order and determine how many fit within the budget.
1027    let mut total_tokens = 0;
1028    let mut included_excerpt_count = 0_usize;
1029    let mut included_file_indices = vec![false; related_files.len()];
1030    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
1031    for candidate in &excerpt_candidates {
1032        let file = &related_files[candidate.file_ix];
1033        let excerpt = &file.excerpts[candidate.excerpt_ix];
1034        let file_already_included = included_file_indices[candidate.file_ix];
1035        let header_cost = if file_already_included {
1036            0
1037        } else {
1038            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
1039        };
1040        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
1041        if total_tokens + header_cost + excerpt_cost > max_tokens {
1042            break;
1043        }
1044        total_tokens += header_cost + excerpt_cost;
1045        if !file_already_included {
1046            included_file_indices[candidate.file_ix] = true;
1047        }
1048        included_excerpt_count += 1;
1049    }
1050
1051    excerpt_candidates.truncate(included_excerpt_count);
1052    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
1053
1054    // Render all of the files that fit within the token budget, in the original order.
1055    let mut result = String::new();
1056    let mut last_file_ix = None;
1057    for candidate in &excerpt_candidates {
1058        if last_file_ix != Some(candidate.file_ix) {
1059            if last_file_ix.is_some() {
1060                result.push_str(file_suffix);
1061            }
1062            result.push_str(&file_headers[candidate.file_ix]);
1063            last_file_ix = Some(candidate.file_ix);
1064        }
1065        let file = &related_files[candidate.file_ix];
1066        let excerpt = &file.excerpts[candidate.excerpt_ix];
1067        result.push_str(&excerpt.text);
1068        if !result.ends_with('\n') {
1069            result.push('\n');
1070        }
1071        if excerpt.row_range.end < file.max_row {
1072            result.push_str("...\n");
1073        }
1074    }
1075
1076    result
1077}
1078
1079pub fn write_related_files(
1080    prompt: &mut String,
1081    related_files: &[RelatedFile],
1082) -> Vec<Range<usize>> {
1083    let mut ranges = Vec::new();
1084    for file in related_files {
1085        let start = prompt.len();
1086        let path_str = file.path.to_string_lossy();
1087        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1088        for excerpt in &file.excerpts {
1089            prompt.push_str(&excerpt.text);
1090            if !prompt.ends_with('\n') {
1091                prompt.push('\n');
1092            }
1093            if excerpt.row_range.end < file.max_row {
1094                prompt.push_str("...\n");
1095            }
1096        }
1097        let end = prompt.len();
1098        ranges.push(start..end);
1099    }
1100    ranges
1101}
1102
1103mod v0112_middle_at_end {
1104    use super::*;
1105
1106    pub fn special_tokens() -> &'static [&'static str] {
1107        &[
1108            "<|fim_prefix|>",
1109            "<|fim_suffix|>",
1110            "<|fim_middle|>",
1111            "<|file_sep|>",
1112            CURSOR_MARKER,
1113        ]
1114    }
1115
1116    pub fn write_cursor_excerpt_section(
1117        prompt: &mut String,
1118        path: &Path,
1119        context: &str,
1120        editable_range: &Range<usize>,
1121        cursor_offset: usize,
1122    ) {
1123        let path_str = path.to_string_lossy();
1124        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1125
1126        prompt.push_str("<|fim_prefix|>\n");
1127        prompt.push_str(&context[..editable_range.start]);
1128
1129        prompt.push_str("<|fim_suffix|>\n");
1130        prompt.push_str(&context[editable_range.end..]);
1131        if !prompt.ends_with('\n') {
1132            prompt.push('\n');
1133        }
1134
1135        prompt.push_str("<|fim_middle|>current\n");
1136        prompt.push_str(&context[editable_range.start..cursor_offset]);
1137        prompt.push_str(CURSOR_MARKER);
1138        prompt.push_str(&context[cursor_offset..editable_range.end]);
1139        if !prompt.ends_with('\n') {
1140            prompt.push('\n');
1141        }
1142
1143        prompt.push_str("<|fim_middle|>updated\n");
1144    }
1145}
1146
1147mod v0113_ordered {
1148    use super::*;
1149
1150    pub fn special_tokens() -> &'static [&'static str] {
1151        &[
1152            "<|fim_prefix|>",
1153            "<|fim_suffix|>",
1154            "<|fim_middle|>",
1155            "<|file_sep|>",
1156            CURSOR_MARKER,
1157        ]
1158    }
1159
1160    pub fn write_cursor_excerpt_section(
1161        prompt: &mut String,
1162        path: &Path,
1163        context: &str,
1164        editable_range: &Range<usize>,
1165        cursor_offset: usize,
1166    ) {
1167        let path_str = path.to_string_lossy();
1168        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1169
1170        prompt.push_str("<|fim_prefix|>\n");
1171        prompt.push_str(&context[..editable_range.start]);
1172        if !prompt.ends_with('\n') {
1173            prompt.push('\n');
1174        }
1175
1176        prompt.push_str("<|fim_middle|>current\n");
1177        prompt.push_str(&context[editable_range.start..cursor_offset]);
1178        prompt.push_str(CURSOR_MARKER);
1179        prompt.push_str(&context[cursor_offset..editable_range.end]);
1180        if !prompt.ends_with('\n') {
1181            prompt.push('\n');
1182        }
1183
1184        prompt.push_str("<|fim_suffix|>\n");
1185        prompt.push_str(&context[editable_range.end..]);
1186        if !prompt.ends_with('\n') {
1187            prompt.push('\n');
1188        }
1189
1190        prompt.push_str("<|fim_middle|>updated\n");
1191    }
1192}
1193
/// Format module that only exposes a token list; it defines no prompt writer
/// of its own in this module.
mod v0114180_editable_region {
    use super::*;

    /// Returns the same marker list as `v0113_ordered::special_tokens`.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
1201
1202pub mod v0120_git_merge_markers {
1203    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1204    //!
1205    //! Example prompt:
1206    //!
1207    //! <|file_sep|>path/to/target_file.py
1208    //! <|fim_prefix|>
1209    //! code before editable region
1210    //! <|fim_suffix|>
1211    //! code after editable region
1212    //! <|fim_middle|>
1213    //! <<<<<<< CURRENT
1214    //! code that
1215    //! needs to<|user_cursor|>
1216    //! be rewritten
1217    //! =======
1218    //!
1219    //! Expected output (should be generated by the model):
1220    //!
1221    //! updated
1222    //! code with
1223    //! changes applied
1224    //! >>>>>>> UPDATED
1225
1226    use super::*;
1227
1228    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1229    pub const SEPARATOR: &str = "=======\n";
1230    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1231
1232    pub fn special_tokens() -> &'static [&'static str] {
1233        &[
1234            "<|fim_prefix|>",
1235            "<|fim_suffix|>",
1236            "<|fim_middle|>",
1237            "<|file_sep|>",
1238            START_MARKER,
1239            SEPARATOR,
1240            END_MARKER,
1241            CURSOR_MARKER,
1242        ]
1243    }
1244
1245    pub fn write_cursor_excerpt_section(
1246        prompt: &mut String,
1247        path: &Path,
1248        context: &str,
1249        editable_range: &Range<usize>,
1250        cursor_offset: usize,
1251    ) {
1252        let path_str = path.to_string_lossy();
1253        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1254
1255        prompt.push_str("<|fim_prefix|>");
1256        prompt.push_str(&context[..editable_range.start]);
1257
1258        prompt.push_str("<|fim_suffix|>");
1259        prompt.push_str(&context[editable_range.end..]);
1260        if !prompt.ends_with('\n') {
1261            prompt.push('\n');
1262        }
1263
1264        prompt.push_str("<|fim_middle|>");
1265        prompt.push_str(START_MARKER);
1266        prompt.push_str(&context[editable_range.start..cursor_offset]);
1267        prompt.push_str(CURSOR_MARKER);
1268        prompt.push_str(&context[cursor_offset..editable_range.end]);
1269        if !prompt.ends_with('\n') {
1270            prompt.push('\n');
1271        }
1272        prompt.push_str(SEPARATOR);
1273    }
1274}
1275
1276pub mod v0131_git_merge_markers_prefix {
1277    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1278    //!
1279    //! Example prompt:
1280    //!
1281    //! <|file_sep|>path/to/target_file.py
1282    //! <|fim_prefix|>
1283    //! code before editable region
1284    //! <<<<<<< CURRENT
1285    //! code that
1286    //! needs to<|user_cursor|>
1287    //! be rewritten
1288    //! =======
1289    //! <|fim_suffix|>
1290    //! code after editable region
1291    //! <|fim_middle|>
1292    //!
1293    //! Expected output (should be generated by the model):
1294    //!
1295    //! updated
1296    //! code with
1297    //! changes applied
1298    //! >>>>>>> UPDATED
1299
1300    use super::*;
1301
1302    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1303    pub const SEPARATOR: &str = "=======\n";
1304    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1305
1306    pub fn special_tokens() -> &'static [&'static str] {
1307        &[
1308            "<|fim_prefix|>",
1309            "<|fim_suffix|>",
1310            "<|fim_middle|>",
1311            "<|file_sep|>",
1312            START_MARKER,
1313            SEPARATOR,
1314            END_MARKER,
1315            CURSOR_MARKER,
1316        ]
1317    }
1318
1319    pub fn write_cursor_excerpt_section(
1320        prompt: &mut String,
1321        path: &Path,
1322        context: &str,
1323        editable_range: &Range<usize>,
1324        cursor_offset: usize,
1325    ) {
1326        let path_str = path.to_string_lossy();
1327        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1328
1329        prompt.push_str("<|fim_prefix|>");
1330        prompt.push_str(&context[..editable_range.start]);
1331        prompt.push_str(START_MARKER);
1332        prompt.push_str(&context[editable_range.start..cursor_offset]);
1333        prompt.push_str(CURSOR_MARKER);
1334        prompt.push_str(&context[cursor_offset..editable_range.end]);
1335        if !prompt.ends_with('\n') {
1336            prompt.push('\n');
1337        }
1338        prompt.push_str(SEPARATOR);
1339
1340        prompt.push_str("<|fim_suffix|>");
1341        prompt.push_str(&context[editable_range.end..]);
1342        if !prompt.ends_with('\n') {
1343            prompt.push('\n');
1344        }
1345
1346        prompt.push_str("<|fim_middle|>");
1347    }
1348}
1349
1350pub mod v0211_prefill {
1351    use super::*;
1352
1353    pub fn special_tokens() -> &'static [&'static str] {
1354        v0131_git_merge_markers_prefix::special_tokens()
1355    }
1356
1357    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1358        let editable_region = &context[editable_range.start..editable_range.end];
1359
1360        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1361        let prefill_len = editable_region.floor_char_boundary(prefill_len);
1362
1363        // Find a token boundary to avoid splitting tokens in the prefill.
1364        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1365        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1366        // the \n and consume any consecutive \n characters after it.
1367        let prefill = &editable_region[..prefill_len];
1368        match prefill.rfind('\n') {
1369            Some(pos) => {
1370                let mut end = pos + 1;
1371                while end < editable_region.len()
1372                    && editable_region.as_bytes().get(end) == Some(&b'\n')
1373                {
1374                    end += 1;
1375                }
1376                editable_region[..end].to_string()
1377            }
1378            // No newline found. Fall back to splitting before the last space
1379            // (word-level boundary)
1380            None => match prefill.rfind(' ') {
1381                Some(pos) => prefill[..pos].to_string(),
1382                None => prefill.to_string(),
1383            },
1384        }
1385    }
1386}
1387
1388pub mod hashline {
1389
1390    use std::fmt::Display;
1391
    /// Written on the last line of the cursor-file section, after the suffix.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Written immediately before the hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";

    use super::*;

    /// Prefix of a model-output line that replaces a line or a range of lines.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a model-output line that inserts content relative to a line.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
    /// When the model output starts with this marker, `apply_edit_commands`
    /// returns the editable region unchanged.
    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1400
1401    pub fn special_tokens() -> &'static [&'static str] {
1402        return &[
1403            SET_COMMAND_MARKER,
1404            "<|set_range|>",
1405            INSERT_COMMAND_MARKER,
1406            NO_EDITS_COMMAND_MARKER,
1407            CURSOR_MARKER,
1408            "<|file_sep|>",
1409            "<|fim_prefix|>",
1410            "<|fim_suffix|>",
1411            "<|fim_middle|>",
1412        ];
1413    }
1414
    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
    #[derive(Debug, Clone, PartialEq, Eq)]
    struct LineRef {
        /// Position of the line within the editable region, counted from zero.
        index: usize,
        /// Checksum of the line's bytes as produced by `hash_line`.
        hash: u8,
    }
1421
1422    impl Display for LineRef {
1423        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1424            write!(f, "{}:{:02x}", self.index, self.hash)
1425        }
1426    }
1427
1428    pub fn hash_line(line: &[u8]) -> u8 {
1429        let mut h: u8 = 0;
1430        for &byte in line {
1431            h = h.wrapping_add(byte);
1432        }
1433        return h;
1434    }
1435
1436    /// Write the hashline-encoded editable region into `out`. Each line of
1437    /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1438    /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1439    /// to the start of `editable_text`).
1440    pub fn write_hashline_editable_region(
1441        out: &mut String,
1442        editable_text: &str,
1443        cursor_offset_in_editable: usize,
1444    ) {
1445        let mut offset = 0;
1446        for (i, line) in editable_text.lines().enumerate() {
1447            let (head, cursor, tail) = if cursor_offset_in_editable > offset
1448                && cursor_offset_in_editable < offset + line.len()
1449            {
1450                (
1451                    &line[..cursor_offset_in_editable - offset],
1452                    CURSOR_MARKER,
1453                    &line[cursor_offset_in_editable - offset..],
1454                )
1455            } else {
1456                (line, "", "")
1457            };
1458            write!(
1459                out,
1460                "\n{}|{head}{cursor}{tail}",
1461                LineRef {
1462                    index: i,
1463                    hash: hash_line(line.as_bytes())
1464                }
1465            )
1466            .unwrap();
1467            offset += line.len() + 1;
1468        }
1469    }
1470
1471    pub fn write_cursor_excerpt_section(
1472        prompt: &mut String,
1473        path: &Path,
1474        context: &str,
1475        editable_range: &Range<usize>,
1476        cursor_offset: usize,
1477    ) {
1478        let path_str = path.to_string_lossy();
1479        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1480
1481        prompt.push_str("<|fim_prefix|>\n");
1482        prompt.push_str(&context[..editable_range.start]);
1483        prompt.push_str(START_MARKER);
1484
1485        let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1486        let editable_region = &context[editable_range.clone()];
1487        write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1488
1489        if !prompt.ends_with('\n') {
1490            prompt.push('\n');
1491        }
1492
1493        prompt.push_str("<|fim_suffix|>\n");
1494        prompt.push_str(&context[editable_range.end..]);
1495        if !prompt.ends_with('\n') {
1496            prompt.push('\n');
1497        }
1498
1499        prompt.push_str(END_MARKER);
1500        prompt.push('\n');
1501    }
1502
    /// A single edit command parsed from the model output.
    ///
    /// `content` borrows from the model output string: it is the text between
    /// the command line and the next command line (or the end of the output).
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            start: LineRef,
            end: LineRef,
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            after: Option<LineRef>,
            content: &'a str,
        },
    }
1520
1521    /// Parse a line reference like `3:c3` into a `LineRef`.
1522    fn parse_line_ref(s: &str) -> Option<LineRef> {
1523        let (idx_str, hash_str) = s.split_once(':')?;
1524        let index = idx_str.parse::<usize>().ok()?;
1525        let hash = u8::from_str_radix(hash_str, 16).ok()?;
1526        Some(LineRef { index, hash })
1527    }
1528
    /// Parse the model output into a list of `EditCommand`s.
    ///
    /// The output is scanned line by line. A line whose trimmed text starts
    /// with `<|set|>` or `<|insert|>` opens a command; the rest of that trimmed
    /// line is the specifier, and the command's content is every following
    /// line up to (not including) the next command line or the end of the
    /// output. Lines that appear before the first command are ignored.
    ///
    /// A set command whose specifier cannot be parsed (as `ref` or `ref-ref`)
    /// is dropped. An insert command whose specifier cannot be parsed is kept
    /// with `after: None`.
    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
        let mut commands = Vec::new();
        // Byte offset of the start of the line currently being examined.
        let mut offset = 0usize;

        while offset < model_output.len() {
            // Locate the end of the current line; `line_end` is the offset of
            // the next line (past the newline, if there is one).
            let next_nl = model_output[offset..]
                .find('\n')
                .map(|i| offset + i)
                .unwrap_or(model_output.len());
            let line = &model_output[offset..next_nl];
            let line_end = if next_nl < model_output.len() {
                next_nl + 1
            } else {
                next_nl
            };

            let trimmed = line.trim();
            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
                (true, spec)
            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
                (false, spec)
            } else {
                // Not a command line: skip it.
                offset = line_end;
                continue;
            };

            // Extend the content over the following lines until the next
            // command line is reached.
            let mut content_end = line_end;
            let mut scan = line_end;

            while scan < model_output.len() {
                let body_nl = model_output[scan..]
                    .find('\n')
                    .map(|i| scan + i)
                    .unwrap_or(model_output.len());
                let body_line = &model_output[scan..body_nl];
                if body_line.trim().starts_with(SET_COMMAND_MARKER)
                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
                {
                    break;
                }
                scan = if body_nl < model_output.len() {
                    body_nl + 1
                } else {
                    body_nl
                };
                content_end = scan;
            }

            let content = &model_output[line_end..content_end];

            if is_set {
                // `a-b` addresses an inclusive range, a bare reference a
                // single line; unparsable specifiers drop the command.
                if let Some((start_str, end_str)) = specifier.split_once('-') {
                    if let (Some(start), Some(end)) =
                        (parse_line_ref(start_str), parse_line_ref(end_str))
                    {
                        commands.push(EditCommand::Set {
                            start,
                            end,
                            content,
                        });
                    }
                } else if let Some(target) = parse_line_ref(specifier) {
                    commands.push(EditCommand::Set {
                        start: target.clone(),
                        end: target,
                        content,
                    });
                }
            } else {
                // An unparsable specifier leaves `after` as `None`.
                let after = parse_line_ref(specifier);
                commands.push(EditCommand::Insert { after, content });
            }

            // Resume at the line that ended this command's content.
            offset = scan;
        }

        commands
    }
1608
1609    /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1610    /// (as opposed to being a plain full-replacement output).
1611    /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1612    /// editable region, returning the plain text content.
1613    pub fn strip_hashline_prefixes(region: &str) -> String {
1614        let mut decoded: String = region
1615            .lines()
1616            .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1617            .collect::<Vec<_>>()
1618            .join("\n");
1619        if region.ends_with('\n') {
1620            decoded.push('\n');
1621        }
1622        decoded
1623    }
1624
1625    pub fn output_has_edit_commands(model_output: &str) -> bool {
1626        model_output.contains(SET_COMMAND_MARKER)
1627            || model_output.contains(INSERT_COMMAND_MARKER)
1628            || model_output.contains(NO_EDITS_COMMAND_MARKER)
1629    }
1630
    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
    /// original editable region text.
    ///
    /// `editable_region` is the original text of the editable region (without hash
    /// prefixes). `model_output` is the raw model response containing edit commands.
    ///
    /// If the output (ignoring leading whitespace) starts with the no-edits
    /// marker, the region is returned unchanged. Commands whose line index is
    /// out of range or whose hash does not match the referenced original line
    /// are ignored. When several set commands start on the same line, the last
    /// one wins.
    ///
    /// Returns the full replacement text for the editable region.
    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
        if model_output
            .trim_start()
            .starts_with(NO_EDITS_COMMAND_MARKER)
        {
            return editable_region.to_string();
        }

        // Hash every original line so command references can be validated.
        let original_lines: Vec<&str> = editable_region.lines().collect();
        let old_hashes: Vec<u8> = original_lines
            .iter()
            .map(|line| hash_line(line.as_bytes()))
            .collect();

        let commands = parse_edit_commands(model_output);

        // For set operations: indexed by start line → Some((end line index, content))
        // For insert operations: indexed by line index → vec of content to insert after
        // Insert-before-first is tracked separately.
        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
        let mut insert_before_first: Vec<&str> = Vec::new();
        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];

        for command in &commands {
            match command {
                EditCommand::Set {
                    start,
                    end,
                    content,
                } => {
                    // Accept only an in-bounds, non-inverted range whose two
                    // endpoint hashes match the original lines.
                    if start.index < old_hashes.len()
                        && end.index < old_hashes.len()
                        && start.index <= end.index
                        && old_hashes[start.index] == start.hash
                        && old_hashes[end.index] == end.hash
                    {
                        set_ops[start.index] = Some((end.index, *content));
                    }
                }
                EditCommand::Insert { after, content } => match after {
                    None => insert_before_first.push(*content),
                    Some(line_ref) => {
                        if line_ref.index < old_hashes.len()
                            && old_hashes[line_ref.index] == line_ref.hash
                        {
                            insert_after[line_ref.index].push(*content);
                        }
                    }
                },
            }
        }

        let mut result = String::new();

        // Emit any insertions before the first line
        for content in &insert_before_first {
            result.push_str(content);
            if !content.ends_with('\n') {
                result.push('\n');
            }
        }

        let mut i = 0;
        while i < original_lines.len() {
            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
                // Replace lines i..=end_index with the replacement content
                result.push_str(replacement);
                // An empty replacement deletes the range without leaving a
                // blank line behind.
                if !replacement.is_empty() && !replacement.ends_with('\n') {
                    result.push('\n');
                }
                // Emit any insertions after the end of this set range
                if *end_index < insert_after.len() {
                    for content in &insert_after[*end_index] {
                        result.push_str(content);
                        if !content.ends_with('\n') {
                            result.push('\n');
                        }
                    }
                }
                // Skip the replaced lines; set or insert operations anchored
                // on lines strictly inside the range are not applied.
                i = end_index + 1;
            } else {
                // Keep the original line
                result.push_str(original_lines[i]);
                result.push('\n');
                // Emit any insertions after this line
                for content in &insert_after[i] {
                    result.push_str(content);
                    if !content.ends_with('\n') {
                        result.push('\n');
                    }
                }
                i += 1;
            }
        }

        // Preserve trailing newline behavior: if the original ended with a
        // newline the result already has one; if it didn't, trim the extra one
        // we added.
        if !editable_region.ends_with('\n') && result.ends_with('\n') {
            result.pop();
        }

        result
    }
1742
1743    /// Convert a unified diff patch into hashline edit commands.
1744    ///
1745    /// Parses the unified diff `patch` directly to determine which lines of
1746    /// `old_text` are deleted/replaced and what new lines are added, then emits
1747    /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1748    /// `{index}:{hash}` identifiers.
1749    ///
1750    /// `cursor_offset` is an optional byte offset into the first hunk's new
1751    /// text (context + additions) where the cursor marker should be placed.
1752    pub fn patch_to_edit_commands(
1753        old_text: &str,
1754        patch: &str,
1755        cursor_offset: Option<usize>,
1756    ) -> Result<String> {
1757        let old_lines: Vec<&str> = old_text.lines().collect();
1758        let old_hashes: Vec<u8> = old_lines
1759            .iter()
1760            .map(|line| hash_line(line.as_bytes()))
1761            .collect();
1762
1763        let mut result = String::new();
1764        let mut first_hunk = true;
1765
1766        struct Hunk<'a> {
1767            line_range: Range<usize>,
1768            new_text_lines: Vec<&'a str>,
1769            cursor_line_offset_in_new_text: Option<(usize, usize)>,
1770        }
1771
1772        // Parse the patch line by line. We only care about hunk headers,
1773        // context, deletions, and additions.
1774        let mut old_line_index: usize = 0;
1775        let mut current_hunk: Option<Hunk> = None;
1776        // Byte offset tracking within the hunk's new text for cursor placement.
1777        let mut new_text_byte_offset: usize = 0;
1778        // The line index of the last old line seen before/in the current hunk
1779        // (used for insert-after reference).
1780        let mut last_old_line_before_hunk: Option<usize> = None;
1781
1782        fn flush_hunk(
1783            hunk: Hunk,
1784            last_old_line: Option<usize>,
1785            result: &mut String,
1786            old_hashes: &[u8],
1787        ) {
1788            if hunk.line_range.is_empty() {
1789                // Pure insertion — reference the old line to insert after when in bounds.
1790                if let Some(after) = last_old_line
1791                    && let Some(&hash) = old_hashes.get(after)
1792                {
1793                    write!(
1794                        result,
1795                        "{INSERT_COMMAND_MARKER}{}\n",
1796                        LineRef { index: after, hash }
1797                    )
1798                    .unwrap();
1799                } else {
1800                    result.push_str(INSERT_COMMAND_MARKER);
1801                    result.push('\n');
1802                }
1803            } else {
1804                let start = hunk.line_range.start;
1805                let end_exclusive = hunk.line_range.end;
1806                let deleted_line_count = end_exclusive.saturating_sub(start);
1807
1808                if deleted_line_count == 1 {
1809                    if let Some(&hash) = old_hashes.get(start) {
1810                        write!(
1811                            result,
1812                            "{SET_COMMAND_MARKER}{}\n",
1813                            LineRef { index: start, hash }
1814                        )
1815                        .unwrap();
1816                    } else {
1817                        result.push_str(SET_COMMAND_MARKER);
1818                        result.push('\n');
1819                    }
1820                } else {
1821                    let end_inclusive = end_exclusive - 1;
1822                    match (
1823                        old_hashes.get(start).copied(),
1824                        old_hashes.get(end_inclusive).copied(),
1825                    ) {
1826                        (Some(start_hash), Some(end_hash)) => {
1827                            write!(
1828                                result,
1829                                "{SET_COMMAND_MARKER}{}-{}\n",
1830                                LineRef {
1831                                    index: start,
1832                                    hash: start_hash
1833                                },
1834                                LineRef {
1835                                    index: end_inclusive,
1836                                    hash: end_hash
1837                                }
1838                            )
1839                            .unwrap();
1840                        }
1841                        _ => {
1842                            result.push_str(SET_COMMAND_MARKER);
1843                            result.push('\n');
1844                        }
1845                    }
1846                }
1847            }
1848            for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1849                if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1850                    && line_offset == cursor_line_offset
1851                {
1852                    result.push_str(&line[..char_offset]);
1853                    result.push_str(CURSOR_MARKER);
1854                    result.push_str(&line[char_offset..]);
1855                    continue;
1856                }
1857
1858                result.push_str(line);
1859            }
1860        }
1861
1862        for raw_line in patch.split_inclusive('\n') {
1863            if raw_line.starts_with("@@") {
1864                // Flush any pending change hunk from a previous patch hunk.
1865                if let Some(hunk) = current_hunk.take() {
1866                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1867                }
1868
1869                // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1870                // We intentionally do not trust old_start as a direct local index into `old_text`,
1871                // because some patches are produced against a larger file region and carry
1872                // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1873                if first_hunk {
1874                    new_text_byte_offset = 0;
1875                    first_hunk = false;
1876                }
1877                continue;
1878            }
1879
1880            if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1881                continue;
1882            }
1883            if raw_line.starts_with("\\ No newline") {
1884                continue;
1885            }
1886
1887            if raw_line.starts_with('-') {
1888                // Extend or start a change hunk with this deleted old line.
1889                match &mut current_hunk {
1890                    Some(Hunk {
1891                        line_range: range, ..
1892                    }) => range.end = old_line_index + 1,
1893                    None => {
1894                        current_hunk = Some(Hunk {
1895                            line_range: old_line_index..old_line_index + 1,
1896                            new_text_lines: Vec::new(),
1897                            cursor_line_offset_in_new_text: None,
1898                        });
1899                    }
1900                }
1901                old_line_index += 1;
1902            } else if let Some(added_content) = raw_line.strip_prefix('+') {
1903                // Place cursor marker if cursor_offset falls within this line.
1904                let mut cursor_line_offset = None;
1905                if let Some(cursor_off) = cursor_offset
1906                    && (first_hunk
1907                        || cursor_off >= new_text_byte_offset
1908                            && cursor_off <= new_text_byte_offset + added_content.len())
1909                {
1910                    let line_offset = added_content.floor_char_boundary(
1911                        cursor_off
1912                            .saturating_sub(new_text_byte_offset)
1913                            .min(added_content.len()),
1914                    );
1915                    cursor_line_offset = Some(line_offset);
1916                }
1917
1918                new_text_byte_offset += added_content.len();
1919
1920                let hunk = current_hunk.get_or_insert(Hunk {
1921                    line_range: old_line_index..old_line_index,
1922                    new_text_lines: vec![],
1923                    cursor_line_offset_in_new_text: None,
1924                });
1925                hunk.new_text_lines.push(added_content);
1926                hunk.cursor_line_offset_in_new_text = cursor_line_offset
1927                    .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1928            } else {
1929                // Context line (starts with ' ' or is empty).
1930                if let Some(hunk) = current_hunk.take() {
1931                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1932                }
1933                last_old_line_before_hunk = Some(old_line_index);
1934                old_line_index += 1;
1935                let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1936                new_text_byte_offset += content.len();
1937            }
1938        }
1939
1940        // Flush final group.
1941        if let Some(hunk) = current_hunk.take() {
1942            flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1943        }
1944
1945        // Trim a single trailing newline.
1946        if result.ends_with('\n') {
1947            result.pop();
1948        }
1949
1950        if result.is_empty() {
1951            return Ok(NO_EDITS_COMMAND_MARKER.to_string());
1952        }
1953
1954        Ok(result)
1955    }
1956
1957    #[cfg(test)]
1958    mod tests {
1959        use super::*;
1960        use indoc::indoc;
1961
        /// Table-driven test for `hashline::write_cursor_excerpt_section`.
        ///
        /// Each case renders one prompt section for the file `test.rs` and
        /// compares the complete output (prefix, hash-prefixed editable lines
        /// with the cursor marker, suffix) against `expected`.
        #[test]
        fn test_format_cursor_region() {
            struct Case {
                // Label reported in the assertion message on failure.
                name: &'static str,
                // Full excerpt text passed to the function under test.
                context: &'static str,
                // Byte range of `context` that forms the editable region.
                editable_range: Range<usize>,
                // Cursor position as a byte offset (relative to `context`, see the last case).
                cursor_offset: usize,
                // Exact prompt text the function is expected to append.
                expected: &'static str,
            }

            let cases = [
                Case {
                    name: "basic_cursor_placement",
                    context: "hello world\n",
                    editable_range: 0..12,
                    cursor_offset: 5,
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:5c|hello<|user_cursor|> world
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "multiline_cursor_on_second_line",
                    context: "aaa\nbbb\nccc\n",
                    editable_range: 0..12,
                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:23|aaa
                    1:26|b<|user_cursor|>bb
                    2:29|ccc
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "no_trailing_newline_in_context",
                    context: "line1\nline2",
                    editable_range: 0..11,
                    cursor_offset: 3,
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:d9|lin<|user_cursor|>e1
                    1:da|line2
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "leading_newline_in_editable_region",
                    context: "\nabc\n",
                    editable_range: 0..5,
                    cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:00|
                    1:26|a<|user_cursor|>bc
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "with_suffix",
                    context: "abc\ndef",
                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
                    cursor_offset: 2,
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:26|ab<|user_cursor|>c
                    <|fim_suffix|>
                    def
                    <|fim_middle|>updated
                    "},
                },
                // The Unicode cases place the cursor on a char boundary after
                // 2-, 3- and 4-byte UTF-8 sequences respectively.
                Case {
                    name: "unicode_two_byte_chars",
                    context: "héllo\n",
                    editable_range: 0..7,
                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:1b|hé<|user_cursor|>llo
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "unicode_three_byte_chars",
                    context: "日本語\n",
                    editable_range: 0..10,
                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:80|日本<|user_cursor|>語
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "unicode_four_byte_chars",
                    context: "a🌍b\n",
                    editable_range: 0..7,
                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:6b|a🌍<|user_cursor|>b
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                // The next two cases pin positions where no cursor marker is emitted.
                Case {
                    name: "cursor_at_start_of_region_not_placed",
                    context: "abc\n",
                    editable_range: 0..4,
                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:26|abc
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "cursor_at_end_of_line_not_placed",
                    context: "abc\ndef\n",
                    editable_range: 0..8,
                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:26|abc
                    1:2f|def
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "cursor_offset_relative_to_context_not_editable_region",
                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
                    // write_cursor_excerpt_section must subtract it before comparing against
                    // per-line offsets within the editable region.
                    context: "pre\naaa\nbbb\nsuf\n",
                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    pre
                    <|fim_middle|>current
                    0:23|aaa
                    1:26|b<|user_cursor|>bb
                    <|fim_suffix|>
                    suf
                    <|fim_middle|>updated
                    "},
                },
            ];

            // Render each case into a fresh buffer and compare the whole output.
            for case in &cases {
                let mut prompt = String::new();
                hashline::write_cursor_excerpt_section(
                    &mut prompt,
                    Path::new("test.rs"),
                    case.context,
                    &case.editable_range,
                    case.cursor_offset,
                );
                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
            }
        }
2153
        /// Table-driven test for `hashline::apply_edit_commands`.
        ///
        /// Each case applies `model_output` to `original` and compares the
        /// resulting text against `expected`. Cases whose `expected` equals
        /// `original` pin inputs that must leave the text untouched.
        #[test]
        fn test_apply_edit_commands() {
            struct Case {
                // Label reported in the assertion message on failure.
                name: &'static str,
                // Editable region text before the edit commands are applied.
                original: &'static str,
                // Raw command text, as the model would emit it.
                model_output: &'static str,
                // Text expected after applying `model_output` to `original`.
                expected: &'static str,
            }

            let cases = vec![
                Case {
                    name: "set_single_line",
                    original: indoc! {"
                    let mut total = 0;
                    for product in products {
                        total += ;
                    }
                    total
                "},
                    model_output: indoc! {"
                    <|set|>2:87
                        total += product.price;
                "},
                    expected: indoc! {"
                    let mut total = 0;
                    for product in products {
                        total += product.price;
                    }
                    total
                "},
                },
                Case {
                    name: "set_range",
                    original: indoc! {"
                    fn foo() {
                        let x = 1;
                        let y = 2;
                        let z = 3;
                    }
                "},
                    model_output: indoc! {"
                    <|set|>1:46-3:4a
                        let sum = 6;
                "},
                    expected: indoc! {"
                    fn foo() {
                        let sum = 6;
                    }
                "},
                },
                Case {
                    name: "insert_after_line",
                    original: indoc! {"
                    fn main() {
                        let x = 1;
                    }
                "},
                    model_output: indoc! {"
                    <|insert|>1:46
                        let y = 2;
                "},
                    expected: indoc! {"
                    fn main() {
                        let x = 1;
                        let y = 2;
                    }
                "},
                },
                // A bare `<|insert|>` (no line reference) inserts before the first line.
                Case {
                    name: "insert_before_first",
                    original: indoc! {"
                    let x = 1;
                    let y = 2;
                "},
                    model_output: indoc! {"
                    <|insert|>
                    use std::io;
                "},
                    expected: indoc! {"
                    use std::io;
                    let x = 1;
                    let y = 2;
                "},
                },
                // The cursor marker inside replacement text is kept verbatim in the output.
                Case {
                    name: "set_with_cursor_marker",
                    original: indoc! {"
                    fn main() {
                        println!();
                    }
                "},
                    model_output: indoc! {"
                    <|set|>1:34
                        eprintln!(\"<|user_cursor|>\");
                "},
                    expected: indoc! {"
                    fn main() {
                        eprintln!(\"<|user_cursor|>\");
                    }
                "},
                },
                Case {
                    name: "multiple_set_commands",
                    original: indoc! {"
                    aaa
                    bbb
                    ccc
                    ddd
                "},
                    model_output: indoc! {"
                    <|set|>0:23
                    AAA
                    <|set|>2:29
                    CCC
                "},
                    expected: indoc! {"
                    AAA
                    bbb
                    CCC
                    ddd
                "},
                },
                Case {
                    name: "set_range_multiline_replacement",
                    original: indoc! {"
                    fn handle_submit() {
                    }

                    fn handle_keystroke() {
                "},
                    model_output: indoc! {"
                    <|set|>0:3f-1:7d
                    fn handle_submit(modal_state: &mut ModalState) {
                        <|user_cursor|>
                    }
                "},
                    expected: indoc! {"
                    fn handle_submit(modal_state: &mut ModalState) {
                        <|user_cursor|>
                    }

                    fn handle_keystroke() {
                "},
                },
                Case {
                    name: "no_edit_commands_returns_original",
                    original: indoc! {"
                    hello
                    world
                "},
                    model_output: "some random text with no commands",
                    expected: indoc! {"
                    hello
                    world
                "},
                },
                Case {
                    name: "no_edits_command_returns_original",
                    original: indoc! {"
                    hello
                    world
                "},
                    model_output: "<|no_edits|>",
                    expected: indoc! {"
                    hello
                    world
                "},
                },
                // A line reference whose hash does not match the line is ignored.
                Case {
                    name: "wrong_hash_set_ignored",
                    original: indoc! {"
                    aaa
                    bbb
                "},
                    model_output: indoc! {"
                    <|set|>0:ff
                    ZZZ
                "},
                    expected: indoc! {"
                    aaa
                    bbb
                "},
                },
                Case {
                    name: "insert_and_set_combined",
                    original: indoc! {"
                    alpha
                    beta
                    gamma
                "},
                    model_output: indoc! {"
                    <|set|>0:06
                    ALPHA
                    <|insert|>1:9c
                    beta_extra
                "},
                    expected: indoc! {"
                    ALPHA
                    beta
                    beta_extra
                    gamma
                "},
                },
                // `original` has no trailing newline, and neither does the result.
                Case {
                    name: "no_trailing_newline_preserved",
                    original: "hello\nworld",
                    model_output: indoc! {"
                    <|set|>0:14
                    HELLO
                "},
                    expected: "HELLO\nworld",
                },
                Case {
                    name: "set_range_hash_mismatch_in_end_bound",
                    original: indoc! {"
                    one
                    two
                    three
                "},
                    model_output: indoc! {"
                    <|set|>0:42-2:ff
                    ONE_TWO_THREE
                "},
                    expected: indoc! {"
                    one
                    two
                    three
                "},
                },
                Case {
                    name: "set_range_start_greater_than_end_ignored",
                    original: indoc! {"
                    a
                    b
                    c
                "},
                    model_output: indoc! {"
                    <|set|>2:63-1:62
                    X
                "},
                    expected: indoc! {"
                    a
                    b
                    c
                "},
                },
                Case {
                    name: "insert_out_of_bounds_ignored",
                    original: indoc! {"
                    x
                    y
                "},
                    model_output: indoc! {"
                    <|insert|>99:aa
                    z
                "},
                    expected: indoc! {"
                    x
                    y
                "},
                },
                Case {
                    name: "set_out_of_bounds_ignored",
                    original: indoc! {"
                    x
                    y
                "},
                    model_output: indoc! {"
                    <|set|>99:aa
                    z
                "},
                    expected: indoc! {"
                    x
                    y
                "},
                },
                Case {
                    name: "malformed_set_command_ignored",
                    original: indoc! {"
                    alpha
                    beta
                "},
                    model_output: indoc! {"
                    <|set|>not-a-line-ref
                    UPDATED
                "},
                    expected: indoc! {"
                    alpha
                    beta
                "},
                },
                // Unlike a malformed `<|set|>`, an `<|insert|>` with an unparsable
                // reference is not dropped: its content lands before the first line.
                Case {
                    name: "malformed_insert_hash_treated_as_before_first",
                    original: indoc! {"
                    alpha
                    beta
                "},
                    model_output: indoc! {"
                    <|insert|>1:nothex
                    preamble
                "},
                    expected: indoc! {"
                    preamble
                    alpha
                    beta
                "},
                },
                Case {
                    name: "set_then_insert_same_target_orders_insert_after_replacement",
                    original: indoc! {"
                    cat
                    dog
                "},
                    model_output: indoc! {"
                    <|set|>0:38
                    CAT
                    <|insert|>0:38
                    TAIL
                "},
                    expected: indoc! {"
                    CAT
                    TAIL
                    dog
                "},
                },
                // NOTE(review): despite the case name, the expected output shows the
                // range starting first (FIRST, lines 0-2) being applied and the
                // overlapping SECOND replacement being dropped entirely.
                Case {
                    name: "overlapping_set_ranges_last_wins",
                    original: indoc! {"
                    a
                    b
                    c
                    d
                "},
                    model_output: indoc! {"
                    <|set|>0:61-2:63
                    FIRST
                    <|set|>1:62-3:64
                    SECOND
                "},
                    expected: indoc! {"
                    FIRST
                    d
                "},
                },
                Case {
                    name: "insert_before_first_and_after_line",
                    original: indoc! {"
                        a
                        b
                    "},
                    model_output: indoc! {"
                        <|insert|>
                        HEAD
                        <|insert|>0:61
                        MID
                    "},
                    expected: indoc! {"
                        HEAD
                        a
                        MID
                        b
                    "},
                },
            ];

            // Apply each case's commands and compare against the expected text.
            for case in &cases {
                let result = hashline::apply_edit_commands(case.original, &case.model_output);
                assert_eq!(result, case.expected, "failed case: {}", case.name);
            }
        }
2524
2525        #[test]
2526        fn test_output_has_edit_commands() {
2527            assert!(hashline::output_has_edit_commands(&format!(
2528                "{}0:ab\nnew",
2529                SET_COMMAND_MARKER
2530            )));
2531            assert!(hashline::output_has_edit_commands(&format!(
2532                "{}0:ab\nnew",
2533                INSERT_COMMAND_MARKER
2534            )));
2535            assert!(hashline::output_has_edit_commands(&format!(
2536                "some text\n{}1:cd\nstuff",
2537                SET_COMMAND_MARKER
2538            )));
2539            assert!(!hashline::output_has_edit_commands("just plain text"));
2540            assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2541            assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2542        }
2543
2544        // ---- hashline::patch_to_edit_commands round-trip tests ----
2545
        // Table-driven round-trip check: each unified-diff `patch` is converted
        // into edit commands with `hashline::patch_to_edit_commands`, and applying
        // those commands to `old` with `hashline::apply_edit_commands` must
        // reproduce `expected_new` exactly.
        #[test]
        fn test_patch_to_edit_commands() {
            struct Case {
                // Label used in failure messages.
                name: &'static str,
                // Text the patch is converted against and the commands are applied to.
                old: &'static str,
                // Unified diff (hunk header plus body) to convert.
                patch: &'static str,
                // Expected text after applying the generated commands; may contain
                // `CURSOR_MARKER`, in which case its position is passed as the
                // cursor offset.
                expected_new: &'static str,
            }

            let cases = [
                Case {
                    name: "single_line_replacement",
                    old: indoc! {"
                    let mut total = 0;
                    for product in products {
                        total += ;
                    }
                    total
                "},
                    patch: indoc! {"
                    @@ -1,5 +1,5 @@
                     let mut total = 0;
                     for product in products {
                    -    total += ;
                    +    total += product.price;
                     }
                     total
                "},
                    expected_new: indoc! {"
                    let mut total = 0;
                    for product in products {
                        total += product.price;
                    }
                    total
                "},
                },
                Case {
                    name: "multiline_replacement",
                    old: indoc! {"
                    fn foo() {
                        let x = 1;
                        let y = 2;
                        let z = 3;
                    }
                "},
                    patch: indoc! {"
                    @@ -1,5 +1,3 @@
                     fn foo() {
                    -    let x = 1;
                    -    let y = 2;
                    -    let z = 3;
                    +    let sum = 1 + 2 + 3;
                     }
                "},
                    expected_new: indoc! {"
                    fn foo() {
                        let sum = 1 + 2 + 3;
                    }
                "},
                },
                Case {
                    name: "insertion",
                    old: indoc! {"
                    fn main() {
                        let x = 1;
                    }
                "},
                    patch: indoc! {"
                    @@ -1,3 +1,4 @@
                     fn main() {
                         let x = 1;
                    +    let y = 2;
                     }
                "},
                    expected_new: indoc! {"
                    fn main() {
                        let x = 1;
                        let y = 2;
                    }
                "},
                },
                Case {
                    name: "insertion_before_first",
                    old: indoc! {"
                    let x = 1;
                    let y = 2;
                "},
                    patch: indoc! {"
                    @@ -1,2 +1,3 @@
                    +use std::io;
                     let x = 1;
                     let y = 2;
                "},
                    expected_new: indoc! {"
                    use std::io;
                    let x = 1;
                    let y = 2;
                "},
                },
                Case {
                    name: "deletion",
                    old: indoc! {"
                    aaa
                    bbb
                    ccc
                    ddd
                "},
                    patch: indoc! {"
                    @@ -1,4 +1,2 @@
                     aaa
                    -bbb
                    -ccc
                     ddd
                "},
                    expected_new: indoc! {"
                    aaa
                    ddd
                "},
                },
                Case {
                    name: "multiple_changes",
                    old: indoc! {"
                    alpha
                    beta
                    gamma
                    delta
                    epsilon
                "},
                    patch: indoc! {"
                    @@ -1,5 +1,5 @@
                    -alpha
                    +ALPHA
                     beta
                     gamma
                    -delta
                    +DELTA
                     epsilon
                "},
                    expected_new: indoc! {"
                    ALPHA
                    beta
                    gamma
                    DELTA
                    epsilon
                "},
                },
                Case {
                    name: "replace_with_insertion",
                    old: indoc! {r#"
                    fn handle() {
                        modal_state.close();
                        modal_state.dismiss();
                "#},
                    patch: indoc! {r#"
                    @@ -1,3 +1,4 @@
                     fn handle() {
                         modal_state.close();
                    +    eprintln!("");
                         modal_state.dismiss();
                "#},
                    expected_new: indoc! {r#"
                    fn handle() {
                        modal_state.close();
                        eprintln!("");
                        modal_state.dismiss();
                "#},
                },
                Case {
                    name: "complete_replacement",
                    old: indoc! {"
                    aaa
                    bbb
                    ccc
                "},
                    patch: indoc! {"
                    @@ -1,3 +1,3 @@
                    -aaa
                    -bbb
                    -ccc
                    +xxx
                    +yyy
                    +zzz
                "},
                    expected_new: indoc! {"
                    xxx
                    yyy
                    zzz
                "},
                },
                Case {
                    name: "add_function_body",
                    old: indoc! {"
                    fn foo() {
                        modal_state.dismiss();
                    }

                    fn

                    fn handle_keystroke() {
                "},
                    patch: indoc! {"
                    @@ -1,6 +1,8 @@
                     fn foo() {
                         modal_state.dismiss();
                     }

                    -fn
                    +fn handle_submit() {
                    +    todo()
                    +}

                     fn handle_keystroke() {
                "},
                    expected_new: indoc! {"
                    fn foo() {
                        modal_state.dismiss();
                    }

                    fn handle_submit() {
                        todo()
                    }

                    fn handle_keystroke() {
                "},
                },
                Case {
                    name: "with_cursor_offset",
                    old: indoc! {r#"
                    fn main() {
                        println!();
                    }
                "#},
                    patch: indoc! {r#"
                        @@ -1,3 +1,3 @@
                        fn main() {
                        -    println!();
                        +    eprintln!("");
                        }
                    "#},
                    expected_new: indoc! {r#"
                        fn main() {
                            eprintln!("<|user_cursor|>");
                        }
                    "#},
                },
                Case {
                    name: "non_local_hunk_header_pure_insertion_repro",
                    old: indoc! {"
                        aaa
                        bbb
                    "},
                    patch: indoc! {"
                        @@ -20,2 +20,3 @@
                        aaa
                        +xxx
                        bbb
                    "},
                    expected_new: indoc! {"
                        aaa
                        xxx
                        bbb
                    "},
                },
                Case {
                    name: "empty_patch_produces_no_edits_marker",
                    old: indoc! {"
                        aaa
                        bbb
                    "},
                    patch: "@@ -20,2 +20,3 @@\n",
                    expected_new: indoc! {"
                        aaa
                        bbb
                    "},
                },
            ];

            for case in &cases {
                // The cursor_offset for patch_to_edit_commands is relative to
                // the first hunk's new text (context + additions). We compute
                // it by finding where the marker sits in the expected output
                // (which mirrors the new text of the hunk). Cases without a
                // marker pass `None`.
                let cursor_offset = case.expected_new.find(CURSOR_MARKER);

                let commands =
                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));

                // Every case, including the header-only patch, must yield output
                // that `output_has_edit_commands` recognises.
                assert!(
                    hashline::output_has_edit_commands(&commands),
                    "case {}: expected edit commands, got: {commands:?}",
                    case.name,
                );

                // Round trip: applying the generated commands to `old` must give
                // back the expected text.
                let applied = hashline::apply_edit_commands(case.old, &commands);
                assert_eq!(applied, case.expected_new, "case {}", case.name);
            }
        }
2844    }
2845}
2846
2847pub mod seed_coder {
2848    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2849    //!
2850    //! Seed-Coder uses different FIM tokens and order than Qwen:
2851    //! - SPM order: suffix comes FIRST, then prefix, then middle
2852    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2853    //! - File markers: StarCoder-style `<filename>path` (single token + path)
2854    //!
2855    //! All context (related files, edit history) goes in the PREFIX section.
2856    //! The suffix contains only code after the editable region.
2857    //!
2858    //! Example prompt:
2859    //!
2860    //! <[fim-suffix]>
2861    //! code after editable region
2862    //! <[fim-prefix]><filename>related/file.py
2863    //! related file content
2864    //!
2865    //! <filename>edit_history
2866    //! --- a/some_file.py
2867    //! +++ b/some_file.py
2868    //! -old
2869    //! +new
2870    //!
2871    //! <filename>path/to/target_file.py
2872    //! code before editable region
2873    //! <<<<<<< CURRENT
2874    //! code that
2875    //! needs to<|user_cursor|>
2876    //! be rewritten
2877    //! =======
2878    //! <[fim-middle]>
2879    //!
2880    //! Expected output (model generates):
2881    //!
2882    //! updated
2883    //! code with
2884    //! changes applied
2885    //! >>>>>>> UPDATED
2886
2887    use super::*;
2888
    /// Token that opens the suffix section; it is the first thing in the prompt.
    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
    /// Token that follows the suffix section and opens the prefix section.
    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
    /// Token that terminates the prompt; the model's output follows it.
    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
    /// Marker written immediately before a file path (or section name).
    pub const FILE_MARKER: &str = "<filename>";

    /// Line written before the editable region of the cursor excerpt.
    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
    /// Line written after the editable region of the cursor excerpt.
    pub const SEPARATOR: &str = "=======\n";
    /// Line that closes the expected model output (see the module example).
    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

    /// Output body used by `no_edits` to represent a patch without changes.
    pub const NO_EDITS: &str = "NO_EDITS\n";
2899
2900    pub fn special_tokens() -> &'static [&'static str] {
2901        &[
2902            FIM_SUFFIX,
2903            FIM_PREFIX,
2904            FIM_MIDDLE,
2905            FILE_MARKER,
2906            START_MARKER,
2907            SEPARATOR,
2908            END_MARKER,
2909            CURSOR_MARKER,
2910        ]
2911    }
2912
2913    pub fn write_cursor_excerpt_section(
2914        prompt: &mut String,
2915        path: &Path,
2916        context: &str,
2917        editable_range: &Range<usize>,
2918        cursor_offset: usize,
2919    ) {
2920        let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2921        prompt.push_str(&section);
2922    }
2923
2924    pub fn format_prompt_with_budget(
2925        path: &Path,
2926        context: &str,
2927        editable_range: &Range<usize>,
2928        cursor_offset: usize,
2929        events: &[Arc<Event>],
2930        related_files: &[RelatedFile],
2931        max_tokens: usize,
2932    ) -> String {
2933        let cursor_prefix_section =
2934            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2935        assemble_fim_prompt(
2936            context,
2937            editable_range,
2938            &cursor_prefix_section,
2939            events,
2940            related_files,
2941            max_tokens,
2942        )
2943    }
2944
2945    pub fn assemble_fim_prompt(
2946        context: &str,
2947        editable_range: &Range<usize>,
2948        cursor_prefix_section: &str,
2949        events: &[Arc<Event>],
2950        related_files: &[RelatedFile],
2951        max_tokens: usize,
2952    ) -> String {
2953        let suffix_section = build_suffix_section(context, editable_range);
2954
2955        let suffix_tokens = estimate_tokens(suffix_section.len() + FIM_PREFIX.len());
2956        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len() + FIM_MIDDLE.len());
2957        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2958
2959        let edit_history_section = super::format_edit_history_within_budget(
2960            events,
2961            FILE_MARKER,
2962            "edit_history",
2963            budget_after_cursor,
2964            max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
2965        );
2966        let edit_history_tokens = estimate_tokens(edit_history_section.len() + "\n".len());
2967        let budget_after_edit_history =
2968            budget_after_cursor.saturating_sub(edit_history_tokens + "\n".len());
2969
2970        let related_files_section = super::format_related_files_within_budget(
2971            related_files,
2972            FILE_MARKER,
2973            "",
2974            budget_after_edit_history,
2975        );
2976
2977        let mut prompt = String::new();
2978        prompt.push_str(&suffix_section);
2979        prompt.push_str(FIM_PREFIX);
2980        prompt.push_str(&related_files_section);
2981        if !related_files_section.is_empty() {
2982            prompt.push('\n');
2983        }
2984        prompt.push_str(&edit_history_section);
2985        if !edit_history_section.is_empty() {
2986            prompt.push('\n');
2987        }
2988        prompt.push_str(cursor_prefix_section);
2989        prompt.push_str(FIM_MIDDLE);
2990
2991        prompt
2992    }
2993
2994    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2995        let mut section = String::new();
2996        section.push_str(FIM_SUFFIX);
2997        section.push_str(&context[editable_range.end..]);
2998        if !section.ends_with('\n') {
2999            section.push('\n');
3000        }
3001        section
3002    }
3003
3004    fn build_cursor_prefix_section(
3005        path: &Path,
3006        context: &str,
3007        editable_range: &Range<usize>,
3008        cursor_offset: usize,
3009    ) -> String {
3010        let mut section = String::new();
3011        let path_str = path.to_string_lossy();
3012        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
3013
3014        section.push_str(&context[..editable_range.start]);
3015        section.push_str(START_MARKER);
3016        section.push_str(&context[editable_range.start..cursor_offset]);
3017        section.push_str(CURSOR_MARKER);
3018        section.push_str(&context[cursor_offset..editable_range.end]);
3019        if !section.ends_with('\n') {
3020            section.push('\n');
3021        }
3022        section.push_str(SEPARATOR);
3023        section
3024    }
3025
3026    /// Format patch as containing no changes if it's empty; otherwise return None.
3027    pub(crate) fn no_edits(patch: &str) -> Option<String> {
3028        // Count lines in the patch
3029        let empty_patch = patch.lines().count() <= 3;
3030        if empty_patch {
3031            Some(format!("{NO_EDITS}{END_MARKER}"))
3032        } else {
3033            None
3034        }
3035    }
3036}
3037
3038pub mod v0304_variable_edit {
3039    //! A prompt format with no fixed editable region. The entire context is shown
3040    //! to the model, and it chooses which text to replace by outputting surrounding
3041    //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
3042    //! text.
3043    //!
3044    //! Example prompt:
3045    //!
3046    //! <|file_sep|>path/to/file.py
3047    //! zero
3048    //! one
3049    //! two
3050    //! three<|user_cursor|>
3051    //! four
3052    //! five
3053    //! <|fim_prefix|>
    //!
3055    //! Expected output (model generates):
3056    //!
3057    //! two
3058    //! <|fim_middle|>
3059    //! THREE
3060    //! <|fim_suffix|>
3061    //! four
3062    //!
3063    //! The output means: find "two\n...\nfour" in the context, and replace
3064    //! everything between "two\n" and "four" with "THREE\n".
3065
3066    use super::*;
3067
3068    pub fn special_tokens() -> &'static [&'static str] {
3069        &[
3070            "<|fim_prefix|>",
3071            "<|fim_suffix|>",
3072            "<|fim_middle|>",
3073            "<|file_sep|>",
3074            CURSOR_MARKER,
3075        ]
3076    }
3077
3078    pub fn write_cursor_excerpt_section(
3079        prompt: &mut String,
3080        path: &Path,
3081        context: &str,
3082        cursor_offset: usize,
3083    ) {
3084        let path_str = path.to_string_lossy();
3085        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
3086
3087        prompt.push_str(&context[..cursor_offset]);
3088        prompt.push_str(CURSOR_MARKER);
3089        prompt.push_str(&context[cursor_offset..]);
3090        if !prompt.ends_with('\n') {
3091            prompt.push('\n');
3092        }
3093        prompt.push_str("<|fim_prefix|>\n")
3094    }
3095
3096    /// Apply a variable-edit model output to the original context text.
3097    ///
3098    /// The model output has the form:
3099    ///
3100    /// - prefix context lines
3101    /// - `<|fim_middle|>`
3102    /// - new text
3103    /// - `<|fim_suffix|>`
3104    /// - suffix context lines
3105    ///
3106    /// We locate the prefix/suffix context lines in the original text and replace
3107    /// everything between them with the new text.
3108    pub fn apply_variable_edit(
3109        context: &str,
3110        model_output: &str,
3111    ) -> Result<(Range<usize>, String)> {
3112        let (prefix_context, rest) = model_output
3113            .split_once("<|fim_middle|>\n")
3114            .or_else(|| model_output.split_once("<|fim_middle|>"))
3115            .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
3116
3117        let (new_text, suffix_context) = rest
3118            .split_once("<|fim_suffix|>\n")
3119            .or_else(|| rest.split_once("<|fim_suffix|>"))
3120            .unwrap_or((rest, ""));
3121
3122        let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
3123            suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
3124        } else {
3125            suffix_context
3126        };
3127
3128        let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
3129            .ok_or_else(|| anyhow!("could not locate prefix lines"))?
3130            + prefix_context.len();
3131        let suffix_offset = if suffix_context.is_empty() {
3132            context.len()
3133        } else {
3134            find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
3135                .ok_or_else(|| anyhow!("could not locate suffix lines"))?
3136                + prefix_offset
3137        };
3138
3139        let edit_range = prefix_offset..suffix_offset;
3140        return Ok((edit_range, new_text.to_string()));
3141    }
3142
3143    fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
3144        if needle.is_empty() {
3145            return Some(0);
3146        }
3147
3148        haystack.match_indices(needle).find_map(|(offset, _)| {
3149            let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
3150            matched_line_start.then_some(offset)
3151        })
3152    }
3153
    /// Convert a unified diff patch into the variable-edit output format.
    ///
    /// Parses `patch` as a unified diff against `old_text` and produces model
    /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
    /// delimiters. The diff is resolved by content matching rather than line
    /// numbers.
    ///
    /// All hunks are applied first; the output then describes a single edit
    /// spanning from the first changed line to the last changed line.
    ///
    /// `cursor_offset`, when given, is a byte offset relative to where the first
    /// hunk was found; `CURSOR_MARKER` is inserted at the corresponding position
    /// of the replacement text if it falls inside it.
    ///
    /// Returns an empty string when the patch contains no hunks or when applying
    /// it leaves every line unchanged.
    ///
    /// # Errors
    ///
    /// Fails when a hunk's old context cannot be found in the text.
    pub fn patch_to_variable_edit_output(
        old_text: &str,
        patch: &str,
        cursor_offset: Option<usize>,
    ) -> Result<String> {
        // Parse the unified diff into hunks. Each hunk has an `old_context`
        // string (context + deleted lines interleaved in order) and a list of
        // edits expressed as byte ranges within that context plus replacement
        // text.
        let hunks = parse_hunks(patch);
        if hunks.is_empty() {
            return Ok(String::new());
        }

        // Apply each hunk by finding its old_context in the text and
        // performing the edits. We search forward from where the previous
        // hunk ended so that hunks are applied in order.
        let mut new_text = old_text.to_string();
        let mut search_from: usize = 0;
        let mut first_hunk_pos: Option<usize> = None;

        for hunk in &hunks {
            let context_pos = new_text[search_from..]
                .find(&hunk.old_context)
                .map(|pos| pos + search_from)
                .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;

            // Remember where the first hunk landed; the cursor offset is
            // relative to this position.
            if first_hunk_pos.is_none() {
                first_hunk_pos = Some(context_pos);
            }

            // Apply edits in reverse order so byte offsets remain valid.
            for edit in hunk.edits.iter().rev() {
                let abs_start = context_pos + edit.range.start;
                let abs_end = context_pos + edit.range.end;
                new_text.replace_range(abs_start..abs_end, &edit.text);
            }

            // Advance past this hunk's region in the (now modified) text:
            // its old length plus each edit's inserted bytes minus its
            // replaced bytes.
            let new_region_len: usize =
                hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
                    len + edit.text.len() - (edit.range.end - edit.range.start)
                });
            search_from = context_pos + new_region_len;
        }

        // Now we have old_text and new_text. Find the changed line range by
        // comparing them.
        let old_lines: Vec<&str> = old_text.lines().collect();
        let new_lines: Vec<&str> = new_text.lines().collect();

        // Find first differing line. If one text is a line-wise prefix of the
        // other, the change starts right after the shorter one.
        let first_changed_row = old_lines
            .iter()
            .zip(new_lines.iter())
            .position(|(a, b)| a != b)
            .unwrap_or_else(|| old_lines.len().min(new_lines.len()));

        // Find last differing line (from the end). The common suffix is capped
        // so it cannot overlap the common prefix.
        let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
        let common_suffix = old_lines
            .iter()
            .rev()
            .zip(new_lines.iter().rev())
            .take(max_suffix)
            .take_while(|(a, b)| a == b)
            .count();

        // Exclusive end rows of the changed region in each text.
        let old_end = old_lines.len() - common_suffix;
        let new_end = new_lines.len() - common_suffix;

        // Both changed regions are empty: nothing to emit.
        if first_changed_row == old_end && first_changed_row == new_end {
            return Ok(String::new());
        }

        // Build the replacement text from new_lines[first_changed_row..new_end].
        let mut merged_new_text = String::new();
        for line in &new_lines[first_changed_row..new_end] {
            merged_new_text.push_str(line);
            merged_new_text.push('\n');
        }

        // cursor_offset is relative to the first hunk's new content in
        // new_text. Translate it to an offset within merged_new_text, which
        // only contains lines first_changed_row..new_end of new_text.
        if let Some(hunk_offset) = cursor_offset {
            let hunk_start = first_hunk_pos.unwrap_or(0);
            let absolute_pos = hunk_start + hunk_offset;

            // Byte offset where first_changed_row starts in new_text.
            // NOTE(review): assumes each line is terminated by a single `\n`
            // byte; `lines()` also strips `\r\n` — confirm inputs are LF-only.
            let merged_start: usize = new_lines[..first_changed_row]
                .iter()
                .map(|line| line.len() + 1)
                .sum();

            // The marker is only inserted when the cursor falls inside the
            // replacement text; otherwise it is silently omitted.
            if absolute_pos >= merged_start {
                let relative_offset = absolute_pos - merged_start;
                if relative_offset <= merged_new_text.len() {
                    // NOTE(review): `insert_str` panics if the offset is not on
                    // a char boundary — presumably callers pass a valid
                    // boundary; verify.
                    merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
                }
            }
        }

        // Build output with 2 lines of context above and below.
        let context_lines_count = 2;
        let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
        let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());

        // Counts how many times the run of lines `lines[line_range]` occurs
        // anywhere in `lines` (including the occurrence at `line_range`).
        fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
            let pattern = &lines[line_range];
            let pattern_len = pattern.len();

            let mut count = 0;
            for offset in 0..=lines.len() - pattern_len {
                if &lines[offset..offset + pattern_len] == pattern {
                    count += 1;
                }
            }
            count
        }

        // Expand prefix and suffix until they are unique within the old text,
        // or until the start/end of the text is reached.
        while prefix_start > 0 {
            if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
                prefix_start -= 1;
            } else {
                break;
            }
        }
        while suffix_end < old_lines.len() {
            if count_matches(old_end..suffix_end, &old_lines) > 1 {
                suffix_end += 1;
            } else {
                break;
            }
        }

        // Emit: prefix context lines, `<|fim_middle|>`, replacement text,
        // `<|fim_suffix|>`, suffix context lines.
        let mut output = String::new();
        for line in &old_lines[prefix_start..first_changed_row] {
            output.push_str(line);
            output.push('\n');
        }
        output.push_str("<|fim_middle|>\n");
        output.push_str(&merged_new_text);
        output.push_str("<|fim_suffix|>\n");
        for line in &old_lines[old_end..suffix_end] {
            output.push_str(line);
            output.push('\n');
        }

        Ok(output)
    }
3312
    /// One hunk of a unified diff, expressed by content rather than line numbers.
    struct ParsedHunk {
        /// The hunk's context and removed lines, in order, each terminated by
        /// `'\n'`; this is the text to look for in the original.
        old_context: String,
        /// Edits to apply to `old_context`, in the order they appear in the hunk.
        edits: Vec<ParsedEdit>,
    }
3317
    /// A single replacement inside a [`ParsedHunk`].
    struct ParsedEdit {
        /// Byte range within the hunk's `old_context` that is replaced; empty
        /// for a pure insertion.
        range: Range<usize>,
        /// Replacement text (added lines, each terminated by `'\n'`); empty for
        /// a pure deletion.
        text: String,
    }
3322
3323    /// Parse a unified diff into content-based hunks. Each hunk contains an
3324    /// `old_context` string (context lines + deleted lines, which together
3325    /// form the text that should be found in the original) and a list of edits
3326    /// expressed as byte ranges within that context.
3327    fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
3328        let mut hunks = Vec::new();
3329        let mut current: Option<ParsedHunk> = None;
3330
3331        for line in patch.lines() {
3332            if line.starts_with("@@") {
3333                if let Some(hunk) = current.take() {
3334                    if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3335                        hunks.push(hunk);
3336                    }
3337                }
3338                current = Some(ParsedHunk {
3339                    old_context: String::new(),
3340                    edits: Vec::new(),
3341                });
3342            } else if line.starts_with("---") || line.starts_with("+++") {
3343                continue;
3344            } else if let Some(hunk) = &mut current {
3345                if let Some(added) = line.strip_prefix('+') {
3346                    let pos = hunk.old_context.len();
3347                    if let Some(last_edit) = hunk.edits.last_mut() {
3348                        if last_edit.range.end == pos {
3349                            writeln!(&mut last_edit.text, "{added}").ok();
3350                            continue;
3351                        }
3352                    }
3353                    hunk.edits.push(ParsedEdit {
3354                        range: pos..pos,
3355                        text: format!("{added}\n"),
3356                    });
3357                } else if let Some(removed) = line.strip_prefix('-') {
3358                    let start = hunk.old_context.len();
3359                    writeln!(&mut hunk.old_context, "{removed}").ok();
3360                    let end = hunk.old_context.len();
3361                    if let Some(last_edit) = hunk.edits.last_mut() {
3362                        if last_edit.range.end == start {
3363                            last_edit.range.end = end;
3364                            continue;
3365                        }
3366                    }
3367                    hunk.edits.push(ParsedEdit {
3368                        range: start..end,
3369                        text: String::new(),
3370                    });
3371                } else {
3372                    let ctx = line.strip_prefix(' ').unwrap_or(line);
3373                    writeln!(&mut hunk.old_context, "{ctx}").ok();
3374                }
3375            }
3376        }
3377
3378        if let Some(hunk) = current {
3379            if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3380                hunks.push(hunk);
3381            }
3382        }
3383
3384        hunks
3385    }
3386
3387    #[cfg(test)]
3388    mod tests {
3389        use super::*;
3390        use indoc::indoc;
3391
3392        #[test]
3393        fn test_apply_variable_edit() {
3394            struct Case {
3395                name: &'static str,
3396                original: &'static str,
3397                model_output: &'static str,
3398                expected: &'static str,
3399            }
3400
3401            let cases = [
3402                Case {
3403                    name: "simple_single_line_replacement",
3404                    original: indoc! {"
3405                        zero
3406                        one
3407                        two
3408                        three
3409                        four
3410                        five
3411                    "},
3412                    model_output: indoc! {"
3413                        two
3414                        <|fim_middle|>
3415                        THREE
3416                        <|fim_suffix|>
3417                        four
3418                    "},
3419                    expected: indoc! {"
3420                        zero
3421                        one
3422                        two
3423                        THREE
3424                        four
3425                        five
3426                    "},
3427                },
3428                Case {
3429                    name: "multi_line_replacement",
3430                    original: indoc! {"
3431                        a
3432                        b
3433                        c
3434                        d
3435                        e
3436                    "},
3437                    model_output: indoc! {"
3438                        a
3439                        <|fim_middle|>
3440                        B
3441                        C
3442                        D
3443                        <|fim_suffix|>
3444                        e
3445                    "},
3446                    expected: indoc! {"
3447                        a
3448                        B
3449                        C
3450                        D
3451                        e
3452                    "},
3453                },
3454                Case {
3455                    name: "insertion_between_existing_lines",
3456                    original: indoc! {"
3457                        a
3458                        b
3459                        c
3460                    "},
3461                    model_output: indoc! {"
3462                        a
3463                        <|fim_middle|>
3464                        X
3465                        <|fim_suffix|>
3466                        b
3467                    "},
3468                    expected: indoc! {"
3469                        a
3470                        X
3471                        b
3472                        c
3473                    "},
3474                },
3475                Case {
3476                    name: "deletion",
3477                    original: indoc! {"
3478                        a
3479                        b
3480                        c
3481                        d
3482                    "},
3483                    model_output: indoc! {"
3484                        a
3485                        <|fim_middle|>
3486                        <|fim_suffix|>
3487                        c
3488                    "},
3489                    expected: indoc! {"
3490                        a
3491                        c
3492                        d
3493                    "},
3494                },
3495                Case {
3496                    name: "replacement_at_start_no_prefix_context",
3497                    original: indoc! {"
3498                        a
3499                        b
3500                        c
3501                    "},
3502                    model_output: indoc! {"
3503                        <|fim_middle|>
3504                        X
3505                        <|fim_suffix|>
3506                        b
3507                    "},
3508                    expected: indoc! {"
3509                        X
3510                        b
3511                        c
3512                    "},
3513                },
3514                Case {
3515                    name: "replacement_at_end_no_suffix_context",
3516                    original: indoc! {"
3517                        a
3518                        b
3519                        c
3520                    "},
3521                    model_output: indoc! {"
3522                        b
3523                        <|fim_middle|>
3524                        Z
3525                        <|fim_suffix|>
3526                    "},
3527                    expected: indoc! {"
3528                        a
3529                        b
3530                        Z
3531                    "},
3532                },
3533                Case {
3534                    name: "context_with_trailing_newline_is_preserved",
3535                    original: indoc! {"
3536                        a
3537                        b
3538                        c
3539                    "},
3540                    model_output: indoc! {"
3541                        a
3542                        <|fim_middle|>
3543                        B
3544                        <|fim_suffix|>
3545                        c
3546                    "},
3547                    expected: indoc! {"
3548                        a
3549                        B
3550                        c
3551                    "},
3552                },
3553                Case {
3554                    name: "cursor_marker_passes_through_untouched",
3555                    original: indoc! {"
3556                        a
3557                        b
3558                        c
3559                    "},
3560                    model_output: indoc! {"
3561                        a
3562                        <|fim_middle|>
3563                        B<|user_cursor|>B
3564                        <|fim_suffix|>
3565                        c
3566                    "},
3567                    expected: indoc! {"
3568                        a
3569                        B<|user_cursor|>B
3570                        c
3571                    "},
3572                },
3573                Case {
3574                    name: "multiple_prefix_context_lines",
3575                    original: indoc! {"
3576                        a
3577                        b
3578                        c
3579                        d
3580                        e
3581                    "},
3582                    model_output: indoc! {"
3583                        b
3584                        c
3585                        <|fim_middle|>
3586                        D
3587                        <|fim_suffix|>
3588                        e
3589                    "},
3590                    expected: indoc! {"
3591                        a
3592                        b
3593                        c
3594                        D
3595                        e
3596                    "},
3597                },
3598            ];
3599
3600            for case in cases {
3601                let (edit_range, replacement) =
3602                    apply_variable_edit(case.original, case.model_output).unwrap();
3603                let mut edited = case.original.to_string();
3604                edited.replace_range(edit_range, &replacement);
3605                assert_eq!(edited, case.expected, "{}", case.name);
3606            }
3607        }
3608
3609        #[test]
3610        fn test_patch_to_variable_edit() {
3611            struct Case {
3612                name: &'static str,
3613                old: &'static str,
3614                patch: &'static str,
3615                cursor_offset: Option<usize>,
3616                expected_variable_edit: &'static str,
3617                expected_after_apply: &'static str,
3618            }
3619
3620            let cases = [
3621                Case {
3622                    name: "simple_replacement",
3623                    old: indoc! {"
3624                        zero
3625                        one
3626                        two
3627                        three
3628                        four
3629                        five
3630                    "},
3631                    patch: indoc! {"
3632                        @@ -3,3 +3,3 @@
3633                         two
3634                        -three
3635                        +THREE
3636                         four
3637                    "},
3638                    cursor_offset: None,
3639                    expected_variable_edit: indoc! {"
3640                        one
3641                        two
3642                        <|fim_middle|>
3643                        THREE
3644                        <|fim_suffix|>
3645                        four
3646                        five
3647                    "},
3648                    expected_after_apply: indoc! {"
3649                        zero
3650                        one
3651                        two
3652                        THREE
3653                        four
3654                        five
3655                    "},
3656                },
3657                Case {
3658                    name: "insertion",
3659                    old: indoc! {"
3660                        a
3661                        b
3662                        c
3663                        d
3664                        e
3665                    "},
3666                    patch: indoc! {"
3667                        @@ -2,0 +3,1 @@
3668                         b
3669                        +X
3670                         c
3671                    "},
3672                    cursor_offset: None,
3673                    expected_variable_edit: indoc! {"
3674                        a
3675                        b
3676                        <|fim_middle|>
3677                        X
3678                        <|fim_suffix|>
3679                        c
3680                        d
3681                    "},
3682                    expected_after_apply: indoc! {"
3683                        a
3684                        b
3685                        X
3686                        c
3687                        d
3688                        e
3689                    "},
3690                },
3691                Case {
3692                    name: "deletion",
3693                    old: indoc! {"
3694                        a
3695                        b
3696                        c
3697                        d
3698                        e
3699                    "},
3700                    patch: indoc! {"
3701                        @@ -2,3 +2,2 @@
3702                         b
3703                        -c
3704                         d
3705                    "},
3706                    cursor_offset: None,
3707                    expected_variable_edit: indoc! {"
3708                        a
3709                        b
3710                        <|fim_middle|>
3711                        <|fim_suffix|>
3712                        d
3713                        e
3714                    "},
3715                    expected_after_apply: indoc! {"
3716                        a
3717                        b
3718                        d
3719                        e
3720                    "},
3721                },
3722                Case {
3723                    name: "edit_near_start",
3724                    old: indoc! {"
3725                        first
3726                        second
3727                        third
3728                        fourth
3729                    "},
3730                    patch: indoc! {"
3731                        @@ -1,1 +1,1 @@
3732                        -first
3733                        +FIRST
3734                    "},
3735                    cursor_offset: None,
3736                    expected_variable_edit: indoc! {"
3737                        <|fim_middle|>
3738                        FIRST
3739                        <|fim_suffix|>
3740                        second
3741                        third
3742                    "},
3743                    expected_after_apply: indoc! {"
3744                        FIRST
3745                        second
3746                        third
3747                        fourth
3748                    "},
3749                },
3750                Case {
3751                    name: "edit_near_end",
3752                    old: indoc! {"
3753                        first
3754                        second
3755                        third
3756                        fourth
3757                    "},
3758                    patch: indoc! {"
3759                        @@ -4,1 +4,1 @@
3760                        -fourth
3761                        +FOURTH
3762                    "},
3763                    cursor_offset: None,
3764                    expected_variable_edit: indoc! {"
3765                        second
3766                        third
3767                        <|fim_middle|>
3768                        FOURTH
3769                        <|fim_suffix|>
3770                    "},
3771                    expected_after_apply: indoc! {"
3772                        first
3773                        second
3774                        third
3775                        FOURTH
3776                    "},
3777                },
3778                Case {
3779                    name: "cursor_at_start_of_replacement",
3780                    old: indoc! {"
3781                        zero
3782                        one
3783                        two
3784                        three
3785                        four
3786                        five
3787                    "},
3788                    patch: indoc! {"
3789                        @@ -3,3 +3,3 @@
3790                         two
3791                        -three
3792                        +THREE
3793                         four
3794                    "},
3795                    cursor_offset: Some(4),
3796                    expected_variable_edit: indoc! {"
3797                        one
3798                        two
3799                        <|fim_middle|>
3800                        <|user_cursor|>THREE
3801                        <|fim_suffix|>
3802                        four
3803                        five
3804                    "},
3805                    expected_after_apply: indoc! {"
3806                        zero
3807                        one
3808                        two
3809                        <|user_cursor|>THREE
3810                        four
3811                        five
3812                    "},
3813                },
3814                Case {
3815                    name: "cursor_in_middle_of_replacement",
3816                    old: indoc! {"
3817                        zero
3818                        one
3819                        two
3820                        three
3821                        four
3822                        five
3823                    "},
3824                    patch: indoc! {"
3825                        @@ -3,3 +3,3 @@
3826                         two
3827                        -three
3828                        +THREE
3829                         four
3830                    "},
3831                    cursor_offset: Some(6),
3832                    expected_variable_edit: indoc! {"
3833                        one
3834                        two
3835                        <|fim_middle|>
3836                        TH<|user_cursor|>REE
3837                        <|fim_suffix|>
3838                        four
3839                        five
3840                    "},
3841                    expected_after_apply: indoc! {"
3842                        zero
3843                        one
3844                        two
3845                        TH<|user_cursor|>REE
3846                        four
3847                        five
3848                    "},
3849                },
3850                Case {
3851                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3852                    old: indoc! {"
3853                        one
3854                        a
3855                        b
3856                        c
3857                        d
3858                        two
3859                        a
3860                        b
3861                        c
3862                        d
3863                        three
3864                        a
3865                        b
3866                        c
3867                        d
3868                        four
3869                    "},
3870                    patch: indoc! {"
3871                        @@ -4,5 +4,5 @@
3872                         two
3873                         a
3874                         b
3875                        -c
3876                        +C
3877                         d
3878                         three
3879                    "},
3880                    cursor_offset: None,
3881                    expected_variable_edit: indoc! {"
3882                        two
3883                        a
3884                        b
3885                        <|fim_middle|>
3886                        C
3887                        <|fim_suffix|>
3888                        d
3889                        three
3890                    "},
3891                    expected_after_apply: indoc! {"
3892                        one
3893                        a
3894                        b
3895                        c
3896                        d
3897                        two
3898                        a
3899                        b
3900                        C
3901                        d
3902                        three
3903                        a
3904                        b
3905                        c
3906                        d
3907                        four
3908                    "},
3909                },
3910                Case {
3911                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3912                    old: indoc! {"
3913                        {
3914                            {
3915                                one();
3916                            }
3917                        }
3918                        {
3919                            {
3920                                two();
3921                            }
3922                        }
3923                        {
3924                            {
3925                                three();
3926                            }
3927                        }
3928                        {
3929                            {
3930                                four();
3931                            }
3932                        }
3933                    "},
3934                    patch: indoc! {"
3935                        @@ -4,5 +4,5 @@
3936                             {
3937                        -        two();
3938                        +        TWO();
3939                             }
3940                    "},
3941                    cursor_offset: None,
3942                    expected_variable_edit: indoc! {"
3943                                one();
3944                            }
3945                        }
3946                        {
3947                            {
3948                        <|fim_middle|>
3949                                TWO();
3950                        <|fim_suffix|>
3951                            }
3952                        }
3953                        {
3954                            {
3955                                three();
3956                    "},
3957                    expected_after_apply: indoc! {"
3958                        {
3959                            {
3960                                one();
3961                            }
3962                        }
3963                        {
3964                            {
3965                                TWO();
3966                            }
3967                        }
3968                        {
3969                            {
3970                                three();
3971                            }
3972                        }
3973                        {
3974                            {
3975                                four();
3976                            }
3977                        }
3978                    "},
3979                },
3980            ];
3981
3982            for case in cases {
3983                let output =
3984                    patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
3985                        .unwrap_or_else(|error| {
3986                            panic!("failed converting patch for {}: {error}", case.name)
3987                        });
3988                assert_eq!(
3989                    output, case.expected_variable_edit,
3990                    "patch->variable_edit mismatch for {}",
3991                    case.name
3992                );
3993
3994                let (edit_range, replacement) = apply_variable_edit(case.old, &output)
3995                    .unwrap_or_else(|error| {
3996                        panic!("failed applying variable_edit for {}: {error}", case.name)
3997                    });
3998                let mut edited_by_variable_edit = case.old.to_string();
3999                edited_by_variable_edit.replace_range(edit_range, &replacement);
4000                assert_eq!(
4001                    edited_by_variable_edit, case.expected_after_apply,
4002                    "variable_edit apply mismatch for {}",
4003                    case.name
4004                );
4005
4006                let (expected_edit_range, expected_replacement) =
4007                    apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
4008                        |error| {
4009                            panic!(
4010                                "failed applying expected variable_edit for {}: {error}",
4011                                case.name
4012                            )
4013                        },
4014                    );
4015                let mut edited_by_expected_variable_edit = case.old.to_string();
4016                edited_by_expected_variable_edit
4017                    .replace_range(expected_edit_range, &expected_replacement);
4018                assert_eq!(
4019                    edited_by_expected_variable_edit, case.expected_after_apply,
4020                    "expected variable_edit apply mismatch for {}",
4021                    case.name
4022                );
4023            }
4024        }
4025
4026        #[test]
4027        fn test_write_cursor_excerpt_section() {
4028            let path = Path::new("test.rs");
4029            let context = "fn main() {\n    hello();\n}\n";
4030            let cursor_offset = 17;
4031            let mut prompt = String::new();
4032            write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
4033            assert_eq!(
4034                prompt,
4035                "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
4036            );
4037        }
4038    }
4039}
4040
4041/// The zeta1 prompt format
4042pub mod zeta1 {
4043    use super::*;
4044    use std::fmt::Write;
4045
    /// Cursor marker used by the zeta1 format. `format_zeta1_excerpt` inserts it
    /// at the cursor offset, and `clean_zeta1_model_output` replaces it with the
    /// parent module's `CURSOR_MARKER`.
    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
    /// Written on its own line at the top of the excerpt when the excerpt
    /// starts at the beginning of the file (see `format_zeta1_excerpt`).
    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
    /// Written on its own line immediately before the editable region.
    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
    /// Written on its own line immediately after the editable region.
    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";

    /// Opening text of every zeta1 prompt; the formatted user edits follow it.
    const INSTRUCTION_HEADER: &str = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n\n",
        "### User Edits:\n\n"
    );
    /// Separator placed between the user edits and the excerpt.
    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
    /// Trailing text of the prompt, placed after the excerpt.
    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
4060
4061    /// Formats a complete zeta1 prompt from the input events and excerpt.
4062    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
4063        let mut prompt = String::with_capacity(
4064            INSTRUCTION_HEADER.len()
4065                + input_events.len()
4066                + EXCERPT_HEADER.len()
4067                + input_excerpt.len()
4068                + RESPONSE_HEADER.len(),
4069        );
4070        prompt.push_str(INSTRUCTION_HEADER);
4071        prompt.push_str(input_events);
4072        prompt.push_str(EXCERPT_HEADER);
4073        prompt.push_str(input_excerpt);
4074        prompt.push_str(RESPONSE_HEADER);
4075        prompt
4076    }
4077
4078    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
4079    /// editable and context byte-offset ranges within `cursor_excerpt`.
4080    pub fn format_zeta1_from_input(
4081        input: &ZetaPromptInput,
4082        editable_range: Range<usize>,
4083        context_range: Range<usize>,
4084    ) -> String {
4085        let events = format_zeta1_events(&input.events);
4086        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
4087        format_zeta1_prompt(&events, &excerpt)
4088    }
4089
4090    /// Formats events in zeta1 style (oldest first).
4091    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
4092        let mut result = String::new();
4093        for event in
4094            events
4095                .iter()
4096                .skip(events.len().saturating_sub(max_edit_event_count_for_format(
4097                    &ZetaFormat::V0114180EditableRegion,
4098                )))
4099        {
4100            let event_string = format_zeta1_event(event);
4101            if event_string.is_empty() {
4102                continue;
4103            }
4104            if !result.is_empty() {
4105                result.push_str("\n\n");
4106            }
4107            result.push_str(&event_string);
4108        }
4109        result
4110    }
4111
4112    fn format_zeta1_event(event: &Event) -> String {
4113        match event {
4114            Event::BufferChange {
4115                path,
4116                old_path,
4117                diff,
4118                ..
4119            } => {
4120                let mut prompt = String::new();
4121                if old_path != path {
4122                    writeln!(
4123                        prompt,
4124                        "User renamed {} to {}\n",
4125                        old_path.display(),
4126                        path.display()
4127                    )
4128                    .ok();
4129                }
4130                if !diff.is_empty() {
4131                    write!(
4132                        prompt,
4133                        "User edited {}:\n```diff\n{}\n```",
4134                        path.display(),
4135                        diff
4136                    )
4137                    .ok();
4138                }
4139                prompt
4140            }
4141        }
4142    }
4143
4144    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
4145    /// within `cursor_excerpt`.
4146    fn format_zeta1_excerpt(
4147        input: &ZetaPromptInput,
4148        editable_range: Range<usize>,
4149        context_range: Range<usize>,
4150    ) -> String {
4151        let path_str = input.cursor_path.to_string_lossy();
4152        let excerpt = &*input.cursor_excerpt;
4153        let cursor_offset = input.cursor_offset_in_excerpt;
4154
4155        let mut prompt = String::new();
4156        writeln!(&mut prompt, "```{path_str}").ok();
4157
4158        let starts_at_file_beginning =
4159            input.excerpt_start_row == Some(0) && context_range.start == 0;
4160        if starts_at_file_beginning {
4161            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
4162        }
4163
4164        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
4165
4166        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
4167        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
4168        prompt.push_str(CURSOR_MARKER);
4169        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
4170        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
4171
4172        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
4173        write!(prompt, "\n```").ok();
4174
4175        prompt
4176    }
4177
4178    /// Cleans zeta1 model output by extracting content between editable region
4179    /// markers and converting the zeta1 cursor marker to the universal one.
4180    /// Returns `None` if the output doesn't contain the expected markers.
4181    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
4182        let content = output.replace(CURSOR_MARKER, "");
4183
4184        let content_start = content
4185            .find(EDITABLE_REGION_START_MARKER)
4186            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
4187            .map(|pos| {
4188                if content.as_bytes().get(pos) == Some(&b'\n') {
4189                    pos + 1
4190                } else {
4191                    pos
4192                }
4193            })
4194            .unwrap_or(0);
4195
4196        let content_end = content
4197            .find(EDITABLE_REGION_END_MARKER)
4198            .map(|pos| {
4199                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
4200                    pos - 1
4201                } else {
4202                    pos
4203                }
4204            })
4205            .unwrap_or(content.len());
4206
4207        if content_start > content_end {
4208            return Some(String::new());
4209        }
4210
4211        let extracted = &content[content_start..content_end];
4212
4213        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
4214            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
4215            let text_before_cursor = text_before_cursor
4216                .find(EDITABLE_REGION_START_MARKER)
4217                .map(|pos| {
4218                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
4219                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
4220                        after_marker + 1
4221                    } else {
4222                        after_marker
4223                    }
4224                })
4225                .unwrap_or(0);
4226            let offset_in_extracted = zeta1_cursor_pos
4227                .saturating_sub(text_before_cursor)
4228                .min(extracted.len());
4229            offset_in_extracted
4230        });
4231
4232        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
4233        if let Some(offset) = cursor_offset {
4234            result.push_str(&extracted[..offset]);
4235            result.push_str(super::CURSOR_MARKER);
4236            result.push_str(&extracted[offset..]);
4237        } else {
4238            result.push_str(extracted);
4239        }
4240
4241        Some(result)
4242    }
4243}
4244
4245#[cfg(test)]
4246mod tests {
4247    use super::*;
4248    use indoc::indoc;
4249
4250    fn make_input(
4251        cursor_excerpt: &str,
4252        editable_range: Range<usize>,
4253        cursor_offset: usize,
4254        events: Vec<Event>,
4255        related_files: Vec<RelatedFile>,
4256    ) -> ZetaPromptInput {
4257        let context_range = 0..cursor_excerpt.len();
4258        ZetaPromptInput {
4259            cursor_path: Path::new("test.rs").into(),
4260            cursor_excerpt: cursor_excerpt.into(),
4261            cursor_offset_in_excerpt: cursor_offset,
4262            excerpt_start_row: None,
4263            events: events.into_iter().map(Arc::new).collect(),
4264            related_files: Some(related_files),
4265            active_buffer_diagnostics: vec![],
4266            excerpt_ranges: ExcerptRanges {
4267                editable_150: editable_range.clone(),
4268                editable_180: editable_range.clone(),
4269                editable_350: editable_range,
4270                editable_150_context_350: context_range.clone(),
4271                editable_180_context_350: context_range.clone(),
4272                editable_350_context_150: context_range,
4273                ..Default::default()
4274            },
4275            syntax_ranges: None,
4276            experiment: None,
4277            in_open_source_repo: false,
4278            can_collect_data: false,
4279            repo_url: None,
4280        }
4281    }
4282
4283    fn make_input_with_context_range(
4284        excerpt: &str,
4285        editable_range: Range<usize>,
4286        context_range: Range<usize>,
4287        cursor_offset: usize,
4288    ) -> ZetaPromptInput {
4289        ZetaPromptInput {
4290            cursor_path: Path::new("test.rs").into(),
4291            cursor_excerpt: excerpt.into(),
4292            cursor_offset_in_excerpt: cursor_offset,
4293            excerpt_start_row: None,
4294            events: vec![],
4295            related_files: Some(vec![]),
4296            active_buffer_diagnostics: vec![],
4297            excerpt_ranges: ExcerptRanges {
4298                editable_150: editable_range.clone(),
4299                editable_180: editable_range.clone(),
4300                editable_350: editable_range,
4301                editable_150_context_350: context_range.clone(),
4302                editable_180_context_350: context_range.clone(),
4303                editable_350_context_150: context_range,
4304                ..Default::default()
4305            },
4306            syntax_ranges: None,
4307            experiment: None,
4308            in_open_source_repo: false,
4309            can_collect_data: false,
4310            repo_url: None,
4311        }
4312    }
4313
4314    fn make_event(path: &str, diff: &str) -> Event {
4315        Event::BufferChange {
4316            path: Path::new(path).into(),
4317            old_path: Path::new(path).into(),
4318            diff: diff.to_string(),
4319            predicted: false,
4320            in_open_source_repo: false,
4321        }
4322    }
4323
4324    fn make_related_file(path: &str, content: &str) -> RelatedFile {
4325        RelatedFile {
4326            path: Path::new(path).into(),
4327            max_row: content.lines().count() as u32,
4328            excerpts: vec![RelatedExcerpt {
4329                row_range: 0..content.lines().count() as u32,
4330                text: content.into(),
4331                order: 0,
4332            }],
4333            in_open_source_repo: false,
4334        }
4335    }
4336
4337    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> Option<String> {
4338        format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4339    }
4340
4341    fn budget_with_margin(requested_tokens: usize) -> usize {
4342        ((requested_tokens as f64) / 0.9).ceil() as usize
4343    }
4344
4345    #[test]
4346    fn test_no_truncation_when_within_budget() {
4347        let input = make_input(
4348            "prefix\neditable\nsuffix",
4349            7..15,
4350            10,
4351            vec![make_event("a.rs", "-old\n+new\n")],
4352            vec![make_related_file("related.rs", "fn helper() {}\n")],
4353        );
4354
4355        assert_eq!(
4356            format_with_budget(&input, 10000).unwrap(),
4357            indoc! {r#"
4358                <|file_sep|>related.rs
4359                fn helper() {}
4360                <|file_sep|>edit history
4361                --- a/a.rs
4362                +++ b/a.rs
4363                -old
4364                +new
4365                <|file_sep|>test.rs
4366                <|fim_prefix|>
4367                prefix
4368                <|fim_middle|>current
4369                edi<|user_cursor|>table
4370                <|fim_suffix|>
4371
4372                suffix
4373                <|fim_middle|>updated
4374            "#}
4375            .to_string()
4376        );
4377    }
4378
4379    #[test]
4380    fn test_truncation_drops_edit_history_when_budget_tight() {
4381        let input = make_input(
4382            "code",
4383            0..4,
4384            2,
4385            vec![make_event("a.rs", "-x\n+y\n")],
4386            vec![
4387                make_related_file("r1.rs", "aaaaaaa\n"),
4388                make_related_file("r2.rs", "bbbbbbb\n"),
4389            ],
4390        );
4391
4392        assert_eq!(
4393            format_with_budget(&input, 10000).unwrap(),
4394            indoc! {r#"
4395                <|file_sep|>r1.rs
4396                aaaaaaa
4397                <|file_sep|>r2.rs
4398                bbbbbbb
4399                <|file_sep|>edit history
4400                --- a/a.rs
4401                +++ b/a.rs
4402                -x
4403                +y
4404                <|file_sep|>test.rs
4405                <|fim_prefix|>
4406                <|fim_middle|>current
4407                co<|user_cursor|>de
4408                <|fim_suffix|>
4409                <|fim_middle|>updated
4410            "#}
4411            .to_string()
4412        );
4413
4414        assert_eq!(
4415            format_with_budget(&input, budget_with_margin(55)),
4416            Some(
4417                indoc! {r#"
4418                <|file_sep|>edit history
4419                --- a/a.rs
4420                +++ b/a.rs
4421                -x
4422                +y
4423                <|file_sep|>test.rs
4424                <|fim_prefix|>
4425                <|fim_middle|>current
4426                co<|user_cursor|>de
4427                <|fim_suffix|>
4428                <|fim_middle|>updated
4429            "#}
4430                .to_string()
4431            )
4432        );
4433    }
4434
4435    #[test]
4436    fn test_truncation_includes_partial_excerpts() {
4437        let input = make_input(
4438            "x",
4439            0..1,
4440            0,
4441            vec![],
4442            vec![RelatedFile {
4443                path: Path::new("big.rs").into(),
4444                max_row: 30,
4445                in_open_source_repo: false,
4446                excerpts: vec![
4447                    RelatedExcerpt {
4448                        row_range: 0..10,
4449                        text: "first excerpt\n".into(),
4450                        order: 0,
4451                    },
4452                    RelatedExcerpt {
4453                        row_range: 10..20,
4454                        text: "second excerpt\n".into(),
4455                        order: 0,
4456                    },
4457                    RelatedExcerpt {
4458                        row_range: 20..30,
4459                        text: "third excerpt\n".into(),
4460                        order: 0,
4461                    },
4462                ],
4463            }],
4464        );
4465
4466        assert_eq!(
4467            format_with_budget(&input, 10000).unwrap(),
4468            indoc! {r#"
4469                <|file_sep|>big.rs
4470                first excerpt
4471                ...
4472                second excerpt
4473                ...
4474                third excerpt
4475                <|file_sep|>test.rs
4476                <|fim_prefix|>
4477                <|fim_middle|>current
4478                <|user_cursor|>x
4479                <|fim_suffix|>
4480                <|fim_middle|>updated
4481            "#}
4482            .to_string()
4483        );
4484
4485        assert_eq!(
4486            format_with_budget(&input, budget_with_margin(50)).unwrap(),
4487            indoc! {r#"
4488                <|file_sep|>big.rs
4489                first excerpt
4490                ...
4491                <|file_sep|>test.rs
4492                <|fim_prefix|>
4493                <|fim_middle|>current
4494                <|user_cursor|>x
4495                <|fim_suffix|>
4496                <|fim_middle|>updated
4497            "#}
4498            .to_string()
4499        );
4500    }
4501
4502    #[test]
4503    fn test_truncation_prioritizes_lower_order_excerpts() {
4504        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
4505        // With tight budget, only the lower-order excerpt from file_b should be included.
4506        let input = make_input(
4507            "x",
4508            0..1,
4509            0,
4510            vec![],
4511            vec![
4512                RelatedFile {
4513                    path: Path::new("file_a.rs").into(),
4514                    max_row: 10,
4515                    in_open_source_repo: false,
4516                    excerpts: vec![RelatedExcerpt {
4517                        row_range: 0..10,
4518                        text: "low priority content\n".into(),
4519                        order: 5,
4520                    }],
4521                },
4522                RelatedFile {
4523                    path: Path::new("file_b.rs").into(),
4524                    max_row: 10,
4525                    in_open_source_repo: false,
4526                    excerpts: vec![RelatedExcerpt {
4527                        row_range: 0..10,
4528                        text: "high priority content\n".into(),
4529                        order: 1,
4530                    }],
4531                },
4532            ],
4533        );
4534
4535        // With large budget, both files included; rendered in stable lexicographic order.
4536        assert_eq!(
4537            format_with_budget(&input, 10000).unwrap(),
4538            indoc! {r#"
4539                <|file_sep|>file_a.rs
4540                low priority content
4541                <|file_sep|>file_b.rs
4542                high priority content
4543                <|file_sep|>test.rs
4544                <|fim_prefix|>
4545                <|fim_middle|>current
4546                <|user_cursor|>x
4547                <|fim_suffix|>
4548                <|fim_middle|>updated
4549            "#}
4550            .to_string()
4551        );
4552
4553        // With tight budget, only file_b (lower order) fits.
4554        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
4555        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
4556        // file_a would need another 14 tokens, which doesn't fit.
4557        assert_eq!(
4558            format_with_budget(&input, budget_with_margin(52)).unwrap(),
4559            indoc! {r#"
4560                <|file_sep|>file_b.rs
4561                high priority content
4562                <|file_sep|>test.rs
4563                <|fim_prefix|>
4564                <|fim_middle|>current
4565                <|user_cursor|>x
4566                <|fim_suffix|>
4567                <|fim_middle|>updated
4568            "#}
4569            .to_string()
4570        );
4571    }
4572
4573    #[test]
4574    fn test_truncation_drops_high_order_excerpts_within_file() {
4575        // A single file has excerpts at order 1 and order 3. With a tight budget,
4576        // only the order-1 excerpts are included while the order-3 excerpt is
4577        // dropped — even though they belong to the same file. This also preserves
4578        // the parent invariant: parent outline items have order ≤ their best
4579        // child, so they're always included when any child is.
4580        let input = make_input(
4581            "x",
4582            0..1,
4583            0,
4584            vec![],
4585            vec![RelatedFile {
4586                path: Path::new("mod.rs").into(),
4587                max_row: 30,
4588                in_open_source_repo: false,
4589                excerpts: vec![
4590                    RelatedExcerpt {
4591                        row_range: 0..5,
4592                        text: "mod header\n".into(),
4593                        order: 1,
4594                    },
4595                    RelatedExcerpt {
4596                        row_range: 5..15,
4597                        text: "important fn\n".into(),
4598                        order: 1,
4599                    },
4600                    RelatedExcerpt {
4601                        row_range: 15..30,
4602                        text: "less important fn\n".into(),
4603                        order: 3,
4604                    },
4605                ],
4606            }],
4607        );
4608
4609        // With large budget, all three excerpts included.
4610        assert_eq!(
4611            format_with_budget(&input, 10000).unwrap(),
4612            indoc! {r#"
4613                <|file_sep|>mod.rs
4614                mod header
4615                ...
4616                important fn
4617                ...
4618                less important fn
4619                <|file_sep|>test.rs
4620                <|fim_prefix|>
4621                <|fim_middle|>current
4622                <|user_cursor|>x
4623                <|fim_suffix|>
4624                <|fim_middle|>updated
4625            "#}
4626            .to_string()
4627        );
4628
4629        // With tight budget, only order<=1 excerpts included (header + important fn).
4630        assert_eq!(
4631            format_with_budget(&input, budget_with_margin(55)).unwrap(),
4632            indoc! {r#"
4633                <|file_sep|>mod.rs
4634                mod header
4635                ...
4636                important fn
4637                ...
4638                <|file_sep|>test.rs
4639                <|fim_prefix|>
4640                <|fim_middle|>current
4641                <|user_cursor|>x
4642                <|fim_suffix|>
4643                <|fim_middle|>updated
4644            "#}
4645            .to_string()
4646        );
4647    }
4648
4649    #[test]
4650    fn test_truncation_drops_older_events_first() {
4651        let input = make_input(
4652            "x",
4653            0..1,
4654            0,
4655            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
4656            vec![],
4657        );
4658
4659        assert_eq!(
4660            format_with_budget(&input, 10000).unwrap(),
4661            indoc! {r#"
4662                <|file_sep|>edit history
4663                --- a/old.rs
4664                +++ b/old.rs
4665                -1
4666                --- a/new.rs
4667                +++ b/new.rs
4668                -2
4669                <|file_sep|>test.rs
4670                <|fim_prefix|>
4671                <|fim_middle|>current
4672                <|user_cursor|>x
4673                <|fim_suffix|>
4674                <|fim_middle|>updated
4675            "#}
4676            .to_string()
4677        );
4678
4679        assert_eq!(
4680            format_with_budget(&input, 60).unwrap(),
4681            indoc! {r#"
4682                <|file_sep|>edit history
4683                --- a/new.rs
4684                +++ b/new.rs
4685                -2
4686                <|file_sep|>test.rs
4687                <|fim_prefix|>
4688                <|fim_middle|>current
4689                <|user_cursor|>x
4690                <|fim_suffix|>
4691                <|fim_middle|>updated
4692            "#}
4693            .to_string()
4694        );
4695    }
4696
4697    #[test]
4698    fn test_cursor_excerpt_always_included_with_minimal_budget() {
4699        let input = make_input(
4700            "fn main() {}",
4701            0..12,
4702            3,
4703            vec![make_event("a.rs", "-old\n+new\n")],
4704            vec![make_related_file("related.rs", "helper\n")],
4705        );
4706
4707        assert!(format_with_budget(&input, 30).is_none())
4708    }
4709
4710    #[track_caller]
4711    fn format_seed_coder(input: &ZetaPromptInput) -> String {
4712        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4713            .expect("seed coder prompt formatting should succeed")
4714    }
4715
4716    #[track_caller]
4717    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4718        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4719            .expect("seed coder prompt formatting should succeed")
4720    }
4721
4722    #[test]
4723    fn test_seed_coder_basic_format() {
4724        let input = make_input(
4725            "prefix\neditable\nsuffix",
4726            7..15,
4727            10,
4728            vec![make_event("a.rs", "-old\n+new\n")],
4729            vec![make_related_file("related.rs", "fn helper() {}\n")],
4730        );
4731
4732        assert_eq!(
4733            format_seed_coder(&input),
4734            indoc! {r#"
4735                <[fim-suffix]>
4736                suffix
4737                <[fim-prefix]><filename>related.rs
4738                fn helper() {}
4739
4740                <filename>edit_history
4741                --- a/a.rs
4742                +++ b/a.rs
4743                -old
4744                +new
4745
4746                <filename>test.rs
4747                prefix
4748                <<<<<<< CURRENT
4749                edi<|user_cursor|>table
4750                =======
4751                <[fim-middle]>"#}
4752        );
4753    }
4754
4755    #[test]
4756    fn test_v0317_formats_prompt_with_many_related_files() {
4757        let related_files = (0..900)
4758            .map(|index| {
4759                make_related_file(
4760                    &format!("related_{index}.rs"),
4761                    "fn helper() {\n    let value = 1;\n}\n",
4762                )
4763            })
4764            .collect();
4765
4766        let input = make_input(
4767            "code",
4768            0..4,
4769            2,
4770            vec![make_event("a.rs", "-x\n+y\n")],
4771            related_files,
4772        );
4773
4774        let prompt =
4775            format_prompt_with_budget_for_format(&input, ZetaFormat::V0317SeedMultiRegions, 4096);
4776
4777        assert!(prompt.is_some());
4778        let prompt = prompt.expect("v0317 should produce a prompt under high related-file count");
4779        assert!(prompt.contains("test.rs"));
4780        assert!(prompt.contains(CURSOR_MARKER));
4781    }
4782
4783    #[test]
4784    fn test_seed_coder_no_context() {
4785        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
4786
4787        assert_eq!(
4788            format_seed_coder(&input),
4789            indoc! {r#"
4790                <[fim-suffix]>
4791                after
4792                <[fim-prefix]><filename>test.rs
4793                before
4794                <<<<<<< CURRENT
4795                mid<|user_cursor|>dle
4796                =======
4797                <[fim-middle]>"#}
4798        );
4799    }
4800
4801    #[test]
4802    fn test_seed_coder_truncation_drops_context() {
4803        let input = make_input(
4804            "code",
4805            0..4,
4806            2,
4807            vec![make_event("a.rs", "-x\n+y\n")],
4808            vec![make_related_file("r1.rs", "content\n")],
4809        );
4810
4811        // With large budget, everything is included
4812        assert_eq!(
4813            format_seed_coder(&input),
4814            indoc! {r#"
4815                <[fim-suffix]>
4816                <[fim-prefix]><filename>r1.rs
4817                content
4818
4819                <filename>edit_history
4820                --- a/a.rs
4821                +++ b/a.rs
4822                -x
4823                +y
4824
4825                <filename>test.rs
4826                <<<<<<< CURRENT
4827                co<|user_cursor|>de
4828                =======
4829                <[fim-middle]>"#}
4830        );
4831
4832        assert_eq!(
4833            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 24),
4834            None
4835        );
4836
4837        assert_eq!(
4838            format_seed_coder_with_budget(&input, 40),
4839            indoc! {r#"
4840                <[fim-suffix]>
4841                <[fim-prefix]><filename>test.rs
4842                <<<<<<< CURRENT
4843                co<|user_cursor|>de
4844                =======
4845                <[fim-middle]>"#
4846            }
4847        )
4848    }
4849
4850    #[test]
4851    fn test_seed_coder_truncation_prioritizes_lower_order() {
4852        let input = make_input(
4853            "code",
4854            0..4,
4855            2,
4856            vec![],
4857            vec![
4858                RelatedFile {
4859                    path: Path::new("low_prio.rs").into(),
4860                    max_row: 5,
4861                    in_open_source_repo: false,
4862                    excerpts: vec![RelatedExcerpt {
4863                        row_range: 0..5,
4864                        text: "low prio\n".into(),
4865                        order: 10,
4866                    }],
4867                },
4868                RelatedFile {
4869                    path: Path::new("high_prio.rs").into(),
4870                    max_row: 5,
4871                    in_open_source_repo: false,
4872                    excerpts: vec![RelatedExcerpt {
4873                        row_range: 0..5,
4874                        text: "high prio\n".into(),
4875                        order: 1,
4876                    }],
4877                },
4878            ],
4879        );
4880
4881        // With large budget, both included; rendered in stable lexicographic order.
4882        assert_eq!(
4883            format_seed_coder(&input),
4884            indoc! {r#"
4885                <[fim-suffix]>
4886                <[fim-prefix]><filename>low_prio.rs
4887                low prio
4888                <filename>high_prio.rs
4889                high prio
4890
4891                <filename>test.rs
4892                <<<<<<< CURRENT
4893                co<|user_cursor|>de
4894                =======
4895                <[fim-middle]>"#}
4896        );
4897
4898        // With tight budget under the generic heuristic, context is dropped but the
4899        // minimal cursor section still fits.
4900        assert_eq!(
4901            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 44),
4902            Some(
4903                indoc! {r#"
4904                    <[fim-suffix]>
4905                    <[fim-prefix]><filename>test.rs
4906                    <<<<<<< CURRENT
4907                    co<|user_cursor|>de
4908                    =======
4909                    <[fim-middle]>"#}
4910                .to_string()
4911            )
4912        );
4913    }
4914
4915    #[test]
4916    fn test_format_zeta1_from_input_basic() {
4917        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
4918        let input = ZetaPromptInput {
4919            cursor_path: Path::new("src/main.rs").into(),
4920            cursor_excerpt: excerpt.into(),
4921            cursor_offset_in_excerpt: 30,
4922            excerpt_start_row: Some(0),
4923            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
4924            related_files: Some(vec![]),
4925            active_buffer_diagnostics: vec![],
4926            excerpt_ranges: ExcerptRanges {
4927                editable_150: 15..41,
4928                editable_180: 15..41,
4929                editable_350: 15..41,
4930                editable_150_context_350: 0..excerpt.len(),
4931                editable_180_context_350: 0..excerpt.len(),
4932                editable_350_context_150: 0..excerpt.len(),
4933                ..Default::default()
4934            },
4935            syntax_ranges: None,
4936            experiment: None,
4937            in_open_source_repo: false,
4938            can_collect_data: false,
4939            repo_url: None,
4940        };
4941
4942        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
4943
4944        assert_eq!(
4945            prompt,
4946            concat!(
4947                "### Instruction:\n",
4948                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4949                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4950                "into account the cursor location.\n",
4951                "\n",
4952                "### User Edits:\n",
4953                "\n",
4954                "User edited other.rs:\n",
4955                "```diff\n",
4956                "-old\n",
4957                "+new\n",
4958                "\n",
4959                "```\n",
4960                "\n",
4961                "### User Excerpt:\n",
4962                "\n",
4963                "```src/main.rs\n",
4964                "<|start_of_file|>\n",
4965                "fn before() {}\n",
4966                "<|editable_region_start|>\n",
4967                "fn foo() {\n",
4968                "    <|user_cursor_is_here|>let x = 1;\n",
4969                "\n",
4970                "<|editable_region_end|>}\n",
4971                "fn after() {}\n",
4972                "\n",
4973                "```\n",
4974                "\n",
4975                "### Response:\n",
4976            ),
4977        );
4978    }
4979
4980    #[test]
4981    fn test_format_zeta1_from_input_no_start_of_file() {
4982        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
4983        let input = ZetaPromptInput {
4984            cursor_path: Path::new("src/main.rs").into(),
4985            cursor_excerpt: excerpt.into(),
4986            cursor_offset_in_excerpt: 15,
4987            excerpt_start_row: Some(10),
4988            events: vec![],
4989            related_files: Some(vec![]),
4990            active_buffer_diagnostics: vec![],
4991            excerpt_ranges: ExcerptRanges {
4992                editable_150: 0..28,
4993                editable_180: 0..28,
4994                editable_350: 0..28,
4995                editable_150_context_350: 0..28,
4996                editable_180_context_350: 0..28,
4997                editable_350_context_150: 0..28,
4998                ..Default::default()
4999            },
5000            syntax_ranges: None,
5001            experiment: None,
5002            in_open_source_repo: false,
5003            can_collect_data: false,
5004            repo_url: None,
5005        };
5006
5007        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
5008
5009        assert_eq!(
5010            prompt,
5011            concat!(
5012                "### Instruction:\n",
5013                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5014                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5015                "into account the cursor location.\n",
5016                "\n",
5017                "### User Edits:\n",
5018                "\n",
5019                "\n",
5020                "\n",
5021                "### User Excerpt:\n",
5022                "\n",
5023                "```src/main.rs\n",
5024                "<|editable_region_start|>\n",
5025                "fn foo() {\n",
5026                "    <|user_cursor_is_here|>let x = 1;\n",
5027                "}\n",
5028                "\n",
5029                "<|editable_region_end|>\n",
5030                "```\n",
5031                "\n",
5032                "### Response:\n",
5033            ),
5034        );
5035    }
5036
5037    #[test]
5038    fn test_format_zeta1_from_input_with_sub_ranges() {
5039        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
5040        let editable_range = 10..37;
5041        let context_range = 0..excerpt.len();
5042
5043        let input = ZetaPromptInput {
5044            cursor_path: Path::new("test.rs").into(),
5045            cursor_excerpt: excerpt.into(),
5046            cursor_offset_in_excerpt: 25,
5047            excerpt_start_row: Some(0),
5048            events: vec![],
5049            related_files: Some(vec![]),
5050            active_buffer_diagnostics: vec![],
5051            excerpt_ranges: ExcerptRanges {
5052                editable_150: editable_range.clone(),
5053                editable_180: editable_range.clone(),
5054                editable_350: editable_range.clone(),
5055                editable_150_context_350: context_range.clone(),
5056                editable_180_context_350: context_range.clone(),
5057                editable_350_context_150: context_range.clone(),
5058                ..Default::default()
5059            },
5060            syntax_ranges: None,
5061            experiment: None,
5062            in_open_source_repo: false,
5063            can_collect_data: false,
5064            repo_url: None,
5065        };
5066
5067        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
5068
5069        assert_eq!(
5070            prompt,
5071            concat!(
5072                "### Instruction:\n",
5073                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5074                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5075                "into account the cursor location.\n",
5076                "\n",
5077                "### User Edits:\n",
5078                "\n",
5079                "\n",
5080                "\n",
5081                "### User Excerpt:\n",
5082                "\n",
5083                "```test.rs\n",
5084                "<|start_of_file|>\n",
5085                "// prefix\n",
5086                "<|editable_region_start|>\n",
5087                "fn foo() {\n",
5088                "    <|user_cursor_is_here|>let x = 1;\n",
5089                "}\n",
5090                "<|editable_region_end|>\n",
5091                "// suffix\n",
5092                "\n",
5093                "```\n",
5094                "\n",
5095                "### Response:\n",
5096            ),
5097        );
5098    }
5099
5100    #[test]
5101    fn test_max_event_count() {
5102        fn make_numbered_event(index: usize) -> Event {
5103            return make_event(
5104                &format!("event-{index}.rs"),
5105                &format!("-old-{index}\n+new-{index}\n"),
5106            );
5107        }
5108        let input = make_input(
5109            "x",
5110            0..1,
5111            0,
5112            (0..3).map(make_numbered_event).collect(),
5113            vec![],
5114        );
5115
5116        let edit_history_section = format_edit_history_within_budget(
5117            &input.events,
5118            "<|file_sep|>",
5119            "edit history",
5120            usize::MAX,
5121            5,
5122        );
5123
5124        assert_eq!(
5125            &edit_history_section,
5126            indoc!(
5127                "
5128                <|file_sep|>edit history
5129                --- a/event-0.rs
5130                +++ b/event-0.rs
5131                -old-0
5132                +new-0
5133                --- a/event-1.rs
5134                +++ b/event-1.rs
5135                -old-1
5136                +new-1
5137                --- a/event-2.rs
5138                +++ b/event-2.rs
5139                -old-2
5140                +new-2
5141            "
5142            )
5143        );
5144
5145        let edit_history_section = format_edit_history_within_budget(
5146            &input.events,
5147            "<|file_sep|>",
5148            "edit history",
5149            usize::MAX,
5150            2,
5151        );
5152
5153        assert_eq!(
5154            &edit_history_section,
5155            indoc!(
5156                "
5157                <|file_sep|>edit history
5158                --- a/event-1.rs
5159                +++ b/event-1.rs
5160                -old-1
5161                +new-1
5162                --- a/event-2.rs
5163                +++ b/event-2.rs
5164                -old-2
5165                +new-2
5166            "
5167            )
5168        );
5169
5170        let edit_history_section = format_edit_history_within_budget(
5171            &input.events,
5172            "<|file_sep|>",
5173            "edit history",
5174            usize::MAX,
5175            0,
5176        );
5177
5178        assert_eq!(&edit_history_section, "");
5179    }
5180
5181    #[test]
5182    fn test_clean_zeta1_model_output_basic() {
5183        let output = indoc! {"
5184            <|editable_region_start|>
5185            fn main() {
5186                println!(\"hello\");
5187            }
5188            <|editable_region_end|>
5189        "};
5190
5191        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5192        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
5193    }
5194
5195    #[test]
5196    fn test_clean_zeta1_model_output_with_cursor() {
5197        let output = indoc! {"
5198            <|editable_region_start|>
5199            fn main() {
5200                <|user_cursor_is_here|>println!(\"hello\");
5201            }
5202            <|editable_region_end|>
5203        "};
5204
5205        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5206        assert_eq!(
5207            cleaned,
5208            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
5209        );
5210    }
5211
5212    #[test]
5213    fn test_clean_zeta1_model_output_no_markers() {
5214        let output = "fn main() {}\n";
5215        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5216        assert_eq!(cleaned, "fn main() {}\n");
5217    }
5218
5219    #[test]
5220    fn test_clean_zeta1_model_output_empty_region() {
5221        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
5222        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5223        assert_eq!(cleaned, "");
5224    }
5225
5226    fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
5227        let mut result = excerpt.to_string();
5228        result.replace_range(
5229            parsed_output.range_in_excerpt.clone(),
5230            &parsed_output.new_editable_region,
5231        );
5232        result
5233    }
5234
5235    #[test]
5236    fn test_parse_zeta2_model_output() {
5237        let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
5238        let context_start = excerpt.find("ctx start").unwrap();
5239        let context_end = excerpt.find("after ctx").unwrap();
5240        let editable_start = excerpt.find("editable old").unwrap();
5241        let editable_end = editable_start + "editable old\n".len();
5242        let input = make_input_with_context_range(
5243            excerpt,
5244            editable_start..editable_end,
5245            context_start..context_end,
5246            editable_start,
5247        );
5248
5249        let output = parse_zeta2_model_output(
5250            "editable new\n>>>>>>> UPDATED\n",
5251            ZetaFormat::V0131GitMergeMarkersPrefix,
5252            &input,
5253        )
5254        .unwrap();
5255
5256        assert_eq!(
5257            apply_edit(excerpt, &output),
5258            "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
5259        );
5260    }
5261
5262    #[test]
5263    fn test_parse_zeta2_model_output_identity() {
5264        let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
5265        let editable_start = excerpt.find("bbb").unwrap();
5266        let editable_end = excerpt.find("ddd").unwrap();
5267        let input = make_input_with_context_range(
5268            excerpt,
5269            editable_start..editable_end,
5270            0..excerpt.len(),
5271            editable_start,
5272        );
5273
5274        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5275        let output =
5276            parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
5277
5278        assert_eq!(apply_edit(excerpt, &output), excerpt);
5279    }
5280
5281    #[test]
5282    fn test_parse_zeta2_model_output_strips_end_marker() {
5283        let excerpt = "hello\nworld\n";
5284        let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
5285
5286        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5287        let output1 =
5288            parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
5289        let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();
5290
5291        assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
5292        assert_eq!(apply_edit(excerpt, &output1), "new content\n");
5293    }
5294
5295    #[test]
5296    fn test_special_tokens_not_triggered_by_comment_separator() {
5297        // Regression test for https://github.com/zed-industries/zed/issues/52489
5298        let excerpt = "fn main() {\n    // =======\n    println!(\"hello\");\n}\n";
5299        let input = make_input(excerpt, 0..excerpt.len(), 0, vec![], vec![]);
5300        assert!(
5301            !prompt_input_contains_special_tokens(&input, ZetaFormat::V0131GitMergeMarkersPrefix),
5302            "comment containing ======= should not trigger special token detection"
5303        );
5304    }
5305}