zeta_prompt.rs

   1pub mod excerpt_ranges;
   2pub mod multi_region;
   3pub mod udiff;
   4
   5use anyhow::{Result, anyhow};
   6use serde::{Deserialize, Serialize};
   7use std::fmt::Write;
   8use std::ops::Range;
   9use std::path::Path;
  10use std::sync::Arc;
  11use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
  12
  13pub use crate::excerpt_ranges::{
  14    ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
  15};
  16
/// Marker text for the user's cursor. It is passed to the `multi_region`
/// marker writers and is listed among the special tokens of the
/// multi-region formats.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes to
/// `format_prompt_with_budget_for_format`.
pub const MAX_PROMPT_TOKENS: usize = 4096;
  19
  20/// Use up to this amount of the editable region for prefill.
  21/// Larger values may result in more robust generation, but
  22/// this region becomes non-editable.
  23pub const PREFILL_RATIO: f64 = 0.1; // 10%
  24
  25fn estimate_tokens(bytes: usize) -> usize {
  26    bytes / 3
  27}
  28
  29/// Leave some slack to avoid overflow.
  30fn apply_prompt_budget_margin(max_tokens: usize) -> usize {
  31    (max_tokens as f64 * 0.9).floor() as usize
  32}
  33
/// Everything needed to build an edit-prediction prompt around the user's
/// cursor. Serialized with serde; most optional fields default when absent.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor. It is written into the cursor
    /// section of the prompt and compared against related-file paths when
    /// filtering redundant excerpts.
    pub cursor_path: Arc<Path>,
    /// Text surrounding the cursor; the excerpt ranges index into this text.
    pub cursor_excerpt: Arc<str>,
    /// Cursor position within `cursor_excerpt`.
    /// NOTE(review): presumably a byte offset, like the ranges below — confirm.
    pub cursor_offset_in_excerpt: usize,
    /// When present, this is added to row numbers computed within
    /// `cursor_excerpt` before comparing them with related-excerpt row ranges.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events that are formatted into the edit-history section.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other files; `None` is treated as an empty list when
    /// formatting the prompt.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    /// Diagnostics for the active buffer; omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    /// NOTE(review): presumably whether the cursor file belongs to an
    /// open-source repository — confirm against the producer of this struct.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// NOTE(review): presumably a data-collection consent flag — confirm.
    #[serde(default)]
    pub can_collect_data: bool,
    /// Optional repository URL; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
  61
/// Prompt format versions. The variant name doubles as the format's textual
/// name: `IntoStaticStr` supplies the string used by `Display` and matched by
/// `ZetaFormat::parse`, and `EnumIter` lets `parse` enumerate the variants.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// `v0226Hashline` starts with a lowercase letter, which would otherwise trip
// the `non_camel_case_types` lint. Its name is observable through
// `IntoStaticStr`/serde, so renaming it would change emitted strings.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    /// The format used when none is specified (`Default`).
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
    /// Multi-block marker spans with NO_EDITS sentinel.
    V0306SeedMultiRegions,
    /// Byte-exact marker spans; all intermediate markers emitted; repeated marker means no-edit.
    V0316SeedMultiRegions,
    /// V0316, but marker numbers are relative to the cursor block (e.g. -1, -0, +1).
    V0317SeedMultiRegions,
    /// V0316 with larger block sizes.
    V0318SeedMultiRegions,
    /// V0318-style markers over the full available current file excerpt with no related files.
    V0327SingleFile,
}
  99
 100impl std::fmt::Display for ZetaFormat {
 101    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 102        write!(f, "{}", <&'static str>::from(self))
 103    }
 104}
 105
 106impl ZetaFormat {
 107    pub fn parse(format_name: &str) -> Result<Self> {
 108        let lower = format_name.to_lowercase();
 109
 110        // Exact case-insensitive match takes priority, bypassing ambiguity checks.
 111        for variant in ZetaFormat::iter() {
 112            if <&'static str>::from(&variant).to_lowercase() == lower {
 113                return Ok(variant);
 114            }
 115        }
 116
 117        let mut results = ZetaFormat::iter().filter(|version| {
 118            <&'static str>::from(version)
 119                .to_lowercase()
 120                .contains(&lower)
 121        });
 122        let Some(result) = results.next() else {
 123            anyhow::bail!(
 124                "`{format_name}` did not match any of:\n{}",
 125                Self::options_as_string()
 126            );
 127        };
 128        if results.next().is_some() {
 129            anyhow::bail!(
 130                "`{format_name}` matched more than one of:\n{}",
 131                Self::options_as_string()
 132            );
 133        }
 134        Ok(result)
 135    }
 136
 137    pub fn options_as_string() -> String {
 138        ZetaFormat::iter()
 139            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 140            .collect::<Vec<_>>()
 141            .concat()
 142    }
 143}
 144
/// An edit-history event. Serialized as an internally tagged enum: the
/// variant name is stored under the `"event"` key.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, carried as a diff.
    BufferChange {
        /// Path written on the `+++ b` header line by `write_event`.
        path: Arc<Path>,
        /// Path written on the `--- a` header line by `write_event`.
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the two header lines.
        diff: String,
        /// When true, `write_event` prefixes the event with
        /// `// User accepted prediction:`.
        predicted: bool,
        /// Returned by `Event::in_open_source_repo`; not written to the prompt.
        in_open_source_repo: bool,
    },
}
 156
 157impl Event {
 158    pub fn in_open_source_repo(&self) -> bool {
 159        match self {
 160            Event::BufferChange {
 161                in_open_source_repo,
 162                ..
 163            } => *in_open_source_repo,
 164        }
 165    }
 166}
 167
 168pub fn write_event(prompt: &mut String, event: &Event) {
 169    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 170        for component in path.components() {
 171            prompt.push('/');
 172            write!(prompt, "{}", component.as_os_str().display()).ok();
 173        }
 174    }
 175    match event {
 176        Event::BufferChange {
 177            path,
 178            old_path,
 179            diff,
 180            predicted,
 181            in_open_source_repo: _,
 182        } => {
 183            if *predicted {
 184                prompt.push_str("// User accepted prediction:\n");
 185            }
 186            prompt.push_str("--- a");
 187            write_path_as_unix_str(prompt, old_path.as_ref());
 188            prompt.push_str("\n+++ b");
 189            write_path_as_unix_str(prompt, path.as_ref());
 190            prompt.push('\n');
 191            prompt.push_str(diff);
 192        }
 193    }
 194}
 195
/// A diagnostic attached to the active buffer, together with a snippet of the
/// text it refers to.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    /// NOTE(review): numeric severity; the encoding is not visible in this
    /// file — confirm against the producer.
    pub severity: Option<i32>,
    /// Diagnostic message text.
    pub message: String,
    /// Snippet of buffer text associated with the diagnostic.
    pub snippet: String,
    /// NOTE(review): presumably the buffer rows that `snippet` covers — confirm.
    pub snippet_buffer_row_range: Range<u32>,
    /// NOTE(review): presumably the diagnostic's offset range inside
    /// `snippet` — confirm units (bytes vs. chars).
    pub diagnostic_range_in_snippet: Range<usize>,
}
 204
/// A file other than (or overlapping with) the cursor file whose excerpts may
/// be included in the prompt as extra context.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path of the file; compared with the cursor path by
    /// `filter_redundant_excerpts`.
    pub path: Arc<Path>,
    /// NOTE(review): presumably the last row of the file — confirm.
    pub max_row: u32,
    /// Excerpts taken from this file. `filter_redundant_excerpts` drops files
    /// whose excerpt list ends up empty.
    pub excerpts: Vec<RelatedExcerpt>,
    /// Defaults to `false` when missing from serialized input.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 213
/// A contiguous piece of text from a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file this excerpt covers; compared with the cursor row
    /// range by `filter_redundant_excerpts`.
    pub row_range: Range<u32>,
    /// Text of the excerpt.
    pub text: Arc<str>,
    /// NOTE(review): presumably a priority/ordering key used when selecting
    /// excerpts — confirm. Defaults to 0 when missing from serialized input.
    #[serde(default)]
    pub order: usize,
}
 221
 222pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 223    special_tokens_for_format(format).iter().any(|token| {
 224        if let Some(line_token) = token.strip_suffix('\n') {
 225            input.cursor_excerpt.lines().any(|line| line == line_token)
 226        } else {
 227            input.cursor_excerpt.contains(token)
 228        }
 229    })
 230}
 231
/// Formats a prompt for `input` in the given `format`, using
/// `MAX_PROMPT_TOKENS` as the token budget.
///
/// Returns whatever `format_prompt_with_budget_for_format` returns for that
/// budget, including `None`.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> Option<String> {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
 235
 236pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 237    match format {
 238        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
 239        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
 240        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
 241        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 242        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
 243        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
 244        ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 245        ZetaFormat::v0226Hashline => hashline::special_tokens(),
 246        ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
 247        ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
 248        ZetaFormat::V0316SeedMultiRegions => {
 249            static TOKENS: &[&str] = &[
 250                seed_coder::FIM_SUFFIX,
 251                seed_coder::FIM_PREFIX,
 252                seed_coder::FIM_MIDDLE,
 253                seed_coder::FILE_MARKER,
 254                multi_region::V0316_END_MARKER,
 255                CURSOR_MARKER,
 256                multi_region::MARKER_TAG_PREFIX,
 257            ];
 258            TOKENS
 259        }
 260        ZetaFormat::V0318SeedMultiRegions => {
 261            static TOKENS: &[&str] = &[
 262                seed_coder::FIM_SUFFIX,
 263                seed_coder::FIM_PREFIX,
 264                seed_coder::FIM_MIDDLE,
 265                seed_coder::FILE_MARKER,
 266                multi_region::V0318_END_MARKER,
 267                CURSOR_MARKER,
 268                multi_region::MARKER_TAG_PREFIX,
 269            ];
 270            TOKENS
 271        }
 272        ZetaFormat::V0317SeedMultiRegions => {
 273            static TOKENS: &[&str] = &[
 274                seed_coder::FIM_SUFFIX,
 275                seed_coder::FIM_PREFIX,
 276                seed_coder::FIM_MIDDLE,
 277                seed_coder::FILE_MARKER,
 278                multi_region::V0317_END_MARKER,
 279                CURSOR_MARKER,
 280                multi_region::RELATIVE_MARKER_TAG_PREFIX,
 281            ];
 282            TOKENS
 283        }
 284        ZetaFormat::V0327SingleFile => {
 285            static TOKENS: &[&str] = &[
 286                seed_coder::FIM_SUFFIX,
 287                seed_coder::FIM_PREFIX,
 288                seed_coder::FIM_MIDDLE,
 289                seed_coder::FILE_MARKER,
 290                multi_region::V0327_END_MARKER,
 291                CURSOR_MARKER,
 292                multi_region::MARKER_TAG_PREFIX,
 293            ];
 294            TOKENS
 295        }
 296        ZetaFormat::V0306SeedMultiRegions => {
 297            static TOKENS: &[&str] = &[
 298                seed_coder::FIM_SUFFIX,
 299                seed_coder::FIM_PREFIX,
 300                seed_coder::FIM_MIDDLE,
 301                seed_coder::FILE_MARKER,
 302                seed_coder::START_MARKER,
 303                seed_coder::SEPARATOR,
 304                seed_coder::END_MARKER,
 305                CURSOR_MARKER,
 306                multi_region::MARKER_TAG_PREFIX,
 307            ];
 308            TOKENS
 309        }
 310    }
 311}
 312
 313/// Returns the (editable_token_limit, context_token_limit) for a given format.
 314pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
 315    match format {
 316        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
 317        ZetaFormat::V0114180EditableRegion => (180, 350),
 318        ZetaFormat::V0120GitMergeMarkers
 319        | ZetaFormat::V0131GitMergeMarkersPrefix
 320        | ZetaFormat::V0211Prefill
 321        | ZetaFormat::V0211SeedCoder
 322        | ZetaFormat::v0226Hashline
 323        | ZetaFormat::V0306SeedMultiRegions
 324        | ZetaFormat::V0316SeedMultiRegions
 325        | ZetaFormat::V0318SeedMultiRegions
 326        | ZetaFormat::V0317SeedMultiRegions
 327        | ZetaFormat::V0327SingleFile
 328        | ZetaFormat::V0304SeedNoEdits => (350, 150),
 329
 330        ZetaFormat::V0304VariableEdit => (1024, 0),
 331    }
 332}
 333
 334pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 335    match format {
 336        ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
 337        ZetaFormat::V0112MiddleAtEnd
 338        | ZetaFormat::V0113Ordered
 339        | ZetaFormat::V0114180EditableRegion
 340        | ZetaFormat::V0120GitMergeMarkers
 341        | ZetaFormat::V0131GitMergeMarkersPrefix
 342        | ZetaFormat::V0211Prefill
 343        | ZetaFormat::V0211SeedCoder
 344        | ZetaFormat::V0304VariableEdit
 345        | ZetaFormat::V0306SeedMultiRegions
 346        | ZetaFormat::V0304SeedNoEdits => &[],
 347        ZetaFormat::V0316SeedMultiRegions => &[multi_region::V0316_END_MARKER],
 348        ZetaFormat::V0318SeedMultiRegions => &[multi_region::V0318_END_MARKER],
 349        ZetaFormat::V0317SeedMultiRegions => &[multi_region::V0317_END_MARKER],
 350        ZetaFormat::V0327SingleFile => &[multi_region::V0327_END_MARKER],
 351    }
 352}
 353
 354/// Return (editable_range, context_range) for the prompt format
 355pub fn excerpt_ranges_for_format(
 356    format: ZetaFormat,
 357    ranges: &ExcerptRanges,
 358) -> (Range<usize>, Range<usize>) {
 359    match format {
 360        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 361            ranges.editable_150.clone(),
 362            ranges.editable_150_context_350.clone(),
 363        ),
 364        ZetaFormat::V0114180EditableRegion => (
 365            ranges.editable_180.clone(),
 366            ranges.editable_180_context_350.clone(),
 367        ),
 368        ZetaFormat::V0120GitMergeMarkers
 369        | ZetaFormat::V0131GitMergeMarkersPrefix
 370        | ZetaFormat::V0211Prefill
 371        | ZetaFormat::V0211SeedCoder
 372        | ZetaFormat::v0226Hashline
 373        | ZetaFormat::V0304SeedNoEdits
 374        | ZetaFormat::V0306SeedMultiRegions
 375        | ZetaFormat::V0316SeedMultiRegions
 376        | ZetaFormat::V0318SeedMultiRegions
 377        | ZetaFormat::V0317SeedMultiRegions => (
 378            ranges.editable_350.clone(),
 379            ranges.editable_350_context_150.clone(),
 380        ),
 381        ZetaFormat::V0327SingleFile => (
 382            ranges.editable_350_context_150.clone(),
 383            ranges.context_8192.clone().unwrap_or(
 384                // shouldn't be used, only for compat with old data/clients
 385                ranges.editable_350_context_150.clone(),
 386            ),
 387        ),
 388
 389        ZetaFormat::V0304VariableEdit => {
 390            let context = ranges
 391                .editable_350_context_1024
 392                .clone()
 393                .or(ranges.editable_350_context_512.clone())
 394                .unwrap_or_else(|| ranges.editable_350_context_150.clone());
 395            (context.clone(), context)
 396        }
 397    }
 398}
 399
 400pub fn write_cursor_excerpt_section_for_format(
 401    format: ZetaFormat,
 402    prompt: &mut String,
 403    path: &Path,
 404    context: &str,
 405    editable_range: &Range<usize>,
 406    cursor_offset: usize,
 407) {
 408    match format {
 409        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
 410            prompt,
 411            path,
 412            context,
 413            editable_range,
 414            cursor_offset,
 415        ),
 416        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 417            v0113_ordered::write_cursor_excerpt_section(
 418                prompt,
 419                path,
 420                context,
 421                editable_range,
 422                cursor_offset,
 423            )
 424        }
 425        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 426            prompt,
 427            path,
 428            context,
 429            editable_range,
 430            cursor_offset,
 431        ),
 432        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 433            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 434                prompt,
 435                path,
 436                context,
 437                editable_range,
 438                cursor_offset,
 439            )
 440        }
 441        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
 442            seed_coder::write_cursor_excerpt_section(
 443                prompt,
 444                path,
 445                context,
 446                editable_range,
 447                cursor_offset,
 448            )
 449        }
 450        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
 451            prompt,
 452            path,
 453            context,
 454            editable_range,
 455            cursor_offset,
 456        ),
 457        ZetaFormat::V0304VariableEdit => {
 458            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
 459        }
 460        ZetaFormat::V0306SeedMultiRegions => {
 461            prompt.push_str(&build_v0306_cursor_prefix(
 462                path,
 463                context,
 464                editable_range,
 465                cursor_offset,
 466            ));
 467        }
 468        ZetaFormat::V0316SeedMultiRegions => {
 469            prompt.push_str(&build_v0316_cursor_prefix(
 470                path,
 471                context,
 472                editable_range,
 473                cursor_offset,
 474            ));
 475        }
 476        ZetaFormat::V0318SeedMultiRegions => {
 477            prompt.push_str(&build_v0318_cursor_prefix(
 478                path,
 479                context,
 480                editable_range,
 481                cursor_offset,
 482            ));
 483        }
 484        ZetaFormat::V0317SeedMultiRegions => {
 485            prompt.push_str(&build_v0317_cursor_prefix(
 486                path,
 487                context,
 488                editable_range,
 489                cursor_offset,
 490            ));
 491        }
 492        ZetaFormat::V0327SingleFile => {
 493            prompt.push_str(&build_v0318_cursor_prefix(
 494                path,
 495                context,
 496                editable_range,
 497                cursor_offset,
 498            ));
 499        }
 500    }
 501}
 502
 503fn build_v0306_cursor_prefix(
 504    path: &Path,
 505    context: &str,
 506    editable_range: &Range<usize>,
 507    cursor_offset: usize,
 508) -> String {
 509    let mut section = String::new();
 510    let path_str = path.to_string_lossy();
 511    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 512
 513    section.push_str(&context[..editable_range.start]);
 514    section.push_str(seed_coder::START_MARKER);
 515
 516    let editable_text = &context[editable_range.clone()];
 517    let cursor_in_editable = cursor_offset - editable_range.start;
 518    multi_region::write_editable_with_markers(
 519        &mut section,
 520        editable_text,
 521        cursor_in_editable,
 522        CURSOR_MARKER,
 523    );
 524
 525    if !section.ends_with('\n') {
 526        section.push('\n');
 527    }
 528    section.push_str(seed_coder::SEPARATOR);
 529    section
 530}
 531
 532fn build_v0316_cursor_prefix(
 533    path: &Path,
 534    context: &str,
 535    editable_range: &Range<usize>,
 536    cursor_offset: usize,
 537) -> String {
 538    let mut section = String::new();
 539    let path_str = path.to_string_lossy();
 540    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 541
 542    section.push_str(&context[..editable_range.start]);
 543
 544    let editable_text = &context[editable_range.clone()];
 545    let cursor_in_editable = cursor_offset - editable_range.start;
 546    multi_region::write_editable_with_markers_v0316(
 547        &mut section,
 548        editable_text,
 549        cursor_in_editable,
 550        CURSOR_MARKER,
 551    );
 552
 553    if !section.ends_with('\n') {
 554        section.push('\n');
 555    }
 556    section
 557}
 558
 559fn build_v0318_cursor_prefix(
 560    path: &Path,
 561    context: &str,
 562    editable_range: &Range<usize>,
 563    cursor_offset: usize,
 564) -> String {
 565    let mut section = String::new();
 566    let path_str = path.to_string_lossy();
 567    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 568
 569    section.push_str(&context[..editable_range.start]);
 570
 571    let editable_text = &context[editable_range.clone()];
 572    let cursor_in_editable = cursor_offset - editable_range.start;
 573    multi_region::write_editable_with_markers_v0318(
 574        &mut section,
 575        editable_text,
 576        cursor_in_editable,
 577        CURSOR_MARKER,
 578    );
 579
 580    if !section.ends_with('\n') {
 581        section.push('\n');
 582    }
 583    section
 584}
 585
 586fn build_v0317_cursor_prefix(
 587    path: &Path,
 588    context: &str,
 589    editable_range: &Range<usize>,
 590    cursor_offset: usize,
 591) -> String {
 592    let mut section = String::new();
 593    let path_str = path.to_string_lossy();
 594    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 595
 596    section.push_str(&context[..editable_range.start]);
 597
 598    let editable_text = &context[editable_range.clone()];
 599    let cursor_in_editable = cursor_offset - editable_range.start;
 600    multi_region::write_editable_with_markers_v0317(
 601        &mut section,
 602        editable_text,
 603        cursor_in_editable,
 604        CURSOR_MARKER,
 605    );
 606
 607    if !section.ends_with('\n') {
 608        section.push('\n');
 609    }
 610    section
 611}
 612
 613fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
 614    let start_row = text[0..range.start].matches('\n').count() as u32;
 615    let mut end_row = start_row + text[range.clone()].matches('\n').count() as u32;
 616    if !text[..range.end].ends_with('\n') {
 617        end_row += 1;
 618    }
 619    return start_row..end_row;
 620}
 621
 622fn assemble_single_file_fim_prompt(
 623    context: &str,
 624    editable_range: &Range<usize>,
 625    cursor_prefix_section: &str,
 626    events: &[Arc<Event>],
 627    max_tokens: usize,
 628) -> String {
 629    let suffix_section = seed_coder::build_suffix_section(context, editable_range);
 630
 631    let suffix_tokens = estimate_tokens(suffix_section.len() + seed_coder::FIM_PREFIX.len());
 632    let cursor_prefix_tokens =
 633        estimate_tokens(cursor_prefix_section.len() + seed_coder::FIM_MIDDLE.len());
 634    let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
 635
 636    let edit_history_section = format_edit_history_within_budget(
 637        events,
 638        seed_coder::FILE_MARKER,
 639        "edit_history",
 640        budget_after_cursor,
 641        max_edit_event_count_for_format(&ZetaFormat::V0327SingleFile),
 642    );
 643
 644    let mut prompt = String::new();
 645    prompt.push_str(&suffix_section);
 646    prompt.push_str(seed_coder::FIM_PREFIX);
 647    prompt.push_str(&edit_history_section);
 648    if !edit_history_section.is_empty() {
 649        prompt.push('\n');
 650    }
 651    prompt.push_str(cursor_prefix_section);
 652    prompt.push_str(seed_coder::FIM_MIDDLE);
 653    prompt
 654}
 655
 656pub fn format_prompt_with_budget_for_format(
 657    input: &ZetaPromptInput,
 658    format: ZetaFormat,
 659    max_tokens: usize,
 660) -> Option<String> {
 661    let (context, editable_range, context_range, cursor_offset) =
 662        resolve_cursor_region(input, format);
 663    let path = &*input.cursor_path;
 664
 665    let empty_files = Vec::new();
 666    let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
 667    let filtered_related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
 668        let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
 669        let row_range = relative_row_range.start + cursor_excerpt_start_row
 670            ..relative_row_range.end + cursor_excerpt_start_row;
 671        filter_redundant_excerpts(
 672            input_related_files.to_vec(),
 673            input.cursor_path.as_ref(),
 674            row_range,
 675        )
 676    } else {
 677        input_related_files.to_vec()
 678    };
 679    let related_files = filtered_related_files.as_slice();
 680
 681    let prompt = match format {
 682        ZetaFormat::V0211SeedCoder
 683        | ZetaFormat::V0304SeedNoEdits
 684        | ZetaFormat::V0306SeedMultiRegions
 685        | ZetaFormat::V0316SeedMultiRegions
 686        | ZetaFormat::V0318SeedMultiRegions
 687        | ZetaFormat::V0317SeedMultiRegions => {
 688            let mut cursor_section = String::new();
 689            write_cursor_excerpt_section_for_format(
 690                format,
 691                &mut cursor_section,
 692                path,
 693                context,
 694                &editable_range,
 695                cursor_offset,
 696            );
 697
 698            let budget_with_margin = apply_prompt_budget_margin(max_tokens);
 699            seed_coder::assemble_fim_prompt(
 700                context,
 701                &editable_range,
 702                &cursor_section,
 703                &input.events,
 704                related_files,
 705                budget_with_margin,
 706            )
 707        }
 708        ZetaFormat::V0327SingleFile => {
 709            let mut cursor_section = String::new();
 710            write_cursor_excerpt_section_for_format(
 711                format,
 712                &mut cursor_section,
 713                path,
 714                context,
 715                &editable_range,
 716                cursor_offset,
 717            );
 718
 719            assemble_single_file_fim_prompt(
 720                context,
 721                &editable_range,
 722                &cursor_section,
 723                &input.events,
 724                apply_prompt_budget_margin(max_tokens),
 725            )
 726        }
 727        _ => {
 728            let mut cursor_section = String::new();
 729            write_cursor_excerpt_section_for_format(
 730                format,
 731                &mut cursor_section,
 732                path,
 733                context,
 734                &editable_range,
 735                cursor_offset,
 736            );
 737
 738            let mut remaining_budget = apply_prompt_budget_margin(max_tokens);
 739            let cursor_tokens = estimate_tokens(cursor_section.len());
 740            remaining_budget = remaining_budget.saturating_sub(cursor_tokens);
 741
 742            let edit_history_section = format_edit_history_within_budget(
 743                &input.events,
 744                "<|file_sep|>",
 745                "edit history",
 746                remaining_budget,
 747                max_edit_event_count_for_format(&format),
 748            );
 749            let edit_history_tokens = estimate_tokens(edit_history_section.len());
 750            remaining_budget = remaining_budget.saturating_sub(edit_history_tokens);
 751
 752            let related_files_section = format_related_files_within_budget(
 753                &related_files,
 754                "<|file_sep|>",
 755                "",
 756                remaining_budget,
 757            );
 758
 759            let mut prompt = String::new();
 760            prompt.push_str(&related_files_section);
 761            prompt.push_str(&edit_history_section);
 762            prompt.push_str(&cursor_section);
 763            prompt
 764        }
 765    };
 766    let prompt_tokens = estimate_tokens(prompt.len());
 767    if prompt_tokens > max_tokens {
 768        return None;
 769    }
 770    return Some(prompt);
 771}
 772
 773pub fn filter_redundant_excerpts(
 774    mut related_files: Vec<RelatedFile>,
 775    cursor_path: &Path,
 776    cursor_row_range: Range<u32>,
 777) -> Vec<RelatedFile> {
 778    for file in &mut related_files {
 779        if file.path.as_ref() == cursor_path {
 780            file.excerpts.retain(|excerpt| {
 781                excerpt.row_range.start < cursor_row_range.start
 782                    || excerpt.row_range.end > cursor_row_range.end
 783            });
 784        }
 785    }
 786    related_files.retain(|file| !file.excerpts.is_empty());
 787    related_files
 788}
 789
 790pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
 791    match format {
 792        ZetaFormat::V0112MiddleAtEnd
 793        | ZetaFormat::V0113Ordered
 794        | ZetaFormat::V0114180EditableRegion
 795        | ZetaFormat::V0120GitMergeMarkers
 796        | ZetaFormat::V0131GitMergeMarkersPrefix
 797        | ZetaFormat::V0211Prefill
 798        | ZetaFormat::V0211SeedCoder
 799        | ZetaFormat::v0226Hashline
 800        | ZetaFormat::V0304SeedNoEdits
 801        | ZetaFormat::V0304VariableEdit
 802        | ZetaFormat::V0306SeedMultiRegions
 803        | ZetaFormat::V0316SeedMultiRegions
 804        | ZetaFormat::V0318SeedMultiRegions
 805        | ZetaFormat::V0317SeedMultiRegions
 806        | ZetaFormat::V0327SingleFile => 6,
 807    }
 808}
 809
 810pub fn get_prefill_for_format(
 811    format: ZetaFormat,
 812    context: &str,
 813    editable_range: &Range<usize>,
 814) -> String {
 815    match format {
 816        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
 817        ZetaFormat::V0112MiddleAtEnd
 818        | ZetaFormat::V0113Ordered
 819        | ZetaFormat::V0114180EditableRegion
 820        | ZetaFormat::V0120GitMergeMarkers
 821        | ZetaFormat::V0131GitMergeMarkersPrefix
 822        | ZetaFormat::V0211SeedCoder
 823        | ZetaFormat::v0226Hashline
 824        | ZetaFormat::V0304VariableEdit => String::new(),
 825        ZetaFormat::V0304SeedNoEdits
 826        | ZetaFormat::V0306SeedMultiRegions
 827        | ZetaFormat::V0316SeedMultiRegions
 828        | ZetaFormat::V0318SeedMultiRegions
 829        | ZetaFormat::V0317SeedMultiRegions
 830        | ZetaFormat::V0327SingleFile => String::new(),
 831    }
 832}
 833
 834pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
 835    match format {
 836        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
 837        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
 838        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
 839        ZetaFormat::V0211SeedCoder
 840        | ZetaFormat::V0304SeedNoEdits
 841        | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
 842        ZetaFormat::V0316SeedMultiRegions => Some(multi_region::V0316_END_MARKER),
 843        ZetaFormat::V0318SeedMultiRegions => Some(multi_region::V0318_END_MARKER),
 844        ZetaFormat::V0317SeedMultiRegions => Some(multi_region::V0317_END_MARKER),
 845        ZetaFormat::V0327SingleFile => Some(multi_region::V0327_END_MARKER),
 846
 847        ZetaFormat::V0112MiddleAtEnd
 848        | ZetaFormat::V0113Ordered
 849        | ZetaFormat::V0114180EditableRegion
 850        | ZetaFormat::v0226Hashline
 851        | ZetaFormat::V0304VariableEdit => None,
 852    }
 853}
 854
 855pub fn encode_patch_as_output_for_format(
 856    format: ZetaFormat,
 857    old_editable_region: &str,
 858    patch: &str,
 859    cursor_offset: Option<usize>,
 860) -> Result<Option<String>> {
 861    match format {
 862        ZetaFormat::v0226Hashline => {
 863            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
 864        }
 865        ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
 866            old_editable_region,
 867            patch,
 868            cursor_offset,
 869        )
 870        .map(Some),
 871        ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
 872            Ok(seed_coder::no_edits(patch))
 873        }
 874        ZetaFormat::V0316SeedMultiRegions => {
 875            let empty_patch = patch.lines().count() <= 3;
 876            if empty_patch {
 877                let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
 878                let marker_num =
 879                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
 880                let tag = multi_region::marker_tag(marker_num);
 881                Ok(Some(format!(
 882                    "{tag}{tag}{}",
 883                    multi_region::V0316_END_MARKER
 884                )))
 885            } else {
 886                Ok(None)
 887            }
 888        }
 889        ZetaFormat::V0318SeedMultiRegions => {
 890            let empty_patch = patch.lines().count() <= 3;
 891            if empty_patch {
 892                let marker_offsets =
 893                    multi_region::compute_marker_offsets_v0318(old_editable_region);
 894                let marker_num =
 895                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
 896                let tag = multi_region::marker_tag(marker_num);
 897                Ok(Some(format!(
 898                    "{tag}{tag}{}",
 899                    multi_region::V0318_END_MARKER
 900                )))
 901            } else {
 902                Ok(None)
 903            }
 904        }
 905        ZetaFormat::V0317SeedMultiRegions => {
 906            let empty_patch = patch.lines().count() <= 3;
 907            if empty_patch {
 908                let tag = multi_region::marker_tag_relative(0);
 909                Ok(Some(format!(
 910                    "{tag}{tag}{}",
 911                    multi_region::V0317_END_MARKER
 912                )))
 913            } else {
 914                Ok(None)
 915            }
 916        }
 917        ZetaFormat::V0327SingleFile => {
 918            let empty_patch = patch.lines().count() <= 3;
 919            if empty_patch {
 920                let marker_offsets =
 921                    multi_region::compute_marker_offsets_v0318(old_editable_region);
 922                let marker_num =
 923                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
 924                let tag = multi_region::marker_tag(marker_num);
 925                Ok(Some(format!(
 926                    "{tag}{tag}{}",
 927                    multi_region::V0327_END_MARKER
 928                )))
 929            } else {
 930                Ok(None)
 931            }
 932        }
 933        _ => Ok(None),
 934    }
 935}
 936
 937/// Given a `ZetaPromptInput`, a format, and a patch (with cursor already
 938/// extracted), produce the expected model output string for training.
 939pub fn format_expected_output(
 940    input: &ZetaPromptInput,
 941    format: ZetaFormat,
 942    patch: &str,
 943    cursor_offset: Option<usize>,
 944) -> Result<String> {
 945    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
 946    let mut old_editable = context[editable_range].to_string();
 947    if !old_editable.is_empty() && !old_editable.ends_with('\n') {
 948        old_editable.push('\n');
 949    }
 950
 951    // Formats with their own output encoding (hashline, variable-edit,
 952    // multi-region empty patches) are handled here.
 953    if let Some(output) =
 954        encode_patch_as_output_for_format(format, &old_editable, patch, cursor_offset)?
 955    {
 956        return Ok(output);
 957    }
 958
 959    let empty_patch = patch.lines().count() <= 3;
 960
 961    match format {
 962        // Multi-region formats: non-empty patches need diff application
 963        // then marker-span encoding.
 964        ZetaFormat::V0316SeedMultiRegions => {
 965            let (new_editable, first_hunk_offset) =
 966                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
 967            let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
 968            multi_region::encode_from_old_and_new_v0316(
 969                &old_editable,
 970                &new_editable,
 971                cursor_in_new,
 972                CURSOR_MARKER,
 973                multi_region::V0316_END_MARKER,
 974            )
 975        }
 976        ZetaFormat::V0318SeedMultiRegions => {
 977            let (new_editable, first_hunk_offset) =
 978                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
 979            let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
 980            multi_region::encode_from_old_and_new_v0318(
 981                &old_editable,
 982                &new_editable,
 983                cursor_in_new,
 984                CURSOR_MARKER,
 985                multi_region::V0318_END_MARKER,
 986            )
 987        }
 988        ZetaFormat::V0317SeedMultiRegions => {
 989            let (new_editable, first_hunk_offset) =
 990                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
 991            let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
 992            multi_region::encode_from_old_and_new_v0317(
 993                &old_editable,
 994                &new_editable,
 995                cursor_in_new,
 996                CURSOR_MARKER,
 997                multi_region::V0317_END_MARKER,
 998            )
 999        }
1000        // V0131-style formats and fallback: produce new editable text with
1001        // cursor marker inserted, followed by the end marker.
1002        _ => {
1003            let (mut result, first_hunk_offset) = if empty_patch {
1004                (old_editable.clone(), None)
1005            } else {
1006                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?
1007            };
1008
1009            if let Some(cursor) = cursor_offset {
1010                let hunk_start = if !empty_patch {
1011                    first_hunk_offset.unwrap_or(0)
1012                } else {
1013                    0
1014                };
1015                let offset = (hunk_start + cursor).min(result.len());
1016                result.insert_str(offset, CURSOR_MARKER);
1017            }
1018
1019            if !result.is_empty() && !result.ends_with('\n') {
1020                result.push('\n');
1021            }
1022
1023            if let Some(end_marker) = output_end_marker_for_format(format) {
1024                result.push_str(end_marker);
1025            }
1026
1027            Ok(result)
1028        }
1029    }
1030}
1031
/// Compute the cursor position within the new text after diff application.
///
/// `cursor_offset` is added to the offset of the first hunk (treated as `0`
/// when absent) and the sum is clamped to the length of `new_text`. Returns
/// `None` when there is no cursor.
fn cursor_in_new_text(
    cursor_offset: Option<usize>,
    first_hunk_offset: Option<usize>,
    new_text: &str,
) -> Option<usize> {
    let cursor = cursor_offset?;
    let absolute = first_hunk_offset.unwrap_or(0) + cursor;
    Some(absolute.min(new_text.len()))
}
1043
/// The result of parsing a model response: the replacement text for the
/// editable region, the excerpt range it replaces, and the cursor position
/// carried by the response, if any.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct ParsedOutput {
    /// Text that should replace the editable region.
    ///
    /// When built by `parsed_output_from_editable_region`, the first cursor
    /// marker has already been removed from this text.
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to.
    pub range_in_excerpt: Range<usize>,
    /// Byte offset of the cursor marker within `new_editable_region`, if present.
    ///
    /// This is the position at which the marker was found before it was
    /// removed; `None` means the output contained no cursor marker.
    pub cursor_offset_in_new_editable_region: Option<usize>,
}
1053
/// A cursor location derived from a parsed model output.
///
/// The field descriptions below reflect how
/// `cursor_position_from_parsed_output` fills in this struct.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct CursorPosition {
    /// Path of the file the cursor is in (lossy string form of the prompt
    /// input's `cursor_path`).
    pub path: String,
    /// Number of newlines that precede the cursor, counted from the start of
    /// the excerpt.
    pub row: usize,
    /// Byte distance between the cursor and the start of its line.
    pub column: usize,
    /// Start of the replaced excerpt range plus the cursor's offset within
    /// the new editable region.
    pub offset: usize,
    /// Byte offset of the cursor within the new editable region.
    pub editable_region_offset: usize,
}
1062
1063pub fn parsed_output_from_editable_region(
1064    range_in_excerpt: Range<usize>,
1065    mut new_editable_region: String,
1066) -> ParsedOutput {
1067    let cursor_offset_in_new_editable_region = new_editable_region.find(CURSOR_MARKER);
1068    if let Some(offset) = cursor_offset_in_new_editable_region {
1069        new_editable_region.replace_range(offset..offset + CURSOR_MARKER.len(), "");
1070    }
1071
1072    ParsedOutput {
1073        new_editable_region,
1074        range_in_excerpt,
1075        cursor_offset_in_new_editable_region,
1076    }
1077}
1078
1079/// Parse model output for the given zeta format
1080pub fn parse_zeta2_model_output(
1081    output: &str,
1082    format: ZetaFormat,
1083    prompt_inputs: &ZetaPromptInput,
1084) -> Result<ParsedOutput> {
1085    let output = match output_end_marker_for_format(format) {
1086        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
1087        None => output,
1088    };
1089
1090    let (context, editable_range_in_context, context_range, cursor_offset) =
1091        resolve_cursor_region(prompt_inputs, format);
1092    let context_start = context_range.start;
1093    let old_editable_region = &context[editable_range_in_context.clone()];
1094    let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range_in_context.start);
1095
1096    let (range_in_context, output) = match format {
1097        ZetaFormat::v0226Hashline => (
1098            editable_range_in_context,
1099            if hashline::output_has_edit_commands(output) {
1100                hashline::apply_edit_commands(old_editable_region, output)
1101            } else {
1102                output.to_string()
1103            },
1104        ),
1105        ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
1106        ZetaFormat::V0304SeedNoEdits => (
1107            editable_range_in_context,
1108            if output.starts_with(seed_coder::NO_EDITS) {
1109                old_editable_region.to_string()
1110            } else {
1111                output.to_string()
1112            },
1113        ),
1114        ZetaFormat::V0306SeedMultiRegions => (
1115            editable_range_in_context,
1116            if output.starts_with(seed_coder::NO_EDITS) {
1117                old_editable_region.to_string()
1118            } else {
1119                multi_region::apply_marker_span(old_editable_region, output)?
1120            },
1121        ),
1122        ZetaFormat::V0316SeedMultiRegions => (
1123            editable_range_in_context,
1124            multi_region::apply_marker_span_v0316(old_editable_region, output)?,
1125        ),
1126        ZetaFormat::V0318SeedMultiRegions => (
1127            editable_range_in_context,
1128            multi_region::apply_marker_span_v0318(old_editable_region, output)?,
1129        ),
1130        ZetaFormat::V0317SeedMultiRegions => (
1131            editable_range_in_context,
1132            multi_region::apply_marker_span_v0317(
1133                old_editable_region,
1134                output,
1135                Some(cursor_offset_in_editable),
1136            )?,
1137        ),
1138        ZetaFormat::V0327SingleFile => (
1139            editable_range_in_context,
1140            multi_region::apply_marker_span_v0318(old_editable_region, output)?,
1141        ),
1142        _ => (editable_range_in_context, output.to_string()),
1143    };
1144
1145    let range_in_excerpt =
1146        range_in_context.start + context_start..range_in_context.end + context_start;
1147
1148    Ok(parsed_output_from_editable_region(range_in_excerpt, output))
1149}
1150
1151pub fn parse_zeta2_model_output_as_patch(
1152    output: &str,
1153    format: ZetaFormat,
1154    prompt_inputs: &ZetaPromptInput,
1155) -> Result<String> {
1156    let parsed = parse_zeta2_model_output(output, format, prompt_inputs)?;
1157    parsed_output_to_patch(prompt_inputs, parsed)
1158}
1159
1160pub fn cursor_position_from_parsed_output(
1161    prompt_inputs: &ZetaPromptInput,
1162    parsed: &ParsedOutput,
1163) -> Option<CursorPosition> {
1164    let cursor_offset = parsed.cursor_offset_in_new_editable_region?;
1165    let editable_region_offset = parsed.range_in_excerpt.start;
1166    let excerpt = prompt_inputs.cursor_excerpt.as_ref();
1167
1168    let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
1169
1170    let new_editable_region = &parsed.new_editable_region;
1171    let prefix_end = cursor_offset.min(new_editable_region.len());
1172    let new_region_prefix = &new_editable_region[..prefix_end];
1173
1174    let row = editable_region_start_line + new_region_prefix.matches('\n').count();
1175
1176    let column = match new_region_prefix.rfind('\n') {
1177        Some(last_newline) => cursor_offset - last_newline - 1,
1178        None => {
1179            let content_prefix = &excerpt[..editable_region_offset];
1180            let content_column = match content_prefix.rfind('\n') {
1181                Some(last_newline) => editable_region_offset - last_newline - 1,
1182                None => editable_region_offset,
1183            };
1184            content_column + cursor_offset
1185        }
1186    };
1187
1188    Some(CursorPosition {
1189        path: prompt_inputs.cursor_path.to_string_lossy().into_owned(),
1190        row,
1191        column,
1192        offset: editable_region_offset + cursor_offset,
1193        editable_region_offset: cursor_offset,
1194    })
1195}
1196
1197pub fn parsed_output_to_patch(
1198    prompt_inputs: &ZetaPromptInput,
1199    parsed: ParsedOutput,
1200) -> Result<String> {
1201    let range_in_excerpt = parsed.range_in_excerpt;
1202    let excerpt = prompt_inputs.cursor_excerpt.as_ref();
1203    let old_text = excerpt[range_in_excerpt.clone()].to_string();
1204    let mut new_text = parsed.new_editable_region;
1205
1206    let mut old_text_normalized = old_text;
1207    if !new_text.is_empty() && !new_text.ends_with('\n') {
1208        new_text.push('\n');
1209    }
1210    if !old_text_normalized.is_empty() && !old_text_normalized.ends_with('\n') {
1211        old_text_normalized.push('\n');
1212    }
1213
1214    let editable_region_offset = range_in_excerpt.start;
1215    let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count() as u32;
1216    let editable_region_lines = old_text_normalized.lines().count() as u32;
1217
1218    let diff = udiff::unified_diff_with_context(
1219        &old_text_normalized,
1220        &new_text,
1221        editable_region_start_line,
1222        editable_region_start_line,
1223        editable_region_lines,
1224    );
1225
1226    let path = prompt_inputs
1227        .cursor_path
1228        .to_string_lossy()
1229        .trim_start_matches('/')
1230        .to_string();
1231    let formatted_diff = format!("--- a/{path}\n+++ b/{path}\n{diff}");
1232
1233    Ok(udiff::encode_cursor_in_patch(
1234        &formatted_diff,
1235        parsed.cursor_offset_in_new_editable_region,
1236    ))
1237}
1238
1239pub fn excerpt_range_for_format(
1240    format: ZetaFormat,
1241    ranges: &ExcerptRanges,
1242) -> (Range<usize>, Range<usize>) {
1243    excerpt_ranges_for_format(format, ranges)
1244}
1245
1246pub fn resolve_cursor_region(
1247    input: &ZetaPromptInput,
1248    format: ZetaFormat,
1249) -> (&str, Range<usize>, Range<usize>, usize) {
1250    let (editable_range, context_range) = if format == ZetaFormat::V0327SingleFile {
1251        let (editable_tokens, _) = token_limits_for_format(format);
1252        let context_range = 0..input.cursor_excerpt.len();
1253        let editable_range = multi_region::compute_v0327_editable_range(
1254            &input.cursor_excerpt,
1255            input.cursor_offset_in_excerpt,
1256            editable_tokens,
1257        );
1258        (editable_range, context_range)
1259    } else if let Some(syntax_ranges) = &input.syntax_ranges {
1260        let (editable_tokens, context_tokens) = token_limits_for_format(format);
1261        compute_editable_and_context_ranges(
1262            &input.cursor_excerpt,
1263            input.cursor_offset_in_excerpt,
1264            syntax_ranges,
1265            editable_tokens,
1266            context_tokens,
1267        )
1268    } else {
1269        excerpt_range_for_format(format, &input.excerpt_ranges)
1270    };
1271
1272    let context_start = context_range.start;
1273    let context_text = &input.cursor_excerpt[context_range.clone()];
1274    let adjusted_editable =
1275        (editable_range.start - context_start)..(editable_range.end - context_start);
1276    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
1277
1278    (
1279        context_text,
1280        adjusted_editable,
1281        context_range,
1282        adjusted_cursor,
1283    )
1284}
1285
1286pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
1287    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
1288    get_prefill_for_format(format, context, &editable_range)
1289}
1290
1291fn format_edit_history_within_budget(
1292    events: &[Arc<Event>],
1293    file_marker: &str,
1294    edit_history_name: &str,
1295    max_tokens: usize,
1296    max_edit_event_count: usize,
1297) -> String {
1298    let header = format!("{}{}\n", file_marker, edit_history_name);
1299    let header_tokens = estimate_tokens(header.len());
1300    if header_tokens >= max_tokens {
1301        return String::new();
1302    }
1303
1304    let mut event_strings: Vec<String> = Vec::new();
1305    let mut total_tokens = header_tokens;
1306
1307    for event in events.iter().rev().take(max_edit_event_count) {
1308        let mut event_str = String::new();
1309        write_event(&mut event_str, event);
1310        let event_tokens = estimate_tokens(event_str.len());
1311
1312        if total_tokens + event_tokens > max_tokens {
1313            break;
1314        }
1315        total_tokens += event_tokens;
1316        event_strings.push(event_str);
1317    }
1318
1319    if event_strings.is_empty() {
1320        return String::new();
1321    }
1322
1323    let mut result = header;
1324    for event_str in event_strings.iter().rev() {
1325        result.push_str(event_str);
1326    }
1327    result
1328}
1329
1330fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
1331    let needs_newline = !excerpt.text.ends_with('\n');
1332    let needs_ellipsis = excerpt.row_range.end < file_max_row;
1333    let len = excerpt.text.len()
1334        + if needs_newline { "\n".len() } else { 0 }
1335        + if needs_ellipsis { "...\n".len() } else { 0 };
1336    estimate_tokens(len)
1337}
1338
1339pub fn format_related_files_within_budget(
1340    related_files: &[RelatedFile],
1341    file_prefix: &str,
1342    file_suffix: &str,
1343    max_tokens: usize,
1344) -> String {
1345    struct ExcerptCandidate {
1346        file_ix: usize,
1347        excerpt_ix: usize,
1348        order: usize,
1349    }
1350
1351    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
1352        .iter()
1353        .enumerate()
1354        .flat_map(|(file_ix, file)| {
1355            file.excerpts
1356                .iter()
1357                .enumerate()
1358                .map(move |(excerpt_ix, e)| ExcerptCandidate {
1359                    file_ix,
1360                    excerpt_ix,
1361                    order: e.order,
1362                })
1363        })
1364        .collect();
1365
1366    // Pre-compute file header strings and their token costs.
1367    let file_headers: Vec<String> = related_files
1368        .iter()
1369        .map(|file| {
1370            let path_str = file.path.to_string_lossy();
1371            format!("{}{}\n", file_prefix, path_str)
1372        })
1373        .collect();
1374
1375    // Sort the excerpts by their order and determine how many fit within the budget.
1376    let mut total_tokens = 0;
1377    let mut included_excerpt_count = 0_usize;
1378    let mut included_file_indices = vec![false; related_files.len()];
1379    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
1380    for candidate in &excerpt_candidates {
1381        let file = &related_files[candidate.file_ix];
1382        let excerpt = &file.excerpts[candidate.excerpt_ix];
1383        let file_already_included = included_file_indices[candidate.file_ix];
1384        let header_cost = if file_already_included {
1385            0
1386        } else {
1387            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
1388        };
1389        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
1390        if total_tokens + header_cost + excerpt_cost > max_tokens {
1391            break;
1392        }
1393        total_tokens += header_cost + excerpt_cost;
1394        if !file_already_included {
1395            included_file_indices[candidate.file_ix] = true;
1396        }
1397        included_excerpt_count += 1;
1398    }
1399
1400    excerpt_candidates.truncate(included_excerpt_count);
1401    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
1402
1403    // Render all of the files that fit within the token budget, in the original order.
1404    let mut result = String::new();
1405    let mut last_file_ix = None;
1406    for candidate in &excerpt_candidates {
1407        if last_file_ix != Some(candidate.file_ix) {
1408            if last_file_ix.is_some() {
1409                result.push_str(file_suffix);
1410            }
1411            result.push_str(&file_headers[candidate.file_ix]);
1412            last_file_ix = Some(candidate.file_ix);
1413        }
1414        let file = &related_files[candidate.file_ix];
1415        let excerpt = &file.excerpts[candidate.excerpt_ix];
1416        result.push_str(&excerpt.text);
1417        if !result.ends_with('\n') {
1418            result.push('\n');
1419        }
1420        if excerpt.row_range.end < file.max_row {
1421            result.push_str("...\n");
1422        }
1423    }
1424
1425    result
1426}
1427
1428pub fn write_related_files(
1429    prompt: &mut String,
1430    related_files: &[RelatedFile],
1431) -> Vec<Range<usize>> {
1432    let mut ranges = Vec::new();
1433    for file in related_files {
1434        let start = prompt.len();
1435        let path_str = file.path.to_string_lossy();
1436        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1437        for excerpt in &file.excerpts {
1438            prompt.push_str(&excerpt.text);
1439            if !prompt.ends_with('\n') {
1440                prompt.push('\n');
1441            }
1442            if excerpt.row_range.end < file.max_row {
1443                prompt.push_str("...\n");
1444            }
1445        }
1446        let end = prompt.len();
1447        ranges.push(start..end);
1448    }
1449    ranges
1450}
1451
1452mod v0112_middle_at_end {
1453    use super::*;
1454
1455    pub fn special_tokens() -> &'static [&'static str] {
1456        &[
1457            "<|fim_prefix|>",
1458            "<|fim_suffix|>",
1459            "<|fim_middle|>",
1460            "<|file_sep|>",
1461            CURSOR_MARKER,
1462        ]
1463    }
1464
1465    pub fn write_cursor_excerpt_section(
1466        prompt: &mut String,
1467        path: &Path,
1468        context: &str,
1469        editable_range: &Range<usize>,
1470        cursor_offset: usize,
1471    ) {
1472        let path_str = path.to_string_lossy();
1473        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1474
1475        prompt.push_str("<|fim_prefix|>\n");
1476        prompt.push_str(&context[..editable_range.start]);
1477
1478        prompt.push_str("<|fim_suffix|>\n");
1479        prompt.push_str(&context[editable_range.end..]);
1480        if !prompt.ends_with('\n') {
1481            prompt.push('\n');
1482        }
1483
1484        prompt.push_str("<|fim_middle|>current\n");
1485        prompt.push_str(&context[editable_range.start..cursor_offset]);
1486        prompt.push_str(CURSOR_MARKER);
1487        prompt.push_str(&context[cursor_offset..editable_range.end]);
1488        if !prompt.ends_with('\n') {
1489            prompt.push('\n');
1490        }
1491
1492        prompt.push_str("<|fim_middle|>updated\n");
1493    }
1494}
1495
1496mod v0113_ordered {
1497    use super::*;
1498
1499    pub fn special_tokens() -> &'static [&'static str] {
1500        &[
1501            "<|fim_prefix|>",
1502            "<|fim_suffix|>",
1503            "<|fim_middle|>",
1504            "<|file_sep|>",
1505            CURSOR_MARKER,
1506        ]
1507    }
1508
1509    pub fn write_cursor_excerpt_section(
1510        prompt: &mut String,
1511        path: &Path,
1512        context: &str,
1513        editable_range: &Range<usize>,
1514        cursor_offset: usize,
1515    ) {
1516        let path_str = path.to_string_lossy();
1517        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1518
1519        prompt.push_str("<|fim_prefix|>\n");
1520        prompt.push_str(&context[..editable_range.start]);
1521        if !prompt.ends_with('\n') {
1522            prompt.push('\n');
1523        }
1524
1525        prompt.push_str("<|fim_middle|>current\n");
1526        prompt.push_str(&context[editable_range.start..cursor_offset]);
1527        prompt.push_str(CURSOR_MARKER);
1528        prompt.push_str(&context[cursor_offset..editable_range.end]);
1529        if !prompt.ends_with('\n') {
1530            prompt.push('\n');
1531        }
1532
1533        prompt.push_str("<|fim_suffix|>\n");
1534        prompt.push_str(&context[editable_range.end..]);
1535        if !prompt.ends_with('\n') {
1536            prompt.push('\n');
1537        }
1538
1539        prompt.push_str("<|fim_middle|>updated\n");
1540    }
1541}
1542
1543mod v0114180_editable_region {
1544    use super::*;
1545
1546    pub fn special_tokens() -> &'static [&'static str] {
1547        v0113_ordered::special_tokens()
1548    }
1549}
1550
1551pub mod v0120_git_merge_markers {
1552    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1553    //!
1554    //! Example prompt:
1555    //!
1556    //! <|file_sep|>path/to/target_file.py
1557    //! <|fim_prefix|>
1558    //! code before editable region
1559    //! <|fim_suffix|>
1560    //! code after editable region
1561    //! <|fim_middle|>
1562    //! <<<<<<< CURRENT
1563    //! code that
1564    //! needs to<|user_cursor|>
1565    //! be rewritten
1566    //! =======
1567    //!
1568    //! Expected output (should be generated by the model):
1569    //!
1570    //! updated
1571    //! code with
1572    //! changes applied
1573    //! >>>>>>> UPDATED
1574
1575    use super::*;
1576
1577    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1578    pub const SEPARATOR: &str = "=======\n";
1579    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1580
1581    pub fn special_tokens() -> &'static [&'static str] {
1582        &[
1583            "<|fim_prefix|>",
1584            "<|fim_suffix|>",
1585            "<|fim_middle|>",
1586            "<|file_sep|>",
1587            START_MARKER,
1588            SEPARATOR,
1589            END_MARKER,
1590            CURSOR_MARKER,
1591        ]
1592    }
1593
1594    pub fn write_cursor_excerpt_section(
1595        prompt: &mut String,
1596        path: &Path,
1597        context: &str,
1598        editable_range: &Range<usize>,
1599        cursor_offset: usize,
1600    ) {
1601        let path_str = path.to_string_lossy();
1602        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1603
1604        prompt.push_str("<|fim_prefix|>");
1605        prompt.push_str(&context[..editable_range.start]);
1606
1607        prompt.push_str("<|fim_suffix|>");
1608        prompt.push_str(&context[editable_range.end..]);
1609        if !prompt.ends_with('\n') {
1610            prompt.push('\n');
1611        }
1612
1613        prompt.push_str("<|fim_middle|>");
1614        prompt.push_str(START_MARKER);
1615        prompt.push_str(&context[editable_range.start..cursor_offset]);
1616        prompt.push_str(CURSOR_MARKER);
1617        prompt.push_str(&context[cursor_offset..editable_range.end]);
1618        if !prompt.ends_with('\n') {
1619            prompt.push('\n');
1620        }
1621        prompt.push_str(SEPARATOR);
1622    }
1623}
1624
1625pub mod v0131_git_merge_markers_prefix {
1626    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1627    //!
1628    //! Example prompt:
1629    //!
1630    //! <|file_sep|>path/to/target_file.py
1631    //! <|fim_prefix|>
1632    //! code before editable region
1633    //! <<<<<<< CURRENT
1634    //! code that
1635    //! needs to<|user_cursor|>
1636    //! be rewritten
1637    //! =======
1638    //! <|fim_suffix|>
1639    //! code after editable region
1640    //! <|fim_middle|>
1641    //!
1642    //! Expected output (should be generated by the model):
1643    //!
1644    //! updated
1645    //! code with
1646    //! changes applied
1647    //! >>>>>>> UPDATED
1648
1649    use super::*;
1650
1651    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1652    pub const SEPARATOR: &str = "=======\n";
1653    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1654
1655    pub fn special_tokens() -> &'static [&'static str] {
1656        &[
1657            "<|fim_prefix|>",
1658            "<|fim_suffix|>",
1659            "<|fim_middle|>",
1660            "<|file_sep|>",
1661            START_MARKER,
1662            SEPARATOR,
1663            END_MARKER,
1664            CURSOR_MARKER,
1665        ]
1666    }
1667
1668    pub fn write_cursor_excerpt_section(
1669        prompt: &mut String,
1670        path: &Path,
1671        context: &str,
1672        editable_range: &Range<usize>,
1673        cursor_offset: usize,
1674    ) {
1675        let path_str = path.to_string_lossy();
1676        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1677
1678        prompt.push_str("<|fim_prefix|>");
1679        prompt.push_str(&context[..editable_range.start]);
1680        prompt.push_str(START_MARKER);
1681        prompt.push_str(&context[editable_range.start..cursor_offset]);
1682        prompt.push_str(CURSOR_MARKER);
1683        prompt.push_str(&context[cursor_offset..editable_range.end]);
1684        if !prompt.ends_with('\n') {
1685            prompt.push('\n');
1686        }
1687        prompt.push_str(SEPARATOR);
1688
1689        prompt.push_str("<|fim_suffix|>");
1690        prompt.push_str(&context[editable_range.end..]);
1691        if !prompt.ends_with('\n') {
1692            prompt.push('\n');
1693        }
1694
1695        prompt.push_str("<|fim_middle|>");
1696    }
1697}
1698
1699pub mod v0211_prefill {
1700    use super::*;
1701
1702    pub fn special_tokens() -> &'static [&'static str] {
1703        v0131_git_merge_markers_prefix::special_tokens()
1704    }
1705
1706    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1707        let editable_region = &context[editable_range.start..editable_range.end];
1708
1709        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1710        let prefill_len = editable_region.floor_char_boundary(prefill_len);
1711
1712        // Find a token boundary to avoid splitting tokens in the prefill.
1713        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1714        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1715        // the \n and consume any consecutive \n characters after it.
1716        let prefill = &editable_region[..prefill_len];
1717        match prefill.rfind('\n') {
1718            Some(pos) => {
1719                let mut end = pos + 1;
1720                while end < editable_region.len()
1721                    && editable_region.as_bytes().get(end) == Some(&b'\n')
1722                {
1723                    end += 1;
1724                }
1725                editable_region[..end].to_string()
1726            }
1727            // No newline found. Fall back to splitting before the last space
1728            // (word-level boundary)
1729            None => match prefill.rfind(' ') {
1730                Some(pos) => prefill[..pos].to_string(),
1731                None => prefill.to_string(),
1732            },
1733        }
1734    }
1735}
1736
1737pub mod hashline {
1738
1739    use std::fmt::Display;
1740
    /// Marker that `write_cursor_excerpt_section` writes as the last line of
    /// the prompt, after the suffix.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Marker that `write_cursor_excerpt_section` writes right before the
    /// hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";

    use super::*;

    /// Prefix of a model-output line that replaces one line or an inclusive
    /// range of lines.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a model-output line that inserts new lines.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
    /// When the model output starts with this marker, `apply_edit_commands`
    /// returns the editable region unchanged.
    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1749
1750    pub fn special_tokens() -> &'static [&'static str] {
1751        return &[
1752            SET_COMMAND_MARKER,
1753            "<|set_range|>",
1754            INSERT_COMMAND_MARKER,
1755            NO_EDITS_COMMAND_MARKER,
1756            CURSOR_MARKER,
1757            "<|file_sep|>",
1758            "<|fim_prefix|>",
1759            "<|fim_suffix|>",
1760            "<|fim_middle|>",
1761        ];
1762    }
1763
1764    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1765    #[derive(Debug, Clone, PartialEq, Eq)]
1766    struct LineRef {
1767        index: usize,
1768        hash: u8,
1769    }
1770
1771    impl Display for LineRef {
1772        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1773            write!(f, "{}:{:02x}", self.index, self.hash)
1774        }
1775    }
1776
1777    pub fn hash_line(line: &[u8]) -> u8 {
1778        let mut h: u8 = 0;
1779        for &byte in line {
1780            h = h.wrapping_add(byte);
1781        }
1782        return h;
1783    }
1784
1785    /// Write the hashline-encoded editable region into `out`. Each line of
1786    /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1787    /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1788    /// to the start of `editable_text`).
1789    pub fn write_hashline_editable_region(
1790        out: &mut String,
1791        editable_text: &str,
1792        cursor_offset_in_editable: usize,
1793    ) {
1794        let mut offset = 0;
1795        for (i, line) in editable_text.lines().enumerate() {
1796            let (head, cursor, tail) = if cursor_offset_in_editable > offset
1797                && cursor_offset_in_editable < offset + line.len()
1798            {
1799                (
1800                    &line[..cursor_offset_in_editable - offset],
1801                    CURSOR_MARKER,
1802                    &line[cursor_offset_in_editable - offset..],
1803                )
1804            } else {
1805                (line, "", "")
1806            };
1807            write!(
1808                out,
1809                "\n{}|{head}{cursor}{tail}",
1810                LineRef {
1811                    index: i,
1812                    hash: hash_line(line.as_bytes())
1813                }
1814            )
1815            .unwrap();
1816            offset += line.len() + 1;
1817        }
1818    }
1819
1820    pub fn write_cursor_excerpt_section(
1821        prompt: &mut String,
1822        path: &Path,
1823        context: &str,
1824        editable_range: &Range<usize>,
1825        cursor_offset: usize,
1826    ) {
1827        let path_str = path.to_string_lossy();
1828        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1829
1830        prompt.push_str("<|fim_prefix|>\n");
1831        prompt.push_str(&context[..editable_range.start]);
1832        prompt.push_str(START_MARKER);
1833
1834        let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1835        let editable_region = &context[editable_range.clone()];
1836        write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1837
1838        if !prompt.ends_with('\n') {
1839            prompt.push('\n');
1840        }
1841
1842        prompt.push_str("<|fim_suffix|>\n");
1843        prompt.push_str(&context[editable_range.end..]);
1844        if !prompt.ends_with('\n') {
1845            prompt.push('\n');
1846        }
1847
1848        prompt.push_str(END_MARKER);
1849        prompt.push('\n');
1850    }
1851
    /// A single edit command parsed from the model output.
    ///
    /// `content` borrows from the model output and holds the lines that follow
    /// the command line, up to the next command line or the end of the output.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            // First line of the replaced range.
            start: LineRef,
            // Last line of the replaced range (inclusive).
            end: LineRef,
            // Replacement text; may be empty.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            // Line to insert after; `None` means "before the first line".
            after: Option<LineRef>,
            // Text to insert.
            content: &'a str,
        },
    }
1869
1870    /// Parse a line reference like `3:c3` into a `LineRef`.
1871    fn parse_line_ref(s: &str) -> Option<LineRef> {
1872        let (idx_str, hash_str) = s.split_once(':')?;
1873        let index = idx_str.parse::<usize>().ok()?;
1874        let hash = u8::from_str_radix(hash_str, 16).ok()?;
1875        Some(LineRef { index, hash })
1876    }
1877
1878    /// Parse the model output into a list of `EditCommand`s.
1879    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1880        let mut commands = Vec::new();
1881        let mut offset = 0usize;
1882
1883        while offset < model_output.len() {
1884            let next_nl = model_output[offset..]
1885                .find('\n')
1886                .map(|i| offset + i)
1887                .unwrap_or(model_output.len());
1888            let line = &model_output[offset..next_nl];
1889            let line_end = if next_nl < model_output.len() {
1890                next_nl + 1
1891            } else {
1892                next_nl
1893            };
1894
1895            let trimmed = line.trim();
1896            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1897                (true, spec)
1898            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1899                (false, spec)
1900            } else {
1901                offset = line_end;
1902                continue;
1903            };
1904
1905            let mut content_end = line_end;
1906            let mut scan = line_end;
1907
1908            while scan < model_output.len() {
1909                let body_nl = model_output[scan..]
1910                    .find('\n')
1911                    .map(|i| scan + i)
1912                    .unwrap_or(model_output.len());
1913                let body_line = &model_output[scan..body_nl];
1914                if body_line.trim().starts_with(SET_COMMAND_MARKER)
1915                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1916                {
1917                    break;
1918                }
1919                scan = if body_nl < model_output.len() {
1920                    body_nl + 1
1921                } else {
1922                    body_nl
1923                };
1924                content_end = scan;
1925            }
1926
1927            let content = &model_output[line_end..content_end];
1928
1929            if is_set {
1930                if let Some((start_str, end_str)) = specifier.split_once('-') {
1931                    if let (Some(start), Some(end)) =
1932                        (parse_line_ref(start_str), parse_line_ref(end_str))
1933                    {
1934                        commands.push(EditCommand::Set {
1935                            start,
1936                            end,
1937                            content,
1938                        });
1939                    }
1940                } else if let Some(target) = parse_line_ref(specifier) {
1941                    commands.push(EditCommand::Set {
1942                        start: target.clone(),
1943                        end: target,
1944                        content,
1945                    });
1946                }
1947            } else {
1948                let after = parse_line_ref(specifier);
1949                commands.push(EditCommand::Insert { after, content });
1950            }
1951
1952            offset = scan;
1953        }
1954
1955        commands
1956    }
1957
1958    /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1959    /// (as opposed to being a plain full-replacement output).
1960    /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1961    /// editable region, returning the plain text content.
1962    pub fn strip_hashline_prefixes(region: &str) -> String {
1963        let mut decoded: String = region
1964            .lines()
1965            .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1966            .collect::<Vec<_>>()
1967            .join("\n");
1968        if region.ends_with('\n') {
1969            decoded.push('\n');
1970        }
1971        decoded
1972    }
1973
1974    pub fn output_has_edit_commands(model_output: &str) -> bool {
1975        model_output.contains(SET_COMMAND_MARKER)
1976            || model_output.contains(INSERT_COMMAND_MARKER)
1977            || model_output.contains(NO_EDITS_COMMAND_MARKER)
1978    }
1979
1980    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1981    /// original editable region text.
1982    ///
1983    /// `editable_region` is the original text of the editable region (without hash
1984    /// prefixes). `model_output` is the raw model response containing edit commands.
1985    ///
1986    /// Returns the full replacement text for the editable region.
1987    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1988        if model_output
1989            .trim_start()
1990            .starts_with(NO_EDITS_COMMAND_MARKER)
1991        {
1992            return editable_region.to_string();
1993        }
1994
1995        let original_lines: Vec<&str> = editable_region.lines().collect();
1996        let old_hashes: Vec<u8> = original_lines
1997            .iter()
1998            .map(|line| hash_line(line.as_bytes()))
1999            .collect();
2000
2001        let commands = parse_edit_commands(model_output);
2002
2003        // For set operations: indexed by start line → Some((end line index, content))
2004        // For insert operations: indexed by line index → vec of content to insert after
2005        // Insert-before-first is tracked separately.
2006        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
2007        let mut insert_before_first: Vec<&str> = Vec::new();
2008        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
2009
2010        for command in &commands {
2011            match command {
2012                EditCommand::Set {
2013                    start,
2014                    end,
2015                    content,
2016                } => {
2017                    if start.index < old_hashes.len()
2018                        && end.index < old_hashes.len()
2019                        && start.index <= end.index
2020                        && old_hashes[start.index] == start.hash
2021                        && old_hashes[end.index] == end.hash
2022                    {
2023                        set_ops[start.index] = Some((end.index, *content));
2024                    }
2025                }
2026                EditCommand::Insert { after, content } => match after {
2027                    None => insert_before_first.push(*content),
2028                    Some(line_ref) => {
2029                        if line_ref.index < old_hashes.len()
2030                            && old_hashes[line_ref.index] == line_ref.hash
2031                        {
2032                            insert_after[line_ref.index].push(*content);
2033                        }
2034                    }
2035                },
2036            }
2037        }
2038
2039        let mut result = String::new();
2040
2041        // Emit any insertions before the first line
2042        for content in &insert_before_first {
2043            result.push_str(content);
2044            if !content.ends_with('\n') {
2045                result.push('\n');
2046            }
2047        }
2048
2049        let mut i = 0;
2050        while i < original_lines.len() {
2051            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
2052                // Replace lines i..=end_index with the replacement content
2053                result.push_str(replacement);
2054                if !replacement.is_empty() && !replacement.ends_with('\n') {
2055                    result.push('\n');
2056                }
2057                // Emit any insertions after the end of this set range
2058                if *end_index < insert_after.len() {
2059                    for content in &insert_after[*end_index] {
2060                        result.push_str(content);
2061                        if !content.ends_with('\n') {
2062                            result.push('\n');
2063                        }
2064                    }
2065                }
2066                i = end_index + 1;
2067            } else {
2068                // Keep the original line
2069                result.push_str(original_lines[i]);
2070                result.push('\n');
2071                // Emit any insertions after this line
2072                for content in &insert_after[i] {
2073                    result.push_str(content);
2074                    if !content.ends_with('\n') {
2075                        result.push('\n');
2076                    }
2077                }
2078                i += 1;
2079            }
2080        }
2081
2082        // Preserve trailing newline behavior: if the original ended with a
2083        // newline the result already has one; if it didn't, trim the extra one
2084        // we added.
2085        if !editable_region.ends_with('\n') && result.ends_with('\n') {
2086            result.pop();
2087        }
2088
2089        result
2090    }
2091
2092    /// Convert a unified diff patch into hashline edit commands.
2093    ///
2094    /// Parses the unified diff `patch` directly to determine which lines of
2095    /// `old_text` are deleted/replaced and what new lines are added, then emits
2096    /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
2097    /// `{index}:{hash}` identifiers.
2098    ///
2099    /// `cursor_offset` is an optional byte offset into the first hunk's new
2100    /// text (context + additions) where the cursor marker should be placed.
2101    pub fn patch_to_edit_commands(
2102        old_text: &str,
2103        patch: &str,
2104        cursor_offset: Option<usize>,
2105    ) -> Result<String> {
2106        let old_lines: Vec<&str> = old_text.lines().collect();
2107        let old_hashes: Vec<u8> = old_lines
2108            .iter()
2109            .map(|line| hash_line(line.as_bytes()))
2110            .collect();
2111
2112        let mut result = String::new();
2113        let mut first_hunk = true;
2114
2115        struct Hunk<'a> {
2116            line_range: Range<usize>,
2117            new_text_lines: Vec<&'a str>,
2118            cursor_line_offset_in_new_text: Option<(usize, usize)>,
2119        }
2120
2121        // Parse the patch line by line. We only care about hunk headers,
2122        // context, deletions, and additions.
2123        let mut old_line_index: usize = 0;
2124        let mut current_hunk: Option<Hunk> = None;
2125        // Byte offset tracking within the hunk's new text for cursor placement.
2126        let mut new_text_byte_offset: usize = 0;
2127        // The line index of the last old line seen before/in the current hunk
2128        // (used for insert-after reference).
2129        let mut last_old_line_before_hunk: Option<usize> = None;
2130
2131        fn flush_hunk(
2132            hunk: Hunk,
2133            last_old_line: Option<usize>,
2134            result: &mut String,
2135            old_hashes: &[u8],
2136        ) {
2137            if hunk.line_range.is_empty() {
2138                // Pure insertion — reference the old line to insert after when in bounds.
2139                if let Some(after) = last_old_line
2140                    && let Some(&hash) = old_hashes.get(after)
2141                {
2142                    write!(
2143                        result,
2144                        "{INSERT_COMMAND_MARKER}{}\n",
2145                        LineRef { index: after, hash }
2146                    )
2147                    .unwrap();
2148                } else {
2149                    result.push_str(INSERT_COMMAND_MARKER);
2150                    result.push('\n');
2151                }
2152            } else {
2153                let start = hunk.line_range.start;
2154                let end_exclusive = hunk.line_range.end;
2155                let deleted_line_count = end_exclusive.saturating_sub(start);
2156
2157                if deleted_line_count == 1 {
2158                    if let Some(&hash) = old_hashes.get(start) {
2159                        write!(
2160                            result,
2161                            "{SET_COMMAND_MARKER}{}\n",
2162                            LineRef { index: start, hash }
2163                        )
2164                        .unwrap();
2165                    } else {
2166                        result.push_str(SET_COMMAND_MARKER);
2167                        result.push('\n');
2168                    }
2169                } else {
2170                    let end_inclusive = end_exclusive - 1;
2171                    match (
2172                        old_hashes.get(start).copied(),
2173                        old_hashes.get(end_inclusive).copied(),
2174                    ) {
2175                        (Some(start_hash), Some(end_hash)) => {
2176                            write!(
2177                                result,
2178                                "{SET_COMMAND_MARKER}{}-{}\n",
2179                                LineRef {
2180                                    index: start,
2181                                    hash: start_hash
2182                                },
2183                                LineRef {
2184                                    index: end_inclusive,
2185                                    hash: end_hash
2186                                }
2187                            )
2188                            .unwrap();
2189                        }
2190                        _ => {
2191                            result.push_str(SET_COMMAND_MARKER);
2192                            result.push('\n');
2193                        }
2194                    }
2195                }
2196            }
2197            for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
2198                if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
2199                    && line_offset == cursor_line_offset
2200                {
2201                    result.push_str(&line[..char_offset]);
2202                    result.push_str(CURSOR_MARKER);
2203                    result.push_str(&line[char_offset..]);
2204                    continue;
2205                }
2206
2207                result.push_str(line);
2208            }
2209        }
2210
2211        for raw_line in patch.split_inclusive('\n') {
2212            if raw_line.starts_with("@@") {
2213                // Flush any pending change hunk from a previous patch hunk.
2214                if let Some(hunk) = current_hunk.take() {
2215                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2216                }
2217
2218                // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
2219                // We intentionally do not trust old_start as a direct local index into `old_text`,
2220                // because some patches are produced against a larger file region and carry
2221                // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
2222                if first_hunk {
2223                    new_text_byte_offset = 0;
2224                    first_hunk = false;
2225                }
2226                continue;
2227            }
2228
2229            if raw_line.starts_with("---") || raw_line.starts_with("+++") {
2230                continue;
2231            }
2232            if raw_line.starts_with("\\ No newline") {
2233                continue;
2234            }
2235
2236            if raw_line.starts_with('-') {
2237                // Extend or start a change hunk with this deleted old line.
2238                match &mut current_hunk {
2239                    Some(Hunk {
2240                        line_range: range, ..
2241                    }) => range.end = old_line_index + 1,
2242                    None => {
2243                        current_hunk = Some(Hunk {
2244                            line_range: old_line_index..old_line_index + 1,
2245                            new_text_lines: Vec::new(),
2246                            cursor_line_offset_in_new_text: None,
2247                        });
2248                    }
2249                }
2250                old_line_index += 1;
2251            } else if let Some(added_content) = raw_line.strip_prefix('+') {
2252                // Place cursor marker if cursor_offset falls within this line.
2253                let mut cursor_line_offset = None;
2254                if let Some(cursor_off) = cursor_offset
2255                    && (first_hunk
2256                        || cursor_off >= new_text_byte_offset
2257                            && cursor_off <= new_text_byte_offset + added_content.len())
2258                {
2259                    let line_offset = added_content.floor_char_boundary(
2260                        cursor_off
2261                            .saturating_sub(new_text_byte_offset)
2262                            .min(added_content.len()),
2263                    );
2264                    cursor_line_offset = Some(line_offset);
2265                }
2266
2267                new_text_byte_offset += added_content.len();
2268
2269                let hunk = current_hunk.get_or_insert(Hunk {
2270                    line_range: old_line_index..old_line_index,
2271                    new_text_lines: vec![],
2272                    cursor_line_offset_in_new_text: None,
2273                });
2274                hunk.new_text_lines.push(added_content);
2275                hunk.cursor_line_offset_in_new_text = cursor_line_offset
2276                    .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
2277            } else {
2278                // Context line (starts with ' ' or is empty).
2279                if let Some(hunk) = current_hunk.take() {
2280                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2281                }
2282                last_old_line_before_hunk = Some(old_line_index);
2283                old_line_index += 1;
2284                let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
2285                new_text_byte_offset += content.len();
2286            }
2287        }
2288
2289        // Flush final group.
2290        if let Some(hunk) = current_hunk.take() {
2291            flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2292        }
2293
2294        // Trim a single trailing newline.
2295        if result.ends_with('\n') {
2296            result.pop();
2297        }
2298
2299        if result.is_empty() {
2300            return Ok(NO_EDITS_COMMAND_MARKER.to_string());
2301        }
2302
2303        Ok(result)
2304    }
2305
2306    #[cfg(test)]
2307    mod tests {
2308        use super::*;
2309        use indoc::indoc;
2310
        // Table-driven test for `hashline::write_cursor_excerpt_section`: each
        // case renders a prompt for `test.rs` and compares it with the expected
        // text. The cases also pin that the cursor marker is only emitted when
        // the cursor lies strictly inside a line.
        #[test]
        fn test_format_cursor_region() {
            struct Case {
                name: &'static str,
                context: &'static str,
                editable_range: Range<usize>,
                cursor_offset: usize,
                expected: &'static str,
            }

            let cases = [
                Case {
                    name: "basic_cursor_placement",
                    context: "hello world\n",
                    editable_range: 0..12,
                    cursor_offset: 5,
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:5c|hello<|user_cursor|> world
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "multiline_cursor_on_second_line",
                    context: "aaa\nbbb\nccc\n",
                    editable_range: 0..12,
                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:23|aaa
                    1:26|b<|user_cursor|>bb
                    2:29|ccc
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "no_trailing_newline_in_context",
                    context: "line1\nline2",
                    editable_range: 0..11,
                    cursor_offset: 3,
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:d9|lin<|user_cursor|>e1
                    1:da|line2
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "leading_newline_in_editable_region",
                    context: "\nabc\n",
                    editable_range: 0..5,
                    cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:00|
                    1:26|a<|user_cursor|>bc
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "with_suffix",
                    context: "abc\ndef",
                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
                    cursor_offset: 2,
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:26|ab<|user_cursor|>c
                    <|fim_suffix|>
                    def
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "unicode_two_byte_chars",
                    context: "héllo\n",
                    editable_range: 0..7,
                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:1b|hé<|user_cursor|>llo
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "unicode_three_byte_chars",
                    context: "日本語\n",
                    editable_range: 0..10,
                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:80|日本<|user_cursor|>語
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "unicode_four_byte_chars",
                    context: "a🌍b\n",
                    editable_range: 0..7,
                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:6b|a🌍<|user_cursor|>b
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "cursor_at_start_of_region_not_placed",
                    context: "abc\n",
                    editable_range: 0..4,
                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:26|abc
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "cursor_at_end_of_line_not_placed",
                    context: "abc\ndef\n",
                    editable_range: 0..8,
                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:26|abc
                    1:2f|def
                    <|fim_suffix|>
                    <|fim_middle|>updated
                    "},
                },
                Case {
                    name: "cursor_offset_relative_to_context_not_editable_region",
                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
                    // write_cursor_excerpt_section must subtract it before comparing against
                    // per-line offsets within the editable region.
                    context: "pre\naaa\nbbb\nsuf\n",
                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    pre
                    <|fim_middle|>current
                    0:23|aaa
                    1:26|b<|user_cursor|>bb
                    <|fim_suffix|>
                    suf
                    <|fim_middle|>updated
                    "},
                },
            ];

            // Render every case and report the failing case by name.
            for case in &cases {
                let mut prompt = String::new();
                hashline::write_cursor_excerpt_section(
                    &mut prompt,
                    Path::new("test.rs"),
                    case.context,
                    &case.editable_range,
                    case.cursor_offset,
                );
                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
            }
        }
2502
2503        #[test]
2504        fn test_apply_edit_commands() {
2505            struct Case {
2506                name: &'static str,
2507                original: &'static str,
2508                model_output: &'static str,
2509                expected: &'static str,
2510            }
2511
2512            let cases = vec![
2513                Case {
2514                    name: "set_single_line",
2515                    original: indoc! {"
2516                    let mut total = 0;
2517                    for product in products {
2518                        total += ;
2519                    }
2520                    total
2521                "},
2522                    model_output: indoc! {"
2523                    <|set|>2:87
2524                        total += product.price;
2525                "},
2526                    expected: indoc! {"
2527                    let mut total = 0;
2528                    for product in products {
2529                        total += product.price;
2530                    }
2531                    total
2532                "},
2533                },
2534                Case {
2535                    name: "set_range",
2536                    original: indoc! {"
2537                    fn foo() {
2538                        let x = 1;
2539                        let y = 2;
2540                        let z = 3;
2541                    }
2542                "},
2543                    model_output: indoc! {"
2544                    <|set|>1:46-3:4a
2545                        let sum = 6;
2546                "},
2547                    expected: indoc! {"
2548                    fn foo() {
2549                        let sum = 6;
2550                    }
2551                "},
2552                },
2553                Case {
2554                    name: "insert_after_line",
2555                    original: indoc! {"
2556                    fn main() {
2557                        let x = 1;
2558                    }
2559                "},
2560                    model_output: indoc! {"
2561                    <|insert|>1:46
2562                        let y = 2;
2563                "},
2564                    expected: indoc! {"
2565                    fn main() {
2566                        let x = 1;
2567                        let y = 2;
2568                    }
2569                "},
2570                },
2571                Case {
2572                    name: "insert_before_first",
2573                    original: indoc! {"
2574                    let x = 1;
2575                    let y = 2;
2576                "},
2577                    model_output: indoc! {"
2578                    <|insert|>
2579                    use std::io;
2580                "},
2581                    expected: indoc! {"
2582                    use std::io;
2583                    let x = 1;
2584                    let y = 2;
2585                "},
2586                },
2587                Case {
2588                    name: "set_with_cursor_marker",
2589                    original: indoc! {"
2590                    fn main() {
2591                        println!();
2592                    }
2593                "},
2594                    model_output: indoc! {"
2595                    <|set|>1:34
2596                        eprintln!(\"<|user_cursor|>\");
2597                "},
2598                    expected: indoc! {"
2599                    fn main() {
2600                        eprintln!(\"<|user_cursor|>\");
2601                    }
2602                "},
2603                },
2604                Case {
2605                    name: "multiple_set_commands",
2606                    original: indoc! {"
2607                    aaa
2608                    bbb
2609                    ccc
2610                    ddd
2611                "},
2612                    model_output: indoc! {"
2613                    <|set|>0:23
2614                    AAA
2615                    <|set|>2:29
2616                    CCC
2617                "},
2618                    expected: indoc! {"
2619                    AAA
2620                    bbb
2621                    CCC
2622                    ddd
2623                "},
2624                },
2625                Case {
2626                    name: "set_range_multiline_replacement",
2627                    original: indoc! {"
2628                    fn handle_submit() {
2629                    }
2630
2631                    fn handle_keystroke() {
2632                "},
2633                    model_output: indoc! {"
2634                    <|set|>0:3f-1:7d
2635                    fn handle_submit(modal_state: &mut ModalState) {
2636                        <|user_cursor|>
2637                    }
2638                "},
2639                    expected: indoc! {"
2640                    fn handle_submit(modal_state: &mut ModalState) {
2641                        <|user_cursor|>
2642                    }
2643
2644                    fn handle_keystroke() {
2645                "},
2646                },
2647                Case {
2648                    name: "no_edit_commands_returns_original",
2649                    original: indoc! {"
2650                    hello
2651                    world
2652                "},
2653                    model_output: "some random text with no commands",
2654                    expected: indoc! {"
2655                    hello
2656                    world
2657                "},
2658                },
2659                Case {
2660                    name: "no_edits_command_returns_original",
2661                    original: indoc! {"
2662                    hello
2663                    world
2664                "},
2665                    model_output: "<|no_edits|>",
2666                    expected: indoc! {"
2667                    hello
2668                    world
2669                "},
2670                },
2671                Case {
2672                    name: "wrong_hash_set_ignored",
2673                    original: indoc! {"
2674                    aaa
2675                    bbb
2676                "},
2677                    model_output: indoc! {"
2678                    <|set|>0:ff
2679                    ZZZ
2680                "},
2681                    expected: indoc! {"
2682                    aaa
2683                    bbb
2684                "},
2685                },
2686                Case {
2687                    name: "insert_and_set_combined",
2688                    original: indoc! {"
2689                    alpha
2690                    beta
2691                    gamma
2692                "},
2693                    model_output: indoc! {"
2694                    <|set|>0:06
2695                    ALPHA
2696                    <|insert|>1:9c
2697                    beta_extra
2698                "},
2699                    expected: indoc! {"
2700                    ALPHA
2701                    beta
2702                    beta_extra
2703                    gamma
2704                "},
2705                },
2706                Case {
2707                    name: "no_trailing_newline_preserved",
2708                    original: "hello\nworld",
2709                    model_output: indoc! {"
2710                    <|set|>0:14
2711                    HELLO
2712                "},
2713                    expected: "HELLO\nworld",
2714                },
2715                Case {
2716                    name: "set_range_hash_mismatch_in_end_bound",
2717                    original: indoc! {"
2718                    one
2719                    two
2720                    three
2721                "},
2722                    model_output: indoc! {"
2723                    <|set|>0:42-2:ff
2724                    ONE_TWO_THREE
2725                "},
2726                    expected: indoc! {"
2727                    one
2728                    two
2729                    three
2730                "},
2731                },
2732                Case {
2733                    name: "set_range_start_greater_than_end_ignored",
2734                    original: indoc! {"
2735                    a
2736                    b
2737                    c
2738                "},
2739                    model_output: indoc! {"
2740                    <|set|>2:63-1:62
2741                    X
2742                "},
2743                    expected: indoc! {"
2744                    a
2745                    b
2746                    c
2747                "},
2748                },
2749                Case {
2750                    name: "insert_out_of_bounds_ignored",
2751                    original: indoc! {"
2752                    x
2753                    y
2754                "},
2755                    model_output: indoc! {"
2756                    <|insert|>99:aa
2757                    z
2758                "},
2759                    expected: indoc! {"
2760                    x
2761                    y
2762                "},
2763                },
2764                Case {
2765                    name: "set_out_of_bounds_ignored",
2766                    original: indoc! {"
2767                    x
2768                    y
2769                "},
2770                    model_output: indoc! {"
2771                    <|set|>99:aa
2772                    z
2773                "},
2774                    expected: indoc! {"
2775                    x
2776                    y
2777                "},
2778                },
2779                Case {
2780                    name: "malformed_set_command_ignored",
2781                    original: indoc! {"
2782                    alpha
2783                    beta
2784                "},
2785                    model_output: indoc! {"
2786                    <|set|>not-a-line-ref
2787                    UPDATED
2788                "},
2789                    expected: indoc! {"
2790                    alpha
2791                    beta
2792                "},
2793                },
2794                Case {
2795                    name: "malformed_insert_hash_treated_as_before_first",
2796                    original: indoc! {"
2797                    alpha
2798                    beta
2799                "},
2800                    model_output: indoc! {"
2801                    <|insert|>1:nothex
2802                    preamble
2803                "},
2804                    expected: indoc! {"
2805                    preamble
2806                    alpha
2807                    beta
2808                "},
2809                },
2810                Case {
2811                    name: "set_then_insert_same_target_orders_insert_after_replacement",
2812                    original: indoc! {"
2813                    cat
2814                    dog
2815                "},
2816                    model_output: indoc! {"
2817                    <|set|>0:38
2818                    CAT
2819                    <|insert|>0:38
2820                    TAIL
2821                "},
2822                    expected: indoc! {"
2823                    CAT
2824                    TAIL
2825                    dog
2826                "},
2827                },
2828                Case {
2829                    name: "overlapping_set_ranges_last_wins",
2830                    original: indoc! {"
2831                    a
2832                    b
2833                    c
2834                    d
2835                "},
2836                    model_output: indoc! {"
2837                    <|set|>0:61-2:63
2838                    FIRST
2839                    <|set|>1:62-3:64
2840                    SECOND
2841                "},
2842                    expected: indoc! {"
2843                    FIRST
2844                    d
2845                "},
2846                },
2847                Case {
2848                    name: "insert_before_first_and_after_line",
2849                    original: indoc! {"
2850                        a
2851                        b
2852                    "},
2853                    model_output: indoc! {"
2854                        <|insert|>
2855                        HEAD
2856                        <|insert|>0:61
2857                        MID
2858                    "},
2859                    expected: indoc! {"
2860                        HEAD
2861                        a
2862                        MID
2863                        b
2864                    "},
2865                },
2866            ];
2867
2868            for case in &cases {
2869                let result = hashline::apply_edit_commands(case.original, &case.model_output);
2870                assert_eq!(result, case.expected, "failed case: {}", case.name);
2871            }
2872        }
2873
2874        #[test]
2875        fn test_output_has_edit_commands() {
2876            assert!(hashline::output_has_edit_commands(&format!(
2877                "{}0:ab\nnew",
2878                SET_COMMAND_MARKER
2879            )));
2880            assert!(hashline::output_has_edit_commands(&format!(
2881                "{}0:ab\nnew",
2882                INSERT_COMMAND_MARKER
2883            )));
2884            assert!(hashline::output_has_edit_commands(&format!(
2885                "some text\n{}1:cd\nstuff",
2886                SET_COMMAND_MARKER
2887            )));
2888            assert!(!hashline::output_has_edit_commands("just plain text"));
2889            assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2890            assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2891        }
2892
2893        // ---- hashline::patch_to_edit_commands round-trip tests ----
2894
2895        #[test]
2896        fn test_patch_to_edit_commands() {
2897            struct Case {
2898                name: &'static str,
2899                old: &'static str,
2900                patch: &'static str,
2901                expected_new: &'static str,
2902            }
2903
2904            let cases = [
2905                Case {
2906                    name: "single_line_replacement",
2907                    old: indoc! {"
2908                    let mut total = 0;
2909                    for product in products {
2910                        total += ;
2911                    }
2912                    total
2913                "},
2914                    patch: indoc! {"
2915                    @@ -1,5 +1,5 @@
2916                     let mut total = 0;
2917                     for product in products {
2918                    -    total += ;
2919                    +    total += product.price;
2920                     }
2921                     total
2922                "},
2923                    expected_new: indoc! {"
2924                    let mut total = 0;
2925                    for product in products {
2926                        total += product.price;
2927                    }
2928                    total
2929                "},
2930                },
2931                Case {
2932                    name: "multiline_replacement",
2933                    old: indoc! {"
2934                    fn foo() {
2935                        let x = 1;
2936                        let y = 2;
2937                        let z = 3;
2938                    }
2939                "},
2940                    patch: indoc! {"
2941                    @@ -1,5 +1,3 @@
2942                     fn foo() {
2943                    -    let x = 1;
2944                    -    let y = 2;
2945                    -    let z = 3;
2946                    +    let sum = 1 + 2 + 3;
2947                     }
2948                "},
2949                    expected_new: indoc! {"
2950                    fn foo() {
2951                        let sum = 1 + 2 + 3;
2952                    }
2953                "},
2954                },
2955                Case {
2956                    name: "insertion",
2957                    old: indoc! {"
2958                    fn main() {
2959                        let x = 1;
2960                    }
2961                "},
2962                    patch: indoc! {"
2963                    @@ -1,3 +1,4 @@
2964                     fn main() {
2965                         let x = 1;
2966                    +    let y = 2;
2967                     }
2968                "},
2969                    expected_new: indoc! {"
2970                    fn main() {
2971                        let x = 1;
2972                        let y = 2;
2973                    }
2974                "},
2975                },
2976                Case {
2977                    name: "insertion_before_first",
2978                    old: indoc! {"
2979                    let x = 1;
2980                    let y = 2;
2981                "},
2982                    patch: indoc! {"
2983                    @@ -1,2 +1,3 @@
2984                    +use std::io;
2985                     let x = 1;
2986                     let y = 2;
2987                "},
2988                    expected_new: indoc! {"
2989                    use std::io;
2990                    let x = 1;
2991                    let y = 2;
2992                "},
2993                },
2994                Case {
2995                    name: "deletion",
2996                    old: indoc! {"
2997                    aaa
2998                    bbb
2999                    ccc
3000                    ddd
3001                "},
3002                    patch: indoc! {"
3003                    @@ -1,4 +1,2 @@
3004                     aaa
3005                    -bbb
3006                    -ccc
3007                     ddd
3008                "},
3009                    expected_new: indoc! {"
3010                    aaa
3011                    ddd
3012                "},
3013                },
3014                Case {
3015                    name: "multiple_changes",
3016                    old: indoc! {"
3017                    alpha
3018                    beta
3019                    gamma
3020                    delta
3021                    epsilon
3022                "},
3023                    patch: indoc! {"
3024                    @@ -1,5 +1,5 @@
3025                    -alpha
3026                    +ALPHA
3027                     beta
3028                     gamma
3029                    -delta
3030                    +DELTA
3031                     epsilon
3032                "},
3033                    expected_new: indoc! {"
3034                    ALPHA
3035                    beta
3036                    gamma
3037                    DELTA
3038                    epsilon
3039                "},
3040                },
3041                Case {
3042                    name: "replace_with_insertion",
3043                    old: indoc! {r#"
3044                    fn handle() {
3045                        modal_state.close();
3046                        modal_state.dismiss();
3047                "#},
3048                    patch: indoc! {r#"
3049                    @@ -1,3 +1,4 @@
3050                     fn handle() {
3051                         modal_state.close();
3052                    +    eprintln!("");
3053                         modal_state.dismiss();
3054                "#},
3055                    expected_new: indoc! {r#"
3056                    fn handle() {
3057                        modal_state.close();
3058                        eprintln!("");
3059                        modal_state.dismiss();
3060                "#},
3061                },
3062                Case {
3063                    name: "complete_replacement",
3064                    old: indoc! {"
3065                    aaa
3066                    bbb
3067                    ccc
3068                "},
3069                    patch: indoc! {"
3070                    @@ -1,3 +1,3 @@
3071                    -aaa
3072                    -bbb
3073                    -ccc
3074                    +xxx
3075                    +yyy
3076                    +zzz
3077                "},
3078                    expected_new: indoc! {"
3079                    xxx
3080                    yyy
3081                    zzz
3082                "},
3083                },
3084                Case {
3085                    name: "add_function_body",
3086                    old: indoc! {"
3087                    fn foo() {
3088                        modal_state.dismiss();
3089                    }
3090
3091                    fn
3092
3093                    fn handle_keystroke() {
3094                "},
3095                    patch: indoc! {"
3096                    @@ -1,6 +1,8 @@
3097                     fn foo() {
3098                         modal_state.dismiss();
3099                     }
3100
3101                    -fn
3102                    +fn handle_submit() {
3103                    +    todo()
3104                    +}
3105
3106                     fn handle_keystroke() {
3107                "},
3108                    expected_new: indoc! {"
3109                    fn foo() {
3110                        modal_state.dismiss();
3111                    }
3112
3113                    fn handle_submit() {
3114                        todo()
3115                    }
3116
3117                    fn handle_keystroke() {
3118                "},
3119                },
3120                Case {
3121                    name: "with_cursor_offset",
3122                    old: indoc! {r#"
3123                    fn main() {
3124                        println!();
3125                    }
3126                "#},
3127                    patch: indoc! {r#"
3128                        @@ -1,3 +1,3 @@
3129                        fn main() {
3130                        -    println!();
3131                        +    eprintln!("");
3132                        }
3133                    "#},
3134                    expected_new: indoc! {r#"
3135                        fn main() {
3136                            eprintln!("<|user_cursor|>");
3137                        }
3138                    "#},
3139                },
3140                Case {
3141                    name: "non_local_hunk_header_pure_insertion_repro",
3142                    old: indoc! {"
3143                        aaa
3144                        bbb
3145                    "},
3146                    patch: indoc! {"
3147                        @@ -20,2 +20,3 @@
3148                        aaa
3149                        +xxx
3150                        bbb
3151                    "},
3152                    expected_new: indoc! {"
3153                        aaa
3154                        xxx
3155                        bbb
3156                    "},
3157                },
3158                Case {
3159                    name: "empty_patch_produces_no_edits_marker",
3160                    old: indoc! {"
3161                        aaa
3162                        bbb
3163                    "},
3164                    patch: "@@ -20,2 +20,3 @@\n",
3165                    expected_new: indoc! {"
3166                        aaa
3167                        bbb
3168                    "},
3169                },
3170            ];
3171
3172            for case in &cases {
3173                // The cursor_offset for patch_to_edit_commands is relative to
3174                // the first hunk's new text (context + additions). We compute
3175                // it by finding where the marker sits in the expected output
3176                // (which mirrors the new text of the hunk).
3177                let cursor_offset = case.expected_new.find(CURSOR_MARKER);
3178
3179                let commands =
3180                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
3181                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
3182
3183                assert!(
3184                    hashline::output_has_edit_commands(&commands),
3185                    "case {}: expected edit commands, got: {commands:?}",
3186                    case.name,
3187                );
3188
3189                let applied = hashline::apply_edit_commands(case.old, &commands);
3190                assert_eq!(applied, case.expected_new, "case {}", case.name);
3191            }
3192        }
3193    }
3194}
3195
3196pub mod seed_coder {
3197    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
3198    //!
3199    //! Seed-Coder uses different FIM tokens and order than Qwen:
3200    //! - SPM order: suffix comes FIRST, then prefix, then middle
3201    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
3202    //! - File markers: StarCoder-style `<filename>path` (single token + path)
3203    //!
3204    //! All context (related files, edit history) goes in the PREFIX section.
3205    //! The suffix contains only code after the editable region.
3206    //!
3207    //! Example prompt:
3208    //!
3209    //! <[fim-suffix]>
3210    //! code after editable region
3211    //! <[fim-prefix]><filename>related/file.py
3212    //! related file content
3213    //!
3214    //! <filename>edit_history
3215    //! --- a/some_file.py
3216    //! +++ b/some_file.py
3217    //! -old
3218    //! +new
3219    //!
3220    //! <filename>path/to/target_file.py
3221    //! code before editable region
3222    //! <<<<<<< CURRENT
3223    //! code that
3224    //! needs to<|user_cursor|>
3225    //! be rewritten
3226    //! =======
3227    //! <[fim-middle]>
3228    //!
3229    //! Expected output (model generates):
3230    //!
3231    //! updated
3232    //! code with
3233    //! changes applied
3234    //! >>>>>>> UPDATED
3235
3236    use super::*;
3237
3238    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
3239    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
3240    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
3241    pub const FILE_MARKER: &str = "<filename>";
3242
3243    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
3244    pub const SEPARATOR: &str = "=======\n";
3245    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
3246
3247    pub const NO_EDITS: &str = "NO_EDITS\n";
3248
3249    pub fn special_tokens() -> &'static [&'static str] {
3250        &[
3251            FIM_SUFFIX,
3252            FIM_PREFIX,
3253            FIM_MIDDLE,
3254            FILE_MARKER,
3255            START_MARKER,
3256            SEPARATOR,
3257            END_MARKER,
3258            CURSOR_MARKER,
3259        ]
3260    }
3261
3262    pub fn write_cursor_excerpt_section(
3263        prompt: &mut String,
3264        path: &Path,
3265        context: &str,
3266        editable_range: &Range<usize>,
3267        cursor_offset: usize,
3268    ) {
3269        let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3270        prompt.push_str(&section);
3271    }
3272
3273    pub fn format_prompt_with_budget(
3274        path: &Path,
3275        context: &str,
3276        editable_range: &Range<usize>,
3277        cursor_offset: usize,
3278        events: &[Arc<Event>],
3279        related_files: &[RelatedFile],
3280        max_tokens: usize,
3281    ) -> String {
3282        let cursor_prefix_section =
3283            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3284        assemble_fim_prompt(
3285            context,
3286            editable_range,
3287            &cursor_prefix_section,
3288            events,
3289            related_files,
3290            max_tokens,
3291        )
3292    }
3293
3294    pub fn assemble_fim_prompt(
3295        context: &str,
3296        editable_range: &Range<usize>,
3297        cursor_prefix_section: &str,
3298        events: &[Arc<Event>],
3299        related_files: &[RelatedFile],
3300        max_tokens: usize,
3301    ) -> String {
3302        let suffix_section = build_suffix_section(context, editable_range);
3303
3304        let suffix_tokens = estimate_tokens(suffix_section.len() + FIM_PREFIX.len());
3305        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len() + FIM_MIDDLE.len());
3306        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
3307
3308        let edit_history_section = super::format_edit_history_within_budget(
3309            events,
3310            FILE_MARKER,
3311            "edit_history",
3312            budget_after_cursor,
3313            max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
3314        );
3315        let edit_history_tokens = estimate_tokens(edit_history_section.len() + "\n".len());
3316        let budget_after_edit_history =
3317            budget_after_cursor.saturating_sub(edit_history_tokens + "\n".len());
3318
3319        let related_files_section = super::format_related_files_within_budget(
3320            related_files,
3321            FILE_MARKER,
3322            "",
3323            budget_after_edit_history,
3324        );
3325
3326        let mut prompt = String::new();
3327        prompt.push_str(&suffix_section);
3328        prompt.push_str(FIM_PREFIX);
3329        prompt.push_str(&related_files_section);
3330        if !related_files_section.is_empty() {
3331            prompt.push('\n');
3332        }
3333        prompt.push_str(&edit_history_section);
3334        if !edit_history_section.is_empty() {
3335            prompt.push('\n');
3336        }
3337        prompt.push_str(cursor_prefix_section);
3338        prompt.push_str(FIM_MIDDLE);
3339
3340        prompt
3341    }
3342
3343    pub(crate) fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
3344        let mut section = String::new();
3345        section.push_str(FIM_SUFFIX);
3346        section.push_str(&context[editable_range.end..]);
3347        if !section.ends_with('\n') {
3348            section.push('\n');
3349        }
3350        section
3351    }
3352
3353    fn build_cursor_prefix_section(
3354        path: &Path,
3355        context: &str,
3356        editable_range: &Range<usize>,
3357        cursor_offset: usize,
3358    ) -> String {
3359        let mut section = String::new();
3360        let path_str = path.to_string_lossy();
3361        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
3362
3363        section.push_str(&context[..editable_range.start]);
3364        section.push_str(START_MARKER);
3365        section.push_str(&context[editable_range.start..cursor_offset]);
3366        section.push_str(CURSOR_MARKER);
3367        section.push_str(&context[cursor_offset..editable_range.end]);
3368        if !section.ends_with('\n') {
3369            section.push('\n');
3370        }
3371        section.push_str(SEPARATOR);
3372        section
3373    }
3374
3375    /// Format patch as containing no changes if it's empty; otherwise return None.
3376    pub(crate) fn no_edits(patch: &str) -> Option<String> {
3377        // Count lines in the patch
3378        let empty_patch = patch.lines().count() <= 3;
3379        if empty_patch {
3380            Some(format!("{NO_EDITS}{END_MARKER}"))
3381        } else {
3382            None
3383        }
3384    }
3385}
3386
3387pub mod v0304_variable_edit {
3388    //! A prompt format with no fixed editable region. The entire context is shown
3389    //! to the model, and it chooses which text to replace by outputting surrounding
3390    //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
3391    //! text.
3392    //!
3393    //! Example prompt:
3394    //!
3395    //! <|file_sep|>path/to/file.py
3396    //! zero
3397    //! one
3398    //! two
3399    //! three<|user_cursor|>
3400    //! four
3401    //! five
3402    //! <|fim_prefix|>
    //!
3404    //! Expected output (model generates):
3405    //!
3406    //! two
3407    //! <|fim_middle|>
3408    //! THREE
3409    //! <|fim_suffix|>
3410    //! four
3411    //!
3412    //! The output means: find "two\n...\nfour" in the context, and replace
3413    //! everything between "two\n" and "four" with "THREE\n".
3414
3415    use super::*;
3416
3417    pub fn special_tokens() -> &'static [&'static str] {
3418        &[
3419            "<|fim_prefix|>",
3420            "<|fim_suffix|>",
3421            "<|fim_middle|>",
3422            "<|file_sep|>",
3423            CURSOR_MARKER,
3424        ]
3425    }
3426
3427    pub fn write_cursor_excerpt_section(
3428        prompt: &mut String,
3429        path: &Path,
3430        context: &str,
3431        cursor_offset: usize,
3432    ) {
3433        let path_str = path.to_string_lossy();
3434        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
3435
3436        prompt.push_str(&context[..cursor_offset]);
3437        prompt.push_str(CURSOR_MARKER);
3438        prompt.push_str(&context[cursor_offset..]);
3439        if !prompt.ends_with('\n') {
3440            prompt.push('\n');
3441        }
3442        prompt.push_str("<|fim_prefix|>\n")
3443    }
3444
3445    /// Apply a variable-edit model output to the original context text.
3446    ///
3447    /// The model output has the form:
3448    ///
3449    /// - prefix context lines
3450    /// - `<|fim_middle|>`
3451    /// - new text
3452    /// - `<|fim_suffix|>`
3453    /// - suffix context lines
3454    ///
3455    /// We locate the prefix/suffix context lines in the original text and replace
3456    /// everything between them with the new text.
3457    pub fn apply_variable_edit(
3458        context: &str,
3459        model_output: &str,
3460    ) -> Result<(Range<usize>, String)> {
3461        let (prefix_context, rest) = model_output
3462            .split_once("<|fim_middle|>\n")
3463            .or_else(|| model_output.split_once("<|fim_middle|>"))
3464            .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
3465
3466        let (new_text, suffix_context) = rest
3467            .split_once("<|fim_suffix|>\n")
3468            .or_else(|| rest.split_once("<|fim_suffix|>"))
3469            .unwrap_or((rest, ""));
3470
3471        let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
3472            suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
3473        } else {
3474            suffix_context
3475        };
3476
3477        let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
3478            .ok_or_else(|| anyhow!("could not locate prefix lines"))?
3479            + prefix_context.len();
3480        let suffix_offset = if suffix_context.is_empty() {
3481            context.len()
3482        } else {
3483            find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
3484                .ok_or_else(|| anyhow!("could not locate suffix lines"))?
3485                + prefix_offset
3486        };
3487
3488        let edit_range = prefix_offset..suffix_offset;
3489        return Ok((edit_range, new_text.to_string()));
3490    }
3491
3492    fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
3493        if needle.is_empty() {
3494            return Some(0);
3495        }
3496
3497        haystack.match_indices(needle).find_map(|(offset, _)| {
3498            let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
3499            matched_line_start.then_some(offset)
3500        })
3501    }
3502
3503    /// Convert a unified diff patch into the variable-edit output format.
3504    ///
3505    /// Parses `patch` as a unified diff against `old_text` and produces model
3506    /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
3507    /// delimiters. The diff is resolved by content matching rather than line
3508    /// numbers.
3509    pub fn patch_to_variable_edit_output(
3510        old_text: &str,
3511        patch: &str,
3512        cursor_offset: Option<usize>,
3513    ) -> Result<String> {
3514        // Parse the unified diff into hunks. Each hunk has an `old_context`
3515        // string (context + deleted lines interleaved in order) and a list of
3516        // edits expressed as byte ranges within that context plus replacement
3517        // text.
3518        let hunks = parse_hunks(patch);
3519        if hunks.is_empty() {
3520            return Ok(String::new());
3521        }
3522
3523        // Apply each hunk by finding its old_context in the text and
3524        // performing the edits. We search forward from where the previous
3525        // hunk ended so that hunks are applied in order.
3526        let mut new_text = old_text.to_string();
3527        let mut search_from: usize = 0;
3528        let mut first_hunk_pos: Option<usize> = None;
3529
3530        for hunk in &hunks {
3531            let context_pos = new_text[search_from..]
3532                .find(&hunk.old_context)
3533                .map(|pos| pos + search_from)
3534                .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
3535
3536            if first_hunk_pos.is_none() {
3537                first_hunk_pos = Some(context_pos);
3538            }
3539
3540            // Apply edits in reverse order so byte offsets remain valid.
3541            for edit in hunk.edits.iter().rev() {
3542                let abs_start = context_pos + edit.range.start;
3543                let abs_end = context_pos + edit.range.end;
3544                new_text.replace_range(abs_start..abs_end, &edit.text);
3545            }
3546
3547            // Advance past this hunk's region in the (now modified) text.
3548            let new_region_len: usize =
3549                hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
3550                    len + edit.text.len() - (edit.range.end - edit.range.start)
3551                });
3552            search_from = context_pos + new_region_len;
3553        }
3554
3555        // Now we have old_text and new_text. Find the changed line range by
3556        // comparing them.
3557        let old_lines: Vec<&str> = old_text.lines().collect();
3558        let new_lines: Vec<&str> = new_text.lines().collect();
3559
3560        // Find first differing line.
3561        let first_changed_row = old_lines
3562            .iter()
3563            .zip(new_lines.iter())
3564            .position(|(a, b)| a != b)
3565            .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
3566
3567        // Find last differing line (from the end).
3568        let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
3569        let common_suffix = old_lines
3570            .iter()
3571            .rev()
3572            .zip(new_lines.iter().rev())
3573            .take(max_suffix)
3574            .take_while(|(a, b)| a == b)
3575            .count();
3576
3577        let old_end = old_lines.len() - common_suffix;
3578        let new_end = new_lines.len() - common_suffix;
3579
3580        if first_changed_row == old_end && first_changed_row == new_end {
3581            return Ok(String::new());
3582        }
3583
3584        // Build the replacement text from new_lines[first_diff..new_end].
3585        let mut merged_new_text = String::new();
3586        for line in &new_lines[first_changed_row..new_end] {
3587            merged_new_text.push_str(line);
3588            merged_new_text.push('\n');
3589        }
3590
3591        // cursor_offset is relative to the first hunk's new content in
3592        // new_text. Translate it to an offset within merged_new_text, which
3593        // only contains lines first_diff..new_end of new_text.
3594        if let Some(hunk_offset) = cursor_offset {
3595            let hunk_start = first_hunk_pos.unwrap_or(0);
3596            let absolute_pos = hunk_start + hunk_offset;
3597
3598            // Byte offset where first_diff starts in new_text.
3599            let merged_start: usize = new_lines[..first_changed_row]
3600                .iter()
3601                .map(|line| line.len() + 1)
3602                .sum();
3603
3604            if absolute_pos >= merged_start {
3605                let relative_offset = absolute_pos - merged_start;
3606                if relative_offset <= merged_new_text.len() {
3607                    merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
3608                }
3609            }
3610        }
3611
3612        // Build output with 2 lines of context above and below.
3613        let context_lines_count = 2;
3614        let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
3615        let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
3616
3617        fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
3618            let pattern = &lines[line_range];
3619            let pattern_len = pattern.len();
3620
3621            let mut count = 0;
3622            for offset in 0..=lines.len() - pattern_len {
3623                if &lines[offset..offset + pattern_len] == pattern {
3624                    count += 1;
3625                }
3626            }
3627            count
3628        }
3629
3630        // Expand prefix and suffix until they are unique
3631        while prefix_start > 0 {
3632            if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
3633                prefix_start -= 1;
3634            } else {
3635                break;
3636            }
3637        }
3638        while suffix_end < old_lines.len() {
3639            if count_matches(old_end..suffix_end, &old_lines) > 1 {
3640                suffix_end += 1;
3641            } else {
3642                break;
3643            }
3644        }
3645
3646        let mut output = String::new();
3647        for line in &old_lines[prefix_start..first_changed_row] {
3648            output.push_str(line);
3649            output.push('\n');
3650        }
3651        output.push_str("<|fim_middle|>\n");
3652        output.push_str(&merged_new_text);
3653        output.push_str("<|fim_suffix|>\n");
3654        for line in &old_lines[old_end..suffix_end] {
3655            output.push_str(line);
3656            output.push('\n');
3657        }
3658
3659        Ok(output)
3660    }
3661
3662    struct ParsedHunk {
3663        old_context: String,
3664        edits: Vec<ParsedEdit>,
3665    }
3666
3667    struct ParsedEdit {
3668        range: Range<usize>,
3669        text: String,
3670    }
3671
3672    /// Parse a unified diff into content-based hunks. Each hunk contains an
3673    /// `old_context` string (context lines + deleted lines, which together
3674    /// form the text that should be found in the original) and a list of edits
3675    /// expressed as byte ranges within that context.
3676    fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
3677        let mut hunks = Vec::new();
3678        let mut current: Option<ParsedHunk> = None;
3679
3680        for line in patch.lines() {
3681            if line.starts_with("@@") {
3682                if let Some(hunk) = current.take() {
3683                    if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3684                        hunks.push(hunk);
3685                    }
3686                }
3687                current = Some(ParsedHunk {
3688                    old_context: String::new(),
3689                    edits: Vec::new(),
3690                });
3691            } else if line.starts_with("---") || line.starts_with("+++") {
3692                continue;
3693            } else if let Some(hunk) = &mut current {
3694                if let Some(added) = line.strip_prefix('+') {
3695                    let pos = hunk.old_context.len();
3696                    if let Some(last_edit) = hunk.edits.last_mut() {
3697                        if last_edit.range.end == pos {
3698                            writeln!(&mut last_edit.text, "{added}").ok();
3699                            continue;
3700                        }
3701                    }
3702                    hunk.edits.push(ParsedEdit {
3703                        range: pos..pos,
3704                        text: format!("{added}\n"),
3705                    });
3706                } else if let Some(removed) = line.strip_prefix('-') {
3707                    let start = hunk.old_context.len();
3708                    writeln!(&mut hunk.old_context, "{removed}").ok();
3709                    let end = hunk.old_context.len();
3710                    if let Some(last_edit) = hunk.edits.last_mut() {
3711                        if last_edit.range.end == start {
3712                            last_edit.range.end = end;
3713                            continue;
3714                        }
3715                    }
3716                    hunk.edits.push(ParsedEdit {
3717                        range: start..end,
3718                        text: String::new(),
3719                    });
3720                } else {
3721                    let ctx = line.strip_prefix(' ').unwrap_or(line);
3722                    writeln!(&mut hunk.old_context, "{ctx}").ok();
3723                }
3724            }
3725        }
3726
3727        if let Some(hunk) = current {
3728            if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3729                hunks.push(hunk);
3730            }
3731        }
3732
3733        hunks
3734    }
3735
3736    #[cfg(test)]
3737    mod tests {
3738        use super::*;
3739        use indoc::indoc;
3740
3741        #[test]
3742        fn test_apply_variable_edit() {
3743            struct Case {
3744                name: &'static str,
3745                original: &'static str,
3746                model_output: &'static str,
3747                expected: &'static str,
3748            }
3749
3750            let cases = [
3751                Case {
3752                    name: "simple_single_line_replacement",
3753                    original: indoc! {"
3754                        zero
3755                        one
3756                        two
3757                        three
3758                        four
3759                        five
3760                    "},
3761                    model_output: indoc! {"
3762                        two
3763                        <|fim_middle|>
3764                        THREE
3765                        <|fim_suffix|>
3766                        four
3767                    "},
3768                    expected: indoc! {"
3769                        zero
3770                        one
3771                        two
3772                        THREE
3773                        four
3774                        five
3775                    "},
3776                },
3777                Case {
3778                    name: "multi_line_replacement",
3779                    original: indoc! {"
3780                        a
3781                        b
3782                        c
3783                        d
3784                        e
3785                    "},
3786                    model_output: indoc! {"
3787                        a
3788                        <|fim_middle|>
3789                        B
3790                        C
3791                        D
3792                        <|fim_suffix|>
3793                        e
3794                    "},
3795                    expected: indoc! {"
3796                        a
3797                        B
3798                        C
3799                        D
3800                        e
3801                    "},
3802                },
3803                Case {
3804                    name: "insertion_between_existing_lines",
3805                    original: indoc! {"
3806                        a
3807                        b
3808                        c
3809                    "},
3810                    model_output: indoc! {"
3811                        a
3812                        <|fim_middle|>
3813                        X
3814                        <|fim_suffix|>
3815                        b
3816                    "},
3817                    expected: indoc! {"
3818                        a
3819                        X
3820                        b
3821                        c
3822                    "},
3823                },
3824                Case {
3825                    name: "deletion",
3826                    original: indoc! {"
3827                        a
3828                        b
3829                        c
3830                        d
3831                    "},
3832                    model_output: indoc! {"
3833                        a
3834                        <|fim_middle|>
3835                        <|fim_suffix|>
3836                        c
3837                    "},
3838                    expected: indoc! {"
3839                        a
3840                        c
3841                        d
3842                    "},
3843                },
3844                Case {
3845                    name: "replacement_at_start_no_prefix_context",
3846                    original: indoc! {"
3847                        a
3848                        b
3849                        c
3850                    "},
3851                    model_output: indoc! {"
3852                        <|fim_middle|>
3853                        X
3854                        <|fim_suffix|>
3855                        b
3856                    "},
3857                    expected: indoc! {"
3858                        X
3859                        b
3860                        c
3861                    "},
3862                },
3863                Case {
3864                    name: "replacement_at_end_no_suffix_context",
3865                    original: indoc! {"
3866                        a
3867                        b
3868                        c
3869                    "},
3870                    model_output: indoc! {"
3871                        b
3872                        <|fim_middle|>
3873                        Z
3874                        <|fim_suffix|>
3875                    "},
3876                    expected: indoc! {"
3877                        a
3878                        b
3879                        Z
3880                    "},
3881                },
3882                Case {
3883                    name: "context_with_trailing_newline_is_preserved",
3884                    original: indoc! {"
3885                        a
3886                        b
3887                        c
3888                    "},
3889                    model_output: indoc! {"
3890                        a
3891                        <|fim_middle|>
3892                        B
3893                        <|fim_suffix|>
3894                        c
3895                    "},
3896                    expected: indoc! {"
3897                        a
3898                        B
3899                        c
3900                    "},
3901                },
3902                Case {
3903                    name: "cursor_marker_passes_through_untouched",
3904                    original: indoc! {"
3905                        a
3906                        b
3907                        c
3908                    "},
3909                    model_output: indoc! {"
3910                        a
3911                        <|fim_middle|>
3912                        B<|user_cursor|>B
3913                        <|fim_suffix|>
3914                        c
3915                    "},
3916                    expected: indoc! {"
3917                        a
3918                        B<|user_cursor|>B
3919                        c
3920                    "},
3921                },
3922                Case {
3923                    name: "multiple_prefix_context_lines",
3924                    original: indoc! {"
3925                        a
3926                        b
3927                        c
3928                        d
3929                        e
3930                    "},
3931                    model_output: indoc! {"
3932                        b
3933                        c
3934                        <|fim_middle|>
3935                        D
3936                        <|fim_suffix|>
3937                        e
3938                    "},
3939                    expected: indoc! {"
3940                        a
3941                        b
3942                        c
3943                        D
3944                        e
3945                    "},
3946                },
3947            ];
3948
3949            for case in cases {
3950                let (edit_range, replacement) =
3951                    apply_variable_edit(case.original, case.model_output).unwrap();
3952                let mut edited = case.original.to_string();
3953                edited.replace_range(edit_range, &replacement);
3954                assert_eq!(edited, case.expected, "{}", case.name);
3955            }
3956        }
3957
3958        #[test]
3959        fn test_patch_to_variable_edit() {
3960            struct Case {
3961                name: &'static str,
3962                old: &'static str,
3963                patch: &'static str,
3964                cursor_offset: Option<usize>,
3965                expected_variable_edit: &'static str,
3966                expected_after_apply: &'static str,
3967            }
3968
3969            let cases = [
3970                Case {
3971                    name: "simple_replacement",
3972                    old: indoc! {"
3973                        zero
3974                        one
3975                        two
3976                        three
3977                        four
3978                        five
3979                    "},
3980                    patch: indoc! {"
3981                        @@ -3,3 +3,3 @@
3982                         two
3983                        -three
3984                        +THREE
3985                         four
3986                    "},
3987                    cursor_offset: None,
3988                    expected_variable_edit: indoc! {"
3989                        one
3990                        two
3991                        <|fim_middle|>
3992                        THREE
3993                        <|fim_suffix|>
3994                        four
3995                        five
3996                    "},
3997                    expected_after_apply: indoc! {"
3998                        zero
3999                        one
4000                        two
4001                        THREE
4002                        four
4003                        five
4004                    "},
4005                },
4006                Case {
4007                    name: "insertion",
4008                    old: indoc! {"
4009                        a
4010                        b
4011                        c
4012                        d
4013                        e
4014                    "},
4015                    patch: indoc! {"
4016                        @@ -2,0 +3,1 @@
4017                         b
4018                        +X
4019                         c
4020                    "},
4021                    cursor_offset: None,
4022                    expected_variable_edit: indoc! {"
4023                        a
4024                        b
4025                        <|fim_middle|>
4026                        X
4027                        <|fim_suffix|>
4028                        c
4029                        d
4030                    "},
4031                    expected_after_apply: indoc! {"
4032                        a
4033                        b
4034                        X
4035                        c
4036                        d
4037                        e
4038                    "},
4039                },
4040                Case {
4041                    name: "deletion",
4042                    old: indoc! {"
4043                        a
4044                        b
4045                        c
4046                        d
4047                        e
4048                    "},
4049                    patch: indoc! {"
4050                        @@ -2,3 +2,2 @@
4051                         b
4052                        -c
4053                         d
4054                    "},
4055                    cursor_offset: None,
4056                    expected_variable_edit: indoc! {"
4057                        a
4058                        b
4059                        <|fim_middle|>
4060                        <|fim_suffix|>
4061                        d
4062                        e
4063                    "},
4064                    expected_after_apply: indoc! {"
4065                        a
4066                        b
4067                        d
4068                        e
4069                    "},
4070                },
4071                Case {
4072                    name: "edit_near_start",
4073                    old: indoc! {"
4074                        first
4075                        second
4076                        third
4077                        fourth
4078                    "},
4079                    patch: indoc! {"
4080                        @@ -1,1 +1,1 @@
4081                        -first
4082                        +FIRST
4083                    "},
4084                    cursor_offset: None,
4085                    expected_variable_edit: indoc! {"
4086                        <|fim_middle|>
4087                        FIRST
4088                        <|fim_suffix|>
4089                        second
4090                        third
4091                    "},
4092                    expected_after_apply: indoc! {"
4093                        FIRST
4094                        second
4095                        third
4096                        fourth
4097                    "},
4098                },
4099                Case {
4100                    name: "edit_near_end",
4101                    old: indoc! {"
4102                        first
4103                        second
4104                        third
4105                        fourth
4106                    "},
4107                    patch: indoc! {"
4108                        @@ -4,1 +4,1 @@
4109                        -fourth
4110                        +FOURTH
4111                    "},
4112                    cursor_offset: None,
4113                    expected_variable_edit: indoc! {"
4114                        second
4115                        third
4116                        <|fim_middle|>
4117                        FOURTH
4118                        <|fim_suffix|>
4119                    "},
4120                    expected_after_apply: indoc! {"
4121                        first
4122                        second
4123                        third
4124                        FOURTH
4125                    "},
4126                },
4127                Case {
4128                    name: "cursor_at_start_of_replacement",
4129                    old: indoc! {"
4130                        zero
4131                        one
4132                        two
4133                        three
4134                        four
4135                        five
4136                    "},
4137                    patch: indoc! {"
4138                        @@ -3,3 +3,3 @@
4139                         two
4140                        -three
4141                        +THREE
4142                         four
4143                    "},
4144                    cursor_offset: Some(4),
4145                    expected_variable_edit: indoc! {"
4146                        one
4147                        two
4148                        <|fim_middle|>
4149                        <|user_cursor|>THREE
4150                        <|fim_suffix|>
4151                        four
4152                        five
4153                    "},
4154                    expected_after_apply: indoc! {"
4155                        zero
4156                        one
4157                        two
4158                        <|user_cursor|>THREE
4159                        four
4160                        five
4161                    "},
4162                },
4163                Case {
4164                    name: "cursor_in_middle_of_replacement",
4165                    old: indoc! {"
4166                        zero
4167                        one
4168                        two
4169                        three
4170                        four
4171                        five
4172                    "},
4173                    patch: indoc! {"
4174                        @@ -3,3 +3,3 @@
4175                         two
4176                        -three
4177                        +THREE
4178                         four
4179                    "},
4180                    cursor_offset: Some(6),
4181                    expected_variable_edit: indoc! {"
4182                        one
4183                        two
4184                        <|fim_middle|>
4185                        TH<|user_cursor|>REE
4186                        <|fim_suffix|>
4187                        four
4188                        five
4189                    "},
4190                    expected_after_apply: indoc! {"
4191                        zero
4192                        one
4193                        two
4194                        TH<|user_cursor|>REE
4195                        four
4196                        five
4197                    "},
4198                },
4199                Case {
4200                    name: "expands_context_when_two_lines_not_unique_before_and_after",
4201                    old: indoc! {"
4202                        one
4203                        a
4204                        b
4205                        c
4206                        d
4207                        two
4208                        a
4209                        b
4210                        c
4211                        d
4212                        three
4213                        a
4214                        b
4215                        c
4216                        d
4217                        four
4218                    "},
4219                    patch: indoc! {"
4220                        @@ -4,5 +4,5 @@
4221                         two
4222                         a
4223                         b
4224                        -c
4225                        +C
4226                         d
4227                         three
4228                    "},
4229                    cursor_offset: None,
4230                    expected_variable_edit: indoc! {"
4231                        two
4232                        a
4233                        b
4234                        <|fim_middle|>
4235                        C
4236                        <|fim_suffix|>
4237                        d
4238                        three
4239                    "},
4240                    expected_after_apply: indoc! {"
4241                        one
4242                        a
4243                        b
4244                        c
4245                        d
4246                        two
4247                        a
4248                        b
4249                        C
4250                        d
4251                        three
4252                        a
4253                        b
4254                        c
4255                        d
4256                        four
4257                    "},
4258                },
4259                Case {
4260                    name: "expands_context_when_two_lines_not_unique_before_and_after",
4261                    old: indoc! {"
4262                        {
4263                            {
4264                                one();
4265                            }
4266                        }
4267                        {
4268                            {
4269                                two();
4270                            }
4271                        }
4272                        {
4273                            {
4274                                three();
4275                            }
4276                        }
4277                        {
4278                            {
4279                                four();
4280                            }
4281                        }
4282                    "},
4283                    patch: indoc! {"
4284                        @@ -4,5 +4,5 @@
4285                             {
4286                        -        two();
4287                        +        TWO();
4288                             }
4289                    "},
4290                    cursor_offset: None,
4291                    expected_variable_edit: indoc! {"
4292                                one();
4293                            }
4294                        }
4295                        {
4296                            {
4297                        <|fim_middle|>
4298                                TWO();
4299                        <|fim_suffix|>
4300                            }
4301                        }
4302                        {
4303                            {
4304                                three();
4305                    "},
4306                    expected_after_apply: indoc! {"
4307                        {
4308                            {
4309                                one();
4310                            }
4311                        }
4312                        {
4313                            {
4314                                TWO();
4315                            }
4316                        }
4317                        {
4318                            {
4319                                three();
4320                            }
4321                        }
4322                        {
4323                            {
4324                                four();
4325                            }
4326                        }
4327                    "},
4328                },
4329            ];
4330
4331            for case in cases {
4332                let output =
4333                    patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
4334                        .unwrap_or_else(|error| {
4335                            panic!("failed converting patch for {}: {error}", case.name)
4336                        });
4337                assert_eq!(
4338                    output, case.expected_variable_edit,
4339                    "patch->variable_edit mismatch for {}",
4340                    case.name
4341                );
4342
4343                let (edit_range, replacement) = apply_variable_edit(case.old, &output)
4344                    .unwrap_or_else(|error| {
4345                        panic!("failed applying variable_edit for {}: {error}", case.name)
4346                    });
4347                let mut edited_by_variable_edit = case.old.to_string();
4348                edited_by_variable_edit.replace_range(edit_range, &replacement);
4349                assert_eq!(
4350                    edited_by_variable_edit, case.expected_after_apply,
4351                    "variable_edit apply mismatch for {}",
4352                    case.name
4353                );
4354
4355                let (expected_edit_range, expected_replacement) =
4356                    apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
4357                        |error| {
4358                            panic!(
4359                                "failed applying expected variable_edit for {}: {error}",
4360                                case.name
4361                            )
4362                        },
4363                    );
4364                let mut edited_by_expected_variable_edit = case.old.to_string();
4365                edited_by_expected_variable_edit
4366                    .replace_range(expected_edit_range, &expected_replacement);
4367                assert_eq!(
4368                    edited_by_expected_variable_edit, case.expected_after_apply,
4369                    "expected variable_edit apply mismatch for {}",
4370                    case.name
4371                );
4372            }
4373        }
4374
4375        #[test]
4376        fn test_write_cursor_excerpt_section() {
4377            let path = Path::new("test.rs");
4378            let context = "fn main() {\n    hello();\n}\n";
4379            let cursor_offset = 17;
4380            let mut prompt = String::new();
4381            write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
4382            assert_eq!(
4383                prompt,
4384                "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
4385            );
4386        }
4387    }
4388}
4389
4390/// The zeta1 prompt format
4391pub mod zeta1 {
4392    use super::*;
4393    use std::fmt::Write;
4394
4395    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
4396    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
4397    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
4398    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
4399
4400    const INSTRUCTION_HEADER: &str = concat!(
4401        "### Instruction:\n",
4402        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4403        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4404        "into account the cursor location.\n\n",
4405        "### User Edits:\n\n"
4406    );
4407    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
4408    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
4409
4410    /// Formats a complete zeta1 prompt from the input events and excerpt.
4411    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
4412        let mut prompt = String::with_capacity(
4413            INSTRUCTION_HEADER.len()
4414                + input_events.len()
4415                + EXCERPT_HEADER.len()
4416                + input_excerpt.len()
4417                + RESPONSE_HEADER.len(),
4418        );
4419        prompt.push_str(INSTRUCTION_HEADER);
4420        prompt.push_str(input_events);
4421        prompt.push_str(EXCERPT_HEADER);
4422        prompt.push_str(input_excerpt);
4423        prompt.push_str(RESPONSE_HEADER);
4424        prompt
4425    }
4426
4427    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
4428    /// editable and context byte-offset ranges within `cursor_excerpt`.
4429    pub fn format_zeta1_from_input(
4430        input: &ZetaPromptInput,
4431        editable_range: Range<usize>,
4432        context_range: Range<usize>,
4433    ) -> String {
4434        let events = format_zeta1_events(&input.events);
4435        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
4436        format_zeta1_prompt(&events, &excerpt)
4437    }
4438
4439    /// Formats events in zeta1 style (oldest first).
4440    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
4441        let mut result = String::new();
4442        for event in
4443            events
4444                .iter()
4445                .skip(events.len().saturating_sub(max_edit_event_count_for_format(
4446                    &ZetaFormat::V0114180EditableRegion,
4447                )))
4448        {
4449            let event_string = format_zeta1_event(event);
4450            if event_string.is_empty() {
4451                continue;
4452            }
4453            if !result.is_empty() {
4454                result.push_str("\n\n");
4455            }
4456            result.push_str(&event_string);
4457        }
4458        result
4459    }
4460
4461    fn format_zeta1_event(event: &Event) -> String {
4462        match event {
4463            Event::BufferChange {
4464                path,
4465                old_path,
4466                diff,
4467                ..
4468            } => {
4469                let mut prompt = String::new();
4470                if old_path != path {
4471                    writeln!(
4472                        prompt,
4473                        "User renamed {} to {}\n",
4474                        old_path.display(),
4475                        path.display()
4476                    )
4477                    .ok();
4478                }
4479                if !diff.is_empty() {
4480                    write!(
4481                        prompt,
4482                        "User edited {}:\n```diff\n{}\n```",
4483                        path.display(),
4484                        diff
4485                    )
4486                    .ok();
4487                }
4488                prompt
4489            }
4490        }
4491    }
4492
4493    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
4494    /// within `cursor_excerpt`.
4495    fn format_zeta1_excerpt(
4496        input: &ZetaPromptInput,
4497        editable_range: Range<usize>,
4498        context_range: Range<usize>,
4499    ) -> String {
4500        let path_str = input.cursor_path.to_string_lossy();
4501        let excerpt = &*input.cursor_excerpt;
4502        let cursor_offset = input.cursor_offset_in_excerpt;
4503
4504        let mut prompt = String::new();
4505        writeln!(&mut prompt, "```{path_str}").ok();
4506
4507        let starts_at_file_beginning =
4508            input.excerpt_start_row == Some(0) && context_range.start == 0;
4509        if starts_at_file_beginning {
4510            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
4511        }
4512
4513        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
4514
4515        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
4516        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
4517        prompt.push_str(CURSOR_MARKER);
4518        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
4519        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
4520
4521        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
4522        write!(prompt, "\n```").ok();
4523
4524        prompt
4525    }
4526
4527    /// Cleans zeta1 model output by extracting content between editable region
4528    /// markers and converting the zeta1 cursor marker to the universal one.
4529    /// Returns `None` if the output doesn't contain the expected markers.
4530    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
4531        let content = output.replace(CURSOR_MARKER, "");
4532
4533        let content_start = content
4534            .find(EDITABLE_REGION_START_MARKER)
4535            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
4536            .map(|pos| {
4537                if content.as_bytes().get(pos) == Some(&b'\n') {
4538                    pos + 1
4539                } else {
4540                    pos
4541                }
4542            })
4543            .unwrap_or(0);
4544
4545        let content_end = content
4546            .find(EDITABLE_REGION_END_MARKER)
4547            .map(|pos| {
4548                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
4549                    pos - 1
4550                } else {
4551                    pos
4552                }
4553            })
4554            .unwrap_or(content.len());
4555
4556        if content_start > content_end {
4557            return Some(String::new());
4558        }
4559
4560        let extracted = &content[content_start..content_end];
4561
4562        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
4563            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
4564            let text_before_cursor = text_before_cursor
4565                .find(EDITABLE_REGION_START_MARKER)
4566                .map(|pos| {
4567                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
4568                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
4569                        after_marker + 1
4570                    } else {
4571                        after_marker
4572                    }
4573                })
4574                .unwrap_or(0);
4575            let offset_in_extracted = zeta1_cursor_pos
4576                .saturating_sub(text_before_cursor)
4577                .min(extracted.len());
4578            offset_in_extracted
4579        });
4580
4581        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
4582        if let Some(offset) = cursor_offset {
4583            result.push_str(&extracted[..offset]);
4584            result.push_str(super::CURSOR_MARKER);
4585            result.push_str(&extracted[offset..]);
4586        } else {
4587            result.push_str(extracted);
4588        }
4589
4590        Some(result)
4591    }
4592}
4593
4594#[cfg(test)]
4595mod tests {
4596    use super::*;
4597    use indoc::indoc;
4598
4599    fn make_input(
4600        cursor_excerpt: &str,
4601        editable_range: Range<usize>,
4602        cursor_offset: usize,
4603        events: Vec<Event>,
4604        related_files: Vec<RelatedFile>,
4605    ) -> ZetaPromptInput {
4606        let context_range = 0..cursor_excerpt.len();
4607        ZetaPromptInput {
4608            cursor_path: Path::new("test.rs").into(),
4609            cursor_excerpt: cursor_excerpt.into(),
4610            cursor_offset_in_excerpt: cursor_offset,
4611            excerpt_start_row: None,
4612            events: events.into_iter().map(Arc::new).collect(),
4613            related_files: Some(related_files),
4614            active_buffer_diagnostics: vec![],
4615            excerpt_ranges: ExcerptRanges {
4616                editable_150: editable_range.clone(),
4617                editable_180: editable_range.clone(),
4618                editable_350: editable_range,
4619                editable_150_context_350: context_range.clone(),
4620                editable_180_context_350: context_range.clone(),
4621                editable_350_context_150: context_range,
4622                ..Default::default()
4623            },
4624            syntax_ranges: None,
4625            in_open_source_repo: false,
4626            can_collect_data: false,
4627            repo_url: None,
4628        }
4629    }
4630
4631    fn make_input_with_context_range(
4632        excerpt: &str,
4633        editable_range: Range<usize>,
4634        context_range: Range<usize>,
4635        cursor_offset: usize,
4636    ) -> ZetaPromptInput {
4637        ZetaPromptInput {
4638            cursor_path: Path::new("test.rs").into(),
4639            cursor_excerpt: excerpt.into(),
4640            cursor_offset_in_excerpt: cursor_offset,
4641            excerpt_start_row: None,
4642            events: vec![],
4643            related_files: Some(vec![]),
4644            active_buffer_diagnostics: vec![],
4645            excerpt_ranges: ExcerptRanges {
4646                editable_150: editable_range.clone(),
4647                editable_180: editable_range.clone(),
4648                editable_350: editable_range,
4649                editable_150_context_350: context_range.clone(),
4650                editable_180_context_350: context_range.clone(),
4651                editable_350_context_150: context_range,
4652                ..Default::default()
4653            },
4654            syntax_ranges: None,
4655            in_open_source_repo: false,
4656            can_collect_data: false,
4657            repo_url: None,
4658        }
4659    }
4660
4661    fn make_event(path: &str, diff: &str) -> Event {
4662        Event::BufferChange {
4663            path: Path::new(path).into(),
4664            old_path: Path::new(path).into(),
4665            diff: diff.to_string(),
4666            predicted: false,
4667            in_open_source_repo: false,
4668        }
4669    }
4670
4671    fn make_related_file(path: &str, content: &str) -> RelatedFile {
4672        RelatedFile {
4673            path: Path::new(path).into(),
4674            max_row: content.lines().count() as u32,
4675            excerpts: vec![RelatedExcerpt {
4676                row_range: 0..content.lines().count() as u32,
4677                text: content.into(),
4678                order: 0,
4679            }],
4680            in_open_source_repo: false,
4681        }
4682    }
4683
4684    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> Option<String> {
4685        format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4686    }
4687
4688    fn budget_with_margin(requested_tokens: usize) -> usize {
4689        ((requested_tokens as f64) / 0.9).ceil() as usize
4690    }
4691
4692    #[test]
4693    fn test_no_truncation_when_within_budget() {
4694        let input = make_input(
4695            "prefix\neditable\nsuffix",
4696            7..15,
4697            10,
4698            vec![make_event("a.rs", "-old\n+new\n")],
4699            vec![make_related_file("related.rs", "fn helper() {}\n")],
4700        );
4701
4702        assert_eq!(
4703            format_with_budget(&input, 10000).unwrap(),
4704            indoc! {r#"
4705                <|file_sep|>related.rs
4706                fn helper() {}
4707                <|file_sep|>edit history
4708                --- a/a.rs
4709                +++ b/a.rs
4710                -old
4711                +new
4712                <|file_sep|>test.rs
4713                <|fim_prefix|>
4714                prefix
4715                <|fim_middle|>current
4716                edi<|user_cursor|>table
4717                <|fim_suffix|>
4718
4719                suffix
4720                <|fim_middle|>updated
4721            "#}
4722            .to_string()
4723        );
4724    }
4725
4726    #[test]
4727    fn test_truncation_drops_edit_history_when_budget_tight() {
4728        let input = make_input(
4729            "code",
4730            0..4,
4731            2,
4732            vec![make_event("a.rs", "-x\n+y\n")],
4733            vec![
4734                make_related_file("r1.rs", "aaaaaaa\n"),
4735                make_related_file("r2.rs", "bbbbbbb\n"),
4736            ],
4737        );
4738
4739        assert_eq!(
4740            format_with_budget(&input, 10000).unwrap(),
4741            indoc! {r#"
4742                <|file_sep|>r1.rs
4743                aaaaaaa
4744                <|file_sep|>r2.rs
4745                bbbbbbb
4746                <|file_sep|>edit history
4747                --- a/a.rs
4748                +++ b/a.rs
4749                -x
4750                +y
4751                <|file_sep|>test.rs
4752                <|fim_prefix|>
4753                <|fim_middle|>current
4754                co<|user_cursor|>de
4755                <|fim_suffix|>
4756                <|fim_middle|>updated
4757            "#}
4758            .to_string()
4759        );
4760
4761        assert_eq!(
4762            format_with_budget(&input, budget_with_margin(55)),
4763            Some(
4764                indoc! {r#"
4765                <|file_sep|>edit history
4766                --- a/a.rs
4767                +++ b/a.rs
4768                -x
4769                +y
4770                <|file_sep|>test.rs
4771                <|fim_prefix|>
4772                <|fim_middle|>current
4773                co<|user_cursor|>de
4774                <|fim_suffix|>
4775                <|fim_middle|>updated
4776            "#}
4777                .to_string()
4778            )
4779        );
4780    }
4781
4782    #[test]
4783    fn test_truncation_includes_partial_excerpts() {
4784        let input = make_input(
4785            "x",
4786            0..1,
4787            0,
4788            vec![],
4789            vec![RelatedFile {
4790                path: Path::new("big.rs").into(),
4791                max_row: 30,
4792                in_open_source_repo: false,
4793                excerpts: vec![
4794                    RelatedExcerpt {
4795                        row_range: 0..10,
4796                        text: "first excerpt\n".into(),
4797                        order: 0,
4798                    },
4799                    RelatedExcerpt {
4800                        row_range: 10..20,
4801                        text: "second excerpt\n".into(),
4802                        order: 0,
4803                    },
4804                    RelatedExcerpt {
4805                        row_range: 20..30,
4806                        text: "third excerpt\n".into(),
4807                        order: 0,
4808                    },
4809                ],
4810            }],
4811        );
4812
4813        assert_eq!(
4814            format_with_budget(&input, 10000).unwrap(),
4815            indoc! {r#"
4816                <|file_sep|>big.rs
4817                first excerpt
4818                ...
4819                second excerpt
4820                ...
4821                third excerpt
4822                <|file_sep|>test.rs
4823                <|fim_prefix|>
4824                <|fim_middle|>current
4825                <|user_cursor|>x
4826                <|fim_suffix|>
4827                <|fim_middle|>updated
4828            "#}
4829            .to_string()
4830        );
4831
4832        assert_eq!(
4833            format_with_budget(&input, budget_with_margin(50)).unwrap(),
4834            indoc! {r#"
4835                <|file_sep|>big.rs
4836                first excerpt
4837                ...
4838                <|file_sep|>test.rs
4839                <|fim_prefix|>
4840                <|fim_middle|>current
4841                <|user_cursor|>x
4842                <|fim_suffix|>
4843                <|fim_middle|>updated
4844            "#}
4845            .to_string()
4846        );
4847    }
4848
4849    #[test]
4850    fn test_truncation_prioritizes_lower_order_excerpts() {
4851        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
4852        // With tight budget, only the lower-order excerpt from file_b should be included.
4853        let input = make_input(
4854            "x",
4855            0..1,
4856            0,
4857            vec![],
4858            vec![
4859                RelatedFile {
4860                    path: Path::new("file_a.rs").into(),
4861                    max_row: 10,
4862                    in_open_source_repo: false,
4863                    excerpts: vec![RelatedExcerpt {
4864                        row_range: 0..10,
4865                        text: "low priority content\n".into(),
4866                        order: 5,
4867                    }],
4868                },
4869                RelatedFile {
4870                    path: Path::new("file_b.rs").into(),
4871                    max_row: 10,
4872                    in_open_source_repo: false,
4873                    excerpts: vec![RelatedExcerpt {
4874                        row_range: 0..10,
4875                        text: "high priority content\n".into(),
4876                        order: 1,
4877                    }],
4878                },
4879            ],
4880        );
4881
4882        // With large budget, both files included; rendered in stable lexicographic order.
4883        assert_eq!(
4884            format_with_budget(&input, 10000).unwrap(),
4885            indoc! {r#"
4886                <|file_sep|>file_a.rs
4887                low priority content
4888                <|file_sep|>file_b.rs
4889                high priority content
4890                <|file_sep|>test.rs
4891                <|fim_prefix|>
4892                <|fim_middle|>current
4893                <|user_cursor|>x
4894                <|fim_suffix|>
4895                <|fim_middle|>updated
4896            "#}
4897            .to_string()
4898        );
4899
4900        // With tight budget, only file_b (lower order) fits.
4901        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
4902        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
4903        // file_a would need another 14 tokens, which doesn't fit.
4904        assert_eq!(
4905            format_with_budget(&input, budget_with_margin(52)).unwrap(),
4906            indoc! {r#"
4907                <|file_sep|>file_b.rs
4908                high priority content
4909                <|file_sep|>test.rs
4910                <|fim_prefix|>
4911                <|fim_middle|>current
4912                <|user_cursor|>x
4913                <|fim_suffix|>
4914                <|fim_middle|>updated
4915            "#}
4916            .to_string()
4917        );
4918    }
4919
4920    #[test]
4921    fn test_truncation_drops_high_order_excerpts_within_file() {
4922        // A single file has excerpts at order 1 and order 3. With a tight budget,
4923        // only the order-1 excerpts are included while the order-3 excerpt is
4924        // dropped — even though they belong to the same file. This also preserves
4925        // the parent invariant: parent outline items have order ≤ their best
4926        // child, so they're always included when any child is.
4927        let input = make_input(
4928            "x",
4929            0..1,
4930            0,
4931            vec![],
4932            vec![RelatedFile {
4933                path: Path::new("mod.rs").into(),
4934                max_row: 30,
4935                in_open_source_repo: false,
4936                excerpts: vec![
4937                    RelatedExcerpt {
4938                        row_range: 0..5,
4939                        text: "mod header\n".into(),
4940                        order: 1,
4941                    },
4942                    RelatedExcerpt {
4943                        row_range: 5..15,
4944                        text: "important fn\n".into(),
4945                        order: 1,
4946                    },
4947                    RelatedExcerpt {
4948                        row_range: 15..30,
4949                        text: "less important fn\n".into(),
4950                        order: 3,
4951                    },
4952                ],
4953            }],
4954        );
4955
4956        // With large budget, all three excerpts included.
4957        assert_eq!(
4958            format_with_budget(&input, 10000).unwrap(),
4959            indoc! {r#"
4960                <|file_sep|>mod.rs
4961                mod header
4962                ...
4963                important fn
4964                ...
4965                less important fn
4966                <|file_sep|>test.rs
4967                <|fim_prefix|>
4968                <|fim_middle|>current
4969                <|user_cursor|>x
4970                <|fim_suffix|>
4971                <|fim_middle|>updated
4972            "#}
4973            .to_string()
4974        );
4975
4976        // With tight budget, only order<=1 excerpts included (header + important fn).
4977        assert_eq!(
4978            format_with_budget(&input, budget_with_margin(55)).unwrap(),
4979            indoc! {r#"
4980                <|file_sep|>mod.rs
4981                mod header
4982                ...
4983                important fn
4984                ...
4985                <|file_sep|>test.rs
4986                <|fim_prefix|>
4987                <|fim_middle|>current
4988                <|user_cursor|>x
4989                <|fim_suffix|>
4990                <|fim_middle|>updated
4991            "#}
4992            .to_string()
4993        );
4994    }
4995
4996    #[test]
4997    fn test_truncation_drops_older_events_first() {
4998        let input = make_input(
4999            "x",
5000            0..1,
5001            0,
5002            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
5003            vec![],
5004        );
5005
5006        assert_eq!(
5007            format_with_budget(&input, 10000).unwrap(),
5008            indoc! {r#"
5009                <|file_sep|>edit history
5010                --- a/old.rs
5011                +++ b/old.rs
5012                -1
5013                --- a/new.rs
5014                +++ b/new.rs
5015                -2
5016                <|file_sep|>test.rs
5017                <|fim_prefix|>
5018                <|fim_middle|>current
5019                <|user_cursor|>x
5020                <|fim_suffix|>
5021                <|fim_middle|>updated
5022            "#}
5023            .to_string()
5024        );
5025
5026        assert_eq!(
5027            format_with_budget(&input, 60).unwrap(),
5028            indoc! {r#"
5029                <|file_sep|>edit history
5030                --- a/new.rs
5031                +++ b/new.rs
5032                -2
5033                <|file_sep|>test.rs
5034                <|fim_prefix|>
5035                <|fim_middle|>current
5036                <|user_cursor|>x
5037                <|fim_suffix|>
5038                <|fim_middle|>updated
5039            "#}
5040            .to_string()
5041        );
5042    }
5043
5044    #[test]
5045    fn test_cursor_excerpt_always_included_with_minimal_budget() {
5046        let input = make_input(
5047            "fn main() {}",
5048            0..12,
5049            3,
5050            vec![make_event("a.rs", "-old\n+new\n")],
5051            vec![make_related_file("related.rs", "helper\n")],
5052        );
5053
5054        assert!(format_with_budget(&input, 30).is_none())
5055    }
5056
5057    #[track_caller]
5058    fn format_seed_coder(input: &ZetaPromptInput) -> String {
5059        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
5060            .expect("seed coder prompt formatting should succeed")
5061    }
5062
5063    #[track_caller]
5064    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
5065        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
5066            .expect("seed coder prompt formatting should succeed")
5067    }
5068
5069    #[test]
5070    fn test_seed_coder_basic_format() {
5071        let input = make_input(
5072            "prefix\neditable\nsuffix",
5073            7..15,
5074            10,
5075            vec![make_event("a.rs", "-old\n+new\n")],
5076            vec![make_related_file("related.rs", "fn helper() {}\n")],
5077        );
5078
5079        assert_eq!(
5080            format_seed_coder(&input),
5081            indoc! {r#"
5082                <[fim-suffix]>
5083                suffix
5084                <[fim-prefix]><filename>related.rs
5085                fn helper() {}
5086
5087                <filename>edit_history
5088                --- a/a.rs
5089                +++ b/a.rs
5090                -old
5091                +new
5092
5093                <filename>test.rs
5094                prefix
5095                <<<<<<< CURRENT
5096                edi<|user_cursor|>table
5097                =======
5098                <[fim-middle]>"#}
5099        );
5100    }
5101
5102    #[test]
5103    fn test_v0317_formats_prompt_with_many_related_files() {
5104        let related_files = (0..900)
5105            .map(|index| {
5106                make_related_file(
5107                    &format!("related_{index}.rs"),
5108                    "fn helper() {\n    let value = 1;\n}\n",
5109                )
5110            })
5111            .collect();
5112
5113        let input = make_input(
5114            "code",
5115            0..4,
5116            2,
5117            vec![make_event("a.rs", "-x\n+y\n")],
5118            related_files,
5119        );
5120
5121        let prompt =
5122            format_prompt_with_budget_for_format(&input, ZetaFormat::V0317SeedMultiRegions, 4096);
5123
5124        assert!(prompt.is_some());
5125        let prompt = prompt.expect("v0317 should produce a prompt under high related-file count");
5126        assert!(prompt.contains("test.rs"));
5127        assert!(prompt.contains(CURSOR_MARKER));
5128    }
5129
5130    #[test]
5131    fn test_v0327_formats_single_file_prompt_without_related_files() {
5132        let excerpt = indoc! {"
5133            line01
5134            line02
5135            line03
5136            line04
5137            line05
5138            line06
5139            line07
5140            line08
5141            line09
5142            line10
5143            line11
5144            line12
5145            line13
5146            line14
5147            line15
5148            line16
5149            line17
5150            line18
5151            line19
5152            line20
5153        "};
5154        let cursor_offset = excerpt.find("line10").expect("cursor line exists");
5155        let input = make_input(
5156            excerpt,
5157            0..excerpt.len(),
5158            cursor_offset,
5159            vec![make_event("a.rs", "-x\n+y\n")],
5160            vec![make_related_file("related.rs", "fn helper() {}\n")],
5161        );
5162
5163        let prompt =
5164            format_prompt_with_budget_for_format(&input, ZetaFormat::V0327SingleFile, 4096)
5165                .expect("v0327 prompt should fit");
5166
5167        assert!(prompt.contains("line01"));
5168        assert!(prompt.contains("line20"));
5169        assert!(prompt.contains("<filename>edit_history"));
5170        assert!(prompt.contains("<filename>test.rs"));
5171        assert!(prompt.contains(CURSOR_MARKER));
5172        assert!(!prompt.contains("related.rs"));
5173        assert!(!prompt.contains("fn helper() {}"));
5174    }
5175
5176    #[test]
5177    fn test_v0327_resolve_cursor_region_uses_full_excerpt_context() {
5178        let excerpt = (0..80)
5179            .map(|index| format!("l{index:02}\n"))
5180            .collect::<String>();
5181        let cursor_offset = excerpt.find("l40").expect("cursor line exists");
5182        let input = make_input(&excerpt, 0..excerpt.len(), cursor_offset, vec![], vec![]);
5183
5184        let (context, editable_range, context_range, adjusted_cursor) =
5185            resolve_cursor_region(&input, ZetaFormat::V0327SingleFile);
5186
5187        assert_eq!(context, excerpt);
5188        assert_eq!(context_range, 0..excerpt.len());
5189        assert_eq!(adjusted_cursor, cursor_offset);
5190        assert!(editable_range.start < adjusted_cursor);
5191        assert!(editable_range.end > adjusted_cursor);
5192        assert!(editable_range.end < excerpt.len());
5193    }
5194
5195    #[test]
5196    fn test_seed_coder_no_context() {
5197        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
5198
5199        assert_eq!(
5200            format_seed_coder(&input),
5201            indoc! {r#"
5202                <[fim-suffix]>
5203                after
5204                <[fim-prefix]><filename>test.rs
5205                before
5206                <<<<<<< CURRENT
5207                mid<|user_cursor|>dle
5208                =======
5209                <[fim-middle]>"#}
5210        );
5211    }
5212
5213    #[test]
5214    fn test_seed_coder_truncation_drops_context() {
5215        let input = make_input(
5216            "code",
5217            0..4,
5218            2,
5219            vec![make_event("a.rs", "-x\n+y\n")],
5220            vec![make_related_file("r1.rs", "content\n")],
5221        );
5222
5223        // With large budget, everything is included
5224        assert_eq!(
5225            format_seed_coder(&input),
5226            indoc! {r#"
5227                <[fim-suffix]>
5228                <[fim-prefix]><filename>r1.rs
5229                content
5230
5231                <filename>edit_history
5232                --- a/a.rs
5233                +++ b/a.rs
5234                -x
5235                +y
5236
5237                <filename>test.rs
5238                <<<<<<< CURRENT
5239                co<|user_cursor|>de
5240                =======
5241                <[fim-middle]>"#}
5242        );
5243
5244        assert_eq!(
5245            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 24),
5246            None
5247        );
5248
5249        assert_eq!(
5250            format_seed_coder_with_budget(&input, 40),
5251            indoc! {r#"
5252                <[fim-suffix]>
5253                <[fim-prefix]><filename>test.rs
5254                <<<<<<< CURRENT
5255                co<|user_cursor|>de
5256                =======
5257                <[fim-middle]>"#
5258            }
5259        )
5260    }
5261
5262    #[test]
5263    fn test_seed_coder_truncation_prioritizes_lower_order() {
5264        let input = make_input(
5265            "code",
5266            0..4,
5267            2,
5268            vec![],
5269            vec![
5270                RelatedFile {
5271                    path: Path::new("low_prio.rs").into(),
5272                    max_row: 5,
5273                    in_open_source_repo: false,
5274                    excerpts: vec![RelatedExcerpt {
5275                        row_range: 0..5,
5276                        text: "low prio\n".into(),
5277                        order: 10,
5278                    }],
5279                },
5280                RelatedFile {
5281                    path: Path::new("high_prio.rs").into(),
5282                    max_row: 5,
5283                    in_open_source_repo: false,
5284                    excerpts: vec![RelatedExcerpt {
5285                        row_range: 0..5,
5286                        text: "high prio\n".into(),
5287                        order: 1,
5288                    }],
5289                },
5290            ],
5291        );
5292
5293        // With large budget, both included; rendered in stable lexicographic order.
5294        assert_eq!(
5295            format_seed_coder(&input),
5296            indoc! {r#"
5297                <[fim-suffix]>
5298                <[fim-prefix]><filename>low_prio.rs
5299                low prio
5300                <filename>high_prio.rs
5301                high prio
5302
5303                <filename>test.rs
5304                <<<<<<< CURRENT
5305                co<|user_cursor|>de
5306                =======
5307                <[fim-middle]>"#}
5308        );
5309
5310        // With tight budget under the generic heuristic, context is dropped but the
5311        // minimal cursor section still fits.
5312        assert_eq!(
5313            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 44),
5314            Some(
5315                indoc! {r#"
5316                    <[fim-suffix]>
5317                    <[fim-prefix]><filename>test.rs
5318                    <<<<<<< CURRENT
5319                    co<|user_cursor|>de
5320                    =======
5321                    <[fim-middle]>"#}
5322                .to_string()
5323            )
5324        );
5325    }
5326
5327    #[test]
5328    fn test_format_zeta1_from_input_basic() {
5329        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
5330        let input = ZetaPromptInput {
5331            cursor_path: Path::new("src/main.rs").into(),
5332            cursor_excerpt: excerpt.into(),
5333            cursor_offset_in_excerpt: 30,
5334            excerpt_start_row: Some(0),
5335            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
5336            related_files: Some(vec![]),
5337            active_buffer_diagnostics: vec![],
5338            excerpt_ranges: ExcerptRanges {
5339                editable_150: 15..41,
5340                editable_180: 15..41,
5341                editable_350: 15..41,
5342                editable_150_context_350: 0..excerpt.len(),
5343                editable_180_context_350: 0..excerpt.len(),
5344                editable_350_context_150: 0..excerpt.len(),
5345                ..Default::default()
5346            },
5347            syntax_ranges: None,
5348            in_open_source_repo: false,
5349            can_collect_data: false,
5350            repo_url: None,
5351        };
5352
5353        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
5354
5355        assert_eq!(
5356            prompt,
5357            concat!(
5358                "### Instruction:\n",
5359                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5360                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5361                "into account the cursor location.\n",
5362                "\n",
5363                "### User Edits:\n",
5364                "\n",
5365                "User edited other.rs:\n",
5366                "```diff\n",
5367                "-old\n",
5368                "+new\n",
5369                "\n",
5370                "```\n",
5371                "\n",
5372                "### User Excerpt:\n",
5373                "\n",
5374                "```src/main.rs\n",
5375                "<|start_of_file|>\n",
5376                "fn before() {}\n",
5377                "<|editable_region_start|>\n",
5378                "fn foo() {\n",
5379                "    <|user_cursor_is_here|>let x = 1;\n",
5380                "\n",
5381                "<|editable_region_end|>}\n",
5382                "fn after() {}\n",
5383                "\n",
5384                "```\n",
5385                "\n",
5386                "### Response:\n",
5387            ),
5388        );
5389    }
5390
5391    #[test]
5392    fn test_format_zeta1_from_input_no_start_of_file() {
5393        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
5394        let input = ZetaPromptInput {
5395            cursor_path: Path::new("src/main.rs").into(),
5396            cursor_excerpt: excerpt.into(),
5397            cursor_offset_in_excerpt: 15,
5398            excerpt_start_row: Some(10),
5399            events: vec![],
5400            related_files: Some(vec![]),
5401            active_buffer_diagnostics: vec![],
5402            excerpt_ranges: ExcerptRanges {
5403                editable_150: 0..28,
5404                editable_180: 0..28,
5405                editable_350: 0..28,
5406                editable_150_context_350: 0..28,
5407                editable_180_context_350: 0..28,
5408                editable_350_context_150: 0..28,
5409                ..Default::default()
5410            },
5411            syntax_ranges: None,
5412            in_open_source_repo: false,
5413            can_collect_data: false,
5414            repo_url: None,
5415        };
5416
5417        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
5418
5419        assert_eq!(
5420            prompt,
5421            concat!(
5422                "### Instruction:\n",
5423                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5424                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5425                "into account the cursor location.\n",
5426                "\n",
5427                "### User Edits:\n",
5428                "\n",
5429                "\n",
5430                "\n",
5431                "### User Excerpt:\n",
5432                "\n",
5433                "```src/main.rs\n",
5434                "<|editable_region_start|>\n",
5435                "fn foo() {\n",
5436                "    <|user_cursor_is_here|>let x = 1;\n",
5437                "}\n",
5438                "\n",
5439                "<|editable_region_end|>\n",
5440                "```\n",
5441                "\n",
5442                "### Response:\n",
5443            ),
5444        );
5445    }
5446
5447    #[test]
5448    fn test_format_zeta1_from_input_with_sub_ranges() {
5449        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
5450        let editable_range = 10..37;
5451        let context_range = 0..excerpt.len();
5452
5453        let input = ZetaPromptInput {
5454            cursor_path: Path::new("test.rs").into(),
5455            cursor_excerpt: excerpt.into(),
5456            cursor_offset_in_excerpt: 25,
5457            excerpt_start_row: Some(0),
5458            events: vec![],
5459            related_files: Some(vec![]),
5460            active_buffer_diagnostics: vec![],
5461            excerpt_ranges: ExcerptRanges {
5462                editable_150: editable_range.clone(),
5463                editable_180: editable_range.clone(),
5464                editable_350: editable_range.clone(),
5465                editable_150_context_350: context_range.clone(),
5466                editable_180_context_350: context_range.clone(),
5467                editable_350_context_150: context_range.clone(),
5468                ..Default::default()
5469            },
5470            syntax_ranges: None,
5471            in_open_source_repo: false,
5472            can_collect_data: false,
5473            repo_url: None,
5474        };
5475
5476        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
5477
5478        assert_eq!(
5479            prompt,
5480            concat!(
5481                "### Instruction:\n",
5482                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5483                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5484                "into account the cursor location.\n",
5485                "\n",
5486                "### User Edits:\n",
5487                "\n",
5488                "\n",
5489                "\n",
5490                "### User Excerpt:\n",
5491                "\n",
5492                "```test.rs\n",
5493                "<|start_of_file|>\n",
5494                "// prefix\n",
5495                "<|editable_region_start|>\n",
5496                "fn foo() {\n",
5497                "    <|user_cursor_is_here|>let x = 1;\n",
5498                "}\n",
5499                "<|editable_region_end|>\n",
5500                "// suffix\n",
5501                "\n",
5502                "```\n",
5503                "\n",
5504                "### Response:\n",
5505            ),
5506        );
5507    }
5508
5509    #[test]
5510    fn test_max_event_count() {
5511        fn make_numbered_event(index: usize) -> Event {
5512            return make_event(
5513                &format!("event-{index}.rs"),
5514                &format!("-old-{index}\n+new-{index}\n"),
5515            );
5516        }
5517        let input = make_input(
5518            "x",
5519            0..1,
5520            0,
5521            (0..3).map(make_numbered_event).collect(),
5522            vec![],
5523        );
5524
5525        let edit_history_section = format_edit_history_within_budget(
5526            &input.events,
5527            "<|file_sep|>",
5528            "edit history",
5529            usize::MAX,
5530            5,
5531        );
5532
5533        assert_eq!(
5534            &edit_history_section,
5535            indoc!(
5536                "
5537                <|file_sep|>edit history
5538                --- a/event-0.rs
5539                +++ b/event-0.rs
5540                -old-0
5541                +new-0
5542                --- a/event-1.rs
5543                +++ b/event-1.rs
5544                -old-1
5545                +new-1
5546                --- a/event-2.rs
5547                +++ b/event-2.rs
5548                -old-2
5549                +new-2
5550            "
5551            )
5552        );
5553
5554        let edit_history_section = format_edit_history_within_budget(
5555            &input.events,
5556            "<|file_sep|>",
5557            "edit history",
5558            usize::MAX,
5559            2,
5560        );
5561
5562        assert_eq!(
5563            &edit_history_section,
5564            indoc!(
5565                "
5566                <|file_sep|>edit history
5567                --- a/event-1.rs
5568                +++ b/event-1.rs
5569                -old-1
5570                +new-1
5571                --- a/event-2.rs
5572                +++ b/event-2.rs
5573                -old-2
5574                +new-2
5575            "
5576            )
5577        );
5578
5579        let edit_history_section = format_edit_history_within_budget(
5580            &input.events,
5581            "<|file_sep|>",
5582            "edit history",
5583            usize::MAX,
5584            0,
5585        );
5586
5587        assert_eq!(&edit_history_section, "");
5588    }
5589
5590    #[test]
5591    fn test_clean_zeta1_model_output_basic() {
5592        let output = indoc! {"
5593            <|editable_region_start|>
5594            fn main() {
5595                println!(\"hello\");
5596            }
5597            <|editable_region_end|>
5598        "};
5599
5600        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5601        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
5602    }
5603
5604    #[test]
5605    fn test_clean_zeta1_model_output_with_cursor() {
5606        let output = indoc! {"
5607            <|editable_region_start|>
5608            fn main() {
5609                <|user_cursor_is_here|>println!(\"hello\");
5610            }
5611            <|editable_region_end|>
5612        "};
5613
5614        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5615        assert_eq!(
5616            cleaned,
5617            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
5618        );
5619    }
5620
5621    #[test]
5622    fn test_clean_zeta1_model_output_no_markers() {
5623        let output = "fn main() {}\n";
5624        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5625        assert_eq!(cleaned, "fn main() {}\n");
5626    }
5627
5628    #[test]
5629    fn test_clean_zeta1_model_output_empty_region() {
5630        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
5631        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5632        assert_eq!(cleaned, "");
5633    }
5634
5635    fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
5636        let mut result = excerpt.to_string();
5637        result.replace_range(
5638            parsed_output.range_in_excerpt.clone(),
5639            &parsed_output.new_editable_region,
5640        );
5641        result
5642    }
5643
5644    #[test]
5645    fn test_parse_zeta2_model_output() {
5646        let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
5647        let context_start = excerpt.find("ctx start").unwrap();
5648        let context_end = excerpt.find("after ctx").unwrap();
5649        let editable_start = excerpt.find("editable old").unwrap();
5650        let editable_end = editable_start + "editable old\n".len();
5651        let input = make_input_with_context_range(
5652            excerpt,
5653            editable_start..editable_end,
5654            context_start..context_end,
5655            editable_start,
5656        );
5657
5658        let output = parse_zeta2_model_output(
5659            "editable new\n>>>>>>> UPDATED\n",
5660            ZetaFormat::V0131GitMergeMarkersPrefix,
5661            &input,
5662        )
5663        .unwrap();
5664
5665        assert_eq!(
5666            apply_edit(excerpt, &output),
5667            "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
5668        );
5669    }
5670
5671    #[test]
5672    fn test_parse_zeta2_model_output_identity() {
5673        let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
5674        let editable_start = excerpt.find("bbb").unwrap();
5675        let editable_end = excerpt.find("ddd").unwrap();
5676        let input = make_input_with_context_range(
5677            excerpt,
5678            editable_start..editable_end,
5679            0..excerpt.len(),
5680            editable_start,
5681        );
5682
5683        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5684        let output =
5685            parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
5686
5687        assert_eq!(apply_edit(excerpt, &output), excerpt);
5688    }
5689
5690    #[test]
5691    fn test_parse_zeta2_model_output_strips_end_marker() {
5692        let excerpt = "hello\nworld\n";
5693        let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
5694
5695        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5696        let output1 =
5697            parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
5698        let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();
5699
5700        assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
5701        assert_eq!(apply_edit(excerpt, &output1), "new content\n");
5702    }
5703
5704    #[test]
5705    fn test_parsed_output_to_patch_round_trips_through_udiff_application() {
5706        let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
5707        let context_start = excerpt.find("ctx start").unwrap();
5708        let context_end = excerpt.find("after ctx").unwrap();
5709        let editable_start = excerpt.find("editable old").unwrap();
5710        let editable_end = editable_start + "editable old\n".len();
5711        let input = make_input_with_context_range(
5712            excerpt,
5713            editable_start..editable_end,
5714            context_start..context_end,
5715            editable_start,
5716        );
5717
5718        let parsed = parse_zeta2_model_output(
5719            "editable new\n>>>>>>> UPDATED\n",
5720            ZetaFormat::V0131GitMergeMarkersPrefix,
5721            &input,
5722        )
5723        .unwrap();
5724        let expected = apply_edit(excerpt, &parsed);
5725        let patch = parsed_output_to_patch(&input, parsed).unwrap();
5726        let patched = udiff::apply_diff_to_string(&patch, excerpt).unwrap();
5727
5728        assert_eq!(patched, expected);
5729    }
5730
5731    #[test]
5732    fn test_special_tokens_not_triggered_by_comment_separator() {
5733        // Regression test for https://github.com/zed-industries/zed/issues/52489
5734        let excerpt = "fn main() {\n    // =======\n    println!(\"hello\");\n}\n";
5735        let input = make_input(excerpt, 0..excerpt.len(), 0, vec![], vec![]);
5736        assert!(
5737            !prompt_input_contains_special_tokens(&input, ZetaFormat::V0131GitMergeMarkersPrefix),
5738            "comment containing ======= should not trigger special token detection"
5739        );
5740    }
5741}