zeta_prompt.rs

   1pub mod excerpt_ranges;
   2pub mod multi_region;
   3
   4use anyhow::{Result, anyhow};
   5use serde::{Deserialize, Serialize};
   6use std::fmt::Write;
   7use std::ops::Range;
   8use std::path::Path;
   9use std::sync::Arc;
  10use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
  11
  12pub use crate::excerpt_ranges::{
  13    ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
  14};
  15
/// Marker text that stands for the user's cursor position. It is passed to the
/// multi-region writers and listed among the special tokens of the
/// multi-region formats.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes to
/// `format_prompt_with_budget_for_format`.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  23
  24fn estimate_tokens(bytes: usize) -> usize {
  25    bytes / 3
  26}
  27
/// Everything needed to build an edit-prediction prompt around the cursor.
///
/// The value is (de)serializable with serde; fields marked `#[serde(default)]`
/// may be absent from incoming data.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; it is passed to the
    /// cursor-section writers.
    pub cursor_path: Arc<Path>,
    /// Text surrounding the cursor. The byte offsets and ranges below index
    /// into this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Row offset added to rows computed inside `cursor_excerpt` before they
    /// are compared with related-file excerpt rows. When `None`, related
    /// files are not filtered against the cursor excerpt.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events; prompt formatting reads them starting from the end of
    /// the list.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other files to offer as context; `None` is treated as
    /// an empty list.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    /// Diagnostics for the active buffer; omitted from serialized output
    /// when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    #[serde(default)]
    pub in_open_source_repo: bool,
    #[serde(default)]
    pub can_collect_data: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
  58
/// Identifies a prompt/output format version.
///
/// The variant selects the special tokens, token limits, excerpt ranges and
/// cursor-section writer used by the `*_for_format` functions in this file.
/// The derived `IntoStaticStr` conversion supplies the variant name used by
/// `Display` and `ZetaFormat::parse`; `EnumIter` lets `parse` enumerate every
/// variant.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Needed because the `v0226Hashline` variant starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    /// The value returned by `ZetaFormat::default()`.
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
    V0306SeedMultiRegions,
    V0316SeedMultiRegions,
}
  88
  89impl std::fmt::Display for ZetaFormat {
  90    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  91        write!(f, "{}", <&'static str>::from(self))
  92    }
  93}
  94
  95impl ZetaFormat {
  96    pub fn parse(format_name: &str) -> Result<Self> {
  97        let mut results = ZetaFormat::iter().filter(|version| {
  98            <&'static str>::from(version)
  99                .to_lowercase()
 100                .contains(&format_name.to_lowercase())
 101        });
 102        let Some(result) = results.next() else {
 103            anyhow::bail!(
 104                "`{format_name}` did not match any of:\n{}",
 105                Self::options_as_string()
 106            );
 107        };
 108        if results.next().is_some() {
 109            anyhow::bail!(
 110                "`{format_name}` matched more than one of:\n{}",
 111                Self::options_as_string()
 112            );
 113        }
 114        Ok(result)
 115    }
 116
 117    pub fn options_as_string() -> String {
 118        ZetaFormat::iter()
 119            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 120            .collect::<Vec<_>>()
 121            .concat()
 122    }
 123}
 124
/// An entry of the edit history supplied with a prompt.
///
/// Serialized by serde with an `"event"` tag field (`#[serde(tag = "event")]`).
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, rendered by `write_event` as a diff entry.
    BufferChange {
        /// Path written on the `+++ b` line.
        path: Arc<Path>,
        /// Path written on the `--- a` line.
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the two path lines.
        diff: String,
        /// When true, `write_event` prefixes the entry with
        /// `// User accepted prediction:`.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; not written by
        /// `write_event`.
        in_open_source_repo: bool,
    },
}
 136
 137impl Event {
 138    pub fn in_open_source_repo(&self) -> bool {
 139        match self {
 140            Event::BufferChange {
 141                in_open_source_repo,
 142                ..
 143            } => *in_open_source_repo,
 144        }
 145    }
 146}
 147
 148pub fn write_event(prompt: &mut String, event: &Event) {
 149    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 150        for component in path.components() {
 151            prompt.push('/');
 152            write!(prompt, "{}", component.as_os_str().display()).ok();
 153        }
 154    }
 155    match event {
 156        Event::BufferChange {
 157            path,
 158            old_path,
 159            diff,
 160            predicted,
 161            in_open_source_repo: _,
 162        } => {
 163            if *predicted {
 164                prompt.push_str("// User accepted prediction:\n");
 165            }
 166            prompt.push_str("--- a");
 167            write_path_as_unix_str(prompt, old_path.as_ref());
 168            prompt.push_str("\n+++ b");
 169            write_path_as_unix_str(prompt, path.as_ref());
 170            prompt.push('\n');
 171            prompt.push_str(diff);
 172        }
 173    }
 174}
 175
/// A diagnostic entry carried in `ZetaPromptInput::active_buffer_diagnostics`.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    /// Numeric severity, if any. NOTE(review): the numbering scheme is not
    /// defined in this part of the file — confirm against the producer.
    pub severity: Option<i32>,
    /// Diagnostic message text.
    pub message: String,
    /// Source text associated with the diagnostic.
    pub snippet: String,
    /// Presumably the buffer rows covered by `snippet` — TODO confirm.
    pub snippet_buffer_row_range: Range<u32>,
    /// Presumably the diagnostic's offsets within `snippet` — TODO confirm
    /// the unit (bytes vs. chars).
    pub diagnostic_range_in_snippet: Range<usize>,
}
 184
/// A file, other than the cursor excerpt itself, whose excerpts may be added
/// to the prompt as context.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path of the file; written into the file header line and compared with
    /// the cursor path by `filter_redundant_excerpts`.
    pub path: Arc<Path>,
    /// Row bound for the file: an excerpt whose `row_range.end` is below this
    /// value is followed by a `...` line when rendered.
    pub max_row: u32,
    /// Excerpts taken from this file.
    pub excerpts: Vec<RelatedExcerpt>,
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 193
/// A span of text taken from a `RelatedFile`.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file covered by this excerpt.
    pub row_range: Range<u32>,
    /// The excerpt text.
    pub text: Arc<str>,
    /// Priority used when fitting excerpts into a budget: excerpts are
    /// considered in ascending `order`. Defaults to 0 when absent from
    /// serialized data.
    #[serde(default)]
    pub order: usize,
}
 201
 202pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 203    special_tokens_for_format(format)
 204        .iter()
 205        .any(|token| input.cursor_excerpt.contains(token))
 206}
 207
/// Formats the prompt for `input` in the given `format` using the budget
/// `MAX_PROMPT_TOKENS`.
///
/// Returns whatever `format_prompt_with_budget_for_format` returns for that
/// budget, including `None`.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> Option<String> {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
 211
 212pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 213    match format {
 214        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
 215        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
 216        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
 217        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 218        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
 219        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
 220        ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 221        ZetaFormat::v0226Hashline => hashline::special_tokens(),
 222        ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
 223        ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
 224        ZetaFormat::V0306SeedMultiRegions => {
 225            static TOKENS: &[&str] = &[
 226                seed_coder::FIM_SUFFIX,
 227                seed_coder::FIM_PREFIX,
 228                seed_coder::FIM_MIDDLE,
 229                seed_coder::FILE_MARKER,
 230                seed_coder::START_MARKER,
 231                seed_coder::SEPARATOR,
 232                seed_coder::END_MARKER,
 233                CURSOR_MARKER,
 234                multi_region::MARKER_TAG_PREFIX,
 235            ];
 236            TOKENS
 237        }
 238        ZetaFormat::V0316SeedMultiRegions => {
 239            static TOKENS: &[&str] = &[
 240                seed_coder::FIM_SUFFIX,
 241                seed_coder::FIM_PREFIX,
 242                seed_coder::FIM_MIDDLE,
 243                seed_coder::FILE_MARKER,
 244                CURSOR_MARKER,
 245                multi_region::MARKER_TAG_PREFIX,
 246            ];
 247            TOKENS
 248        }
 249    }
 250}
 251
 252/// Returns the (editable_token_limit, context_token_limit) for a given format.
 253pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
 254    match format {
 255        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
 256        ZetaFormat::V0114180EditableRegion => (180, 350),
 257        ZetaFormat::V0120GitMergeMarkers
 258        | ZetaFormat::V0131GitMergeMarkersPrefix
 259        | ZetaFormat::V0211Prefill
 260        | ZetaFormat::V0211SeedCoder
 261        | ZetaFormat::v0226Hashline
 262        | ZetaFormat::V0306SeedMultiRegions
 263        | ZetaFormat::V0316SeedMultiRegions
 264        | ZetaFormat::V0304SeedNoEdits => (350, 150),
 265        ZetaFormat::V0304VariableEdit => (1024, 0),
 266    }
 267}
 268
 269pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 270    match format {
 271        ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
 272        ZetaFormat::V0112MiddleAtEnd
 273        | ZetaFormat::V0113Ordered
 274        | ZetaFormat::V0114180EditableRegion
 275        | ZetaFormat::V0120GitMergeMarkers
 276        | ZetaFormat::V0131GitMergeMarkersPrefix
 277        | ZetaFormat::V0211Prefill
 278        | ZetaFormat::V0211SeedCoder
 279        | ZetaFormat::V0304VariableEdit
 280        | ZetaFormat::V0306SeedMultiRegions
 281        | ZetaFormat::V0316SeedMultiRegions
 282        | ZetaFormat::V0304SeedNoEdits => &[],
 283    }
 284}
 285
 286pub fn excerpt_ranges_for_format(
 287    format: ZetaFormat,
 288    ranges: &ExcerptRanges,
 289) -> (Range<usize>, Range<usize>) {
 290    match format {
 291        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 292            ranges.editable_150.clone(),
 293            ranges.editable_150_context_350.clone(),
 294        ),
 295        ZetaFormat::V0114180EditableRegion => (
 296            ranges.editable_180.clone(),
 297            ranges.editable_180_context_350.clone(),
 298        ),
 299        ZetaFormat::V0120GitMergeMarkers
 300        | ZetaFormat::V0131GitMergeMarkersPrefix
 301        | ZetaFormat::V0211Prefill
 302        | ZetaFormat::V0211SeedCoder
 303        | ZetaFormat::v0226Hashline
 304        | ZetaFormat::V0304SeedNoEdits
 305        | ZetaFormat::V0306SeedMultiRegions
 306        | ZetaFormat::V0316SeedMultiRegions => (
 307            ranges.editable_350.clone(),
 308            ranges.editable_350_context_150.clone(),
 309        ),
 310        ZetaFormat::V0304VariableEdit => {
 311            let context = ranges
 312                .editable_350_context_1024
 313                .clone()
 314                .or(ranges.editable_350_context_512.clone())
 315                .unwrap_or_else(|| ranges.editable_350_context_150.clone());
 316            (context.clone(), context)
 317        }
 318    }
 319}
 320
/// Appends the cursor-excerpt section for `format` to `prompt`.
///
/// This is a pure dispatcher: each format forwards to the writer of its
/// format module (some formats share a writer), and the two multi-region
/// formats append the string produced by their `build_*_cursor_prefix` helper.
///
/// * `path` - path of the file containing the cursor.
/// * `context` - the context text that the section is built from.
/// * `editable_range` - the editable region, as a range into `context`.
/// * `cursor_offset` - the cursor position, as an offset into `context`.
pub fn write_cursor_excerpt_section_for_format(
    format: ZetaFormat,
    prompt: &mut String,
    path: &Path,
    context: &str,
    editable_range: &Range<usize>,
    cursor_offset: usize,
) {
    match format {
        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // V0114180 reuses the V0113 writer.
        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
            v0113_ordered::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // The prefill format reuses the V0131 writer.
        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
            seed_coder::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // The variable-edit writer does not take an editable range.
        ZetaFormat::V0304VariableEdit => {
            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
        }
        ZetaFormat::V0306SeedMultiRegions => {
            prompt.push_str(&build_v0306_cursor_prefix(
                path,
                context,
                editable_range,
                cursor_offset,
            ));
        }
        ZetaFormat::V0316SeedMultiRegions => {
            prompt.push_str(&build_v0316_cursor_prefix(
                path,
                context,
                editable_range,
                cursor_offset,
            ));
        }
    }
}
 399
 400fn build_v0306_cursor_prefix(
 401    path: &Path,
 402    context: &str,
 403    editable_range: &Range<usize>,
 404    cursor_offset: usize,
 405) -> String {
 406    let mut section = String::new();
 407    let path_str = path.to_string_lossy();
 408    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 409
 410    section.push_str(&context[..editable_range.start]);
 411    section.push_str(seed_coder::START_MARKER);
 412
 413    let editable_text = &context[editable_range.clone()];
 414    let cursor_in_editable = cursor_offset - editable_range.start;
 415    multi_region::write_editable_with_markers(
 416        &mut section,
 417        editable_text,
 418        cursor_in_editable,
 419        CURSOR_MARKER,
 420    );
 421
 422    if !section.ends_with('\n') {
 423        section.push('\n');
 424    }
 425    section.push_str(seed_coder::SEPARATOR);
 426    section
 427}
 428
 429fn build_v0316_cursor_prefix(
 430    path: &Path,
 431    context: &str,
 432    editable_range: &Range<usize>,
 433    cursor_offset: usize,
 434) -> String {
 435    let mut section = String::new();
 436    let path_str = path.to_string_lossy();
 437    write!(
 438        section,
 439        "{}{}
 440",
 441        seed_coder::FILE_MARKER,
 442        path_str
 443    )
 444    .ok();
 445
 446    section.push_str(&context[..editable_range.start]);
 447
 448    let editable_text = &context[editable_range.clone()];
 449    let cursor_in_editable = cursor_offset - editable_range.start;
 450    multi_region::write_editable_with_markers(
 451        &mut section,
 452        editable_text,
 453        cursor_in_editable,
 454        CURSOR_MARKER,
 455    );
 456
 457    if !section.ends_with('\n') {
 458        section.push('\n');
 459    }
 460    section
 461}
 462
 463fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
 464    let start_row = text[0..range.start].matches('\n').count() as u32;
 465    let mut end_row = start_row + text[range.clone()].matches('\n').count() as u32;
 466    if !text[..range.end].ends_with('\n') {
 467        end_row += 1;
 468    }
 469    return start_row..end_row;
 470}
 471
 472pub fn format_prompt_with_budget_for_format(
 473    input: &ZetaPromptInput,
 474    format: ZetaFormat,
 475    max_tokens: usize,
 476) -> Option<String> {
 477    let (context, editable_range, context_range, cursor_offset) =
 478        resolve_cursor_region(input, format);
 479    let path = &*input.cursor_path;
 480
 481    let empty_files = Vec::new();
 482    let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
 483    let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
 484        let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
 485        let row_range = relative_row_range.start + cursor_excerpt_start_row
 486            ..relative_row_range.end + cursor_excerpt_start_row;
 487        &filter_redundant_excerpts(
 488            input_related_files.to_vec(),
 489            input.cursor_path.as_ref(),
 490            row_range,
 491        )
 492    } else {
 493        input_related_files
 494    };
 495
 496    let prompt = match format {
 497        ZetaFormat::V0211SeedCoder
 498        | ZetaFormat::V0304SeedNoEdits
 499        | ZetaFormat::V0306SeedMultiRegions
 500        | ZetaFormat::V0316SeedMultiRegions => {
 501            let mut cursor_section = String::new();
 502            write_cursor_excerpt_section_for_format(
 503                format,
 504                &mut cursor_section,
 505                path,
 506                context,
 507                &editable_range,
 508                cursor_offset,
 509            );
 510
 511            seed_coder::assemble_fim_prompt(
 512                context,
 513                &editable_range,
 514                &cursor_section,
 515                &input.events,
 516                related_files,
 517                max_tokens,
 518            )
 519        }
 520        _ => {
 521            let mut cursor_section = String::new();
 522            write_cursor_excerpt_section_for_format(
 523                format,
 524                &mut cursor_section,
 525                path,
 526                context,
 527                &editable_range,
 528                cursor_offset,
 529            );
 530
 531            let max_bytes = max_tokens * 3;
 532            let content_budget_tokens =
 533                estimate_tokens(max_bytes.saturating_sub(cursor_section.len()));
 534
 535            let edit_history_section = format_edit_history_within_budget(
 536                &input.events,
 537                "<|file_sep|>",
 538                "edit history",
 539                content_budget_tokens,
 540                max_edit_event_count_for_format(&format),
 541            );
 542            let remaining_budget_tokens = estimate_tokens(
 543                max_bytes
 544                    .saturating_sub(cursor_section.len())
 545                    .saturating_sub(edit_history_section.len()),
 546            );
 547
 548            let related_files_section = format_related_files_within_budget(
 549                &related_files,
 550                "<|file_sep|>",
 551                "",
 552                remaining_budget_tokens,
 553            );
 554
 555            let mut prompt = String::new();
 556            prompt.push_str(&related_files_section);
 557            prompt.push_str(&edit_history_section);
 558            prompt.push_str(&cursor_section);
 559            prompt
 560        }
 561    };
 562    let prompt_tokens = estimate_tokens(prompt.len());
 563    if prompt_tokens > max_tokens {
 564        return None;
 565    }
 566    return Some(prompt);
 567}
 568
 569pub fn filter_redundant_excerpts(
 570    mut related_files: Vec<RelatedFile>,
 571    cursor_path: &Path,
 572    cursor_row_range: Range<u32>,
 573) -> Vec<RelatedFile> {
 574    for file in &mut related_files {
 575        if file.path.as_ref() == cursor_path {
 576            file.excerpts.retain(|excerpt| {
 577                excerpt.row_range.start < cursor_row_range.start
 578                    || excerpt.row_range.end > cursor_row_range.end
 579            });
 580        }
 581    }
 582    related_files.retain(|file| !file.excerpts.is_empty());
 583    related_files
 584}
 585
 586pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
 587    match format {
 588        ZetaFormat::V0112MiddleAtEnd
 589        | ZetaFormat::V0113Ordered
 590        | ZetaFormat::V0114180EditableRegion
 591        | ZetaFormat::V0120GitMergeMarkers
 592        | ZetaFormat::V0131GitMergeMarkersPrefix
 593        | ZetaFormat::V0211Prefill
 594        | ZetaFormat::V0211SeedCoder
 595        | ZetaFormat::v0226Hashline
 596        | ZetaFormat::V0304SeedNoEdits
 597        | ZetaFormat::V0304VariableEdit
 598        | ZetaFormat::V0306SeedMultiRegions
 599        | ZetaFormat::V0316SeedMultiRegions => 6,
 600    }
 601}
 602
 603pub fn get_prefill_for_format(
 604    format: ZetaFormat,
 605    context: &str,
 606    editable_range: &Range<usize>,
 607) -> String {
 608    match format {
 609        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
 610        ZetaFormat::V0112MiddleAtEnd
 611        | ZetaFormat::V0113Ordered
 612        | ZetaFormat::V0114180EditableRegion
 613        | ZetaFormat::V0120GitMergeMarkers
 614        | ZetaFormat::V0131GitMergeMarkersPrefix
 615        | ZetaFormat::V0211SeedCoder
 616        | ZetaFormat::v0226Hashline
 617        | ZetaFormat::V0304VariableEdit => String::new(),
 618        ZetaFormat::V0304SeedNoEdits
 619        | ZetaFormat::V0306SeedMultiRegions
 620        | ZetaFormat::V0316SeedMultiRegions => String::new(),
 621    }
 622}
 623
 624pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
 625    match format {
 626        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
 627        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
 628        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
 629        ZetaFormat::V0211SeedCoder
 630        | ZetaFormat::V0304SeedNoEdits
 631        | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
 632        ZetaFormat::V0112MiddleAtEnd
 633        | ZetaFormat::V0113Ordered
 634        | ZetaFormat::V0114180EditableRegion
 635        | ZetaFormat::v0226Hashline
 636        | ZetaFormat::V0304VariableEdit
 637        | ZetaFormat::V0316SeedMultiRegions => None,
 638    }
 639}
 640
 641pub fn encode_patch_as_output_for_format(
 642    format: ZetaFormat,
 643    old_editable_region: &str,
 644    patch: &str,
 645    cursor_offset: Option<usize>,
 646) -> Result<Option<String>> {
 647    match format {
 648        ZetaFormat::v0226Hashline => {
 649            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
 650        }
 651        ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
 652            old_editable_region,
 653            patch,
 654            cursor_offset,
 655        )
 656        .map(Some),
 657        ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
 658            Ok(seed_coder::no_edits(patch))
 659        }
 660        // V0316 teacher prompt encoding is not yet implemented.
 661        ZetaFormat::V0316SeedMultiRegions => Ok(None),
 662        _ => Ok(None),
 663    }
 664}
 665
/// Result of `parse_zeta2_model_output`: replacement text together with the
/// range of the cursor excerpt it replaces.
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
}
 672
 673/// Parse model output for the given zeta format
 674pub fn parse_zeta2_model_output(
 675    output: &str,
 676    format: ZetaFormat,
 677    prompt_inputs: &ZetaPromptInput,
 678) -> Result<ParsedOutput> {
 679    let output = match output_end_marker_for_format(format) {
 680        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
 681        None => output,
 682    };
 683
 684    let (context, editable_range_in_context, context_range, _) =
 685        resolve_cursor_region(prompt_inputs, format);
 686    let context_start = context_range.start;
 687    let old_editable_region = &context[editable_range_in_context.clone()];
 688
 689    let (range_in_context, output) = match format {
 690        ZetaFormat::v0226Hashline => (
 691            editable_range_in_context,
 692            if hashline::output_has_edit_commands(output) {
 693                hashline::apply_edit_commands(old_editable_region, output)
 694            } else {
 695                output.to_string()
 696            },
 697        ),
 698        ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
 699        ZetaFormat::V0304SeedNoEdits => (
 700            editable_range_in_context,
 701            if output.starts_with(seed_coder::NO_EDITS) {
 702                old_editable_region.to_string()
 703            } else {
 704                output.to_string()
 705            },
 706        ),
 707        ZetaFormat::V0306SeedMultiRegions => (
 708            editable_range_in_context,
 709            if output.starts_with(seed_coder::NO_EDITS) {
 710                old_editable_region.to_string()
 711            } else {
 712                multi_region::apply_marker_span(old_editable_region, output)?
 713            },
 714        ),
 715        ZetaFormat::V0316SeedMultiRegions => (
 716            editable_range_in_context,
 717            if multi_region::is_repeated_final_marker(output) {
 718                old_editable_region.to_string()
 719            } else {
 720                multi_region::apply_marker_span(old_editable_region, output)?
 721            },
 722        ),
 723        _ => (editable_range_in_context, output.to_string()),
 724    };
 725
 726    let range_in_excerpt =
 727        range_in_context.start + context_start..range_in_context.end + context_start;
 728
 729    Ok(ParsedOutput {
 730        new_editable_region: output,
 731        range_in_excerpt,
 732    })
 733}
 734
/// Singular-named alias of `excerpt_ranges_for_format`; forwards its
/// arguments unchanged and returns the same `(editable, context)` pair.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
 741
 742pub fn resolve_cursor_region(
 743    input: &ZetaPromptInput,
 744    format: ZetaFormat,
 745) -> (&str, Range<usize>, Range<usize>, usize) {
 746    let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges {
 747        let (editable_tokens, context_tokens) = token_limits_for_format(format);
 748        compute_editable_and_context_ranges(
 749            &input.cursor_excerpt,
 750            input.cursor_offset_in_excerpt,
 751            syntax_ranges,
 752            editable_tokens,
 753            context_tokens,
 754        )
 755    } else {
 756        excerpt_range_for_format(format, &input.excerpt_ranges)
 757    };
 758    let context_start = context_range.start;
 759    let context_text = &input.cursor_excerpt[context_range.clone()];
 760    let adjusted_editable =
 761        (editable_range.start - context_start)..(editable_range.end - context_start);
 762    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 763
 764    (
 765        context_text,
 766        adjusted_editable,
 767        context_range,
 768        adjusted_cursor,
 769    )
 770}
 771
/// Returns the prefill text for `input` in `format`.
///
/// Resolves the cursor region for `input` and passes the resulting context
/// and editable range to `get_prefill_for_format`.
pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
    get_prefill_for_format(format, context, &editable_range)
}
 776
 777fn format_edit_history_within_budget(
 778    events: &[Arc<Event>],
 779    file_marker: &str,
 780    edit_history_name: &str,
 781    max_tokens: usize,
 782    max_edit_event_count: usize,
 783) -> String {
 784    let max_bytes = max_tokens.saturating_mul(3);
 785    let header = format!("{}{}\n", file_marker, edit_history_name);
 786    if header.len() >= max_bytes {
 787        return String::new();
 788    }
 789
 790    let mut event_strings: Vec<String> = Vec::new();
 791    let mut total_bytes = header.len();
 792
 793    for event in events.iter().rev().take(max_edit_event_count) {
 794        let mut event_str = String::new();
 795        write_event(&mut event_str, event);
 796
 797        if total_bytes + event_str.len() > max_bytes {
 798            break;
 799        }
 800        total_bytes += event_str.len();
 801        event_strings.push(event_str);
 802    }
 803
 804    if event_strings.is_empty() {
 805        return String::new();
 806    }
 807
 808    let mut result = header;
 809    for event_str in event_strings.iter().rev() {
 810        result.push_str(event_str);
 811    }
 812    result
 813}
 814
 815fn excerpt_rendered_bytes(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
 816    excerpt.text.len()
 817        + if !excerpt.text.ends_with('\n') {
 818            "\n".len()
 819        } else {
 820            0
 821        }
 822        + if excerpt.row_range.end < file_max_row {
 823            "...\n".len()
 824        } else {
 825            0
 826        }
 827}
 828
 829pub fn format_related_files_within_budget(
 830    related_files: &[RelatedFile],
 831    file_prefix: &str,
 832    file_suffix: &str,
 833    max_tokens: usize,
 834) -> String {
 835    struct ExcerptCandidate {
 836        file_ix: usize,
 837        excerpt_ix: usize,
 838        order: usize,
 839    }
 840
 841    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
 842        .iter()
 843        .enumerate()
 844        .flat_map(|(file_ix, file)| {
 845            file.excerpts
 846                .iter()
 847                .enumerate()
 848                .map(move |(excerpt_ix, e)| ExcerptCandidate {
 849                    file_ix,
 850                    excerpt_ix,
 851                    order: e.order,
 852                })
 853        })
 854        .collect();
 855
 856    // Pre-compute file header strings and their token costs.
 857    let file_headers: Vec<String> = related_files
 858        .iter()
 859        .map(|file| {
 860            let path_str = file.path.to_string_lossy();
 861            format!("{}{}\n", file_prefix, path_str)
 862        })
 863        .collect();
 864
 865    // Sort the excerpts by their order and determine how many fit within the budget.
 866    let max_bytes = max_tokens.saturating_mul(3);
 867    let mut total_bytes = 0;
 868    let mut included_excerpt_count = 0_usize;
 869    let mut included_file_indices = vec![false; related_files.len()];
 870    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
 871    for candidate in &excerpt_candidates {
 872        let file = &related_files[candidate.file_ix];
 873        let excerpt = &file.excerpts[candidate.excerpt_ix];
 874        let file_already_included = included_file_indices[candidate.file_ix];
 875        let header_cost = if file_already_included {
 876            0
 877        } else {
 878            file_headers[candidate.file_ix].len() + file_suffix.len()
 879        };
 880        let excerpt_cost = excerpt_rendered_bytes(excerpt, file.max_row);
 881        if total_bytes + header_cost + excerpt_cost > max_bytes {
 882            break;
 883        }
 884        total_bytes += header_cost + excerpt_cost;
 885        if !file_already_included {
 886            included_file_indices[candidate.file_ix] = true;
 887        }
 888        included_excerpt_count += 1;
 889    }
 890
 891    excerpt_candidates.truncate(included_excerpt_count);
 892    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
 893
 894    // Render all of the files that fit within the token budget, in the original order.
 895    let mut result = String::new();
 896    let mut last_file_ix = None;
 897    for candidate in &excerpt_candidates {
 898        if last_file_ix != Some(candidate.file_ix) {
 899            if last_file_ix.is_some() {
 900                result.push_str(file_suffix);
 901            }
 902            result.push_str(&file_headers[candidate.file_ix]);
 903            last_file_ix = Some(candidate.file_ix);
 904        }
 905        let file = &related_files[candidate.file_ix];
 906        let excerpt = &file.excerpts[candidate.excerpt_ix];
 907        result.push_str(&excerpt.text);
 908        if !result.ends_with('\n') {
 909            result.push('\n');
 910        }
 911        if excerpt.row_range.end < file.max_row {
 912            result.push_str("...\n");
 913        }
 914    }
 915
 916    result
 917}
 918
 919pub fn write_related_files(
 920    prompt: &mut String,
 921    related_files: &[RelatedFile],
 922) -> Vec<Range<usize>> {
 923    let mut ranges = Vec::new();
 924    for file in related_files {
 925        let start = prompt.len();
 926        let path_str = file.path.to_string_lossy();
 927        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 928        for excerpt in &file.excerpts {
 929            prompt.push_str(&excerpt.text);
 930            if !prompt.ends_with('\n') {
 931                prompt.push('\n');
 932            }
 933            if excerpt.row_range.end < file.max_row {
 934                prompt.push_str("...\n");
 935            }
 936        }
 937        let end = prompt.len();
 938        ranges.push(start..end);
 939    }
 940    ranges
 941}
 942
 943mod v0112_middle_at_end {
 944    use super::*;
 945
 946    pub fn special_tokens() -> &'static [&'static str] {
 947        &[
 948            "<|fim_prefix|>",
 949            "<|fim_suffix|>",
 950            "<|fim_middle|>",
 951            "<|file_sep|>",
 952            CURSOR_MARKER,
 953        ]
 954    }
 955
 956    pub fn write_cursor_excerpt_section(
 957        prompt: &mut String,
 958        path: &Path,
 959        context: &str,
 960        editable_range: &Range<usize>,
 961        cursor_offset: usize,
 962    ) {
 963        let path_str = path.to_string_lossy();
 964        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 965
 966        prompt.push_str("<|fim_prefix|>\n");
 967        prompt.push_str(&context[..editable_range.start]);
 968
 969        prompt.push_str("<|fim_suffix|>\n");
 970        prompt.push_str(&context[editable_range.end..]);
 971        if !prompt.ends_with('\n') {
 972            prompt.push('\n');
 973        }
 974
 975        prompt.push_str("<|fim_middle|>current\n");
 976        prompt.push_str(&context[editable_range.start..cursor_offset]);
 977        prompt.push_str(CURSOR_MARKER);
 978        prompt.push_str(&context[cursor_offset..editable_range.end]);
 979        if !prompt.ends_with('\n') {
 980            prompt.push('\n');
 981        }
 982
 983        prompt.push_str("<|fim_middle|>updated\n");
 984    }
 985}
 986
 987mod v0113_ordered {
 988    use super::*;
 989
 990    pub fn special_tokens() -> &'static [&'static str] {
 991        &[
 992            "<|fim_prefix|>",
 993            "<|fim_suffix|>",
 994            "<|fim_middle|>",
 995            "<|file_sep|>",
 996            CURSOR_MARKER,
 997        ]
 998    }
 999
1000    pub fn write_cursor_excerpt_section(
1001        prompt: &mut String,
1002        path: &Path,
1003        context: &str,
1004        editable_range: &Range<usize>,
1005        cursor_offset: usize,
1006    ) {
1007        let path_str = path.to_string_lossy();
1008        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1009
1010        prompt.push_str("<|fim_prefix|>\n");
1011        prompt.push_str(&context[..editable_range.start]);
1012        if !prompt.ends_with('\n') {
1013            prompt.push('\n');
1014        }
1015
1016        prompt.push_str("<|fim_middle|>current\n");
1017        prompt.push_str(&context[editable_range.start..cursor_offset]);
1018        prompt.push_str(CURSOR_MARKER);
1019        prompt.push_str(&context[cursor_offset..editable_range.end]);
1020        if !prompt.ends_with('\n') {
1021            prompt.push('\n');
1022        }
1023
1024        prompt.push_str("<|fim_suffix|>\n");
1025        prompt.push_str(&context[editable_range.end..]);
1026        if !prompt.ends_with('\n') {
1027            prompt.push('\n');
1028        }
1029
1030        prompt.push_str("<|fim_middle|>updated\n");
1031    }
1032}
1033
1034mod v0114180_editable_region {
1035    use super::*;
1036
1037    pub fn special_tokens() -> &'static [&'static str] {
1038        v0113_ordered::special_tokens()
1039    }
1040}
1041
1042pub mod v0120_git_merge_markers {
1043    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1044    //!
1045    //! Example prompt:
1046    //!
1047    //! <|file_sep|>path/to/target_file.py
1048    //! <|fim_prefix|>
1049    //! code before editable region
1050    //! <|fim_suffix|>
1051    //! code after editable region
1052    //! <|fim_middle|>
1053    //! <<<<<<< CURRENT
1054    //! code that
1055    //! needs to<|user_cursor|>
1056    //! be rewritten
1057    //! =======
1058    //!
1059    //! Expected output (should be generated by the model):
1060    //!
1061    //! updated
1062    //! code with
1063    //! changes applied
1064    //! >>>>>>> UPDATED
1065
1066    use super::*;
1067
1068    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1069    pub const SEPARATOR: &str = "=======\n";
1070    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1071
1072    pub fn special_tokens() -> &'static [&'static str] {
1073        &[
1074            "<|fim_prefix|>",
1075            "<|fim_suffix|>",
1076            "<|fim_middle|>",
1077            "<|file_sep|>",
1078            START_MARKER,
1079            SEPARATOR,
1080            END_MARKER,
1081            CURSOR_MARKER,
1082        ]
1083    }
1084
1085    pub fn write_cursor_excerpt_section(
1086        prompt: &mut String,
1087        path: &Path,
1088        context: &str,
1089        editable_range: &Range<usize>,
1090        cursor_offset: usize,
1091    ) {
1092        let path_str = path.to_string_lossy();
1093        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1094
1095        prompt.push_str("<|fim_prefix|>");
1096        prompt.push_str(&context[..editable_range.start]);
1097
1098        prompt.push_str("<|fim_suffix|>");
1099        prompt.push_str(&context[editable_range.end..]);
1100        if !prompt.ends_with('\n') {
1101            prompt.push('\n');
1102        }
1103
1104        prompt.push_str("<|fim_middle|>");
1105        prompt.push_str(START_MARKER);
1106        prompt.push_str(&context[editable_range.start..cursor_offset]);
1107        prompt.push_str(CURSOR_MARKER);
1108        prompt.push_str(&context[cursor_offset..editable_range.end]);
1109        if !prompt.ends_with('\n') {
1110            prompt.push('\n');
1111        }
1112        prompt.push_str(SEPARATOR);
1113    }
1114}
1115
1116pub mod v0131_git_merge_markers_prefix {
1117    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1118    //!
1119    //! Example prompt:
1120    //!
1121    //! <|file_sep|>path/to/target_file.py
1122    //! <|fim_prefix|>
1123    //! code before editable region
1124    //! <<<<<<< CURRENT
1125    //! code that
1126    //! needs to<|user_cursor|>
1127    //! be rewritten
1128    //! =======
1129    //! <|fim_suffix|>
1130    //! code after editable region
1131    //! <|fim_middle|>
1132    //!
1133    //! Expected output (should be generated by the model):
1134    //!
1135    //! updated
1136    //! code with
1137    //! changes applied
1138    //! >>>>>>> UPDATED
1139
1140    use super::*;
1141
1142    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1143    pub const SEPARATOR: &str = "=======\n";
1144    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1145
1146    pub fn special_tokens() -> &'static [&'static str] {
1147        &[
1148            "<|fim_prefix|>",
1149            "<|fim_suffix|>",
1150            "<|fim_middle|>",
1151            "<|file_sep|>",
1152            START_MARKER,
1153            SEPARATOR,
1154            END_MARKER,
1155            CURSOR_MARKER,
1156        ]
1157    }
1158
1159    pub fn write_cursor_excerpt_section(
1160        prompt: &mut String,
1161        path: &Path,
1162        context: &str,
1163        editable_range: &Range<usize>,
1164        cursor_offset: usize,
1165    ) {
1166        let path_str = path.to_string_lossy();
1167        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1168
1169        prompt.push_str("<|fim_prefix|>");
1170        prompt.push_str(&context[..editable_range.start]);
1171        prompt.push_str(START_MARKER);
1172        prompt.push_str(&context[editable_range.start..cursor_offset]);
1173        prompt.push_str(CURSOR_MARKER);
1174        prompt.push_str(&context[cursor_offset..editable_range.end]);
1175        if !prompt.ends_with('\n') {
1176            prompt.push('\n');
1177        }
1178        prompt.push_str(SEPARATOR);
1179
1180        prompt.push_str("<|fim_suffix|>");
1181        prompt.push_str(&context[editable_range.end..]);
1182        if !prompt.ends_with('\n') {
1183            prompt.push('\n');
1184        }
1185
1186        prompt.push_str("<|fim_middle|>");
1187    }
1188}
1189
1190pub mod v0211_prefill {
1191    use super::*;
1192
1193    pub fn special_tokens() -> &'static [&'static str] {
1194        v0131_git_merge_markers_prefix::special_tokens()
1195    }
1196
1197    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1198        let editable_region = &context[editable_range.start..editable_range.end];
1199
1200        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1201        let prefill_len = editable_region.floor_char_boundary(prefill_len);
1202
1203        // Find a token boundary to avoid splitting tokens in the prefill.
1204        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1205        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1206        // the \n and consume any consecutive \n characters after it.
1207        let prefill = &editable_region[..prefill_len];
1208        match prefill.rfind('\n') {
1209            Some(pos) => {
1210                let mut end = pos + 1;
1211                while end < editable_region.len()
1212                    && editable_region.as_bytes().get(end) == Some(&b'\n')
1213                {
1214                    end += 1;
1215                }
1216                editable_region[..end].to_string()
1217            }
1218            // No newline found. Fall back to splitting before the last space
1219            // (word-level boundary)
1220            None => match prefill.rfind(' ') {
1221                Some(pos) => prefill[..pos].to_string(),
1222                None => prefill.to_string(),
1223            },
1224        }
1225    }
1226}
1227
1228pub mod hashline {
1229
1230    use std::fmt::Display;
1231
1232    pub const END_MARKER: &str = "<|fim_middle|>updated";
1233    pub const START_MARKER: &str = "<|fim_middle|>current";
1234
1235    use super::*;
1236
1237    const SET_COMMAND_MARKER: &str = "<|set|>";
1238    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
1239    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1240
1241    pub fn special_tokens() -> &'static [&'static str] {
1242        return &[
1243            SET_COMMAND_MARKER,
1244            "<|set_range|>",
1245            INSERT_COMMAND_MARKER,
1246            NO_EDITS_COMMAND_MARKER,
1247            CURSOR_MARKER,
1248            "<|file_sep|>",
1249            "<|fim_prefix|>",
1250            "<|fim_suffix|>",
1251            "<|fim_middle|>",
1252        ];
1253    }
1254
1255    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1256    #[derive(Debug, Clone, PartialEq, Eq)]
1257    struct LineRef {
1258        index: usize,
1259        hash: u8,
1260    }
1261
1262    impl Display for LineRef {
1263        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1264            write!(f, "{}:{:02x}", self.index, self.hash)
1265        }
1266    }
1267
1268    pub fn hash_line(line: &[u8]) -> u8 {
1269        let mut h: u8 = 0;
1270        for &byte in line {
1271            h = h.wrapping_add(byte);
1272        }
1273        return h;
1274    }
1275
1276    /// Write the hashline-encoded editable region into `out`. Each line of
1277    /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1278    /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1279    /// to the start of `editable_text`).
1280    pub fn write_hashline_editable_region(
1281        out: &mut String,
1282        editable_text: &str,
1283        cursor_offset_in_editable: usize,
1284    ) {
1285        let mut offset = 0;
1286        for (i, line) in editable_text.lines().enumerate() {
1287            let (head, cursor, tail) = if cursor_offset_in_editable > offset
1288                && cursor_offset_in_editable < offset + line.len()
1289            {
1290                (
1291                    &line[..cursor_offset_in_editable - offset],
1292                    CURSOR_MARKER,
1293                    &line[cursor_offset_in_editable - offset..],
1294                )
1295            } else {
1296                (line, "", "")
1297            };
1298            write!(
1299                out,
1300                "\n{}|{head}{cursor}{tail}",
1301                LineRef {
1302                    index: i,
1303                    hash: hash_line(line.as_bytes())
1304                }
1305            )
1306            .unwrap();
1307            offset += line.len() + 1;
1308        }
1309    }
1310
1311    pub fn write_cursor_excerpt_section(
1312        prompt: &mut String,
1313        path: &Path,
1314        context: &str,
1315        editable_range: &Range<usize>,
1316        cursor_offset: usize,
1317    ) {
1318        let path_str = path.to_string_lossy();
1319        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1320
1321        prompt.push_str("<|fim_prefix|>\n");
1322        prompt.push_str(&context[..editable_range.start]);
1323        prompt.push_str(START_MARKER);
1324
1325        let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1326        let editable_region = &context[editable_range.clone()];
1327        write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1328
1329        if !prompt.ends_with('\n') {
1330            prompt.push('\n');
1331        }
1332
1333        prompt.push_str("<|fim_suffix|>\n");
1334        prompt.push_str(&context[editable_range.end..]);
1335        if !prompt.ends_with('\n') {
1336            prompt.push('\n');
1337        }
1338
1339        prompt.push_str(END_MARKER);
1340        prompt.push('\n');
1341    }
1342
    /// A single edit command parsed from the model output.
    ///
    /// `content` in both variants is a slice of the model output: the lines
    /// that follow the command line, up to the next command line or the end of
    /// the output.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            // First line of the replaced range.
            start: LineRef,
            // Last line of the replaced range (inclusive).
            end: LineRef,
            // Replacement text for the whole range.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            // Line to insert after; `None` means insert before the first line.
            after: Option<LineRef>,
            // Text to insert.
            content: &'a str,
        },
    }
1360
1361    /// Parse a line reference like `3:c3` into a `LineRef`.
1362    fn parse_line_ref(s: &str) -> Option<LineRef> {
1363        let (idx_str, hash_str) = s.split_once(':')?;
1364        let index = idx_str.parse::<usize>().ok()?;
1365        let hash = u8::from_str_radix(hash_str, 16).ok()?;
1366        Some(LineRef { index, hash })
1367    }
1368
1369    /// Parse the model output into a list of `EditCommand`s.
1370    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1371        let mut commands = Vec::new();
1372        let mut offset = 0usize;
1373
1374        while offset < model_output.len() {
1375            let next_nl = model_output[offset..]
1376                .find('\n')
1377                .map(|i| offset + i)
1378                .unwrap_or(model_output.len());
1379            let line = &model_output[offset..next_nl];
1380            let line_end = if next_nl < model_output.len() {
1381                next_nl + 1
1382            } else {
1383                next_nl
1384            };
1385
1386            let trimmed = line.trim();
1387            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1388                (true, spec)
1389            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1390                (false, spec)
1391            } else {
1392                offset = line_end;
1393                continue;
1394            };
1395
1396            let mut content_end = line_end;
1397            let mut scan = line_end;
1398
1399            while scan < model_output.len() {
1400                let body_nl = model_output[scan..]
1401                    .find('\n')
1402                    .map(|i| scan + i)
1403                    .unwrap_or(model_output.len());
1404                let body_line = &model_output[scan..body_nl];
1405                if body_line.trim().starts_with(SET_COMMAND_MARKER)
1406                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1407                {
1408                    break;
1409                }
1410                scan = if body_nl < model_output.len() {
1411                    body_nl + 1
1412                } else {
1413                    body_nl
1414                };
1415                content_end = scan;
1416            }
1417
1418            let content = &model_output[line_end..content_end];
1419
1420            if is_set {
1421                if let Some((start_str, end_str)) = specifier.split_once('-') {
1422                    if let (Some(start), Some(end)) =
1423                        (parse_line_ref(start_str), parse_line_ref(end_str))
1424                    {
1425                        commands.push(EditCommand::Set {
1426                            start,
1427                            end,
1428                            content,
1429                        });
1430                    }
1431                } else if let Some(target) = parse_line_ref(specifier) {
1432                    commands.push(EditCommand::Set {
1433                        start: target.clone(),
1434                        end: target,
1435                        content,
1436                    });
1437                }
1438            } else {
1439                let after = parse_line_ref(specifier);
1440                commands.push(EditCommand::Insert { after, content });
1441            }
1442
1443            offset = scan;
1444        }
1445
1446        commands
1447    }
1448
1449    /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1450    /// (as opposed to being a plain full-replacement output).
1451    /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1452    /// editable region, returning the plain text content.
1453    pub fn strip_hashline_prefixes(region: &str) -> String {
1454        let mut decoded: String = region
1455            .lines()
1456            .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1457            .collect::<Vec<_>>()
1458            .join("\n");
1459        if region.ends_with('\n') {
1460            decoded.push('\n');
1461        }
1462        decoded
1463    }
1464
1465    pub fn output_has_edit_commands(model_output: &str) -> bool {
1466        model_output.contains(SET_COMMAND_MARKER)
1467            || model_output.contains(INSERT_COMMAND_MARKER)
1468            || model_output.contains(NO_EDITS_COMMAND_MARKER)
1469    }
1470
1471    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1472    /// original editable region text.
1473    ///
1474    /// `editable_region` is the original text of the editable region (without hash
1475    /// prefixes). `model_output` is the raw model response containing edit commands.
1476    ///
1477    /// Returns the full replacement text for the editable region.
1478    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1479        if model_output
1480            .trim_start()
1481            .starts_with(NO_EDITS_COMMAND_MARKER)
1482        {
1483            return editable_region.to_string();
1484        }
1485
1486        let original_lines: Vec<&str> = editable_region.lines().collect();
1487        let old_hashes: Vec<u8> = original_lines
1488            .iter()
1489            .map(|line| hash_line(line.as_bytes()))
1490            .collect();
1491
1492        let commands = parse_edit_commands(model_output);
1493
1494        // For set operations: indexed by start line → Some((end line index, content))
1495        // For insert operations: indexed by line index → vec of content to insert after
1496        // Insert-before-first is tracked separately.
1497        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1498        let mut insert_before_first: Vec<&str> = Vec::new();
1499        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1500
1501        for command in &commands {
1502            match command {
1503                EditCommand::Set {
1504                    start,
1505                    end,
1506                    content,
1507                } => {
1508                    if start.index < old_hashes.len()
1509                        && end.index < old_hashes.len()
1510                        && start.index <= end.index
1511                        && old_hashes[start.index] == start.hash
1512                        && old_hashes[end.index] == end.hash
1513                    {
1514                        set_ops[start.index] = Some((end.index, *content));
1515                    }
1516                }
1517                EditCommand::Insert { after, content } => match after {
1518                    None => insert_before_first.push(*content),
1519                    Some(line_ref) => {
1520                        if line_ref.index < old_hashes.len()
1521                            && old_hashes[line_ref.index] == line_ref.hash
1522                        {
1523                            insert_after[line_ref.index].push(*content);
1524                        }
1525                    }
1526                },
1527            }
1528        }
1529
1530        let mut result = String::new();
1531
1532        // Emit any insertions before the first line
1533        for content in &insert_before_first {
1534            result.push_str(content);
1535            if !content.ends_with('\n') {
1536                result.push('\n');
1537            }
1538        }
1539
1540        let mut i = 0;
1541        while i < original_lines.len() {
1542            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1543                // Replace lines i..=end_index with the replacement content
1544                result.push_str(replacement);
1545                if !replacement.is_empty() && !replacement.ends_with('\n') {
1546                    result.push('\n');
1547                }
1548                // Emit any insertions after the end of this set range
1549                if *end_index < insert_after.len() {
1550                    for content in &insert_after[*end_index] {
1551                        result.push_str(content);
1552                        if !content.ends_with('\n') {
1553                            result.push('\n');
1554                        }
1555                    }
1556                }
1557                i = end_index + 1;
1558            } else {
1559                // Keep the original line
1560                result.push_str(original_lines[i]);
1561                result.push('\n');
1562                // Emit any insertions after this line
1563                for content in &insert_after[i] {
1564                    result.push_str(content);
1565                    if !content.ends_with('\n') {
1566                        result.push('\n');
1567                    }
1568                }
1569                i += 1;
1570            }
1571        }
1572
1573        // Preserve trailing newline behavior: if the original ended with a
1574        // newline the result already has one; if it didn't, trim the extra one
1575        // we added.
1576        if !editable_region.ends_with('\n') && result.ends_with('\n') {
1577            result.pop();
1578        }
1579
1580        result
1581    }
1582
1583    /// Convert a unified diff patch into hashline edit commands.
1584    ///
1585    /// Parses the unified diff `patch` directly to determine which lines of
1586    /// `old_text` are deleted/replaced and what new lines are added, then emits
1587    /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1588    /// `{index}:{hash}` identifiers.
1589    ///
1590    /// `cursor_offset` is an optional byte offset into the first hunk's new
1591    /// text (context + additions) where the cursor marker should be placed.
1592    pub fn patch_to_edit_commands(
1593        old_text: &str,
1594        patch: &str,
1595        cursor_offset: Option<usize>,
1596    ) -> Result<String> {
1597        let old_lines: Vec<&str> = old_text.lines().collect();
1598        let old_hashes: Vec<u8> = old_lines
1599            .iter()
1600            .map(|line| hash_line(line.as_bytes()))
1601            .collect();
1602
1603        let mut result = String::new();
1604        let mut first_hunk = true;
1605
1606        struct Hunk<'a> {
1607            line_range: Range<usize>,
1608            new_text_lines: Vec<&'a str>,
1609            cursor_line_offset_in_new_text: Option<(usize, usize)>,
1610        }
1611
1612        // Parse the patch line by line. We only care about hunk headers,
1613        // context, deletions, and additions.
1614        let mut old_line_index: usize = 0;
1615        let mut current_hunk: Option<Hunk> = None;
1616        // Byte offset tracking within the hunk's new text for cursor placement.
1617        let mut new_text_byte_offset: usize = 0;
1618        // The line index of the last old line seen before/in the current hunk
1619        // (used for insert-after reference).
1620        let mut last_old_line_before_hunk: Option<usize> = None;
1621
1622        fn flush_hunk(
1623            hunk: Hunk,
1624            last_old_line: Option<usize>,
1625            result: &mut String,
1626            old_hashes: &[u8],
1627        ) {
1628            if hunk.line_range.is_empty() {
1629                // Pure insertion — reference the old line to insert after when in bounds.
1630                if let Some(after) = last_old_line
1631                    && let Some(&hash) = old_hashes.get(after)
1632                {
1633                    write!(
1634                        result,
1635                        "{INSERT_COMMAND_MARKER}{}\n",
1636                        LineRef { index: after, hash }
1637                    )
1638                    .unwrap();
1639                } else {
1640                    result.push_str(INSERT_COMMAND_MARKER);
1641                    result.push('\n');
1642                }
1643            } else {
1644                let start = hunk.line_range.start;
1645                let end_exclusive = hunk.line_range.end;
1646                let deleted_line_count = end_exclusive.saturating_sub(start);
1647
1648                if deleted_line_count == 1 {
1649                    if let Some(&hash) = old_hashes.get(start) {
1650                        write!(
1651                            result,
1652                            "{SET_COMMAND_MARKER}{}\n",
1653                            LineRef { index: start, hash }
1654                        )
1655                        .unwrap();
1656                    } else {
1657                        result.push_str(SET_COMMAND_MARKER);
1658                        result.push('\n');
1659                    }
1660                } else {
1661                    let end_inclusive = end_exclusive - 1;
1662                    match (
1663                        old_hashes.get(start).copied(),
1664                        old_hashes.get(end_inclusive).copied(),
1665                    ) {
1666                        (Some(start_hash), Some(end_hash)) => {
1667                            write!(
1668                                result,
1669                                "{SET_COMMAND_MARKER}{}-{}\n",
1670                                LineRef {
1671                                    index: start,
1672                                    hash: start_hash
1673                                },
1674                                LineRef {
1675                                    index: end_inclusive,
1676                                    hash: end_hash
1677                                }
1678                            )
1679                            .unwrap();
1680                        }
1681                        _ => {
1682                            result.push_str(SET_COMMAND_MARKER);
1683                            result.push('\n');
1684                        }
1685                    }
1686                }
1687            }
1688            for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1689                if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1690                    && line_offset == cursor_line_offset
1691                {
1692                    result.push_str(&line[..char_offset]);
1693                    result.push_str(CURSOR_MARKER);
1694                    result.push_str(&line[char_offset..]);
1695                    continue;
1696                }
1697
1698                result.push_str(line);
1699            }
1700        }
1701
1702        for raw_line in patch.split_inclusive('\n') {
1703            if raw_line.starts_with("@@") {
1704                // Flush any pending change hunk from a previous patch hunk.
1705                if let Some(hunk) = current_hunk.take() {
1706                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1707                }
1708
1709                // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1710                // We intentionally do not trust old_start as a direct local index into `old_text`,
1711                // because some patches are produced against a larger file region and carry
1712                // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1713                if first_hunk {
1714                    new_text_byte_offset = 0;
1715                    first_hunk = false;
1716                }
1717                continue;
1718            }
1719
1720            if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1721                continue;
1722            }
1723            if raw_line.starts_with("\\ No newline") {
1724                continue;
1725            }
1726
1727            if raw_line.starts_with('-') {
1728                // Extend or start a change hunk with this deleted old line.
1729                match &mut current_hunk {
1730                    Some(Hunk {
1731                        line_range: range, ..
1732                    }) => range.end = old_line_index + 1,
1733                    None => {
1734                        current_hunk = Some(Hunk {
1735                            line_range: old_line_index..old_line_index + 1,
1736                            new_text_lines: Vec::new(),
1737                            cursor_line_offset_in_new_text: None,
1738                        });
1739                    }
1740                }
1741                old_line_index += 1;
1742            } else if let Some(added_content) = raw_line.strip_prefix('+') {
1743                // Place cursor marker if cursor_offset falls within this line.
1744                let mut cursor_line_offset = None;
1745                if let Some(cursor_off) = cursor_offset
1746                    && (first_hunk
1747                        || cursor_off >= new_text_byte_offset
1748                            && cursor_off <= new_text_byte_offset + added_content.len())
1749                {
1750                    let line_offset = added_content.floor_char_boundary(
1751                        cursor_off
1752                            .saturating_sub(new_text_byte_offset)
1753                            .min(added_content.len()),
1754                    );
1755                    cursor_line_offset = Some(line_offset);
1756                }
1757
1758                new_text_byte_offset += added_content.len();
1759
1760                let hunk = current_hunk.get_or_insert(Hunk {
1761                    line_range: old_line_index..old_line_index,
1762                    new_text_lines: vec![],
1763                    cursor_line_offset_in_new_text: None,
1764                });
1765                hunk.new_text_lines.push(added_content);
1766                hunk.cursor_line_offset_in_new_text = cursor_line_offset
1767                    .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1768            } else {
1769                // Context line (starts with ' ' or is empty).
1770                if let Some(hunk) = current_hunk.take() {
1771                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1772                }
1773                last_old_line_before_hunk = Some(old_line_index);
1774                old_line_index += 1;
1775                let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1776                new_text_byte_offset += content.len();
1777            }
1778        }
1779
1780        // Flush final group.
1781        if let Some(hunk) = current_hunk.take() {
1782            flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1783        }
1784
1785        // Trim a single trailing newline.
1786        if result.ends_with('\n') {
1787            result.pop();
1788        }
1789
1790        if result.is_empty() {
1791            return Ok(NO_EDITS_COMMAND_MARKER.to_string());
1792        }
1793
1794        Ok(result)
1795    }
1796
1797    #[cfg(test)]
1798    mod tests {
1799        use super::*;
1800        use indoc::indoc;
1801
1802        #[test]
1803        fn test_format_cursor_region() {
1804            struct Case {
1805                name: &'static str,
1806                context: &'static str,
1807                editable_range: Range<usize>,
1808                cursor_offset: usize,
1809                expected: &'static str,
1810            }
1811
1812            let cases = [
1813                Case {
1814                    name: "basic_cursor_placement",
1815                    context: "hello world\n",
1816                    editable_range: 0..12,
1817                    cursor_offset: 5,
1818                    expected: indoc! {"
1819                    <|file_sep|>test.rs
1820                    <|fim_prefix|>
1821                    <|fim_middle|>current
1822                    0:5c|hello<|user_cursor|> world
1823                    <|fim_suffix|>
1824                    <|fim_middle|>updated
1825                    "},
1826                },
1827                Case {
1828                    name: "multiline_cursor_on_second_line",
1829                    context: "aaa\nbbb\nccc\n",
1830                    editable_range: 0..12,
1831                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
1832                    expected: indoc! {"
1833                    <|file_sep|>test.rs
1834                    <|fim_prefix|>
1835                    <|fim_middle|>current
1836                    0:23|aaa
1837                    1:26|b<|user_cursor|>bb
1838                    2:29|ccc
1839                    <|fim_suffix|>
1840                    <|fim_middle|>updated
1841                    "},
1842                },
1843                Case {
1844                    name: "no_trailing_newline_in_context",
1845                    context: "line1\nline2",
1846                    editable_range: 0..11,
1847                    cursor_offset: 3,
1848                    expected: indoc! {"
1849                    <|file_sep|>test.rs
1850                    <|fim_prefix|>
1851                    <|fim_middle|>current
1852                    0:d9|lin<|user_cursor|>e1
1853                    1:da|line2
1854                    <|fim_suffix|>
1855                    <|fim_middle|>updated
1856                    "},
1857                },
1858                Case {
1859                    name: "leading_newline_in_editable_region",
1860                    context: "\nabc\n",
1861                    editable_range: 0..5,
1862                    cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
1863                    expected: indoc! {"
1864                    <|file_sep|>test.rs
1865                    <|fim_prefix|>
1866                    <|fim_middle|>current
1867                    0:00|
1868                    1:26|a<|user_cursor|>bc
1869                    <|fim_suffix|>
1870                    <|fim_middle|>updated
1871                    "},
1872                },
1873                Case {
1874                    name: "with_suffix",
1875                    context: "abc\ndef",
1876                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
1877                    cursor_offset: 2,
1878                    expected: indoc! {"
1879                    <|file_sep|>test.rs
1880                    <|fim_prefix|>
1881                    <|fim_middle|>current
1882                    0:26|ab<|user_cursor|>c
1883                    <|fim_suffix|>
1884                    def
1885                    <|fim_middle|>updated
1886                    "},
1887                },
1888                Case {
1889                    name: "unicode_two_byte_chars",
1890                    context: "héllo\n",
1891                    editable_range: 0..7,
1892                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
1893                    expected: indoc! {"
1894                    <|file_sep|>test.rs
1895                    <|fim_prefix|>
1896                    <|fim_middle|>current
1897                    0:1b|hé<|user_cursor|>llo
1898                    <|fim_suffix|>
1899                    <|fim_middle|>updated
1900                    "},
1901                },
1902                Case {
1903                    name: "unicode_three_byte_chars",
1904                    context: "日本語\n",
1905                    editable_range: 0..10,
1906                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
1907                    expected: indoc! {"
1908                    <|file_sep|>test.rs
1909                    <|fim_prefix|>
1910                    <|fim_middle|>current
1911                    0:80|日本<|user_cursor|>語
1912                    <|fim_suffix|>
1913                    <|fim_middle|>updated
1914                    "},
1915                },
1916                Case {
1917                    name: "unicode_four_byte_chars",
1918                    context: "a🌍b\n",
1919                    editable_range: 0..7,
1920                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
1921                    expected: indoc! {"
1922                    <|file_sep|>test.rs
1923                    <|fim_prefix|>
1924                    <|fim_middle|>current
1925                    0:6b|a🌍<|user_cursor|>b
1926                    <|fim_suffix|>
1927                    <|fim_middle|>updated
1928                    "},
1929                },
1930                Case {
1931                    name: "cursor_at_start_of_region_not_placed",
1932                    context: "abc\n",
1933                    editable_range: 0..4,
1934                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
1935                    expected: indoc! {"
1936                    <|file_sep|>test.rs
1937                    <|fim_prefix|>
1938                    <|fim_middle|>current
1939                    0:26|abc
1940                    <|fim_suffix|>
1941                    <|fim_middle|>updated
1942                    "},
1943                },
1944                Case {
1945                    name: "cursor_at_end_of_line_not_placed",
1946                    context: "abc\ndef\n",
1947                    editable_range: 0..8,
1948                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
1949                    expected: indoc! {"
1950                    <|file_sep|>test.rs
1951                    <|fim_prefix|>
1952                    <|fim_middle|>current
1953                    0:26|abc
1954                    1:2f|def
1955                    <|fim_suffix|>
1956                    <|fim_middle|>updated
1957                    "},
1958                },
1959                Case {
1960                    name: "cursor_offset_relative_to_context_not_editable_region",
1961                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
1962                    // write_cursor_excerpt_section must subtract it before comparing against
1963                    // per-line offsets within the editable region.
1964                    context: "pre\naaa\nbbb\nsuf\n",
1965                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
1966                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
1967                    expected: indoc! {"
1968                    <|file_sep|>test.rs
1969                    <|fim_prefix|>
1970                    pre
1971                    <|fim_middle|>current
1972                    0:23|aaa
1973                    1:26|b<|user_cursor|>bb
1974                    <|fim_suffix|>
1975                    suf
1976                    <|fim_middle|>updated
1977                    "},
1978                },
1979            ];
1980
1981            for case in &cases {
1982                let mut prompt = String::new();
1983                hashline::write_cursor_excerpt_section(
1984                    &mut prompt,
1985                    Path::new("test.rs"),
1986                    case.context,
1987                    &case.editable_range,
1988                    case.cursor_offset,
1989                );
1990                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
1991            }
1992        }
1993
1994        #[test]
1995        fn test_apply_edit_commands() {
1996            struct Case {
1997                name: &'static str,
1998                original: &'static str,
1999                model_output: &'static str,
2000                expected: &'static str,
2001            }
2002
2003            let cases = vec![
2004                Case {
2005                    name: "set_single_line",
2006                    original: indoc! {"
2007                    let mut total = 0;
2008                    for product in products {
2009                        total += ;
2010                    }
2011                    total
2012                "},
2013                    model_output: indoc! {"
2014                    <|set|>2:87
2015                        total += product.price;
2016                "},
2017                    expected: indoc! {"
2018                    let mut total = 0;
2019                    for product in products {
2020                        total += product.price;
2021                    }
2022                    total
2023                "},
2024                },
2025                Case {
2026                    name: "set_range",
2027                    original: indoc! {"
2028                    fn foo() {
2029                        let x = 1;
2030                        let y = 2;
2031                        let z = 3;
2032                    }
2033                "},
2034                    model_output: indoc! {"
2035                    <|set|>1:46-3:4a
2036                        let sum = 6;
2037                "},
2038                    expected: indoc! {"
2039                    fn foo() {
2040                        let sum = 6;
2041                    }
2042                "},
2043                },
2044                Case {
2045                    name: "insert_after_line",
2046                    original: indoc! {"
2047                    fn main() {
2048                        let x = 1;
2049                    }
2050                "},
2051                    model_output: indoc! {"
2052                    <|insert|>1:46
2053                        let y = 2;
2054                "},
2055                    expected: indoc! {"
2056                    fn main() {
2057                        let x = 1;
2058                        let y = 2;
2059                    }
2060                "},
2061                },
2062                Case {
2063                    name: "insert_before_first",
2064                    original: indoc! {"
2065                    let x = 1;
2066                    let y = 2;
2067                "},
2068                    model_output: indoc! {"
2069                    <|insert|>
2070                    use std::io;
2071                "},
2072                    expected: indoc! {"
2073                    use std::io;
2074                    let x = 1;
2075                    let y = 2;
2076                "},
2077                },
2078                Case {
2079                    name: "set_with_cursor_marker",
2080                    original: indoc! {"
2081                    fn main() {
2082                        println!();
2083                    }
2084                "},
2085                    model_output: indoc! {"
2086                    <|set|>1:34
2087                        eprintln!(\"<|user_cursor|>\");
2088                "},
2089                    expected: indoc! {"
2090                    fn main() {
2091                        eprintln!(\"<|user_cursor|>\");
2092                    }
2093                "},
2094                },
2095                Case {
2096                    name: "multiple_set_commands",
2097                    original: indoc! {"
2098                    aaa
2099                    bbb
2100                    ccc
2101                    ddd
2102                "},
2103                    model_output: indoc! {"
2104                    <|set|>0:23
2105                    AAA
2106                    <|set|>2:29
2107                    CCC
2108                "},
2109                    expected: indoc! {"
2110                    AAA
2111                    bbb
2112                    CCC
2113                    ddd
2114                "},
2115                },
2116                Case {
2117                    name: "set_range_multiline_replacement",
2118                    original: indoc! {"
2119                    fn handle_submit() {
2120                    }
2121
2122                    fn handle_keystroke() {
2123                "},
2124                    model_output: indoc! {"
2125                    <|set|>0:3f-1:7d
2126                    fn handle_submit(modal_state: &mut ModalState) {
2127                        <|user_cursor|>
2128                    }
2129                "},
2130                    expected: indoc! {"
2131                    fn handle_submit(modal_state: &mut ModalState) {
2132                        <|user_cursor|>
2133                    }
2134
2135                    fn handle_keystroke() {
2136                "},
2137                },
2138                Case {
2139                    name: "no_edit_commands_returns_original",
2140                    original: indoc! {"
2141                    hello
2142                    world
2143                "},
2144                    model_output: "some random text with no commands",
2145                    expected: indoc! {"
2146                    hello
2147                    world
2148                "},
2149                },
2150                Case {
2151                    name: "no_edits_command_returns_original",
2152                    original: indoc! {"
2153                    hello
2154                    world
2155                "},
2156                    model_output: "<|no_edits|>",
2157                    expected: indoc! {"
2158                    hello
2159                    world
2160                "},
2161                },
2162                Case {
2163                    name: "wrong_hash_set_ignored",
2164                    original: indoc! {"
2165                    aaa
2166                    bbb
2167                "},
2168                    model_output: indoc! {"
2169                    <|set|>0:ff
2170                    ZZZ
2171                "},
2172                    expected: indoc! {"
2173                    aaa
2174                    bbb
2175                "},
2176                },
2177                Case {
2178                    name: "insert_and_set_combined",
2179                    original: indoc! {"
2180                    alpha
2181                    beta
2182                    gamma
2183                "},
2184                    model_output: indoc! {"
2185                    <|set|>0:06
2186                    ALPHA
2187                    <|insert|>1:9c
2188                    beta_extra
2189                "},
2190                    expected: indoc! {"
2191                    ALPHA
2192                    beta
2193                    beta_extra
2194                    gamma
2195                "},
2196                },
2197                Case {
2198                    name: "no_trailing_newline_preserved",
2199                    original: "hello\nworld",
2200                    model_output: indoc! {"
2201                    <|set|>0:14
2202                    HELLO
2203                "},
2204                    expected: "HELLO\nworld",
2205                },
2206                Case {
2207                    name: "set_range_hash_mismatch_in_end_bound",
2208                    original: indoc! {"
2209                    one
2210                    two
2211                    three
2212                "},
2213                    model_output: indoc! {"
2214                    <|set|>0:42-2:ff
2215                    ONE_TWO_THREE
2216                "},
2217                    expected: indoc! {"
2218                    one
2219                    two
2220                    three
2221                "},
2222                },
2223                Case {
2224                    name: "set_range_start_greater_than_end_ignored",
2225                    original: indoc! {"
2226                    a
2227                    b
2228                    c
2229                "},
2230                    model_output: indoc! {"
2231                    <|set|>2:63-1:62
2232                    X
2233                "},
2234                    expected: indoc! {"
2235                    a
2236                    b
2237                    c
2238                "},
2239                },
2240                Case {
2241                    name: "insert_out_of_bounds_ignored",
2242                    original: indoc! {"
2243                    x
2244                    y
2245                "},
2246                    model_output: indoc! {"
2247                    <|insert|>99:aa
2248                    z
2249                "},
2250                    expected: indoc! {"
2251                    x
2252                    y
2253                "},
2254                },
2255                Case {
2256                    name: "set_out_of_bounds_ignored",
2257                    original: indoc! {"
2258                    x
2259                    y
2260                "},
2261                    model_output: indoc! {"
2262                    <|set|>99:aa
2263                    z
2264                "},
2265                    expected: indoc! {"
2266                    x
2267                    y
2268                "},
2269                },
2270                Case {
2271                    name: "malformed_set_command_ignored",
2272                    original: indoc! {"
2273                    alpha
2274                    beta
2275                "},
2276                    model_output: indoc! {"
2277                    <|set|>not-a-line-ref
2278                    UPDATED
2279                "},
2280                    expected: indoc! {"
2281                    alpha
2282                    beta
2283                "},
2284                },
2285                Case {
2286                    name: "malformed_insert_hash_treated_as_before_first",
2287                    original: indoc! {"
2288                    alpha
2289                    beta
2290                "},
2291                    model_output: indoc! {"
2292                    <|insert|>1:nothex
2293                    preamble
2294                "},
2295                    expected: indoc! {"
2296                    preamble
2297                    alpha
2298                    beta
2299                "},
2300                },
2301                Case {
2302                    name: "set_then_insert_same_target_orders_insert_after_replacement",
2303                    original: indoc! {"
2304                    cat
2305                    dog
2306                "},
2307                    model_output: indoc! {"
2308                    <|set|>0:38
2309                    CAT
2310                    <|insert|>0:38
2311                    TAIL
2312                "},
2313                    expected: indoc! {"
2314                    CAT
2315                    TAIL
2316                    dog
2317                "},
2318                },
2319                Case {
2320                    name: "overlapping_set_ranges_last_wins",
2321                    original: indoc! {"
2322                    a
2323                    b
2324                    c
2325                    d
2326                "},
2327                    model_output: indoc! {"
2328                    <|set|>0:61-2:63
2329                    FIRST
2330                    <|set|>1:62-3:64
2331                    SECOND
2332                "},
2333                    expected: indoc! {"
2334                    FIRST
2335                    d
2336                "},
2337                },
2338                Case {
2339                    name: "insert_before_first_and_after_line",
2340                    original: indoc! {"
2341                        a
2342                        b
2343                    "},
2344                    model_output: indoc! {"
2345                        <|insert|>
2346                        HEAD
2347                        <|insert|>0:61
2348                        MID
2349                    "},
2350                    expected: indoc! {"
2351                        HEAD
2352                        a
2353                        MID
2354                        b
2355                    "},
2356                },
2357            ];
2358
2359            for case in &cases {
2360                let result = hashline::apply_edit_commands(case.original, &case.model_output);
2361                assert_eq!(result, case.expected, "failed case: {}", case.name);
2362            }
2363        }
2364
2365        #[test]
2366        fn test_output_has_edit_commands() {
2367            assert!(hashline::output_has_edit_commands(&format!(
2368                "{}0:ab\nnew",
2369                SET_COMMAND_MARKER
2370            )));
2371            assert!(hashline::output_has_edit_commands(&format!(
2372                "{}0:ab\nnew",
2373                INSERT_COMMAND_MARKER
2374            )));
2375            assert!(hashline::output_has_edit_commands(&format!(
2376                "some text\n{}1:cd\nstuff",
2377                SET_COMMAND_MARKER
2378            )));
2379            assert!(!hashline::output_has_edit_commands("just plain text"));
2380            assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2381            assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2382        }
2383
2384        // ---- hashline::patch_to_edit_commands round-trip tests ----
2385
2386        #[test]
2387        fn test_patch_to_edit_commands() {
2388            struct Case {
2389                name: &'static str,
2390                old: &'static str,
2391                patch: &'static str,
2392                expected_new: &'static str,
2393            }
2394
2395            let cases = [
2396                Case {
2397                    name: "single_line_replacement",
2398                    old: indoc! {"
2399                    let mut total = 0;
2400                    for product in products {
2401                        total += ;
2402                    }
2403                    total
2404                "},
2405                    patch: indoc! {"
2406                    @@ -1,5 +1,5 @@
2407                     let mut total = 0;
2408                     for product in products {
2409                    -    total += ;
2410                    +    total += product.price;
2411                     }
2412                     total
2413                "},
2414                    expected_new: indoc! {"
2415                    let mut total = 0;
2416                    for product in products {
2417                        total += product.price;
2418                    }
2419                    total
2420                "},
2421                },
2422                Case {
2423                    name: "multiline_replacement",
2424                    old: indoc! {"
2425                    fn foo() {
2426                        let x = 1;
2427                        let y = 2;
2428                        let z = 3;
2429                    }
2430                "},
2431                    patch: indoc! {"
2432                    @@ -1,5 +1,3 @@
2433                     fn foo() {
2434                    -    let x = 1;
2435                    -    let y = 2;
2436                    -    let z = 3;
2437                    +    let sum = 1 + 2 + 3;
2438                     }
2439                "},
2440                    expected_new: indoc! {"
2441                    fn foo() {
2442                        let sum = 1 + 2 + 3;
2443                    }
2444                "},
2445                },
2446                Case {
2447                    name: "insertion",
2448                    old: indoc! {"
2449                    fn main() {
2450                        let x = 1;
2451                    }
2452                "},
2453                    patch: indoc! {"
2454                    @@ -1,3 +1,4 @@
2455                     fn main() {
2456                         let x = 1;
2457                    +    let y = 2;
2458                     }
2459                "},
2460                    expected_new: indoc! {"
2461                    fn main() {
2462                        let x = 1;
2463                        let y = 2;
2464                    }
2465                "},
2466                },
2467                Case {
2468                    name: "insertion_before_first",
2469                    old: indoc! {"
2470                    let x = 1;
2471                    let y = 2;
2472                "},
2473                    patch: indoc! {"
2474                    @@ -1,2 +1,3 @@
2475                    +use std::io;
2476                     let x = 1;
2477                     let y = 2;
2478                "},
2479                    expected_new: indoc! {"
2480                    use std::io;
2481                    let x = 1;
2482                    let y = 2;
2483                "},
2484                },
2485                Case {
2486                    name: "deletion",
2487                    old: indoc! {"
2488                    aaa
2489                    bbb
2490                    ccc
2491                    ddd
2492                "},
2493                    patch: indoc! {"
2494                    @@ -1,4 +1,2 @@
2495                     aaa
2496                    -bbb
2497                    -ccc
2498                     ddd
2499                "},
2500                    expected_new: indoc! {"
2501                    aaa
2502                    ddd
2503                "},
2504                },
2505                Case {
2506                    name: "multiple_changes",
2507                    old: indoc! {"
2508                    alpha
2509                    beta
2510                    gamma
2511                    delta
2512                    epsilon
2513                "},
2514                    patch: indoc! {"
2515                    @@ -1,5 +1,5 @@
2516                    -alpha
2517                    +ALPHA
2518                     beta
2519                     gamma
2520                    -delta
2521                    +DELTA
2522                     epsilon
2523                "},
2524                    expected_new: indoc! {"
2525                    ALPHA
2526                    beta
2527                    gamma
2528                    DELTA
2529                    epsilon
2530                "},
2531                },
2532                Case {
2533                    name: "replace_with_insertion",
2534                    old: indoc! {r#"
2535                    fn handle() {
2536                        modal_state.close();
2537                        modal_state.dismiss();
2538                "#},
2539                    patch: indoc! {r#"
2540                    @@ -1,3 +1,4 @@
2541                     fn handle() {
2542                         modal_state.close();
2543                    +    eprintln!("");
2544                         modal_state.dismiss();
2545                "#},
2546                    expected_new: indoc! {r#"
2547                    fn handle() {
2548                        modal_state.close();
2549                        eprintln!("");
2550                        modal_state.dismiss();
2551                "#},
2552                },
2553                Case {
2554                    name: "complete_replacement",
2555                    old: indoc! {"
2556                    aaa
2557                    bbb
2558                    ccc
2559                "},
2560                    patch: indoc! {"
2561                    @@ -1,3 +1,3 @@
2562                    -aaa
2563                    -bbb
2564                    -ccc
2565                    +xxx
2566                    +yyy
2567                    +zzz
2568                "},
2569                    expected_new: indoc! {"
2570                    xxx
2571                    yyy
2572                    zzz
2573                "},
2574                },
2575                Case {
2576                    name: "add_function_body",
2577                    old: indoc! {"
2578                    fn foo() {
2579                        modal_state.dismiss();
2580                    }
2581
2582                    fn
2583
2584                    fn handle_keystroke() {
2585                "},
2586                    patch: indoc! {"
2587                    @@ -1,6 +1,8 @@
2588                     fn foo() {
2589                         modal_state.dismiss();
2590                     }
2591
2592                    -fn
2593                    +fn handle_submit() {
2594                    +    todo()
2595                    +}
2596
2597                     fn handle_keystroke() {
2598                "},
2599                    expected_new: indoc! {"
2600                    fn foo() {
2601                        modal_state.dismiss();
2602                    }
2603
2604                    fn handle_submit() {
2605                        todo()
2606                    }
2607
2608                    fn handle_keystroke() {
2609                "},
2610                },
2611                Case {
2612                    name: "with_cursor_offset",
2613                    old: indoc! {r#"
2614                    fn main() {
2615                        println!();
2616                    }
2617                "#},
2618                    patch: indoc! {r#"
2619                        @@ -1,3 +1,3 @@
2620                        fn main() {
2621                        -    println!();
2622                        +    eprintln!("");
2623                        }
2624                    "#},
2625                    expected_new: indoc! {r#"
2626                        fn main() {
2627                            eprintln!("<|user_cursor|>");
2628                        }
2629                    "#},
2630                },
2631                Case {
2632                    name: "non_local_hunk_header_pure_insertion_repro",
2633                    old: indoc! {"
2634                        aaa
2635                        bbb
2636                    "},
2637                    patch: indoc! {"
2638                        @@ -20,2 +20,3 @@
2639                        aaa
2640                        +xxx
2641                        bbb
2642                    "},
2643                    expected_new: indoc! {"
2644                        aaa
2645                        xxx
2646                        bbb
2647                    "},
2648                },
2649                Case {
2650                    name: "empty_patch_produces_no_edits_marker",
2651                    old: indoc! {"
2652                        aaa
2653                        bbb
2654                    "},
2655                    patch: "@@ -20,2 +20,3 @@\n",
2656                    expected_new: indoc! {"
2657                        aaa
2658                        bbb
2659                    "},
2660                },
2661            ];
2662
2663            for case in &cases {
2664                // The cursor_offset for patch_to_edit_commands is relative to
2665                // the first hunk's new text (context + additions). We compute
2666                // it by finding where the marker sits in the expected output
2667                // (which mirrors the new text of the hunk).
2668                let cursor_offset = case.expected_new.find(CURSOR_MARKER);
2669
2670                let commands =
2671                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
2672                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
2673
2674                assert!(
2675                    hashline::output_has_edit_commands(&commands),
2676                    "case {}: expected edit commands, got: {commands:?}",
2677                    case.name,
2678                );
2679
2680                let applied = hashline::apply_edit_commands(case.old, &commands);
2681                assert_eq!(applied, case.expected_new, "case {}", case.name);
2682            }
2683        }
2684    }
2685}
2686
2687pub mod seed_coder {
2688    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2689    //!
2690    //! Seed-Coder uses different FIM tokens and order than Qwen:
2691    //! - SPM order: suffix comes FIRST, then prefix, then middle
2692    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2693    //! - File markers: StarCoder-style `<filename>path` (single token + path)
2694    //!
2695    //! All context (related files, edit history) goes in the PREFIX section.
2696    //! The suffix contains only code after the editable region.
2697    //!
2698    //! Example prompt:
2699    //!
2700    //! <[fim-suffix]>
2701    //! code after editable region
2702    //! <[fim-prefix]><filename>related/file.py
2703    //! related file content
2704    //!
2705    //! <filename>edit_history
2706    //! --- a/some_file.py
2707    //! +++ b/some_file.py
2708    //! -old
2709    //! +new
2710    //!
2711    //! <filename>path/to/target_file.py
2712    //! code before editable region
2713    //! <<<<<<< CURRENT
2714    //! code that
2715    //! needs to<|user_cursor|>
2716    //! be rewritten
2717    //! =======
2718    //! <[fim-middle]>
2719    //!
2720    //! Expected output (model generates):
2721    //!
2722    //! updated
2723    //! code with
2724    //! changes applied
2725    //! >>>>>>> UPDATED
2726
2727    use super::*;
2728
2729    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
2730    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
2731    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
2732    pub const FILE_MARKER: &str = "<filename>";
2733
2734    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
2735    pub const SEPARATOR: &str = "=======\n";
2736    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
2737
2738    pub const NO_EDITS: &str = "NO_EDITS\n";
2739
2740    pub fn special_tokens() -> &'static [&'static str] {
2741        &[
2742            FIM_SUFFIX,
2743            FIM_PREFIX,
2744            FIM_MIDDLE,
2745            FILE_MARKER,
2746            START_MARKER,
2747            SEPARATOR,
2748            END_MARKER,
2749            CURSOR_MARKER,
2750        ]
2751    }
2752
2753    pub fn write_cursor_excerpt_section(
2754        prompt: &mut String,
2755        path: &Path,
2756        context: &str,
2757        editable_range: &Range<usize>,
2758        cursor_offset: usize,
2759    ) {
2760        let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2761        prompt.push_str(&section);
2762    }
2763
2764    pub fn format_prompt_with_budget(
2765        path: &Path,
2766        context: &str,
2767        editable_range: &Range<usize>,
2768        cursor_offset: usize,
2769        events: &[Arc<Event>],
2770        related_files: &[RelatedFile],
2771        max_tokens: usize,
2772    ) -> String {
2773        let cursor_prefix_section =
2774            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2775        assemble_fim_prompt(
2776            context,
2777            editable_range,
2778            &cursor_prefix_section,
2779            events,
2780            related_files,
2781            max_tokens,
2782        )
2783    }
2784
2785    pub fn assemble_fim_prompt(
2786        context: &str,
2787        editable_range: &Range<usize>,
2788        cursor_prefix_section: &str,
2789        events: &[Arc<Event>],
2790        related_files: &[RelatedFile],
2791        max_tokens: usize,
2792    ) -> String {
2793        let suffix_section = build_suffix_section(context, editable_range);
2794
2795        // Use byte-level budgeting to avoid accumulated rounding errors from
2796        // multiple estimate_tokens (floor division) calls across components.
2797        let max_bytes = max_tokens * 3;
2798        let fixed_bytes = suffix_section.len()
2799            + FIM_PREFIX.len()
2800            + cursor_prefix_section.len()
2801            + FIM_MIDDLE.len()
2802            + 2; // two potential newline separators
2803        let content_budget_tokens = estimate_tokens(max_bytes.saturating_sub(fixed_bytes));
2804
2805        let edit_history_section = super::format_edit_history_within_budget(
2806            events,
2807            FILE_MARKER,
2808            "edit_history",
2809            content_budget_tokens,
2810            max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
2811        );
2812        let remaining_budget_tokens = estimate_tokens(
2813            max_bytes
2814                .saturating_sub(fixed_bytes)
2815                .saturating_sub(edit_history_section.len()),
2816        );
2817
2818        let related_files_section = super::format_related_files_within_budget(
2819            related_files,
2820            FILE_MARKER,
2821            "",
2822            remaining_budget_tokens,
2823        );
2824
2825        let mut prompt = String::new();
2826        prompt.push_str(&suffix_section);
2827        prompt.push_str(FIM_PREFIX);
2828        prompt.push_str(&related_files_section);
2829        if !related_files_section.is_empty() {
2830            prompt.push('\n');
2831        }
2832        prompt.push_str(&edit_history_section);
2833        if !edit_history_section.is_empty() {
2834            prompt.push('\n');
2835        }
2836        prompt.push_str(cursor_prefix_section);
2837        prompt.push_str(FIM_MIDDLE);
2838
2839        prompt
2840    }
2841
2842    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2843        let mut section = String::new();
2844        section.push_str(FIM_SUFFIX);
2845        section.push_str(&context[editable_range.end..]);
2846        if !section.ends_with('\n') {
2847            section.push('\n');
2848        }
2849        section
2850    }
2851
2852    fn build_cursor_prefix_section(
2853        path: &Path,
2854        context: &str,
2855        editable_range: &Range<usize>,
2856        cursor_offset: usize,
2857    ) -> String {
2858        let mut section = String::new();
2859        let path_str = path.to_string_lossy();
2860        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2861
2862        section.push_str(&context[..editable_range.start]);
2863        section.push_str(START_MARKER);
2864        section.push_str(&context[editable_range.start..cursor_offset]);
2865        section.push_str(CURSOR_MARKER);
2866        section.push_str(&context[cursor_offset..editable_range.end]);
2867        if !section.ends_with('\n') {
2868            section.push('\n');
2869        }
2870        section.push_str(SEPARATOR);
2871        section
2872    }
2873
2874    /// Format patch as containing no changes if it's empty; otherwise return None.
2875    pub(crate) fn no_edits(patch: &str) -> Option<String> {
2876        // Count lines in the patch
2877        let empty_patch = patch.lines().count() <= 3;
2878        if empty_patch {
2879            Some(format!("{NO_EDITS}{END_MARKER}"))
2880        } else {
2881            None
2882        }
2883    }
2884}
2885
2886pub mod v0304_variable_edit {
2887    //! A prompt format with no fixed editable region. The entire context is shown
2888    //! to the model, and it chooses which text to replace by outputting surrounding
2889    //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
2890    //! text.
2891    //!
2892    //! Example prompt:
2893    //!
2894    //! <|file_sep|>path/to/file.py
2895    //! zero
2896    //! one
2897    //! two
2898    //! three<|user_cursor|>
2899    //! four
2900    //! five
2901    //! <|fim_prefix|>
2902    //!
2903    //! Expected output (model generates):
2904    //!
2905    //! two
2906    //! <|fim_middle|>
2907    //! THREE
2908    //! <|fim_suffix|>
2909    //! four
2910    //!
2911    //! The output means: find "two\n...\nfour" in the context, and replace
2912    //! everything between "two\n" and "four" with "THREE\n".
2913
2914    use super::*;
2915
2916    pub fn special_tokens() -> &'static [&'static str] {
2917        &[
2918            "<|fim_prefix|>",
2919            "<|fim_suffix|>",
2920            "<|fim_middle|>",
2921            "<|file_sep|>",
2922            CURSOR_MARKER,
2923        ]
2924    }
2925
2926    pub fn write_cursor_excerpt_section(
2927        prompt: &mut String,
2928        path: &Path,
2929        context: &str,
2930        cursor_offset: usize,
2931    ) {
2932        let path_str = path.to_string_lossy();
2933        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
2934
2935        prompt.push_str(&context[..cursor_offset]);
2936        prompt.push_str(CURSOR_MARKER);
2937        prompt.push_str(&context[cursor_offset..]);
2938        if !prompt.ends_with('\n') {
2939            prompt.push('\n');
2940        }
2941        prompt.push_str("<|fim_prefix|>\n")
2942    }
2943
2944    /// Apply a variable-edit model output to the original context text.
2945    ///
2946    /// The model output has the form:
2947    ///
2948    /// - prefix context lines
2949    /// - `<|fim_middle|>`
2950    /// - new text
2951    /// - `<|fim_suffix|>`
2952    /// - suffix context lines
2953    ///
2954    /// We locate the prefix/suffix context lines in the original text and replace
2955    /// everything between them with the new text.
2956    pub fn apply_variable_edit(
2957        context: &str,
2958        model_output: &str,
2959    ) -> Result<(Range<usize>, String)> {
2960        let (prefix_context, rest) = model_output
2961            .split_once("<|fim_middle|>\n")
2962            .or_else(|| model_output.split_once("<|fim_middle|>"))
2963            .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
2964
2965        let (new_text, suffix_context) = rest
2966            .split_once("<|fim_suffix|>\n")
2967            .or_else(|| rest.split_once("<|fim_suffix|>"))
2968            .unwrap_or((rest, ""));
2969
2970        let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
2971            suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
2972        } else {
2973            suffix_context
2974        };
2975
2976        let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
2977            .ok_or_else(|| anyhow!("could not locate prefix lines"))?
2978            + prefix_context.len();
2979        let suffix_offset = if suffix_context.is_empty() {
2980            context.len()
2981        } else {
2982            find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
2983                .ok_or_else(|| anyhow!("could not locate suffix lines"))?
2984                + prefix_offset
2985        };
2986
2987        let edit_range = prefix_offset..suffix_offset;
2988        return Ok((edit_range, new_text.to_string()));
2989    }
2990
/// Returns the byte offset of the first occurrence of `needle` in `haystack`
/// that begins at the start of a line (offset 0 or right after a `\n`).
///
/// An empty `needle` matches at offset 0. Returns `None` when no occurrence
/// starts on a line boundary.
///
/// Candidates are the line starts themselves rather than the results of a
/// substring scan: `str::match_indices` only reports non-overlapping matches,
/// so a mid-line match could swallow a later line-start match. For example
/// `"}\n}\n"` inside `"    }\n}\n}\n"` first matches at offset 4, which hides
/// the valid match at offset 6.
fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }

    std::iter::once(0)
        .chain(
            haystack
                .match_indices('\n')
                .map(|(newline_pos, _)| newline_pos + 1),
        )
        .find(|&line_start| haystack[line_start..].starts_with(needle))
}
3001
3002    /// Convert a unified diff patch into the variable-edit output format.
3003    ///
3004    /// Parses `patch` as a unified diff against `old_text` and produces model
3005    /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
3006    /// delimiters. The diff is resolved by content matching rather than line
3007    /// numbers.
3008    pub fn patch_to_variable_edit_output(
3009        old_text: &str,
3010        patch: &str,
3011        cursor_offset: Option<usize>,
3012    ) -> Result<String> {
3013        // Parse the unified diff into hunks. Each hunk has an `old_context`
3014        // string (context + deleted lines interleaved in order) and a list of
3015        // edits expressed as byte ranges within that context plus replacement
3016        // text.
3017        let hunks = parse_hunks(patch);
3018        if hunks.is_empty() {
3019            return Ok(String::new());
3020        }
3021
3022        // Apply each hunk by finding its old_context in the text and
3023        // performing the edits. We search forward from where the previous
3024        // hunk ended so that hunks are applied in order.
3025        let mut new_text = old_text.to_string();
3026        let mut search_from: usize = 0;
3027        let mut first_hunk_pos: Option<usize> = None;
3028
3029        for hunk in &hunks {
3030            let context_pos = new_text[search_from..]
3031                .find(&hunk.old_context)
3032                .map(|pos| pos + search_from)
3033                .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
3034
3035            if first_hunk_pos.is_none() {
3036                first_hunk_pos = Some(context_pos);
3037            }
3038
3039            // Apply edits in reverse order so byte offsets remain valid.
3040            for edit in hunk.edits.iter().rev() {
3041                let abs_start = context_pos + edit.range.start;
3042                let abs_end = context_pos + edit.range.end;
3043                new_text.replace_range(abs_start..abs_end, &edit.text);
3044            }
3045
3046            // Advance past this hunk's region in the (now modified) text.
3047            let new_region_len: usize =
3048                hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
3049                    len + edit.text.len() - (edit.range.end - edit.range.start)
3050                });
3051            search_from = context_pos + new_region_len;
3052        }
3053
3054        // Now we have old_text and new_text. Find the changed line range by
3055        // comparing them.
3056        let old_lines: Vec<&str> = old_text.lines().collect();
3057        let new_lines: Vec<&str> = new_text.lines().collect();
3058
3059        // Find first differing line.
3060        let first_changed_row = old_lines
3061            .iter()
3062            .zip(new_lines.iter())
3063            .position(|(a, b)| a != b)
3064            .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
3065
3066        // Find last differing line (from the end).
3067        let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
3068        let common_suffix = old_lines
3069            .iter()
3070            .rev()
3071            .zip(new_lines.iter().rev())
3072            .take(max_suffix)
3073            .take_while(|(a, b)| a == b)
3074            .count();
3075
3076        let old_end = old_lines.len() - common_suffix;
3077        let new_end = new_lines.len() - common_suffix;
3078
3079        if first_changed_row == old_end && first_changed_row == new_end {
3080            return Ok(String::new());
3081        }
3082
3083        // Build the replacement text from new_lines[first_diff..new_end].
3084        let mut merged_new_text = String::new();
3085        for line in &new_lines[first_changed_row..new_end] {
3086            merged_new_text.push_str(line);
3087            merged_new_text.push('\n');
3088        }
3089
3090        // cursor_offset is relative to the first hunk's new content in
3091        // new_text. Translate it to an offset within merged_new_text, which
3092        // only contains lines first_diff..new_end of new_text.
3093        if let Some(hunk_offset) = cursor_offset {
3094            let hunk_start = first_hunk_pos.unwrap_or(0);
3095            let absolute_pos = hunk_start + hunk_offset;
3096
3097            // Byte offset where first_diff starts in new_text.
3098            let merged_start: usize = new_lines[..first_changed_row]
3099                .iter()
3100                .map(|line| line.len() + 1)
3101                .sum();
3102
3103            if absolute_pos >= merged_start {
3104                let relative_offset = absolute_pos - merged_start;
3105                if relative_offset <= merged_new_text.len() {
3106                    merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
3107                }
3108            }
3109        }
3110
3111        // Build output with 2 lines of context above and below.
3112        let context_lines_count = 2;
3113        let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
3114        let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
3115
3116        fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
3117            let pattern = &lines[line_range];
3118            let pattern_len = pattern.len();
3119
3120            let mut count = 0;
3121            for offset in 0..=lines.len() - pattern_len {
3122                if &lines[offset..offset + pattern_len] == pattern {
3123                    count += 1;
3124                }
3125            }
3126            count
3127        }
3128
3129        // Expand prefix and suffix until they are unique
3130        while prefix_start > 0 {
3131            if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
3132                prefix_start -= 1;
3133            } else {
3134                break;
3135            }
3136        }
3137        while suffix_end < old_lines.len() {
3138            if count_matches(old_end..suffix_end, &old_lines) > 1 {
3139                suffix_end += 1;
3140            } else {
3141                break;
3142            }
3143        }
3144
3145        let mut output = String::new();
3146        for line in &old_lines[prefix_start..first_changed_row] {
3147            output.push_str(line);
3148            output.push('\n');
3149        }
3150        output.push_str("<|fim_middle|>\n");
3151        output.push_str(&merged_new_text);
3152        output.push_str("<|fim_suffix|>\n");
3153        for line in &old_lines[old_end..suffix_end] {
3154            output.push_str(line);
3155            output.push('\n');
3156        }
3157
3158        Ok(output)
3159    }
3160
/// One hunk of a unified diff, stored by content instead of by line numbers.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ParsedHunk {
    /// Context and deleted lines of the hunk, in order, each ending in `\n`.
    old_context: String,
    /// Replacements to perform, as byte ranges into `old_context`, in
    /// ascending order.
    edits: Vec<ParsedEdit>,
}

/// A single replacement inside a hunk.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ParsedEdit {
    /// Byte range of `old_context` that is removed (empty for an insertion).
    range: Range<usize>,
    /// Text that takes the place of `range` (empty for a deletion).
    text: String,
}

/// Parse a unified diff into content-based hunks. Each hunk contains an
/// `old_context` string (context lines + deleted lines, which together
/// form the text that should be found in the original) and a list of edits
/// expressed as byte ranges within that context.
///
/// A new hunk starts at every line beginning with `@@`, and everything before
/// the first such line is ignored. Hunks with neither context nor edits are
/// dropped. Adjacent removed and added lines are merged into one edit.
///
/// Inside a hunk, a `--- ` line directly followed by a `+++ ` line is treated
/// as a file header pair and skipped. Any other line starting with `---` or
/// `+++` is an ordinary removal or addition whose content starts with `--` or
/// `++` (for example removing a `---` separator line). The remaining
/// ambiguity is a removed `-- x` line directly followed by an added `++ y`
/// line, which is still read as a header pair.
fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
    /// Stores `hunk` unless it is absent or completely empty.
    fn flush(hunks: &mut Vec<ParsedHunk>, hunk: Option<ParsedHunk>) {
        if let Some(hunk) = hunk {
            if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
                hunks.push(hunk);
            }
        }
    }

    let mut hunks = Vec::new();
    let mut current: Option<ParsedHunk> = None;
    let mut lines = patch.lines().peekable();

    while let Some(line) = lines.next() {
        if line.starts_with("@@") {
            flush(&mut hunks, current.take());
            current = Some(ParsedHunk {
                old_context: String::new(),
                edits: Vec::new(),
            });
            continue;
        }

        // Lines before the first hunk header (file headers etc.) carry no content.
        let hunk = match current.as_mut() {
            Some(hunk) => hunk,
            None => continue,
        };

        let is_file_header = line.starts_with("--- ")
            && matches!(lines.peek(), Some(next) if next.starts_with("+++ "));
        if is_file_header {
            // Skip the matching `+++ ` line as well.
            lines.next();
            continue;
        }

        if let Some(added) = line.strip_prefix('+') {
            let pos = hunk.old_context.len();
            match hunk.edits.last_mut() {
                // Extends the edit that ends right here (more added lines, or
                // the replacement for lines just removed).
                Some(last_edit) if last_edit.range.end == pos => {
                    last_edit.text.push_str(added);
                    last_edit.text.push('\n');
                }
                _ => hunk.edits.push(ParsedEdit {
                    range: pos..pos,
                    text: format!("{added}\n"),
                }),
            }
        } else if let Some(removed) = line.strip_prefix('-') {
            let start = hunk.old_context.len();
            hunk.old_context.push_str(removed);
            hunk.old_context.push('\n');
            let end = hunk.old_context.len();
            match hunk.edits.last_mut() {
                // Grows the edit that ends right before this removed line.
                Some(last_edit) if last_edit.range.end == start => last_edit.range.end = end,
                _ => hunk.edits.push(ParsedEdit {
                    range: start..end,
                    text: String::new(),
                }),
            }
        } else {
            // Context line; tolerate a missing leading space.
            let ctx = line.strip_prefix(' ').unwrap_or(line);
            hunk.old_context.push_str(ctx);
            hunk.old_context.push('\n');
        }
    }

    flush(&mut hunks, current);
    hunks
}
3234
3235    #[cfg(test)]
3236    mod tests {
3237        use super::*;
3238        use indoc::indoc;
3239
/// Table-driven check of `apply_variable_edit`: each case applies the
/// returned range and replacement to `original` and compares the result with
/// `expected`. A failing case is reported by name, whether it fails while
/// resolving the edit or while comparing the result.
#[test]
fn test_apply_variable_edit() {
    struct Case {
        name: &'static str,
        original: &'static str,
        model_output: &'static str,
        expected: &'static str,
    }

    let cases = [
        Case {
            name: "simple_single_line_replacement",
            original: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            model_output: indoc! {"
                two
                <|fim_middle|>
                THREE
                <|fim_suffix|>
                four
            "},
            expected: indoc! {"
                zero
                one
                two
                THREE
                four
                five
            "},
        },
        Case {
            name: "multi_line_replacement",
            original: indoc! {"
                a
                b
                c
                d
                e
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                B
                C
                D
                <|fim_suffix|>
                e
            "},
            expected: indoc! {"
                a
                B
                C
                D
                e
            "},
        },
        Case {
            name: "insertion_between_existing_lines",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                X
                <|fim_suffix|>
                b
            "},
            expected: indoc! {"
                a
                X
                b
                c
            "},
        },
        Case {
            name: "deletion",
            original: indoc! {"
                a
                b
                c
                d
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                <|fim_suffix|>
                c
            "},
            expected: indoc! {"
                a
                c
                d
            "},
        },
        Case {
            name: "replacement_at_start_no_prefix_context",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                <|fim_middle|>
                X
                <|fim_suffix|>
                b
            "},
            expected: indoc! {"
                X
                b
                c
            "},
        },
        Case {
            name: "replacement_at_end_no_suffix_context",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                b
                <|fim_middle|>
                Z
                <|fim_suffix|>
            "},
            expected: indoc! {"
                a
                b
                Z
            "},
        },
        Case {
            name: "context_with_trailing_newline_is_preserved",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                B
                <|fim_suffix|>
                c
            "},
            expected: indoc! {"
                a
                B
                c
            "},
        },
        Case {
            name: "cursor_marker_passes_through_untouched",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                B<|user_cursor|>B
                <|fim_suffix|>
                c
            "},
            expected: indoc! {"
                a
                B<|user_cursor|>B
                c
            "},
        },
        Case {
            name: "multiple_prefix_context_lines",
            original: indoc! {"
                a
                b
                c
                d
                e
            "},
            model_output: indoc! {"
                b
                c
                <|fim_middle|>
                D
                <|fim_suffix|>
                e
            "},
            expected: indoc! {"
                a
                b
                c
                D
                e
            "},
        },
    ];

    for case in cases {
        // Name the case on failure; a bare unwrap would not say which row broke.
        let (edit_range, replacement) = apply_variable_edit(case.original, case.model_output)
            .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
        let mut edited = case.original.to_string();
        edited.replace_range(edit_range, &replacement);
        assert_eq!(edited, case.expected, "{}", case.name);
    }
}
3456
3457        #[test]
3458        fn test_patch_to_variable_edit() {
3459            struct Case {
3460                name: &'static str,
3461                old: &'static str,
3462                patch: &'static str,
3463                cursor_offset: Option<usize>,
3464                expected_variable_edit: &'static str,
3465                expected_after_apply: &'static str,
3466            }
3467
3468            let cases = [
3469                Case {
3470                    name: "simple_replacement",
3471                    old: indoc! {"
3472                        zero
3473                        one
3474                        two
3475                        three
3476                        four
3477                        five
3478                    "},
3479                    patch: indoc! {"
3480                        @@ -3,3 +3,3 @@
3481                         two
3482                        -three
3483                        +THREE
3484                         four
3485                    "},
3486                    cursor_offset: None,
3487                    expected_variable_edit: indoc! {"
3488                        one
3489                        two
3490                        <|fim_middle|>
3491                        THREE
3492                        <|fim_suffix|>
3493                        four
3494                        five
3495                    "},
3496                    expected_after_apply: indoc! {"
3497                        zero
3498                        one
3499                        two
3500                        THREE
3501                        four
3502                        five
3503                    "},
3504                },
3505                Case {
3506                    name: "insertion",
3507                    old: indoc! {"
3508                        a
3509                        b
3510                        c
3511                        d
3512                        e
3513                    "},
3514                    patch: indoc! {"
3515                        @@ -2,0 +3,1 @@
3516                         b
3517                        +X
3518                         c
3519                    "},
3520                    cursor_offset: None,
3521                    expected_variable_edit: indoc! {"
3522                        a
3523                        b
3524                        <|fim_middle|>
3525                        X
3526                        <|fim_suffix|>
3527                        c
3528                        d
3529                    "},
3530                    expected_after_apply: indoc! {"
3531                        a
3532                        b
3533                        X
3534                        c
3535                        d
3536                        e
3537                    "},
3538                },
3539                Case {
3540                    name: "deletion",
3541                    old: indoc! {"
3542                        a
3543                        b
3544                        c
3545                        d
3546                        e
3547                    "},
3548                    patch: indoc! {"
3549                        @@ -2,3 +2,2 @@
3550                         b
3551                        -c
3552                         d
3553                    "},
3554                    cursor_offset: None,
3555                    expected_variable_edit: indoc! {"
3556                        a
3557                        b
3558                        <|fim_middle|>
3559                        <|fim_suffix|>
3560                        d
3561                        e
3562                    "},
3563                    expected_after_apply: indoc! {"
3564                        a
3565                        b
3566                        d
3567                        e
3568                    "},
3569                },
3570                Case {
3571                    name: "edit_near_start",
3572                    old: indoc! {"
3573                        first
3574                        second
3575                        third
3576                        fourth
3577                    "},
3578                    patch: indoc! {"
3579                        @@ -1,1 +1,1 @@
3580                        -first
3581                        +FIRST
3582                    "},
3583                    cursor_offset: None,
3584                    expected_variable_edit: indoc! {"
3585                        <|fim_middle|>
3586                        FIRST
3587                        <|fim_suffix|>
3588                        second
3589                        third
3590                    "},
3591                    expected_after_apply: indoc! {"
3592                        FIRST
3593                        second
3594                        third
3595                        fourth
3596                    "},
3597                },
3598                Case {
3599                    name: "edit_near_end",
3600                    old: indoc! {"
3601                        first
3602                        second
3603                        third
3604                        fourth
3605                    "},
3606                    patch: indoc! {"
3607                        @@ -4,1 +4,1 @@
3608                        -fourth
3609                        +FOURTH
3610                    "},
3611                    cursor_offset: None,
3612                    expected_variable_edit: indoc! {"
3613                        second
3614                        third
3615                        <|fim_middle|>
3616                        FOURTH
3617                        <|fim_suffix|>
3618                    "},
3619                    expected_after_apply: indoc! {"
3620                        first
3621                        second
3622                        third
3623                        FOURTH
3624                    "},
3625                },
3626                Case {
3627                    name: "cursor_at_start_of_replacement",
3628                    old: indoc! {"
3629                        zero
3630                        one
3631                        two
3632                        three
3633                        four
3634                        five
3635                    "},
3636                    patch: indoc! {"
3637                        @@ -3,3 +3,3 @@
3638                         two
3639                        -three
3640                        +THREE
3641                         four
3642                    "},
3643                    cursor_offset: Some(4),
3644                    expected_variable_edit: indoc! {"
3645                        one
3646                        two
3647                        <|fim_middle|>
3648                        <|user_cursor|>THREE
3649                        <|fim_suffix|>
3650                        four
3651                        five
3652                    "},
3653                    expected_after_apply: indoc! {"
3654                        zero
3655                        one
3656                        two
3657                        <|user_cursor|>THREE
3658                        four
3659                        five
3660                    "},
3661                },
3662                Case {
3663                    name: "cursor_in_middle_of_replacement",
3664                    old: indoc! {"
3665                        zero
3666                        one
3667                        two
3668                        three
3669                        four
3670                        five
3671                    "},
3672                    patch: indoc! {"
3673                        @@ -3,3 +3,3 @@
3674                         two
3675                        -three
3676                        +THREE
3677                         four
3678                    "},
3679                    cursor_offset: Some(6),
3680                    expected_variable_edit: indoc! {"
3681                        one
3682                        two
3683                        <|fim_middle|>
3684                        TH<|user_cursor|>REE
3685                        <|fim_suffix|>
3686                        four
3687                        five
3688                    "},
3689                    expected_after_apply: indoc! {"
3690                        zero
3691                        one
3692                        two
3693                        TH<|user_cursor|>REE
3694                        four
3695                        five
3696                    "},
3697                },
3698                Case {
3699                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3700                    old: indoc! {"
3701                        one
3702                        a
3703                        b
3704                        c
3705                        d
3706                        two
3707                        a
3708                        b
3709                        c
3710                        d
3711                        three
3712                        a
3713                        b
3714                        c
3715                        d
3716                        four
3717                    "},
3718                    patch: indoc! {"
3719                        @@ -4,5 +4,5 @@
3720                         two
3721                         a
3722                         b
3723                        -c
3724                        +C
3725                         d
3726                         three
3727                    "},
3728                    cursor_offset: None,
3729                    expected_variable_edit: indoc! {"
3730                        two
3731                        a
3732                        b
3733                        <|fim_middle|>
3734                        C
3735                        <|fim_suffix|>
3736                        d
3737                        three
3738                    "},
3739                    expected_after_apply: indoc! {"
3740                        one
3741                        a
3742                        b
3743                        c
3744                        d
3745                        two
3746                        a
3747                        b
3748                        C
3749                        d
3750                        three
3751                        a
3752                        b
3753                        c
3754                        d
3755                        four
3756                    "},
3757                },
3758                Case {
3759                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3760                    old: indoc! {"
3761                        {
3762                            {
3763                                one();
3764                            }
3765                        }
3766                        {
3767                            {
3768                                two();
3769                            }
3770                        }
3771                        {
3772                            {
3773                                three();
3774                            }
3775                        }
3776                        {
3777                            {
3778                                four();
3779                            }
3780                        }
3781                    "},
3782                    patch: indoc! {"
3783                        @@ -4,5 +4,5 @@
3784                             {
3785                        -        two();
3786                        +        TWO();
3787                             }
3788                    "},
3789                    cursor_offset: None,
3790                    expected_variable_edit: indoc! {"
3791                                one();
3792                            }
3793                        }
3794                        {
3795                            {
3796                        <|fim_middle|>
3797                                TWO();
3798                        <|fim_suffix|>
3799                            }
3800                        }
3801                        {
3802                            {
3803                                three();
3804                    "},
3805                    expected_after_apply: indoc! {"
3806                        {
3807                            {
3808                                one();
3809                            }
3810                        }
3811                        {
3812                            {
3813                                TWO();
3814                            }
3815                        }
3816                        {
3817                            {
3818                                three();
3819                            }
3820                        }
3821                        {
3822                            {
3823                                four();
3824                            }
3825                        }
3826                    "},
3827                },
3828            ];
3829
3830            for case in cases {
3831                let output =
3832                    patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
3833                        .unwrap_or_else(|error| {
3834                            panic!("failed converting patch for {}: {error}", case.name)
3835                        });
3836                assert_eq!(
3837                    output, case.expected_variable_edit,
3838                    "patch->variable_edit mismatch for {}",
3839                    case.name
3840                );
3841
3842                let (edit_range, replacement) = apply_variable_edit(case.old, &output)
3843                    .unwrap_or_else(|error| {
3844                        panic!("failed applying variable_edit for {}: {error}", case.name)
3845                    });
3846                let mut edited_by_variable_edit = case.old.to_string();
3847                edited_by_variable_edit.replace_range(edit_range, &replacement);
3848                assert_eq!(
3849                    edited_by_variable_edit, case.expected_after_apply,
3850                    "variable_edit apply mismatch for {}",
3851                    case.name
3852                );
3853
3854                let (expected_edit_range, expected_replacement) =
3855                    apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
3856                        |error| {
3857                            panic!(
3858                                "failed applying expected variable_edit for {}: {error}",
3859                                case.name
3860                            )
3861                        },
3862                    );
3863                let mut edited_by_expected_variable_edit = case.old.to_string();
3864                edited_by_expected_variable_edit
3865                    .replace_range(expected_edit_range, &expected_replacement);
3866                assert_eq!(
3867                    edited_by_expected_variable_edit, case.expected_after_apply,
3868                    "expected variable_edit apply mismatch for {}",
3869                    case.name
3870                );
3871            }
3872        }
3873
3874        #[test]
3875        fn test_write_cursor_excerpt_section() {
3876            let path = Path::new("test.rs");
3877            let context = "fn main() {\n    hello();\n}\n";
3878            let cursor_offset = 17;
3879            let mut prompt = String::new();
3880            write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
3881            assert_eq!(
3882                prompt,
3883                "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
3884            );
3885        }
3886    }
3887}
3888
3889/// The zeta1 prompt format
3890pub mod zeta1 {
3891    use super::*;
3892    use std::fmt::Write;
3893
3894    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
3895    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
3896    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
3897    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
3898
3899    const INSTRUCTION_HEADER: &str = concat!(
3900        "### Instruction:\n",
3901        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3902        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3903        "into account the cursor location.\n\n",
3904        "### User Edits:\n\n"
3905    );
3906    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
3907    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
3908
3909    /// Formats a complete zeta1 prompt from the input events and excerpt.
3910    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
3911        let mut prompt = String::with_capacity(
3912            INSTRUCTION_HEADER.len()
3913                + input_events.len()
3914                + EXCERPT_HEADER.len()
3915                + input_excerpt.len()
3916                + RESPONSE_HEADER.len(),
3917        );
3918        prompt.push_str(INSTRUCTION_HEADER);
3919        prompt.push_str(input_events);
3920        prompt.push_str(EXCERPT_HEADER);
3921        prompt.push_str(input_excerpt);
3922        prompt.push_str(RESPONSE_HEADER);
3923        prompt
3924    }
3925
3926    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
3927    /// editable and context byte-offset ranges within `cursor_excerpt`.
3928    pub fn format_zeta1_from_input(
3929        input: &ZetaPromptInput,
3930        editable_range: Range<usize>,
3931        context_range: Range<usize>,
3932    ) -> String {
3933        let events = format_zeta1_events(&input.events);
3934        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
3935        format_zeta1_prompt(&events, &excerpt)
3936    }
3937
3938    /// Formats events in zeta1 style (oldest first).
3939    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
3940        let mut result = String::new();
3941        for event in
3942            events
3943                .iter()
3944                .skip(events.len().saturating_sub(max_edit_event_count_for_format(
3945                    &ZetaFormat::V0114180EditableRegion,
3946                )))
3947        {
3948            let event_string = format_zeta1_event(event);
3949            if event_string.is_empty() {
3950                continue;
3951            }
3952            if !result.is_empty() {
3953                result.push_str("\n\n");
3954            }
3955            result.push_str(&event_string);
3956        }
3957        result
3958    }
3959
3960    fn format_zeta1_event(event: &Event) -> String {
3961        match event {
3962            Event::BufferChange {
3963                path,
3964                old_path,
3965                diff,
3966                ..
3967            } => {
3968                let mut prompt = String::new();
3969                if old_path != path {
3970                    writeln!(
3971                        prompt,
3972                        "User renamed {} to {}\n",
3973                        old_path.display(),
3974                        path.display()
3975                    )
3976                    .ok();
3977                }
3978                if !diff.is_empty() {
3979                    write!(
3980                        prompt,
3981                        "User edited {}:\n```diff\n{}\n```",
3982                        path.display(),
3983                        diff
3984                    )
3985                    .ok();
3986                }
3987                prompt
3988            }
3989        }
3990    }
3991
3992    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
3993    /// within `cursor_excerpt`.
3994    fn format_zeta1_excerpt(
3995        input: &ZetaPromptInput,
3996        editable_range: Range<usize>,
3997        context_range: Range<usize>,
3998    ) -> String {
3999        let path_str = input.cursor_path.to_string_lossy();
4000        let excerpt = &*input.cursor_excerpt;
4001        let cursor_offset = input.cursor_offset_in_excerpt;
4002
4003        let mut prompt = String::new();
4004        writeln!(&mut prompt, "```{path_str}").ok();
4005
4006        let starts_at_file_beginning =
4007            input.excerpt_start_row == Some(0) && context_range.start == 0;
4008        if starts_at_file_beginning {
4009            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
4010        }
4011
4012        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
4013
4014        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
4015        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
4016        prompt.push_str(CURSOR_MARKER);
4017        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
4018        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
4019
4020        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
4021        write!(prompt, "\n```").ok();
4022
4023        prompt
4024    }
4025
4026    /// Cleans zeta1 model output by extracting content between editable region
4027    /// markers and converting the zeta1 cursor marker to the universal one.
4028    /// Returns `None` if the output doesn't contain the expected markers.
4029    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
4030        let content = output.replace(CURSOR_MARKER, "");
4031
4032        let content_start = content
4033            .find(EDITABLE_REGION_START_MARKER)
4034            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
4035            .map(|pos| {
4036                if content.as_bytes().get(pos) == Some(&b'\n') {
4037                    pos + 1
4038                } else {
4039                    pos
4040                }
4041            })
4042            .unwrap_or(0);
4043
4044        let content_end = content
4045            .find(EDITABLE_REGION_END_MARKER)
4046            .map(|pos| {
4047                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
4048                    pos - 1
4049                } else {
4050                    pos
4051                }
4052            })
4053            .unwrap_or(content.len());
4054
4055        if content_start > content_end {
4056            return Some(String::new());
4057        }
4058
4059        let extracted = &content[content_start..content_end];
4060
4061        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
4062            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
4063            let text_before_cursor = text_before_cursor
4064                .find(EDITABLE_REGION_START_MARKER)
4065                .map(|pos| {
4066                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
4067                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
4068                        after_marker + 1
4069                    } else {
4070                        after_marker
4071                    }
4072                })
4073                .unwrap_or(0);
4074            let offset_in_extracted = zeta1_cursor_pos
4075                .saturating_sub(text_before_cursor)
4076                .min(extracted.len());
4077            offset_in_extracted
4078        });
4079
4080        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
4081        if let Some(offset) = cursor_offset {
4082            result.push_str(&extracted[..offset]);
4083            result.push_str(super::CURSOR_MARKER);
4084            result.push_str(&extracted[offset..]);
4085        } else {
4086            result.push_str(extracted);
4087        }
4088
4089        Some(result)
4090    }
4091}
4092
4093#[cfg(test)]
4094mod tests {
4095    use super::*;
4096    use indoc::indoc;
4097
4098    fn make_input(
4099        cursor_excerpt: &str,
4100        editable_range: Range<usize>,
4101        cursor_offset: usize,
4102        events: Vec<Event>,
4103        related_files: Vec<RelatedFile>,
4104    ) -> ZetaPromptInput {
4105        let context_range = 0..cursor_excerpt.len();
4106        ZetaPromptInput {
4107            cursor_path: Path::new("test.rs").into(),
4108            cursor_excerpt: cursor_excerpt.into(),
4109            cursor_offset_in_excerpt: cursor_offset,
4110            excerpt_start_row: None,
4111            events: events.into_iter().map(Arc::new).collect(),
4112            related_files: Some(related_files),
4113            active_buffer_diagnostics: vec![],
4114            excerpt_ranges: ExcerptRanges {
4115                editable_150: editable_range.clone(),
4116                editable_180: editable_range.clone(),
4117                editable_350: editable_range,
4118                editable_150_context_350: context_range.clone(),
4119                editable_180_context_350: context_range.clone(),
4120                editable_350_context_150: context_range,
4121                ..Default::default()
4122            },
4123            syntax_ranges: None,
4124            experiment: None,
4125            in_open_source_repo: false,
4126            can_collect_data: false,
4127            repo_url: None,
4128        }
4129    }
4130
4131    fn make_input_with_context_range(
4132        excerpt: &str,
4133        editable_range: Range<usize>,
4134        context_range: Range<usize>,
4135        cursor_offset: usize,
4136    ) -> ZetaPromptInput {
4137        ZetaPromptInput {
4138            cursor_path: Path::new("test.rs").into(),
4139            cursor_excerpt: excerpt.into(),
4140            cursor_offset_in_excerpt: cursor_offset,
4141            excerpt_start_row: None,
4142            events: vec![],
4143            related_files: Some(vec![]),
4144            active_buffer_diagnostics: vec![],
4145            excerpt_ranges: ExcerptRanges {
4146                editable_150: editable_range.clone(),
4147                editable_180: editable_range.clone(),
4148                editable_350: editable_range,
4149                editable_150_context_350: context_range.clone(),
4150                editable_180_context_350: context_range.clone(),
4151                editable_350_context_150: context_range,
4152                ..Default::default()
4153            },
4154            syntax_ranges: None,
4155            experiment: None,
4156            in_open_source_repo: false,
4157            can_collect_data: false,
4158            repo_url: None,
4159        }
4160    }
4161
4162    fn make_event(path: &str, diff: &str) -> Event {
4163        Event::BufferChange {
4164            path: Path::new(path).into(),
4165            old_path: Path::new(path).into(),
4166            diff: diff.to_string(),
4167            predicted: false,
4168            in_open_source_repo: false,
4169        }
4170    }
4171
4172    fn make_related_file(path: &str, content: &str) -> RelatedFile {
4173        RelatedFile {
4174            path: Path::new(path).into(),
4175            max_row: content.lines().count() as u32,
4176            excerpts: vec![RelatedExcerpt {
4177                row_range: 0..content.lines().count() as u32,
4178                text: content.into(),
4179                order: 0,
4180            }],
4181            in_open_source_repo: false,
4182        }
4183    }
4184
4185    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> Option<String> {
4186        format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4187    }
4188
4189    #[test]
4190    fn test_no_truncation_when_within_budget() {
4191        let input = make_input(
4192            "prefix\neditable\nsuffix",
4193            7..15,
4194            10,
4195            vec![make_event("a.rs", "-old\n+new\n")],
4196            vec![make_related_file("related.rs", "fn helper() {}\n")],
4197        );
4198
4199        assert_eq!(
4200            format_with_budget(&input, 10000).unwrap(),
4201            indoc! {r#"
4202                <|file_sep|>related.rs
4203                fn helper() {}
4204                <|file_sep|>edit history
4205                --- a/a.rs
4206                +++ b/a.rs
4207                -old
4208                +new
4209                <|file_sep|>test.rs
4210                <|fim_prefix|>
4211                prefix
4212                <|fim_middle|>current
4213                edi<|user_cursor|>table
4214                <|fim_suffix|>
4215
4216                suffix
4217                <|fim_middle|>updated
4218            "#}
4219            .to_string()
4220        );
4221    }
4222
4223    #[test]
4224    fn test_truncation_drops_edit_history_when_budget_tight() {
4225        let input = make_input(
4226            "code",
4227            0..4,
4228            2,
4229            vec![make_event("a.rs", "-x\n+y\n")],
4230            vec![
4231                make_related_file("r1.rs", "aaaaaaa\n"),
4232                make_related_file("r2.rs", "bbbbbbb\n"),
4233            ],
4234        );
4235
4236        assert_eq!(
4237            format_with_budget(&input, 10000).unwrap(),
4238            indoc! {r#"
4239                <|file_sep|>r1.rs
4240                aaaaaaa
4241                <|file_sep|>r2.rs
4242                bbbbbbb
4243                <|file_sep|>edit history
4244                --- a/a.rs
4245                +++ b/a.rs
4246                -x
4247                +y
4248                <|file_sep|>test.rs
4249                <|fim_prefix|>
4250                <|fim_middle|>current
4251                co<|user_cursor|>de
4252                <|fim_suffix|>
4253                <|fim_middle|>updated
4254            "#}
4255            .to_string()
4256        );
4257
4258        assert_eq!(
4259            format_with_budget(&input, 57),
4260            Some(
4261                indoc! {r#"
4262                <|file_sep|>edit history
4263                --- a/a.rs
4264                +++ b/a.rs
4265                -x
4266                +y
4267                <|file_sep|>test.rs
4268                <|fim_prefix|>
4269                <|fim_middle|>current
4270                co<|user_cursor|>de
4271                <|fim_suffix|>
4272                <|fim_middle|>updated
4273            "#}
4274                .to_string()
4275            )
4276        );
4277    }
4278
4279    #[test]
4280    fn test_truncation_includes_partial_excerpts() {
            // Fixture: a single related file (big.rs) with three excerpts that all
            // share `order: 0`, and no edit events.
4281        let input = make_input(
4282            "x",
4283            0..1,
4284            0,
4285            vec![],
4286            vec![RelatedFile {
4287                path: Path::new("big.rs").into(),
4288                max_row: 30,
4289                in_open_source_repo: false,
4290                excerpts: vec![
4291                    RelatedExcerpt {
4292                        row_range: 0..10,
4293                        text: "first excerpt\n".into(),
4294                        order: 0,
4295                    },
4296                    RelatedExcerpt {
4297                        row_range: 10..20,
4298                        text: "second excerpt\n".into(),
4299                        order: 0,
4300                    },
4301                    RelatedExcerpt {
4302                        row_range: 20..30,
4303                        text: "third excerpt\n".into(),
4304                        order: 0,
4305                    },
4306                ],
4307            }],
4308        );
4309
            // Budget 10000: all three excerpts are expected, separated by `...` lines.
4310        assert_eq!(
4311            format_with_budget(&input, 10000).unwrap(),
4312            indoc! {r#"
4313                <|file_sep|>big.rs
4314                first excerpt
4315                ...
4316                second excerpt
4317                ...
4318                third excerpt
4319                <|file_sep|>test.rs
4320                <|fim_prefix|>
4321                <|fim_middle|>current
4322                <|user_cursor|>x
4323                <|fim_suffix|>
4324                <|fim_middle|>updated
4325            "#}
4326            .to_string()
4327        );
4328
            // Budget 50: the file is still listed, but only its first excerpt is
            // expected, followed by a trailing `...` line.
4329        assert_eq!(
4330            format_with_budget(&input, 50).unwrap(),
4331            indoc! {r#"
4332                <|file_sep|>big.rs
4333                first excerpt
4334                ...
4335                <|file_sep|>test.rs
4336                <|fim_prefix|>
4337                <|fim_middle|>current
4338                <|user_cursor|>x
4339                <|fim_suffix|>
4340                <|fim_middle|>updated
4341            "#}
4342            .to_string()
4343        );
4344    }
4345
4346    #[test]
4347    fn test_truncation_prioritizes_lower_order_excerpts() {
4348        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
4349        // With tight budget, only the lower-order excerpt from file_b should be included.
            // In this fixture file_a's excerpt has `order: 5` and file_b's has `order: 1`.
4350        let input = make_input(
4351            "x",
4352            0..1,
4353            0,
4354            vec![],
4355            vec![
4356                RelatedFile {
4357                    path: Path::new("file_a.rs").into(),
4358                    max_row: 10,
4359                    in_open_source_repo: false,
4360                    excerpts: vec![RelatedExcerpt {
4361                        row_range: 0..10,
4362                        text: "low priority content\n".into(),
4363                        order: 5,
4364                    }],
4365                },
4366                RelatedFile {
4367                    path: Path::new("file_b.rs").into(),
4368                    max_row: 10,
4369                    in_open_source_repo: false,
4370                    excerpts: vec![RelatedExcerpt {
4371                        row_range: 0..10,
4372                        text: "high priority content\n".into(),
4373                        order: 1,
4374                    }],
4375                },
4376            ],
4377        );
4378
4379        // With large budget, both files included; rendered in stable lexicographic order.
            // NOTE(review): file_a.rs / file_b.rs are already in path order in the input,
            // so this assertion alone cannot tell input order from sorted order.
4380        assert_eq!(
4381            format_with_budget(&input, 10000).unwrap(),
4382            indoc! {r#"
4383                <|file_sep|>file_a.rs
4384                low priority content
4385                <|file_sep|>file_b.rs
4386                high priority content
4387                <|file_sep|>test.rs
4388                <|fim_prefix|>
4389                <|fim_middle|>current
4390                <|user_cursor|>x
4391                <|fim_suffix|>
4392                <|fim_middle|>updated
4393            "#}
4394            .to_string()
4395        );
4396
4397        // With tight budget, only file_b (lower order) fits.
4398        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
4399        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
4400        // file_a would need another 14 tokens, which doesn't fit.
4401        assert_eq!(
4402            format_with_budget(&input, 52).unwrap(),
4403            indoc! {r#"
4404                <|file_sep|>file_b.rs
4405                high priority content
4406                <|file_sep|>test.rs
4407                <|fim_prefix|>
4408                <|fim_middle|>current
4409                <|user_cursor|>x
4410                <|fim_suffix|>
4411                <|fim_middle|>updated
4412            "#}
4413            .to_string()
4414        );
4415    }
4416
4417    #[test]
4418    fn test_truncation_drops_high_order_excerpts_within_file() {
4419        // A single file has excerpts at order 1 and order 3. With a tight budget,
4420        // only the order-1 excerpts are included while the order-3 excerpt is
4421        // dropped — even though they belong to the same file. This also preserves
4422        // the parent invariant: parent outline items have order ≤ their best
4423        // child, so they're always included when any child is.
4424        let input = make_input(
4425            "x",
4426            0..1,
4427            0,
4428            vec![],
4429            vec![RelatedFile {
4430                path: Path::new("mod.rs").into(),
4431                max_row: 30,
4432                in_open_source_repo: false,
4433                excerpts: vec![
4434                    RelatedExcerpt {
4435                        row_range: 0..5,
4436                        text: "mod header\n".into(),
4437                        order: 1,
4438                    },
4439                    RelatedExcerpt {
4440                        row_range: 5..15,
4441                        text: "important fn\n".into(),
4442                        order: 1,
4443                    },
4444                    RelatedExcerpt {
4445                        row_range: 15..30,
4446                        text: "less important fn\n".into(),
4447                        order: 3,
4448                    },
4449                ],
4450            }],
4451        );
4452
4453        // With large budget, all three excerpts included.
4454        assert_eq!(
4455            format_with_budget(&input, 10000).unwrap(),
4456            indoc! {r#"
4457                <|file_sep|>mod.rs
4458                mod header
4459                ...
4460                important fn
4461                ...
4462                less important fn
4463                <|file_sep|>test.rs
4464                <|fim_prefix|>
4465                <|fim_middle|>current
4466                <|user_cursor|>x
4467                <|fim_suffix|>
4468                <|fim_middle|>updated
4469            "#}
4470            .to_string()
4471        );
4472
4473        // With tight budget, only order<=1 excerpts included (header + important fn).
            // The expected text keeps a trailing `...` line after "important fn", in the
            // position where the order-3 excerpt appeared in the large-budget output.
4474        assert_eq!(
4475            format_with_budget(&input, 55).unwrap(),
4476            indoc! {r#"
4477                <|file_sep|>mod.rs
4478                mod header
4479                ...
4480                important fn
4481                ...
4482                <|file_sep|>test.rs
4483                <|fim_prefix|>
4484                <|fim_middle|>current
4485                <|user_cursor|>x
4486                <|fim_suffix|>
4487                <|fim_middle|>updated
4488            "#}
4489            .to_string()
4490        );
4491    }
4492
4493    #[test]
4494    fn test_truncation_drops_older_events_first() {
            // Fixture: two edit events and no related files. The old.rs event comes
            // first in the input vector and the new.rs event second.
4495        let input = make_input(
4496            "x",
4497            0..1,
4498            0,
4499            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
4500            vec![],
4501        );
4502
            // Budget 10000: both events are expected, in input order.
4503        assert_eq!(
4504            format_with_budget(&input, 10000).unwrap(),
4505            indoc! {r#"
4506                <|file_sep|>edit history
4507                --- a/old.rs
4508                +++ b/old.rs
4509                -1
4510                --- a/new.rs
4511                +++ b/new.rs
4512                -2
4513                <|file_sep|>test.rs
4514                <|fim_prefix|>
4515                <|fim_middle|>current
4516                <|user_cursor|>x
4517                <|fim_suffix|>
4518                <|fim_middle|>updated
4519            "#}
4520            .to_string()
4521        );
4522
            // Budget 60: only the new.rs event (the last one in the input) is expected
            // to remain in the edit history section.
4523        assert_eq!(
4524            format_with_budget(&input, 60).unwrap(),
4525            indoc! {r#"
4526                <|file_sep|>edit history
4527                --- a/new.rs
4528                +++ b/new.rs
4529                -2
4530                <|file_sep|>test.rs
4531                <|fim_prefix|>
4532                <|fim_middle|>current
4533                <|user_cursor|>x
4534                <|fim_suffix|>
4535                <|fim_middle|>updated
4536            "#}
4537            .to_string()
4538        );
4539    }
4540
4541    #[test]
4542    fn test_cursor_excerpt_always_included_with_minimal_budget() {
            // Fixture: a 12-byte cursor excerpt that is fully editable (0..12), plus one
            // edit event and one related file.
4543        let input = make_input(
4544            "fn main() {}",
4545            0..12,
4546            3,
4547            vec![make_event("a.rs", "-old\n+new\n")],
4548            vec![make_related_file("related.rs", "helper\n")],
4549        );
4550
            // With a budget of 30 the formatter is expected to return `None`.
            // NOTE(review): the test name says the cursor excerpt is always included,
            // but the assertion expects no prompt at all — confirm the intended name.
4551        assert!(format_with_budget(&input, 30).is_none())
4552    }
4553
4554    #[track_caller]
4555    fn format_seed_coder(input: &ZetaPromptInput) -> String {
            // Test helper: formats `input` as `ZetaFormat::V0211SeedCoder` with a fixed
            // budget of 10000 and panics via `expect` if no prompt is produced.
            // `#[track_caller]` makes that panic point at the calling test.
4556        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4557            .expect("seed coder prompt formatting should succeed")
4558    }
4559
4560    #[track_caller]
4561    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
            // Test helper: formats `input` as `ZetaFormat::V0211SeedCoder` using the
            // caller-supplied `max_tokens` budget and panics via `expect` if no prompt is
            // produced. `#[track_caller]` makes that panic point at the calling test.
4562        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4563            .expect("seed coder prompt formatting should succeed")
4564    }
4565
4566    #[test]
4567    fn test_seed_coder_basic_format() {
            // Fixture: the editable range 7..15 covers the word "editable" (after the
            // 7-byte "prefix\n"), and cursor offset 10 falls between "edi" and "table".
4568        let input = make_input(
4569            "prefix\neditable\nsuffix",
4570            7..15,
4571            10,
4572            vec![make_event("a.rs", "-old\n+new\n")],
4573            vec![make_related_file("related.rs", "fn helper() {}\n")],
4574        );
4575
            // Expected layout: the suffix section comes first, then the related file,
            // the edit history, and finally the cursor file, whose editable text sits
            // between `<<<<<<< CURRENT` and `=======`. The prompt ends right after
            // `<[fim-middle]>` with no trailing newline.
4576        assert_eq!(
4577            format_seed_coder(&input),
4578            indoc! {r#"
4579                <[fim-suffix]>
4580                suffix
4581                <[fim-prefix]><filename>related.rs
4582                fn helper() {}
4583
4584                <filename>edit_history
4585                --- a/a.rs
4586                +++ b/a.rs
4587                -old
4588                +new
4589
4590                <filename>test.rs
4591                prefix
4592                <<<<<<< CURRENT
4593                edi<|user_cursor|>table
4594                =======
4595                <[fim-middle]>"#}
4596        );
4597    }
4598
4599    #[test]
4600    fn test_seed_coder_no_context() {
            // Fixture: no edit events and no related files; the editable range 7..13
            // covers "middle" and the cursor sits between "mid" and "dle".
4601        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
4602
            // Expected prompt contains only the suffix section and the cursor file section.
4603        assert_eq!(
4604            format_seed_coder(&input),
4605            indoc! {r#"
4606                <[fim-suffix]>
4607                after
4608                <[fim-prefix]><filename>test.rs
4609                before
4610                <<<<<<< CURRENT
4611                mid<|user_cursor|>dle
4612                =======
4613                <[fim-middle]>"#}
4614        );
4615    }
4616
4617    #[test]
4618    fn test_seed_coder_truncation_drops_context() {
            // Fixture: a fully editable excerpt "code", one edit event and one related file.
4619        let input = make_input(
4620            "code",
4621            0..4,
4622            2,
4623            vec![make_event("a.rs", "-x\n+y\n")],
4624            vec![make_related_file("r1.rs", "content\n")],
4625        );
4626
4627        // With large budget, everything is included
4628        assert_eq!(
4629            format_seed_coder(&input),
4630            indoc! {r#"
4631                <[fim-suffix]>
4632                <[fim-prefix]><filename>r1.rs
4633                content
4634
4635                <filename>edit_history
4636                --- a/a.rs
4637                +++ b/a.rs
4638                -x
4639                +y
4640
4641                <filename>test.rs
4642                <<<<<<< CURRENT
4643                co<|user_cursor|>de
4644                =======
4645                <[fim-middle]>"#}
4646        );
4647
            // Budget 24: formatting is expected to yield `None`.
4648        assert_eq!(
4649            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 24),
4650            None
4651        );
4652
            // Budget 40: the related file and the edit history are expected to be
            // omitted, leaving only the cursor file section.
4653        assert_eq!(
4654            format_seed_coder_with_budget(&input, 40),
4655            indoc! {r#"
4656                <[fim-suffix]>
4657                <[fim-prefix]><filename>test.rs
4658                <<<<<<< CURRENT
4659                co<|user_cursor|>de
4660                =======
4661                <[fim-middle]>"#
4662            }
4663        )
4664    }
4665
4666    #[test]
4667    fn test_seed_coder_truncation_prioritizes_lower_order() {
            // Fixture: two related files, low_prio.rs (`order: 10`) listed before
            // high_prio.rs (`order: 1`), and no edit events.
4668        let input = make_input(
4669            "code",
4670            0..4,
4671            2,
4672            vec![],
4673            vec![
4674                RelatedFile {
4675                    path: Path::new("low_prio.rs").into(),
4676                    max_row: 5,
4677                    in_open_source_repo: false,
4678                    excerpts: vec![RelatedExcerpt {
4679                        row_range: 0..5,
4680                        text: "low prio\n".into(),
4681                        order: 10,
4682                    }],
4683                },
4684                RelatedFile {
4685                    path: Path::new("high_prio.rs").into(),
4686                    max_row: 5,
4687                    in_open_source_repo: false,
4688                    excerpts: vec![RelatedExcerpt {
4689                        row_range: 0..5,
4690                        text: "high prio\n".into(),
4691                        order: 1,
4692                    }],
4693                },
4694            ],
4695        );
4696
4697        // With large budget, both included; rendered in the order the files were passed in
            // (low_prio.rs before high_prio.rs), not sorted by path or by `order`.
4698        assert_eq!(
4699            format_seed_coder(&input),
4700            indoc! {r#"
4701                <[fim-suffix]>
4702                <[fim-prefix]><filename>low_prio.rs
4703                low prio
4704                <filename>high_prio.rs
4705                high prio
4706
4707                <filename>test.rs
4708                <<<<<<< CURRENT
4709                co<|user_cursor|>de
4710                =======
4711                <[fim-middle]>"#}
4712        );
4713
4714        // With tight budget under the generic heuristic, context is dropped but the
4715        // minimal cursor section still fits.
4716        assert_eq!(
4717            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 44),
4718            Some(
4719                indoc! {r#"
4720                    <[fim-suffix]>
4721                    <[fim-prefix]><filename>test.rs
4722                    <<<<<<< CURRENT
4723                    co<|user_cursor|>de
4724                    =======
4725                    <[fim-middle]>"#}
4726                .to_string()
4727            )
4728        );
4729    }
4730
4731    #[test]
4732    fn test_format_zeta1_from_input_basic() {
            // Byte layout of the excerpt: "fn before() {}\n" is 15 bytes, so the editable
            // range 15..41 covers "fn foo() {\n    let x = 1;\n" and stops before "}\n".
            // Cursor offset 30 sits right after the four-space indent of the `let` line.
4733        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
4734        let input = ZetaPromptInput {
4735            cursor_path: Path::new("src/main.rs").into(),
4736            cursor_excerpt: excerpt.into(),
4737            cursor_offset_in_excerpt: 30,
4738            excerpt_start_row: Some(0),
4739            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
4740            related_files: Some(vec![]),
4741            active_buffer_diagnostics: vec![],
4742            excerpt_ranges: ExcerptRanges {
4743                editable_150: 15..41,
4744                editable_180: 15..41,
4745                editable_350: 15..41,
4746                editable_150_context_350: 0..excerpt.len(),
4747                editable_180_context_350: 0..excerpt.len(),
4748                editable_350_context_150: 0..excerpt.len(),
4749                ..Default::default()
4750            },
4751            syntax_ranges: None,
4752            experiment: None,
4753            in_open_source_repo: false,
4754            can_collect_data: false,
4755            repo_url: None,
4756        };
4757
            // The editable and context ranges are passed explicitly: 15..41 and the
            // whole excerpt.
4758        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
4759
            // Expected prompt: instruction header, one "User edited" diff block, then the
            // excerpt fenced with the cursor path, starting with `<|start_of_file|>` and
            // with the editable region and cursor marked inline.
4760        assert_eq!(
4761            prompt,
4762            concat!(
4763                "### Instruction:\n",
4764                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4765                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4766                "into account the cursor location.\n",
4767                "\n",
4768                "### User Edits:\n",
4769                "\n",
4770                "User edited other.rs:\n",
4771                "```diff\n",
4772                "-old\n",
4773                "+new\n",
4774                "\n",
4775                "```\n",
4776                "\n",
4777                "### User Excerpt:\n",
4778                "\n",
4779                "```src/main.rs\n",
4780                "<|start_of_file|>\n",
4781                "fn before() {}\n",
4782                "<|editable_region_start|>\n",
4783                "fn foo() {\n",
4784                "    <|user_cursor_is_here|>let x = 1;\n",
4785                "\n",
4786                "<|editable_region_end|>}\n",
4787                "fn after() {}\n",
4788                "\n",
4789                "```\n",
4790                "\n",
4791                "### Response:\n",
4792            ),
4793        );
4794    }
4795
4796    #[test]
4797    fn test_format_zeta1_from_input_no_start_of_file() {
            // The excerpt is 28 bytes long and both the editable and the context range
            // cover all of it. `excerpt_start_row` is `Some(10)` here, and the expected
            // prompt below contains no `<|start_of_file|>` line.
4798        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
4799        let input = ZetaPromptInput {
4800            cursor_path: Path::new("src/main.rs").into(),
4801            cursor_excerpt: excerpt.into(),
4802            cursor_offset_in_excerpt: 15,
4803            excerpt_start_row: Some(10),
4804            events: vec![],
4805            related_files: Some(vec![]),
4806            active_buffer_diagnostics: vec![],
4807            excerpt_ranges: ExcerptRanges {
4808                editable_150: 0..28,
4809                editable_180: 0..28,
4810                editable_350: 0..28,
4811                editable_150_context_350: 0..28,
4812                editable_180_context_350: 0..28,
4813                editable_350_context_150: 0..28,
4814                ..Default::default()
4815            },
4816            syntax_ranges: None,
4817            experiment: None,
4818            in_open_source_repo: false,
4819            can_collect_data: false,
4820            repo_url: None,
4821        };
4822
4823        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
4824
            // With no events, the "User Edits" section is expected to hold only blank
            // lines; the excerpt opens directly with `<|editable_region_start|>`.
4825        assert_eq!(
4826            prompt,
4827            concat!(
4828                "### Instruction:\n",
4829                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4830                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4831                "into account the cursor location.\n",
4832                "\n",
4833                "### User Edits:\n",
4834                "\n",
4835                "\n",
4836                "\n",
4837                "### User Excerpt:\n",
4838                "\n",
4839                "```src/main.rs\n",
4840                "<|editable_region_start|>\n",
4841                "fn foo() {\n",
4842                "    <|user_cursor_is_here|>let x = 1;\n",
4843                "}\n",
4844                "\n",
4845                "<|editable_region_end|>\n",
4846                "```\n",
4847                "\n",
4848                "### Response:\n",
4849            ),
4850        );
4851    }
4852
4853    #[test]
4854    fn test_format_zeta1_from_input_with_sub_ranges() {
            // The editable range 10..37 starts right after "// prefix\n" (10 bytes) and
            // stops just before the newline that follows "}". The context range is the
            // whole excerpt, and cursor offset 25 sits after the indent of the `let` line.
4855        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
4856        let editable_range = 10..37;
4857        let context_range = 0..excerpt.len();
4858
4859        let input = ZetaPromptInput {
4860            cursor_path: Path::new("test.rs").into(),
4861            cursor_excerpt: excerpt.into(),
4862            cursor_offset_in_excerpt: 25,
4863            excerpt_start_row: Some(0),
4864            events: vec![],
4865            related_files: Some(vec![]),
4866            active_buffer_diagnostics: vec![],
4867            excerpt_ranges: ExcerptRanges {
4868                editable_150: editable_range.clone(),
4869                editable_180: editable_range.clone(),
4870                editable_350: editable_range.clone(),
4871                editable_150_context_350: context_range.clone(),
4872                editable_180_context_350: context_range.clone(),
4873                editable_350_context_150: context_range.clone(),
4874                ..Default::default()
4875            },
4876            syntax_ranges: None,
4877            experiment: None,
4878            in_open_source_repo: false,
4879            can_collect_data: false,
4880            repo_url: None,
4881        };
4882
4883        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
4884
            // Expected prompt: text before and after the editable range ("// prefix",
            // "// suffix") appears outside the editable-region markers.
4885        assert_eq!(
4886            prompt,
4887            concat!(
4888                "### Instruction:\n",
4889                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4890                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4891                "into account the cursor location.\n",
4892                "\n",
4893                "### User Edits:\n",
4894                "\n",
4895                "\n",
4896                "\n",
4897                "### User Excerpt:\n",
4898                "\n",
4899                "```test.rs\n",
4900                "<|start_of_file|>\n",
4901                "// prefix\n",
4902                "<|editable_region_start|>\n",
4903                "fn foo() {\n",
4904                "    <|user_cursor_is_here|>let x = 1;\n",
4905                "}\n",
4906                "<|editable_region_end|>\n",
4907                "// suffix\n",
4908                "\n",
4909                "```\n",
4910                "\n",
4911                "### Response:\n",
4912            ),
4913        );
4914    }
4915
4916    #[test]
4917    fn test_max_event_count() {
            // Local helper: builds the event for `index`, with path `event-{index}.rs`
            // and a diff of `-old-{index}` / `+new-{index}`.
            // NOTE(review): the explicit trailing `return ...;` is redundant; a tail
            // expression would be the idiomatic form.
4918        fn make_numbered_event(index: usize) -> Event {
4919            return make_event(
4920                &format!("event-{index}.rs"),
4921                &format!("-old-{index}\n+new-{index}\n"),
4922            );
4923        }
            // Fixture: three numbered events (0, 1, 2) and no related files.
4924        let input = make_input(
4925            "x",
4926            0..1,
4927            0,
4928            (0..3).map(make_numbered_event).collect(),
4929            vec![],
4930        );
4931
            // The fourth argument is `usize::MAX` in every call below, so only the final
            // argument varies (presumably the maximum event count — confirm against the
            // signature of `format_edit_history_within_budget`).
            // Final argument 5 with three events: all three are expected.
4932        let edit_history_section = format_edit_history_within_budget(
4933            &input.events,
4934            "<|file_sep|>",
4935            "edit history",
4936            usize::MAX,
4937            5,
4938        );
4939
4940        assert_eq!(
4941            &edit_history_section,
4942            indoc!(
4943                "
4944                <|file_sep|>edit history
4945                --- a/event-0.rs
4946                +++ b/event-0.rs
4947                -old-0
4948                +new-0
4949                --- a/event-1.rs
4950                +++ b/event-1.rs
4951                -old-1
4952                +new-1
4953                --- a/event-2.rs
4954                +++ b/event-2.rs
4955                -old-2
4956                +new-2
4957            "
4958            )
4959        );
4960
            // Final argument 2: only event-1 and event-2 are expected; event-0 is omitted.
4961        let edit_history_section = format_edit_history_within_budget(
4962            &input.events,
4963            "<|file_sep|>",
4964            "edit history",
4965            usize::MAX,
4966            2,
4967        );
4968
4969        assert_eq!(
4970            &edit_history_section,
4971            indoc!(
4972                "
4973                <|file_sep|>edit history
4974                --- a/event-1.rs
4975                +++ b/event-1.rs
4976                -old-1
4977                +new-1
4978                --- a/event-2.rs
4979                +++ b/event-2.rs
4980                -old-2
4981                +new-2
4982            "
4983            )
4984        );
4985
            // Final argument 0: the section is expected to be the empty string (no
            // header either).
4986        let edit_history_section = format_edit_history_within_budget(
4987            &input.events,
4988            "<|file_sep|>",
4989            "edit history",
4990            usize::MAX,
4991            0,
4992        );
4993
4994        assert_eq!(&edit_history_section, "");
4995    }
4996
4997    #[test]
4998    fn test_clean_zeta1_model_output_basic() {
            // Model output wrapped in editable-region start/end marker lines.
4999        let output = indoc! {"
5000            <|editable_region_start|>
5001            fn main() {
5002                println!(\"hello\");
5003            }
5004            <|editable_region_end|>
5005        "};
5006
            // Expected: only the text between the marker lines, without the newline
            // that preceded the end marker.
5007        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5008        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
5009    }
5010
5011    #[test]
5012    fn test_clean_zeta1_model_output_with_cursor() {
            // Same shape as the basic case, but the region contains a
            // `<|user_cursor_is_here|>` marker.
5013        let output = indoc! {"
5014            <|editable_region_start|>
5015            fn main() {
5016                <|user_cursor_is_here|>println!(\"hello\");
5017            }
5018            <|editable_region_end|>
5019        "};
5020
            // Expected: region markers removed and the cursor marker rewritten to
            // `<|user_cursor|>` at the same position.
5021        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5022        assert_eq!(
5023            cleaned,
5024            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
5025        );
5026    }
5027
5028    #[test]
5029    fn test_clean_zeta1_model_output_no_markers() {
            // Output without any region markers is expected to come back unchanged,
            // trailing newline included.
5030        let output = "fn main() {}\n";
5031        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5032        assert_eq!(cleaned, "fn main() {}\n");
5033    }
5034
5035    #[test]
5036    fn test_clean_zeta1_model_output_empty_region() {
            // Start and end markers on adjacent lines with nothing between them are
            // expected to clean to the empty string.
5037        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
5038        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5039        assert_eq!(cleaned, "");
5040    }
5041
5042    fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
            // Test helper: returns a copy of `excerpt` in which the bytes at
            // `parsed_output.range_in_excerpt` are replaced by
            // `parsed_output.new_editable_region`. `String::replace_range` panics if the
            // range is out of bounds or not on char boundaries.
5043        let mut result = excerpt.to_string();
5044        result.replace_range(
5045            parsed_output.range_in_excerpt.clone(),
5046            &parsed_output.new_editable_region,
5047        );
5048        result
5049    }
5050
5051    #[test]
5052    fn test_parse_zeta2_model_output() {
            // Ranges are derived from the excerpt text: the context range runs from
            // "ctx start" up to (not including) "after ctx", and the editable range is
            // the "editable old\n" line. The last argument to
            // `make_input_with_context_range` is `editable_start` (presumably the cursor
            // offset — confirm against the helper).
5053        let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
5054        let context_start = excerpt.find("ctx start").unwrap();
5055        let context_end = excerpt.find("after ctx").unwrap();
5056        let editable_start = excerpt.find("editable old").unwrap();
5057        let editable_end = editable_start + "editable old\n".len();
5058        let input = make_input_with_context_range(
5059            excerpt,
5060            editable_start..editable_end,
5061            context_start..context_end,
5062            editable_start,
5063        );
5064
            // The model output ends with a `>>>>>>> UPDATED` marker line.
5065        let output = parse_zeta2_model_output(
5066            "editable new\n>>>>>>> UPDATED\n",
5067            ZetaFormat::V0131GitMergeMarkersPrefix,
5068            &input,
5069        )
5070        .unwrap();
5071
            // Applying the parsed edit is expected to replace only the editable line and
            // leave the text outside the editable range untouched.
5072        assert_eq!(
5073            apply_edit(excerpt, &output),
5074            "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
5075        );
5076    }
5077
5078    #[test]
5079    fn test_parse_zeta2_model_output_identity() {
            // The editable range covers "bbb\nccc\n" (from "bbb" up to "ddd"); the context
            // range is the whole excerpt.
5080        let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
5081        let editable_start = excerpt.find("bbb").unwrap();
5082        let editable_end = excerpt.find("ddd").unwrap();
5083        let input = make_input_with_context_range(
5084            excerpt,
5085            editable_start..editable_end,
5086            0..excerpt.len(),
5087            editable_start,
5088        );
5089
            // The model output repeats the current editable text verbatim (plus the end
            // marker), so applying the parsed edit must reproduce the excerpt.
5090        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5091        let output =
5092            parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
5093
5094        assert_eq!(apply_edit(excerpt, &output), excerpt);
5095    }
5096
5097    #[test]
5098    fn test_parse_zeta2_model_output_strips_end_marker() {
            // The whole excerpt is both the editable and the context range.
5099        let excerpt = "hello\nworld\n";
5100        let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
5101
            // Parse the same replacement text once with and once without the trailing
            // `>>>>>>> UPDATED` marker line.
5102        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5103        let output1 =
5104            parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
5105        let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();
5106
            // Both variants are expected to apply to the same result, and that result
            // must not contain the marker.
5107        assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
5108        assert_eq!(apply_edit(excerpt, &output1), "new content\n");
5109    }
5110}