zeta_prompt.rs

   1pub mod excerpt_ranges;
   2pub mod multi_region;
   3pub mod udiff;
   4
   5use anyhow::{Result, anyhow};
   6use serde::{Deserialize, Serialize};
   7use std::fmt::Write;
   8use std::ops::Range;
   9use std::path::Path;
  10use std::sync::Arc;
  11use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
  12
  13pub use crate::excerpt_ranges::{
  14    ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
  15};
  16
/// Marker text that stands for the user's cursor position. It is handed to the
/// editable-region writers and is listed among the special tokens of the
/// multi-region formats.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  23
  24fn estimate_tokens(bytes: usize) -> usize {
  25    bytes / 3
  26}
  27
  28/// Leave some slack to avoid overflow.
  29fn apply_prompt_budget_margin(max_tokens: usize) -> usize {
  30    (max_tokens as f64 * 0.9).floor() as usize
  31}
  32
  33/// Ensure text fits into the tokens budget; trim by line boundaries if needed.
  34pub fn clamp_text_to_token_count(text: &str, max_tokens: usize) -> &str {
  35    if estimate_tokens(text.len()) <= max_tokens {
  36        return text;
  37    }
  38
  39    let mut end_byte_offset = 0;
  40
  41    for line in text.split_inclusive('\n') {
  42        if estimate_tokens(line.len() + end_byte_offset) > max_tokens {
  43            break;
  44        }
  45
  46        end_byte_offset += line.len();
  47    }
  48
  49    &text[..end_byte_offset]
  50}
  51
/// Everything the prompt formatters in this file need to render a prompt for
/// the excerpt around the cursor.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file that contains the cursor.
    pub cursor_path: Arc<Path>,
    /// Text of the excerpt around the cursor; the byte ranges in this struct
    /// index into it.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Buffer row on which `cursor_excerpt` starts. When absent, related-file
    /// excerpts are not filtered against the cursor region.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events that are rendered into the prompt's edit-history section.
    pub events: Vec<Arc<Event>>,
    /// Excerpts offered as additional context; `None` is treated as an empty
    /// list by the prompt formatter.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    /// Diagnostics for the active buffer. Within this file they are only
    /// forwarded to the prompt for `ZetaFormat::V0420Diagnostics`.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    // NOTE(review): the three fields below are not read by the code visible in
    // this part of the file; presumably request metadata — confirm with callers.
    #[serde(default)]
    pub in_open_source_repo: bool,
    #[serde(default)]
    pub can_collect_data: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
  79
/// Identifies a prompt format version.
///
/// The variant name (via `IntoStaticStr`) is what `Display` prints and what
/// `ZetaFormat::parse` matches against, and `EnumIter` drives the listing in
/// `ZetaFormat::options_as_string`, so variant names and declaration order are
/// observable.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Needed because `v0226Hashline` starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    /// The format used by `ZetaFormat::default()`.
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    V0331SeedCoderModelPy,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
    /// Multi-block marker spans with NO_EDITS sentinel.
    V0306SeedMultiRegions,
    /// Byte-exact marker spans; all intermediate markers emitted; repeated marker means no-edit.
    V0316SeedMultiRegions,
    /// V0316, but marker numbers are relative to the cursor block (e.g. -1, -0, +1).
    V0317SeedMultiRegions,
    /// V0316 with larger block sizes.
    V0318SeedMultiRegions,
    /// V0318-style markers over the full available current file excerpt with no related files.
    V0327SingleFile,
    /// V0318-style prompt with buffer diagnostics
    V0420Diagnostics,
}
 120
 121impl std::fmt::Display for ZetaFormat {
 122    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 123        write!(f, "{}", <&'static str>::from(self))
 124    }
 125}
 126
 127impl ZetaFormat {
 128    pub fn parse(format_name: &str) -> Result<Self> {
 129        let lower = format_name.to_lowercase();
 130
 131        // Exact case-insensitive match takes priority, bypassing ambiguity checks.
 132        for variant in ZetaFormat::iter() {
 133            if <&'static str>::from(&variant).to_lowercase() == lower {
 134                return Ok(variant);
 135            }
 136        }
 137
 138        let mut results = ZetaFormat::iter().filter(|version| {
 139            <&'static str>::from(version)
 140                .to_lowercase()
 141                .contains(&lower)
 142        });
 143        let Some(result) = results.next() else {
 144            anyhow::bail!(
 145                "`{format_name}` did not match any of:\n{}",
 146                Self::options_as_string()
 147            );
 148        };
 149        if results.next().is_some() {
 150            anyhow::bail!(
 151                "`{format_name}` matched more than one of:\n{}",
 152                Self::options_as_string()
 153            );
 154        }
 155        Ok(result)
 156    }
 157
 158    pub fn options_as_string() -> String {
 159        ZetaFormat::iter()
 160            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 161            .collect::<Vec<_>>()
 162            .concat()
 163    }
 164}
 165
/// A recorded editing event. Serialized with an `event` tag that names the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, carried as a diff.
    BufferChange {
        /// Path written on the `+++ b` line by `write_event`.
        path: Arc<Path>,
        /// Path written on the `--- a` line by `write_event`.
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the two path lines.
        diff: String,
        /// When true, `write_event` prefixes the entry with
        /// `// User accepted prediction:`.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; not written into the prompt.
        in_open_source_repo: bool,
    },
}
 177
 178impl Event {
 179    pub fn in_open_source_repo(&self) -> bool {
 180        match self {
 181            Event::BufferChange {
 182                in_open_source_repo,
 183                ..
 184            } => *in_open_source_repo,
 185        }
 186    }
 187}
 188
 189pub fn write_event(prompt: &mut String, event: &Event) {
 190    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 191        for component in path.components() {
 192            prompt.push('/');
 193            write!(prompt, "{}", component.as_os_str().display()).ok();
 194        }
 195    }
 196    match event {
 197        Event::BufferChange {
 198            path,
 199            old_path,
 200            diff,
 201            predicted,
 202            in_open_source_repo: _,
 203        } => {
 204            if *predicted {
 205                prompt.push_str("// User accepted prediction:\n");
 206            }
 207            prompt.push_str("--- a");
 208            write_path_as_unix_str(prompt, old_path.as_ref());
 209            prompt.push_str("\n+++ b");
 210            write_path_as_unix_str(prompt, path.as_ref());
 211            prompt.push('\n');
 212            prompt.push_str(diff);
 213        }
 214    }
 215}
 216
/// A diagnostic attached to the active buffer, together with the source
/// snippet it refers to.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    /// Optional numeric severity.
    /// NOTE(review): the numeric scale is not visible in this file — confirm.
    pub severity: Option<i32>,
    /// Message rendered as the `*message*:` heading of a diagnostics entry.
    pub message: String,
    /// Source text rendered inside the entry's fenced block; clamped to roughly
    /// 256 estimated tokens when rendered.
    pub snippet: String,
    /// Buffer rows covered by `snippet`; used to rank diagnostics by distance
    /// from the cursor row.
    pub snippet_buffer_row_range: Range<u32>,
    /// NOTE(review): presumably the byte range of the diagnostic inside
    /// `snippet`; not read by the code visible here — confirm.
    pub diagnostic_range_in_snippet: Range<usize>,
}
 225
/// A file that contributes context excerpts to the prompt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path of the file; compared with the cursor path when redundant
    /// excerpts are filtered out.
    pub path: Arc<Path>,
    /// NOTE(review): presumably the last row of the file; not read by the code
    /// visible here — confirm.
    pub max_row: u32,
    /// Excerpts taken from this file. A file left without excerpts is dropped
    /// by `filter_redundant_excerpts`.
    pub excerpts: Vec<RelatedExcerpt>,
    /// Defaults to `false` when missing from the serialized form.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 234
/// A contiguous piece of text taken from a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by the excerpt; compared against the cursor row range when
    /// redundant excerpts are filtered out.
    pub row_range: Range<u32>,
    /// Text of the excerpt.
    pub text: Arc<str>,
    /// Defaults to `0` when missing from the serialized form.
    /// NOTE(review): how this ordering value is used is not visible here — confirm.
    #[serde(default)]
    pub order: usize,
}
 242
 243pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 244    special_tokens_for_format(format).iter().any(|token| {
 245        if let Some(line_token) = token.strip_suffix('\n') {
 246            input.cursor_excerpt.lines().any(|line| line == line_token)
 247        } else {
 248            input.cursor_excerpt.contains(token)
 249        }
 250    })
 251}
 252
 253pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> Option<String> {
 254    let max_prompt_tokens = match format {
 255        ZetaFormat::V0112MiddleAtEnd
 256        | ZetaFormat::V0113Ordered
 257        | ZetaFormat::V0114180EditableRegion
 258        | ZetaFormat::V0120GitMergeMarkers
 259        | ZetaFormat::V0131GitMergeMarkersPrefix
 260        | ZetaFormat::V0211Prefill
 261        | ZetaFormat::V0211SeedCoder
 262        | ZetaFormat::v0226Hashline
 263        | ZetaFormat::V0304VariableEdit
 264        | ZetaFormat::V0304SeedNoEdits
 265        | ZetaFormat::V0306SeedMultiRegions
 266        | ZetaFormat::V0316SeedMultiRegions
 267        | ZetaFormat::V0317SeedMultiRegions
 268        | ZetaFormat::V0331SeedCoderModelPy
 269        | ZetaFormat::V0318SeedMultiRegions => 4096,
 270        ZetaFormat::V0420Diagnostics => 8192,
 271        ZetaFormat::V0327SingleFile => 16384,
 272    };
 273
 274    format_prompt_with_budget_for_format(input, format, max_prompt_tokens)
 275}
 276
 277pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 278    match format {
 279        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
 280        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
 281        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
 282        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 283        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
 284        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
 285        ZetaFormat::V0211SeedCoder | ZetaFormat::V0331SeedCoderModelPy => {
 286            seed_coder::special_tokens()
 287        }
 288        ZetaFormat::v0226Hashline => hashline::special_tokens(),
 289        ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
 290        ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
 291        ZetaFormat::V0316SeedMultiRegions => {
 292            static TOKENS: &[&str] = &[
 293                seed_coder::FIM_SUFFIX,
 294                seed_coder::FIM_PREFIX,
 295                seed_coder::FIM_MIDDLE,
 296                seed_coder::FILE_MARKER,
 297                multi_region::V0316_END_MARKER,
 298                CURSOR_MARKER,
 299                multi_region::MARKER_TAG_PREFIX,
 300            ];
 301            TOKENS
 302        }
 303        ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => {
 304            static TOKENS: &[&str] = &[
 305                seed_coder::FIM_SUFFIX,
 306                seed_coder::FIM_PREFIX,
 307                seed_coder::FIM_MIDDLE,
 308                seed_coder::FILE_MARKER,
 309                multi_region::V0318_END_MARKER,
 310                CURSOR_MARKER,
 311                multi_region::MARKER_TAG_PREFIX,
 312            ];
 313            TOKENS
 314        }
 315        ZetaFormat::V0317SeedMultiRegions => {
 316            static TOKENS: &[&str] = &[
 317                seed_coder::FIM_SUFFIX,
 318                seed_coder::FIM_PREFIX,
 319                seed_coder::FIM_MIDDLE,
 320                seed_coder::FILE_MARKER,
 321                multi_region::V0317_END_MARKER,
 322                CURSOR_MARKER,
 323                multi_region::RELATIVE_MARKER_TAG_PREFIX,
 324            ];
 325            TOKENS
 326        }
 327        ZetaFormat::V0327SingleFile => {
 328            static TOKENS: &[&str] = &[
 329                seed_coder::FIM_SUFFIX,
 330                seed_coder::FIM_PREFIX,
 331                seed_coder::FIM_MIDDLE,
 332                seed_coder::FILE_MARKER,
 333                multi_region::V0327_END_MARKER,
 334                CURSOR_MARKER,
 335                multi_region::MARKER_TAG_PREFIX,
 336            ];
 337            TOKENS
 338        }
 339        ZetaFormat::V0306SeedMultiRegions => {
 340            static TOKENS: &[&str] = &[
 341                seed_coder::FIM_SUFFIX,
 342                seed_coder::FIM_PREFIX,
 343                seed_coder::FIM_MIDDLE,
 344                seed_coder::FILE_MARKER,
 345                seed_coder::START_MARKER,
 346                seed_coder::SEPARATOR,
 347                seed_coder::END_MARKER,
 348                CURSOR_MARKER,
 349                multi_region::MARKER_TAG_PREFIX,
 350            ];
 351            TOKENS
 352        }
 353    }
 354}
 355
 356/// Returns the (editable_token_limit, context_token_limit) for a given format.
 357pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
 358    match format {
 359        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
 360        ZetaFormat::V0114180EditableRegion => (180, 350),
 361        ZetaFormat::V0120GitMergeMarkers
 362        | ZetaFormat::V0131GitMergeMarkersPrefix
 363        | ZetaFormat::V0211Prefill
 364        | ZetaFormat::V0211SeedCoder
 365        | ZetaFormat::V0331SeedCoderModelPy
 366        | ZetaFormat::v0226Hashline
 367        | ZetaFormat::V0306SeedMultiRegions
 368        | ZetaFormat::V0316SeedMultiRegions
 369        | ZetaFormat::V0318SeedMultiRegions
 370        | ZetaFormat::V0420Diagnostics
 371        | ZetaFormat::V0317SeedMultiRegions
 372        | ZetaFormat::V0327SingleFile
 373        | ZetaFormat::V0304SeedNoEdits => (350, 150),
 374
 375        ZetaFormat::V0304VariableEdit => (1024, 0),
 376    }
 377}
 378
 379pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 380    match format {
 381        ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
 382        ZetaFormat::V0112MiddleAtEnd
 383        | ZetaFormat::V0113Ordered
 384        | ZetaFormat::V0114180EditableRegion
 385        | ZetaFormat::V0120GitMergeMarkers
 386        | ZetaFormat::V0131GitMergeMarkersPrefix
 387        | ZetaFormat::V0211Prefill
 388        | ZetaFormat::V0211SeedCoder
 389        | ZetaFormat::V0331SeedCoderModelPy
 390        | ZetaFormat::V0304VariableEdit
 391        | ZetaFormat::V0306SeedMultiRegions
 392        | ZetaFormat::V0304SeedNoEdits => &[],
 393        ZetaFormat::V0316SeedMultiRegions => &[multi_region::V0316_END_MARKER],
 394        ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => {
 395            &[multi_region::V0318_END_MARKER]
 396        }
 397        ZetaFormat::V0317SeedMultiRegions => &[multi_region::V0317_END_MARKER],
 398        ZetaFormat::V0327SingleFile => &[multi_region::V0327_END_MARKER],
 399    }
 400}
 401
 402/// Return (editable_range, context_range) for the prompt format
 403pub fn excerpt_ranges_for_format(
 404    format: ZetaFormat,
 405    ranges: &ExcerptRanges,
 406) -> (Range<usize>, Range<usize>) {
 407    match format {
 408        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 409            ranges.editable_150.clone(),
 410            ranges.editable_150_context_350.clone(),
 411        ),
 412        ZetaFormat::V0114180EditableRegion => (
 413            ranges.editable_180.clone(),
 414            ranges.editable_180_context_350.clone(),
 415        ),
 416        ZetaFormat::V0120GitMergeMarkers
 417        | ZetaFormat::V0131GitMergeMarkersPrefix
 418        | ZetaFormat::V0211Prefill
 419        | ZetaFormat::V0211SeedCoder
 420        | ZetaFormat::V0331SeedCoderModelPy
 421        | ZetaFormat::v0226Hashline
 422        | ZetaFormat::V0304SeedNoEdits
 423        | ZetaFormat::V0306SeedMultiRegions
 424        | ZetaFormat::V0316SeedMultiRegions
 425        | ZetaFormat::V0318SeedMultiRegions
 426        | ZetaFormat::V0317SeedMultiRegions
 427        | ZetaFormat::V0420Diagnostics => (
 428            ranges.editable_350.clone(),
 429            ranges.editable_350_context_150.clone(),
 430        ),
 431        ZetaFormat::V0327SingleFile => (
 432            ranges.editable_350_context_150.clone(),
 433            ranges.context_8192.clone().unwrap_or(
 434                // shouldn't be used, only for compat with old data/clients
 435                ranges.editable_350_context_150.clone(),
 436            ),
 437        ),
 438
 439        ZetaFormat::V0304VariableEdit => {
 440            let context = ranges
 441                .editable_350_context_1024
 442                .clone()
 443                .or(ranges.editable_350_context_512.clone())
 444                .unwrap_or_else(|| ranges.editable_350_context_150.clone());
 445            (context.clone(), context)
 446        }
 447    }
 448}
 449
 450pub fn write_cursor_excerpt_section_for_format(
 451    format: ZetaFormat,
 452    prompt: &mut String,
 453    path: &Path,
 454    context: &str,
 455    editable_range: &Range<usize>,
 456    cursor_offset: usize,
 457) {
 458    match format {
 459        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
 460            prompt,
 461            path,
 462            context,
 463            editable_range,
 464            cursor_offset,
 465        ),
 466        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 467            v0113_ordered::write_cursor_excerpt_section(
 468                prompt,
 469                path,
 470                context,
 471                editable_range,
 472                cursor_offset,
 473            )
 474        }
 475        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 476            prompt,
 477            path,
 478            context,
 479            editable_range,
 480            cursor_offset,
 481        ),
 482        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 483            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 484                prompt,
 485                path,
 486                context,
 487                editable_range,
 488                cursor_offset,
 489            )
 490        }
 491        ZetaFormat::V0211SeedCoder
 492        | ZetaFormat::V0331SeedCoderModelPy
 493        | ZetaFormat::V0304SeedNoEdits => seed_coder::write_cursor_excerpt_section(
 494            prompt,
 495            path,
 496            context,
 497            editable_range,
 498            cursor_offset,
 499        ),
 500        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
 501            prompt,
 502            path,
 503            context,
 504            editable_range,
 505            cursor_offset,
 506        ),
 507        ZetaFormat::V0304VariableEdit => {
 508            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
 509        }
 510        ZetaFormat::V0306SeedMultiRegions => {
 511            prompt.push_str(&build_v0306_cursor_prefix(
 512                path,
 513                context,
 514                editable_range,
 515                cursor_offset,
 516            ));
 517        }
 518        ZetaFormat::V0316SeedMultiRegions => {
 519            prompt.push_str(&build_v0316_cursor_prefix(
 520                path,
 521                context,
 522                editable_range,
 523                cursor_offset,
 524            ));
 525        }
 526        ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => {
 527            prompt.push_str(&build_v0318_cursor_prefix(
 528                path,
 529                context,
 530                editable_range,
 531                cursor_offset,
 532            ));
 533        }
 534        ZetaFormat::V0317SeedMultiRegions => {
 535            prompt.push_str(&build_v0317_cursor_prefix(
 536                path,
 537                context,
 538                editable_range,
 539                cursor_offset,
 540            ));
 541        }
 542        ZetaFormat::V0327SingleFile => {
 543            prompt.push_str(&build_v0318_cursor_prefix(
 544                path,
 545                context,
 546                editable_range,
 547                cursor_offset,
 548            ));
 549        }
 550    }
 551}
 552
 553fn build_v0306_cursor_prefix(
 554    path: &Path,
 555    context: &str,
 556    editable_range: &Range<usize>,
 557    cursor_offset: usize,
 558) -> String {
 559    let mut section = String::new();
 560    let path_str = path.to_string_lossy();
 561    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 562
 563    section.push_str(&context[..editable_range.start]);
 564    section.push_str(seed_coder::START_MARKER);
 565
 566    let editable_text = &context[editable_range.clone()];
 567    let cursor_in_editable = cursor_offset - editable_range.start;
 568    multi_region::write_editable_with_markers(
 569        &mut section,
 570        editable_text,
 571        cursor_in_editable,
 572        CURSOR_MARKER,
 573    );
 574
 575    if !section.ends_with('\n') {
 576        section.push('\n');
 577    }
 578    section.push_str(seed_coder::SEPARATOR);
 579    section
 580}
 581
 582fn build_v0316_cursor_prefix(
 583    path: &Path,
 584    context: &str,
 585    editable_range: &Range<usize>,
 586    cursor_offset: usize,
 587) -> String {
 588    let mut section = String::new();
 589    let path_str = path.to_string_lossy();
 590    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 591
 592    section.push_str(&context[..editable_range.start]);
 593
 594    let editable_text = &context[editable_range.clone()];
 595    let cursor_in_editable = cursor_offset - editable_range.start;
 596    multi_region::write_editable_with_markers_v0316(
 597        &mut section,
 598        editable_text,
 599        cursor_in_editable,
 600        CURSOR_MARKER,
 601    );
 602
 603    if !section.ends_with('\n') {
 604        section.push('\n');
 605    }
 606    section
 607}
 608
 609fn build_v0318_cursor_prefix(
 610    path: &Path,
 611    context: &str,
 612    editable_range: &Range<usize>,
 613    cursor_offset: usize,
 614) -> String {
 615    let mut section = String::new();
 616    let path_str = path.to_string_lossy();
 617    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 618
 619    section.push_str(&context[..editable_range.start]);
 620
 621    let editable_text = &context[editable_range.clone()];
 622    let cursor_in_editable = cursor_offset - editable_range.start;
 623    multi_region::write_editable_with_markers_v0318(
 624        &mut section,
 625        editable_text,
 626        cursor_in_editable,
 627        CURSOR_MARKER,
 628    );
 629
 630    if !section.ends_with('\n') {
 631        section.push('\n');
 632    }
 633    section
 634}
 635
 636fn build_v0317_cursor_prefix(
 637    path: &Path,
 638    context: &str,
 639    editable_range: &Range<usize>,
 640    cursor_offset: usize,
 641) -> String {
 642    let mut section = String::new();
 643    let path_str = path.to_string_lossy();
 644    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 645
 646    section.push_str(&context[..editable_range.start]);
 647
 648    let editable_text = &context[editable_range.clone()];
 649    let cursor_in_editable = cursor_offset - editable_range.start;
 650    multi_region::write_editable_with_markers_v0317(
 651        &mut section,
 652        editable_text,
 653        cursor_in_editable,
 654        CURSOR_MARKER,
 655    );
 656
 657    if !section.ends_with('\n') {
 658        section.push('\n');
 659    }
 660    section
 661}
 662
 663fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
 664    let start_row = text[0..range.start].matches('\n').count() as u32;
 665    let mut end_row = start_row + text[range.clone()].matches('\n').count() as u32;
 666    if !text[..range.end].ends_with('\n') {
 667        end_row += 1;
 668    }
 669    return start_row..end_row;
 670}
 671
 672fn assemble_single_file_fim_prompt(
 673    context: &str,
 674    editable_range: &Range<usize>,
 675    cursor_prefix_section: &str,
 676    events: &[Arc<Event>],
 677    max_tokens: usize,
 678) -> String {
 679    let suffix_section = seed_coder::build_suffix_section(context, editable_range);
 680
 681    let suffix_tokens = estimate_tokens(suffix_section.len() + seed_coder::FIM_PREFIX.len());
 682    let cursor_prefix_tokens =
 683        estimate_tokens(cursor_prefix_section.len() + seed_coder::FIM_MIDDLE.len());
 684    let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
 685
 686    let edit_history_section = format_edit_history_within_budget(
 687        events,
 688        seed_coder::FILE_MARKER,
 689        "edit_history",
 690        budget_after_cursor,
 691        max_edit_event_count_for_format(&ZetaFormat::V0327SingleFile),
 692    );
 693
 694    let mut prompt = String::new();
 695    prompt.push_str(&suffix_section);
 696    prompt.push_str(seed_coder::FIM_PREFIX);
 697    prompt.push_str(&edit_history_section);
 698    if !edit_history_section.is_empty() {
 699        prompt.push('\n');
 700    }
 701    prompt.push_str(cursor_prefix_section);
 702    prompt.push_str(seed_coder::FIM_MIDDLE);
 703    prompt
 704}
 705
 706pub fn format_prompt_with_budget_for_format(
 707    input: &ZetaPromptInput,
 708    format: ZetaFormat,
 709    max_tokens: usize,
 710) -> Option<String> {
 711    let (context, editable_range, context_range, cursor_offset) =
 712        resolve_cursor_region(input, format);
 713    let path = &*input.cursor_path;
 714
 715    let empty_files = Vec::new();
 716    let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
 717    let filtered_related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
 718        let relative_row_range =
 719            offset_range_to_row_range(&input.cursor_excerpt, context_range.clone());
 720        let row_range = relative_row_range.start + cursor_excerpt_start_row
 721            ..relative_row_range.end + cursor_excerpt_start_row;
 722        filter_redundant_excerpts(
 723            input_related_files.to_vec(),
 724            input.cursor_path.as_ref(),
 725            row_range,
 726        )
 727    } else {
 728        input_related_files.to_vec()
 729    };
 730    let related_files = filtered_related_files.as_slice();
 731
 732    let prompt = match format {
 733        ZetaFormat::V0211SeedCoder
 734        | ZetaFormat::V0331SeedCoderModelPy
 735        | ZetaFormat::V0304SeedNoEdits
 736        | ZetaFormat::V0306SeedMultiRegions
 737        | ZetaFormat::V0316SeedMultiRegions
 738        | ZetaFormat::V0318SeedMultiRegions
 739        | ZetaFormat::V0317SeedMultiRegions
 740        | ZetaFormat::V0420Diagnostics => {
 741            let mut cursor_section = String::new();
 742
 743            write_cursor_excerpt_section_for_format(
 744                format,
 745                &mut cursor_section,
 746                path,
 747                context,
 748                &editable_range,
 749                cursor_offset,
 750            );
 751
 752            let cursor_buffer_row = input.excerpt_start_row.map(|excerpt_start_row| {
 753                excerpt_start_row
 754                    + input.cursor_excerpt[..context_range.start + cursor_offset]
 755                        .bytes()
 756                        .filter(|byte| *byte == b'\n')
 757                        .count() as u32
 758            });
 759
 760            let budget_with_margin = apply_prompt_budget_margin(max_tokens);
 761            seed_coder::assemble_fim_prompt(
 762                context,
 763                &editable_range,
 764                &cursor_section,
 765                &input.events,
 766                related_files,
 767                if format == ZetaFormat::V0420Diagnostics {
 768                    &input.active_buffer_diagnostics
 769                } else {
 770                    &[]
 771                },
 772                cursor_buffer_row,
 773                budget_with_margin,
 774            )
 775        }
 776        ZetaFormat::V0327SingleFile => {
 777            let mut cursor_section = String::new();
 778            write_cursor_excerpt_section_for_format(
 779                format,
 780                &mut cursor_section,
 781                path,
 782                context,
 783                &editable_range,
 784                cursor_offset,
 785            );
 786
 787            assemble_single_file_fim_prompt(
 788                context,
 789                &editable_range,
 790                &cursor_section,
 791                &input.events,
 792                apply_prompt_budget_margin(max_tokens),
 793            )
 794        }
 795        _ => {
 796            let mut cursor_section = String::new();
 797            write_cursor_excerpt_section_for_format(
 798                format,
 799                &mut cursor_section,
 800                path,
 801                context,
 802                &editable_range,
 803                cursor_offset,
 804            );
 805
 806            let mut remaining_budget = apply_prompt_budget_margin(max_tokens);
 807            let cursor_tokens = estimate_tokens(cursor_section.len());
 808            remaining_budget = remaining_budget.saturating_sub(cursor_tokens);
 809
 810            let edit_history_section = format_edit_history_within_budget(
 811                &input.events,
 812                "<|file_sep|>",
 813                "edit history",
 814                remaining_budget,
 815                max_edit_event_count_for_format(&format),
 816            );
 817            let edit_history_tokens = estimate_tokens(edit_history_section.len());
 818            remaining_budget = remaining_budget.saturating_sub(edit_history_tokens);
 819
 820            let related_files_section = format_related_files_within_budget(
 821                &related_files,
 822                "<|file_sep|>",
 823                "",
 824                remaining_budget,
 825            );
 826
 827            let mut prompt = String::new();
 828            prompt.push_str(&related_files_section);
 829            prompt.push_str(&edit_history_section);
 830            prompt.push_str(&cursor_section);
 831            prompt
 832        }
 833    };
 834    let prompt_tokens = estimate_tokens(prompt.len());
 835    if prompt_tokens > max_tokens {
 836        return None;
 837    }
 838    return Some(prompt);
 839}
 840
 841fn format_active_buffer_diagnostics_with_budget(
 842    diagnostics: &[ActiveBufferDiagnostic],
 843    cursor_buffer_row: Option<u32>,
 844    budget: usize,
 845) -> String {
 846    if diagnostics.is_empty() || budget == 0 {
 847        return String::new();
 848    }
 849
 850    let mut diagnostic_indices = (0..diagnostics.len()).collect::<Vec<_>>();
 851    if let Some(cursor_buffer_row) = cursor_buffer_row {
 852        diagnostic_indices.sort_by_key(|index| {
 853            let range = &diagnostics[*index].snippet_buffer_row_range;
 854            u32::abs_diff(cursor_buffer_row, range.start)
 855                + u32::abs_diff(cursor_buffer_row, range.end)
 856        });
 857    }
 858
 859    let mut output = format!("{}diagnostics\n", seed_coder::FILE_MARKER);
 860    let header_tokens = estimate_tokens(output.len());
 861    if header_tokens > budget {
 862        return String::new();
 863    }
 864
 865    let mut used_tokens = header_tokens;
 866    let mut included_diagnostics = 0;
 867    for diagnostic_index in diagnostic_indices.into_iter().take(10) {
 868        let diagnostic = &diagnostics[diagnostic_index];
 869        let snippet = clamp_text_to_token_count(&diagnostic.snippet, 256);
 870
 871        let diagnostic_section = format!(
 872            "*{}*:\n```\n{}{}\n```\n",
 873            diagnostic.message,
 874            snippet,
 875            if snippet.len() < diagnostic.snippet.len() {
 876                "..."
 877            } else {
 878                ""
 879            }
 880        );
 881        let diagnostic_tokens = estimate_tokens(diagnostic_section.len());
 882        if used_tokens + diagnostic_tokens > budget {
 883            break;
 884        }
 885        output.push_str(&diagnostic_section);
 886        used_tokens += diagnostic_tokens;
 887        included_diagnostics += 1;
 888    }
 889
 890    if included_diagnostics == 0 {
 891        String::new()
 892    } else {
 893        output
 894    }
 895}
 896
 897pub fn filter_redundant_excerpts(
 898    mut related_files: Vec<RelatedFile>,
 899    cursor_path: &Path,
 900    cursor_row_range: Range<u32>,
 901) -> Vec<RelatedFile> {
 902    for file in &mut related_files {
 903        if file.path.as_ref() == cursor_path {
 904            file.excerpts.retain(|excerpt| {
 905                excerpt.row_range.start < cursor_row_range.start
 906                    || excerpt.row_range.end > cursor_row_range.end
 907            });
 908        }
 909    }
 910    related_files.retain(|file| !file.excerpts.is_empty());
 911    related_files
 912}
 913
 914pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
 915    match format {
 916        ZetaFormat::V0112MiddleAtEnd
 917        | ZetaFormat::V0113Ordered
 918        | ZetaFormat::V0114180EditableRegion
 919        | ZetaFormat::V0120GitMergeMarkers
 920        | ZetaFormat::V0131GitMergeMarkersPrefix
 921        | ZetaFormat::V0211Prefill
 922        | ZetaFormat::V0211SeedCoder
 923        | ZetaFormat::V0331SeedCoderModelPy
 924        | ZetaFormat::v0226Hashline
 925        | ZetaFormat::V0304SeedNoEdits
 926        | ZetaFormat::V0304VariableEdit
 927        | ZetaFormat::V0306SeedMultiRegions
 928        | ZetaFormat::V0316SeedMultiRegions
 929        | ZetaFormat::V0318SeedMultiRegions
 930        | ZetaFormat::V0317SeedMultiRegions
 931        | ZetaFormat::V0420Diagnostics
 932        | ZetaFormat::V0327SingleFile => 6,
 933    }
 934}
 935
 936pub fn get_prefill_for_format(
 937    format: ZetaFormat,
 938    context: &str,
 939    editable_range: &Range<usize>,
 940) -> String {
 941    match format {
 942        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
 943        ZetaFormat::V0112MiddleAtEnd
 944        | ZetaFormat::V0113Ordered
 945        | ZetaFormat::V0114180EditableRegion
 946        | ZetaFormat::V0120GitMergeMarkers
 947        | ZetaFormat::V0131GitMergeMarkersPrefix
 948        | ZetaFormat::V0211SeedCoder
 949        | ZetaFormat::V0331SeedCoderModelPy
 950        | ZetaFormat::v0226Hashline
 951        | ZetaFormat::V0304VariableEdit => String::new(),
 952        ZetaFormat::V0304SeedNoEdits
 953        | ZetaFormat::V0306SeedMultiRegions
 954        | ZetaFormat::V0316SeedMultiRegions
 955        | ZetaFormat::V0318SeedMultiRegions
 956        | ZetaFormat::V0317SeedMultiRegions
 957        | ZetaFormat::V0420Diagnostics
 958        | ZetaFormat::V0327SingleFile => String::new(),
 959    }
 960}
 961
 962pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
 963    match format {
 964        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
 965        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
 966        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
 967        ZetaFormat::V0211SeedCoder
 968        | ZetaFormat::V0331SeedCoderModelPy
 969        | ZetaFormat::V0304SeedNoEdits
 970        | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
 971        ZetaFormat::V0316SeedMultiRegions => Some(multi_region::V0316_END_MARKER),
 972        ZetaFormat::V0318SeedMultiRegions => Some(multi_region::V0318_END_MARKER),
 973        ZetaFormat::V0420Diagnostics => Some(multi_region::V0318_END_MARKER),
 974        ZetaFormat::V0317SeedMultiRegions => Some(multi_region::V0317_END_MARKER),
 975        ZetaFormat::V0327SingleFile => Some(multi_region::V0327_END_MARKER),
 976
 977        ZetaFormat::V0112MiddleAtEnd
 978        | ZetaFormat::V0113Ordered
 979        | ZetaFormat::V0114180EditableRegion
 980        | ZetaFormat::v0226Hashline
 981        | ZetaFormat::V0304VariableEdit => None,
 982    }
 983}
 984
 985pub fn encode_patch_as_output_for_format(
 986    format: ZetaFormat,
 987    old_editable_region: &str,
 988    patch: &str,
 989    cursor_offset: Option<usize>,
 990) -> Result<Option<String>> {
 991    match format {
 992        ZetaFormat::v0226Hashline => {
 993            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
 994        }
 995        ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
 996            old_editable_region,
 997            patch,
 998            cursor_offset,
 999        )
1000        .map(Some),
1001        ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
1002            Ok(seed_coder::no_edits(patch))
1003        }
1004        ZetaFormat::V0316SeedMultiRegions => {
1005            let empty_patch = patch.lines().count() <= 3;
1006            if empty_patch {
1007                let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
1008                let marker_num =
1009                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
1010                let tag = multi_region::marker_tag(marker_num);
1011                Ok(Some(format!(
1012                    "{tag}{tag}{}",
1013                    multi_region::V0316_END_MARKER
1014                )))
1015            } else {
1016                Ok(None)
1017            }
1018        }
1019        ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => {
1020            let empty_patch = patch.lines().count() <= 3;
1021            if empty_patch {
1022                let marker_offsets =
1023                    multi_region::compute_marker_offsets_v0318(old_editable_region);
1024                let marker_num =
1025                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
1026                let tag = multi_region::marker_tag(marker_num);
1027                Ok(Some(format!(
1028                    "{tag}{tag}{}",
1029                    multi_region::V0318_END_MARKER
1030                )))
1031            } else {
1032                Ok(None)
1033            }
1034        }
1035        ZetaFormat::V0317SeedMultiRegions => {
1036            let empty_patch = patch.lines().count() <= 3;
1037            if empty_patch {
1038                let tag = multi_region::marker_tag_relative(0);
1039                Ok(Some(format!(
1040                    "{tag}{tag}{}",
1041                    multi_region::V0317_END_MARKER
1042                )))
1043            } else {
1044                Ok(None)
1045            }
1046        }
1047        ZetaFormat::V0327SingleFile => {
1048            let empty_patch = patch.lines().count() <= 3;
1049            if empty_patch {
1050                let marker_offsets =
1051                    multi_region::compute_marker_offsets_v0318(old_editable_region);
1052                let marker_num =
1053                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
1054                let tag = multi_region::marker_tag(marker_num);
1055                Ok(Some(format!(
1056                    "{tag}{tag}{}",
1057                    multi_region::V0327_END_MARKER
1058                )))
1059            } else {
1060                Ok(None)
1061            }
1062        }
1063        _ => Ok(None),
1064    }
1065}
1066
1067/// Given a `ZetaPromptInput`, a format, and a patch (with cursor already
1068/// extracted), produce the expected model output string for training.
1069pub fn format_expected_output(
1070    input: &ZetaPromptInput,
1071    format: ZetaFormat,
1072    patch: &str,
1073    cursor_offset: Option<usize>,
1074) -> Result<String> {
1075    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
1076    let mut old_editable = context[editable_range].to_string();
1077    if !old_editable.is_empty() && !old_editable.ends_with('\n') {
1078        old_editable.push('\n');
1079    }
1080
1081    // Formats with their own output encoding (hashline, variable-edit,
1082    // multi-region empty patches) are handled here.
1083    if let Some(output) =
1084        encode_patch_as_output_for_format(format, &old_editable, patch, cursor_offset)?
1085    {
1086        return Ok(output);
1087    }
1088
1089    let empty_patch = patch.lines().count() <= 3;
1090
1091    match format {
1092        // Multi-region formats: non-empty patches need diff application
1093        // then marker-span encoding.
1094        ZetaFormat::V0316SeedMultiRegions => {
1095            let (new_editable, first_hunk_offset) =
1096                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
1097            let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
1098            multi_region::encode_from_old_and_new_v0316(
1099                &old_editable,
1100                &new_editable,
1101                cursor_in_new,
1102                CURSOR_MARKER,
1103                multi_region::V0316_END_MARKER,
1104            )
1105        }
1106        ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => {
1107            let (new_editable, first_hunk_offset) =
1108                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
1109            let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
1110            multi_region::encode_from_old_and_new_v0318(
1111                &old_editable,
1112                &new_editable,
1113                cursor_in_new,
1114                CURSOR_MARKER,
1115                multi_region::V0318_END_MARKER,
1116            )
1117        }
1118        ZetaFormat::V0327SingleFile => {
1119            let (new_editable, first_hunk_offset) =
1120                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
1121            let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
1122            multi_region::encode_from_old_and_new_v0318(
1123                &old_editable,
1124                &new_editable,
1125                cursor_in_new,
1126                CURSOR_MARKER,
1127                multi_region::V0327_END_MARKER,
1128            )
1129        }
1130        ZetaFormat::V0317SeedMultiRegions => {
1131            let (new_editable, first_hunk_offset) =
1132                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
1133            let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
1134            multi_region::encode_from_old_and_new_v0317(
1135                &old_editable,
1136                &new_editable,
1137                cursor_in_new,
1138                CURSOR_MARKER,
1139                multi_region::V0317_END_MARKER,
1140            )
1141        }
1142        // V0131-style formats and fallback: produce new editable text with
1143        // cursor marker inserted, followed by the end marker.
1144        ZetaFormat::V0112MiddleAtEnd
1145        | ZetaFormat::V0113Ordered
1146        | ZetaFormat::V0114180EditableRegion
1147        | ZetaFormat::V0120GitMergeMarkers
1148        | ZetaFormat::V0131GitMergeMarkersPrefix
1149        | ZetaFormat::V0211Prefill
1150        | ZetaFormat::V0211SeedCoder
1151        | ZetaFormat::v0226Hashline
1152        | ZetaFormat::V0304VariableEdit
1153        | ZetaFormat::V0304SeedNoEdits
1154        | ZetaFormat::V0331SeedCoderModelPy
1155        | ZetaFormat::V0306SeedMultiRegions => {
1156            let (mut result, first_hunk_offset) = if empty_patch {
1157                (old_editable.clone(), None)
1158            } else {
1159                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?
1160            };
1161
1162            if let Some(cursor) = cursor_offset {
1163                let hunk_start = if !empty_patch {
1164                    first_hunk_offset.unwrap_or(0)
1165                } else {
1166                    0
1167                };
1168                let offset = (hunk_start + cursor).min(result.len());
1169                result.insert_str(offset, CURSOR_MARKER);
1170            }
1171
1172            if !result.is_empty() && !result.ends_with('\n') {
1173                result.push('\n');
1174            }
1175
1176            if let Some(end_marker) = output_end_marker_for_format(format) {
1177                result.push_str(end_marker);
1178            }
1179
1180            Ok(result)
1181        }
1182    }
1183}
1184
/// Locates the cursor inside `new_text`, the text obtained by applying a diff.
///
/// `cursor_offset` is added to `first_hunk_offset` (treated as zero when
/// absent) and the sum is clamped to the length of `new_text`. Returns `None`
/// when there is no cursor.
fn cursor_in_new_text(
    cursor_offset: Option<usize>,
    first_hunk_offset: Option<usize>,
    new_text: &str,
) -> Option<usize> {
    let cursor = cursor_offset?;
    let absolute = first_hunk_offset.unwrap_or(0) + cursor;
    Some(absolute.min(new_text.len()))
}
1196
/// The outcome of parsing a model response: replacement text for the editable
/// region, where that replacement applies in the cursor excerpt, and where
/// the model placed the cursor (if it did).
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct ParsedOutput {
    /// Text that should replace the editable region.
    /// `parsed_output_from_editable_region` stores it with the first cursor
    /// marker removed.
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
    /// Byte offset of the cursor marker within `new_editable_region`, if present.
    /// Since the marker itself is stripped, this is the position in the
    /// stripped text at which the marker used to start.
    pub cursor_offset_in_new_editable_region: Option<usize>,
}
1206
/// A cursor location derived from parsed model output.
///
/// The field descriptions below reflect how
/// `cursor_position_from_parsed_output` populates this struct.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct CursorPosition {
    /// The prompt input's `cursor_path`, lossily converted to a string.
    pub path: String,
    /// Count of newlines before the cursor: those in the cursor excerpt ahead
    /// of the editable region plus those in the new editable region ahead of
    /// the cursor.
    pub row: usize,
    /// Distance in bytes from the start of the cursor's line to the cursor.
    pub column: usize,
    /// Start of the editable region within the cursor excerpt plus the
    /// cursor's offset inside the new editable region.
    pub offset: usize,
    /// The cursor's byte offset inside the new editable region.
    pub editable_region_offset: usize,
}
1215
1216pub fn parsed_output_from_editable_region(
1217    range_in_excerpt: Range<usize>,
1218    mut new_editable_region: String,
1219) -> ParsedOutput {
1220    let cursor_offset_in_new_editable_region = new_editable_region.find(CURSOR_MARKER);
1221    if let Some(offset) = cursor_offset_in_new_editable_region {
1222        new_editable_region.replace_range(offset..offset + CURSOR_MARKER.len(), "");
1223    }
1224
1225    ParsedOutput {
1226        new_editable_region,
1227        range_in_excerpt,
1228        cursor_offset_in_new_editable_region,
1229    }
1230}
1231
1232/// Parse model output for the given zeta format
1233pub fn parse_zeta2_model_output(
1234    output: &str,
1235    format: ZetaFormat,
1236    prompt_inputs: &ZetaPromptInput,
1237) -> Result<ParsedOutput> {
1238    let output = match output_end_marker_for_format(format) {
1239        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
1240        None => output,
1241    };
1242
1243    let (context, editable_range_in_context, context_range, cursor_offset) =
1244        resolve_cursor_region(prompt_inputs, format);
1245    let context_start = context_range.start;
1246    let old_editable_region = &context[editable_range_in_context.clone()];
1247    let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range_in_context.start);
1248
1249    let (range_in_context, output) = match format {
1250        ZetaFormat::v0226Hashline => (
1251            editable_range_in_context,
1252            if hashline::output_has_edit_commands(output) {
1253                hashline::apply_edit_commands(old_editable_region, output)
1254            } else {
1255                output.to_string()
1256            },
1257        ),
1258        ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
1259        ZetaFormat::V0304SeedNoEdits => (
1260            editable_range_in_context,
1261            if output.starts_with(seed_coder::NO_EDITS) {
1262                old_editable_region.to_string()
1263            } else {
1264                output.to_string()
1265            },
1266        ),
1267        ZetaFormat::V0306SeedMultiRegions => (
1268            editable_range_in_context,
1269            if output.starts_with(seed_coder::NO_EDITS) {
1270                old_editable_region.to_string()
1271            } else {
1272                multi_region::apply_marker_span(old_editable_region, output)?
1273            },
1274        ),
1275        ZetaFormat::V0316SeedMultiRegions => (
1276            editable_range_in_context,
1277            multi_region::apply_marker_span_v0316(old_editable_region, output)?,
1278        ),
1279        ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => (
1280            editable_range_in_context,
1281            multi_region::apply_marker_span_v0318(old_editable_region, output)?,
1282        ),
1283        ZetaFormat::V0317SeedMultiRegions => (
1284            editable_range_in_context,
1285            multi_region::apply_marker_span_v0317(
1286                old_editable_region,
1287                output,
1288                Some(cursor_offset_in_editable),
1289            )?,
1290        ),
1291        ZetaFormat::V0327SingleFile => (
1292            editable_range_in_context,
1293            multi_region::apply_marker_span_v0318(old_editable_region, output)?,
1294        ),
1295        _ => (editable_range_in_context, output.to_string()),
1296    };
1297
1298    let range_in_excerpt =
1299        range_in_context.start + context_start..range_in_context.end + context_start;
1300
1301    Ok(parsed_output_from_editable_region(range_in_excerpt, output))
1302}
1303
1304pub fn parse_zeta2_model_output_as_patch(
1305    output: &str,
1306    format: ZetaFormat,
1307    prompt_inputs: &ZetaPromptInput,
1308) -> Result<String> {
1309    let parsed = parse_zeta2_model_output(output, format, prompt_inputs)?;
1310    parsed_output_to_patch(prompt_inputs, parsed)
1311}
1312
1313pub fn cursor_position_from_parsed_output(
1314    prompt_inputs: &ZetaPromptInput,
1315    parsed: &ParsedOutput,
1316) -> Option<CursorPosition> {
1317    let cursor_offset = parsed.cursor_offset_in_new_editable_region?;
1318    let editable_region_offset = parsed.range_in_excerpt.start;
1319    let excerpt = prompt_inputs.cursor_excerpt.as_ref();
1320
1321    let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
1322
1323    let new_editable_region = &parsed.new_editable_region;
1324    let prefix_end = cursor_offset.min(new_editable_region.len());
1325    let new_region_prefix = &new_editable_region[..prefix_end];
1326
1327    let row = editable_region_start_line + new_region_prefix.matches('\n').count();
1328
1329    let column = match new_region_prefix.rfind('\n') {
1330        Some(last_newline) => cursor_offset - last_newline - 1,
1331        None => {
1332            let content_prefix = &excerpt[..editable_region_offset];
1333            let content_column = match content_prefix.rfind('\n') {
1334                Some(last_newline) => editable_region_offset - last_newline - 1,
1335                None => editable_region_offset,
1336            };
1337            content_column + cursor_offset
1338        }
1339    };
1340
1341    Some(CursorPosition {
1342        path: prompt_inputs.cursor_path.to_string_lossy().into_owned(),
1343        row,
1344        column,
1345        offset: editable_region_offset + cursor_offset,
1346        editable_region_offset: cursor_offset,
1347    })
1348}
1349
1350pub fn parsed_output_to_patch(
1351    prompt_inputs: &ZetaPromptInput,
1352    parsed: ParsedOutput,
1353) -> Result<String> {
1354    let range_in_excerpt = parsed.range_in_excerpt;
1355    let excerpt = prompt_inputs.cursor_excerpt.as_ref();
1356    let old_text = excerpt[range_in_excerpt.clone()].to_string();
1357    let mut new_text = parsed.new_editable_region;
1358
1359    let mut old_text_normalized = old_text;
1360    if !new_text.is_empty() && !new_text.ends_with('\n') {
1361        new_text.push('\n');
1362    }
1363    if !old_text_normalized.is_empty() && !old_text_normalized.ends_with('\n') {
1364        old_text_normalized.push('\n');
1365    }
1366
1367    let editable_region_offset = range_in_excerpt.start;
1368    let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count() as u32;
1369    let editable_region_lines = old_text_normalized.lines().count() as u32;
1370
1371    let diff = udiff::unified_diff_with_context(
1372        &old_text_normalized,
1373        &new_text,
1374        editable_region_start_line,
1375        editable_region_start_line,
1376        editable_region_lines,
1377    );
1378
1379    let path = prompt_inputs
1380        .cursor_path
1381        .to_string_lossy()
1382        .trim_start_matches('/')
1383        .to_string();
1384    let formatted_diff = format!("--- a/{path}\n+++ b/{path}\n{diff}");
1385
1386    Ok(udiff::encode_cursor_in_patch(
1387        &formatted_diff,
1388        parsed.cursor_offset_in_new_editable_region,
1389    ))
1390}
1391
/// Public entry point that forwards to `excerpt_ranges_for_format`.
///
/// Returns the pair of byte ranges selected from `ranges` for `format`;
/// `resolve_cursor_region` treats the pair as `(editable_range, context_range)`.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
1398
1399pub fn resolve_cursor_region(
1400    input: &ZetaPromptInput,
1401    format: ZetaFormat,
1402) -> (&str, Range<usize>, Range<usize>, usize) {
1403    let (editable_range, context_range) = if format == ZetaFormat::V0327SingleFile {
1404        let (editable_tokens, _) = token_limits_for_format(format);
1405        let context_range = 0..input.cursor_excerpt.len();
1406        let editable_range = multi_region::compute_v0327_editable_range(
1407            &input.cursor_excerpt,
1408            input.cursor_offset_in_excerpt,
1409            editable_tokens,
1410        );
1411        (editable_range, context_range)
1412    } else if let Some(syntax_ranges) = &input.syntax_ranges {
1413        let (editable_tokens, context_tokens) = token_limits_for_format(format);
1414        compute_editable_and_context_ranges(
1415            &input.cursor_excerpt,
1416            input.cursor_offset_in_excerpt,
1417            syntax_ranges,
1418            editable_tokens,
1419            context_tokens,
1420        )
1421    } else {
1422        excerpt_range_for_format(format, &input.excerpt_ranges)
1423    };
1424
1425    let context_start = context_range.start;
1426    let context_text = &input.cursor_excerpt[context_range.clone()];
1427    let adjusted_editable =
1428        (editable_range.start - context_start)..(editable_range.end - context_start);
1429    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
1430
1431    (
1432        context_text,
1433        adjusted_editable,
1434        context_range,
1435        adjusted_cursor,
1436    )
1437}
1438
/// Computes the prefill for `input` under `format`.
///
/// Resolves the cursor region, then passes the context text and the
/// context-relative editable range to `get_prefill_for_format`; the context
/// range and cursor offset are not needed here.
pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
    get_prefill_for_format(format, context, &editable_range)
}
1443
1444fn format_edit_history_within_budget(
1445    events: &[Arc<Event>],
1446    file_marker: &str,
1447    edit_history_name: &str,
1448    max_tokens: usize,
1449    max_edit_event_count: usize,
1450) -> String {
1451    let header = format!("{}{}\n", file_marker, edit_history_name);
1452    let header_tokens = estimate_tokens(header.len());
1453    if header_tokens >= max_tokens {
1454        return String::new();
1455    }
1456
1457    let mut event_strings: Vec<String> = Vec::new();
1458    let mut total_tokens = header_tokens;
1459
1460    for event in events.iter().rev().take(max_edit_event_count) {
1461        let mut event_str = String::new();
1462        write_event(&mut event_str, event);
1463        let event_tokens = estimate_tokens(event_str.len());
1464
1465        if total_tokens + event_tokens > max_tokens {
1466            break;
1467        }
1468        total_tokens += event_tokens;
1469        event_strings.push(event_str);
1470    }
1471
1472    if event_strings.is_empty() {
1473        return String::new();
1474    }
1475
1476    let mut result = header;
1477    for event_str in event_strings.iter().rev() {
1478        result.push_str(event_str);
1479    }
1480    result
1481}
1482
1483fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
1484    let needs_newline = !excerpt.text.ends_with('\n');
1485    let needs_ellipsis = excerpt.row_range.end < file_max_row;
1486    let len = excerpt.text.len()
1487        + if needs_newline { "\n".len() } else { 0 }
1488        + if needs_ellipsis { "...\n".len() } else { 0 };
1489    estimate_tokens(len)
1490}
1491
1492pub fn format_related_files_within_budget(
1493    related_files: &[RelatedFile],
1494    file_prefix: &str,
1495    file_suffix: &str,
1496    max_tokens: usize,
1497) -> String {
1498    struct ExcerptCandidate {
1499        file_ix: usize,
1500        excerpt_ix: usize,
1501        order: usize,
1502    }
1503
1504    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
1505        .iter()
1506        .enumerate()
1507        .flat_map(|(file_ix, file)| {
1508            file.excerpts
1509                .iter()
1510                .enumerate()
1511                .map(move |(excerpt_ix, e)| ExcerptCandidate {
1512                    file_ix,
1513                    excerpt_ix,
1514                    order: e.order,
1515                })
1516        })
1517        .collect();
1518
1519    // Pre-compute file header strings and their token costs.
1520    let file_headers: Vec<String> = related_files
1521        .iter()
1522        .map(|file| {
1523            let path_str = file.path.to_string_lossy();
1524            format!("{}{}\n", file_prefix, path_str)
1525        })
1526        .collect();
1527
1528    // Sort the excerpts by their order and determine how many fit within the budget.
1529    let mut total_tokens = 0;
1530    let mut included_excerpt_count = 0_usize;
1531    let mut included_file_indices = vec![false; related_files.len()];
1532    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
1533    for candidate in &excerpt_candidates {
1534        let file = &related_files[candidate.file_ix];
1535        let excerpt = &file.excerpts[candidate.excerpt_ix];
1536        let file_already_included = included_file_indices[candidate.file_ix];
1537        let header_cost = if file_already_included {
1538            0
1539        } else {
1540            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
1541        };
1542        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
1543        if total_tokens + header_cost + excerpt_cost > max_tokens {
1544            break;
1545        }
1546        total_tokens += header_cost + excerpt_cost;
1547        if !file_already_included {
1548            included_file_indices[candidate.file_ix] = true;
1549        }
1550        included_excerpt_count += 1;
1551    }
1552
1553    excerpt_candidates.truncate(included_excerpt_count);
1554    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
1555
1556    // Render all of the files that fit within the token budget, in the original order.
1557    let mut result = String::new();
1558    let mut last_file_ix = None;
1559    for candidate in &excerpt_candidates {
1560        if last_file_ix != Some(candidate.file_ix) {
1561            if last_file_ix.is_some() {
1562                result.push_str(file_suffix);
1563            }
1564            result.push_str(&file_headers[candidate.file_ix]);
1565            last_file_ix = Some(candidate.file_ix);
1566        }
1567        let file = &related_files[candidate.file_ix];
1568        let excerpt = &file.excerpts[candidate.excerpt_ix];
1569        result.push_str(&excerpt.text);
1570        if !result.ends_with('\n') {
1571            result.push('\n');
1572        }
1573        if excerpt.row_range.end < file.max_row {
1574            result.push_str("...\n");
1575        }
1576    }
1577
1578    result
1579}
1580
1581pub fn write_related_files(
1582    prompt: &mut String,
1583    related_files: &[RelatedFile],
1584) -> Vec<Range<usize>> {
1585    let mut ranges = Vec::new();
1586    for file in related_files {
1587        let start = prompt.len();
1588        let path_str = file.path.to_string_lossy();
1589        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1590        for excerpt in &file.excerpts {
1591            prompt.push_str(&excerpt.text);
1592            if !prompt.ends_with('\n') {
1593                prompt.push('\n');
1594            }
1595            if excerpt.row_range.end < file.max_row {
1596                prompt.push_str("...\n");
1597            }
1598        }
1599        let end = prompt.len();
1600        ranges.push(start..end);
1601    }
1602    ranges
1603}
1604
1605mod v0112_middle_at_end {
1606    use super::*;
1607
1608    pub fn special_tokens() -> &'static [&'static str] {
1609        &[
1610            "<|fim_prefix|>",
1611            "<|fim_suffix|>",
1612            "<|fim_middle|>",
1613            "<|file_sep|>",
1614            CURSOR_MARKER,
1615        ]
1616    }
1617
1618    pub fn write_cursor_excerpt_section(
1619        prompt: &mut String,
1620        path: &Path,
1621        context: &str,
1622        editable_range: &Range<usize>,
1623        cursor_offset: usize,
1624    ) {
1625        let path_str = path.to_string_lossy();
1626        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1627
1628        prompt.push_str("<|fim_prefix|>\n");
1629        prompt.push_str(&context[..editable_range.start]);
1630
1631        prompt.push_str("<|fim_suffix|>\n");
1632        prompt.push_str(&context[editable_range.end..]);
1633        if !prompt.ends_with('\n') {
1634            prompt.push('\n');
1635        }
1636
1637        prompt.push_str("<|fim_middle|>current\n");
1638        prompt.push_str(&context[editable_range.start..cursor_offset]);
1639        prompt.push_str(CURSOR_MARKER);
1640        prompt.push_str(&context[cursor_offset..editable_range.end]);
1641        if !prompt.ends_with('\n') {
1642            prompt.push('\n');
1643        }
1644
1645        prompt.push_str("<|fim_middle|>updated\n");
1646    }
1647}
1648
1649mod v0113_ordered {
1650    use super::*;
1651
1652    pub fn special_tokens() -> &'static [&'static str] {
1653        &[
1654            "<|fim_prefix|>",
1655            "<|fim_suffix|>",
1656            "<|fim_middle|>",
1657            "<|file_sep|>",
1658            CURSOR_MARKER,
1659        ]
1660    }
1661
1662    pub fn write_cursor_excerpt_section(
1663        prompt: &mut String,
1664        path: &Path,
1665        context: &str,
1666        editable_range: &Range<usize>,
1667        cursor_offset: usize,
1668    ) {
1669        let path_str = path.to_string_lossy();
1670        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1671
1672        prompt.push_str("<|fim_prefix|>\n");
1673        prompt.push_str(&context[..editable_range.start]);
1674        if !prompt.ends_with('\n') {
1675            prompt.push('\n');
1676        }
1677
1678        prompt.push_str("<|fim_middle|>current\n");
1679        prompt.push_str(&context[editable_range.start..cursor_offset]);
1680        prompt.push_str(CURSOR_MARKER);
1681        prompt.push_str(&context[cursor_offset..editable_range.end]);
1682        if !prompt.ends_with('\n') {
1683            prompt.push('\n');
1684        }
1685
1686        prompt.push_str("<|fim_suffix|>\n");
1687        prompt.push_str(&context[editable_range.end..]);
1688        if !prompt.ends_with('\n') {
1689            prompt.push('\n');
1690        }
1691
1692        prompt.push_str("<|fim_middle|>updated\n");
1693    }
1694}
1695
mod v0114180_editable_region {
    use super::*;

    /// Special tokens for this prompt format; identical to the set returned by
    /// `v0113_ordered::special_tokens`, to which this simply delegates.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
1703
1704pub mod v0120_git_merge_markers {
1705    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1706    //!
1707    //! Example prompt:
1708    //!
1709    //! <|file_sep|>path/to/target_file.py
1710    //! <|fim_prefix|>
1711    //! code before editable region
1712    //! <|fim_suffix|>
1713    //! code after editable region
1714    //! <|fim_middle|>
1715    //! <<<<<<< CURRENT
1716    //! code that
1717    //! needs to<|user_cursor|>
1718    //! be rewritten
1719    //! =======
1720    //!
1721    //! Expected output (should be generated by the model):
1722    //!
1723    //! updated
1724    //! code with
1725    //! changes applied
1726    //! >>>>>>> UPDATED
1727
1728    use super::*;
1729
1730    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1731    pub const SEPARATOR: &str = "=======\n";
1732    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1733
1734    pub fn special_tokens() -> &'static [&'static str] {
1735        &[
1736            "<|fim_prefix|>",
1737            "<|fim_suffix|>",
1738            "<|fim_middle|>",
1739            "<|file_sep|>",
1740            START_MARKER,
1741            SEPARATOR,
1742            END_MARKER,
1743            CURSOR_MARKER,
1744        ]
1745    }
1746
1747    pub fn write_cursor_excerpt_section(
1748        prompt: &mut String,
1749        path: &Path,
1750        context: &str,
1751        editable_range: &Range<usize>,
1752        cursor_offset: usize,
1753    ) {
1754        let path_str = path.to_string_lossy();
1755        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1756
1757        prompt.push_str("<|fim_prefix|>");
1758        prompt.push_str(&context[..editable_range.start]);
1759
1760        prompt.push_str("<|fim_suffix|>");
1761        prompt.push_str(&context[editable_range.end..]);
1762        if !prompt.ends_with('\n') {
1763            prompt.push('\n');
1764        }
1765
1766        prompt.push_str("<|fim_middle|>");
1767        prompt.push_str(START_MARKER);
1768        prompt.push_str(&context[editable_range.start..cursor_offset]);
1769        prompt.push_str(CURSOR_MARKER);
1770        prompt.push_str(&context[cursor_offset..editable_range.end]);
1771        if !prompt.ends_with('\n') {
1772            prompt.push('\n');
1773        }
1774        prompt.push_str(SEPARATOR);
1775    }
1776}
1777
1778pub mod v0131_git_merge_markers_prefix {
1779    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1780    //!
1781    //! Example prompt:
1782    //!
1783    //! <|file_sep|>path/to/target_file.py
1784    //! <|fim_prefix|>
1785    //! code before editable region
1786    //! <<<<<<< CURRENT
1787    //! code that
1788    //! needs to<|user_cursor|>
1789    //! be rewritten
1790    //! =======
1791    //! <|fim_suffix|>
1792    //! code after editable region
1793    //! <|fim_middle|>
1794    //!
1795    //! Expected output (should be generated by the model):
1796    //!
1797    //! updated
1798    //! code with
1799    //! changes applied
1800    //! >>>>>>> UPDATED
1801
1802    use super::*;
1803
1804    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1805    pub const SEPARATOR: &str = "=======\n";
1806    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1807
1808    pub fn special_tokens() -> &'static [&'static str] {
1809        &[
1810            "<|fim_prefix|>",
1811            "<|fim_suffix|>",
1812            "<|fim_middle|>",
1813            "<|file_sep|>",
1814            START_MARKER,
1815            SEPARATOR,
1816            END_MARKER,
1817            CURSOR_MARKER,
1818        ]
1819    }
1820
1821    pub fn write_cursor_excerpt_section(
1822        prompt: &mut String,
1823        path: &Path,
1824        context: &str,
1825        editable_range: &Range<usize>,
1826        cursor_offset: usize,
1827    ) {
1828        let path_str = path.to_string_lossy();
1829        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1830
1831        prompt.push_str("<|fim_prefix|>");
1832        prompt.push_str(&context[..editable_range.start]);
1833        prompt.push_str(START_MARKER);
1834        prompt.push_str(&context[editable_range.start..cursor_offset]);
1835        prompt.push_str(CURSOR_MARKER);
1836        prompt.push_str(&context[cursor_offset..editable_range.end]);
1837        if !prompt.ends_with('\n') {
1838            prompt.push('\n');
1839        }
1840        prompt.push_str(SEPARATOR);
1841
1842        prompt.push_str("<|fim_suffix|>");
1843        prompt.push_str(&context[editable_range.end..]);
1844        if !prompt.ends_with('\n') {
1845            prompt.push('\n');
1846        }
1847
1848        prompt.push_str("<|fim_middle|>");
1849    }
1850}
1851
1852pub mod v0211_prefill {
1853    use super::*;
1854
1855    pub fn special_tokens() -> &'static [&'static str] {
1856        v0131_git_merge_markers_prefix::special_tokens()
1857    }
1858
1859    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1860        let editable_region = &context[editable_range.start..editable_range.end];
1861
1862        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1863        let prefill_len = editable_region.floor_char_boundary(prefill_len);
1864
1865        // Find a token boundary to avoid splitting tokens in the prefill.
1866        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1867        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1868        // the \n and consume any consecutive \n characters after it.
1869        let prefill = &editable_region[..prefill_len];
1870        match prefill.rfind('\n') {
1871            Some(pos) => {
1872                let mut end = pos + 1;
1873                while end < editable_region.len()
1874                    && editable_region.as_bytes().get(end) == Some(&b'\n')
1875                {
1876                    end += 1;
1877                }
1878                editable_region[..end].to_string()
1879            }
1880            // No newline found. Fall back to splitting before the last space
1881            // (word-level boundary)
1882            None => match prefill.rfind(' ') {
1883                Some(pos) => prefill[..pos].to_string(),
1884                None => prefill.to_string(),
1885            },
1886        }
1887    }
1888}
1889
1890pub mod hashline {
1891
1892    use std::fmt::Display;
1893
    /// Written at the very end of the prompt by `write_cursor_excerpt_section`
    /// (followed by a newline).
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Written right before the hashline-encoded editable region by
    /// `write_cursor_excerpt_section`.
    pub const START_MARKER: &str = "<|fim_middle|>current";

    use super::*;

    /// Prefix of a "replace line(s)" command line in the model output.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of an "insert lines" command line in the model output.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
    /// Output meaning "leave the editable region unchanged": produced by
    /// `patch_to_edit_commands` when no commands were generated and honored by
    /// `apply_edit_commands` when the model output starts with it.
    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1902
1903    pub fn special_tokens() -> &'static [&'static str] {
1904        return &[
1905            SET_COMMAND_MARKER,
1906            "<|set_range|>",
1907            INSERT_COMMAND_MARKER,
1908            NO_EDITS_COMMAND_MARKER,
1909            CURSOR_MARKER,
1910            "<|file_sep|>",
1911            "<|fim_prefix|>",
1912            "<|fim_suffix|>",
1913            "<|fim_middle|>",
1914        ];
1915    }
1916
1917    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1918    #[derive(Debug, Clone, PartialEq, Eq)]
1919    struct LineRef {
1920        index: usize,
1921        hash: u8,
1922    }
1923
1924    impl Display for LineRef {
1925        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1926            write!(f, "{}:{:02x}", self.index, self.hash)
1927        }
1928    }
1929
1930    pub fn hash_line(line: &[u8]) -> u8 {
1931        let mut h: u8 = 0;
1932        for &byte in line {
1933            h = h.wrapping_add(byte);
1934        }
1935        return h;
1936    }
1937
1938    /// Write the hashline-encoded editable region into `out`. Each line of
1939    /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1940    /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1941    /// to the start of `editable_text`).
1942    pub fn write_hashline_editable_region(
1943        out: &mut String,
1944        editable_text: &str,
1945        cursor_offset_in_editable: usize,
1946    ) {
1947        let mut offset = 0;
1948        for (i, line) in editable_text.lines().enumerate() {
1949            let (head, cursor, tail) = if cursor_offset_in_editable > offset
1950                && cursor_offset_in_editable < offset + line.len()
1951            {
1952                (
1953                    &line[..cursor_offset_in_editable - offset],
1954                    CURSOR_MARKER,
1955                    &line[cursor_offset_in_editable - offset..],
1956                )
1957            } else {
1958                (line, "", "")
1959            };
1960            write!(
1961                out,
1962                "\n{}|{head}{cursor}{tail}",
1963                LineRef {
1964                    index: i,
1965                    hash: hash_line(line.as_bytes())
1966                }
1967            )
1968            .unwrap();
1969            offset += line.len() + 1;
1970        }
1971    }
1972
1973    pub fn write_cursor_excerpt_section(
1974        prompt: &mut String,
1975        path: &Path,
1976        context: &str,
1977        editable_range: &Range<usize>,
1978        cursor_offset: usize,
1979    ) {
1980        let path_str = path.to_string_lossy();
1981        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1982
1983        prompt.push_str("<|fim_prefix|>\n");
1984        prompt.push_str(&context[..editable_range.start]);
1985        prompt.push_str(START_MARKER);
1986
1987        let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1988        let editable_region = &context[editable_range.clone()];
1989        write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1990
1991        if !prompt.ends_with('\n') {
1992            prompt.push('\n');
1993        }
1994
1995        prompt.push_str("<|fim_suffix|>\n");
1996        prompt.push_str(&context[editable_range.end..]);
1997        if !prompt.ends_with('\n') {
1998            prompt.push('\n');
1999        }
2000
2001        prompt.push_str(END_MARKER);
2002        prompt.push('\n');
2003    }
2004
    /// A single edit command parsed from the model output.
    ///
    /// `content` borrows from the model output text: it is the body that
    /// follows the command line, up to the next command line or end of output.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            // First line of the replaced range.
            start: LineRef,
            // Last line of the replaced range (inclusive).
            end: LineRef,
            // Replacement text for the whole range.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            // Anchor line; `None` when the command carried no parseable line reference.
            after: Option<LineRef>,
            // Text to insert.
            content: &'a str,
        },
    }
2022
2023    /// Parse a line reference like `3:c3` into a `LineRef`.
2024    fn parse_line_ref(s: &str) -> Option<LineRef> {
2025        let (idx_str, hash_str) = s.split_once(':')?;
2026        let index = idx_str.parse::<usize>().ok()?;
2027        let hash = u8::from_str_radix(hash_str, 16).ok()?;
2028        Some(LineRef { index, hash })
2029    }
2030
2031    /// Parse the model output into a list of `EditCommand`s.
2032    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
2033        let mut commands = Vec::new();
2034        let mut offset = 0usize;
2035
2036        while offset < model_output.len() {
2037            let next_nl = model_output[offset..]
2038                .find('\n')
2039                .map(|i| offset + i)
2040                .unwrap_or(model_output.len());
2041            let line = &model_output[offset..next_nl];
2042            let line_end = if next_nl < model_output.len() {
2043                next_nl + 1
2044            } else {
2045                next_nl
2046            };
2047
2048            let trimmed = line.trim();
2049            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
2050                (true, spec)
2051            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
2052                (false, spec)
2053            } else {
2054                offset = line_end;
2055                continue;
2056            };
2057
2058            let mut content_end = line_end;
2059            let mut scan = line_end;
2060
2061            while scan < model_output.len() {
2062                let body_nl = model_output[scan..]
2063                    .find('\n')
2064                    .map(|i| scan + i)
2065                    .unwrap_or(model_output.len());
2066                let body_line = &model_output[scan..body_nl];
2067                if body_line.trim().starts_with(SET_COMMAND_MARKER)
2068                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
2069                {
2070                    break;
2071                }
2072                scan = if body_nl < model_output.len() {
2073                    body_nl + 1
2074                } else {
2075                    body_nl
2076                };
2077                content_end = scan;
2078            }
2079
2080            let content = &model_output[line_end..content_end];
2081
2082            if is_set {
2083                if let Some((start_str, end_str)) = specifier.split_once('-') {
2084                    if let (Some(start), Some(end)) =
2085                        (parse_line_ref(start_str), parse_line_ref(end_str))
2086                    {
2087                        commands.push(EditCommand::Set {
2088                            start,
2089                            end,
2090                            content,
2091                        });
2092                    }
2093                } else if let Some(target) = parse_line_ref(specifier) {
2094                    commands.push(EditCommand::Set {
2095                        start: target.clone(),
2096                        end: target,
2097                        content,
2098                    });
2099                }
2100            } else {
2101                let after = parse_line_ref(specifier);
2102                commands.push(EditCommand::Insert { after, content });
2103            }
2104
2105            offset = scan;
2106        }
2107
2108        commands
2109    }
2110
2111    /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
2112    /// (as opposed to being a plain full-replacement output).
2113    /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
2114    /// editable region, returning the plain text content.
2115    pub fn strip_hashline_prefixes(region: &str) -> String {
2116        let mut decoded: String = region
2117            .lines()
2118            .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
2119            .collect::<Vec<_>>()
2120            .join("\n");
2121        if region.ends_with('\n') {
2122            decoded.push('\n');
2123        }
2124        decoded
2125    }
2126
2127    pub fn output_has_edit_commands(model_output: &str) -> bool {
2128        model_output.contains(SET_COMMAND_MARKER)
2129            || model_output.contains(INSERT_COMMAND_MARKER)
2130            || model_output.contains(NO_EDITS_COMMAND_MARKER)
2131    }
2132
2133    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
2134    /// original editable region text.
2135    ///
2136    /// `editable_region` is the original text of the editable region (without hash
2137    /// prefixes). `model_output` is the raw model response containing edit commands.
2138    ///
2139    /// Returns the full replacement text for the editable region.
2140    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
2141        if model_output
2142            .trim_start()
2143            .starts_with(NO_EDITS_COMMAND_MARKER)
2144        {
2145            return editable_region.to_string();
2146        }
2147
2148        let original_lines: Vec<&str> = editable_region.lines().collect();
2149        let old_hashes: Vec<u8> = original_lines
2150            .iter()
2151            .map(|line| hash_line(line.as_bytes()))
2152            .collect();
2153
2154        let commands = parse_edit_commands(model_output);
2155
2156        // For set operations: indexed by start line → Some((end line index, content))
2157        // For insert operations: indexed by line index → vec of content to insert after
2158        // Insert-before-first is tracked separately.
2159        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
2160        let mut insert_before_first: Vec<&str> = Vec::new();
2161        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
2162
2163        for command in &commands {
2164            match command {
2165                EditCommand::Set {
2166                    start,
2167                    end,
2168                    content,
2169                } => {
2170                    if start.index < old_hashes.len()
2171                        && end.index < old_hashes.len()
2172                        && start.index <= end.index
2173                        && old_hashes[start.index] == start.hash
2174                        && old_hashes[end.index] == end.hash
2175                    {
2176                        set_ops[start.index] = Some((end.index, *content));
2177                    }
2178                }
2179                EditCommand::Insert { after, content } => match after {
2180                    None => insert_before_first.push(*content),
2181                    Some(line_ref) => {
2182                        if line_ref.index < old_hashes.len()
2183                            && old_hashes[line_ref.index] == line_ref.hash
2184                        {
2185                            insert_after[line_ref.index].push(*content);
2186                        }
2187                    }
2188                },
2189            }
2190        }
2191
2192        let mut result = String::new();
2193
2194        // Emit any insertions before the first line
2195        for content in &insert_before_first {
2196            result.push_str(content);
2197            if !content.ends_with('\n') {
2198                result.push('\n');
2199            }
2200        }
2201
2202        let mut i = 0;
2203        while i < original_lines.len() {
2204            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
2205                // Replace lines i..=end_index with the replacement content
2206                result.push_str(replacement);
2207                if !replacement.is_empty() && !replacement.ends_with('\n') {
2208                    result.push('\n');
2209                }
2210                // Emit any insertions after the end of this set range
2211                if *end_index < insert_after.len() {
2212                    for content in &insert_after[*end_index] {
2213                        result.push_str(content);
2214                        if !content.ends_with('\n') {
2215                            result.push('\n');
2216                        }
2217                    }
2218                }
2219                i = end_index + 1;
2220            } else {
2221                // Keep the original line
2222                result.push_str(original_lines[i]);
2223                result.push('\n');
2224                // Emit any insertions after this line
2225                for content in &insert_after[i] {
2226                    result.push_str(content);
2227                    if !content.ends_with('\n') {
2228                        result.push('\n');
2229                    }
2230                }
2231                i += 1;
2232            }
2233        }
2234
2235        // Preserve trailing newline behavior: if the original ended with a
2236        // newline the result already has one; if it didn't, trim the extra one
2237        // we added.
2238        if !editable_region.ends_with('\n') && result.ends_with('\n') {
2239            result.pop();
2240        }
2241
2242        result
2243    }
2244
2245    /// Convert a unified diff patch into hashline edit commands.
2246    ///
2247    /// Parses the unified diff `patch` directly to determine which lines of
2248    /// `old_text` are deleted/replaced and what new lines are added, then emits
2249    /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
2250    /// `{index}:{hash}` identifiers.
2251    ///
2252    /// `cursor_offset` is an optional byte offset into the first hunk's new
2253    /// text (context + additions) where the cursor marker should be placed.
2254    pub fn patch_to_edit_commands(
2255        old_text: &str,
2256        patch: &str,
2257        cursor_offset: Option<usize>,
2258    ) -> Result<String> {
2259        let old_lines: Vec<&str> = old_text.lines().collect();
2260        let old_hashes: Vec<u8> = old_lines
2261            .iter()
2262            .map(|line| hash_line(line.as_bytes()))
2263            .collect();
2264
2265        let mut result = String::new();
2266        let mut first_hunk = true;
2267
2268        struct Hunk<'a> {
2269            line_range: Range<usize>,
2270            new_text_lines: Vec<&'a str>,
2271            cursor_line_offset_in_new_text: Option<(usize, usize)>,
2272        }
2273
2274        // Parse the patch line by line. We only care about hunk headers,
2275        // context, deletions, and additions.
2276        let mut old_line_index: usize = 0;
2277        let mut current_hunk: Option<Hunk> = None;
2278        // Byte offset tracking within the hunk's new text for cursor placement.
2279        let mut new_text_byte_offset: usize = 0;
2280        // The line index of the last old line seen before/in the current hunk
2281        // (used for insert-after reference).
2282        let mut last_old_line_before_hunk: Option<usize> = None;
2283
2284        fn flush_hunk(
2285            hunk: Hunk,
2286            last_old_line: Option<usize>,
2287            result: &mut String,
2288            old_hashes: &[u8],
2289        ) {
2290            if hunk.line_range.is_empty() {
2291                // Pure insertion — reference the old line to insert after when in bounds.
2292                if let Some(after) = last_old_line
2293                    && let Some(&hash) = old_hashes.get(after)
2294                {
2295                    write!(
2296                        result,
2297                        "{INSERT_COMMAND_MARKER}{}\n",
2298                        LineRef { index: after, hash }
2299                    )
2300                    .unwrap();
2301                } else {
2302                    result.push_str(INSERT_COMMAND_MARKER);
2303                    result.push('\n');
2304                }
2305            } else {
2306                let start = hunk.line_range.start;
2307                let end_exclusive = hunk.line_range.end;
2308                let deleted_line_count = end_exclusive.saturating_sub(start);
2309
2310                if deleted_line_count == 1 {
2311                    if let Some(&hash) = old_hashes.get(start) {
2312                        write!(
2313                            result,
2314                            "{SET_COMMAND_MARKER}{}\n",
2315                            LineRef { index: start, hash }
2316                        )
2317                        .unwrap();
2318                    } else {
2319                        result.push_str(SET_COMMAND_MARKER);
2320                        result.push('\n');
2321                    }
2322                } else {
2323                    let end_inclusive = end_exclusive - 1;
2324                    match (
2325                        old_hashes.get(start).copied(),
2326                        old_hashes.get(end_inclusive).copied(),
2327                    ) {
2328                        (Some(start_hash), Some(end_hash)) => {
2329                            write!(
2330                                result,
2331                                "{SET_COMMAND_MARKER}{}-{}\n",
2332                                LineRef {
2333                                    index: start,
2334                                    hash: start_hash
2335                                },
2336                                LineRef {
2337                                    index: end_inclusive,
2338                                    hash: end_hash
2339                                }
2340                            )
2341                            .unwrap();
2342                        }
2343                        _ => {
2344                            result.push_str(SET_COMMAND_MARKER);
2345                            result.push('\n');
2346                        }
2347                    }
2348                }
2349            }
2350            for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
2351                if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
2352                    && line_offset == cursor_line_offset
2353                {
2354                    result.push_str(&line[..char_offset]);
2355                    result.push_str(CURSOR_MARKER);
2356                    result.push_str(&line[char_offset..]);
2357                    continue;
2358                }
2359
2360                result.push_str(line);
2361            }
2362        }
2363
2364        for raw_line in patch.split_inclusive('\n') {
2365            if raw_line.starts_with("@@") {
2366                // Flush any pending change hunk from a previous patch hunk.
2367                if let Some(hunk) = current_hunk.take() {
2368                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2369                }
2370
2371                // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
2372                // We intentionally do not trust old_start as a direct local index into `old_text`,
2373                // because some patches are produced against a larger file region and carry
2374                // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
2375                if first_hunk {
2376                    new_text_byte_offset = 0;
2377                    first_hunk = false;
2378                }
2379                continue;
2380            }
2381
2382            if raw_line.starts_with("---") || raw_line.starts_with("+++") {
2383                continue;
2384            }
2385            if raw_line.starts_with("\\ No newline") {
2386                continue;
2387            }
2388
2389            if raw_line.starts_with('-') {
2390                // Extend or start a change hunk with this deleted old line.
2391                match &mut current_hunk {
2392                    Some(Hunk {
2393                        line_range: range, ..
2394                    }) => range.end = old_line_index + 1,
2395                    None => {
2396                        current_hunk = Some(Hunk {
2397                            line_range: old_line_index..old_line_index + 1,
2398                            new_text_lines: Vec::new(),
2399                            cursor_line_offset_in_new_text: None,
2400                        });
2401                    }
2402                }
2403                old_line_index += 1;
2404            } else if let Some(added_content) = raw_line.strip_prefix('+') {
2405                // Place cursor marker if cursor_offset falls within this line.
2406                let mut cursor_line_offset = None;
2407                if let Some(cursor_off) = cursor_offset
2408                    && (first_hunk
2409                        || cursor_off >= new_text_byte_offset
2410                            && cursor_off <= new_text_byte_offset + added_content.len())
2411                {
2412                    let line_offset = added_content.floor_char_boundary(
2413                        cursor_off
2414                            .saturating_sub(new_text_byte_offset)
2415                            .min(added_content.len()),
2416                    );
2417                    cursor_line_offset = Some(line_offset);
2418                }
2419
2420                new_text_byte_offset += added_content.len();
2421
2422                let hunk = current_hunk.get_or_insert(Hunk {
2423                    line_range: old_line_index..old_line_index,
2424                    new_text_lines: vec![],
2425                    cursor_line_offset_in_new_text: None,
2426                });
2427                hunk.new_text_lines.push(added_content);
2428                hunk.cursor_line_offset_in_new_text = cursor_line_offset
2429                    .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
2430            } else {
2431                // Context line (starts with ' ' or is empty).
2432                if let Some(hunk) = current_hunk.take() {
2433                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2434                }
2435                last_old_line_before_hunk = Some(old_line_index);
2436                old_line_index += 1;
2437                let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
2438                new_text_byte_offset += content.len();
2439            }
2440        }
2441
2442        // Flush final group.
2443        if let Some(hunk) = current_hunk.take() {
2444            flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2445        }
2446
2447        // Trim a single trailing newline.
2448        if result.ends_with('\n') {
2449            result.pop();
2450        }
2451
2452        if result.is_empty() {
2453            return Ok(NO_EDITS_COMMAND_MARKER.to_string());
2454        }
2455
2456        Ok(result)
2457    }
2458
2459    #[cfg(test)]
2460    mod tests {
2461        use super::*;
2462        use indoc::indoc;
2463
2464        #[test]
2465        fn test_format_cursor_region() {
2466            struct Case {
2467                name: &'static str,
2468                context: &'static str,
2469                editable_range: Range<usize>,
2470                cursor_offset: usize,
2471                expected: &'static str,
2472            }
2473
2474            let cases = [
2475                Case {
2476                    name: "basic_cursor_placement",
2477                    context: "hello world\n",
2478                    editable_range: 0..12,
2479                    cursor_offset: 5,
2480                    expected: indoc! {"
2481                    <|file_sep|>test.rs
2482                    <|fim_prefix|>
2483                    <|fim_middle|>current
2484                    0:5c|hello<|user_cursor|> world
2485                    <|fim_suffix|>
2486                    <|fim_middle|>updated
2487                    "},
2488                },
2489                Case {
2490                    name: "multiline_cursor_on_second_line",
2491                    context: "aaa\nbbb\nccc\n",
2492                    editable_range: 0..12,
2493                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
2494                    expected: indoc! {"
2495                    <|file_sep|>test.rs
2496                    <|fim_prefix|>
2497                    <|fim_middle|>current
2498                    0:23|aaa
2499                    1:26|b<|user_cursor|>bb
2500                    2:29|ccc
2501                    <|fim_suffix|>
2502                    <|fim_middle|>updated
2503                    "},
2504                },
2505                Case {
2506                    name: "no_trailing_newline_in_context",
2507                    context: "line1\nline2",
2508                    editable_range: 0..11,
2509                    cursor_offset: 3,
2510                    expected: indoc! {"
2511                    <|file_sep|>test.rs
2512                    <|fim_prefix|>
2513                    <|fim_middle|>current
2514                    0:d9|lin<|user_cursor|>e1
2515                    1:da|line2
2516                    <|fim_suffix|>
2517                    <|fim_middle|>updated
2518                    "},
2519                },
2520                Case {
2521                    name: "leading_newline_in_editable_region",
2522                    context: "\nabc\n",
2523                    editable_range: 0..5,
2524                    cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
2525                    expected: indoc! {"
2526                    <|file_sep|>test.rs
2527                    <|fim_prefix|>
2528                    <|fim_middle|>current
2529                    0:00|
2530                    1:26|a<|user_cursor|>bc
2531                    <|fim_suffix|>
2532                    <|fim_middle|>updated
2533                    "},
2534                },
2535                Case {
2536                    name: "with_suffix",
2537                    context: "abc\ndef",
2538                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
2539                    cursor_offset: 2,
2540                    expected: indoc! {"
2541                    <|file_sep|>test.rs
2542                    <|fim_prefix|>
2543                    <|fim_middle|>current
2544                    0:26|ab<|user_cursor|>c
2545                    <|fim_suffix|>
2546                    def
2547                    <|fim_middle|>updated
2548                    "},
2549                },
2550                Case {
2551                    name: "unicode_two_byte_chars",
2552                    context: "héllo\n",
2553                    editable_range: 0..7,
2554                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
2555                    expected: indoc! {"
2556                    <|file_sep|>test.rs
2557                    <|fim_prefix|>
2558                    <|fim_middle|>current
2559                    0:1b|hé<|user_cursor|>llo
2560                    <|fim_suffix|>
2561                    <|fim_middle|>updated
2562                    "},
2563                },
2564                Case {
2565                    name: "unicode_three_byte_chars",
2566                    context: "日本語\n",
2567                    editable_range: 0..10,
2568                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
2569                    expected: indoc! {"
2570                    <|file_sep|>test.rs
2571                    <|fim_prefix|>
2572                    <|fim_middle|>current
2573                    0:80|日本<|user_cursor|>語
2574                    <|fim_suffix|>
2575                    <|fim_middle|>updated
2576                    "},
2577                },
2578                Case {
2579                    name: "unicode_four_byte_chars",
2580                    context: "a🌍b\n",
2581                    editable_range: 0..7,
2582                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
2583                    expected: indoc! {"
2584                    <|file_sep|>test.rs
2585                    <|fim_prefix|>
2586                    <|fim_middle|>current
2587                    0:6b|a🌍<|user_cursor|>b
2588                    <|fim_suffix|>
2589                    <|fim_middle|>updated
2590                    "},
2591                },
2592                Case {
2593                    name: "cursor_at_start_of_region_not_placed",
2594                    context: "abc\n",
2595                    editable_range: 0..4,
2596                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
2597                    expected: indoc! {"
2598                    <|file_sep|>test.rs
2599                    <|fim_prefix|>
2600                    <|fim_middle|>current
2601                    0:26|abc
2602                    <|fim_suffix|>
2603                    <|fim_middle|>updated
2604                    "},
2605                },
2606                Case {
2607                    name: "cursor_at_end_of_line_not_placed",
2608                    context: "abc\ndef\n",
2609                    editable_range: 0..8,
2610                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
2611                    expected: indoc! {"
2612                    <|file_sep|>test.rs
2613                    <|fim_prefix|>
2614                    <|fim_middle|>current
2615                    0:26|abc
2616                    1:2f|def
2617                    <|fim_suffix|>
2618                    <|fim_middle|>updated
2619                    "},
2620                },
2621                Case {
2622                    name: "cursor_offset_relative_to_context_not_editable_region",
2623                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
2624                    // write_cursor_excerpt_section must subtract it before comparing against
2625                    // per-line offsets within the editable region.
2626                    context: "pre\naaa\nbbb\nsuf\n",
2627                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
2628                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
2629                    expected: indoc! {"
2630                    <|file_sep|>test.rs
2631                    <|fim_prefix|>
2632                    pre
2633                    <|fim_middle|>current
2634                    0:23|aaa
2635                    1:26|b<|user_cursor|>bb
2636                    <|fim_suffix|>
2637                    suf
2638                    <|fim_middle|>updated
2639                    "},
2640                },
2641            ];
2642
2643            for case in &cases {
2644                let mut prompt = String::new();
2645                hashline::write_cursor_excerpt_section(
2646                    &mut prompt,
2647                    Path::new("test.rs"),
2648                    case.context,
2649                    &case.editable_range,
2650                    case.cursor_offset,
2651                );
2652                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
2653            }
2654        }
2655
2656        #[test]
2657        fn test_apply_edit_commands() {
2658            struct Case {
2659                name: &'static str,
2660                original: &'static str,
2661                model_output: &'static str,
2662                expected: &'static str,
2663            }
2664
2665            let cases = vec![
2666                Case {
2667                    name: "set_single_line",
2668                    original: indoc! {"
2669                    let mut total = 0;
2670                    for product in products {
2671                        total += ;
2672                    }
2673                    total
2674                "},
2675                    model_output: indoc! {"
2676                    <|set|>2:87
2677                        total += product.price;
2678                "},
2679                    expected: indoc! {"
2680                    let mut total = 0;
2681                    for product in products {
2682                        total += product.price;
2683                    }
2684                    total
2685                "},
2686                },
2687                Case {
2688                    name: "set_range",
2689                    original: indoc! {"
2690                    fn foo() {
2691                        let x = 1;
2692                        let y = 2;
2693                        let z = 3;
2694                    }
2695                "},
2696                    model_output: indoc! {"
2697                    <|set|>1:46-3:4a
2698                        let sum = 6;
2699                "},
2700                    expected: indoc! {"
2701                    fn foo() {
2702                        let sum = 6;
2703                    }
2704                "},
2705                },
2706                Case {
2707                    name: "insert_after_line",
2708                    original: indoc! {"
2709                    fn main() {
2710                        let x = 1;
2711                    }
2712                "},
2713                    model_output: indoc! {"
2714                    <|insert|>1:46
2715                        let y = 2;
2716                "},
2717                    expected: indoc! {"
2718                    fn main() {
2719                        let x = 1;
2720                        let y = 2;
2721                    }
2722                "},
2723                },
2724                Case {
2725                    name: "insert_before_first",
2726                    original: indoc! {"
2727                    let x = 1;
2728                    let y = 2;
2729                "},
2730                    model_output: indoc! {"
2731                    <|insert|>
2732                    use std::io;
2733                "},
2734                    expected: indoc! {"
2735                    use std::io;
2736                    let x = 1;
2737                    let y = 2;
2738                "},
2739                },
2740                Case {
2741                    name: "set_with_cursor_marker",
2742                    original: indoc! {"
2743                    fn main() {
2744                        println!();
2745                    }
2746                "},
2747                    model_output: indoc! {"
2748                    <|set|>1:34
2749                        eprintln!(\"<|user_cursor|>\");
2750                "},
2751                    expected: indoc! {"
2752                    fn main() {
2753                        eprintln!(\"<|user_cursor|>\");
2754                    }
2755                "},
2756                },
2757                Case {
2758                    name: "multiple_set_commands",
2759                    original: indoc! {"
2760                    aaa
2761                    bbb
2762                    ccc
2763                    ddd
2764                "},
2765                    model_output: indoc! {"
2766                    <|set|>0:23
2767                    AAA
2768                    <|set|>2:29
2769                    CCC
2770                "},
2771                    expected: indoc! {"
2772                    AAA
2773                    bbb
2774                    CCC
2775                    ddd
2776                "},
2777                },
2778                Case {
2779                    name: "set_range_multiline_replacement",
2780                    original: indoc! {"
2781                    fn handle_submit() {
2782                    }
2783
2784                    fn handle_keystroke() {
2785                "},
2786                    model_output: indoc! {"
2787                    <|set|>0:3f-1:7d
2788                    fn handle_submit(modal_state: &mut ModalState) {
2789                        <|user_cursor|>
2790                    }
2791                "},
2792                    expected: indoc! {"
2793                    fn handle_submit(modal_state: &mut ModalState) {
2794                        <|user_cursor|>
2795                    }
2796
2797                    fn handle_keystroke() {
2798                "},
2799                },
2800                Case {
2801                    name: "no_edit_commands_returns_original",
2802                    original: indoc! {"
2803                    hello
2804                    world
2805                "},
2806                    model_output: "some random text with no commands",
2807                    expected: indoc! {"
2808                    hello
2809                    world
2810                "},
2811                },
2812                Case {
2813                    name: "no_edits_command_returns_original",
2814                    original: indoc! {"
2815                    hello
2816                    world
2817                "},
2818                    model_output: "<|no_edits|>",
2819                    expected: indoc! {"
2820                    hello
2821                    world
2822                "},
2823                },
2824                Case {
2825                    name: "wrong_hash_set_ignored",
2826                    original: indoc! {"
2827                    aaa
2828                    bbb
2829                "},
2830                    model_output: indoc! {"
2831                    <|set|>0:ff
2832                    ZZZ
2833                "},
2834                    expected: indoc! {"
2835                    aaa
2836                    bbb
2837                "},
2838                },
2839                Case {
2840                    name: "insert_and_set_combined",
2841                    original: indoc! {"
2842                    alpha
2843                    beta
2844                    gamma
2845                "},
2846                    model_output: indoc! {"
2847                    <|set|>0:06
2848                    ALPHA
2849                    <|insert|>1:9c
2850                    beta_extra
2851                "},
2852                    expected: indoc! {"
2853                    ALPHA
2854                    beta
2855                    beta_extra
2856                    gamma
2857                "},
2858                },
2859                Case {
2860                    name: "no_trailing_newline_preserved",
2861                    original: "hello\nworld",
2862                    model_output: indoc! {"
2863                    <|set|>0:14
2864                    HELLO
2865                "},
2866                    expected: "HELLO\nworld",
2867                },
2868                Case {
2869                    name: "set_range_hash_mismatch_in_end_bound",
2870                    original: indoc! {"
2871                    one
2872                    two
2873                    three
2874                "},
2875                    model_output: indoc! {"
2876                    <|set|>0:42-2:ff
2877                    ONE_TWO_THREE
2878                "},
2879                    expected: indoc! {"
2880                    one
2881                    two
2882                    three
2883                "},
2884                },
2885                Case {
2886                    name: "set_range_start_greater_than_end_ignored",
2887                    original: indoc! {"
2888                    a
2889                    b
2890                    c
2891                "},
2892                    model_output: indoc! {"
2893                    <|set|>2:63-1:62
2894                    X
2895                "},
2896                    expected: indoc! {"
2897                    a
2898                    b
2899                    c
2900                "},
2901                },
2902                Case {
2903                    name: "insert_out_of_bounds_ignored",
2904                    original: indoc! {"
2905                    x
2906                    y
2907                "},
2908                    model_output: indoc! {"
2909                    <|insert|>99:aa
2910                    z
2911                "},
2912                    expected: indoc! {"
2913                    x
2914                    y
2915                "},
2916                },
2917                Case {
2918                    name: "set_out_of_bounds_ignored",
2919                    original: indoc! {"
2920                    x
2921                    y
2922                "},
2923                    model_output: indoc! {"
2924                    <|set|>99:aa
2925                    z
2926                "},
2927                    expected: indoc! {"
2928                    x
2929                    y
2930                "},
2931                },
2932                Case {
2933                    name: "malformed_set_command_ignored",
2934                    original: indoc! {"
2935                    alpha
2936                    beta
2937                "},
2938                    model_output: indoc! {"
2939                    <|set|>not-a-line-ref
2940                    UPDATED
2941                "},
2942                    expected: indoc! {"
2943                    alpha
2944                    beta
2945                "},
2946                },
2947                Case {
2948                    name: "malformed_insert_hash_treated_as_before_first",
2949                    original: indoc! {"
2950                    alpha
2951                    beta
2952                "},
2953                    model_output: indoc! {"
2954                    <|insert|>1:nothex
2955                    preamble
2956                "},
2957                    expected: indoc! {"
2958                    preamble
2959                    alpha
2960                    beta
2961                "},
2962                },
2963                Case {
2964                    name: "set_then_insert_same_target_orders_insert_after_replacement",
2965                    original: indoc! {"
2966                    cat
2967                    dog
2968                "},
2969                    model_output: indoc! {"
2970                    <|set|>0:38
2971                    CAT
2972                    <|insert|>0:38
2973                    TAIL
2974                "},
2975                    expected: indoc! {"
2976                    CAT
2977                    TAIL
2978                    dog
2979                "},
2980                },
2981                Case {
2982                    name: "overlapping_set_ranges_last_wins",
2983                    original: indoc! {"
2984                    a
2985                    b
2986                    c
2987                    d
2988                "},
2989                    model_output: indoc! {"
2990                    <|set|>0:61-2:63
2991                    FIRST
2992                    <|set|>1:62-3:64
2993                    SECOND
2994                "},
2995                    expected: indoc! {"
2996                    FIRST
2997                    d
2998                "},
2999                },
3000                Case {
3001                    name: "insert_before_first_and_after_line",
3002                    original: indoc! {"
3003                        a
3004                        b
3005                    "},
3006                    model_output: indoc! {"
3007                        <|insert|>
3008                        HEAD
3009                        <|insert|>0:61
3010                        MID
3011                    "},
3012                    expected: indoc! {"
3013                        HEAD
3014                        a
3015                        MID
3016                        b
3017                    "},
3018                },
3019            ];
3020
3021            for case in &cases {
3022                let result = hashline::apply_edit_commands(case.original, &case.model_output);
3023                assert_eq!(result, case.expected, "failed case: {}", case.name);
3024            }
3025        }
3026
3027        #[test]
3028        fn test_output_has_edit_commands() {
3029            assert!(hashline::output_has_edit_commands(&format!(
3030                "{}0:ab\nnew",
3031                SET_COMMAND_MARKER
3032            )));
3033            assert!(hashline::output_has_edit_commands(&format!(
3034                "{}0:ab\nnew",
3035                INSERT_COMMAND_MARKER
3036            )));
3037            assert!(hashline::output_has_edit_commands(&format!(
3038                "some text\n{}1:cd\nstuff",
3039                SET_COMMAND_MARKER
3040            )));
3041            assert!(!hashline::output_has_edit_commands("just plain text"));
3042            assert!(!hashline::output_has_edit_commands("NO_EDITS"));
3043            assert!(hashline::output_has_edit_commands("<|no_edits|>"));
3044        }
3045
3046        // ---- hashline::patch_to_edit_commands round-trip tests ----
3047
3048        #[test]
3049        fn test_patch_to_edit_commands() {
3050            struct Case {
3051                name: &'static str,
3052                old: &'static str,
3053                patch: &'static str,
3054                expected_new: &'static str,
3055            }
3056
3057            let cases = [
3058                Case {
3059                    name: "single_line_replacement",
3060                    old: indoc! {"
3061                    let mut total = 0;
3062                    for product in products {
3063                        total += ;
3064                    }
3065                    total
3066                "},
3067                    patch: indoc! {"
3068                    @@ -1,5 +1,5 @@
3069                     let mut total = 0;
3070                     for product in products {
3071                    -    total += ;
3072                    +    total += product.price;
3073                     }
3074                     total
3075                "},
3076                    expected_new: indoc! {"
3077                    let mut total = 0;
3078                    for product in products {
3079                        total += product.price;
3080                    }
3081                    total
3082                "},
3083                },
3084                Case {
3085                    name: "multiline_replacement",
3086                    old: indoc! {"
3087                    fn foo() {
3088                        let x = 1;
3089                        let y = 2;
3090                        let z = 3;
3091                    }
3092                "},
3093                    patch: indoc! {"
3094                    @@ -1,5 +1,3 @@
3095                     fn foo() {
3096                    -    let x = 1;
3097                    -    let y = 2;
3098                    -    let z = 3;
3099                    +    let sum = 1 + 2 + 3;
3100                     }
3101                "},
3102                    expected_new: indoc! {"
3103                    fn foo() {
3104                        let sum = 1 + 2 + 3;
3105                    }
3106                "},
3107                },
3108                Case {
3109                    name: "insertion",
3110                    old: indoc! {"
3111                    fn main() {
3112                        let x = 1;
3113                    }
3114                "},
3115                    patch: indoc! {"
3116                    @@ -1,3 +1,4 @@
3117                     fn main() {
3118                         let x = 1;
3119                    +    let y = 2;
3120                     }
3121                "},
3122                    expected_new: indoc! {"
3123                    fn main() {
3124                        let x = 1;
3125                        let y = 2;
3126                    }
3127                "},
3128                },
3129                Case {
3130                    name: "insertion_before_first",
3131                    old: indoc! {"
3132                    let x = 1;
3133                    let y = 2;
3134                "},
3135                    patch: indoc! {"
3136                    @@ -1,2 +1,3 @@
3137                    +use std::io;
3138                     let x = 1;
3139                     let y = 2;
3140                "},
3141                    expected_new: indoc! {"
3142                    use std::io;
3143                    let x = 1;
3144                    let y = 2;
3145                "},
3146                },
3147                Case {
3148                    name: "deletion",
3149                    old: indoc! {"
3150                    aaa
3151                    bbb
3152                    ccc
3153                    ddd
3154                "},
3155                    patch: indoc! {"
3156                    @@ -1,4 +1,2 @@
3157                     aaa
3158                    -bbb
3159                    -ccc
3160                     ddd
3161                "},
3162                    expected_new: indoc! {"
3163                    aaa
3164                    ddd
3165                "},
3166                },
3167                Case {
3168                    name: "multiple_changes",
3169                    old: indoc! {"
3170                    alpha
3171                    beta
3172                    gamma
3173                    delta
3174                    epsilon
3175                "},
3176                    patch: indoc! {"
3177                    @@ -1,5 +1,5 @@
3178                    -alpha
3179                    +ALPHA
3180                     beta
3181                     gamma
3182                    -delta
3183                    +DELTA
3184                     epsilon
3185                "},
3186                    expected_new: indoc! {"
3187                    ALPHA
3188                    beta
3189                    gamma
3190                    DELTA
3191                    epsilon
3192                "},
3193                },
3194                Case {
3195                    name: "replace_with_insertion",
3196                    old: indoc! {r#"
3197                    fn handle() {
3198                        modal_state.close();
3199                        modal_state.dismiss();
3200                "#},
3201                    patch: indoc! {r#"
3202                    @@ -1,3 +1,4 @@
3203                     fn handle() {
3204                         modal_state.close();
3205                    +    eprintln!("");
3206                         modal_state.dismiss();
3207                "#},
3208                    expected_new: indoc! {r#"
3209                    fn handle() {
3210                        modal_state.close();
3211                        eprintln!("");
3212                        modal_state.dismiss();
3213                "#},
3214                },
3215                Case {
3216                    name: "complete_replacement",
3217                    old: indoc! {"
3218                    aaa
3219                    bbb
3220                    ccc
3221                "},
3222                    patch: indoc! {"
3223                    @@ -1,3 +1,3 @@
3224                    -aaa
3225                    -bbb
3226                    -ccc
3227                    +xxx
3228                    +yyy
3229                    +zzz
3230                "},
3231                    expected_new: indoc! {"
3232                    xxx
3233                    yyy
3234                    zzz
3235                "},
3236                },
3237                Case {
3238                    name: "add_function_body",
3239                    old: indoc! {"
3240                    fn foo() {
3241                        modal_state.dismiss();
3242                    }
3243
3244                    fn
3245
3246                    fn handle_keystroke() {
3247                "},
3248                    patch: indoc! {"
3249                    @@ -1,6 +1,8 @@
3250                     fn foo() {
3251                         modal_state.dismiss();
3252                     }
3253
3254                    -fn
3255                    +fn handle_submit() {
3256                    +    todo()
3257                    +}
3258
3259                     fn handle_keystroke() {
3260                "},
3261                    expected_new: indoc! {"
3262                    fn foo() {
3263                        modal_state.dismiss();
3264                    }
3265
3266                    fn handle_submit() {
3267                        todo()
3268                    }
3269
3270                    fn handle_keystroke() {
3271                "},
3272                },
3273                Case {
3274                    name: "with_cursor_offset",
3275                    old: indoc! {r#"
3276                    fn main() {
3277                        println!();
3278                    }
3279                "#},
3280                    patch: indoc! {r#"
3281                        @@ -1,3 +1,3 @@
3282                        fn main() {
3283                        -    println!();
3284                        +    eprintln!("");
3285                        }
3286                    "#},
3287                    expected_new: indoc! {r#"
3288                        fn main() {
3289                            eprintln!("<|user_cursor|>");
3290                        }
3291                    "#},
3292                },
3293                Case {
3294                    name: "non_local_hunk_header_pure_insertion_repro",
3295                    old: indoc! {"
3296                        aaa
3297                        bbb
3298                    "},
3299                    patch: indoc! {"
3300                        @@ -20,2 +20,3 @@
3301                        aaa
3302                        +xxx
3303                        bbb
3304                    "},
3305                    expected_new: indoc! {"
3306                        aaa
3307                        xxx
3308                        bbb
3309                    "},
3310                },
3311                Case {
3312                    name: "empty_patch_produces_no_edits_marker",
3313                    old: indoc! {"
3314                        aaa
3315                        bbb
3316                    "},
3317                    patch: "@@ -20,2 +20,3 @@\n",
3318                    expected_new: indoc! {"
3319                        aaa
3320                        bbb
3321                    "},
3322                },
3323            ];
3324
3325            for case in &cases {
3326                // The cursor_offset for patch_to_edit_commands is relative to
3327                // the first hunk's new text (context + additions). We compute
3328                // it by finding where the marker sits in the expected output
3329                // (which mirrors the new text of the hunk).
3330                let cursor_offset = case.expected_new.find(CURSOR_MARKER);
3331
3332                let commands =
3333                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
3334                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
3335
3336                assert!(
3337                    hashline::output_has_edit_commands(&commands),
3338                    "case {}: expected edit commands, got: {commands:?}",
3339                    case.name,
3340                );
3341
3342                let applied = hashline::apply_edit_commands(case.old, &commands);
3343                assert_eq!(applied, case.expected_new, "case {}", case.name);
3344            }
3345        }
3346    }
3347}
3348
3349pub mod seed_coder {
3350    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
3351    //!
3352    //! Seed-Coder uses different FIM tokens and order than Qwen:
3353    //! - SPM order: suffix comes FIRST, then prefix, then middle
3354    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
3355    //! - File markers: StarCoder-style `<filename>path` (single token + path)
3356    //!
3357    //! All context (related files, edit history) goes in the PREFIX section.
3358    //! The suffix contains only code after the editable region.
3359    //!
3360    //! Example prompt:
3361    //!
3362    //! <[fim-suffix]>
3363    //! code after editable region
3364    //! <[fim-prefix]><filename>related/file.py
3365    //! related file content
3366    //!
3367    //! <filename>edit_history
3368    //! --- a/some_file.py
3369    //! +++ b/some_file.py
3370    //! -old
3371    //! +new
3372    //!
3373    //! <filename>path/to/target_file.py
3374    //! code before editable region
3375    //! <<<<<<< CURRENT
3376    //! code that
3377    //! needs to<|user_cursor|>
3378    //! be rewritten
3379    //! =======
3380    //! <[fim-middle]>
3381    //!
3382    //! Expected output (model generates):
3383    //!
3384    //! updated
3385    //! code with
3386    //! changes applied
3387    //! >>>>>>> UPDATED
3388
3389    use super::*;
3390
3391    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
3392    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
3393    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
3394    pub const FILE_MARKER: &str = "<filename>";
3395
3396    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
3397    pub const SEPARATOR: &str = "=======\n";
3398    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
3399
3400    pub const NO_EDITS: &str = "NO_EDITS\n";
3401
3402    pub fn special_tokens() -> &'static [&'static str] {
3403        &[
3404            FIM_SUFFIX,
3405            FIM_PREFIX,
3406            FIM_MIDDLE,
3407            FILE_MARKER,
3408            START_MARKER,
3409            SEPARATOR,
3410            END_MARKER,
3411            CURSOR_MARKER,
3412        ]
3413    }
3414
3415    pub fn write_cursor_excerpt_section(
3416        prompt: &mut String,
3417        path: &Path,
3418        context: &str,
3419        editable_range: &Range<usize>,
3420        cursor_offset: usize,
3421    ) {
3422        let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3423        prompt.push_str(&section);
3424    }
3425
3426    pub fn format_prompt_with_budget(
3427        path: &Path,
3428        context: &str,
3429        editable_range: &Range<usize>,
3430        cursor_offset: usize,
3431        events: &[Arc<Event>],
3432        related_files: &[RelatedFile],
3433        diagnostics: &[ActiveBufferDiagnostic],
3434        max_tokens: usize,
3435    ) -> String {
3436        let cursor_prefix_section =
3437            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3438        assemble_fim_prompt(
3439            context,
3440            editable_range,
3441            &cursor_prefix_section,
3442            events,
3443            related_files,
3444            diagnostics,
3445            None,
3446            max_tokens,
3447        )
3448    }
3449
3450    pub fn assemble_fim_prompt(
3451        context: &str,
3452        editable_range: &Range<usize>,
3453        cursor_prefix_section: &str,
3454        events: &[Arc<Event>],
3455        related_files: &[RelatedFile],
3456        diagnostics: &[ActiveBufferDiagnostic],
3457        cursor_buffer_row: Option<u32>,
3458        max_tokens: usize,
3459    ) -> String {
3460        let suffix_section = build_suffix_section(context, editable_range);
3461
3462        let suffix_tokens = estimate_tokens(suffix_section.len() + FIM_PREFIX.len());
3463        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len() + FIM_MIDDLE.len());
3464        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
3465
3466        let edit_history_section = super::format_edit_history_within_budget(
3467            events,
3468            FILE_MARKER,
3469            "edit_history",
3470            budget_after_cursor,
3471            max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
3472        );
3473        let edit_history_tokens = estimate_tokens(edit_history_section.len() + "\n".len());
3474        let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
3475
3476        let diagnostics_section = super::format_active_buffer_diagnostics_with_budget(
3477            diagnostics,
3478            cursor_buffer_row,
3479            budget_after_edit_history,
3480        );
3481        let diagnostics_tokens = estimate_tokens(diagnostics_section.len() + "\n".len());
3482        let budget_after_diagnostics = budget_after_edit_history.saturating_sub(diagnostics_tokens);
3483
3484        let related_files_section = super::format_related_files_within_budget(
3485            related_files,
3486            FILE_MARKER,
3487            "",
3488            budget_after_diagnostics,
3489        );
3490
3491        let mut prompt = String::new();
3492        prompt.push_str(&suffix_section);
3493        prompt.push_str(FIM_PREFIX);
3494        prompt.push_str(&diagnostics_section);
3495        if !diagnostics_section.is_empty() {
3496            prompt.push('\n');
3497        }
3498        prompt.push_str(&related_files_section);
3499        if !related_files_section.is_empty() {
3500            prompt.push('\n');
3501        }
3502        prompt.push_str(&edit_history_section);
3503        if !edit_history_section.is_empty() {
3504            prompt.push('\n');
3505        }
3506        prompt.push_str(cursor_prefix_section);
3507        prompt.push_str(FIM_MIDDLE);
3508
3509        prompt
3510    }
3511
3512    pub(crate) fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
3513        let mut section = String::new();
3514        section.push_str(FIM_SUFFIX);
3515        section.push_str(&context[editable_range.end..]);
3516        if !section.ends_with('\n') {
3517            section.push('\n');
3518        }
3519        section
3520    }
3521
3522    fn build_cursor_prefix_section(
3523        path: &Path,
3524        context: &str,
3525        editable_range: &Range<usize>,
3526        cursor_offset: usize,
3527    ) -> String {
3528        let mut section = String::new();
3529        let path_str = path.to_string_lossy();
3530        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
3531
3532        section.push_str(&context[..editable_range.start]);
3533        section.push_str(START_MARKER);
3534        section.push_str(&context[editable_range.start..cursor_offset]);
3535        section.push_str(CURSOR_MARKER);
3536        section.push_str(&context[cursor_offset..editable_range.end]);
3537        if !section.ends_with('\n') {
3538            section.push('\n');
3539        }
3540        section.push_str(SEPARATOR);
3541        section
3542    }
3543
3544    /// Format patch as containing no changes if it's empty; otherwise return None.
3545    pub(crate) fn no_edits(patch: &str) -> Option<String> {
3546        // Count lines in the patch
3547        let empty_patch = patch.lines().count() <= 3;
3548        if empty_patch {
3549            Some(format!("{NO_EDITS}{END_MARKER}"))
3550        } else {
3551            None
3552        }
3553    }
3554}
3555
3556pub mod v0304_variable_edit {
3557    //! A prompt format with no fixed editable region. The entire context is shown
3558    //! to the model, and it chooses which text to replace by outputting surrounding
3559    //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
3560    //! text.
3561    //!
3562    //! Example prompt:
3563    //!
3564    //! <|file_sep|>path/to/file.py
3565    //! zero
3566    //! one
3567    //! two
3568    //! three<|user_cursor|>
3569    //! four
3570    //! five
3571    //! <|fim_prefix|>
    //!
3573    //! Expected output (model generates):
3574    //!
3575    //! two
3576    //! <|fim_middle|>
3577    //! THREE
3578    //! <|fim_suffix|>
3579    //! four
3580    //!
3581    //! The output means: find "two\n...\nfour" in the context, and replace
3582    //! everything between "two\n" and "four" with "THREE\n".
3583
3584    use super::*;
3585
3586    pub fn special_tokens() -> &'static [&'static str] {
3587        &[
3588            "<|fim_prefix|>",
3589            "<|fim_suffix|>",
3590            "<|fim_middle|>",
3591            "<|file_sep|>",
3592            CURSOR_MARKER,
3593        ]
3594    }
3595
3596    pub fn write_cursor_excerpt_section(
3597        prompt: &mut String,
3598        path: &Path,
3599        context: &str,
3600        cursor_offset: usize,
3601    ) {
3602        let path_str = path.to_string_lossy();
3603        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
3604
3605        prompt.push_str(&context[..cursor_offset]);
3606        prompt.push_str(CURSOR_MARKER);
3607        prompt.push_str(&context[cursor_offset..]);
3608        if !prompt.ends_with('\n') {
3609            prompt.push('\n');
3610        }
3611        prompt.push_str("<|fim_prefix|>\n")
3612    }
3613
3614    /// Apply a variable-edit model output to the original context text.
3615    ///
3616    /// The model output has the form:
3617    ///
3618    /// - prefix context lines
3619    /// - `<|fim_middle|>`
3620    /// - new text
3621    /// - `<|fim_suffix|>`
3622    /// - suffix context lines
3623    ///
3624    /// We locate the prefix/suffix context lines in the original text and replace
3625    /// everything between them with the new text.
3626    pub fn apply_variable_edit(
3627        context: &str,
3628        model_output: &str,
3629    ) -> Result<(Range<usize>, String)> {
3630        let (prefix_context, rest) = model_output
3631            .split_once("<|fim_middle|>\n")
3632            .or_else(|| model_output.split_once("<|fim_middle|>"))
3633            .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
3634
3635        let (new_text, suffix_context) = rest
3636            .split_once("<|fim_suffix|>\n")
3637            .or_else(|| rest.split_once("<|fim_suffix|>"))
3638            .unwrap_or((rest, ""));
3639
3640        let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
3641            suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
3642        } else {
3643            suffix_context
3644        };
3645
3646        let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
3647            .ok_or_else(|| anyhow!("could not locate prefix lines"))?
3648            + prefix_context.len();
3649        let suffix_offset = if suffix_context.is_empty() {
3650            context.len()
3651        } else {
3652            find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
3653                .ok_or_else(|| anyhow!("could not locate suffix lines"))?
3654                + prefix_offset
3655        };
3656
3657        let edit_range = prefix_offset..suffix_offset;
3658        return Ok((edit_range, new_text.to_string()));
3659    }
3660
3661    fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
3662        if needle.is_empty() {
3663            return Some(0);
3664        }
3665
3666        haystack.match_indices(needle).find_map(|(offset, _)| {
3667            let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
3668            matched_line_start.then_some(offset)
3669        })
3670    }
3671
3672    /// Convert a unified diff patch into the variable-edit output format.
3673    ///
3674    /// Parses `patch` as a unified diff against `old_text` and produces model
3675    /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
3676    /// delimiters. The diff is resolved by content matching rather than line
3677    /// numbers.
3678    pub fn patch_to_variable_edit_output(
3679        old_text: &str,
3680        patch: &str,
3681        cursor_offset: Option<usize>,
3682    ) -> Result<String> {
3683        // Parse the unified diff into hunks. Each hunk has an `old_context`
3684        // string (context + deleted lines interleaved in order) and a list of
3685        // edits expressed as byte ranges within that context plus replacement
3686        // text.
3687        let hunks = parse_hunks(patch);
3688        if hunks.is_empty() {
3689            return Ok(String::new());
3690        }
3691
3692        // Apply each hunk by finding its old_context in the text and
3693        // performing the edits. We search forward from where the previous
3694        // hunk ended so that hunks are applied in order.
3695        let mut new_text = old_text.to_string();
3696        let mut search_from: usize = 0;
3697        let mut first_hunk_pos: Option<usize> = None;
3698
3699        for hunk in &hunks {
3700            let context_pos = new_text[search_from..]
3701                .find(&hunk.old_context)
3702                .map(|pos| pos + search_from)
3703                .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
3704
3705            if first_hunk_pos.is_none() {
3706                first_hunk_pos = Some(context_pos);
3707            }
3708
3709            // Apply edits in reverse order so byte offsets remain valid.
3710            for edit in hunk.edits.iter().rev() {
3711                let abs_start = context_pos + edit.range.start;
3712                let abs_end = context_pos + edit.range.end;
3713                new_text.replace_range(abs_start..abs_end, &edit.text);
3714            }
3715
3716            // Advance past this hunk's region in the (now modified) text.
3717            let new_region_len: usize =
3718                hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
3719                    len + edit.text.len() - (edit.range.end - edit.range.start)
3720                });
3721            search_from = context_pos + new_region_len;
3722        }
3723
3724        // Now we have old_text and new_text. Find the changed line range by
3725        // comparing them.
3726        let old_lines: Vec<&str> = old_text.lines().collect();
3727        let new_lines: Vec<&str> = new_text.lines().collect();
3728
3729        // Find first differing line.
3730        let first_changed_row = old_lines
3731            .iter()
3732            .zip(new_lines.iter())
3733            .position(|(a, b)| a != b)
3734            .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
3735
3736        // Find last differing line (from the end).
3737        let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
3738        let common_suffix = old_lines
3739            .iter()
3740            .rev()
3741            .zip(new_lines.iter().rev())
3742            .take(max_suffix)
3743            .take_while(|(a, b)| a == b)
3744            .count();
3745
3746        let old_end = old_lines.len() - common_suffix;
3747        let new_end = new_lines.len() - common_suffix;
3748
3749        if first_changed_row == old_end && first_changed_row == new_end {
3750            return Ok(String::new());
3751        }
3752
3753        // Build the replacement text from new_lines[first_diff..new_end].
3754        let mut merged_new_text = String::new();
3755        for line in &new_lines[first_changed_row..new_end] {
3756            merged_new_text.push_str(line);
3757            merged_new_text.push('\n');
3758        }
3759
3760        // cursor_offset is relative to the first hunk's new content in
3761        // new_text. Translate it to an offset within merged_new_text, which
3762        // only contains lines first_diff..new_end of new_text.
3763        if let Some(hunk_offset) = cursor_offset {
3764            let hunk_start = first_hunk_pos.unwrap_or(0);
3765            let absolute_pos = hunk_start + hunk_offset;
3766
3767            // Byte offset where first_diff starts in new_text.
3768            let merged_start: usize = new_lines[..first_changed_row]
3769                .iter()
3770                .map(|line| line.len() + 1)
3771                .sum();
3772
3773            if absolute_pos >= merged_start {
3774                let relative_offset = absolute_pos - merged_start;
3775                if relative_offset <= merged_new_text.len() {
3776                    merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
3777                }
3778            }
3779        }
3780
3781        // Build output with 2 lines of context above and below.
3782        let context_lines_count = 2;
3783        let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
3784        let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
3785
3786        fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
3787            let pattern = &lines[line_range];
3788            let pattern_len = pattern.len();
3789
3790            let mut count = 0;
3791            for offset in 0..=lines.len() - pattern_len {
3792                if &lines[offset..offset + pattern_len] == pattern {
3793                    count += 1;
3794                }
3795            }
3796            count
3797        }
3798
3799        // Expand prefix and suffix until they are unique
3800        while prefix_start > 0 {
3801            if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
3802                prefix_start -= 1;
3803            } else {
3804                break;
3805            }
3806        }
3807        while suffix_end < old_lines.len() {
3808            if count_matches(old_end..suffix_end, &old_lines) > 1 {
3809                suffix_end += 1;
3810            } else {
3811                break;
3812            }
3813        }
3814
3815        let mut output = String::new();
3816        for line in &old_lines[prefix_start..first_changed_row] {
3817            output.push_str(line);
3818            output.push('\n');
3819        }
3820        output.push_str("<|fim_middle|>\n");
3821        output.push_str(&merged_new_text);
3822        output.push_str("<|fim_suffix|>\n");
3823        for line in &old_lines[old_end..suffix_end] {
3824            output.push_str(line);
3825            output.push('\n');
3826        }
3827
3828        Ok(output)
3829    }
3830
3831    struct ParsedHunk {
3832        old_context: String,
3833        edits: Vec<ParsedEdit>,
3834    }
3835
3836    struct ParsedEdit {
3837        range: Range<usize>,
3838        text: String,
3839    }
3840
3841    /// Parse a unified diff into content-based hunks. Each hunk contains an
3842    /// `old_context` string (context lines + deleted lines, which together
3843    /// form the text that should be found in the original) and a list of edits
3844    /// expressed as byte ranges within that context.
3845    fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
3846        let mut hunks = Vec::new();
3847        let mut current: Option<ParsedHunk> = None;
3848
3849        for line in patch.lines() {
3850            if line.starts_with("@@") {
3851                if let Some(hunk) = current.take() {
3852                    if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3853                        hunks.push(hunk);
3854                    }
3855                }
3856                current = Some(ParsedHunk {
3857                    old_context: String::new(),
3858                    edits: Vec::new(),
3859                });
3860            } else if line.starts_with("---") || line.starts_with("+++") {
3861                continue;
3862            } else if let Some(hunk) = &mut current {
3863                if let Some(added) = line.strip_prefix('+') {
3864                    let pos = hunk.old_context.len();
3865                    if let Some(last_edit) = hunk.edits.last_mut() {
3866                        if last_edit.range.end == pos {
3867                            writeln!(&mut last_edit.text, "{added}").ok();
3868                            continue;
3869                        }
3870                    }
3871                    hunk.edits.push(ParsedEdit {
3872                        range: pos..pos,
3873                        text: format!("{added}\n"),
3874                    });
3875                } else if let Some(removed) = line.strip_prefix('-') {
3876                    let start = hunk.old_context.len();
3877                    writeln!(&mut hunk.old_context, "{removed}").ok();
3878                    let end = hunk.old_context.len();
3879                    if let Some(last_edit) = hunk.edits.last_mut() {
3880                        if last_edit.range.end == start {
3881                            last_edit.range.end = end;
3882                            continue;
3883                        }
3884                    }
3885                    hunk.edits.push(ParsedEdit {
3886                        range: start..end,
3887                        text: String::new(),
3888                    });
3889                } else {
3890                    let ctx = line.strip_prefix(' ').unwrap_or(line);
3891                    writeln!(&mut hunk.old_context, "{ctx}").ok();
3892                }
3893            }
3894        }
3895
3896        if let Some(hunk) = current {
3897            if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3898                hunks.push(hunk);
3899            }
3900        }
3901
3902        hunks
3903    }
3904
3905    #[cfg(test)]
3906    mod tests {
3907        use super::*;
3908        use indoc::indoc;
3909
3910        #[test]
3911        fn test_apply_variable_edit() {
3912            struct Case {
3913                name: &'static str,
3914                original: &'static str,
3915                model_output: &'static str,
3916                expected: &'static str,
3917            }
3918
3919            let cases = [
3920                Case {
3921                    name: "simple_single_line_replacement",
3922                    original: indoc! {"
3923                        zero
3924                        one
3925                        two
3926                        three
3927                        four
3928                        five
3929                    "},
3930                    model_output: indoc! {"
3931                        two
3932                        <|fim_middle|>
3933                        THREE
3934                        <|fim_suffix|>
3935                        four
3936                    "},
3937                    expected: indoc! {"
3938                        zero
3939                        one
3940                        two
3941                        THREE
3942                        four
3943                        five
3944                    "},
3945                },
3946                Case {
3947                    name: "multi_line_replacement",
3948                    original: indoc! {"
3949                        a
3950                        b
3951                        c
3952                        d
3953                        e
3954                    "},
3955                    model_output: indoc! {"
3956                        a
3957                        <|fim_middle|>
3958                        B
3959                        C
3960                        D
3961                        <|fim_suffix|>
3962                        e
3963                    "},
3964                    expected: indoc! {"
3965                        a
3966                        B
3967                        C
3968                        D
3969                        e
3970                    "},
3971                },
3972                Case {
3973                    name: "insertion_between_existing_lines",
3974                    original: indoc! {"
3975                        a
3976                        b
3977                        c
3978                    "},
3979                    model_output: indoc! {"
3980                        a
3981                        <|fim_middle|>
3982                        X
3983                        <|fim_suffix|>
3984                        b
3985                    "},
3986                    expected: indoc! {"
3987                        a
3988                        X
3989                        b
3990                        c
3991                    "},
3992                },
3993                Case {
3994                    name: "deletion",
3995                    original: indoc! {"
3996                        a
3997                        b
3998                        c
3999                        d
4000                    "},
4001                    model_output: indoc! {"
4002                        a
4003                        <|fim_middle|>
4004                        <|fim_suffix|>
4005                        c
4006                    "},
4007                    expected: indoc! {"
4008                        a
4009                        c
4010                        d
4011                    "},
4012                },
4013                Case {
4014                    name: "replacement_at_start_no_prefix_context",
4015                    original: indoc! {"
4016                        a
4017                        b
4018                        c
4019                    "},
4020                    model_output: indoc! {"
4021                        <|fim_middle|>
4022                        X
4023                        <|fim_suffix|>
4024                        b
4025                    "},
4026                    expected: indoc! {"
4027                        X
4028                        b
4029                        c
4030                    "},
4031                },
4032                Case {
4033                    name: "replacement_at_end_no_suffix_context",
4034                    original: indoc! {"
4035                        a
4036                        b
4037                        c
4038                    "},
4039                    model_output: indoc! {"
4040                        b
4041                        <|fim_middle|>
4042                        Z
4043                        <|fim_suffix|>
4044                    "},
4045                    expected: indoc! {"
4046                        a
4047                        b
4048                        Z
4049                    "},
4050                },
4051                Case {
4052                    name: "context_with_trailing_newline_is_preserved",
4053                    original: indoc! {"
4054                        a
4055                        b
4056                        c
4057                    "},
4058                    model_output: indoc! {"
4059                        a
4060                        <|fim_middle|>
4061                        B
4062                        <|fim_suffix|>
4063                        c
4064                    "},
4065                    expected: indoc! {"
4066                        a
4067                        B
4068                        c
4069                    "},
4070                },
4071                Case {
4072                    name: "cursor_marker_passes_through_untouched",
4073                    original: indoc! {"
4074                        a
4075                        b
4076                        c
4077                    "},
4078                    model_output: indoc! {"
4079                        a
4080                        <|fim_middle|>
4081                        B<|user_cursor|>B
4082                        <|fim_suffix|>
4083                        c
4084                    "},
4085                    expected: indoc! {"
4086                        a
4087                        B<|user_cursor|>B
4088                        c
4089                    "},
4090                },
4091                Case {
4092                    name: "multiple_prefix_context_lines",
4093                    original: indoc! {"
4094                        a
4095                        b
4096                        c
4097                        d
4098                        e
4099                    "},
4100                    model_output: indoc! {"
4101                        b
4102                        c
4103                        <|fim_middle|>
4104                        D
4105                        <|fim_suffix|>
4106                        e
4107                    "},
4108                    expected: indoc! {"
4109                        a
4110                        b
4111                        c
4112                        D
4113                        e
4114                    "},
4115                },
4116            ];
4117
4118            for case in cases {
4119                let (edit_range, replacement) =
4120                    apply_variable_edit(case.original, case.model_output).unwrap();
4121                let mut edited = case.original.to_string();
4122                edited.replace_range(edit_range, &replacement);
4123                assert_eq!(edited, case.expected, "{}", case.name);
4124            }
4125        }
4126
4127        #[test]
4128        fn test_patch_to_variable_edit() {
4129            struct Case {
4130                name: &'static str,
4131                old: &'static str,
4132                patch: &'static str,
4133                cursor_offset: Option<usize>,
4134                expected_variable_edit: &'static str,
4135                expected_after_apply: &'static str,
4136            }
4137
4138            let cases = [
4139                Case {
4140                    name: "simple_replacement",
4141                    old: indoc! {"
4142                        zero
4143                        one
4144                        two
4145                        three
4146                        four
4147                        five
4148                    "},
4149                    patch: indoc! {"
4150                        @@ -3,3 +3,3 @@
4151                         two
4152                        -three
4153                        +THREE
4154                         four
4155                    "},
4156                    cursor_offset: None,
4157                    expected_variable_edit: indoc! {"
4158                        one
4159                        two
4160                        <|fim_middle|>
4161                        THREE
4162                        <|fim_suffix|>
4163                        four
4164                        five
4165                    "},
4166                    expected_after_apply: indoc! {"
4167                        zero
4168                        one
4169                        two
4170                        THREE
4171                        four
4172                        five
4173                    "},
4174                },
4175                Case {
4176                    name: "insertion",
4177                    old: indoc! {"
4178                        a
4179                        b
4180                        c
4181                        d
4182                        e
4183                    "},
4184                    patch: indoc! {"
4185                        @@ -2,0 +3,1 @@
4186                         b
4187                        +X
4188                         c
4189                    "},
4190                    cursor_offset: None,
4191                    expected_variable_edit: indoc! {"
4192                        a
4193                        b
4194                        <|fim_middle|>
4195                        X
4196                        <|fim_suffix|>
4197                        c
4198                        d
4199                    "},
4200                    expected_after_apply: indoc! {"
4201                        a
4202                        b
4203                        X
4204                        c
4205                        d
4206                        e
4207                    "},
4208                },
4209                Case {
4210                    name: "deletion",
4211                    old: indoc! {"
4212                        a
4213                        b
4214                        c
4215                        d
4216                        e
4217                    "},
4218                    patch: indoc! {"
4219                        @@ -2,3 +2,2 @@
4220                         b
4221                        -c
4222                         d
4223                    "},
4224                    cursor_offset: None,
4225                    expected_variable_edit: indoc! {"
4226                        a
4227                        b
4228                        <|fim_middle|>
4229                        <|fim_suffix|>
4230                        d
4231                        e
4232                    "},
4233                    expected_after_apply: indoc! {"
4234                        a
4235                        b
4236                        d
4237                        e
4238                    "},
4239                },
4240                Case {
4241                    name: "edit_near_start",
4242                    old: indoc! {"
4243                        first
4244                        second
4245                        third
4246                        fourth
4247                    "},
4248                    patch: indoc! {"
4249                        @@ -1,1 +1,1 @@
4250                        -first
4251                        +FIRST
4252                    "},
4253                    cursor_offset: None,
4254                    expected_variable_edit: indoc! {"
4255                        <|fim_middle|>
4256                        FIRST
4257                        <|fim_suffix|>
4258                        second
4259                        third
4260                    "},
4261                    expected_after_apply: indoc! {"
4262                        FIRST
4263                        second
4264                        third
4265                        fourth
4266                    "},
4267                },
4268                Case {
4269                    name: "edit_near_end",
4270                    old: indoc! {"
4271                        first
4272                        second
4273                        third
4274                        fourth
4275                    "},
4276                    patch: indoc! {"
4277                        @@ -4,1 +4,1 @@
4278                        -fourth
4279                        +FOURTH
4280                    "},
4281                    cursor_offset: None,
4282                    expected_variable_edit: indoc! {"
4283                        second
4284                        third
4285                        <|fim_middle|>
4286                        FOURTH
4287                        <|fim_suffix|>
4288                    "},
4289                    expected_after_apply: indoc! {"
4290                        first
4291                        second
4292                        third
4293                        FOURTH
4294                    "},
4295                },
4296                Case {
4297                    name: "cursor_at_start_of_replacement",
4298                    old: indoc! {"
4299                        zero
4300                        one
4301                        two
4302                        three
4303                        four
4304                        five
4305                    "},
4306                    patch: indoc! {"
4307                        @@ -3,3 +3,3 @@
4308                         two
4309                        -three
4310                        +THREE
4311                         four
4312                    "},
4313                    cursor_offset: Some(4),
4314                    expected_variable_edit: indoc! {"
4315                        one
4316                        two
4317                        <|fim_middle|>
4318                        <|user_cursor|>THREE
4319                        <|fim_suffix|>
4320                        four
4321                        five
4322                    "},
4323                    expected_after_apply: indoc! {"
4324                        zero
4325                        one
4326                        two
4327                        <|user_cursor|>THREE
4328                        four
4329                        five
4330                    "},
4331                },
4332                Case {
4333                    name: "cursor_in_middle_of_replacement",
4334                    old: indoc! {"
4335                        zero
4336                        one
4337                        two
4338                        three
4339                        four
4340                        five
4341                    "},
4342                    patch: indoc! {"
4343                        @@ -3,3 +3,3 @@
4344                         two
4345                        -three
4346                        +THREE
4347                         four
4348                    "},
4349                    cursor_offset: Some(6),
4350                    expected_variable_edit: indoc! {"
4351                        one
4352                        two
4353                        <|fim_middle|>
4354                        TH<|user_cursor|>REE
4355                        <|fim_suffix|>
4356                        four
4357                        five
4358                    "},
4359                    expected_after_apply: indoc! {"
4360                        zero
4361                        one
4362                        two
4363                        TH<|user_cursor|>REE
4364                        four
4365                        five
4366                    "},
4367                },
4368                Case {
4369                    name: "expands_context_when_two_lines_not_unique_before_and_after",
4370                    old: indoc! {"
4371                        one
4372                        a
4373                        b
4374                        c
4375                        d
4376                        two
4377                        a
4378                        b
4379                        c
4380                        d
4381                        three
4382                        a
4383                        b
4384                        c
4385                        d
4386                        four
4387                    "},
4388                    patch: indoc! {"
4389                        @@ -4,5 +4,5 @@
4390                         two
4391                         a
4392                         b
4393                        -c
4394                        +C
4395                         d
4396                         three
4397                    "},
4398                    cursor_offset: None,
4399                    expected_variable_edit: indoc! {"
4400                        two
4401                        a
4402                        b
4403                        <|fim_middle|>
4404                        C
4405                        <|fim_suffix|>
4406                        d
4407                        three
4408                    "},
4409                    expected_after_apply: indoc! {"
4410                        one
4411                        a
4412                        b
4413                        c
4414                        d
4415                        two
4416                        a
4417                        b
4418                        C
4419                        d
4420                        three
4421                        a
4422                        b
4423                        c
4424                        d
4425                        four
4426                    "},
4427                },
4428                Case {
4429                    name: "expands_context_when_two_lines_not_unique_before_and_after",
4430                    old: indoc! {"
4431                        {
4432                            {
4433                                one();
4434                            }
4435                        }
4436                        {
4437                            {
4438                                two();
4439                            }
4440                        }
4441                        {
4442                            {
4443                                three();
4444                            }
4445                        }
4446                        {
4447                            {
4448                                four();
4449                            }
4450                        }
4451                    "},
4452                    patch: indoc! {"
4453                        @@ -4,5 +4,5 @@
4454                             {
4455                        -        two();
4456                        +        TWO();
4457                             }
4458                    "},
4459                    cursor_offset: None,
4460                    expected_variable_edit: indoc! {"
4461                                one();
4462                            }
4463                        }
4464                        {
4465                            {
4466                        <|fim_middle|>
4467                                TWO();
4468                        <|fim_suffix|>
4469                            }
4470                        }
4471                        {
4472                            {
4473                                three();
4474                    "},
4475                    expected_after_apply: indoc! {"
4476                        {
4477                            {
4478                                one();
4479                            }
4480                        }
4481                        {
4482                            {
4483                                TWO();
4484                            }
4485                        }
4486                        {
4487                            {
4488                                three();
4489                            }
4490                        }
4491                        {
4492                            {
4493                                four();
4494                            }
4495                        }
4496                    "},
4497                },
4498            ];
4499
4500            for case in cases {
4501                let output =
4502                    patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
4503                        .unwrap_or_else(|error| {
4504                            panic!("failed converting patch for {}: {error}", case.name)
4505                        });
4506                assert_eq!(
4507                    output, case.expected_variable_edit,
4508                    "patch->variable_edit mismatch for {}",
4509                    case.name
4510                );
4511
4512                let (edit_range, replacement) = apply_variable_edit(case.old, &output)
4513                    .unwrap_or_else(|error| {
4514                        panic!("failed applying variable_edit for {}: {error}", case.name)
4515                    });
4516                let mut edited_by_variable_edit = case.old.to_string();
4517                edited_by_variable_edit.replace_range(edit_range, &replacement);
4518                assert_eq!(
4519                    edited_by_variable_edit, case.expected_after_apply,
4520                    "variable_edit apply mismatch for {}",
4521                    case.name
4522                );
4523
4524                let (expected_edit_range, expected_replacement) =
4525                    apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
4526                        |error| {
4527                            panic!(
4528                                "failed applying expected variable_edit for {}: {error}",
4529                                case.name
4530                            )
4531                        },
4532                    );
4533                let mut edited_by_expected_variable_edit = case.old.to_string();
4534                edited_by_expected_variable_edit
4535                    .replace_range(expected_edit_range, &expected_replacement);
4536                assert_eq!(
4537                    edited_by_expected_variable_edit, case.expected_after_apply,
4538                    "expected variable_edit apply mismatch for {}",
4539                    case.name
4540                );
4541            }
4542        }
4543
4544        #[test]
4545        fn test_write_cursor_excerpt_section() {
4546            let path = Path::new("test.rs");
4547            let context = "fn main() {\n    hello();\n}\n";
4548            let cursor_offset = 17;
4549            let mut prompt = String::new();
4550            write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
4551            assert_eq!(
4552                prompt,
4553                "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
4554            );
4555        }
4556    }
4557}
4558
4559/// The zeta1 prompt format
4560pub mod zeta1 {
4561    use super::*;
4562    use std::fmt::Write;
4563
4564    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
4565    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
4566    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
4567    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
4568
4569    const INSTRUCTION_HEADER: &str = concat!(
4570        "### Instruction:\n",
4571        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4572        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4573        "into account the cursor location.\n\n",
4574        "### User Edits:\n\n"
4575    );
4576    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
4577    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
4578
4579    /// Formats a complete zeta1 prompt from the input events and excerpt.
4580    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
4581        let mut prompt = String::with_capacity(
4582            INSTRUCTION_HEADER.len()
4583                + input_events.len()
4584                + EXCERPT_HEADER.len()
4585                + input_excerpt.len()
4586                + RESPONSE_HEADER.len(),
4587        );
4588        prompt.push_str(INSTRUCTION_HEADER);
4589        prompt.push_str(input_events);
4590        prompt.push_str(EXCERPT_HEADER);
4591        prompt.push_str(input_excerpt);
4592        prompt.push_str(RESPONSE_HEADER);
4593        prompt
4594    }
4595
4596    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
4597    /// editable and context byte-offset ranges within `cursor_excerpt`.
4598    pub fn format_zeta1_from_input(
4599        input: &ZetaPromptInput,
4600        editable_range: Range<usize>,
4601        context_range: Range<usize>,
4602    ) -> String {
4603        let events = format_zeta1_events(&input.events);
4604        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
4605        format_zeta1_prompt(&events, &excerpt)
4606    }
4607
4608    /// Formats events in zeta1 style (oldest first).
4609    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
4610        let mut result = String::new();
4611        for event in
4612            events
4613                .iter()
4614                .skip(events.len().saturating_sub(max_edit_event_count_for_format(
4615                    &ZetaFormat::V0114180EditableRegion,
4616                )))
4617        {
4618            let event_string = format_zeta1_event(event);
4619            if event_string.is_empty() {
4620                continue;
4621            }
4622            if !result.is_empty() {
4623                result.push_str("\n\n");
4624            }
4625            result.push_str(&event_string);
4626        }
4627        result
4628    }
4629
4630    fn format_zeta1_event(event: &Event) -> String {
4631        match event {
4632            Event::BufferChange {
4633                path,
4634                old_path,
4635                diff,
4636                ..
4637            } => {
4638                let mut prompt = String::new();
4639                if old_path != path {
4640                    writeln!(
4641                        prompt,
4642                        "User renamed {} to {}\n",
4643                        old_path.display(),
4644                        path.display()
4645                    )
4646                    .ok();
4647                }
4648                if !diff.is_empty() {
4649                    write!(
4650                        prompt,
4651                        "User edited {}:\n```diff\n{}\n```",
4652                        path.display(),
4653                        diff
4654                    )
4655                    .ok();
4656                }
4657                prompt
4658            }
4659        }
4660    }
4661
4662    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
4663    /// within `cursor_excerpt`.
4664    fn format_zeta1_excerpt(
4665        input: &ZetaPromptInput,
4666        editable_range: Range<usize>,
4667        context_range: Range<usize>,
4668    ) -> String {
4669        let path_str = input.cursor_path.to_string_lossy();
4670        let excerpt = &*input.cursor_excerpt;
4671        let cursor_offset = input.cursor_offset_in_excerpt;
4672
4673        let mut prompt = String::new();
4674        writeln!(&mut prompt, "```{path_str}").ok();
4675
4676        let starts_at_file_beginning =
4677            input.excerpt_start_row == Some(0) && context_range.start == 0;
4678        if starts_at_file_beginning {
4679            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
4680        }
4681
4682        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
4683
4684        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
4685        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
4686        prompt.push_str(CURSOR_MARKER);
4687        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
4688        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
4689
4690        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
4691        write!(prompt, "\n```").ok();
4692
4693        prompt
4694    }
4695
4696    /// Cleans zeta1 model output by extracting content between editable region
4697    /// markers and converting the zeta1 cursor marker to the universal one.
4698    /// Returns `None` if the output doesn't contain the expected markers.
4699    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
4700        let content = output.replace(CURSOR_MARKER, "");
4701
4702        let content_start = content
4703            .find(EDITABLE_REGION_START_MARKER)
4704            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
4705            .map(|pos| {
4706                if content.as_bytes().get(pos) == Some(&b'\n') {
4707                    pos + 1
4708                } else {
4709                    pos
4710                }
4711            })
4712            .unwrap_or(0);
4713
4714        let content_end = content
4715            .find(EDITABLE_REGION_END_MARKER)
4716            .map(|pos| {
4717                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
4718                    pos - 1
4719                } else {
4720                    pos
4721                }
4722            })
4723            .unwrap_or(content.len());
4724
4725        if content_start > content_end {
4726            return Some(String::new());
4727        }
4728
4729        let extracted = &content[content_start..content_end];
4730
4731        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
4732            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
4733            let text_before_cursor = text_before_cursor
4734                .find(EDITABLE_REGION_START_MARKER)
4735                .map(|pos| {
4736                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
4737                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
4738                        after_marker + 1
4739                    } else {
4740                        after_marker
4741                    }
4742                })
4743                .unwrap_or(0);
4744            let offset_in_extracted = zeta1_cursor_pos
4745                .saturating_sub(text_before_cursor)
4746                .min(extracted.len());
4747            offset_in_extracted
4748        });
4749
4750        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
4751        if let Some(offset) = cursor_offset {
4752            result.push_str(&extracted[..offset]);
4753            result.push_str(super::CURSOR_MARKER);
4754            result.push_str(&extracted[offset..]);
4755        } else {
4756            result.push_str(extracted);
4757        }
4758
4759        Some(result)
4760    }
4761}
4762
4763#[cfg(test)]
4764mod tests {
4765    use super::*;
4766    use indoc::indoc;
4767
4768    fn make_input(
4769        cursor_excerpt: &str,
4770        editable_range: Range<usize>,
4771        cursor_offset: usize,
4772        events: Vec<Event>,
4773        related_files: Vec<RelatedFile>,
4774    ) -> ZetaPromptInput {
4775        let context_range = 0..cursor_excerpt.len();
4776        ZetaPromptInput {
4777            cursor_path: Path::new("test.rs").into(),
4778            cursor_excerpt: cursor_excerpt.into(),
4779            cursor_offset_in_excerpt: cursor_offset,
4780            excerpt_start_row: None,
4781            events: events.into_iter().map(Arc::new).collect(),
4782            related_files: Some(related_files),
4783            active_buffer_diagnostics: vec![],
4784            excerpt_ranges: ExcerptRanges {
4785                editable_150: editable_range.clone(),
4786                editable_180: editable_range.clone(),
4787                editable_350: editable_range,
4788                editable_150_context_350: context_range.clone(),
4789                editable_180_context_350: context_range.clone(),
4790                editable_350_context_150: context_range,
4791                ..Default::default()
4792            },
4793            syntax_ranges: None,
4794            in_open_source_repo: false,
4795            can_collect_data: false,
4796            repo_url: None,
4797        }
4798    }
4799
4800    fn make_input_with_context_range(
4801        excerpt: &str,
4802        editable_range: Range<usize>,
4803        context_range: Range<usize>,
4804        cursor_offset: usize,
4805    ) -> ZetaPromptInput {
4806        ZetaPromptInput {
4807            cursor_path: Path::new("test.rs").into(),
4808            cursor_excerpt: excerpt.into(),
4809            cursor_offset_in_excerpt: cursor_offset,
4810            excerpt_start_row: None,
4811            events: vec![],
4812            related_files: Some(vec![]),
4813            active_buffer_diagnostics: vec![],
4814            excerpt_ranges: ExcerptRanges {
4815                editable_150: editable_range.clone(),
4816                editable_180: editable_range.clone(),
4817                editable_350: editable_range,
4818                editable_150_context_350: context_range.clone(),
4819                editable_180_context_350: context_range.clone(),
4820                editable_350_context_150: context_range,
4821                ..Default::default()
4822            },
4823            syntax_ranges: None,
4824            in_open_source_repo: false,
4825            can_collect_data: false,
4826            repo_url: None,
4827        }
4828    }
4829
4830    fn make_event(path: &str, diff: &str) -> Event {
4831        Event::BufferChange {
4832            path: Path::new(path).into(),
4833            old_path: Path::new(path).into(),
4834            diff: diff.to_string(),
4835            predicted: false,
4836            in_open_source_repo: false,
4837        }
4838    }
4839
4840    fn make_related_file(path: &str, content: &str) -> RelatedFile {
4841        RelatedFile {
4842            path: Path::new(path).into(),
4843            max_row: content.lines().count() as u32,
4844            excerpts: vec![RelatedExcerpt {
4845                row_range: 0..content.lines().count() as u32,
4846                text: content.into(),
4847                order: 0,
4848            }],
4849            in_open_source_repo: false,
4850        }
4851    }
4852
4853    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> Option<String> {
4854        format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4855    }
4856
4857    fn budget_with_margin(requested_tokens: usize) -> usize {
4858        ((requested_tokens as f64) / 0.9).ceil() as usize
4859    }
4860
4861    #[test]
4862    fn test_no_truncation_when_within_budget() {
4863        let input = make_input(
4864            "prefix\neditable\nsuffix",
4865            7..15,
4866            10,
4867            vec![make_event("a.rs", "-old\n+new\n")],
4868            vec![make_related_file("related.rs", "fn helper() {}\n")],
4869        );
4870
4871        assert_eq!(
4872            format_with_budget(&input, 10000).unwrap(),
4873            indoc! {r#"
4874                <|file_sep|>related.rs
4875                fn helper() {}
4876                <|file_sep|>edit history
4877                --- a/a.rs
4878                +++ b/a.rs
4879                -old
4880                +new
4881                <|file_sep|>test.rs
4882                <|fim_prefix|>
4883                prefix
4884                <|fim_middle|>current
4885                edi<|user_cursor|>table
4886                <|fim_suffix|>
4887
4888                suffix
4889                <|fim_middle|>updated
4890            "#}
4891            .to_string()
4892        );
4893    }
4894
4895    #[test]
4896    fn test_truncation_drops_edit_history_when_budget_tight() {
4897        let input = make_input(
4898            "code",
4899            0..4,
4900            2,
4901            vec![make_event("a.rs", "-x\n+y\n")],
4902            vec![
4903                make_related_file("r1.rs", "aaaaaaa\n"),
4904                make_related_file("r2.rs", "bbbbbbb\n"),
4905            ],
4906        );
4907
4908        assert_eq!(
4909            format_with_budget(&input, 10000).unwrap(),
4910            indoc! {r#"
4911                <|file_sep|>r1.rs
4912                aaaaaaa
4913                <|file_sep|>r2.rs
4914                bbbbbbb
4915                <|file_sep|>edit history
4916                --- a/a.rs
4917                +++ b/a.rs
4918                -x
4919                +y
4920                <|file_sep|>test.rs
4921                <|fim_prefix|>
4922                <|fim_middle|>current
4923                co<|user_cursor|>de
4924                <|fim_suffix|>
4925                <|fim_middle|>updated
4926            "#}
4927            .to_string()
4928        );
4929
4930        assert_eq!(
4931            format_with_budget(&input, budget_with_margin(55)),
4932            Some(
4933                indoc! {r#"
4934                <|file_sep|>edit history
4935                --- a/a.rs
4936                +++ b/a.rs
4937                -x
4938                +y
4939                <|file_sep|>test.rs
4940                <|fim_prefix|>
4941                <|fim_middle|>current
4942                co<|user_cursor|>de
4943                <|fim_suffix|>
4944                <|fim_middle|>updated
4945            "#}
4946                .to_string()
4947            )
4948        );
4949    }
4950
4951    #[test]
4952    fn test_truncation_includes_partial_excerpts() {
4953        let input = make_input(
4954            "x",
4955            0..1,
4956            0,
4957            vec![],
4958            vec![RelatedFile {
4959                path: Path::new("big.rs").into(),
4960                max_row: 30,
4961                in_open_source_repo: false,
4962                excerpts: vec![
4963                    RelatedExcerpt {
4964                        row_range: 0..10,
4965                        text: "first excerpt\n".into(),
4966                        order: 0,
4967                    },
4968                    RelatedExcerpt {
4969                        row_range: 10..20,
4970                        text: "second excerpt\n".into(),
4971                        order: 0,
4972                    },
4973                    RelatedExcerpt {
4974                        row_range: 20..30,
4975                        text: "third excerpt\n".into(),
4976                        order: 0,
4977                    },
4978                ],
4979            }],
4980        );
4981
4982        assert_eq!(
4983            format_with_budget(&input, 10000).unwrap(),
4984            indoc! {r#"
4985                <|file_sep|>big.rs
4986                first excerpt
4987                ...
4988                second excerpt
4989                ...
4990                third excerpt
4991                <|file_sep|>test.rs
4992                <|fim_prefix|>
4993                <|fim_middle|>current
4994                <|user_cursor|>x
4995                <|fim_suffix|>
4996                <|fim_middle|>updated
4997            "#}
4998            .to_string()
4999        );
5000
5001        assert_eq!(
5002            format_with_budget(&input, budget_with_margin(50)).unwrap(),
5003            indoc! {r#"
5004                <|file_sep|>big.rs
5005                first excerpt
5006                ...
5007                <|file_sep|>test.rs
5008                <|fim_prefix|>
5009                <|fim_middle|>current
5010                <|user_cursor|>x
5011                <|fim_suffix|>
5012                <|fim_middle|>updated
5013            "#}
5014            .to_string()
5015        );
5016    }
5017
5018    #[test]
5019    fn test_truncation_prioritizes_lower_order_excerpts() {
5020        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
5021        // With tight budget, only the lower-order excerpt from file_b should be included.
5022        let input = make_input(
5023            "x",
5024            0..1,
5025            0,
5026            vec![],
5027            vec![
5028                RelatedFile {
5029                    path: Path::new("file_a.rs").into(),
5030                    max_row: 10,
5031                    in_open_source_repo: false,
5032                    excerpts: vec![RelatedExcerpt {
5033                        row_range: 0..10,
5034                        text: "low priority content\n".into(),
5035                        order: 5,
5036                    }],
5037                },
5038                RelatedFile {
5039                    path: Path::new("file_b.rs").into(),
5040                    max_row: 10,
5041                    in_open_source_repo: false,
5042                    excerpts: vec![RelatedExcerpt {
5043                        row_range: 0..10,
5044                        text: "high priority content\n".into(),
5045                        order: 1,
5046                    }],
5047                },
5048            ],
5049        );
5050
5051        // With large budget, both files included; rendered in stable lexicographic order.
5052        assert_eq!(
5053            format_with_budget(&input, 10000).unwrap(),
5054            indoc! {r#"
5055                <|file_sep|>file_a.rs
5056                low priority content
5057                <|file_sep|>file_b.rs
5058                high priority content
5059                <|file_sep|>test.rs
5060                <|fim_prefix|>
5061                <|fim_middle|>current
5062                <|user_cursor|>x
5063                <|fim_suffix|>
5064                <|fim_middle|>updated
5065            "#}
5066            .to_string()
5067        );
5068
5069        // With tight budget, only file_b (lower order) fits.
5070        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
5071        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
5072        // file_a would need another 14 tokens, which doesn't fit.
5073        assert_eq!(
5074            format_with_budget(&input, budget_with_margin(52)).unwrap(),
5075            indoc! {r#"
5076                <|file_sep|>file_b.rs
5077                high priority content
5078                <|file_sep|>test.rs
5079                <|fim_prefix|>
5080                <|fim_middle|>current
5081                <|user_cursor|>x
5082                <|fim_suffix|>
5083                <|fim_middle|>updated
5084            "#}
5085            .to_string()
5086        );
5087    }
5088
5089    #[test]
5090    fn test_truncation_drops_high_order_excerpts_within_file() {
5091        // A single file has excerpts at order 1 and order 3. With a tight budget,
5092        // only the order-1 excerpts are included while the order-3 excerpt is
5093        // dropped — even though they belong to the same file. This also preserves
5094        // the parent invariant: parent outline items have order ≤ their best
5095        // child, so they're always included when any child is.
5096        let input = make_input(
5097            "x",
5098            0..1,
5099            0,
5100            vec![],
5101            vec![RelatedFile {
5102                path: Path::new("mod.rs").into(),
5103                max_row: 30,
5104                in_open_source_repo: false,
5105                excerpts: vec![
5106                    RelatedExcerpt {
5107                        row_range: 0..5,
5108                        text: "mod header\n".into(),
5109                        order: 1,
5110                    },
5111                    RelatedExcerpt {
5112                        row_range: 5..15,
5113                        text: "important fn\n".into(),
5114                        order: 1,
5115                    },
5116                    RelatedExcerpt {
5117                        row_range: 15..30,
5118                        text: "less important fn\n".into(),
5119                        order: 3,
5120                    },
5121                ],
5122            }],
5123        );
5124
5125        // With large budget, all three excerpts included.
5126        assert_eq!(
5127            format_with_budget(&input, 10000).unwrap(),
5128            indoc! {r#"
5129                <|file_sep|>mod.rs
5130                mod header
5131                ...
5132                important fn
5133                ...
5134                less important fn
5135                <|file_sep|>test.rs
5136                <|fim_prefix|>
5137                <|fim_middle|>current
5138                <|user_cursor|>x
5139                <|fim_suffix|>
5140                <|fim_middle|>updated
5141            "#}
5142            .to_string()
5143        );
5144
5145        // With tight budget, only order<=1 excerpts included (header + important fn).
5146        assert_eq!(
5147            format_with_budget(&input, budget_with_margin(55)).unwrap(),
5148            indoc! {r#"
5149                <|file_sep|>mod.rs
5150                mod header
5151                ...
5152                important fn
5153                ...
5154                <|file_sep|>test.rs
5155                <|fim_prefix|>
5156                <|fim_middle|>current
5157                <|user_cursor|>x
5158                <|fim_suffix|>
5159                <|fim_middle|>updated
5160            "#}
5161            .to_string()
5162        );
5163    }
5164
5165    #[test]
5166    fn test_truncation_drops_older_events_first() {
5167        let input = make_input(
5168            "x",
5169            0..1,
5170            0,
5171            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
5172            vec![],
5173        );
5174
5175        assert_eq!(
5176            format_with_budget(&input, 10000).unwrap(),
5177            indoc! {r#"
5178                <|file_sep|>edit history
5179                --- a/old.rs
5180                +++ b/old.rs
5181                -1
5182                --- a/new.rs
5183                +++ b/new.rs
5184                -2
5185                <|file_sep|>test.rs
5186                <|fim_prefix|>
5187                <|fim_middle|>current
5188                <|user_cursor|>x
5189                <|fim_suffix|>
5190                <|fim_middle|>updated
5191            "#}
5192            .to_string()
5193        );
5194
5195        assert_eq!(
5196            format_with_budget(&input, 60).unwrap(),
5197            indoc! {r#"
5198                <|file_sep|>edit history
5199                --- a/new.rs
5200                +++ b/new.rs
5201                -2
5202                <|file_sep|>test.rs
5203                <|fim_prefix|>
5204                <|fim_middle|>current
5205                <|user_cursor|>x
5206                <|fim_suffix|>
5207                <|fim_middle|>updated
5208            "#}
5209            .to_string()
5210        );
5211    }
5212
5213    #[test]
5214    fn test_cursor_excerpt_always_included_with_minimal_budget() {
5215        let input = make_input(
5216            "fn main() {}",
5217            0..12,
5218            3,
5219            vec![make_event("a.rs", "-old\n+new\n")],
5220            vec![make_related_file("related.rs", "helper\n")],
5221        );
5222
5223        assert!(format_with_budget(&input, 30).is_none())
5224    }
5225
5226    #[track_caller]
5227    fn format_seed_coder(input: &ZetaPromptInput) -> String {
5228        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
5229            .expect("seed coder prompt formatting should succeed")
5230    }
5231
5232    #[track_caller]
5233    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
5234        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
5235            .expect("seed coder prompt formatting should succeed")
5236    }
5237
5238    #[test]
5239    fn test_seed_coder_alias_matches_v0211_seed_coder() {
5240        let input = make_input(
5241            "prefix\neditable\nsuffix",
5242            7..15,
5243            10,
5244            vec![make_event("a.rs", "-old\n+new\n")],
5245            vec![make_related_file("related.rs", "fn helper() {}\n")],
5246        );
5247
5248        assert_eq!(
5249            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 10000),
5250            format_prompt_with_budget_for_format(&input, ZetaFormat::V0331SeedCoderModelPy, 10000)
5251        );
5252        assert_eq!(
5253            ZetaFormat::parse("V0331SeedCoderModelPy").unwrap(),
5254            ZetaFormat::V0331SeedCoderModelPy
5255        );
5256    }
5257
5258    #[test]
5259    fn test_seed_coder_basic_format() {
5260        let input = make_input(
5261            "prefix\neditable\nsuffix",
5262            7..15,
5263            10,
5264            vec![make_event("a.rs", "-old\n+new\n")],
5265            vec![make_related_file("related.rs", "fn helper() {}\n")],
5266        );
5267
5268        assert_eq!(
5269            format_seed_coder(&input),
5270            indoc! {r#"
5271                <[fim-suffix]>
5272                suffix
5273                <[fim-prefix]><filename>related.rs
5274                fn helper() {}
5275
5276                <filename>edit_history
5277                --- a/a.rs
5278                +++ b/a.rs
5279                -old
5280                +new
5281
5282                <filename>test.rs
5283                prefix
5284                <<<<<<< CURRENT
5285                edi<|user_cursor|>table
5286                =======
5287                <[fim-middle]>"#}
5288        );
5289    }
5290
5291    #[test]
5292    fn test_v0420_formats_diagnostics_before_related_files() {
5293        let mut input = make_input(
5294            "prefix\neditable\nsuffix",
5295            7..15,
5296            10,
5297            vec![],
5298            vec![make_related_file("related.rs", "fn helper() {}\n")],
5299        );
5300        input.active_buffer_diagnostics = vec![ActiveBufferDiagnostic {
5301            severity: Some(1),
5302            message: "missing semicolon".to_string(),
5303            snippet: "let value = 1".to_string(),
5304            snippet_buffer_row_range: 1..2,
5305            diagnostic_range_in_snippet: 12..13,
5306        }];
5307
5308        let prompt =
5309            format_prompt_with_budget_for_format(&input, ZetaFormat::V0420Diagnostics, 10000)
5310                .expect("v0420 prompt formatting should succeed");
5311
5312        assert_eq!(
5313            prompt,
5314            indoc! {r#"
5315                <[fim-suffix]>
5316                suffix
5317                <[fim-prefix]><filename>diagnostics
5318                *missing semicolon*:
5319                ```
5320                let value = 1
5321                ```
5322
5323                <filename>related.rs
5324                fn helper() {}
5325
5326                <filename>test.rs
5327                prefix
5328                <|marker_1|>edi<|user_cursor|>table<|marker_2|>
5329                <[fim-middle]>"#}
5330        );
5331    }
5332
5333    #[test]
5334    fn test_v0317_formats_prompt_with_many_related_files() {
5335        let related_files = (0..900)
5336            .map(|index| {
5337                make_related_file(
5338                    &format!("related_{index}.rs"),
5339                    "fn helper() {\n    let value = 1;\n}\n",
5340                )
5341            })
5342            .collect();
5343
5344        let input = make_input(
5345            "code",
5346            0..4,
5347            2,
5348            vec![make_event("a.rs", "-x\n+y\n")],
5349            related_files,
5350        );
5351
5352        let prompt =
5353            format_prompt_with_budget_for_format(&input, ZetaFormat::V0317SeedMultiRegions, 4096);
5354
5355        assert!(prompt.is_some());
5356        let prompt = prompt.expect("v0317 should produce a prompt under high related-file count");
5357        assert!(prompt.contains("test.rs"));
5358        assert!(prompt.contains(CURSOR_MARKER));
5359    }
5360
5361    #[test]
5362    fn test_v0327_formats_single_file_prompt_without_related_files() {
5363        let excerpt = indoc! {"
5364            line01
5365            line02
5366            line03
5367            line04
5368            line05
5369            line06
5370            line07
5371            line08
5372            line09
5373            line10
5374            line11
5375            line12
5376            line13
5377            line14
5378            line15
5379            line16
5380            line17
5381            line18
5382            line19
5383            line20
5384        "};
5385        let cursor_offset = excerpt.find("line10").expect("cursor line exists");
5386        let input = make_input(
5387            excerpt,
5388            0..excerpt.len(),
5389            cursor_offset,
5390            vec![make_event("a.rs", "-x\n+y\n")],
5391            vec![make_related_file("related.rs", "fn helper() {}\n")],
5392        );
5393
5394        let prompt =
5395            format_prompt_with_budget_for_format(&input, ZetaFormat::V0327SingleFile, 4096)
5396                .expect("v0327 prompt should fit");
5397
5398        assert!(prompt.contains("line01"));
5399        assert!(prompt.contains("line20"));
5400        assert!(prompt.contains("<filename>edit_history"));
5401        assert!(prompt.contains("<filename>test.rs"));
5402        assert!(prompt.contains(CURSOR_MARKER));
5403        assert!(!prompt.contains("related.rs"));
5404        assert!(!prompt.contains("fn helper() {}"));
5405    }
5406
5407    #[test]
5408    fn test_v0327_resolve_cursor_region_uses_full_excerpt_context() {
5409        let excerpt = (0..80)
5410            .map(|index| format!("l{index:02}\n"))
5411            .collect::<String>();
5412        let cursor_offset = excerpt.find("l40").expect("cursor line exists");
5413        let input = make_input(&excerpt, 0..excerpt.len(), cursor_offset, vec![], vec![]);
5414
5415        let (context, editable_range, context_range, adjusted_cursor) =
5416            resolve_cursor_region(&input, ZetaFormat::V0327SingleFile);
5417
5418        assert_eq!(context, excerpt);
5419        assert_eq!(context_range, 0..excerpt.len());
5420        assert_eq!(adjusted_cursor, cursor_offset);
5421        assert!(editable_range.start < adjusted_cursor);
5422        assert!(editable_range.end > adjusted_cursor);
5423        assert!(editable_range.end < excerpt.len());
5424    }
5425
5426    #[test]
5427    fn test_seed_coder_no_context() {
5428        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
5429
5430        assert_eq!(
5431            format_seed_coder(&input),
5432            indoc! {r#"
5433                <[fim-suffix]>
5434                after
5435                <[fim-prefix]><filename>test.rs
5436                before
5437                <<<<<<< CURRENT
5438                mid<|user_cursor|>dle
5439                =======
5440                <[fim-middle]>"#}
5441        );
5442    }
5443
5444    #[test]
5445    fn test_seed_coder_truncation_drops_context() {
5446        let input = make_input(
5447            "code",
5448            0..4,
5449            2,
5450            vec![make_event("a.rs", "-x\n+y\n")],
5451            vec![make_related_file("r1.rs", "content\n")],
5452        );
5453
5454        // With large budget, everything is included
5455        assert_eq!(
5456            format_seed_coder(&input),
5457            indoc! {r#"
5458                <[fim-suffix]>
5459                <[fim-prefix]><filename>r1.rs
5460                content
5461
5462                <filename>edit_history
5463                --- a/a.rs
5464                +++ b/a.rs
5465                -x
5466                +y
5467
5468                <filename>test.rs
5469                <<<<<<< CURRENT
5470                co<|user_cursor|>de
5471                =======
5472                <[fim-middle]>"#}
5473        );
5474
5475        assert_eq!(
5476            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 24),
5477            None
5478        );
5479
5480        assert_eq!(
5481            format_seed_coder_with_budget(&input, 40),
5482            indoc! {r#"
5483                <[fim-suffix]>
5484                <[fim-prefix]><filename>test.rs
5485                <<<<<<< CURRENT
5486                co<|user_cursor|>de
5487                =======
5488                <[fim-middle]>"#
5489            }
5490        )
5491    }
5492
5493    #[test]
5494    fn test_seed_coder_truncation_prioritizes_lower_order() {
5495        let input = make_input(
5496            "code",
5497            0..4,
5498            2,
5499            vec![],
5500            vec![
5501                RelatedFile {
5502                    path: Path::new("low_prio.rs").into(),
5503                    max_row: 5,
5504                    in_open_source_repo: false,
5505                    excerpts: vec![RelatedExcerpt {
5506                        row_range: 0..5,
5507                        text: "low prio\n".into(),
5508                        order: 10,
5509                    }],
5510                },
5511                RelatedFile {
5512                    path: Path::new("high_prio.rs").into(),
5513                    max_row: 5,
5514                    in_open_source_repo: false,
5515                    excerpts: vec![RelatedExcerpt {
5516                        row_range: 0..5,
5517                        text: "high prio\n".into(),
5518                        order: 1,
5519                    }],
5520                },
5521            ],
5522        );
5523
5524        // With large budget, both included; rendered in stable lexicographic order.
5525        assert_eq!(
5526            format_seed_coder(&input),
5527            indoc! {r#"
5528                <[fim-suffix]>
5529                <[fim-prefix]><filename>low_prio.rs
5530                low prio
5531                <filename>high_prio.rs
5532                high prio
5533
5534                <filename>test.rs
5535                <<<<<<< CURRENT
5536                co<|user_cursor|>de
5537                =======
5538                <[fim-middle]>"#}
5539        );
5540
5541        // With tight budget under the generic heuristic, context is dropped but the
5542        // minimal cursor section still fits.
5543        assert_eq!(
5544            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 44),
5545            Some(
5546                indoc! {r#"
5547                    <[fim-suffix]>
5548                    <[fim-prefix]><filename>test.rs
5549                    <<<<<<< CURRENT
5550                    co<|user_cursor|>de
5551                    =======
5552                    <[fim-middle]>"#}
5553                .to_string()
5554            )
5555        );
5556    }
5557
5558    #[test]
5559    fn test_format_zeta1_from_input_basic() {
5560        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
5561        let input = ZetaPromptInput {
5562            cursor_path: Path::new("src/main.rs").into(),
5563            cursor_excerpt: excerpt.into(),
5564            cursor_offset_in_excerpt: 30,
5565            excerpt_start_row: Some(0),
5566            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
5567            related_files: Some(vec![]),
5568            active_buffer_diagnostics: vec![],
5569            excerpt_ranges: ExcerptRanges {
5570                editable_150: 15..41,
5571                editable_180: 15..41,
5572                editable_350: 15..41,
5573                editable_150_context_350: 0..excerpt.len(),
5574                editable_180_context_350: 0..excerpt.len(),
5575                editable_350_context_150: 0..excerpt.len(),
5576                ..Default::default()
5577            },
5578            syntax_ranges: None,
5579            in_open_source_repo: false,
5580            can_collect_data: false,
5581            repo_url: None,
5582        };
5583
5584        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
5585
5586        assert_eq!(
5587            prompt,
5588            concat!(
5589                "### Instruction:\n",
5590                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5591                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5592                "into account the cursor location.\n",
5593                "\n",
5594                "### User Edits:\n",
5595                "\n",
5596                "User edited other.rs:\n",
5597                "```diff\n",
5598                "-old\n",
5599                "+new\n",
5600                "\n",
5601                "```\n",
5602                "\n",
5603                "### User Excerpt:\n",
5604                "\n",
5605                "```src/main.rs\n",
5606                "<|start_of_file|>\n",
5607                "fn before() {}\n",
5608                "<|editable_region_start|>\n",
5609                "fn foo() {\n",
5610                "    <|user_cursor_is_here|>let x = 1;\n",
5611                "\n",
5612                "<|editable_region_end|>}\n",
5613                "fn after() {}\n",
5614                "\n",
5615                "```\n",
5616                "\n",
5617                "### Response:\n",
5618            ),
5619        );
5620    }
5621
5622    #[test]
5623    fn test_format_zeta1_from_input_no_start_of_file() {
5624        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
5625        let input = ZetaPromptInput {
5626            cursor_path: Path::new("src/main.rs").into(),
5627            cursor_excerpt: excerpt.into(),
5628            cursor_offset_in_excerpt: 15,
5629            excerpt_start_row: Some(10),
5630            events: vec![],
5631            related_files: Some(vec![]),
5632            active_buffer_diagnostics: vec![],
5633            excerpt_ranges: ExcerptRanges {
5634                editable_150: 0..28,
5635                editable_180: 0..28,
5636                editable_350: 0..28,
5637                editable_150_context_350: 0..28,
5638                editable_180_context_350: 0..28,
5639                editable_350_context_150: 0..28,
5640                ..Default::default()
5641            },
5642            syntax_ranges: None,
5643            in_open_source_repo: false,
5644            can_collect_data: false,
5645            repo_url: None,
5646        };
5647
5648        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
5649
5650        assert_eq!(
5651            prompt,
5652            concat!(
5653                "### Instruction:\n",
5654                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5655                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5656                "into account the cursor location.\n",
5657                "\n",
5658                "### User Edits:\n",
5659                "\n",
5660                "\n",
5661                "\n",
5662                "### User Excerpt:\n",
5663                "\n",
5664                "```src/main.rs\n",
5665                "<|editable_region_start|>\n",
5666                "fn foo() {\n",
5667                "    <|user_cursor_is_here|>let x = 1;\n",
5668                "}\n",
5669                "\n",
5670                "<|editable_region_end|>\n",
5671                "```\n",
5672                "\n",
5673                "### Response:\n",
5674            ),
5675        );
5676    }
5677
5678    #[test]
5679    fn test_format_zeta1_from_input_with_sub_ranges() {
5680        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
5681        let editable_range = 10..37;
5682        let context_range = 0..excerpt.len();
5683
5684        let input = ZetaPromptInput {
5685            cursor_path: Path::new("test.rs").into(),
5686            cursor_excerpt: excerpt.into(),
5687            cursor_offset_in_excerpt: 25,
5688            excerpt_start_row: Some(0),
5689            events: vec![],
5690            related_files: Some(vec![]),
5691            active_buffer_diagnostics: vec![],
5692            excerpt_ranges: ExcerptRanges {
5693                editable_150: editable_range.clone(),
5694                editable_180: editable_range.clone(),
5695                editable_350: editable_range.clone(),
5696                editable_150_context_350: context_range.clone(),
5697                editable_180_context_350: context_range.clone(),
5698                editable_350_context_150: context_range.clone(),
5699                ..Default::default()
5700            },
5701            syntax_ranges: None,
5702            in_open_source_repo: false,
5703            can_collect_data: false,
5704            repo_url: None,
5705        };
5706
5707        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
5708
5709        assert_eq!(
5710            prompt,
5711            concat!(
5712                "### Instruction:\n",
5713                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5714                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5715                "into account the cursor location.\n",
5716                "\n",
5717                "### User Edits:\n",
5718                "\n",
5719                "\n",
5720                "\n",
5721                "### User Excerpt:\n",
5722                "\n",
5723                "```test.rs\n",
5724                "<|start_of_file|>\n",
5725                "// prefix\n",
5726                "<|editable_region_start|>\n",
5727                "fn foo() {\n",
5728                "    <|user_cursor_is_here|>let x = 1;\n",
5729                "}\n",
5730                "<|editable_region_end|>\n",
5731                "// suffix\n",
5732                "\n",
5733                "```\n",
5734                "\n",
5735                "### Response:\n",
5736            ),
5737        );
5738    }
5739
5740    #[test]
5741    fn test_max_event_count() {
5742        fn make_numbered_event(index: usize) -> Event {
5743            return make_event(
5744                &format!("event-{index}.rs"),
5745                &format!("-old-{index}\n+new-{index}\n"),
5746            );
5747        }
5748        let input = make_input(
5749            "x",
5750            0..1,
5751            0,
5752            (0..3).map(make_numbered_event).collect(),
5753            vec![],
5754        );
5755
5756        let edit_history_section = format_edit_history_within_budget(
5757            &input.events,
5758            "<|file_sep|>",
5759            "edit history",
5760            usize::MAX,
5761            5,
5762        );
5763
5764        assert_eq!(
5765            &edit_history_section,
5766            indoc!(
5767                "
5768                <|file_sep|>edit history
5769                --- a/event-0.rs
5770                +++ b/event-0.rs
5771                -old-0
5772                +new-0
5773                --- a/event-1.rs
5774                +++ b/event-1.rs
5775                -old-1
5776                +new-1
5777                --- a/event-2.rs
5778                +++ b/event-2.rs
5779                -old-2
5780                +new-2
5781            "
5782            )
5783        );
5784
5785        let edit_history_section = format_edit_history_within_budget(
5786            &input.events,
5787            "<|file_sep|>",
5788            "edit history",
5789            usize::MAX,
5790            2,
5791        );
5792
5793        assert_eq!(
5794            &edit_history_section,
5795            indoc!(
5796                "
5797                <|file_sep|>edit history
5798                --- a/event-1.rs
5799                +++ b/event-1.rs
5800                -old-1
5801                +new-1
5802                --- a/event-2.rs
5803                +++ b/event-2.rs
5804                -old-2
5805                +new-2
5806            "
5807            )
5808        );
5809
5810        let edit_history_section = format_edit_history_within_budget(
5811            &input.events,
5812            "<|file_sep|>",
5813            "edit history",
5814            usize::MAX,
5815            0,
5816        );
5817
5818        assert_eq!(&edit_history_section, "");
5819    }
5820
5821    #[test]
5822    fn test_clean_zeta1_model_output_basic() {
5823        let output = indoc! {"
5824            <|editable_region_start|>
5825            fn main() {
5826                println!(\"hello\");
5827            }
5828            <|editable_region_end|>
5829        "};
5830
5831        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5832        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
5833    }
5834
5835    #[test]
5836    fn test_clean_zeta1_model_output_with_cursor() {
5837        let output = indoc! {"
5838            <|editable_region_start|>
5839            fn main() {
5840                <|user_cursor_is_here|>println!(\"hello\");
5841            }
5842            <|editable_region_end|>
5843        "};
5844
5845        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5846        assert_eq!(
5847            cleaned,
5848            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
5849        );
5850    }
5851
5852    #[test]
5853    fn test_clean_zeta1_model_output_no_markers() {
5854        let output = "fn main() {}\n";
5855        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5856        assert_eq!(cleaned, "fn main() {}\n");
5857    }
5858
5859    #[test]
5860    fn test_clean_zeta1_model_output_empty_region() {
5861        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
5862        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5863        assert_eq!(cleaned, "");
5864    }
5865
5866    fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
5867        let mut result = excerpt.to_string();
5868        result.replace_range(
5869            parsed_output.range_in_excerpt.clone(),
5870            &parsed_output.new_editable_region,
5871        );
5872        result
5873    }
5874
5875    #[test]
5876    fn test_parse_zeta2_model_output() {
5877        let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
5878        let context_start = excerpt.find("ctx start").unwrap();
5879        let context_end = excerpt.find("after ctx").unwrap();
5880        let editable_start = excerpt.find("editable old").unwrap();
5881        let editable_end = editable_start + "editable old\n".len();
5882        let input = make_input_with_context_range(
5883            excerpt,
5884            editable_start..editable_end,
5885            context_start..context_end,
5886            editable_start,
5887        );
5888
5889        let output = parse_zeta2_model_output(
5890            "editable new\n>>>>>>> UPDATED\n",
5891            ZetaFormat::V0131GitMergeMarkersPrefix,
5892            &input,
5893        )
5894        .unwrap();
5895
5896        assert_eq!(
5897            apply_edit(excerpt, &output),
5898            "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
5899        );
5900    }
5901
5902    #[test]
5903    fn test_parse_zeta2_model_output_identity() {
5904        let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
5905        let editable_start = excerpt.find("bbb").unwrap();
5906        let editable_end = excerpt.find("ddd").unwrap();
5907        let input = make_input_with_context_range(
5908            excerpt,
5909            editable_start..editable_end,
5910            0..excerpt.len(),
5911            editable_start,
5912        );
5913
5914        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5915        let output =
5916            parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
5917
5918        assert_eq!(apply_edit(excerpt, &output), excerpt);
5919    }
5920
5921    #[test]
5922    fn test_parse_zeta2_model_output_strips_end_marker() {
5923        let excerpt = "hello\nworld\n";
5924        let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
5925
5926        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5927        let output1 =
5928            parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
5929        let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();
5930
5931        assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
5932        assert_eq!(apply_edit(excerpt, &output1), "new content\n");
5933    }
5934
5935    #[test]
5936    fn test_parsed_output_to_patch_round_trips_through_udiff_application() {
5937        let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
5938        let context_start = excerpt.find("ctx start").unwrap();
5939        let context_end = excerpt.find("after ctx").unwrap();
5940        let editable_start = excerpt.find("editable old").unwrap();
5941        let editable_end = editable_start + "editable old\n".len();
5942        let input = make_input_with_context_range(
5943            excerpt,
5944            editable_start..editable_end,
5945            context_start..context_end,
5946            editable_start,
5947        );
5948
5949        let parsed = parse_zeta2_model_output(
5950            "editable new\n>>>>>>> UPDATED\n",
5951            ZetaFormat::V0131GitMergeMarkersPrefix,
5952            &input,
5953        )
5954        .unwrap();
5955        let expected = apply_edit(excerpt, &parsed);
5956        let patch = parsed_output_to_patch(&input, parsed).unwrap();
5957        let patched = udiff::apply_diff_to_string(&patch, excerpt).unwrap();
5958
5959        assert_eq!(patched, expected);
5960    }
5961
5962    #[test]
5963    fn test_special_tokens_not_triggered_by_comment_separator() {
5964        // Regression test for https://github.com/zed-industries/zed/issues/52489
5965        let excerpt = "fn main() {\n    // =======\n    println!(\"hello\");\n}\n";
5966        let input = make_input(excerpt, 0..excerpt.len(), 0, vec![], vec![]);
5967        assert!(
5968            !prompt_input_contains_special_tokens(&input, ZetaFormat::V0131GitMergeMarkersPrefix),
5969            "comment containing ======= should not trigger special token detection"
5970        );
5971    }
5972}