zeta_prompt.rs

   1pub mod excerpt_ranges;
   2pub mod multi_region;
   3pub mod udiff;
   4
   5use anyhow::{Result, anyhow};
   6use serde::{Deserialize, Serialize};
   7use std::fmt::Write;
   8use std::ops::Range;
   9use std::path::Path;
  10use std::sync::Arc;
  11use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
  12
  13pub use crate::excerpt_ranges::{
  14    ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
  15};
  16
/// Marker string for the user's cursor. It is passed to the multi-region
/// cursor-section writers in this file and is listed among the special tokens
/// of the multi-region formats.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
  18
/// Upper bound on the fraction of the editable region used for prefill.
///
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  23
  24fn estimate_tokens(bytes: usize) -> usize {
  25    bytes / 3
  26}
  27
  28/// Leave some slack to avoid overflow.
  29fn apply_prompt_budget_margin(max_tokens: usize) -> usize {
  30    (max_tokens as f64 * 0.9).floor() as usize
  31}
  32
/// Everything needed to format a prompt: the excerpt around the cursor plus
/// accompanying signals (edit events, related files, diagnostics).
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file the cursor is in.
    pub cursor_path: Arc<Path>,
    /// Text of the excerpt that contains the cursor.
    pub cursor_excerpt: Arc<str>,
    /// Offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// When present, `format_prompt_with_budget_for_format` adds this to rows
    /// computed relative to `cursor_excerpt` before filtering related-file
    /// excerpts against the cursor's row range.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Events passed to the edit-history formatting when building the prompt.
    pub events: Vec<Arc<Event>>,
    /// Related files to include in the prompt; `None` is treated as empty
    /// when the prompt is formatted.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    /// Diagnostics for the active buffer; omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    /// Defaults to `false` when absent from serialized input.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// Defaults to `false` when absent from serialized input.
    #[serde(default)]
    pub can_collect_data: bool,
    /// Repository URL, if known; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
  60
/// Identifies a prompt format.
///
/// The per-format functions in this file (token limits, special tokens, stop
/// tokens, excerpt ranges, ...) all dispatch on this enum. Variant order is the
/// iteration order of the derived `EnumIter`, which `ZetaFormat::parse` and
/// `ZetaFormat::options_as_string` walk.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Needed because `v0226Hashline` starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    /// The default format.
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    V0331SeedCoderModelPy,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
    /// Multi-block marker spans with NO_EDITS sentinel.
    V0306SeedMultiRegions,
    /// Byte-exact marker spans; all intermediate markers emitted; repeated marker means no-edit.
    V0316SeedMultiRegions,
    /// V0316, but marker numbers are relative to the cursor block (e.g. -1, -0, +1).
    V0317SeedMultiRegions,
    /// V0316 with larger block sizes.
    V0318SeedMultiRegions,
    /// V0318-style markers over the full available current file excerpt with no related files.
    V0327SingleFile,
}
  99
 100impl std::fmt::Display for ZetaFormat {
 101    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 102        write!(f, "{}", <&'static str>::from(self))
 103    }
 104}
 105
 106impl ZetaFormat {
 107    pub fn parse(format_name: &str) -> Result<Self> {
 108        let lower = format_name.to_lowercase();
 109
 110        // Exact case-insensitive match takes priority, bypassing ambiguity checks.
 111        for variant in ZetaFormat::iter() {
 112            if <&'static str>::from(&variant).to_lowercase() == lower {
 113                return Ok(variant);
 114            }
 115        }
 116
 117        let mut results = ZetaFormat::iter().filter(|version| {
 118            <&'static str>::from(version)
 119                .to_lowercase()
 120                .contains(&lower)
 121        });
 122        let Some(result) = results.next() else {
 123            anyhow::bail!(
 124                "`{format_name}` did not match any of:\n{}",
 125                Self::options_as_string()
 126            );
 127        };
 128        if results.next().is_some() {
 129            anyhow::bail!(
 130                "`{format_name}` matched more than one of:\n{}",
 131                Self::options_as_string()
 132            );
 133        }
 134        Ok(result)
 135    }
 136
 137    pub fn options_as_string() -> String {
 138        ZetaFormat::iter()
 139            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 140            .collect::<Vec<_>>()
 141            .concat()
 142    }
 143}
 144
/// An event that can be rendered into a prompt by `write_event`.
///
/// Serialized with an `event` tag field naming the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, carried as diff text.
    BufferChange {
        /// Path written on the `+++ b` header line by `write_event`.
        path: Arc<Path>,
        /// Path written on the `--- a` header line by `write_event`.
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the header lines.
        diff: String,
        /// When true, `write_event` prefixes the event with a
        /// "User accepted prediction" comment line.
        predicted: bool,
        /// Exposed via `Event::in_open_source_repo`; not written into the prompt.
        in_open_source_repo: bool,
    },
}
 156
 157impl Event {
 158    pub fn in_open_source_repo(&self) -> bool {
 159        match self {
 160            Event::BufferChange {
 161                in_open_source_repo,
 162                ..
 163            } => *in_open_source_repo,
 164        }
 165    }
 166}
 167
 168pub fn write_event(prompt: &mut String, event: &Event) {
 169    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 170        for component in path.components() {
 171            prompt.push('/');
 172            write!(prompt, "{}", component.as_os_str().display()).ok();
 173        }
 174    }
 175    match event {
 176        Event::BufferChange {
 177            path,
 178            old_path,
 179            diff,
 180            predicted,
 181            in_open_source_repo: _,
 182        } => {
 183            if *predicted {
 184                prompt.push_str("// User accepted prediction:\n");
 185            }
 186            prompt.push_str("--- a");
 187            write_path_as_unix_str(prompt, old_path.as_ref());
 188            prompt.push_str("\n+++ b");
 189            write_path_as_unix_str(prompt, path.as_ref());
 190            prompt.push('\n');
 191            prompt.push_str(diff);
 192        }
 193    }
 194}
 195
/// A diagnostic from the active buffer, together with a text snippet.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    /// Severity, if known. NOTE(review): the numeric encoding is not defined
    /// in this file — confirm against the producer.
    pub severity: Option<i32>,
    /// Diagnostic message text.
    pub message: String,
    /// Snippet of text associated with the diagnostic.
    pub snippet: String,
    /// Presumably the buffer rows covered by `snippet` — verify against the producer.
    pub snippet_buffer_row_range: Range<u32>,
    /// Presumably the diagnostic's span as offsets within `snippet` — verify
    /// against the producer.
    pub diagnostic_range_in_snippet: Range<usize>,
}
 204
/// A file related to the cursor location, represented by a set of excerpts.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path of the file; `filter_redundant_excerpts` compares it with the cursor path.
    pub path: Arc<Path>,
    /// Presumably the last row of the file — verify against the producer.
    pub max_row: u32,
    /// Excerpts taken from this file. Files left with no excerpts are dropped
    /// by `filter_redundant_excerpts`.
    pub excerpts: Vec<RelatedExcerpt>,
    /// Defaults to `false` when absent from serialized input.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 213
/// One excerpt of a [`RelatedFile`].
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by the excerpt; compared against the cursor's row range
    /// in `filter_redundant_excerpts`.
    pub row_range: Range<u32>,
    /// Text of the excerpt.
    pub text: Arc<str>,
    /// Defaults to `0` when absent from serialized input.
    /// NOTE(review): ordering semantics are not visible in this part of the file.
    #[serde(default)]
    pub order: usize,
}
 221
 222pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 223    special_tokens_for_format(format).iter().any(|token| {
 224        if let Some(line_token) = token.strip_suffix('\n') {
 225            input.cursor_excerpt.lines().any(|line| line == line_token)
 226        } else {
 227            input.cursor_excerpt.contains(token)
 228        }
 229    })
 230}
 231
 232pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> Option<String> {
 233    let max_prompt_tokens = match format {
 234        ZetaFormat::V0112MiddleAtEnd
 235        | ZetaFormat::V0113Ordered
 236        | ZetaFormat::V0114180EditableRegion
 237        | ZetaFormat::V0120GitMergeMarkers
 238        | ZetaFormat::V0131GitMergeMarkersPrefix
 239        | ZetaFormat::V0211Prefill
 240        | ZetaFormat::V0211SeedCoder
 241        | ZetaFormat::v0226Hashline
 242        | ZetaFormat::V0304VariableEdit
 243        | ZetaFormat::V0304SeedNoEdits
 244        | ZetaFormat::V0306SeedMultiRegions
 245        | ZetaFormat::V0316SeedMultiRegions
 246        | ZetaFormat::V0317SeedMultiRegions
 247        | ZetaFormat::V0331SeedCoderModelPy
 248        | ZetaFormat::V0318SeedMultiRegions => 4096,
 249        ZetaFormat::V0327SingleFile => 16384,
 250    };
 251
 252    format_prompt_with_budget_for_format(input, format, max_prompt_tokens)
 253}
 254
 255pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 256    match format {
 257        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
 258        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
 259        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
 260        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 261        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
 262        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
 263        ZetaFormat::V0211SeedCoder | ZetaFormat::V0331SeedCoderModelPy => {
 264            seed_coder::special_tokens()
 265        }
 266        ZetaFormat::v0226Hashline => hashline::special_tokens(),
 267        ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
 268        ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
 269        ZetaFormat::V0316SeedMultiRegions => {
 270            static TOKENS: &[&str] = &[
 271                seed_coder::FIM_SUFFIX,
 272                seed_coder::FIM_PREFIX,
 273                seed_coder::FIM_MIDDLE,
 274                seed_coder::FILE_MARKER,
 275                multi_region::V0316_END_MARKER,
 276                CURSOR_MARKER,
 277                multi_region::MARKER_TAG_PREFIX,
 278            ];
 279            TOKENS
 280        }
 281        ZetaFormat::V0318SeedMultiRegions => {
 282            static TOKENS: &[&str] = &[
 283                seed_coder::FIM_SUFFIX,
 284                seed_coder::FIM_PREFIX,
 285                seed_coder::FIM_MIDDLE,
 286                seed_coder::FILE_MARKER,
 287                multi_region::V0318_END_MARKER,
 288                CURSOR_MARKER,
 289                multi_region::MARKER_TAG_PREFIX,
 290            ];
 291            TOKENS
 292        }
 293        ZetaFormat::V0317SeedMultiRegions => {
 294            static TOKENS: &[&str] = &[
 295                seed_coder::FIM_SUFFIX,
 296                seed_coder::FIM_PREFIX,
 297                seed_coder::FIM_MIDDLE,
 298                seed_coder::FILE_MARKER,
 299                multi_region::V0317_END_MARKER,
 300                CURSOR_MARKER,
 301                multi_region::RELATIVE_MARKER_TAG_PREFIX,
 302            ];
 303            TOKENS
 304        }
 305        ZetaFormat::V0327SingleFile => {
 306            static TOKENS: &[&str] = &[
 307                seed_coder::FIM_SUFFIX,
 308                seed_coder::FIM_PREFIX,
 309                seed_coder::FIM_MIDDLE,
 310                seed_coder::FILE_MARKER,
 311                multi_region::V0327_END_MARKER,
 312                CURSOR_MARKER,
 313                multi_region::MARKER_TAG_PREFIX,
 314            ];
 315            TOKENS
 316        }
 317        ZetaFormat::V0306SeedMultiRegions => {
 318            static TOKENS: &[&str] = &[
 319                seed_coder::FIM_SUFFIX,
 320                seed_coder::FIM_PREFIX,
 321                seed_coder::FIM_MIDDLE,
 322                seed_coder::FILE_MARKER,
 323                seed_coder::START_MARKER,
 324                seed_coder::SEPARATOR,
 325                seed_coder::END_MARKER,
 326                CURSOR_MARKER,
 327                multi_region::MARKER_TAG_PREFIX,
 328            ];
 329            TOKENS
 330        }
 331    }
 332}
 333
 334/// Returns the (editable_token_limit, context_token_limit) for a given format.
 335pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
 336    match format {
 337        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
 338        ZetaFormat::V0114180EditableRegion => (180, 350),
 339        ZetaFormat::V0120GitMergeMarkers
 340        | ZetaFormat::V0131GitMergeMarkersPrefix
 341        | ZetaFormat::V0211Prefill
 342        | ZetaFormat::V0211SeedCoder
 343        | ZetaFormat::V0331SeedCoderModelPy
 344        | ZetaFormat::v0226Hashline
 345        | ZetaFormat::V0306SeedMultiRegions
 346        | ZetaFormat::V0316SeedMultiRegions
 347        | ZetaFormat::V0318SeedMultiRegions
 348        | ZetaFormat::V0317SeedMultiRegions
 349        | ZetaFormat::V0327SingleFile
 350        | ZetaFormat::V0304SeedNoEdits => (350, 150),
 351
 352        ZetaFormat::V0304VariableEdit => (1024, 0),
 353    }
 354}
 355
 356pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 357    match format {
 358        ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
 359        ZetaFormat::V0112MiddleAtEnd
 360        | ZetaFormat::V0113Ordered
 361        | ZetaFormat::V0114180EditableRegion
 362        | ZetaFormat::V0120GitMergeMarkers
 363        | ZetaFormat::V0131GitMergeMarkersPrefix
 364        | ZetaFormat::V0211Prefill
 365        | ZetaFormat::V0211SeedCoder
 366        | ZetaFormat::V0331SeedCoderModelPy
 367        | ZetaFormat::V0304VariableEdit
 368        | ZetaFormat::V0306SeedMultiRegions
 369        | ZetaFormat::V0304SeedNoEdits => &[],
 370        ZetaFormat::V0316SeedMultiRegions => &[multi_region::V0316_END_MARKER],
 371        ZetaFormat::V0318SeedMultiRegions => &[multi_region::V0318_END_MARKER],
 372        ZetaFormat::V0317SeedMultiRegions => &[multi_region::V0317_END_MARKER],
 373        ZetaFormat::V0327SingleFile => &[multi_region::V0327_END_MARKER],
 374    }
 375}
 376
 377/// Return (editable_range, context_range) for the prompt format
 378pub fn excerpt_ranges_for_format(
 379    format: ZetaFormat,
 380    ranges: &ExcerptRanges,
 381) -> (Range<usize>, Range<usize>) {
 382    match format {
 383        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 384            ranges.editable_150.clone(),
 385            ranges.editable_150_context_350.clone(),
 386        ),
 387        ZetaFormat::V0114180EditableRegion => (
 388            ranges.editable_180.clone(),
 389            ranges.editable_180_context_350.clone(),
 390        ),
 391        ZetaFormat::V0120GitMergeMarkers
 392        | ZetaFormat::V0131GitMergeMarkersPrefix
 393        | ZetaFormat::V0211Prefill
 394        | ZetaFormat::V0211SeedCoder
 395        | ZetaFormat::V0331SeedCoderModelPy
 396        | ZetaFormat::v0226Hashline
 397        | ZetaFormat::V0304SeedNoEdits
 398        | ZetaFormat::V0306SeedMultiRegions
 399        | ZetaFormat::V0316SeedMultiRegions
 400        | ZetaFormat::V0318SeedMultiRegions
 401        | ZetaFormat::V0317SeedMultiRegions => (
 402            ranges.editable_350.clone(),
 403            ranges.editable_350_context_150.clone(),
 404        ),
 405        ZetaFormat::V0327SingleFile => (
 406            ranges.editable_350_context_150.clone(),
 407            ranges.context_8192.clone().unwrap_or(
 408                // shouldn't be used, only for compat with old data/clients
 409                ranges.editable_350_context_150.clone(),
 410            ),
 411        ),
 412
 413        ZetaFormat::V0304VariableEdit => {
 414            let context = ranges
 415                .editable_350_context_1024
 416                .clone()
 417                .or(ranges.editable_350_context_512.clone())
 418                .unwrap_or_else(|| ranges.editable_350_context_150.clone());
 419            (context.clone(), context)
 420        }
 421    }
 422}
 423
 424pub fn write_cursor_excerpt_section_for_format(
 425    format: ZetaFormat,
 426    prompt: &mut String,
 427    path: &Path,
 428    context: &str,
 429    editable_range: &Range<usize>,
 430    cursor_offset: usize,
 431) {
 432    match format {
 433        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
 434            prompt,
 435            path,
 436            context,
 437            editable_range,
 438            cursor_offset,
 439        ),
 440        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 441            v0113_ordered::write_cursor_excerpt_section(
 442                prompt,
 443                path,
 444                context,
 445                editable_range,
 446                cursor_offset,
 447            )
 448        }
 449        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 450            prompt,
 451            path,
 452            context,
 453            editable_range,
 454            cursor_offset,
 455        ),
 456        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 457            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 458                prompt,
 459                path,
 460                context,
 461                editable_range,
 462                cursor_offset,
 463            )
 464        }
 465        ZetaFormat::V0211SeedCoder
 466        | ZetaFormat::V0331SeedCoderModelPy
 467        | ZetaFormat::V0304SeedNoEdits => seed_coder::write_cursor_excerpt_section(
 468            prompt,
 469            path,
 470            context,
 471            editable_range,
 472            cursor_offset,
 473        ),
 474        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
 475            prompt,
 476            path,
 477            context,
 478            editable_range,
 479            cursor_offset,
 480        ),
 481        ZetaFormat::V0304VariableEdit => {
 482            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
 483        }
 484        ZetaFormat::V0306SeedMultiRegions => {
 485            prompt.push_str(&build_v0306_cursor_prefix(
 486                path,
 487                context,
 488                editable_range,
 489                cursor_offset,
 490            ));
 491        }
 492        ZetaFormat::V0316SeedMultiRegions => {
 493            prompt.push_str(&build_v0316_cursor_prefix(
 494                path,
 495                context,
 496                editable_range,
 497                cursor_offset,
 498            ));
 499        }
 500        ZetaFormat::V0318SeedMultiRegions => {
 501            prompt.push_str(&build_v0318_cursor_prefix(
 502                path,
 503                context,
 504                editable_range,
 505                cursor_offset,
 506            ));
 507        }
 508        ZetaFormat::V0317SeedMultiRegions => {
 509            prompt.push_str(&build_v0317_cursor_prefix(
 510                path,
 511                context,
 512                editable_range,
 513                cursor_offset,
 514            ));
 515        }
 516        ZetaFormat::V0327SingleFile => {
 517            prompt.push_str(&build_v0318_cursor_prefix(
 518                path,
 519                context,
 520                editable_range,
 521                cursor_offset,
 522            ));
 523        }
 524    }
 525}
 526
 527fn build_v0306_cursor_prefix(
 528    path: &Path,
 529    context: &str,
 530    editable_range: &Range<usize>,
 531    cursor_offset: usize,
 532) -> String {
 533    let mut section = String::new();
 534    let path_str = path.to_string_lossy();
 535    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 536
 537    section.push_str(&context[..editable_range.start]);
 538    section.push_str(seed_coder::START_MARKER);
 539
 540    let editable_text = &context[editable_range.clone()];
 541    let cursor_in_editable = cursor_offset - editable_range.start;
 542    multi_region::write_editable_with_markers(
 543        &mut section,
 544        editable_text,
 545        cursor_in_editable,
 546        CURSOR_MARKER,
 547    );
 548
 549    if !section.ends_with('\n') {
 550        section.push('\n');
 551    }
 552    section.push_str(seed_coder::SEPARATOR);
 553    section
 554}
 555
 556fn build_v0316_cursor_prefix(
 557    path: &Path,
 558    context: &str,
 559    editable_range: &Range<usize>,
 560    cursor_offset: usize,
 561) -> String {
 562    let mut section = String::new();
 563    let path_str = path.to_string_lossy();
 564    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 565
 566    section.push_str(&context[..editable_range.start]);
 567
 568    let editable_text = &context[editable_range.clone()];
 569    let cursor_in_editable = cursor_offset - editable_range.start;
 570    multi_region::write_editable_with_markers_v0316(
 571        &mut section,
 572        editable_text,
 573        cursor_in_editable,
 574        CURSOR_MARKER,
 575    );
 576
 577    if !section.ends_with('\n') {
 578        section.push('\n');
 579    }
 580    section
 581}
 582
 583fn build_v0318_cursor_prefix(
 584    path: &Path,
 585    context: &str,
 586    editable_range: &Range<usize>,
 587    cursor_offset: usize,
 588) -> String {
 589    let mut section = String::new();
 590    let path_str = path.to_string_lossy();
 591    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 592
 593    section.push_str(&context[..editable_range.start]);
 594
 595    let editable_text = &context[editable_range.clone()];
 596    let cursor_in_editable = cursor_offset - editable_range.start;
 597    multi_region::write_editable_with_markers_v0318(
 598        &mut section,
 599        editable_text,
 600        cursor_in_editable,
 601        CURSOR_MARKER,
 602    );
 603
 604    if !section.ends_with('\n') {
 605        section.push('\n');
 606    }
 607    section
 608}
 609
 610fn build_v0317_cursor_prefix(
 611    path: &Path,
 612    context: &str,
 613    editable_range: &Range<usize>,
 614    cursor_offset: usize,
 615) -> String {
 616    let mut section = String::new();
 617    let path_str = path.to_string_lossy();
 618    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 619
 620    section.push_str(&context[..editable_range.start]);
 621
 622    let editable_text = &context[editable_range.clone()];
 623    let cursor_in_editable = cursor_offset - editable_range.start;
 624    multi_region::write_editable_with_markers_v0317(
 625        &mut section,
 626        editable_text,
 627        cursor_in_editable,
 628        CURSOR_MARKER,
 629    );
 630
 631    if !section.ends_with('\n') {
 632        section.push('\n');
 633    }
 634    section
 635}
 636
 637fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
 638    let start_row = text[0..range.start].matches('\n').count() as u32;
 639    let mut end_row = start_row + text[range.clone()].matches('\n').count() as u32;
 640    if !text[..range.end].ends_with('\n') {
 641        end_row += 1;
 642    }
 643    return start_row..end_row;
 644}
 645
 646fn assemble_single_file_fim_prompt(
 647    context: &str,
 648    editable_range: &Range<usize>,
 649    cursor_prefix_section: &str,
 650    events: &[Arc<Event>],
 651    max_tokens: usize,
 652) -> String {
 653    let suffix_section = seed_coder::build_suffix_section(context, editable_range);
 654
 655    let suffix_tokens = estimate_tokens(suffix_section.len() + seed_coder::FIM_PREFIX.len());
 656    let cursor_prefix_tokens =
 657        estimate_tokens(cursor_prefix_section.len() + seed_coder::FIM_MIDDLE.len());
 658    let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
 659
 660    let edit_history_section = format_edit_history_within_budget(
 661        events,
 662        seed_coder::FILE_MARKER,
 663        "edit_history",
 664        budget_after_cursor,
 665        max_edit_event_count_for_format(&ZetaFormat::V0327SingleFile),
 666    );
 667
 668    let mut prompt = String::new();
 669    prompt.push_str(&suffix_section);
 670    prompt.push_str(seed_coder::FIM_PREFIX);
 671    prompt.push_str(&edit_history_section);
 672    if !edit_history_section.is_empty() {
 673        prompt.push('\n');
 674    }
 675    prompt.push_str(cursor_prefix_section);
 676    prompt.push_str(seed_coder::FIM_MIDDLE);
 677    prompt
 678}
 679
 680pub fn format_prompt_with_budget_for_format(
 681    input: &ZetaPromptInput,
 682    format: ZetaFormat,
 683    max_tokens: usize,
 684) -> Option<String> {
 685    let (context, editable_range, context_range, cursor_offset) =
 686        resolve_cursor_region(input, format);
 687    let path = &*input.cursor_path;
 688
 689    let empty_files = Vec::new();
 690    let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
 691    let filtered_related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
 692        let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
 693        let row_range = relative_row_range.start + cursor_excerpt_start_row
 694            ..relative_row_range.end + cursor_excerpt_start_row;
 695        filter_redundant_excerpts(
 696            input_related_files.to_vec(),
 697            input.cursor_path.as_ref(),
 698            row_range,
 699        )
 700    } else {
 701        input_related_files.to_vec()
 702    };
 703    let related_files = filtered_related_files.as_slice();
 704
 705    let prompt = match format {
 706        ZetaFormat::V0211SeedCoder
 707        | ZetaFormat::V0331SeedCoderModelPy
 708        | ZetaFormat::V0304SeedNoEdits
 709        | ZetaFormat::V0306SeedMultiRegions
 710        | ZetaFormat::V0316SeedMultiRegions
 711        | ZetaFormat::V0318SeedMultiRegions
 712        | ZetaFormat::V0317SeedMultiRegions => {
 713            let mut cursor_section = String::new();
 714            write_cursor_excerpt_section_for_format(
 715                format,
 716                &mut cursor_section,
 717                path,
 718                context,
 719                &editable_range,
 720                cursor_offset,
 721            );
 722
 723            let budget_with_margin = apply_prompt_budget_margin(max_tokens);
 724            seed_coder::assemble_fim_prompt(
 725                context,
 726                &editable_range,
 727                &cursor_section,
 728                &input.events,
 729                related_files,
 730                budget_with_margin,
 731            )
 732        }
 733        ZetaFormat::V0327SingleFile => {
 734            let mut cursor_section = String::new();
 735            write_cursor_excerpt_section_for_format(
 736                format,
 737                &mut cursor_section,
 738                path,
 739                context,
 740                &editable_range,
 741                cursor_offset,
 742            );
 743
 744            assemble_single_file_fim_prompt(
 745                context,
 746                &editable_range,
 747                &cursor_section,
 748                &input.events,
 749                apply_prompt_budget_margin(max_tokens),
 750            )
 751        }
 752        _ => {
 753            let mut cursor_section = String::new();
 754            write_cursor_excerpt_section_for_format(
 755                format,
 756                &mut cursor_section,
 757                path,
 758                context,
 759                &editable_range,
 760                cursor_offset,
 761            );
 762
 763            let mut remaining_budget = apply_prompt_budget_margin(max_tokens);
 764            let cursor_tokens = estimate_tokens(cursor_section.len());
 765            remaining_budget = remaining_budget.saturating_sub(cursor_tokens);
 766
 767            let edit_history_section = format_edit_history_within_budget(
 768                &input.events,
 769                "<|file_sep|>",
 770                "edit history",
 771                remaining_budget,
 772                max_edit_event_count_for_format(&format),
 773            );
 774            let edit_history_tokens = estimate_tokens(edit_history_section.len());
 775            remaining_budget = remaining_budget.saturating_sub(edit_history_tokens);
 776
 777            let related_files_section = format_related_files_within_budget(
 778                &related_files,
 779                "<|file_sep|>",
 780                "",
 781                remaining_budget,
 782            );
 783
 784            let mut prompt = String::new();
 785            prompt.push_str(&related_files_section);
 786            prompt.push_str(&edit_history_section);
 787            prompt.push_str(&cursor_section);
 788            prompt
 789        }
 790    };
 791    let prompt_tokens = estimate_tokens(prompt.len());
 792    if prompt_tokens > max_tokens {
 793        return None;
 794    }
 795    return Some(prompt);
 796}
 797
 798pub fn filter_redundant_excerpts(
 799    mut related_files: Vec<RelatedFile>,
 800    cursor_path: &Path,
 801    cursor_row_range: Range<u32>,
 802) -> Vec<RelatedFile> {
 803    for file in &mut related_files {
 804        if file.path.as_ref() == cursor_path {
 805            file.excerpts.retain(|excerpt| {
 806                excerpt.row_range.start < cursor_row_range.start
 807                    || excerpt.row_range.end > cursor_row_range.end
 808            });
 809        }
 810    }
 811    related_files.retain(|file| !file.excerpts.is_empty());
 812    related_files
 813}
 814
 815pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
 816    match format {
 817        ZetaFormat::V0112MiddleAtEnd
 818        | ZetaFormat::V0113Ordered
 819        | ZetaFormat::V0114180EditableRegion
 820        | ZetaFormat::V0120GitMergeMarkers
 821        | ZetaFormat::V0131GitMergeMarkersPrefix
 822        | ZetaFormat::V0211Prefill
 823        | ZetaFormat::V0211SeedCoder
 824        | ZetaFormat::V0331SeedCoderModelPy
 825        | ZetaFormat::v0226Hashline
 826        | ZetaFormat::V0304SeedNoEdits
 827        | ZetaFormat::V0304VariableEdit
 828        | ZetaFormat::V0306SeedMultiRegions
 829        | ZetaFormat::V0316SeedMultiRegions
 830        | ZetaFormat::V0318SeedMultiRegions
 831        | ZetaFormat::V0317SeedMultiRegions
 832        | ZetaFormat::V0327SingleFile => 6,
 833    }
 834}
 835
 836pub fn get_prefill_for_format(
 837    format: ZetaFormat,
 838    context: &str,
 839    editable_range: &Range<usize>,
 840) -> String {
 841    match format {
 842        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
 843        ZetaFormat::V0112MiddleAtEnd
 844        | ZetaFormat::V0113Ordered
 845        | ZetaFormat::V0114180EditableRegion
 846        | ZetaFormat::V0120GitMergeMarkers
 847        | ZetaFormat::V0131GitMergeMarkersPrefix
 848        | ZetaFormat::V0211SeedCoder
 849        | ZetaFormat::V0331SeedCoderModelPy
 850        | ZetaFormat::v0226Hashline
 851        | ZetaFormat::V0304VariableEdit => String::new(),
 852        ZetaFormat::V0304SeedNoEdits
 853        | ZetaFormat::V0306SeedMultiRegions
 854        | ZetaFormat::V0316SeedMultiRegions
 855        | ZetaFormat::V0318SeedMultiRegions
 856        | ZetaFormat::V0317SeedMultiRegions
 857        | ZetaFormat::V0327SingleFile => String::new(),
 858    }
 859}
 860
 861pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
 862    match format {
 863        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
 864        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
 865        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
 866        ZetaFormat::V0211SeedCoder
 867        | ZetaFormat::V0331SeedCoderModelPy
 868        | ZetaFormat::V0304SeedNoEdits
 869        | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
 870        ZetaFormat::V0316SeedMultiRegions => Some(multi_region::V0316_END_MARKER),
 871        ZetaFormat::V0318SeedMultiRegions => Some(multi_region::V0318_END_MARKER),
 872        ZetaFormat::V0317SeedMultiRegions => Some(multi_region::V0317_END_MARKER),
 873        ZetaFormat::V0327SingleFile => Some(multi_region::V0327_END_MARKER),
 874
 875        ZetaFormat::V0112MiddleAtEnd
 876        | ZetaFormat::V0113Ordered
 877        | ZetaFormat::V0114180EditableRegion
 878        | ZetaFormat::v0226Hashline
 879        | ZetaFormat::V0304VariableEdit => None,
 880    }
 881}
 882
 883pub fn encode_patch_as_output_for_format(
 884    format: ZetaFormat,
 885    old_editable_region: &str,
 886    patch: &str,
 887    cursor_offset: Option<usize>,
 888) -> Result<Option<String>> {
 889    match format {
 890        ZetaFormat::v0226Hashline => {
 891            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
 892        }
 893        ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
 894            old_editable_region,
 895            patch,
 896            cursor_offset,
 897        )
 898        .map(Some),
 899        ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
 900            Ok(seed_coder::no_edits(patch))
 901        }
 902        ZetaFormat::V0316SeedMultiRegions => {
 903            let empty_patch = patch.lines().count() <= 3;
 904            if empty_patch {
 905                let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
 906                let marker_num =
 907                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
 908                let tag = multi_region::marker_tag(marker_num);
 909                Ok(Some(format!(
 910                    "{tag}{tag}{}",
 911                    multi_region::V0316_END_MARKER
 912                )))
 913            } else {
 914                Ok(None)
 915            }
 916        }
 917        ZetaFormat::V0318SeedMultiRegions => {
 918            let empty_patch = patch.lines().count() <= 3;
 919            if empty_patch {
 920                let marker_offsets =
 921                    multi_region::compute_marker_offsets_v0318(old_editable_region);
 922                let marker_num =
 923                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
 924                let tag = multi_region::marker_tag(marker_num);
 925                Ok(Some(format!(
 926                    "{tag}{tag}{}",
 927                    multi_region::V0318_END_MARKER
 928                )))
 929            } else {
 930                Ok(None)
 931            }
 932        }
 933        ZetaFormat::V0317SeedMultiRegions => {
 934            let empty_patch = patch.lines().count() <= 3;
 935            if empty_patch {
 936                let tag = multi_region::marker_tag_relative(0);
 937                Ok(Some(format!(
 938                    "{tag}{tag}{}",
 939                    multi_region::V0317_END_MARKER
 940                )))
 941            } else {
 942                Ok(None)
 943            }
 944        }
 945        ZetaFormat::V0327SingleFile => {
 946            let empty_patch = patch.lines().count() <= 3;
 947            if empty_patch {
 948                let marker_offsets =
 949                    multi_region::compute_marker_offsets_v0318(old_editable_region);
 950                let marker_num =
 951                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
 952                let tag = multi_region::marker_tag(marker_num);
 953                Ok(Some(format!(
 954                    "{tag}{tag}{}",
 955                    multi_region::V0327_END_MARKER
 956                )))
 957            } else {
 958                Ok(None)
 959            }
 960        }
 961        _ => Ok(None),
 962    }
 963}
 964
 965/// Given a `ZetaPromptInput`, a format, and a patch (with cursor already
 966/// extracted), produce the expected model output string for training.
 967pub fn format_expected_output(
 968    input: &ZetaPromptInput,
 969    format: ZetaFormat,
 970    patch: &str,
 971    cursor_offset: Option<usize>,
 972) -> Result<String> {
 973    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
 974    let mut old_editable = context[editable_range].to_string();
 975    if !old_editable.is_empty() && !old_editable.ends_with('\n') {
 976        old_editable.push('\n');
 977    }
 978
 979    // Formats with their own output encoding (hashline, variable-edit,
 980    // multi-region empty patches) are handled here.
 981    if let Some(output) =
 982        encode_patch_as_output_for_format(format, &old_editable, patch, cursor_offset)?
 983    {
 984        return Ok(output);
 985    }
 986
 987    let empty_patch = patch.lines().count() <= 3;
 988
 989    match format {
 990        // Multi-region formats: non-empty patches need diff application
 991        // then marker-span encoding.
 992        ZetaFormat::V0316SeedMultiRegions => {
 993            let (new_editable, first_hunk_offset) =
 994                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
 995            let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
 996            multi_region::encode_from_old_and_new_v0316(
 997                &old_editable,
 998                &new_editable,
 999                cursor_in_new,
1000                CURSOR_MARKER,
1001                multi_region::V0316_END_MARKER,
1002            )
1003        }
1004        ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0327SingleFile => {
1005            let (new_editable, first_hunk_offset) =
1006                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
1007            let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
1008            multi_region::encode_from_old_and_new_v0318(
1009                &old_editable,
1010                &new_editable,
1011                cursor_in_new,
1012                CURSOR_MARKER,
1013                multi_region::V0318_END_MARKER,
1014            )
1015        }
1016        ZetaFormat::V0317SeedMultiRegions => {
1017            let (new_editable, first_hunk_offset) =
1018                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
1019            let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
1020            multi_region::encode_from_old_and_new_v0317(
1021                &old_editable,
1022                &new_editable,
1023                cursor_in_new,
1024                CURSOR_MARKER,
1025                multi_region::V0317_END_MARKER,
1026            )
1027        }
1028        // V0131-style formats and fallback: produce new editable text with
1029        // cursor marker inserted, followed by the end marker.
1030        ZetaFormat::V0112MiddleAtEnd
1031        | ZetaFormat::V0113Ordered
1032        | ZetaFormat::V0114180EditableRegion
1033        | ZetaFormat::V0120GitMergeMarkers
1034        | ZetaFormat::V0131GitMergeMarkersPrefix
1035        | ZetaFormat::V0211Prefill
1036        | ZetaFormat::V0211SeedCoder
1037        | ZetaFormat::v0226Hashline
1038        | ZetaFormat::V0304VariableEdit
1039        | ZetaFormat::V0304SeedNoEdits
1040        | ZetaFormat::V0331SeedCoderModelPy
1041        | ZetaFormat::V0306SeedMultiRegions => {
1042            let (mut result, first_hunk_offset) = if empty_patch {
1043                (old_editable.clone(), None)
1044            } else {
1045                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?
1046            };
1047
1048            if let Some(cursor) = cursor_offset {
1049                let hunk_start = if !empty_patch {
1050                    first_hunk_offset.unwrap_or(0)
1051                } else {
1052                    0
1053                };
1054                let offset = (hunk_start + cursor).min(result.len());
1055                result.insert_str(offset, CURSOR_MARKER);
1056            }
1057
1058            if !result.is_empty() && !result.ends_with('\n') {
1059                result.push('\n');
1060            }
1061
1062            if let Some(end_marker) = output_end_marker_for_format(format) {
1063                result.push_str(end_marker);
1064            }
1065
1066            Ok(result)
1067        }
1068    }
1069}
1070
/// Translates a hunk-relative cursor offset into an offset within `new_text`.
///
/// Returns `None` when there is no cursor. Otherwise the cursor is shifted by
/// `first_hunk_offset` (treated as 0 when absent) and clamped to
/// `new_text.len()`.
fn cursor_in_new_text(
    cursor_offset: Option<usize>,
    first_hunk_offset: Option<usize>,
    new_text: &str,
) -> Option<usize> {
    let cursor = cursor_offset?;
    let shifted = first_hunk_offset.unwrap_or(0) + cursor;
    Some(shifted.min(new_text.len()))
}
1082
1083#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1084pub struct ParsedOutput {
1085    /// Text that should replace the editable region
1086    pub new_editable_region: String,
1087    /// The byte range within `cursor_excerpt` that this replacement applies to
1088    pub range_in_excerpt: Range<usize>,
1089    /// Byte offset of the cursor marker within `new_editable_region`, if present
1090    pub cursor_offset_in_new_editable_region: Option<usize>,
1091}
1092
1093#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
1094pub struct CursorPosition {
1095    pub path: String,
1096    pub row: usize,
1097    pub column: usize,
1098    pub offset: usize,
1099    pub editable_region_offset: usize,
1100}
1101
1102pub fn parsed_output_from_editable_region(
1103    range_in_excerpt: Range<usize>,
1104    mut new_editable_region: String,
1105) -> ParsedOutput {
1106    let cursor_offset_in_new_editable_region = new_editable_region.find(CURSOR_MARKER);
1107    if let Some(offset) = cursor_offset_in_new_editable_region {
1108        new_editable_region.replace_range(offset..offset + CURSOR_MARKER.len(), "");
1109    }
1110
1111    ParsedOutput {
1112        new_editable_region,
1113        range_in_excerpt,
1114        cursor_offset_in_new_editable_region,
1115    }
1116}
1117
1118/// Parse model output for the given zeta format
1119pub fn parse_zeta2_model_output(
1120    output: &str,
1121    format: ZetaFormat,
1122    prompt_inputs: &ZetaPromptInput,
1123) -> Result<ParsedOutput> {
1124    let output = match output_end_marker_for_format(format) {
1125        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
1126        None => output,
1127    };
1128
1129    let (context, editable_range_in_context, context_range, cursor_offset) =
1130        resolve_cursor_region(prompt_inputs, format);
1131    let context_start = context_range.start;
1132    let old_editable_region = &context[editable_range_in_context.clone()];
1133    let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range_in_context.start);
1134
1135    let (range_in_context, output) = match format {
1136        ZetaFormat::v0226Hashline => (
1137            editable_range_in_context,
1138            if hashline::output_has_edit_commands(output) {
1139                hashline::apply_edit_commands(old_editable_region, output)
1140            } else {
1141                output.to_string()
1142            },
1143        ),
1144        ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
1145        ZetaFormat::V0304SeedNoEdits => (
1146            editable_range_in_context,
1147            if output.starts_with(seed_coder::NO_EDITS) {
1148                old_editable_region.to_string()
1149            } else {
1150                output.to_string()
1151            },
1152        ),
1153        ZetaFormat::V0306SeedMultiRegions => (
1154            editable_range_in_context,
1155            if output.starts_with(seed_coder::NO_EDITS) {
1156                old_editable_region.to_string()
1157            } else {
1158                multi_region::apply_marker_span(old_editable_region, output)?
1159            },
1160        ),
1161        ZetaFormat::V0316SeedMultiRegions => (
1162            editable_range_in_context,
1163            multi_region::apply_marker_span_v0316(old_editable_region, output)?,
1164        ),
1165        ZetaFormat::V0318SeedMultiRegions => (
1166            editable_range_in_context,
1167            multi_region::apply_marker_span_v0318(old_editable_region, output)?,
1168        ),
1169        ZetaFormat::V0317SeedMultiRegions => (
1170            editable_range_in_context,
1171            multi_region::apply_marker_span_v0317(
1172                old_editable_region,
1173                output,
1174                Some(cursor_offset_in_editable),
1175            )?,
1176        ),
1177        ZetaFormat::V0327SingleFile => (
1178            editable_range_in_context,
1179            multi_region::apply_marker_span_v0318(old_editable_region, output)?,
1180        ),
1181        _ => (editable_range_in_context, output.to_string()),
1182    };
1183
1184    let range_in_excerpt =
1185        range_in_context.start + context_start..range_in_context.end + context_start;
1186
1187    Ok(parsed_output_from_editable_region(range_in_excerpt, output))
1188}
1189
1190pub fn parse_zeta2_model_output_as_patch(
1191    output: &str,
1192    format: ZetaFormat,
1193    prompt_inputs: &ZetaPromptInput,
1194) -> Result<String> {
1195    let parsed = parse_zeta2_model_output(output, format, prompt_inputs)?;
1196    parsed_output_to_patch(prompt_inputs, parsed)
1197}
1198
1199pub fn cursor_position_from_parsed_output(
1200    prompt_inputs: &ZetaPromptInput,
1201    parsed: &ParsedOutput,
1202) -> Option<CursorPosition> {
1203    let cursor_offset = parsed.cursor_offset_in_new_editable_region?;
1204    let editable_region_offset = parsed.range_in_excerpt.start;
1205    let excerpt = prompt_inputs.cursor_excerpt.as_ref();
1206
1207    let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
1208
1209    let new_editable_region = &parsed.new_editable_region;
1210    let prefix_end = cursor_offset.min(new_editable_region.len());
1211    let new_region_prefix = &new_editable_region[..prefix_end];
1212
1213    let row = editable_region_start_line + new_region_prefix.matches('\n').count();
1214
1215    let column = match new_region_prefix.rfind('\n') {
1216        Some(last_newline) => cursor_offset - last_newline - 1,
1217        None => {
1218            let content_prefix = &excerpt[..editable_region_offset];
1219            let content_column = match content_prefix.rfind('\n') {
1220                Some(last_newline) => editable_region_offset - last_newline - 1,
1221                None => editable_region_offset,
1222            };
1223            content_column + cursor_offset
1224        }
1225    };
1226
1227    Some(CursorPosition {
1228        path: prompt_inputs.cursor_path.to_string_lossy().into_owned(),
1229        row,
1230        column,
1231        offset: editable_region_offset + cursor_offset,
1232        editable_region_offset: cursor_offset,
1233    })
1234}
1235
1236pub fn parsed_output_to_patch(
1237    prompt_inputs: &ZetaPromptInput,
1238    parsed: ParsedOutput,
1239) -> Result<String> {
1240    let range_in_excerpt = parsed.range_in_excerpt;
1241    let excerpt = prompt_inputs.cursor_excerpt.as_ref();
1242    let old_text = excerpt[range_in_excerpt.clone()].to_string();
1243    let mut new_text = parsed.new_editable_region;
1244
1245    let mut old_text_normalized = old_text;
1246    if !new_text.is_empty() && !new_text.ends_with('\n') {
1247        new_text.push('\n');
1248    }
1249    if !old_text_normalized.is_empty() && !old_text_normalized.ends_with('\n') {
1250        old_text_normalized.push('\n');
1251    }
1252
1253    let editable_region_offset = range_in_excerpt.start;
1254    let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count() as u32;
1255    let editable_region_lines = old_text_normalized.lines().count() as u32;
1256
1257    let diff = udiff::unified_diff_with_context(
1258        &old_text_normalized,
1259        &new_text,
1260        editable_region_start_line,
1261        editable_region_start_line,
1262        editable_region_lines,
1263    );
1264
1265    let path = prompt_inputs
1266        .cursor_path
1267        .to_string_lossy()
1268        .trim_start_matches('/')
1269        .to_string();
1270    let formatted_diff = format!("--- a/{path}\n+++ b/{path}\n{diff}");
1271
1272    Ok(udiff::encode_cursor_in_patch(
1273        &formatted_diff,
1274        parsed.cursor_offset_in_new_editable_region,
1275    ))
1276}
1277
1278pub fn excerpt_range_for_format(
1279    format: ZetaFormat,
1280    ranges: &ExcerptRanges,
1281) -> (Range<usize>, Range<usize>) {
1282    excerpt_ranges_for_format(format, ranges)
1283}
1284
1285pub fn resolve_cursor_region(
1286    input: &ZetaPromptInput,
1287    format: ZetaFormat,
1288) -> (&str, Range<usize>, Range<usize>, usize) {
1289    let (editable_range, context_range) = if format == ZetaFormat::V0327SingleFile {
1290        let (editable_tokens, _) = token_limits_for_format(format);
1291        let context_range = 0..input.cursor_excerpt.len();
1292        let editable_range = multi_region::compute_v0327_editable_range(
1293            &input.cursor_excerpt,
1294            input.cursor_offset_in_excerpt,
1295            editable_tokens,
1296        );
1297        (editable_range, context_range)
1298    } else if let Some(syntax_ranges) = &input.syntax_ranges {
1299        let (editable_tokens, context_tokens) = token_limits_for_format(format);
1300        compute_editable_and_context_ranges(
1301            &input.cursor_excerpt,
1302            input.cursor_offset_in_excerpt,
1303            syntax_ranges,
1304            editable_tokens,
1305            context_tokens,
1306        )
1307    } else {
1308        excerpt_range_for_format(format, &input.excerpt_ranges)
1309    };
1310
1311    let context_start = context_range.start;
1312    let context_text = &input.cursor_excerpt[context_range.clone()];
1313    let adjusted_editable =
1314        (editable_range.start - context_start)..(editable_range.end - context_start);
1315    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
1316
1317    (
1318        context_text,
1319        adjusted_editable,
1320        context_range,
1321        adjusted_cursor,
1322    )
1323}
1324
1325pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
1326    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
1327    get_prefill_for_format(format, context, &editable_range)
1328}
1329
1330fn format_edit_history_within_budget(
1331    events: &[Arc<Event>],
1332    file_marker: &str,
1333    edit_history_name: &str,
1334    max_tokens: usize,
1335    max_edit_event_count: usize,
1336) -> String {
1337    let header = format!("{}{}\n", file_marker, edit_history_name);
1338    let header_tokens = estimate_tokens(header.len());
1339    if header_tokens >= max_tokens {
1340        return String::new();
1341    }
1342
1343    let mut event_strings: Vec<String> = Vec::new();
1344    let mut total_tokens = header_tokens;
1345
1346    for event in events.iter().rev().take(max_edit_event_count) {
1347        let mut event_str = String::new();
1348        write_event(&mut event_str, event);
1349        let event_tokens = estimate_tokens(event_str.len());
1350
1351        if total_tokens + event_tokens > max_tokens {
1352            break;
1353        }
1354        total_tokens += event_tokens;
1355        event_strings.push(event_str);
1356    }
1357
1358    if event_strings.is_empty() {
1359        return String::new();
1360    }
1361
1362    let mut result = header;
1363    for event_str in event_strings.iter().rev() {
1364        result.push_str(event_str);
1365    }
1366    result
1367}
1368
1369fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
1370    let needs_newline = !excerpt.text.ends_with('\n');
1371    let needs_ellipsis = excerpt.row_range.end < file_max_row;
1372    let len = excerpt.text.len()
1373        + if needs_newline { "\n".len() } else { 0 }
1374        + if needs_ellipsis { "...\n".len() } else { 0 };
1375    estimate_tokens(len)
1376}
1377
1378pub fn format_related_files_within_budget(
1379    related_files: &[RelatedFile],
1380    file_prefix: &str,
1381    file_suffix: &str,
1382    max_tokens: usize,
1383) -> String {
1384    struct ExcerptCandidate {
1385        file_ix: usize,
1386        excerpt_ix: usize,
1387        order: usize,
1388    }
1389
1390    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
1391        .iter()
1392        .enumerate()
1393        .flat_map(|(file_ix, file)| {
1394            file.excerpts
1395                .iter()
1396                .enumerate()
1397                .map(move |(excerpt_ix, e)| ExcerptCandidate {
1398                    file_ix,
1399                    excerpt_ix,
1400                    order: e.order,
1401                })
1402        })
1403        .collect();
1404
1405    // Pre-compute file header strings and their token costs.
1406    let file_headers: Vec<String> = related_files
1407        .iter()
1408        .map(|file| {
1409            let path_str = file.path.to_string_lossy();
1410            format!("{}{}\n", file_prefix, path_str)
1411        })
1412        .collect();
1413
1414    // Sort the excerpts by their order and determine how many fit within the budget.
1415    let mut total_tokens = 0;
1416    let mut included_excerpt_count = 0_usize;
1417    let mut included_file_indices = vec![false; related_files.len()];
1418    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
1419    for candidate in &excerpt_candidates {
1420        let file = &related_files[candidate.file_ix];
1421        let excerpt = &file.excerpts[candidate.excerpt_ix];
1422        let file_already_included = included_file_indices[candidate.file_ix];
1423        let header_cost = if file_already_included {
1424            0
1425        } else {
1426            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
1427        };
1428        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
1429        if total_tokens + header_cost + excerpt_cost > max_tokens {
1430            break;
1431        }
1432        total_tokens += header_cost + excerpt_cost;
1433        if !file_already_included {
1434            included_file_indices[candidate.file_ix] = true;
1435        }
1436        included_excerpt_count += 1;
1437    }
1438
1439    excerpt_candidates.truncate(included_excerpt_count);
1440    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
1441
1442    // Render all of the files that fit within the token budget, in the original order.
1443    let mut result = String::new();
1444    let mut last_file_ix = None;
1445    for candidate in &excerpt_candidates {
1446        if last_file_ix != Some(candidate.file_ix) {
1447            if last_file_ix.is_some() {
1448                result.push_str(file_suffix);
1449            }
1450            result.push_str(&file_headers[candidate.file_ix]);
1451            last_file_ix = Some(candidate.file_ix);
1452        }
1453        let file = &related_files[candidate.file_ix];
1454        let excerpt = &file.excerpts[candidate.excerpt_ix];
1455        result.push_str(&excerpt.text);
1456        if !result.ends_with('\n') {
1457            result.push('\n');
1458        }
1459        if excerpt.row_range.end < file.max_row {
1460            result.push_str("...\n");
1461        }
1462    }
1463
1464    result
1465}
1466
1467pub fn write_related_files(
1468    prompt: &mut String,
1469    related_files: &[RelatedFile],
1470) -> Vec<Range<usize>> {
1471    let mut ranges = Vec::new();
1472    for file in related_files {
1473        let start = prompt.len();
1474        let path_str = file.path.to_string_lossy();
1475        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1476        for excerpt in &file.excerpts {
1477            prompt.push_str(&excerpt.text);
1478            if !prompt.ends_with('\n') {
1479                prompt.push('\n');
1480            }
1481            if excerpt.row_range.end < file.max_row {
1482                prompt.push_str("...\n");
1483            }
1484        }
1485        let end = prompt.len();
1486        ranges.push(start..end);
1487    }
1488    ranges
1489}
1490
1491mod v0112_middle_at_end {
1492    use super::*;
1493
1494    pub fn special_tokens() -> &'static [&'static str] {
1495        &[
1496            "<|fim_prefix|>",
1497            "<|fim_suffix|>",
1498            "<|fim_middle|>",
1499            "<|file_sep|>",
1500            CURSOR_MARKER,
1501        ]
1502    }
1503
1504    pub fn write_cursor_excerpt_section(
1505        prompt: &mut String,
1506        path: &Path,
1507        context: &str,
1508        editable_range: &Range<usize>,
1509        cursor_offset: usize,
1510    ) {
1511        let path_str = path.to_string_lossy();
1512        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1513
1514        prompt.push_str("<|fim_prefix|>\n");
1515        prompt.push_str(&context[..editable_range.start]);
1516
1517        prompt.push_str("<|fim_suffix|>\n");
1518        prompt.push_str(&context[editable_range.end..]);
1519        if !prompt.ends_with('\n') {
1520            prompt.push('\n');
1521        }
1522
1523        prompt.push_str("<|fim_middle|>current\n");
1524        prompt.push_str(&context[editable_range.start..cursor_offset]);
1525        prompt.push_str(CURSOR_MARKER);
1526        prompt.push_str(&context[cursor_offset..editable_range.end]);
1527        if !prompt.ends_with('\n') {
1528            prompt.push('\n');
1529        }
1530
1531        prompt.push_str("<|fim_middle|>updated\n");
1532    }
1533}
1534
1535mod v0113_ordered {
1536    use super::*;
1537
1538    pub fn special_tokens() -> &'static [&'static str] {
1539        &[
1540            "<|fim_prefix|>",
1541            "<|fim_suffix|>",
1542            "<|fim_middle|>",
1543            "<|file_sep|>",
1544            CURSOR_MARKER,
1545        ]
1546    }
1547
1548    pub fn write_cursor_excerpt_section(
1549        prompt: &mut String,
1550        path: &Path,
1551        context: &str,
1552        editable_range: &Range<usize>,
1553        cursor_offset: usize,
1554    ) {
1555        let path_str = path.to_string_lossy();
1556        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1557
1558        prompt.push_str("<|fim_prefix|>\n");
1559        prompt.push_str(&context[..editable_range.start]);
1560        if !prompt.ends_with('\n') {
1561            prompt.push('\n');
1562        }
1563
1564        prompt.push_str("<|fim_middle|>current\n");
1565        prompt.push_str(&context[editable_range.start..cursor_offset]);
1566        prompt.push_str(CURSOR_MARKER);
1567        prompt.push_str(&context[cursor_offset..editable_range.end]);
1568        if !prompt.ends_with('\n') {
1569            prompt.push('\n');
1570        }
1571
1572        prompt.push_str("<|fim_suffix|>\n");
1573        prompt.push_str(&context[editable_range.end..]);
1574        if !prompt.ends_with('\n') {
1575            prompt.push('\n');
1576        }
1577
1578        prompt.push_str("<|fim_middle|>updated\n");
1579    }
1580}
1581
1582mod v0114180_editable_region {
1583    use super::*;
1584
1585    pub fn special_tokens() -> &'static [&'static str] {
1586        v0113_ordered::special_tokens()
1587    }
1588}
1589
1590pub mod v0120_git_merge_markers {
1591    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1592    //!
1593    //! Example prompt:
1594    //!
1595    //! <|file_sep|>path/to/target_file.py
1596    //! <|fim_prefix|>
1597    //! code before editable region
1598    //! <|fim_suffix|>
1599    //! code after editable region
1600    //! <|fim_middle|>
1601    //! <<<<<<< CURRENT
1602    //! code that
1603    //! needs to<|user_cursor|>
1604    //! be rewritten
1605    //! =======
1606    //!
1607    //! Expected output (should be generated by the model):
1608    //!
1609    //! updated
1610    //! code with
1611    //! changes applied
1612    //! >>>>>>> UPDATED
1613
1614    use super::*;
1615
1616    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1617    pub const SEPARATOR: &str = "=======\n";
1618    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1619
1620    pub fn special_tokens() -> &'static [&'static str] {
1621        &[
1622            "<|fim_prefix|>",
1623            "<|fim_suffix|>",
1624            "<|fim_middle|>",
1625            "<|file_sep|>",
1626            START_MARKER,
1627            SEPARATOR,
1628            END_MARKER,
1629            CURSOR_MARKER,
1630        ]
1631    }
1632
1633    pub fn write_cursor_excerpt_section(
1634        prompt: &mut String,
1635        path: &Path,
1636        context: &str,
1637        editable_range: &Range<usize>,
1638        cursor_offset: usize,
1639    ) {
1640        let path_str = path.to_string_lossy();
1641        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1642
1643        prompt.push_str("<|fim_prefix|>");
1644        prompt.push_str(&context[..editable_range.start]);
1645
1646        prompt.push_str("<|fim_suffix|>");
1647        prompt.push_str(&context[editable_range.end..]);
1648        if !prompt.ends_with('\n') {
1649            prompt.push('\n');
1650        }
1651
1652        prompt.push_str("<|fim_middle|>");
1653        prompt.push_str(START_MARKER);
1654        prompt.push_str(&context[editable_range.start..cursor_offset]);
1655        prompt.push_str(CURSOR_MARKER);
1656        prompt.push_str(&context[cursor_offset..editable_range.end]);
1657        if !prompt.ends_with('\n') {
1658            prompt.push('\n');
1659        }
1660        prompt.push_str(SEPARATOR);
1661    }
1662}
1663
1664pub mod v0131_git_merge_markers_prefix {
1665    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1666    //!
1667    //! Example prompt:
1668    //!
1669    //! <|file_sep|>path/to/target_file.py
1670    //! <|fim_prefix|>
1671    //! code before editable region
1672    //! <<<<<<< CURRENT
1673    //! code that
1674    //! needs to<|user_cursor|>
1675    //! be rewritten
1676    //! =======
1677    //! <|fim_suffix|>
1678    //! code after editable region
1679    //! <|fim_middle|>
1680    //!
1681    //! Expected output (should be generated by the model):
1682    //!
1683    //! updated
1684    //! code with
1685    //! changes applied
1686    //! >>>>>>> UPDATED
1687
1688    use super::*;
1689
1690    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1691    pub const SEPARATOR: &str = "=======\n";
1692    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1693
1694    pub fn special_tokens() -> &'static [&'static str] {
1695        &[
1696            "<|fim_prefix|>",
1697            "<|fim_suffix|>",
1698            "<|fim_middle|>",
1699            "<|file_sep|>",
1700            START_MARKER,
1701            SEPARATOR,
1702            END_MARKER,
1703            CURSOR_MARKER,
1704        ]
1705    }
1706
1707    pub fn write_cursor_excerpt_section(
1708        prompt: &mut String,
1709        path: &Path,
1710        context: &str,
1711        editable_range: &Range<usize>,
1712        cursor_offset: usize,
1713    ) {
1714        let path_str = path.to_string_lossy();
1715        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1716
1717        prompt.push_str("<|fim_prefix|>");
1718        prompt.push_str(&context[..editable_range.start]);
1719        prompt.push_str(START_MARKER);
1720        prompt.push_str(&context[editable_range.start..cursor_offset]);
1721        prompt.push_str(CURSOR_MARKER);
1722        prompt.push_str(&context[cursor_offset..editable_range.end]);
1723        if !prompt.ends_with('\n') {
1724            prompt.push('\n');
1725        }
1726        prompt.push_str(SEPARATOR);
1727
1728        prompt.push_str("<|fim_suffix|>");
1729        prompt.push_str(&context[editable_range.end..]);
1730        if !prompt.ends_with('\n') {
1731            prompt.push('\n');
1732        }
1733
1734        prompt.push_str("<|fim_middle|>");
1735    }
1736}
1737
1738pub mod v0211_prefill {
1739    use super::*;
1740
1741    pub fn special_tokens() -> &'static [&'static str] {
1742        v0131_git_merge_markers_prefix::special_tokens()
1743    }
1744
1745    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1746        let editable_region = &context[editable_range.start..editable_range.end];
1747
1748        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1749        let prefill_len = editable_region.floor_char_boundary(prefill_len);
1750
1751        // Find a token boundary to avoid splitting tokens in the prefill.
1752        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1753        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1754        // the \n and consume any consecutive \n characters after it.
1755        let prefill = &editable_region[..prefill_len];
1756        match prefill.rfind('\n') {
1757            Some(pos) => {
1758                let mut end = pos + 1;
1759                while end < editable_region.len()
1760                    && editable_region.as_bytes().get(end) == Some(&b'\n')
1761                {
1762                    end += 1;
1763                }
1764                editable_region[..end].to_string()
1765            }
1766            // No newline found. Fall back to splitting before the last space
1767            // (word-level boundary)
1768            None => match prefill.rfind(' ') {
1769                Some(pos) => prefill[..pos].to_string(),
1770                None => prefill.to_string(),
1771            },
1772        }
1773    }
1774}
1775
1776pub mod hashline {
1777
1778    use std::fmt::Display;
1779
    /// Marker written at the very end of the cursor excerpt section, after
    /// the suffix text.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Marker written immediately before the hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";

    use super::*;

    /// Prefix of a model-output command line that replaces a single line or
    /// an inclusive range of lines.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a model-output command line that inserts new lines.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
    /// Marker meaning that no changes to the editable region are proposed.
    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1788
1789    pub fn special_tokens() -> &'static [&'static str] {
1790        return &[
1791            SET_COMMAND_MARKER,
1792            "<|set_range|>",
1793            INSERT_COMMAND_MARKER,
1794            NO_EDITS_COMMAND_MARKER,
1795            CURSOR_MARKER,
1796            "<|file_sep|>",
1797            "<|fim_prefix|>",
1798            "<|fim_suffix|>",
1799            "<|fim_middle|>",
1800        ];
1801    }
1802
1803    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1804    #[derive(Debug, Clone, PartialEq, Eq)]
1805    struct LineRef {
1806        index: usize,
1807        hash: u8,
1808    }
1809
1810    impl Display for LineRef {
1811        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1812            write!(f, "{}:{:02x}", self.index, self.hash)
1813        }
1814    }
1815
1816    pub fn hash_line(line: &[u8]) -> u8 {
1817        let mut h: u8 = 0;
1818        for &byte in line {
1819            h = h.wrapping_add(byte);
1820        }
1821        return h;
1822    }
1823
1824    /// Write the hashline-encoded editable region into `out`. Each line of
1825    /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1826    /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1827    /// to the start of `editable_text`).
1828    pub fn write_hashline_editable_region(
1829        out: &mut String,
1830        editable_text: &str,
1831        cursor_offset_in_editable: usize,
1832    ) {
1833        let mut offset = 0;
1834        for (i, line) in editable_text.lines().enumerate() {
1835            let (head, cursor, tail) = if cursor_offset_in_editable > offset
1836                && cursor_offset_in_editable < offset + line.len()
1837            {
1838                (
1839                    &line[..cursor_offset_in_editable - offset],
1840                    CURSOR_MARKER,
1841                    &line[cursor_offset_in_editable - offset..],
1842                )
1843            } else {
1844                (line, "", "")
1845            };
1846            write!(
1847                out,
1848                "\n{}|{head}{cursor}{tail}",
1849                LineRef {
1850                    index: i,
1851                    hash: hash_line(line.as_bytes())
1852                }
1853            )
1854            .unwrap();
1855            offset += line.len() + 1;
1856        }
1857    }
1858
1859    pub fn write_cursor_excerpt_section(
1860        prompt: &mut String,
1861        path: &Path,
1862        context: &str,
1863        editable_range: &Range<usize>,
1864        cursor_offset: usize,
1865    ) {
1866        let path_str = path.to_string_lossy();
1867        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1868
1869        prompt.push_str("<|fim_prefix|>\n");
1870        prompt.push_str(&context[..editable_range.start]);
1871        prompt.push_str(START_MARKER);
1872
1873        let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1874        let editable_region = &context[editable_range.clone()];
1875        write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1876
1877        if !prompt.ends_with('\n') {
1878            prompt.push('\n');
1879        }
1880
1881        prompt.push_str("<|fim_suffix|>\n");
1882        prompt.push_str(&context[editable_range.end..]);
1883        if !prompt.ends_with('\n') {
1884            prompt.push('\n');
1885        }
1886
1887        prompt.push_str(END_MARKER);
1888        prompt.push('\n');
1889    }
1890
    /// A single edit command parsed from the model output.
    ///
    /// `content` borrows from the model output: it is the raw text of the
    /// lines that follow the command line, up to (not including) the next
    /// command line or the end of the output, newlines included.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            // First line of the range being replaced.
            start: LineRef,
            // Last line of the range being replaced (inclusive).
            end: LineRef,
            // Replacement text for the whole range.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            // Line to insert after; `None` also results when the command's
            // specifier could not be parsed as a line reference.
            after: Option<LineRef>,
            // Text to insert.
            content: &'a str,
        },
    }
1908
1909    /// Parse a line reference like `3:c3` into a `LineRef`.
1910    fn parse_line_ref(s: &str) -> Option<LineRef> {
1911        let (idx_str, hash_str) = s.split_once(':')?;
1912        let index = idx_str.parse::<usize>().ok()?;
1913        let hash = u8::from_str_radix(hash_str, 16).ok()?;
1914        Some(LineRef { index, hash })
1915    }
1916
1917    /// Parse the model output into a list of `EditCommand`s.
1918    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1919        let mut commands = Vec::new();
1920        let mut offset = 0usize;
1921
1922        while offset < model_output.len() {
1923            let next_nl = model_output[offset..]
1924                .find('\n')
1925                .map(|i| offset + i)
1926                .unwrap_or(model_output.len());
1927            let line = &model_output[offset..next_nl];
1928            let line_end = if next_nl < model_output.len() {
1929                next_nl + 1
1930            } else {
1931                next_nl
1932            };
1933
1934            let trimmed = line.trim();
1935            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1936                (true, spec)
1937            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1938                (false, spec)
1939            } else {
1940                offset = line_end;
1941                continue;
1942            };
1943
1944            let mut content_end = line_end;
1945            let mut scan = line_end;
1946
1947            while scan < model_output.len() {
1948                let body_nl = model_output[scan..]
1949                    .find('\n')
1950                    .map(|i| scan + i)
1951                    .unwrap_or(model_output.len());
1952                let body_line = &model_output[scan..body_nl];
1953                if body_line.trim().starts_with(SET_COMMAND_MARKER)
1954                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1955                {
1956                    break;
1957                }
1958                scan = if body_nl < model_output.len() {
1959                    body_nl + 1
1960                } else {
1961                    body_nl
1962                };
1963                content_end = scan;
1964            }
1965
1966            let content = &model_output[line_end..content_end];
1967
1968            if is_set {
1969                if let Some((start_str, end_str)) = specifier.split_once('-') {
1970                    if let (Some(start), Some(end)) =
1971                        (parse_line_ref(start_str), parse_line_ref(end_str))
1972                    {
1973                        commands.push(EditCommand::Set {
1974                            start,
1975                            end,
1976                            content,
1977                        });
1978                    }
1979                } else if let Some(target) = parse_line_ref(specifier) {
1980                    commands.push(EditCommand::Set {
1981                        start: target.clone(),
1982                        end: target,
1983                        content,
1984                    });
1985                }
1986            } else {
1987                let after = parse_line_ref(specifier);
1988                commands.push(EditCommand::Insert { after, content });
1989            }
1990
1991            offset = scan;
1992        }
1993
1994        commands
1995    }
1996
1997    /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1998    /// (as opposed to being a plain full-replacement output).
1999    /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
2000    /// editable region, returning the plain text content.
2001    pub fn strip_hashline_prefixes(region: &str) -> String {
2002        let mut decoded: String = region
2003            .lines()
2004            .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
2005            .collect::<Vec<_>>()
2006            .join("\n");
2007        if region.ends_with('\n') {
2008            decoded.push('\n');
2009        }
2010        decoded
2011    }
2012
2013    pub fn output_has_edit_commands(model_output: &str) -> bool {
2014        model_output.contains(SET_COMMAND_MARKER)
2015            || model_output.contains(INSERT_COMMAND_MARKER)
2016            || model_output.contains(NO_EDITS_COMMAND_MARKER)
2017    }
2018
2019    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
2020    /// original editable region text.
2021    ///
2022    /// `editable_region` is the original text of the editable region (without hash
2023    /// prefixes). `model_output` is the raw model response containing edit commands.
2024    ///
2025    /// Returns the full replacement text for the editable region.
2026    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
2027        if model_output
2028            .trim_start()
2029            .starts_with(NO_EDITS_COMMAND_MARKER)
2030        {
2031            return editable_region.to_string();
2032        }
2033
2034        let original_lines: Vec<&str> = editable_region.lines().collect();
2035        let old_hashes: Vec<u8> = original_lines
2036            .iter()
2037            .map(|line| hash_line(line.as_bytes()))
2038            .collect();
2039
2040        let commands = parse_edit_commands(model_output);
2041
2042        // For set operations: indexed by start line → Some((end line index, content))
2043        // For insert operations: indexed by line index → vec of content to insert after
2044        // Insert-before-first is tracked separately.
2045        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
2046        let mut insert_before_first: Vec<&str> = Vec::new();
2047        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
2048
2049        for command in &commands {
2050            match command {
2051                EditCommand::Set {
2052                    start,
2053                    end,
2054                    content,
2055                } => {
2056                    if start.index < old_hashes.len()
2057                        && end.index < old_hashes.len()
2058                        && start.index <= end.index
2059                        && old_hashes[start.index] == start.hash
2060                        && old_hashes[end.index] == end.hash
2061                    {
2062                        set_ops[start.index] = Some((end.index, *content));
2063                    }
2064                }
2065                EditCommand::Insert { after, content } => match after {
2066                    None => insert_before_first.push(*content),
2067                    Some(line_ref) => {
2068                        if line_ref.index < old_hashes.len()
2069                            && old_hashes[line_ref.index] == line_ref.hash
2070                        {
2071                            insert_after[line_ref.index].push(*content);
2072                        }
2073                    }
2074                },
2075            }
2076        }
2077
2078        let mut result = String::new();
2079
2080        // Emit any insertions before the first line
2081        for content in &insert_before_first {
2082            result.push_str(content);
2083            if !content.ends_with('\n') {
2084                result.push('\n');
2085            }
2086        }
2087
2088        let mut i = 0;
2089        while i < original_lines.len() {
2090            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
2091                // Replace lines i..=end_index with the replacement content
2092                result.push_str(replacement);
2093                if !replacement.is_empty() && !replacement.ends_with('\n') {
2094                    result.push('\n');
2095                }
2096                // Emit any insertions after the end of this set range
2097                if *end_index < insert_after.len() {
2098                    for content in &insert_after[*end_index] {
2099                        result.push_str(content);
2100                        if !content.ends_with('\n') {
2101                            result.push('\n');
2102                        }
2103                    }
2104                }
2105                i = end_index + 1;
2106            } else {
2107                // Keep the original line
2108                result.push_str(original_lines[i]);
2109                result.push('\n');
2110                // Emit any insertions after this line
2111                for content in &insert_after[i] {
2112                    result.push_str(content);
2113                    if !content.ends_with('\n') {
2114                        result.push('\n');
2115                    }
2116                }
2117                i += 1;
2118            }
2119        }
2120
2121        // Preserve trailing newline behavior: if the original ended with a
2122        // newline the result already has one; if it didn't, trim the extra one
2123        // we added.
2124        if !editable_region.ends_with('\n') && result.ends_with('\n') {
2125            result.pop();
2126        }
2127
2128        result
2129    }
2130
2131    /// Convert a unified diff patch into hashline edit commands.
2132    ///
2133    /// Parses the unified diff `patch` directly to determine which lines of
2134    /// `old_text` are deleted/replaced and what new lines are added, then emits
2135    /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
2136    /// `{index}:{hash}` identifiers.
2137    ///
2138    /// `cursor_offset` is an optional byte offset into the first hunk's new
2139    /// text (context + additions) where the cursor marker should be placed.
2140    pub fn patch_to_edit_commands(
2141        old_text: &str,
2142        patch: &str,
2143        cursor_offset: Option<usize>,
2144    ) -> Result<String> {
2145        let old_lines: Vec<&str> = old_text.lines().collect();
2146        let old_hashes: Vec<u8> = old_lines
2147            .iter()
2148            .map(|line| hash_line(line.as_bytes()))
2149            .collect();
2150
2151        let mut result = String::new();
2152        let mut first_hunk = true;
2153
2154        struct Hunk<'a> {
2155            line_range: Range<usize>,
2156            new_text_lines: Vec<&'a str>,
2157            cursor_line_offset_in_new_text: Option<(usize, usize)>,
2158        }
2159
2160        // Parse the patch line by line. We only care about hunk headers,
2161        // context, deletions, and additions.
2162        let mut old_line_index: usize = 0;
2163        let mut current_hunk: Option<Hunk> = None;
2164        // Byte offset tracking within the hunk's new text for cursor placement.
2165        let mut new_text_byte_offset: usize = 0;
2166        // The line index of the last old line seen before/in the current hunk
2167        // (used for insert-after reference).
2168        let mut last_old_line_before_hunk: Option<usize> = None;
2169
2170        fn flush_hunk(
2171            hunk: Hunk,
2172            last_old_line: Option<usize>,
2173            result: &mut String,
2174            old_hashes: &[u8],
2175        ) {
2176            if hunk.line_range.is_empty() {
2177                // Pure insertion — reference the old line to insert after when in bounds.
2178                if let Some(after) = last_old_line
2179                    && let Some(&hash) = old_hashes.get(after)
2180                {
2181                    write!(
2182                        result,
2183                        "{INSERT_COMMAND_MARKER}{}\n",
2184                        LineRef { index: after, hash }
2185                    )
2186                    .unwrap();
2187                } else {
2188                    result.push_str(INSERT_COMMAND_MARKER);
2189                    result.push('\n');
2190                }
2191            } else {
2192                let start = hunk.line_range.start;
2193                let end_exclusive = hunk.line_range.end;
2194                let deleted_line_count = end_exclusive.saturating_sub(start);
2195
2196                if deleted_line_count == 1 {
2197                    if let Some(&hash) = old_hashes.get(start) {
2198                        write!(
2199                            result,
2200                            "{SET_COMMAND_MARKER}{}\n",
2201                            LineRef { index: start, hash }
2202                        )
2203                        .unwrap();
2204                    } else {
2205                        result.push_str(SET_COMMAND_MARKER);
2206                        result.push('\n');
2207                    }
2208                } else {
2209                    let end_inclusive = end_exclusive - 1;
2210                    match (
2211                        old_hashes.get(start).copied(),
2212                        old_hashes.get(end_inclusive).copied(),
2213                    ) {
2214                        (Some(start_hash), Some(end_hash)) => {
2215                            write!(
2216                                result,
2217                                "{SET_COMMAND_MARKER}{}-{}\n",
2218                                LineRef {
2219                                    index: start,
2220                                    hash: start_hash
2221                                },
2222                                LineRef {
2223                                    index: end_inclusive,
2224                                    hash: end_hash
2225                                }
2226                            )
2227                            .unwrap();
2228                        }
2229                        _ => {
2230                            result.push_str(SET_COMMAND_MARKER);
2231                            result.push('\n');
2232                        }
2233                    }
2234                }
2235            }
2236            for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
2237                if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
2238                    && line_offset == cursor_line_offset
2239                {
2240                    result.push_str(&line[..char_offset]);
2241                    result.push_str(CURSOR_MARKER);
2242                    result.push_str(&line[char_offset..]);
2243                    continue;
2244                }
2245
2246                result.push_str(line);
2247            }
2248        }
2249
2250        for raw_line in patch.split_inclusive('\n') {
2251            if raw_line.starts_with("@@") {
2252                // Flush any pending change hunk from a previous patch hunk.
2253                if let Some(hunk) = current_hunk.take() {
2254                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2255                }
2256
2257                // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
2258                // We intentionally do not trust old_start as a direct local index into `old_text`,
2259                // because some patches are produced against a larger file region and carry
2260                // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
2261                if first_hunk {
2262                    new_text_byte_offset = 0;
2263                    first_hunk = false;
2264                }
2265                continue;
2266            }
2267
2268            if raw_line.starts_with("---") || raw_line.starts_with("+++") {
2269                continue;
2270            }
2271            if raw_line.starts_with("\\ No newline") {
2272                continue;
2273            }
2274
2275            if raw_line.starts_with('-') {
2276                // Extend or start a change hunk with this deleted old line.
2277                match &mut current_hunk {
2278                    Some(Hunk {
2279                        line_range: range, ..
2280                    }) => range.end = old_line_index + 1,
2281                    None => {
2282                        current_hunk = Some(Hunk {
2283                            line_range: old_line_index..old_line_index + 1,
2284                            new_text_lines: Vec::new(),
2285                            cursor_line_offset_in_new_text: None,
2286                        });
2287                    }
2288                }
2289                old_line_index += 1;
2290            } else if let Some(added_content) = raw_line.strip_prefix('+') {
2291                // Place cursor marker if cursor_offset falls within this line.
2292                let mut cursor_line_offset = None;
2293                if let Some(cursor_off) = cursor_offset
2294                    && (first_hunk
2295                        || cursor_off >= new_text_byte_offset
2296                            && cursor_off <= new_text_byte_offset + added_content.len())
2297                {
2298                    let line_offset = added_content.floor_char_boundary(
2299                        cursor_off
2300                            .saturating_sub(new_text_byte_offset)
2301                            .min(added_content.len()),
2302                    );
2303                    cursor_line_offset = Some(line_offset);
2304                }
2305
2306                new_text_byte_offset += added_content.len();
2307
2308                let hunk = current_hunk.get_or_insert(Hunk {
2309                    line_range: old_line_index..old_line_index,
2310                    new_text_lines: vec![],
2311                    cursor_line_offset_in_new_text: None,
2312                });
2313                hunk.new_text_lines.push(added_content);
2314                hunk.cursor_line_offset_in_new_text = cursor_line_offset
2315                    .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
2316            } else {
2317                // Context line (starts with ' ' or is empty).
2318                if let Some(hunk) = current_hunk.take() {
2319                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2320                }
2321                last_old_line_before_hunk = Some(old_line_index);
2322                old_line_index += 1;
2323                let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
2324                new_text_byte_offset += content.len();
2325            }
2326        }
2327
2328        // Flush final group.
2329        if let Some(hunk) = current_hunk.take() {
2330            flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2331        }
2332
2333        // Trim a single trailing newline.
2334        if result.ends_with('\n') {
2335            result.pop();
2336        }
2337
2338        if result.is_empty() {
2339            return Ok(NO_EDITS_COMMAND_MARKER.to_string());
2340        }
2341
2342        Ok(result)
2343    }
2344
2345    #[cfg(test)]
2346    mod tests {
2347        use super::*;
2348        use indoc::indoc;
2349
2350        #[test]
2351        fn test_format_cursor_region() {
2352            struct Case {
2353                name: &'static str,
2354                context: &'static str,
2355                editable_range: Range<usize>,
2356                cursor_offset: usize,
2357                expected: &'static str,
2358            }
2359
2360            let cases = [
2361                Case {
2362                    name: "basic_cursor_placement",
2363                    context: "hello world\n",
2364                    editable_range: 0..12,
2365                    cursor_offset: 5,
2366                    expected: indoc! {"
2367                    <|file_sep|>test.rs
2368                    <|fim_prefix|>
2369                    <|fim_middle|>current
2370                    0:5c|hello<|user_cursor|> world
2371                    <|fim_suffix|>
2372                    <|fim_middle|>updated
2373                    "},
2374                },
2375                Case {
2376                    name: "multiline_cursor_on_second_line",
2377                    context: "aaa\nbbb\nccc\n",
2378                    editable_range: 0..12,
2379                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
2380                    expected: indoc! {"
2381                    <|file_sep|>test.rs
2382                    <|fim_prefix|>
2383                    <|fim_middle|>current
2384                    0:23|aaa
2385                    1:26|b<|user_cursor|>bb
2386                    2:29|ccc
2387                    <|fim_suffix|>
2388                    <|fim_middle|>updated
2389                    "},
2390                },
2391                Case {
2392                    name: "no_trailing_newline_in_context",
2393                    context: "line1\nline2",
2394                    editable_range: 0..11,
2395                    cursor_offset: 3,
2396                    expected: indoc! {"
2397                    <|file_sep|>test.rs
2398                    <|fim_prefix|>
2399                    <|fim_middle|>current
2400                    0:d9|lin<|user_cursor|>e1
2401                    1:da|line2
2402                    <|fim_suffix|>
2403                    <|fim_middle|>updated
2404                    "},
2405                },
2406                Case {
2407                    name: "leading_newline_in_editable_region",
2408                    context: "\nabc\n",
2409                    editable_range: 0..5,
2410                    cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
2411                    expected: indoc! {"
2412                    <|file_sep|>test.rs
2413                    <|fim_prefix|>
2414                    <|fim_middle|>current
2415                    0:00|
2416                    1:26|a<|user_cursor|>bc
2417                    <|fim_suffix|>
2418                    <|fim_middle|>updated
2419                    "},
2420                },
2421                Case {
2422                    name: "with_suffix",
2423                    context: "abc\ndef",
2424                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
2425                    cursor_offset: 2,
2426                    expected: indoc! {"
2427                    <|file_sep|>test.rs
2428                    <|fim_prefix|>
2429                    <|fim_middle|>current
2430                    0:26|ab<|user_cursor|>c
2431                    <|fim_suffix|>
2432                    def
2433                    <|fim_middle|>updated
2434                    "},
2435                },
2436                Case {
2437                    name: "unicode_two_byte_chars",
2438                    context: "héllo\n",
2439                    editable_range: 0..7,
2440                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
2441                    expected: indoc! {"
2442                    <|file_sep|>test.rs
2443                    <|fim_prefix|>
2444                    <|fim_middle|>current
2445                    0:1b|hé<|user_cursor|>llo
2446                    <|fim_suffix|>
2447                    <|fim_middle|>updated
2448                    "},
2449                },
2450                Case {
2451                    name: "unicode_three_byte_chars",
2452                    context: "日本語\n",
2453                    editable_range: 0..10,
2454                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
2455                    expected: indoc! {"
2456                    <|file_sep|>test.rs
2457                    <|fim_prefix|>
2458                    <|fim_middle|>current
2459                    0:80|日本<|user_cursor|>語
2460                    <|fim_suffix|>
2461                    <|fim_middle|>updated
2462                    "},
2463                },
2464                Case {
2465                    name: "unicode_four_byte_chars",
2466                    context: "a🌍b\n",
2467                    editable_range: 0..7,
2468                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
2469                    expected: indoc! {"
2470                    <|file_sep|>test.rs
2471                    <|fim_prefix|>
2472                    <|fim_middle|>current
2473                    0:6b|a🌍<|user_cursor|>b
2474                    <|fim_suffix|>
2475                    <|fim_middle|>updated
2476                    "},
2477                },
2478                Case {
2479                    name: "cursor_at_start_of_region_not_placed",
2480                    context: "abc\n",
2481                    editable_range: 0..4,
2482                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
2483                    expected: indoc! {"
2484                    <|file_sep|>test.rs
2485                    <|fim_prefix|>
2486                    <|fim_middle|>current
2487                    0:26|abc
2488                    <|fim_suffix|>
2489                    <|fim_middle|>updated
2490                    "},
2491                },
2492                Case {
2493                    name: "cursor_at_end_of_line_not_placed",
2494                    context: "abc\ndef\n",
2495                    editable_range: 0..8,
2496                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
2497                    expected: indoc! {"
2498                    <|file_sep|>test.rs
2499                    <|fim_prefix|>
2500                    <|fim_middle|>current
2501                    0:26|abc
2502                    1:2f|def
2503                    <|fim_suffix|>
2504                    <|fim_middle|>updated
2505                    "},
2506                },
2507                Case {
2508                    name: "cursor_offset_relative_to_context_not_editable_region",
2509                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
2510                    // write_cursor_excerpt_section must subtract it before comparing against
2511                    // per-line offsets within the editable region.
2512                    context: "pre\naaa\nbbb\nsuf\n",
2513                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
2514                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
2515                    expected: indoc! {"
2516                    <|file_sep|>test.rs
2517                    <|fim_prefix|>
2518                    pre
2519                    <|fim_middle|>current
2520                    0:23|aaa
2521                    1:26|b<|user_cursor|>bb
2522                    <|fim_suffix|>
2523                    suf
2524                    <|fim_middle|>updated
2525                    "},
2526                },
2527            ];
2528
2529            for case in &cases {
2530                let mut prompt = String::new();
2531                hashline::write_cursor_excerpt_section(
2532                    &mut prompt,
2533                    Path::new("test.rs"),
2534                    case.context,
2535                    &case.editable_range,
2536                    case.cursor_offset,
2537                );
2538                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
2539            }
2540        }
2541
2542        #[test]
2543        fn test_apply_edit_commands() {
2544            struct Case {
2545                name: &'static str,
2546                original: &'static str,
2547                model_output: &'static str,
2548                expected: &'static str,
2549            }
2550
2551            let cases = vec![
2552                Case {
2553                    name: "set_single_line",
2554                    original: indoc! {"
2555                    let mut total = 0;
2556                    for product in products {
2557                        total += ;
2558                    }
2559                    total
2560                "},
2561                    model_output: indoc! {"
2562                    <|set|>2:87
2563                        total += product.price;
2564                "},
2565                    expected: indoc! {"
2566                    let mut total = 0;
2567                    for product in products {
2568                        total += product.price;
2569                    }
2570                    total
2571                "},
2572                },
2573                Case {
2574                    name: "set_range",
2575                    original: indoc! {"
2576                    fn foo() {
2577                        let x = 1;
2578                        let y = 2;
2579                        let z = 3;
2580                    }
2581                "},
2582                    model_output: indoc! {"
2583                    <|set|>1:46-3:4a
2584                        let sum = 6;
2585                "},
2586                    expected: indoc! {"
2587                    fn foo() {
2588                        let sum = 6;
2589                    }
2590                "},
2591                },
2592                Case {
2593                    name: "insert_after_line",
2594                    original: indoc! {"
2595                    fn main() {
2596                        let x = 1;
2597                    }
2598                "},
2599                    model_output: indoc! {"
2600                    <|insert|>1:46
2601                        let y = 2;
2602                "},
2603                    expected: indoc! {"
2604                    fn main() {
2605                        let x = 1;
2606                        let y = 2;
2607                    }
2608                "},
2609                },
2610                Case {
2611                    name: "insert_before_first",
2612                    original: indoc! {"
2613                    let x = 1;
2614                    let y = 2;
2615                "},
2616                    model_output: indoc! {"
2617                    <|insert|>
2618                    use std::io;
2619                "},
2620                    expected: indoc! {"
2621                    use std::io;
2622                    let x = 1;
2623                    let y = 2;
2624                "},
2625                },
2626                Case {
2627                    name: "set_with_cursor_marker",
2628                    original: indoc! {"
2629                    fn main() {
2630                        println!();
2631                    }
2632                "},
2633                    model_output: indoc! {"
2634                    <|set|>1:34
2635                        eprintln!(\"<|user_cursor|>\");
2636                "},
2637                    expected: indoc! {"
2638                    fn main() {
2639                        eprintln!(\"<|user_cursor|>\");
2640                    }
2641                "},
2642                },
2643                Case {
2644                    name: "multiple_set_commands",
2645                    original: indoc! {"
2646                    aaa
2647                    bbb
2648                    ccc
2649                    ddd
2650                "},
2651                    model_output: indoc! {"
2652                    <|set|>0:23
2653                    AAA
2654                    <|set|>2:29
2655                    CCC
2656                "},
2657                    expected: indoc! {"
2658                    AAA
2659                    bbb
2660                    CCC
2661                    ddd
2662                "},
2663                },
2664                Case {
2665                    name: "set_range_multiline_replacement",
2666                    original: indoc! {"
2667                    fn handle_submit() {
2668                    }
2669
2670                    fn handle_keystroke() {
2671                "},
2672                    model_output: indoc! {"
2673                    <|set|>0:3f-1:7d
2674                    fn handle_submit(modal_state: &mut ModalState) {
2675                        <|user_cursor|>
2676                    }
2677                "},
2678                    expected: indoc! {"
2679                    fn handle_submit(modal_state: &mut ModalState) {
2680                        <|user_cursor|>
2681                    }
2682
2683                    fn handle_keystroke() {
2684                "},
2685                },
2686                Case {
2687                    name: "no_edit_commands_returns_original",
2688                    original: indoc! {"
2689                    hello
2690                    world
2691                "},
2692                    model_output: "some random text with no commands",
2693                    expected: indoc! {"
2694                    hello
2695                    world
2696                "},
2697                },
2698                Case {
2699                    name: "no_edits_command_returns_original",
2700                    original: indoc! {"
2701                    hello
2702                    world
2703                "},
2704                    model_output: "<|no_edits|>",
2705                    expected: indoc! {"
2706                    hello
2707                    world
2708                "},
2709                },
2710                Case {
2711                    name: "wrong_hash_set_ignored",
2712                    original: indoc! {"
2713                    aaa
2714                    bbb
2715                "},
2716                    model_output: indoc! {"
2717                    <|set|>0:ff
2718                    ZZZ
2719                "},
2720                    expected: indoc! {"
2721                    aaa
2722                    bbb
2723                "},
2724                },
2725                Case {
2726                    name: "insert_and_set_combined",
2727                    original: indoc! {"
2728                    alpha
2729                    beta
2730                    gamma
2731                "},
2732                    model_output: indoc! {"
2733                    <|set|>0:06
2734                    ALPHA
2735                    <|insert|>1:9c
2736                    beta_extra
2737                "},
2738                    expected: indoc! {"
2739                    ALPHA
2740                    beta
2741                    beta_extra
2742                    gamma
2743                "},
2744                },
2745                Case {
2746                    name: "no_trailing_newline_preserved",
2747                    original: "hello\nworld",
2748                    model_output: indoc! {"
2749                    <|set|>0:14
2750                    HELLO
2751                "},
2752                    expected: "HELLO\nworld",
2753                },
2754                Case {
2755                    name: "set_range_hash_mismatch_in_end_bound",
2756                    original: indoc! {"
2757                    one
2758                    two
2759                    three
2760                "},
2761                    model_output: indoc! {"
2762                    <|set|>0:42-2:ff
2763                    ONE_TWO_THREE
2764                "},
2765                    expected: indoc! {"
2766                    one
2767                    two
2768                    three
2769                "},
2770                },
2771                Case {
2772                    name: "set_range_start_greater_than_end_ignored",
2773                    original: indoc! {"
2774                    a
2775                    b
2776                    c
2777                "},
2778                    model_output: indoc! {"
2779                    <|set|>2:63-1:62
2780                    X
2781                "},
2782                    expected: indoc! {"
2783                    a
2784                    b
2785                    c
2786                "},
2787                },
2788                Case {
2789                    name: "insert_out_of_bounds_ignored",
2790                    original: indoc! {"
2791                    x
2792                    y
2793                "},
2794                    model_output: indoc! {"
2795                    <|insert|>99:aa
2796                    z
2797                "},
2798                    expected: indoc! {"
2799                    x
2800                    y
2801                "},
2802                },
2803                Case {
2804                    name: "set_out_of_bounds_ignored",
2805                    original: indoc! {"
2806                    x
2807                    y
2808                "},
2809                    model_output: indoc! {"
2810                    <|set|>99:aa
2811                    z
2812                "},
2813                    expected: indoc! {"
2814                    x
2815                    y
2816                "},
2817                },
2818                Case {
2819                    name: "malformed_set_command_ignored",
2820                    original: indoc! {"
2821                    alpha
2822                    beta
2823                "},
2824                    model_output: indoc! {"
2825                    <|set|>not-a-line-ref
2826                    UPDATED
2827                "},
2828                    expected: indoc! {"
2829                    alpha
2830                    beta
2831                "},
2832                },
2833                Case {
2834                    name: "malformed_insert_hash_treated_as_before_first",
2835                    original: indoc! {"
2836                    alpha
2837                    beta
2838                "},
2839                    model_output: indoc! {"
2840                    <|insert|>1:nothex
2841                    preamble
2842                "},
2843                    expected: indoc! {"
2844                    preamble
2845                    alpha
2846                    beta
2847                "},
2848                },
2849                Case {
2850                    name: "set_then_insert_same_target_orders_insert_after_replacement",
2851                    original: indoc! {"
2852                    cat
2853                    dog
2854                "},
2855                    model_output: indoc! {"
2856                    <|set|>0:38
2857                    CAT
2858                    <|insert|>0:38
2859                    TAIL
2860                "},
2861                    expected: indoc! {"
2862                    CAT
2863                    TAIL
2864                    dog
2865                "},
2866                },
2867                Case {
2868                    name: "overlapping_set_ranges_last_wins",
2869                    original: indoc! {"
2870                    a
2871                    b
2872                    c
2873                    d
2874                "},
2875                    model_output: indoc! {"
2876                    <|set|>0:61-2:63
2877                    FIRST
2878                    <|set|>1:62-3:64
2879                    SECOND
2880                "},
2881                    expected: indoc! {"
2882                    FIRST
2883                    d
2884                "},
2885                },
2886                Case {
2887                    name: "insert_before_first_and_after_line",
2888                    original: indoc! {"
2889                        a
2890                        b
2891                    "},
2892                    model_output: indoc! {"
2893                        <|insert|>
2894                        HEAD
2895                        <|insert|>0:61
2896                        MID
2897                    "},
2898                    expected: indoc! {"
2899                        HEAD
2900                        a
2901                        MID
2902                        b
2903                    "},
2904                },
2905            ];
2906
2907            for case in &cases {
2908                let result = hashline::apply_edit_commands(case.original, &case.model_output);
2909                assert_eq!(result, case.expected, "failed case: {}", case.name);
2910            }
2911        }
2912
2913        #[test]
2914        fn test_output_has_edit_commands() {
2915            assert!(hashline::output_has_edit_commands(&format!(
2916                "{}0:ab\nnew",
2917                SET_COMMAND_MARKER
2918            )));
2919            assert!(hashline::output_has_edit_commands(&format!(
2920                "{}0:ab\nnew",
2921                INSERT_COMMAND_MARKER
2922            )));
2923            assert!(hashline::output_has_edit_commands(&format!(
2924                "some text\n{}1:cd\nstuff",
2925                SET_COMMAND_MARKER
2926            )));
2927            assert!(!hashline::output_has_edit_commands("just plain text"));
2928            assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2929            assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2930        }
2931
2932        // ---- hashline::patch_to_edit_commands round-trip tests ----
2933
2934        #[test]
2935        fn test_patch_to_edit_commands() {
2936            struct Case {
2937                name: &'static str,
2938                old: &'static str,
2939                patch: &'static str,
2940                expected_new: &'static str,
2941            }
2942
2943            let cases = [
2944                Case {
2945                    name: "single_line_replacement",
2946                    old: indoc! {"
2947                    let mut total = 0;
2948                    for product in products {
2949                        total += ;
2950                    }
2951                    total
2952                "},
2953                    patch: indoc! {"
2954                    @@ -1,5 +1,5 @@
2955                     let mut total = 0;
2956                     for product in products {
2957                    -    total += ;
2958                    +    total += product.price;
2959                     }
2960                     total
2961                "},
2962                    expected_new: indoc! {"
2963                    let mut total = 0;
2964                    for product in products {
2965                        total += product.price;
2966                    }
2967                    total
2968                "},
2969                },
2970                Case {
2971                    name: "multiline_replacement",
2972                    old: indoc! {"
2973                    fn foo() {
2974                        let x = 1;
2975                        let y = 2;
2976                        let z = 3;
2977                    }
2978                "},
2979                    patch: indoc! {"
2980                    @@ -1,5 +1,3 @@
2981                     fn foo() {
2982                    -    let x = 1;
2983                    -    let y = 2;
2984                    -    let z = 3;
2985                    +    let sum = 1 + 2 + 3;
2986                     }
2987                "},
2988                    expected_new: indoc! {"
2989                    fn foo() {
2990                        let sum = 1 + 2 + 3;
2991                    }
2992                "},
2993                },
2994                Case {
2995                    name: "insertion",
2996                    old: indoc! {"
2997                    fn main() {
2998                        let x = 1;
2999                    }
3000                "},
3001                    patch: indoc! {"
3002                    @@ -1,3 +1,4 @@
3003                     fn main() {
3004                         let x = 1;
3005                    +    let y = 2;
3006                     }
3007                "},
3008                    expected_new: indoc! {"
3009                    fn main() {
3010                        let x = 1;
3011                        let y = 2;
3012                    }
3013                "},
3014                },
3015                Case {
3016                    name: "insertion_before_first",
3017                    old: indoc! {"
3018                    let x = 1;
3019                    let y = 2;
3020                "},
3021                    patch: indoc! {"
3022                    @@ -1,2 +1,3 @@
3023                    +use std::io;
3024                     let x = 1;
3025                     let y = 2;
3026                "},
3027                    expected_new: indoc! {"
3028                    use std::io;
3029                    let x = 1;
3030                    let y = 2;
3031                "},
3032                },
3033                Case {
3034                    name: "deletion",
3035                    old: indoc! {"
3036                    aaa
3037                    bbb
3038                    ccc
3039                    ddd
3040                "},
3041                    patch: indoc! {"
3042                    @@ -1,4 +1,2 @@
3043                     aaa
3044                    -bbb
3045                    -ccc
3046                     ddd
3047                "},
3048                    expected_new: indoc! {"
3049                    aaa
3050                    ddd
3051                "},
3052                },
3053                Case {
3054                    name: "multiple_changes",
3055                    old: indoc! {"
3056                    alpha
3057                    beta
3058                    gamma
3059                    delta
3060                    epsilon
3061                "},
3062                    patch: indoc! {"
3063                    @@ -1,5 +1,5 @@
3064                    -alpha
3065                    +ALPHA
3066                     beta
3067                     gamma
3068                    -delta
3069                    +DELTA
3070                     epsilon
3071                "},
3072                    expected_new: indoc! {"
3073                    ALPHA
3074                    beta
3075                    gamma
3076                    DELTA
3077                    epsilon
3078                "},
3079                },
3080                Case {
3081                    name: "replace_with_insertion",
3082                    old: indoc! {r#"
3083                    fn handle() {
3084                        modal_state.close();
3085                        modal_state.dismiss();
3086                "#},
3087                    patch: indoc! {r#"
3088                    @@ -1,3 +1,4 @@
3089                     fn handle() {
3090                         modal_state.close();
3091                    +    eprintln!("");
3092                         modal_state.dismiss();
3093                "#},
3094                    expected_new: indoc! {r#"
3095                    fn handle() {
3096                        modal_state.close();
3097                        eprintln!("");
3098                        modal_state.dismiss();
3099                "#},
3100                },
3101                Case {
3102                    name: "complete_replacement",
3103                    old: indoc! {"
3104                    aaa
3105                    bbb
3106                    ccc
3107                "},
3108                    patch: indoc! {"
3109                    @@ -1,3 +1,3 @@
3110                    -aaa
3111                    -bbb
3112                    -ccc
3113                    +xxx
3114                    +yyy
3115                    +zzz
3116                "},
3117                    expected_new: indoc! {"
3118                    xxx
3119                    yyy
3120                    zzz
3121                "},
3122                },
3123                Case {
3124                    name: "add_function_body",
3125                    old: indoc! {"
3126                    fn foo() {
3127                        modal_state.dismiss();
3128                    }
3129
3130                    fn
3131
3132                    fn handle_keystroke() {
3133                "},
3134                    patch: indoc! {"
3135                    @@ -1,6 +1,8 @@
3136                     fn foo() {
3137                         modal_state.dismiss();
3138                     }
3139
3140                    -fn
3141                    +fn handle_submit() {
3142                    +    todo()
3143                    +}
3144
3145                     fn handle_keystroke() {
3146                "},
3147                    expected_new: indoc! {"
3148                    fn foo() {
3149                        modal_state.dismiss();
3150                    }
3151
3152                    fn handle_submit() {
3153                        todo()
3154                    }
3155
3156                    fn handle_keystroke() {
3157                "},
3158                },
3159                Case {
3160                    name: "with_cursor_offset",
3161                    old: indoc! {r#"
3162                    fn main() {
3163                        println!();
3164                    }
3165                "#},
3166                    patch: indoc! {r#"
3167                        @@ -1,3 +1,3 @@
3168                        fn main() {
3169                        -    println!();
3170                        +    eprintln!("");
3171                        }
3172                    "#},
3173                    expected_new: indoc! {r#"
3174                        fn main() {
3175                            eprintln!("<|user_cursor|>");
3176                        }
3177                    "#},
3178                },
3179                Case {
3180                    name: "non_local_hunk_header_pure_insertion_repro",
3181                    old: indoc! {"
3182                        aaa
3183                        bbb
3184                    "},
3185                    patch: indoc! {"
3186                        @@ -20,2 +20,3 @@
3187                        aaa
3188                        +xxx
3189                        bbb
3190                    "},
3191                    expected_new: indoc! {"
3192                        aaa
3193                        xxx
3194                        bbb
3195                    "},
3196                },
3197                Case {
3198                    name: "empty_patch_produces_no_edits_marker",
3199                    old: indoc! {"
3200                        aaa
3201                        bbb
3202                    "},
3203                    patch: "@@ -20,2 +20,3 @@\n",
3204                    expected_new: indoc! {"
3205                        aaa
3206                        bbb
3207                    "},
3208                },
3209            ];
3210
3211            for case in &cases {
3212                // The cursor_offset for patch_to_edit_commands is relative to
3213                // the first hunk's new text (context + additions). We compute
3214                // it by finding where the marker sits in the expected output
3215                // (which mirrors the new text of the hunk).
3216                let cursor_offset = case.expected_new.find(CURSOR_MARKER);
3217
3218                let commands =
3219                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
3220                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
3221
3222                assert!(
3223                    hashline::output_has_edit_commands(&commands),
3224                    "case {}: expected edit commands, got: {commands:?}",
3225                    case.name,
3226                );
3227
3228                let applied = hashline::apply_edit_commands(case.old, &commands);
3229                assert_eq!(applied, case.expected_new, "case {}", case.name);
3230            }
3231        }
3232    }
3233}
3234
3235pub mod seed_coder {
3236    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
3237    //!
3238    //! Seed-Coder uses different FIM tokens and order than Qwen:
3239    //! - SPM order: suffix comes FIRST, then prefix, then middle
3240    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
3241    //! - File markers: StarCoder-style `<filename>path` (single token + path)
3242    //!
3243    //! All context (related files, edit history) goes in the PREFIX section.
3244    //! The suffix contains only code after the editable region.
3245    //!
    //! Example prompt:
    //!
    //! ```text
    //! <[fim-suffix]>
    //! code after editable region
    //! <[fim-prefix]><filename>related/file.py
    //! related file content
    //!
    //! <filename>edit_history
    //! --- a/some_file.py
    //! +++ b/some_file.py
    //! -old
    //! +new
    //!
    //! <filename>path/to/target_file.py
    //! code before editable region
    //! <<<<<<< CURRENT
    //! code that
    //! needs to<|user_cursor|>
    //! be rewritten
    //! =======
    //! <[fim-middle]>
    //! ```
    //!
    //! Expected output (model generates):
    //!
    //! ```text
    //! updated
    //! code with
    //! changes applied
    //! >>>>>>> UPDATED
    //! ```
3274
3275    use super::*;
3276
    /// Token that opens the suffix section, which is emitted first in the prompt.
    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
    /// Token written after the suffix section; the prefix content follows it.
    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
    /// Token appended at the very end of the prompt.
    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
    /// Marker written directly before a file path to introduce that file.
    pub const FILE_MARKER: &str = "<filename>";

    /// Line written immediately before the current text of the editable region.
    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
    /// Line written immediately after the current text of the editable region.
    pub const SEPARATOR: &str = "=======\n";
    /// Line that closes the updated text; `no_edits` appends it after `NO_EDITS`.
    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

    /// Body emitted by `no_edits` to state that the patch changes nothing.
    pub const NO_EDITS: &str = "NO_EDITS\n";
3287
3288    pub fn special_tokens() -> &'static [&'static str] {
3289        &[
3290            FIM_SUFFIX,
3291            FIM_PREFIX,
3292            FIM_MIDDLE,
3293            FILE_MARKER,
3294            START_MARKER,
3295            SEPARATOR,
3296            END_MARKER,
3297            CURSOR_MARKER,
3298        ]
3299    }
3300
3301    pub fn write_cursor_excerpt_section(
3302        prompt: &mut String,
3303        path: &Path,
3304        context: &str,
3305        editable_range: &Range<usize>,
3306        cursor_offset: usize,
3307    ) {
3308        let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3309        prompt.push_str(&section);
3310    }
3311
3312    pub fn format_prompt_with_budget(
3313        path: &Path,
3314        context: &str,
3315        editable_range: &Range<usize>,
3316        cursor_offset: usize,
3317        events: &[Arc<Event>],
3318        related_files: &[RelatedFile],
3319        max_tokens: usize,
3320    ) -> String {
3321        let cursor_prefix_section =
3322            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3323        assemble_fim_prompt(
3324            context,
3325            editable_range,
3326            &cursor_prefix_section,
3327            events,
3328            related_files,
3329            max_tokens,
3330        )
3331    }
3332
3333    pub fn assemble_fim_prompt(
3334        context: &str,
3335        editable_range: &Range<usize>,
3336        cursor_prefix_section: &str,
3337        events: &[Arc<Event>],
3338        related_files: &[RelatedFile],
3339        max_tokens: usize,
3340    ) -> String {
3341        let suffix_section = build_suffix_section(context, editable_range);
3342
3343        let suffix_tokens = estimate_tokens(suffix_section.len() + FIM_PREFIX.len());
3344        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len() + FIM_MIDDLE.len());
3345        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
3346
3347        let edit_history_section = super::format_edit_history_within_budget(
3348            events,
3349            FILE_MARKER,
3350            "edit_history",
3351            budget_after_cursor,
3352            max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
3353        );
3354        let edit_history_tokens = estimate_tokens(edit_history_section.len() + "\n".len());
3355        let budget_after_edit_history =
3356            budget_after_cursor.saturating_sub(edit_history_tokens + "\n".len());
3357
3358        let related_files_section = super::format_related_files_within_budget(
3359            related_files,
3360            FILE_MARKER,
3361            "",
3362            budget_after_edit_history,
3363        );
3364
3365        let mut prompt = String::new();
3366        prompt.push_str(&suffix_section);
3367        prompt.push_str(FIM_PREFIX);
3368        prompt.push_str(&related_files_section);
3369        if !related_files_section.is_empty() {
3370            prompt.push('\n');
3371        }
3372        prompt.push_str(&edit_history_section);
3373        if !edit_history_section.is_empty() {
3374            prompt.push('\n');
3375        }
3376        prompt.push_str(cursor_prefix_section);
3377        prompt.push_str(FIM_MIDDLE);
3378
3379        prompt
3380    }
3381
3382    pub(crate) fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
3383        let mut section = String::new();
3384        section.push_str(FIM_SUFFIX);
3385        section.push_str(&context[editable_range.end..]);
3386        if !section.ends_with('\n') {
3387            section.push('\n');
3388        }
3389        section
3390    }
3391
3392    fn build_cursor_prefix_section(
3393        path: &Path,
3394        context: &str,
3395        editable_range: &Range<usize>,
3396        cursor_offset: usize,
3397    ) -> String {
3398        let mut section = String::new();
3399        let path_str = path.to_string_lossy();
3400        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
3401
3402        section.push_str(&context[..editable_range.start]);
3403        section.push_str(START_MARKER);
3404        section.push_str(&context[editable_range.start..cursor_offset]);
3405        section.push_str(CURSOR_MARKER);
3406        section.push_str(&context[cursor_offset..editable_range.end]);
3407        if !section.ends_with('\n') {
3408            section.push('\n');
3409        }
3410        section.push_str(SEPARATOR);
3411        section
3412    }
3413
3414    /// Format patch as containing no changes if it's empty; otherwise return None.
3415    pub(crate) fn no_edits(patch: &str) -> Option<String> {
3416        // Count lines in the patch
3417        let empty_patch = patch.lines().count() <= 3;
3418        if empty_patch {
3419            Some(format!("{NO_EDITS}{END_MARKER}"))
3420        } else {
3421            None
3422        }
3423    }
3424}
3425
3426pub mod v0304_variable_edit {
3427    //! A prompt format with no fixed editable region. The entire context is shown
3428    //! to the model, and it chooses which text to replace by outputting surrounding
3429    //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
3430    //! text.
3431    //!
3432    //! Example prompt:
3433    //!
3434    //! <|file_sep|>path/to/file.py
3435    //! zero
3436    //! one
3437    //! two
3438    //! three<|user_cursor|>
3439    //! four
3440    //! five
3441    //! <|fim_prefix|>
    //!
3443    //! Expected output (model generates):
3444    //!
3445    //! two
3446    //! <|fim_middle|>
3447    //! THREE
3448    //! <|fim_suffix|>
3449    //! four
3450    //!
3451    //! The output means: find "two\n...\nfour" in the context, and replace
3452    //! everything between "two\n" and "four" with "THREE\n".
3453
3454    use super::*;
3455
3456    pub fn special_tokens() -> &'static [&'static str] {
3457        &[
3458            "<|fim_prefix|>",
3459            "<|fim_suffix|>",
3460            "<|fim_middle|>",
3461            "<|file_sep|>",
3462            CURSOR_MARKER,
3463        ]
3464    }
3465
3466    pub fn write_cursor_excerpt_section(
3467        prompt: &mut String,
3468        path: &Path,
3469        context: &str,
3470        cursor_offset: usize,
3471    ) {
3472        let path_str = path.to_string_lossy();
3473        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
3474
3475        prompt.push_str(&context[..cursor_offset]);
3476        prompt.push_str(CURSOR_MARKER);
3477        prompt.push_str(&context[cursor_offset..]);
3478        if !prompt.ends_with('\n') {
3479            prompt.push('\n');
3480        }
3481        prompt.push_str("<|fim_prefix|>\n")
3482    }
3483
3484    /// Apply a variable-edit model output to the original context text.
3485    ///
3486    /// The model output has the form:
3487    ///
3488    /// - prefix context lines
3489    /// - `<|fim_middle|>`
3490    /// - new text
3491    /// - `<|fim_suffix|>`
3492    /// - suffix context lines
3493    ///
3494    /// We locate the prefix/suffix context lines in the original text and replace
3495    /// everything between them with the new text.
3496    pub fn apply_variable_edit(
3497        context: &str,
3498        model_output: &str,
3499    ) -> Result<(Range<usize>, String)> {
3500        let (prefix_context, rest) = model_output
3501            .split_once("<|fim_middle|>\n")
3502            .or_else(|| model_output.split_once("<|fim_middle|>"))
3503            .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
3504
3505        let (new_text, suffix_context) = rest
3506            .split_once("<|fim_suffix|>\n")
3507            .or_else(|| rest.split_once("<|fim_suffix|>"))
3508            .unwrap_or((rest, ""));
3509
3510        let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
3511            suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
3512        } else {
3513            suffix_context
3514        };
3515
3516        let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
3517            .ok_or_else(|| anyhow!("could not locate prefix lines"))?
3518            + prefix_context.len();
3519        let suffix_offset = if suffix_context.is_empty() {
3520            context.len()
3521        } else {
3522            find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
3523                .ok_or_else(|| anyhow!("could not locate suffix lines"))?
3524                + prefix_offset
3525        };
3526
3527        let edit_range = prefix_offset..suffix_offset;
3528        return Ok((edit_range, new_text.to_string()));
3529    }
3530
3531    fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
3532        if needle.is_empty() {
3533            return Some(0);
3534        }
3535
3536        haystack.match_indices(needle).find_map(|(offset, _)| {
3537            let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
3538            matched_line_start.then_some(offset)
3539        })
3540    }
3541
    /// Convert a unified diff patch into the variable-edit output format.
    ///
    /// Parses `patch` as a unified diff against `old_text` and produces model
    /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
    /// delimiters. The diff is resolved by content matching rather than line
    /// numbers.
    ///
    /// All hunks are applied to a copy of `old_text`; the result is then
    /// compared line by line with `old_text`, and the single span from the
    /// first to the last differing line becomes the replacement text. Two
    /// lines of unchanged context are emitted on each side, widened while the
    /// context lines occur more than once in `old_text`.
    ///
    /// When `cursor_offset` is `Some`, it is interpreted relative to the
    /// position where the first hunk's context was found, and `CURSOR_MARKER`
    /// is inserted into the replacement text if that position falls inside it.
    ///
    /// Returns an empty string when the patch contains no hunks or when
    /// applying it changes no line.
    ///
    /// # Errors
    ///
    /// Returns an error when a hunk's old context cannot be found in the text.
    pub fn patch_to_variable_edit_output(
        old_text: &str,
        patch: &str,
        cursor_offset: Option<usize>,
    ) -> Result<String> {
        // Parse the unified diff into hunks. Each hunk has an `old_context`
        // string (context + deleted lines interleaved in order) and a list of
        // edits expressed as byte ranges within that context plus replacement
        // text.
        let hunks = parse_hunks(patch);
        if hunks.is_empty() {
            return Ok(String::new());
        }

        // Apply each hunk by finding its old_context in the text and
        // performing the edits. We search forward from where the previous
        // hunk ended so that hunks are applied in order.
        let mut new_text = old_text.to_string();
        let mut search_from: usize = 0;
        // Where the first hunk's context was found; anchors `cursor_offset` below.
        let mut first_hunk_pos: Option<usize> = None;

        for hunk in &hunks {
            // Plain substring search: the match is not required to start at a
            // line boundary.
            let context_pos = new_text[search_from..]
                .find(&hunk.old_context)
                .map(|pos| pos + search_from)
                .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;

            if first_hunk_pos.is_none() {
                first_hunk_pos = Some(context_pos);
            }

            // Apply edits in reverse order so byte offsets remain valid.
            for edit in hunk.edits.iter().rev() {
                let abs_start = context_pos + edit.range.start;
                let abs_end = context_pos + edit.range.end;
                new_text.replace_range(abs_start..abs_end, &edit.text);
            }

            // Advance past this hunk's region in the (now modified) text.
            // The new length is the old context length adjusted by each
            // edit's inserted-minus-removed byte count.
            let new_region_len: usize =
                hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
                    len + edit.text.len() - (edit.range.end - edit.range.start)
                });
            search_from = context_pos + new_region_len;
        }

        // Now we have old_text and new_text. Find the changed line range by
        // comparing them.
        let old_lines: Vec<&str> = old_text.lines().collect();
        let new_lines: Vec<&str> = new_text.lines().collect();

        // Find first differing line. If one text is a line-wise prefix of the
        // other, this is the length of the shorter one.
        let first_changed_row = old_lines
            .iter()
            .zip(new_lines.iter())
            .position(|(a, b)| a != b)
            .unwrap_or_else(|| old_lines.len().min(new_lines.len()));

        // Find last differing line (from the end). `max_suffix` caps the
        // common suffix so it never overlaps the common prefix.
        let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
        let common_suffix = old_lines
            .iter()
            .rev()
            .zip(new_lines.iter().rev())
            .take(max_suffix)
            .take_while(|(a, b)| a == b)
            .count();

        // Exclusive end rows of the changed span in the old and new line lists.
        let old_end = old_lines.len() - common_suffix;
        let new_end = new_lines.len() - common_suffix;

        // Both spans empty: the patch changed nothing.
        if first_changed_row == old_end && first_changed_row == new_end {
            return Ok(String::new());
        }

        // Build the replacement text from new_lines[first_changed_row..new_end].
        // Every line is terminated with '\n'.
        let mut merged_new_text = String::new();
        for line in &new_lines[first_changed_row..new_end] {
            merged_new_text.push_str(line);
            merged_new_text.push('\n');
        }

        // cursor_offset is relative to the first hunk's new content in
        // new_text. Translate it to an offset within merged_new_text, which
        // only contains lines first_changed_row..new_end of new_text.
        if let Some(hunk_offset) = cursor_offset {
            let hunk_start = first_hunk_pos.unwrap_or(0);
            let absolute_pos = hunk_start + hunk_offset;

            // Byte offset where first_changed_row starts in new_text.
            // NOTE(review): the `+ 1` assumes every preceding line ends in a
            // single-byte `\n` terminator — confirm for `\r\n` input.
            let merged_start: usize = new_lines[..first_changed_row]
                .iter()
                .map(|line| line.len() + 1)
                .sum();

            // The marker is silently omitted when the cursor lies outside the
            // merged replacement text.
            // NOTE(review): `String::insert_str` panics if `relative_offset`
            // is not on a char boundary — confirm callers guarantee this.
            if absolute_pos >= merged_start {
                let relative_offset = absolute_pos - merged_start;
                if relative_offset <= merged_new_text.len() {
                    merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
                }
            }
        }

        // Build output with 2 lines of context above and below.
        let context_lines_count = 2;
        let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
        let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());

        // Counts how many times the run of lines `lines[line_range]` occurs
        // in `lines` (including the run itself).
        fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
            let pattern = &lines[line_range];
            let pattern_len = pattern.len();

            let mut count = 0;
            for offset in 0..=lines.len() - pattern_len {
                if &lines[offset..offset + pattern_len] == pattern {
                    count += 1;
                }
            }
            count
        }

        // Expand prefix and suffix until they are unique, or until the start
        // or end of the text is reached.
        while prefix_start > 0 {
            if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
                prefix_start -= 1;
            } else {
                break;
            }
        }
        while suffix_end < old_lines.len() {
            if count_matches(old_end..suffix_end, &old_lines) > 1 {
                suffix_end += 1;
            } else {
                break;
            }
        }

        // Emit: prefix context, <|fim_middle|>, replacement, <|fim_suffix|>,
        // suffix context.
        let mut output = String::new();
        for line in &old_lines[prefix_start..first_changed_row] {
            output.push_str(line);
            output.push('\n');
        }
        output.push_str("<|fim_middle|>\n");
        output.push_str(&merged_new_text);
        output.push_str("<|fim_suffix|>\n");
        for line in &old_lines[old_end..suffix_end] {
            output.push_str(line);
            output.push('\n');
        }

        Ok(output)
    }
3700
    /// One hunk of a unified diff, expressed by content rather than line numbers.
    struct ParsedHunk {
        /// The hunk's context lines and deleted lines in order, each
        /// terminated by `'\n'`; this is the text to locate in the original.
        old_context: String,
        /// Edits to apply, with byte ranges relative to `old_context`.
        edits: Vec<ParsedEdit>,
    }
3705
    /// A single replacement inside a `ParsedHunk`.
    struct ParsedEdit {
        /// Byte range within the hunk's `old_context` to replace; empty for a
        /// pure insertion.
        range: Range<usize>,
        /// Replacement text (added lines, each terminated by `'\n'`); empty
        /// for a pure deletion.
        text: String,
    }
3710
3711    /// Parse a unified diff into content-based hunks. Each hunk contains an
3712    /// `old_context` string (context lines + deleted lines, which together
3713    /// form the text that should be found in the original) and a list of edits
3714    /// expressed as byte ranges within that context.
3715    fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
3716        let mut hunks = Vec::new();
3717        let mut current: Option<ParsedHunk> = None;
3718
3719        for line in patch.lines() {
3720            if line.starts_with("@@") {
3721                if let Some(hunk) = current.take() {
3722                    if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3723                        hunks.push(hunk);
3724                    }
3725                }
3726                current = Some(ParsedHunk {
3727                    old_context: String::new(),
3728                    edits: Vec::new(),
3729                });
3730            } else if line.starts_with("---") || line.starts_with("+++") {
3731                continue;
3732            } else if let Some(hunk) = &mut current {
3733                if let Some(added) = line.strip_prefix('+') {
3734                    let pos = hunk.old_context.len();
3735                    if let Some(last_edit) = hunk.edits.last_mut() {
3736                        if last_edit.range.end == pos {
3737                            writeln!(&mut last_edit.text, "{added}").ok();
3738                            continue;
3739                        }
3740                    }
3741                    hunk.edits.push(ParsedEdit {
3742                        range: pos..pos,
3743                        text: format!("{added}\n"),
3744                    });
3745                } else if let Some(removed) = line.strip_prefix('-') {
3746                    let start = hunk.old_context.len();
3747                    writeln!(&mut hunk.old_context, "{removed}").ok();
3748                    let end = hunk.old_context.len();
3749                    if let Some(last_edit) = hunk.edits.last_mut() {
3750                        if last_edit.range.end == start {
3751                            last_edit.range.end = end;
3752                            continue;
3753                        }
3754                    }
3755                    hunk.edits.push(ParsedEdit {
3756                        range: start..end,
3757                        text: String::new(),
3758                    });
3759                } else {
3760                    let ctx = line.strip_prefix(' ').unwrap_or(line);
3761                    writeln!(&mut hunk.old_context, "{ctx}").ok();
3762                }
3763            }
3764        }
3765
3766        if let Some(hunk) = current {
3767            if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3768                hunks.push(hunk);
3769            }
3770        }
3771
3772        hunks
3773    }
3774
3775    #[cfg(test)]
3776    mod tests {
3777        use super::*;
3778        use indoc::indoc;
3779
        // Table-driven test for `apply_variable_edit`: each case applies the
        // returned (range, replacement) pair to `original` and compares the
        // result with `expected`.
        #[test]
        fn test_apply_variable_edit() {
            struct Case {
                // Label used in the assertion message.
                name: &'static str,
                // Text the model output is resolved against.
                original: &'static str,
                // Variable-edit output to apply.
                model_output: &'static str,
                // `original` after the edit has been applied.
                expected: &'static str,
            }

            let cases = [
                Case {
                    name: "simple_single_line_replacement",
                    original: indoc! {"
                        zero
                        one
                        two
                        three
                        four
                        five
                    "},
                    model_output: indoc! {"
                        two
                        <|fim_middle|>
                        THREE
                        <|fim_suffix|>
                        four
                    "},
                    expected: indoc! {"
                        zero
                        one
                        two
                        THREE
                        four
                        five
                    "},
                },
                Case {
                    name: "multi_line_replacement",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                        e
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B
                        C
                        D
                        <|fim_suffix|>
                        e
                    "},
                    expected: indoc! {"
                        a
                        B
                        C
                        D
                        e
                    "},
                },
                Case {
                    name: "insertion_between_existing_lines",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        X
                        <|fim_suffix|>
                        b
                    "},
                    expected: indoc! {"
                        a
                        X
                        b
                        c
                    "},
                },
                Case {
                    name: "deletion",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        c
                        d
                    "},
                },
                Case {
                    name: "replacement_at_start_no_prefix_context",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        <|fim_middle|>
                        X
                        <|fim_suffix|>
                        b
                    "},
                    expected: indoc! {"
                        X
                        b
                        c
                    "},
                },
                Case {
                    name: "replacement_at_end_no_suffix_context",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        b
                        <|fim_middle|>
                        Z
                        <|fim_suffix|>
                    "},
                    expected: indoc! {"
                        a
                        b
                        Z
                    "},
                },
                Case {
                    name: "context_with_trailing_newline_is_preserved",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        B
                        c
                    "},
                },
                Case {
                    name: "cursor_marker_passes_through_untouched",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B<|user_cursor|>B
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        B<|user_cursor|>B
                        c
                    "},
                },
                Case {
                    name: "multiple_prefix_context_lines",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                        e
                    "},
                    model_output: indoc! {"
                        b
                        c
                        <|fim_middle|>
                        D
                        <|fim_suffix|>
                        e
                    "},
                    expected: indoc! {"
                        a
                        b
                        c
                        D
                        e
                    "},
                },
            ];

            for case in cases {
                // Resolve the model output to an edit, then apply it to a copy
                // of the original text.
                let (edit_range, replacement) =
                    apply_variable_edit(case.original, case.model_output).unwrap();
                let mut edited = case.original.to_string();
                edited.replace_range(edit_range, &replacement);
                assert_eq!(edited, case.expected, "{}", case.name);
            }
        }
3996
3997        #[test]
3998        fn test_patch_to_variable_edit() {
3999            struct Case {
4000                name: &'static str,
4001                old: &'static str,
4002                patch: &'static str,
4003                cursor_offset: Option<usize>,
4004                expected_variable_edit: &'static str,
4005                expected_after_apply: &'static str,
4006            }
4007
4008            let cases = [
4009                Case {
4010                    name: "simple_replacement",
4011                    old: indoc! {"
4012                        zero
4013                        one
4014                        two
4015                        three
4016                        four
4017                        five
4018                    "},
4019                    patch: indoc! {"
4020                        @@ -3,3 +3,3 @@
4021                         two
4022                        -three
4023                        +THREE
4024                         four
4025                    "},
4026                    cursor_offset: None,
4027                    expected_variable_edit: indoc! {"
4028                        one
4029                        two
4030                        <|fim_middle|>
4031                        THREE
4032                        <|fim_suffix|>
4033                        four
4034                        five
4035                    "},
4036                    expected_after_apply: indoc! {"
4037                        zero
4038                        one
4039                        two
4040                        THREE
4041                        four
4042                        five
4043                    "},
4044                },
4045                Case {
4046                    name: "insertion",
4047                    old: indoc! {"
4048                        a
4049                        b
4050                        c
4051                        d
4052                        e
4053                    "},
4054                    patch: indoc! {"
4055                        @@ -2,0 +3,1 @@
4056                         b
4057                        +X
4058                         c
4059                    "},
4060                    cursor_offset: None,
4061                    expected_variable_edit: indoc! {"
4062                        a
4063                        b
4064                        <|fim_middle|>
4065                        X
4066                        <|fim_suffix|>
4067                        c
4068                        d
4069                    "},
4070                    expected_after_apply: indoc! {"
4071                        a
4072                        b
4073                        X
4074                        c
4075                        d
4076                        e
4077                    "},
4078                },
4079                Case {
4080                    name: "deletion",
4081                    old: indoc! {"
4082                        a
4083                        b
4084                        c
4085                        d
4086                        e
4087                    "},
4088                    patch: indoc! {"
4089                        @@ -2,3 +2,2 @@
4090                         b
4091                        -c
4092                         d
4093                    "},
4094                    cursor_offset: None,
4095                    expected_variable_edit: indoc! {"
4096                        a
4097                        b
4098                        <|fim_middle|>
4099                        <|fim_suffix|>
4100                        d
4101                        e
4102                    "},
4103                    expected_after_apply: indoc! {"
4104                        a
4105                        b
4106                        d
4107                        e
4108                    "},
4109                },
4110                Case {
4111                    name: "edit_near_start",
4112                    old: indoc! {"
4113                        first
4114                        second
4115                        third
4116                        fourth
4117                    "},
4118                    patch: indoc! {"
4119                        @@ -1,1 +1,1 @@
4120                        -first
4121                        +FIRST
4122                    "},
4123                    cursor_offset: None,
4124                    expected_variable_edit: indoc! {"
4125                        <|fim_middle|>
4126                        FIRST
4127                        <|fim_suffix|>
4128                        second
4129                        third
4130                    "},
4131                    expected_after_apply: indoc! {"
4132                        FIRST
4133                        second
4134                        third
4135                        fourth
4136                    "},
4137                },
4138                Case {
4139                    name: "edit_near_end",
4140                    old: indoc! {"
4141                        first
4142                        second
4143                        third
4144                        fourth
4145                    "},
4146                    patch: indoc! {"
4147                        @@ -4,1 +4,1 @@
4148                        -fourth
4149                        +FOURTH
4150                    "},
4151                    cursor_offset: None,
4152                    expected_variable_edit: indoc! {"
4153                        second
4154                        third
4155                        <|fim_middle|>
4156                        FOURTH
4157                        <|fim_suffix|>
4158                    "},
4159                    expected_after_apply: indoc! {"
4160                        first
4161                        second
4162                        third
4163                        FOURTH
4164                    "},
4165                },
4166                Case {
4167                    name: "cursor_at_start_of_replacement",
4168                    old: indoc! {"
4169                        zero
4170                        one
4171                        two
4172                        three
4173                        four
4174                        five
4175                    "},
4176                    patch: indoc! {"
4177                        @@ -3,3 +3,3 @@
4178                         two
4179                        -three
4180                        +THREE
4181                         four
4182                    "},
4183                    cursor_offset: Some(4),
4184                    expected_variable_edit: indoc! {"
4185                        one
4186                        two
4187                        <|fim_middle|>
4188                        <|user_cursor|>THREE
4189                        <|fim_suffix|>
4190                        four
4191                        five
4192                    "},
4193                    expected_after_apply: indoc! {"
4194                        zero
4195                        one
4196                        two
4197                        <|user_cursor|>THREE
4198                        four
4199                        five
4200                    "},
4201                },
4202                Case {
4203                    name: "cursor_in_middle_of_replacement",
4204                    old: indoc! {"
4205                        zero
4206                        one
4207                        two
4208                        three
4209                        four
4210                        five
4211                    "},
4212                    patch: indoc! {"
4213                        @@ -3,3 +3,3 @@
4214                         two
4215                        -three
4216                        +THREE
4217                         four
4218                    "},
4219                    cursor_offset: Some(6),
4220                    expected_variable_edit: indoc! {"
4221                        one
4222                        two
4223                        <|fim_middle|>
4224                        TH<|user_cursor|>REE
4225                        <|fim_suffix|>
4226                        four
4227                        five
4228                    "},
4229                    expected_after_apply: indoc! {"
4230                        zero
4231                        one
4232                        two
4233                        TH<|user_cursor|>REE
4234                        four
4235                        five
4236                    "},
4237                },
4238                Case {
4239                    name: "expands_context_when_two_lines_not_unique_before_and_after",
4240                    old: indoc! {"
4241                        one
4242                        a
4243                        b
4244                        c
4245                        d
4246                        two
4247                        a
4248                        b
4249                        c
4250                        d
4251                        three
4252                        a
4253                        b
4254                        c
4255                        d
4256                        four
4257                    "},
4258                    patch: indoc! {"
4259                        @@ -4,5 +4,5 @@
4260                         two
4261                         a
4262                         b
4263                        -c
4264                        +C
4265                         d
4266                         three
4267                    "},
4268                    cursor_offset: None,
4269                    expected_variable_edit: indoc! {"
4270                        two
4271                        a
4272                        b
4273                        <|fim_middle|>
4274                        C
4275                        <|fim_suffix|>
4276                        d
4277                        three
4278                    "},
4279                    expected_after_apply: indoc! {"
4280                        one
4281                        a
4282                        b
4283                        c
4284                        d
4285                        two
4286                        a
4287                        b
4288                        C
4289                        d
4290                        three
4291                        a
4292                        b
4293                        c
4294                        d
4295                        four
4296                    "},
4297                },
4298                Case {
4299                    name: "expands_context_when_two_lines_not_unique_before_and_after",
4300                    old: indoc! {"
4301                        {
4302                            {
4303                                one();
4304                            }
4305                        }
4306                        {
4307                            {
4308                                two();
4309                            }
4310                        }
4311                        {
4312                            {
4313                                three();
4314                            }
4315                        }
4316                        {
4317                            {
4318                                four();
4319                            }
4320                        }
4321                    "},
4322                    patch: indoc! {"
4323                        @@ -4,5 +4,5 @@
4324                             {
4325                        -        two();
4326                        +        TWO();
4327                             }
4328                    "},
4329                    cursor_offset: None,
4330                    expected_variable_edit: indoc! {"
4331                                one();
4332                            }
4333                        }
4334                        {
4335                            {
4336                        <|fim_middle|>
4337                                TWO();
4338                        <|fim_suffix|>
4339                            }
4340                        }
4341                        {
4342                            {
4343                                three();
4344                    "},
4345                    expected_after_apply: indoc! {"
4346                        {
4347                            {
4348                                one();
4349                            }
4350                        }
4351                        {
4352                            {
4353                                TWO();
4354                            }
4355                        }
4356                        {
4357                            {
4358                                three();
4359                            }
4360                        }
4361                        {
4362                            {
4363                                four();
4364                            }
4365                        }
4366                    "},
4367                },
4368            ];
4369
4370            for case in cases {
4371                let output =
4372                    patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
4373                        .unwrap_or_else(|error| {
4374                            panic!("failed converting patch for {}: {error}", case.name)
4375                        });
4376                assert_eq!(
4377                    output, case.expected_variable_edit,
4378                    "patch->variable_edit mismatch for {}",
4379                    case.name
4380                );
4381
4382                let (edit_range, replacement) = apply_variable_edit(case.old, &output)
4383                    .unwrap_or_else(|error| {
4384                        panic!("failed applying variable_edit for {}: {error}", case.name)
4385                    });
4386                let mut edited_by_variable_edit = case.old.to_string();
4387                edited_by_variable_edit.replace_range(edit_range, &replacement);
4388                assert_eq!(
4389                    edited_by_variable_edit, case.expected_after_apply,
4390                    "variable_edit apply mismatch for {}",
4391                    case.name
4392                );
4393
4394                let (expected_edit_range, expected_replacement) =
4395                    apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
4396                        |error| {
4397                            panic!(
4398                                "failed applying expected variable_edit for {}: {error}",
4399                                case.name
4400                            )
4401                        },
4402                    );
4403                let mut edited_by_expected_variable_edit = case.old.to_string();
4404                edited_by_expected_variable_edit
4405                    .replace_range(expected_edit_range, &expected_replacement);
4406                assert_eq!(
4407                    edited_by_expected_variable_edit, case.expected_after_apply,
4408                    "expected variable_edit apply mismatch for {}",
4409                    case.name
4410                );
4411            }
4412        }
4413
4414        #[test]
4415        fn test_write_cursor_excerpt_section() {
4416            let path = Path::new("test.rs");
4417            let context = "fn main() {\n    hello();\n}\n";
4418            let cursor_offset = 17;
4419            let mut prompt = String::new();
4420            write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
4421            assert_eq!(
4422                prompt,
4423                "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
4424            );
4425        }
4426    }
4427}
4428
4429/// The zeta1 prompt format
4430pub mod zeta1 {
4431    use super::*;
4432    use std::fmt::Write;
4433
4434    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
4435    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
4436    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
4437    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
4438
4439    const INSTRUCTION_HEADER: &str = concat!(
4440        "### Instruction:\n",
4441        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4442        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4443        "into account the cursor location.\n\n",
4444        "### User Edits:\n\n"
4445    );
4446    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
4447    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
4448
4449    /// Formats a complete zeta1 prompt from the input events and excerpt.
4450    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
4451        let mut prompt = String::with_capacity(
4452            INSTRUCTION_HEADER.len()
4453                + input_events.len()
4454                + EXCERPT_HEADER.len()
4455                + input_excerpt.len()
4456                + RESPONSE_HEADER.len(),
4457        );
4458        prompt.push_str(INSTRUCTION_HEADER);
4459        prompt.push_str(input_events);
4460        prompt.push_str(EXCERPT_HEADER);
4461        prompt.push_str(input_excerpt);
4462        prompt.push_str(RESPONSE_HEADER);
4463        prompt
4464    }
4465
4466    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
4467    /// editable and context byte-offset ranges within `cursor_excerpt`.
4468    pub fn format_zeta1_from_input(
4469        input: &ZetaPromptInput,
4470        editable_range: Range<usize>,
4471        context_range: Range<usize>,
4472    ) -> String {
4473        let events = format_zeta1_events(&input.events);
4474        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
4475        format_zeta1_prompt(&events, &excerpt)
4476    }
4477
4478    /// Formats events in zeta1 style (oldest first).
4479    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
4480        let mut result = String::new();
4481        for event in
4482            events
4483                .iter()
4484                .skip(events.len().saturating_sub(max_edit_event_count_for_format(
4485                    &ZetaFormat::V0114180EditableRegion,
4486                )))
4487        {
4488            let event_string = format_zeta1_event(event);
4489            if event_string.is_empty() {
4490                continue;
4491            }
4492            if !result.is_empty() {
4493                result.push_str("\n\n");
4494            }
4495            result.push_str(&event_string);
4496        }
4497        result
4498    }
4499
4500    fn format_zeta1_event(event: &Event) -> String {
4501        match event {
4502            Event::BufferChange {
4503                path,
4504                old_path,
4505                diff,
4506                ..
4507            } => {
4508                let mut prompt = String::new();
4509                if old_path != path {
4510                    writeln!(
4511                        prompt,
4512                        "User renamed {} to {}\n",
4513                        old_path.display(),
4514                        path.display()
4515                    )
4516                    .ok();
4517                }
4518                if !diff.is_empty() {
4519                    write!(
4520                        prompt,
4521                        "User edited {}:\n```diff\n{}\n```",
4522                        path.display(),
4523                        diff
4524                    )
4525                    .ok();
4526                }
4527                prompt
4528            }
4529        }
4530    }
4531
4532    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
4533    /// within `cursor_excerpt`.
4534    fn format_zeta1_excerpt(
4535        input: &ZetaPromptInput,
4536        editable_range: Range<usize>,
4537        context_range: Range<usize>,
4538    ) -> String {
4539        let path_str = input.cursor_path.to_string_lossy();
4540        let excerpt = &*input.cursor_excerpt;
4541        let cursor_offset = input.cursor_offset_in_excerpt;
4542
4543        let mut prompt = String::new();
4544        writeln!(&mut prompt, "```{path_str}").ok();
4545
4546        let starts_at_file_beginning =
4547            input.excerpt_start_row == Some(0) && context_range.start == 0;
4548        if starts_at_file_beginning {
4549            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
4550        }
4551
4552        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
4553
4554        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
4555        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
4556        prompt.push_str(CURSOR_MARKER);
4557        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
4558        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
4559
4560        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
4561        write!(prompt, "\n```").ok();
4562
4563        prompt
4564    }
4565
4566    /// Cleans zeta1 model output by extracting content between editable region
4567    /// markers and converting the zeta1 cursor marker to the universal one.
4568    /// Returns `None` if the output doesn't contain the expected markers.
4569    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
4570        let content = output.replace(CURSOR_MARKER, "");
4571
4572        let content_start = content
4573            .find(EDITABLE_REGION_START_MARKER)
4574            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
4575            .map(|pos| {
4576                if content.as_bytes().get(pos) == Some(&b'\n') {
4577                    pos + 1
4578                } else {
4579                    pos
4580                }
4581            })
4582            .unwrap_or(0);
4583
4584        let content_end = content
4585            .find(EDITABLE_REGION_END_MARKER)
4586            .map(|pos| {
4587                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
4588                    pos - 1
4589                } else {
4590                    pos
4591                }
4592            })
4593            .unwrap_or(content.len());
4594
4595        if content_start > content_end {
4596            return Some(String::new());
4597        }
4598
4599        let extracted = &content[content_start..content_end];
4600
4601        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
4602            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
4603            let text_before_cursor = text_before_cursor
4604                .find(EDITABLE_REGION_START_MARKER)
4605                .map(|pos| {
4606                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
4607                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
4608                        after_marker + 1
4609                    } else {
4610                        after_marker
4611                    }
4612                })
4613                .unwrap_or(0);
4614            let offset_in_extracted = zeta1_cursor_pos
4615                .saturating_sub(text_before_cursor)
4616                .min(extracted.len());
4617            offset_in_extracted
4618        });
4619
4620        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
4621        if let Some(offset) = cursor_offset {
4622            result.push_str(&extracted[..offset]);
4623            result.push_str(super::CURSOR_MARKER);
4624            result.push_str(&extracted[offset..]);
4625        } else {
4626            result.push_str(extracted);
4627        }
4628
4629        Some(result)
4630    }
4631}
4632
4633#[cfg(test)]
4634mod tests {
4635    use super::*;
4636    use indoc::indoc;
4637
4638    fn make_input(
4639        cursor_excerpt: &str,
4640        editable_range: Range<usize>,
4641        cursor_offset: usize,
4642        events: Vec<Event>,
4643        related_files: Vec<RelatedFile>,
4644    ) -> ZetaPromptInput {
4645        let context_range = 0..cursor_excerpt.len();
4646        ZetaPromptInput {
4647            cursor_path: Path::new("test.rs").into(),
4648            cursor_excerpt: cursor_excerpt.into(),
4649            cursor_offset_in_excerpt: cursor_offset,
4650            excerpt_start_row: None,
4651            events: events.into_iter().map(Arc::new).collect(),
4652            related_files: Some(related_files),
4653            active_buffer_diagnostics: vec![],
4654            excerpt_ranges: ExcerptRanges {
4655                editable_150: editable_range.clone(),
4656                editable_180: editable_range.clone(),
4657                editable_350: editable_range,
4658                editable_150_context_350: context_range.clone(),
4659                editable_180_context_350: context_range.clone(),
4660                editable_350_context_150: context_range,
4661                ..Default::default()
4662            },
4663            syntax_ranges: None,
4664            in_open_source_repo: false,
4665            can_collect_data: false,
4666            repo_url: None,
4667        }
4668    }
4669
4670    fn make_input_with_context_range(
4671        excerpt: &str,
4672        editable_range: Range<usize>,
4673        context_range: Range<usize>,
4674        cursor_offset: usize,
4675    ) -> ZetaPromptInput {
4676        ZetaPromptInput {
4677            cursor_path: Path::new("test.rs").into(),
4678            cursor_excerpt: excerpt.into(),
4679            cursor_offset_in_excerpt: cursor_offset,
4680            excerpt_start_row: None,
4681            events: vec![],
4682            related_files: Some(vec![]),
4683            active_buffer_diagnostics: vec![],
4684            excerpt_ranges: ExcerptRanges {
4685                editable_150: editable_range.clone(),
4686                editable_180: editable_range.clone(),
4687                editable_350: editable_range,
4688                editable_150_context_350: context_range.clone(),
4689                editable_180_context_350: context_range.clone(),
4690                editable_350_context_150: context_range,
4691                ..Default::default()
4692            },
4693            syntax_ranges: None,
4694            in_open_source_repo: false,
4695            can_collect_data: false,
4696            repo_url: None,
4697        }
4698    }
4699
4700    fn make_event(path: &str, diff: &str) -> Event {
4701        Event::BufferChange {
4702            path: Path::new(path).into(),
4703            old_path: Path::new(path).into(),
4704            diff: diff.to_string(),
4705            predicted: false,
4706            in_open_source_repo: false,
4707        }
4708    }
4709
4710    fn make_related_file(path: &str, content: &str) -> RelatedFile {
4711        RelatedFile {
4712            path: Path::new(path).into(),
4713            max_row: content.lines().count() as u32,
4714            excerpts: vec![RelatedExcerpt {
4715                row_range: 0..content.lines().count() as u32,
4716                text: content.into(),
4717                order: 0,
4718            }],
4719            in_open_source_repo: false,
4720        }
4721    }
4722
4723    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> Option<String> {
4724        format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4725    }
4726
4727    fn budget_with_margin(requested_tokens: usize) -> usize {
4728        ((requested_tokens as f64) / 0.9).ceil() as usize
4729    }
4730
4731    #[test]
4732    fn test_no_truncation_when_within_budget() {
4733        let input = make_input(
4734            "prefix\neditable\nsuffix",
4735            7..15,
4736            10,
4737            vec![make_event("a.rs", "-old\n+new\n")],
4738            vec![make_related_file("related.rs", "fn helper() {}\n")],
4739        );
4740
4741        assert_eq!(
4742            format_with_budget(&input, 10000).unwrap(),
4743            indoc! {r#"
4744                <|file_sep|>related.rs
4745                fn helper() {}
4746                <|file_sep|>edit history
4747                --- a/a.rs
4748                +++ b/a.rs
4749                -old
4750                +new
4751                <|file_sep|>test.rs
4752                <|fim_prefix|>
4753                prefix
4754                <|fim_middle|>current
4755                edi<|user_cursor|>table
4756                <|fim_suffix|>
4757
4758                suffix
4759                <|fim_middle|>updated
4760            "#}
4761            .to_string()
4762        );
4763    }
4764
4765    #[test]
4766    fn test_truncation_drops_edit_history_when_budget_tight() {
4767        let input = make_input(
4768            "code",
4769            0..4,
4770            2,
4771            vec![make_event("a.rs", "-x\n+y\n")],
4772            vec![
4773                make_related_file("r1.rs", "aaaaaaa\n"),
4774                make_related_file("r2.rs", "bbbbbbb\n"),
4775            ],
4776        );
4777
4778        assert_eq!(
4779            format_with_budget(&input, 10000).unwrap(),
4780            indoc! {r#"
4781                <|file_sep|>r1.rs
4782                aaaaaaa
4783                <|file_sep|>r2.rs
4784                bbbbbbb
4785                <|file_sep|>edit history
4786                --- a/a.rs
4787                +++ b/a.rs
4788                -x
4789                +y
4790                <|file_sep|>test.rs
4791                <|fim_prefix|>
4792                <|fim_middle|>current
4793                co<|user_cursor|>de
4794                <|fim_suffix|>
4795                <|fim_middle|>updated
4796            "#}
4797            .to_string()
4798        );
4799
4800        assert_eq!(
4801            format_with_budget(&input, budget_with_margin(55)),
4802            Some(
4803                indoc! {r#"
4804                <|file_sep|>edit history
4805                --- a/a.rs
4806                +++ b/a.rs
4807                -x
4808                +y
4809                <|file_sep|>test.rs
4810                <|fim_prefix|>
4811                <|fim_middle|>current
4812                co<|user_cursor|>de
4813                <|fim_suffix|>
4814                <|fim_middle|>updated
4815            "#}
4816                .to_string()
4817            )
4818        );
4819    }
4820
4821    #[test]
4822    fn test_truncation_includes_partial_excerpts() {
4823        let input = make_input(
4824            "x",
4825            0..1,
4826            0,
4827            vec![],
4828            vec![RelatedFile {
4829                path: Path::new("big.rs").into(),
4830                max_row: 30,
4831                in_open_source_repo: false,
4832                excerpts: vec![
4833                    RelatedExcerpt {
4834                        row_range: 0..10,
4835                        text: "first excerpt\n".into(),
4836                        order: 0,
4837                    },
4838                    RelatedExcerpt {
4839                        row_range: 10..20,
4840                        text: "second excerpt\n".into(),
4841                        order: 0,
4842                    },
4843                    RelatedExcerpt {
4844                        row_range: 20..30,
4845                        text: "third excerpt\n".into(),
4846                        order: 0,
4847                    },
4848                ],
4849            }],
4850        );
4851
4852        assert_eq!(
4853            format_with_budget(&input, 10000).unwrap(),
4854            indoc! {r#"
4855                <|file_sep|>big.rs
4856                first excerpt
4857                ...
4858                second excerpt
4859                ...
4860                third excerpt
4861                <|file_sep|>test.rs
4862                <|fim_prefix|>
4863                <|fim_middle|>current
4864                <|user_cursor|>x
4865                <|fim_suffix|>
4866                <|fim_middle|>updated
4867            "#}
4868            .to_string()
4869        );
4870
4871        assert_eq!(
4872            format_with_budget(&input, budget_with_margin(50)).unwrap(),
4873            indoc! {r#"
4874                <|file_sep|>big.rs
4875                first excerpt
4876                ...
4877                <|file_sep|>test.rs
4878                <|fim_prefix|>
4879                <|fim_middle|>current
4880                <|user_cursor|>x
4881                <|fim_suffix|>
4882                <|fim_middle|>updated
4883            "#}
4884            .to_string()
4885        );
4886    }
4887
4888    #[test]
4889    fn test_truncation_prioritizes_lower_order_excerpts() {
4890        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
4891        // With tight budget, only the lower-order excerpt from file_b should be included.
4892        let input = make_input(
4893            "x",
4894            0..1,
4895            0,
4896            vec![],
4897            vec![
4898                RelatedFile {
4899                    path: Path::new("file_a.rs").into(),
4900                    max_row: 10,
4901                    in_open_source_repo: false,
4902                    excerpts: vec![RelatedExcerpt {
4903                        row_range: 0..10,
4904                        text: "low priority content\n".into(),
4905                        order: 5,
4906                    }],
4907                },
4908                RelatedFile {
4909                    path: Path::new("file_b.rs").into(),
4910                    max_row: 10,
4911                    in_open_source_repo: false,
4912                    excerpts: vec![RelatedExcerpt {
4913                        row_range: 0..10,
4914                        text: "high priority content\n".into(),
4915                        order: 1,
4916                    }],
4917                },
4918            ],
4919        );
4920
4921        // With large budget, both files included; rendered in stable lexicographic order.
4922        assert_eq!(
4923            format_with_budget(&input, 10000).unwrap(),
4924            indoc! {r#"
4925                <|file_sep|>file_a.rs
4926                low priority content
4927                <|file_sep|>file_b.rs
4928                high priority content
4929                <|file_sep|>test.rs
4930                <|fim_prefix|>
4931                <|fim_middle|>current
4932                <|user_cursor|>x
4933                <|fim_suffix|>
4934                <|fim_middle|>updated
4935            "#}
4936            .to_string()
4937        );
4938
4939        // With tight budget, only file_b (lower order) fits.
4940        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
4941        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
4942        // file_a would need another 14 tokens, which doesn't fit.
4943        assert_eq!(
4944            format_with_budget(&input, budget_with_margin(52)).unwrap(),
4945            indoc! {r#"
4946                <|file_sep|>file_b.rs
4947                high priority content
4948                <|file_sep|>test.rs
4949                <|fim_prefix|>
4950                <|fim_middle|>current
4951                <|user_cursor|>x
4952                <|fim_suffix|>
4953                <|fim_middle|>updated
4954            "#}
4955            .to_string()
4956        );
4957    }
4958
4959    #[test]
4960    fn test_truncation_drops_high_order_excerpts_within_file() {
4961        // A single file has excerpts at order 1 and order 3. With a tight budget,
4962        // only the order-1 excerpts are included while the order-3 excerpt is
4963        // dropped — even though they belong to the same file. This also preserves
4964        // the parent invariant: parent outline items have order ≤ their best
4965        // child, so they're always included when any child is.
4966        let input = make_input(
4967            "x",
4968            0..1,
4969            0,
4970            vec![],
4971            vec![RelatedFile {
4972                path: Path::new("mod.rs").into(),
4973                max_row: 30,
4974                in_open_source_repo: false,
4975                excerpts: vec![
4976                    RelatedExcerpt {
4977                        row_range: 0..5,
4978                        text: "mod header\n".into(),
4979                        order: 1,
4980                    },
4981                    RelatedExcerpt {
4982                        row_range: 5..15,
4983                        text: "important fn\n".into(),
4984                        order: 1,
4985                    },
4986                    RelatedExcerpt {
4987                        row_range: 15..30,
4988                        text: "less important fn\n".into(),
4989                        order: 3,
4990                    },
4991                ],
4992            }],
4993        );
4994
4995        // With large budget, all three excerpts included.
4996        assert_eq!(
4997            format_with_budget(&input, 10000).unwrap(),
4998            indoc! {r#"
4999                <|file_sep|>mod.rs
5000                mod header
5001                ...
5002                important fn
5003                ...
5004                less important fn
5005                <|file_sep|>test.rs
5006                <|fim_prefix|>
5007                <|fim_middle|>current
5008                <|user_cursor|>x
5009                <|fim_suffix|>
5010                <|fim_middle|>updated
5011            "#}
5012            .to_string()
5013        );
5014
5015        // With tight budget, only order<=1 excerpts included (header + important fn).
5016        assert_eq!(
5017            format_with_budget(&input, budget_with_margin(55)).unwrap(),
5018            indoc! {r#"
5019                <|file_sep|>mod.rs
5020                mod header
5021                ...
5022                important fn
5023                ...
5024                <|file_sep|>test.rs
5025                <|fim_prefix|>
5026                <|fim_middle|>current
5027                <|user_cursor|>x
5028                <|fim_suffix|>
5029                <|fim_middle|>updated
5030            "#}
5031            .to_string()
5032        );
5033    }
5034
5035    #[test]
5036    fn test_truncation_drops_older_events_first() {
5037        let input = make_input(
5038            "x",
5039            0..1,
5040            0,
5041            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
5042            vec![],
5043        );
5044
5045        assert_eq!(
5046            format_with_budget(&input, 10000).unwrap(),
5047            indoc! {r#"
5048                <|file_sep|>edit history
5049                --- a/old.rs
5050                +++ b/old.rs
5051                -1
5052                --- a/new.rs
5053                +++ b/new.rs
5054                -2
5055                <|file_sep|>test.rs
5056                <|fim_prefix|>
5057                <|fim_middle|>current
5058                <|user_cursor|>x
5059                <|fim_suffix|>
5060                <|fim_middle|>updated
5061            "#}
5062            .to_string()
5063        );
5064
5065        assert_eq!(
5066            format_with_budget(&input, 60).unwrap(),
5067            indoc! {r#"
5068                <|file_sep|>edit history
5069                --- a/new.rs
5070                +++ b/new.rs
5071                -2
5072                <|file_sep|>test.rs
5073                <|fim_prefix|>
5074                <|fim_middle|>current
5075                <|user_cursor|>x
5076                <|fim_suffix|>
5077                <|fim_middle|>updated
5078            "#}
5079            .to_string()
5080        );
5081    }
5082
5083    #[test]
5084    fn test_cursor_excerpt_always_included_with_minimal_budget() {
5085        let input = make_input(
5086            "fn main() {}",
5087            0..12,
5088            3,
5089            vec![make_event("a.rs", "-old\n+new\n")],
5090            vec![make_related_file("related.rs", "helper\n")],
5091        );
5092
5093        assert!(format_with_budget(&input, 30).is_none())
5094    }
5095
5096    #[track_caller]
5097    fn format_seed_coder(input: &ZetaPromptInput) -> String {
5098        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
5099            .expect("seed coder prompt formatting should succeed")
5100    }
5101
5102    #[track_caller]
5103    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
5104        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
5105            .expect("seed coder prompt formatting should succeed")
5106    }
5107
5108    #[test]
5109    fn test_seed_coder_alias_matches_v0211_seed_coder() {
5110        let input = make_input(
5111            "prefix\neditable\nsuffix",
5112            7..15,
5113            10,
5114            vec![make_event("a.rs", "-old\n+new\n")],
5115            vec![make_related_file("related.rs", "fn helper() {}\n")],
5116        );
5117
5118        assert_eq!(
5119            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 10000),
5120            format_prompt_with_budget_for_format(&input, ZetaFormat::V0331SeedCoderModelPy, 10000)
5121        );
5122        assert_eq!(
5123            ZetaFormat::parse("V0331SeedCoderModelPy").unwrap(),
5124            ZetaFormat::V0331SeedCoderModelPy
5125        );
5126    }
5127
5128    #[test]
5129    fn test_seed_coder_basic_format() {
5130        let input = make_input(
5131            "prefix\neditable\nsuffix",
5132            7..15,
5133            10,
5134            vec![make_event("a.rs", "-old\n+new\n")],
5135            vec![make_related_file("related.rs", "fn helper() {}\n")],
5136        );
5137
5138        assert_eq!(
5139            format_seed_coder(&input),
5140            indoc! {r#"
5141                <[fim-suffix]>
5142                suffix
5143                <[fim-prefix]><filename>related.rs
5144                fn helper() {}
5145
5146                <filename>edit_history
5147                --- a/a.rs
5148                +++ b/a.rs
5149                -old
5150                +new
5151
5152                <filename>test.rs
5153                prefix
5154                <<<<<<< CURRENT
5155                edi<|user_cursor|>table
5156                =======
5157                <[fim-middle]>"#}
5158        );
5159    }
5160
5161    #[test]
5162    fn test_v0317_formats_prompt_with_many_related_files() {
5163        let related_files = (0..900)
5164            .map(|index| {
5165                make_related_file(
5166                    &format!("related_{index}.rs"),
5167                    "fn helper() {\n    let value = 1;\n}\n",
5168                )
5169            })
5170            .collect();
5171
5172        let input = make_input(
5173            "code",
5174            0..4,
5175            2,
5176            vec![make_event("a.rs", "-x\n+y\n")],
5177            related_files,
5178        );
5179
5180        let prompt =
5181            format_prompt_with_budget_for_format(&input, ZetaFormat::V0317SeedMultiRegions, 4096);
5182
5183        assert!(prompt.is_some());
5184        let prompt = prompt.expect("v0317 should produce a prompt under high related-file count");
5185        assert!(prompt.contains("test.rs"));
5186        assert!(prompt.contains(CURSOR_MARKER));
5187    }
5188
5189    #[test]
5190    fn test_v0327_formats_single_file_prompt_without_related_files() {
5191        let excerpt = indoc! {"
5192            line01
5193            line02
5194            line03
5195            line04
5196            line05
5197            line06
5198            line07
5199            line08
5200            line09
5201            line10
5202            line11
5203            line12
5204            line13
5205            line14
5206            line15
5207            line16
5208            line17
5209            line18
5210            line19
5211            line20
5212        "};
5213        let cursor_offset = excerpt.find("line10").expect("cursor line exists");
5214        let input = make_input(
5215            excerpt,
5216            0..excerpt.len(),
5217            cursor_offset,
5218            vec![make_event("a.rs", "-x\n+y\n")],
5219            vec![make_related_file("related.rs", "fn helper() {}\n")],
5220        );
5221
5222        let prompt =
5223            format_prompt_with_budget_for_format(&input, ZetaFormat::V0327SingleFile, 4096)
5224                .expect("v0327 prompt should fit");
5225
5226        assert!(prompt.contains("line01"));
5227        assert!(prompt.contains("line20"));
5228        assert!(prompt.contains("<filename>edit_history"));
5229        assert!(prompt.contains("<filename>test.rs"));
5230        assert!(prompt.contains(CURSOR_MARKER));
5231        assert!(!prompt.contains("related.rs"));
5232        assert!(!prompt.contains("fn helper() {}"));
5233    }
5234
5235    #[test]
5236    fn test_v0327_resolve_cursor_region_uses_full_excerpt_context() {
5237        let excerpt = (0..80)
5238            .map(|index| format!("l{index:02}\n"))
5239            .collect::<String>();
5240        let cursor_offset = excerpt.find("l40").expect("cursor line exists");
5241        let input = make_input(&excerpt, 0..excerpt.len(), cursor_offset, vec![], vec![]);
5242
5243        let (context, editable_range, context_range, adjusted_cursor) =
5244            resolve_cursor_region(&input, ZetaFormat::V0327SingleFile);
5245
5246        assert_eq!(context, excerpt);
5247        assert_eq!(context_range, 0..excerpt.len());
5248        assert_eq!(adjusted_cursor, cursor_offset);
5249        assert!(editable_range.start < adjusted_cursor);
5250        assert!(editable_range.end > adjusted_cursor);
5251        assert!(editable_range.end < excerpt.len());
5252    }
5253
5254    #[test]
5255    fn test_seed_coder_no_context() {
5256        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
5257
5258        assert_eq!(
5259            format_seed_coder(&input),
5260            indoc! {r#"
5261                <[fim-suffix]>
5262                after
5263                <[fim-prefix]><filename>test.rs
5264                before
5265                <<<<<<< CURRENT
5266                mid<|user_cursor|>dle
5267                =======
5268                <[fim-middle]>"#}
5269        );
5270    }
5271
5272    #[test]
5273    fn test_seed_coder_truncation_drops_context() {
5274        let input = make_input(
5275            "code",
5276            0..4,
5277            2,
5278            vec![make_event("a.rs", "-x\n+y\n")],
5279            vec![make_related_file("r1.rs", "content\n")],
5280        );
5281
5282        // With large budget, everything is included
5283        assert_eq!(
5284            format_seed_coder(&input),
5285            indoc! {r#"
5286                <[fim-suffix]>
5287                <[fim-prefix]><filename>r1.rs
5288                content
5289
5290                <filename>edit_history
5291                --- a/a.rs
5292                +++ b/a.rs
5293                -x
5294                +y
5295
5296                <filename>test.rs
5297                <<<<<<< CURRENT
5298                co<|user_cursor|>de
5299                =======
5300                <[fim-middle]>"#}
5301        );
5302
5303        assert_eq!(
5304            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 24),
5305            None
5306        );
5307
5308        assert_eq!(
5309            format_seed_coder_with_budget(&input, 40),
5310            indoc! {r#"
5311                <[fim-suffix]>
5312                <[fim-prefix]><filename>test.rs
5313                <<<<<<< CURRENT
5314                co<|user_cursor|>de
5315                =======
5316                <[fim-middle]>"#
5317            }
5318        )
5319    }
5320
5321    #[test]
5322    fn test_seed_coder_truncation_prioritizes_lower_order() {
5323        let input = make_input(
5324            "code",
5325            0..4,
5326            2,
5327            vec![],
5328            vec![
5329                RelatedFile {
5330                    path: Path::new("low_prio.rs").into(),
5331                    max_row: 5,
5332                    in_open_source_repo: false,
5333                    excerpts: vec![RelatedExcerpt {
5334                        row_range: 0..5,
5335                        text: "low prio\n".into(),
5336                        order: 10,
5337                    }],
5338                },
5339                RelatedFile {
5340                    path: Path::new("high_prio.rs").into(),
5341                    max_row: 5,
5342                    in_open_source_repo: false,
5343                    excerpts: vec![RelatedExcerpt {
5344                        row_range: 0..5,
5345                        text: "high prio\n".into(),
5346                        order: 1,
5347                    }],
5348                },
5349            ],
5350        );
5351
5352        // With large budget, both included; rendered in stable lexicographic order.
5353        assert_eq!(
5354            format_seed_coder(&input),
5355            indoc! {r#"
5356                <[fim-suffix]>
5357                <[fim-prefix]><filename>low_prio.rs
5358                low prio
5359                <filename>high_prio.rs
5360                high prio
5361
5362                <filename>test.rs
5363                <<<<<<< CURRENT
5364                co<|user_cursor|>de
5365                =======
5366                <[fim-middle]>"#}
5367        );
5368
5369        // With tight budget under the generic heuristic, context is dropped but the
5370        // minimal cursor section still fits.
5371        assert_eq!(
5372            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 44),
5373            Some(
5374                indoc! {r#"
5375                    <[fim-suffix]>
5376                    <[fim-prefix]><filename>test.rs
5377                    <<<<<<< CURRENT
5378                    co<|user_cursor|>de
5379                    =======
5380                    <[fim-middle]>"#}
5381                .to_string()
5382            )
5383        );
5384    }
5385
5386    #[test]
5387    fn test_format_zeta1_from_input_basic() {
5388        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
5389        let input = ZetaPromptInput {
5390            cursor_path: Path::new("src/main.rs").into(),
5391            cursor_excerpt: excerpt.into(),
5392            cursor_offset_in_excerpt: 30,
5393            excerpt_start_row: Some(0),
5394            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
5395            related_files: Some(vec![]),
5396            active_buffer_diagnostics: vec![],
5397            excerpt_ranges: ExcerptRanges {
5398                editable_150: 15..41,
5399                editable_180: 15..41,
5400                editable_350: 15..41,
5401                editable_150_context_350: 0..excerpt.len(),
5402                editable_180_context_350: 0..excerpt.len(),
5403                editable_350_context_150: 0..excerpt.len(),
5404                ..Default::default()
5405            },
5406            syntax_ranges: None,
5407            in_open_source_repo: false,
5408            can_collect_data: false,
5409            repo_url: None,
5410        };
5411
5412        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
5413
5414        assert_eq!(
5415            prompt,
5416            concat!(
5417                "### Instruction:\n",
5418                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5419                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5420                "into account the cursor location.\n",
5421                "\n",
5422                "### User Edits:\n",
5423                "\n",
5424                "User edited other.rs:\n",
5425                "```diff\n",
5426                "-old\n",
5427                "+new\n",
5428                "\n",
5429                "```\n",
5430                "\n",
5431                "### User Excerpt:\n",
5432                "\n",
5433                "```src/main.rs\n",
5434                "<|start_of_file|>\n",
5435                "fn before() {}\n",
5436                "<|editable_region_start|>\n",
5437                "fn foo() {\n",
5438                "    <|user_cursor_is_here|>let x = 1;\n",
5439                "\n",
5440                "<|editable_region_end|>}\n",
5441                "fn after() {}\n",
5442                "\n",
5443                "```\n",
5444                "\n",
5445                "### Response:\n",
5446            ),
5447        );
5448    }
5449
5450    #[test]
5451    fn test_format_zeta1_from_input_no_start_of_file() {
5452        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
5453        let input = ZetaPromptInput {
5454            cursor_path: Path::new("src/main.rs").into(),
5455            cursor_excerpt: excerpt.into(),
5456            cursor_offset_in_excerpt: 15,
5457            excerpt_start_row: Some(10),
5458            events: vec![],
5459            related_files: Some(vec![]),
5460            active_buffer_diagnostics: vec![],
5461            excerpt_ranges: ExcerptRanges {
5462                editable_150: 0..28,
5463                editable_180: 0..28,
5464                editable_350: 0..28,
5465                editable_150_context_350: 0..28,
5466                editable_180_context_350: 0..28,
5467                editable_350_context_150: 0..28,
5468                ..Default::default()
5469            },
5470            syntax_ranges: None,
5471            in_open_source_repo: false,
5472            can_collect_data: false,
5473            repo_url: None,
5474        };
5475
5476        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
5477
5478        assert_eq!(
5479            prompt,
5480            concat!(
5481                "### Instruction:\n",
5482                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5483                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5484                "into account the cursor location.\n",
5485                "\n",
5486                "### User Edits:\n",
5487                "\n",
5488                "\n",
5489                "\n",
5490                "### User Excerpt:\n",
5491                "\n",
5492                "```src/main.rs\n",
5493                "<|editable_region_start|>\n",
5494                "fn foo() {\n",
5495                "    <|user_cursor_is_here|>let x = 1;\n",
5496                "}\n",
5497                "\n",
5498                "<|editable_region_end|>\n",
5499                "```\n",
5500                "\n",
5501                "### Response:\n",
5502            ),
5503        );
5504    }
5505
5506    #[test]
5507    fn test_format_zeta1_from_input_with_sub_ranges() {
5508        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
5509        let editable_range = 10..37;
5510        let context_range = 0..excerpt.len();
5511
5512        let input = ZetaPromptInput {
5513            cursor_path: Path::new("test.rs").into(),
5514            cursor_excerpt: excerpt.into(),
5515            cursor_offset_in_excerpt: 25,
5516            excerpt_start_row: Some(0),
5517            events: vec![],
5518            related_files: Some(vec![]),
5519            active_buffer_diagnostics: vec![],
5520            excerpt_ranges: ExcerptRanges {
5521                editable_150: editable_range.clone(),
5522                editable_180: editable_range.clone(),
5523                editable_350: editable_range.clone(),
5524                editable_150_context_350: context_range.clone(),
5525                editable_180_context_350: context_range.clone(),
5526                editable_350_context_150: context_range.clone(),
5527                ..Default::default()
5528            },
5529            syntax_ranges: None,
5530            in_open_source_repo: false,
5531            can_collect_data: false,
5532            repo_url: None,
5533        };
5534
5535        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
5536
5537        assert_eq!(
5538            prompt,
5539            concat!(
5540                "### Instruction:\n",
5541                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5542                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5543                "into account the cursor location.\n",
5544                "\n",
5545                "### User Edits:\n",
5546                "\n",
5547                "\n",
5548                "\n",
5549                "### User Excerpt:\n",
5550                "\n",
5551                "```test.rs\n",
5552                "<|start_of_file|>\n",
5553                "// prefix\n",
5554                "<|editable_region_start|>\n",
5555                "fn foo() {\n",
5556                "    <|user_cursor_is_here|>let x = 1;\n",
5557                "}\n",
5558                "<|editable_region_end|>\n",
5559                "// suffix\n",
5560                "\n",
5561                "```\n",
5562                "\n",
5563                "### Response:\n",
5564            ),
5565        );
5566    }
5567
5568    #[test]
5569    fn test_max_event_count() {
5570        fn make_numbered_event(index: usize) -> Event {
5571            return make_event(
5572                &format!("event-{index}.rs"),
5573                &format!("-old-{index}\n+new-{index}\n"),
5574            );
5575        }
5576        let input = make_input(
5577            "x",
5578            0..1,
5579            0,
5580            (0..3).map(make_numbered_event).collect(),
5581            vec![],
5582        );
5583
5584        let edit_history_section = format_edit_history_within_budget(
5585            &input.events,
5586            "<|file_sep|>",
5587            "edit history",
5588            usize::MAX,
5589            5,
5590        );
5591
5592        assert_eq!(
5593            &edit_history_section,
5594            indoc!(
5595                "
5596                <|file_sep|>edit history
5597                --- a/event-0.rs
5598                +++ b/event-0.rs
5599                -old-0
5600                +new-0
5601                --- a/event-1.rs
5602                +++ b/event-1.rs
5603                -old-1
5604                +new-1
5605                --- a/event-2.rs
5606                +++ b/event-2.rs
5607                -old-2
5608                +new-2
5609            "
5610            )
5611        );
5612
5613        let edit_history_section = format_edit_history_within_budget(
5614            &input.events,
5615            "<|file_sep|>",
5616            "edit history",
5617            usize::MAX,
5618            2,
5619        );
5620
5621        assert_eq!(
5622            &edit_history_section,
5623            indoc!(
5624                "
5625                <|file_sep|>edit history
5626                --- a/event-1.rs
5627                +++ b/event-1.rs
5628                -old-1
5629                +new-1
5630                --- a/event-2.rs
5631                +++ b/event-2.rs
5632                -old-2
5633                +new-2
5634            "
5635            )
5636        );
5637
5638        let edit_history_section = format_edit_history_within_budget(
5639            &input.events,
5640            "<|file_sep|>",
5641            "edit history",
5642            usize::MAX,
5643            0,
5644        );
5645
5646        assert_eq!(&edit_history_section, "");
5647    }
5648
5649    #[test]
5650    fn test_clean_zeta1_model_output_basic() {
5651        let output = indoc! {"
5652            <|editable_region_start|>
5653            fn main() {
5654                println!(\"hello\");
5655            }
5656            <|editable_region_end|>
5657        "};
5658
5659        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5660        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
5661    }
5662
5663    #[test]
5664    fn test_clean_zeta1_model_output_with_cursor() {
5665        let output = indoc! {"
5666            <|editable_region_start|>
5667            fn main() {
5668                <|user_cursor_is_here|>println!(\"hello\");
5669            }
5670            <|editable_region_end|>
5671        "};
5672
5673        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5674        assert_eq!(
5675            cleaned,
5676            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
5677        );
5678    }
5679
5680    #[test]
5681    fn test_clean_zeta1_model_output_no_markers() {
5682        let output = "fn main() {}\n";
5683        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5684        assert_eq!(cleaned, "fn main() {}\n");
5685    }
5686
5687    #[test]
5688    fn test_clean_zeta1_model_output_empty_region() {
5689        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
5690        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5691        assert_eq!(cleaned, "");
5692    }
5693
5694    fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
5695        let mut result = excerpt.to_string();
5696        result.replace_range(
5697            parsed_output.range_in_excerpt.clone(),
5698            &parsed_output.new_editable_region,
5699        );
5700        result
5701    }
5702
5703    #[test]
5704    fn test_parse_zeta2_model_output() {
5705        let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
5706        let context_start = excerpt.find("ctx start").unwrap();
5707        let context_end = excerpt.find("after ctx").unwrap();
5708        let editable_start = excerpt.find("editable old").unwrap();
5709        let editable_end = editable_start + "editable old\n".len();
5710        let input = make_input_with_context_range(
5711            excerpt,
5712            editable_start..editable_end,
5713            context_start..context_end,
5714            editable_start,
5715        );
5716
5717        let output = parse_zeta2_model_output(
5718            "editable new\n>>>>>>> UPDATED\n",
5719            ZetaFormat::V0131GitMergeMarkersPrefix,
5720            &input,
5721        )
5722        .unwrap();
5723
5724        assert_eq!(
5725            apply_edit(excerpt, &output),
5726            "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
5727        );
5728    }
5729
5730    #[test]
5731    fn test_parse_zeta2_model_output_identity() {
5732        let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
5733        let editable_start = excerpt.find("bbb").unwrap();
5734        let editable_end = excerpt.find("ddd").unwrap();
5735        let input = make_input_with_context_range(
5736            excerpt,
5737            editable_start..editable_end,
5738            0..excerpt.len(),
5739            editable_start,
5740        );
5741
5742        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5743        let output =
5744            parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
5745
5746        assert_eq!(apply_edit(excerpt, &output), excerpt);
5747    }
5748
5749    #[test]
5750    fn test_parse_zeta2_model_output_strips_end_marker() {
5751        let excerpt = "hello\nworld\n";
5752        let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
5753
5754        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5755        let output1 =
5756            parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
5757        let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();
5758
5759        assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
5760        assert_eq!(apply_edit(excerpt, &output1), "new content\n");
5761    }
5762
5763    #[test]
5764    fn test_parsed_output_to_patch_round_trips_through_udiff_application() {
5765        let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
5766        let context_start = excerpt.find("ctx start").unwrap();
5767        let context_end = excerpt.find("after ctx").unwrap();
5768        let editable_start = excerpt.find("editable old").unwrap();
5769        let editable_end = editable_start + "editable old\n".len();
5770        let input = make_input_with_context_range(
5771            excerpt,
5772            editable_start..editable_end,
5773            context_start..context_end,
5774            editable_start,
5775        );
5776
5777        let parsed = parse_zeta2_model_output(
5778            "editable new\n>>>>>>> UPDATED\n",
5779            ZetaFormat::V0131GitMergeMarkersPrefix,
5780            &input,
5781        )
5782        .unwrap();
5783        let expected = apply_edit(excerpt, &parsed);
5784        let patch = parsed_output_to_patch(&input, parsed).unwrap();
5785        let patched = udiff::apply_diff_to_string(&patch, excerpt).unwrap();
5786
5787        assert_eq!(patched, expected);
5788    }
5789
5790    #[test]
5791    fn test_special_tokens_not_triggered_by_comment_separator() {
5792        // Regression test for https://github.com/zed-industries/zed/issues/52489
5793        let excerpt = "fn main() {\n    // =======\n    println!(\"hello\");\n}\n";
5794        let input = make_input(excerpt, 0..excerpt.len(), 0, vec![], vec![]);
5795        assert!(
5796            !prompt_input_contains_special_tokens(&input, ZetaFormat::V0131GitMergeMarkersPrefix),
5797            "comment containing ======= should not trigger special token detection"
5798        );
5799    }
5800}