zeta_prompt.rs

   1pub mod excerpt_ranges;
   2pub mod multi_region;
   3pub mod udiff;
   4
   5use anyhow::{Result, anyhow};
   6use serde::{Deserialize, Serialize};
   7use std::fmt::Write;
   8use std::ops::Range;
   9use std::path::Path;
  10use std::sync::Arc;
  11use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
  12
  13pub use crate::excerpt_ranges::{
  14    ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
  15};
  16
/// Literal marker text for the cursor position.
///
/// It is handed to the multi-region editable-section writers together with the
/// cursor offset, and it is listed among the special tokens of the multi-region
/// formats.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
  18
/// Use up to this fraction of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// the prefilled part of the region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  23
  24fn estimate_tokens(bytes: usize) -> usize {
  25    bytes / 3
  26}
  27
  28/// Leave some slack to avoid overflow.
  29fn apply_prompt_budget_margin(max_tokens: usize) -> usize {
  30    (max_tokens as f64 * 0.9).floor() as usize
  31}
  32
  33/// Ensure text fits into the tokens budget; trim by line boundaries if needed.
  34pub fn clamp_text_to_token_count(text: &str, max_tokens: usize) -> &str {
  35    if estimate_tokens(text.len()) <= max_tokens {
  36        return text;
  37    }
  38
  39    let mut end_byte_offset = 0;
  40
  41    for line in text.split_inclusive('\n') {
  42        if estimate_tokens(line.len() + end_byte_offset) > max_tokens {
  43            break;
  44        }
  45
  46        end_byte_offset += line.len();
  47    }
  48
  49    &text[..end_byte_offset]
  50}
  51
/// Everything needed to render a prompt for one prediction request.
///
/// The same input can be rendered in any [`ZetaFormat`]; the format decides which
/// parts are used and how they are laid out.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor. Related files with this same path
    /// have their excerpts filtered against the cursor excerpt.
    pub cursor_path: Arc<Path>,
    /// Text surrounding the cursor. This is the text scanned for special tokens.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Row offset added to rows counted inside `cursor_excerpt`. When absent,
    /// related files are not filtered against the cursor excerpt.
    /// NOTE(review): presumably the buffer row at which `cursor_excerpt` starts —
    /// confirm with the producer.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events that are rendered into the edit-history section.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other files; `None` is treated like an empty list.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    /// Diagnostics for the active buffer. Only `ZetaFormat::V0420Diagnostics`
    /// forwards them to the prompt assembler.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    #[serde(default)]
    pub in_open_source_repo: bool,
    #[serde(default)]
    pub can_collect_data: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
  79
/// Identifies a prompt format version.
///
/// The variant name is the format's string form: it is what `Display` prints
/// (through `IntoStaticStr`) and what [`ZetaFormat::parse`] matches against.
/// Variant order is the iteration order of `ZetaFormat::iter()`.
/// The default format is `V0131GitMergeMarkersPrefix`.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// `v0226Hashline` starts with a lowercase letter, which this lint would flag.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    // Also deserializes from the name "Zeta2".
    #[serde(alias = "Zeta2")]
    V0211SeedCoder,
    V0331SeedCoderModelPy,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
    /// Multi-block marker spans with NO_EDITS sentinel.
    V0306SeedMultiRegions,
    /// Byte-exact marker spans; all intermediate markers emitted; repeated marker means no-edit.
    V0316SeedMultiRegions,
    /// V0316, but marker numbers are relative to the cursor block (e.g. -1, -0, +1).
    V0317SeedMultiRegions,
    /// V0316 with larger block sizes.
    // Also deserializes from the name "Zeta2.1".
    #[serde(alias = "Zeta2.1")]
    V0318SeedMultiRegions,
    /// V0318-style markers over the full available current file excerpt with no related files.
    V0327SingleFile,
    /// V0318-style prompt with buffer diagnostics
    V0420Diagnostics,
}
 122
 123impl std::fmt::Display for ZetaFormat {
 124    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 125        write!(f, "{}", <&'static str>::from(self))
 126    }
 127}
 128
 129impl ZetaFormat {
 130    pub fn parse(format_name: &str) -> Result<Self> {
 131        let lower = format_name.to_lowercase();
 132
 133        // Exact case-insensitive match takes priority, bypassing ambiguity checks.
 134        for variant in ZetaFormat::iter() {
 135            if <&'static str>::from(&variant).to_lowercase() == lower {
 136                return Ok(variant);
 137            }
 138        }
 139
 140        let mut results = ZetaFormat::iter().filter(|version| {
 141            <&'static str>::from(version)
 142                .to_lowercase()
 143                .contains(&lower)
 144        });
 145        let Some(result) = results.next() else {
 146            anyhow::bail!(
 147                "`{format_name}` did not match any of:\n{}",
 148                Self::options_as_string()
 149            );
 150        };
 151        if results.next().is_some() {
 152            anyhow::bail!(
 153                "`{format_name}` matched more than one of:\n{}",
 154                Self::options_as_string()
 155            );
 156        }
 157        Ok(result)
 158    }
 159
 160    pub fn options_as_string() -> String {
 161        ZetaFormat::iter()
 162            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 163            .collect::<Vec<_>>()
 164            .concat()
 165    }
 166}
 167
/// An editing event that can be rendered into a prompt's edit history.
///
/// Serialized as an internally tagged enum, with the variant name stored under
/// the `event` key.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, carried as diff text.
    BufferChange {
        /// Path written on the `+++ b` header line.
        path: Arc<Path>,
        /// Path written on the `--- a` header line.
        old_path: Arc<Path>,
        /// Diff body, appended verbatim after the two header lines.
        diff: String,
        /// When true, the rendered event is preceded by a
        /// `// User accepted prediction:` line.
        predicted: bool,
        in_open_source_repo: bool,
    },
}
 179
 180impl Event {
 181    pub fn in_open_source_repo(&self) -> bool {
 182        match self {
 183            Event::BufferChange {
 184                in_open_source_repo,
 185                ..
 186            } => *in_open_source_repo,
 187        }
 188    }
 189}
 190
 191pub fn write_event(prompt: &mut String, event: &Event) {
 192    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 193        for component in path.components() {
 194            prompt.push('/');
 195            write!(prompt, "{}", component.as_os_str().display()).ok();
 196        }
 197    }
 198    match event {
 199        Event::BufferChange {
 200            path,
 201            old_path,
 202            diff,
 203            predicted,
 204            in_open_source_repo: _,
 205        } => {
 206            if *predicted {
 207                prompt.push_str("// User accepted prediction:\n");
 208            }
 209            prompt.push_str("--- a");
 210            write_path_as_unix_str(prompt, old_path.as_ref());
 211            prompt.push_str("\n+++ b");
 212            write_path_as_unix_str(prompt, path.as_ref());
 213            prompt.push('\n');
 214            prompt.push_str(diff);
 215        }
 216    }
 217}
 218
/// A diagnostic from the active buffer, together with a snippet of the text it
/// refers to.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    // NOTE(review): the numeric encoding of severity is not visible in this file.
    pub severity: Option<i32>,
    /// Diagnostic message; rendered between `*` characters in the prompt.
    pub message: String,
    /// Source text rendered inside a fenced block under the message.
    pub snippet: String,
    /// Row range of the snippet. Its distance from the cursor row decides the
    /// order in which diagnostics are considered for the prompt.
    pub snippet_buffer_row_range: Range<u32>,
    // NOTE(review): presumably an offset range within `snippet` — confirm.
    pub diagnostic_range_in_snippet: Range<usize>,
}
 227
/// Excerpts taken from one file, offered to the model as extra context.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path of the file; compared with the cursor path when filtering excerpts.
    pub path: Arc<Path>,
    // NOTE(review): presumably the last row of the file — confirm with the producer.
    pub max_row: u32,
    /// Excerpts from this file. Files left with no excerpts after filtering are
    /// dropped.
    pub excerpts: Vec<RelatedExcerpt>,
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 236
/// A contiguous piece of a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by this excerpt; compared with the cursor excerpt's row range
    /// to detect excerpts that it already contains.
    pub row_range: Range<u32>,
    /// Text of the excerpt.
    pub text: Arc<str>,
    // NOTE(review): how `order` is interpreted is not visible in this file.
    #[serde(default)]
    pub order: usize,
}
 244
 245pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 246    special_tokens_for_format(format).iter().any(|token| {
 247        if let Some(line_token) = token.strip_suffix('\n') {
 248            input.cursor_excerpt.lines().any(|line| line == line_token)
 249        } else {
 250            input.cursor_excerpt.contains(token)
 251        }
 252    })
 253}
 254
 255pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> Option<String> {
 256    let max_prompt_tokens = match format {
 257        ZetaFormat::V0112MiddleAtEnd
 258        | ZetaFormat::V0113Ordered
 259        | ZetaFormat::V0114180EditableRegion
 260        | ZetaFormat::V0120GitMergeMarkers
 261        | ZetaFormat::V0131GitMergeMarkersPrefix
 262        | ZetaFormat::V0211Prefill
 263        | ZetaFormat::V0211SeedCoder
 264        | ZetaFormat::v0226Hashline
 265        | ZetaFormat::V0304VariableEdit
 266        | ZetaFormat::V0304SeedNoEdits
 267        | ZetaFormat::V0306SeedMultiRegions
 268        | ZetaFormat::V0316SeedMultiRegions
 269        | ZetaFormat::V0317SeedMultiRegions
 270        | ZetaFormat::V0331SeedCoderModelPy
 271        | ZetaFormat::V0318SeedMultiRegions => 4096,
 272        ZetaFormat::V0420Diagnostics => 8192,
 273        ZetaFormat::V0327SingleFile => 16384,
 274    };
 275
 276    format_prompt_with_budget_for_format(input, format, max_prompt_tokens)
 277}
 278
 279pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 280    match format {
 281        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
 282        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
 283        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
 284        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 285        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
 286        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
 287        ZetaFormat::V0211SeedCoder | ZetaFormat::V0331SeedCoderModelPy => {
 288            seed_coder::special_tokens()
 289        }
 290        ZetaFormat::v0226Hashline => hashline::special_tokens(),
 291        ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
 292        ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
 293        ZetaFormat::V0316SeedMultiRegions => {
 294            static TOKENS: &[&str] = &[
 295                seed_coder::FIM_SUFFIX,
 296                seed_coder::FIM_PREFIX,
 297                seed_coder::FIM_MIDDLE,
 298                seed_coder::FILE_MARKER,
 299                multi_region::V0316_END_MARKER,
 300                CURSOR_MARKER,
 301                multi_region::MARKER_TAG_PREFIX,
 302            ];
 303            TOKENS
 304        }
 305        ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => {
 306            static TOKENS: &[&str] = &[
 307                seed_coder::FIM_SUFFIX,
 308                seed_coder::FIM_PREFIX,
 309                seed_coder::FIM_MIDDLE,
 310                seed_coder::FILE_MARKER,
 311                multi_region::V0318_END_MARKER,
 312                CURSOR_MARKER,
 313                multi_region::MARKER_TAG_PREFIX,
 314            ];
 315            TOKENS
 316        }
 317        ZetaFormat::V0317SeedMultiRegions => {
 318            static TOKENS: &[&str] = &[
 319                seed_coder::FIM_SUFFIX,
 320                seed_coder::FIM_PREFIX,
 321                seed_coder::FIM_MIDDLE,
 322                seed_coder::FILE_MARKER,
 323                multi_region::V0317_END_MARKER,
 324                CURSOR_MARKER,
 325                multi_region::RELATIVE_MARKER_TAG_PREFIX,
 326            ];
 327            TOKENS
 328        }
 329        ZetaFormat::V0327SingleFile => {
 330            static TOKENS: &[&str] = &[
 331                seed_coder::FIM_SUFFIX,
 332                seed_coder::FIM_PREFIX,
 333                seed_coder::FIM_MIDDLE,
 334                seed_coder::FILE_MARKER,
 335                multi_region::V0327_END_MARKER,
 336                CURSOR_MARKER,
 337                multi_region::MARKER_TAG_PREFIX,
 338            ];
 339            TOKENS
 340        }
 341        ZetaFormat::V0306SeedMultiRegions => {
 342            static TOKENS: &[&str] = &[
 343                seed_coder::FIM_SUFFIX,
 344                seed_coder::FIM_PREFIX,
 345                seed_coder::FIM_MIDDLE,
 346                seed_coder::FILE_MARKER,
 347                seed_coder::START_MARKER,
 348                seed_coder::SEPARATOR,
 349                seed_coder::END_MARKER,
 350                CURSOR_MARKER,
 351                multi_region::MARKER_TAG_PREFIX,
 352            ];
 353            TOKENS
 354        }
 355    }
 356}
 357
 358/// Returns the (editable_token_limit, context_token_limit) for a given format.
 359pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
 360    match format {
 361        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
 362        ZetaFormat::V0114180EditableRegion => (180, 350),
 363        ZetaFormat::V0120GitMergeMarkers
 364        | ZetaFormat::V0131GitMergeMarkersPrefix
 365        | ZetaFormat::V0211Prefill
 366        | ZetaFormat::V0211SeedCoder
 367        | ZetaFormat::V0331SeedCoderModelPy
 368        | ZetaFormat::v0226Hashline
 369        | ZetaFormat::V0306SeedMultiRegions
 370        | ZetaFormat::V0316SeedMultiRegions
 371        | ZetaFormat::V0318SeedMultiRegions
 372        | ZetaFormat::V0420Diagnostics
 373        | ZetaFormat::V0317SeedMultiRegions
 374        | ZetaFormat::V0327SingleFile
 375        | ZetaFormat::V0304SeedNoEdits => (350, 150),
 376
 377        ZetaFormat::V0304VariableEdit => (1024, 0),
 378    }
 379}
 380
 381pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 382    match format {
 383        ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
 384        ZetaFormat::V0112MiddleAtEnd
 385        | ZetaFormat::V0113Ordered
 386        | ZetaFormat::V0114180EditableRegion
 387        | ZetaFormat::V0120GitMergeMarkers
 388        | ZetaFormat::V0131GitMergeMarkersPrefix
 389        | ZetaFormat::V0211Prefill
 390        | ZetaFormat::V0211SeedCoder
 391        | ZetaFormat::V0331SeedCoderModelPy
 392        | ZetaFormat::V0304VariableEdit
 393        | ZetaFormat::V0306SeedMultiRegions
 394        | ZetaFormat::V0304SeedNoEdits => &[],
 395        ZetaFormat::V0316SeedMultiRegions => &[multi_region::V0316_END_MARKER],
 396        ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => {
 397            &[multi_region::V0318_END_MARKER]
 398        }
 399        ZetaFormat::V0317SeedMultiRegions => &[multi_region::V0317_END_MARKER],
 400        ZetaFormat::V0327SingleFile => &[multi_region::V0327_END_MARKER],
 401    }
 402}
 403
 404/// Return (editable_range, context_range) for the prompt format
 405pub fn excerpt_ranges_for_format(
 406    format: ZetaFormat,
 407    ranges: &ExcerptRanges,
 408) -> (Range<usize>, Range<usize>) {
 409    match format {
 410        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 411            ranges.editable_150.clone(),
 412            ranges.editable_150_context_350.clone(),
 413        ),
 414        ZetaFormat::V0114180EditableRegion => (
 415            ranges.editable_180.clone(),
 416            ranges.editable_180_context_350.clone(),
 417        ),
 418        ZetaFormat::V0120GitMergeMarkers
 419        | ZetaFormat::V0131GitMergeMarkersPrefix
 420        | ZetaFormat::V0211Prefill
 421        | ZetaFormat::V0211SeedCoder
 422        | ZetaFormat::V0331SeedCoderModelPy
 423        | ZetaFormat::v0226Hashline
 424        | ZetaFormat::V0304SeedNoEdits
 425        | ZetaFormat::V0306SeedMultiRegions
 426        | ZetaFormat::V0316SeedMultiRegions
 427        | ZetaFormat::V0318SeedMultiRegions
 428        | ZetaFormat::V0317SeedMultiRegions
 429        | ZetaFormat::V0420Diagnostics => (
 430            ranges.editable_350.clone(),
 431            ranges.editable_350_context_150.clone(),
 432        ),
 433        ZetaFormat::V0327SingleFile => (
 434            ranges.editable_350_context_150.clone(),
 435            ranges.context_8192.clone().unwrap_or(
 436                // shouldn't be used, only for compat with old data/clients
 437                ranges.editable_350_context_150.clone(),
 438            ),
 439        ),
 440
 441        ZetaFormat::V0304VariableEdit => {
 442            let context = ranges
 443                .editable_350_context_1024
 444                .clone()
 445                .or(ranges.editable_350_context_512.clone())
 446                .unwrap_or_else(|| ranges.editable_350_context_150.clone());
 447            (context.clone(), context)
 448        }
 449    }
 450}
 451
 452pub fn write_cursor_excerpt_section_for_format(
 453    format: ZetaFormat,
 454    prompt: &mut String,
 455    path: &Path,
 456    context: &str,
 457    editable_range: &Range<usize>,
 458    cursor_offset: usize,
 459) {
 460    match format {
 461        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
 462            prompt,
 463            path,
 464            context,
 465            editable_range,
 466            cursor_offset,
 467        ),
 468        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 469            v0113_ordered::write_cursor_excerpt_section(
 470                prompt,
 471                path,
 472                context,
 473                editable_range,
 474                cursor_offset,
 475            )
 476        }
 477        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 478            prompt,
 479            path,
 480            context,
 481            editable_range,
 482            cursor_offset,
 483        ),
 484        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 485            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 486                prompt,
 487                path,
 488                context,
 489                editable_range,
 490                cursor_offset,
 491            )
 492        }
 493        ZetaFormat::V0211SeedCoder
 494        | ZetaFormat::V0331SeedCoderModelPy
 495        | ZetaFormat::V0304SeedNoEdits => seed_coder::write_cursor_excerpt_section(
 496            prompt,
 497            path,
 498            context,
 499            editable_range,
 500            cursor_offset,
 501        ),
 502        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
 503            prompt,
 504            path,
 505            context,
 506            editable_range,
 507            cursor_offset,
 508        ),
 509        ZetaFormat::V0304VariableEdit => {
 510            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
 511        }
 512        ZetaFormat::V0306SeedMultiRegions => {
 513            prompt.push_str(&build_v0306_cursor_prefix(
 514                path,
 515                context,
 516                editable_range,
 517                cursor_offset,
 518            ));
 519        }
 520        ZetaFormat::V0316SeedMultiRegions => {
 521            prompt.push_str(&build_v0316_cursor_prefix(
 522                path,
 523                context,
 524                editable_range,
 525                cursor_offset,
 526            ));
 527        }
 528        ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => {
 529            prompt.push_str(&build_v0318_cursor_prefix(
 530                path,
 531                context,
 532                editable_range,
 533                cursor_offset,
 534            ));
 535        }
 536        ZetaFormat::V0317SeedMultiRegions => {
 537            prompt.push_str(&build_v0317_cursor_prefix(
 538                path,
 539                context,
 540                editable_range,
 541                cursor_offset,
 542            ));
 543        }
 544        ZetaFormat::V0327SingleFile => {
 545            prompt.push_str(&build_v0318_cursor_prefix(
 546                path,
 547                context,
 548                editable_range,
 549                cursor_offset,
 550            ));
 551        }
 552    }
 553}
 554
 555fn build_v0306_cursor_prefix(
 556    path: &Path,
 557    context: &str,
 558    editable_range: &Range<usize>,
 559    cursor_offset: usize,
 560) -> String {
 561    let mut section = String::new();
 562    let path_str = path.to_string_lossy();
 563    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 564
 565    section.push_str(&context[..editable_range.start]);
 566    section.push_str(seed_coder::START_MARKER);
 567
 568    let editable_text = &context[editable_range.clone()];
 569    let cursor_in_editable = cursor_offset - editable_range.start;
 570    multi_region::write_editable_with_markers(
 571        &mut section,
 572        editable_text,
 573        cursor_in_editable,
 574        CURSOR_MARKER,
 575    );
 576
 577    if !section.ends_with('\n') {
 578        section.push('\n');
 579    }
 580    section.push_str(seed_coder::SEPARATOR);
 581    section
 582}
 583
 584fn build_v0316_cursor_prefix(
 585    path: &Path,
 586    context: &str,
 587    editable_range: &Range<usize>,
 588    cursor_offset: usize,
 589) -> String {
 590    let mut section = String::new();
 591    let path_str = path.to_string_lossy();
 592    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 593
 594    section.push_str(&context[..editable_range.start]);
 595
 596    let editable_text = &context[editable_range.clone()];
 597    let cursor_in_editable = cursor_offset - editable_range.start;
 598    multi_region::write_editable_with_markers_v0316(
 599        &mut section,
 600        editable_text,
 601        cursor_in_editable,
 602        CURSOR_MARKER,
 603    );
 604
 605    if !section.ends_with('\n') {
 606        section.push('\n');
 607    }
 608    section
 609}
 610
 611fn build_v0318_cursor_prefix(
 612    path: &Path,
 613    context: &str,
 614    editable_range: &Range<usize>,
 615    cursor_offset: usize,
 616) -> String {
 617    let mut section = String::new();
 618    let path_str = path.to_string_lossy();
 619    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 620
 621    section.push_str(&context[..editable_range.start]);
 622
 623    let editable_text = &context[editable_range.clone()];
 624    let cursor_in_editable = cursor_offset - editable_range.start;
 625    multi_region::write_editable_with_markers_v0318(
 626        &mut section,
 627        editable_text,
 628        cursor_in_editable,
 629        CURSOR_MARKER,
 630    );
 631
 632    if !section.ends_with('\n') {
 633        section.push('\n');
 634    }
 635    section
 636}
 637
 638fn build_v0317_cursor_prefix(
 639    path: &Path,
 640    context: &str,
 641    editable_range: &Range<usize>,
 642    cursor_offset: usize,
 643) -> String {
 644    let mut section = String::new();
 645    let path_str = path.to_string_lossy();
 646    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 647
 648    section.push_str(&context[..editable_range.start]);
 649
 650    let editable_text = &context[editable_range.clone()];
 651    let cursor_in_editable = cursor_offset - editable_range.start;
 652    multi_region::write_editable_with_markers_v0317(
 653        &mut section,
 654        editable_text,
 655        cursor_in_editable,
 656        CURSOR_MARKER,
 657    );
 658
 659    if !section.ends_with('\n') {
 660        section.push('\n');
 661    }
 662    section
 663}
 664
 665fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
 666    let start_row = text[0..range.start].matches('\n').count() as u32;
 667    let mut end_row = start_row + text[range.clone()].matches('\n').count() as u32;
 668    if !text[..range.end].ends_with('\n') {
 669        end_row += 1;
 670    }
 671    return start_row..end_row;
 672}
 673
 674fn assemble_single_file_fim_prompt(
 675    context: &str,
 676    editable_range: &Range<usize>,
 677    cursor_prefix_section: &str,
 678    events: &[Arc<Event>],
 679    max_tokens: usize,
 680) -> String {
 681    let suffix_section = seed_coder::build_suffix_section(context, editable_range);
 682
 683    let suffix_tokens = estimate_tokens(suffix_section.len() + seed_coder::FIM_PREFIX.len());
 684    let cursor_prefix_tokens =
 685        estimate_tokens(cursor_prefix_section.len() + seed_coder::FIM_MIDDLE.len());
 686    let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
 687
 688    let edit_history_section = format_edit_history_within_budget(
 689        events,
 690        seed_coder::FILE_MARKER,
 691        "edit_history",
 692        budget_after_cursor,
 693        max_edit_event_count_for_format(&ZetaFormat::V0327SingleFile),
 694    );
 695
 696    let mut prompt = String::new();
 697    prompt.push_str(&suffix_section);
 698    prompt.push_str(seed_coder::FIM_PREFIX);
 699    prompt.push_str(&edit_history_section);
 700    if !edit_history_section.is_empty() {
 701        prompt.push('\n');
 702    }
 703    prompt.push_str(cursor_prefix_section);
 704    prompt.push_str(seed_coder::FIM_MIDDLE);
 705    prompt
 706}
 707
 708pub fn format_prompt_with_budget_for_format(
 709    input: &ZetaPromptInput,
 710    format: ZetaFormat,
 711    max_tokens: usize,
 712) -> Option<String> {
 713    let (context, editable_range, context_range, cursor_offset) =
 714        resolve_cursor_region(input, format);
 715    let path = &*input.cursor_path;
 716
 717    let empty_files = Vec::new();
 718    let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
 719    let filtered_related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
 720        let relative_row_range =
 721            offset_range_to_row_range(&input.cursor_excerpt, context_range.clone());
 722        let row_range = relative_row_range.start + cursor_excerpt_start_row
 723            ..relative_row_range.end + cursor_excerpt_start_row;
 724        filter_redundant_excerpts(
 725            input_related_files.to_vec(),
 726            input.cursor_path.as_ref(),
 727            row_range,
 728        )
 729    } else {
 730        input_related_files.to_vec()
 731    };
 732    let related_files = filtered_related_files.as_slice();
 733
 734    let prompt = match format {
 735        ZetaFormat::V0211SeedCoder
 736        | ZetaFormat::V0331SeedCoderModelPy
 737        | ZetaFormat::V0304SeedNoEdits
 738        | ZetaFormat::V0306SeedMultiRegions
 739        | ZetaFormat::V0316SeedMultiRegions
 740        | ZetaFormat::V0318SeedMultiRegions
 741        | ZetaFormat::V0317SeedMultiRegions
 742        | ZetaFormat::V0420Diagnostics => {
 743            let mut cursor_section = String::new();
 744
 745            write_cursor_excerpt_section_for_format(
 746                format,
 747                &mut cursor_section,
 748                path,
 749                context,
 750                &editable_range,
 751                cursor_offset,
 752            );
 753
 754            let cursor_buffer_row = input.excerpt_start_row.map(|excerpt_start_row| {
 755                excerpt_start_row
 756                    + input.cursor_excerpt[..context_range.start + cursor_offset]
 757                        .bytes()
 758                        .filter(|byte| *byte == b'\n')
 759                        .count() as u32
 760            });
 761
 762            let budget_with_margin = apply_prompt_budget_margin(max_tokens);
 763            seed_coder::assemble_fim_prompt(
 764                context,
 765                &editable_range,
 766                &cursor_section,
 767                &input.events,
 768                related_files,
 769                if format == ZetaFormat::V0420Diagnostics {
 770                    &input.active_buffer_diagnostics
 771                } else {
 772                    &[]
 773                },
 774                cursor_buffer_row,
 775                budget_with_margin,
 776            )
 777        }
 778        ZetaFormat::V0327SingleFile => {
 779            let mut cursor_section = String::new();
 780            write_cursor_excerpt_section_for_format(
 781                format,
 782                &mut cursor_section,
 783                path,
 784                context,
 785                &editable_range,
 786                cursor_offset,
 787            );
 788
 789            assemble_single_file_fim_prompt(
 790                context,
 791                &editable_range,
 792                &cursor_section,
 793                &input.events,
 794                apply_prompt_budget_margin(max_tokens),
 795            )
 796        }
 797        _ => {
 798            let mut cursor_section = String::new();
 799            write_cursor_excerpt_section_for_format(
 800                format,
 801                &mut cursor_section,
 802                path,
 803                context,
 804                &editable_range,
 805                cursor_offset,
 806            );
 807
 808            let mut remaining_budget = apply_prompt_budget_margin(max_tokens);
 809            let cursor_tokens = estimate_tokens(cursor_section.len());
 810            remaining_budget = remaining_budget.saturating_sub(cursor_tokens);
 811
 812            let edit_history_section = format_edit_history_within_budget(
 813                &input.events,
 814                "<|file_sep|>",
 815                "edit history",
 816                remaining_budget,
 817                max_edit_event_count_for_format(&format),
 818            );
 819            let edit_history_tokens = estimate_tokens(edit_history_section.len());
 820            remaining_budget = remaining_budget.saturating_sub(edit_history_tokens);
 821
 822            let related_files_section = format_related_files_within_budget(
 823                &related_files,
 824                "<|file_sep|>",
 825                "",
 826                remaining_budget,
 827            );
 828
 829            let mut prompt = String::new();
 830            prompt.push_str(&related_files_section);
 831            prompt.push_str(&edit_history_section);
 832            prompt.push_str(&cursor_section);
 833            prompt
 834        }
 835    };
 836    let prompt_tokens = estimate_tokens(prompt.len());
 837    if prompt_tokens > max_tokens {
 838        return None;
 839    }
 840    return Some(prompt);
 841}
 842
 843fn format_active_buffer_diagnostics_with_budget(
 844    diagnostics: &[ActiveBufferDiagnostic],
 845    cursor_buffer_row: Option<u32>,
 846    budget: usize,
 847) -> String {
 848    if diagnostics.is_empty() || budget == 0 {
 849        return String::new();
 850    }
 851
 852    let mut diagnostic_indices = (0..diagnostics.len()).collect::<Vec<_>>();
 853    if let Some(cursor_buffer_row) = cursor_buffer_row {
 854        diagnostic_indices.sort_by_key(|index| {
 855            let range = &diagnostics[*index].snippet_buffer_row_range;
 856            u32::abs_diff(cursor_buffer_row, range.start)
 857                + u32::abs_diff(cursor_buffer_row, range.end)
 858        });
 859    }
 860
 861    let mut output = format!("{}diagnostics\n", seed_coder::FILE_MARKER);
 862    let header_tokens = estimate_tokens(output.len());
 863    if header_tokens > budget {
 864        return String::new();
 865    }
 866
 867    let mut used_tokens = header_tokens;
 868    let mut included_diagnostics = 0;
 869    for diagnostic_index in diagnostic_indices.into_iter().take(10) {
 870        let diagnostic = &diagnostics[diagnostic_index];
 871        let snippet = clamp_text_to_token_count(&diagnostic.snippet, 256);
 872
 873        let diagnostic_section = format!(
 874            "*{}*:\n```\n{}{}\n```\n",
 875            diagnostic.message,
 876            snippet,
 877            if snippet.len() < diagnostic.snippet.len() {
 878                "..."
 879            } else {
 880                ""
 881            }
 882        );
 883        let diagnostic_tokens = estimate_tokens(diagnostic_section.len());
 884        if used_tokens + diagnostic_tokens > budget {
 885            break;
 886        }
 887        output.push_str(&diagnostic_section);
 888        used_tokens += diagnostic_tokens;
 889        included_diagnostics += 1;
 890    }
 891
 892    if included_diagnostics == 0 {
 893        String::new()
 894    } else {
 895        output
 896    }
 897}
 898
 899pub fn filter_redundant_excerpts(
 900    mut related_files: Vec<RelatedFile>,
 901    cursor_path: &Path,
 902    cursor_row_range: Range<u32>,
 903) -> Vec<RelatedFile> {
 904    for file in &mut related_files {
 905        if file.path.as_ref() == cursor_path {
 906            file.excerpts.retain(|excerpt| {
 907                excerpt.row_range.start < cursor_row_range.start
 908                    || excerpt.row_range.end > cursor_row_range.end
 909            });
 910        }
 911    }
 912    related_files.retain(|file| !file.excerpts.is_empty());
 913    related_files
 914}
 915
 916pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
 917    match format {
 918        ZetaFormat::V0112MiddleAtEnd
 919        | ZetaFormat::V0113Ordered
 920        | ZetaFormat::V0114180EditableRegion
 921        | ZetaFormat::V0120GitMergeMarkers
 922        | ZetaFormat::V0131GitMergeMarkersPrefix
 923        | ZetaFormat::V0211Prefill
 924        | ZetaFormat::V0211SeedCoder
 925        | ZetaFormat::V0331SeedCoderModelPy
 926        | ZetaFormat::v0226Hashline
 927        | ZetaFormat::V0304SeedNoEdits
 928        | ZetaFormat::V0304VariableEdit
 929        | ZetaFormat::V0306SeedMultiRegions
 930        | ZetaFormat::V0316SeedMultiRegions
 931        | ZetaFormat::V0318SeedMultiRegions
 932        | ZetaFormat::V0317SeedMultiRegions
 933        | ZetaFormat::V0420Diagnostics
 934        | ZetaFormat::V0327SingleFile => 6,
 935    }
 936}
 937
 938pub fn get_prefill_for_format(
 939    format: ZetaFormat,
 940    context: &str,
 941    editable_range: &Range<usize>,
 942) -> String {
 943    match format {
 944        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
 945        ZetaFormat::V0112MiddleAtEnd
 946        | ZetaFormat::V0113Ordered
 947        | ZetaFormat::V0114180EditableRegion
 948        | ZetaFormat::V0120GitMergeMarkers
 949        | ZetaFormat::V0131GitMergeMarkersPrefix
 950        | ZetaFormat::V0211SeedCoder
 951        | ZetaFormat::V0331SeedCoderModelPy
 952        | ZetaFormat::v0226Hashline
 953        | ZetaFormat::V0304VariableEdit => String::new(),
 954        ZetaFormat::V0304SeedNoEdits
 955        | ZetaFormat::V0306SeedMultiRegions
 956        | ZetaFormat::V0316SeedMultiRegions
 957        | ZetaFormat::V0318SeedMultiRegions
 958        | ZetaFormat::V0317SeedMultiRegions
 959        | ZetaFormat::V0420Diagnostics
 960        | ZetaFormat::V0327SingleFile => String::new(),
 961    }
 962}
 963
 964pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
 965    match format {
 966        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
 967        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
 968        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
 969        ZetaFormat::V0211SeedCoder
 970        | ZetaFormat::V0331SeedCoderModelPy
 971        | ZetaFormat::V0304SeedNoEdits
 972        | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
 973        ZetaFormat::V0316SeedMultiRegions => Some(multi_region::V0316_END_MARKER),
 974        ZetaFormat::V0318SeedMultiRegions => Some(multi_region::V0318_END_MARKER),
 975        ZetaFormat::V0420Diagnostics => Some(multi_region::V0318_END_MARKER),
 976        ZetaFormat::V0317SeedMultiRegions => Some(multi_region::V0317_END_MARKER),
 977        ZetaFormat::V0327SingleFile => Some(multi_region::V0327_END_MARKER),
 978
 979        ZetaFormat::V0112MiddleAtEnd
 980        | ZetaFormat::V0113Ordered
 981        | ZetaFormat::V0114180EditableRegion
 982        | ZetaFormat::v0226Hashline
 983        | ZetaFormat::V0304VariableEdit => None,
 984    }
 985}
 986
 987pub fn encode_patch_as_output_for_format(
 988    format: ZetaFormat,
 989    old_editable_region: &str,
 990    patch: &str,
 991    cursor_offset: Option<usize>,
 992) -> Result<Option<String>> {
 993    match format {
 994        ZetaFormat::v0226Hashline => {
 995            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
 996        }
 997        ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
 998            old_editable_region,
 999            patch,
1000            cursor_offset,
1001        )
1002        .map(Some),
1003        ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
1004            Ok(seed_coder::no_edits(patch))
1005        }
1006        ZetaFormat::V0316SeedMultiRegions => {
1007            let empty_patch = patch.lines().count() <= 3;
1008            if empty_patch {
1009                let marker_offsets = multi_region::compute_marker_offsets(old_editable_region);
1010                let marker_num =
1011                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
1012                let tag = multi_region::marker_tag(marker_num);
1013                Ok(Some(format!(
1014                    "{tag}{tag}{}",
1015                    multi_region::V0316_END_MARKER
1016                )))
1017            } else {
1018                Ok(None)
1019            }
1020        }
1021        ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => {
1022            let empty_patch = patch.lines().count() <= 3;
1023            if empty_patch {
1024                let marker_offsets =
1025                    multi_region::compute_marker_offsets_v0318(old_editable_region);
1026                let marker_num =
1027                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
1028                let tag = multi_region::marker_tag(marker_num);
1029                Ok(Some(format!(
1030                    "{tag}{tag}{}",
1031                    multi_region::V0318_END_MARKER
1032                )))
1033            } else {
1034                Ok(None)
1035            }
1036        }
1037        ZetaFormat::V0317SeedMultiRegions => {
1038            let empty_patch = patch.lines().count() <= 3;
1039            if empty_patch {
1040                let tag = multi_region::marker_tag_relative(0);
1041                Ok(Some(format!(
1042                    "{tag}{tag}{}",
1043                    multi_region::V0317_END_MARKER
1044                )))
1045            } else {
1046                Ok(None)
1047            }
1048        }
1049        ZetaFormat::V0327SingleFile => {
1050            let empty_patch = patch.lines().count() <= 3;
1051            if empty_patch {
1052                let marker_offsets =
1053                    multi_region::compute_marker_offsets_v0318(old_editable_region);
1054                let marker_num =
1055                    multi_region::nearest_marker_number(cursor_offset, &marker_offsets);
1056                let tag = multi_region::marker_tag(marker_num);
1057                Ok(Some(format!(
1058                    "{tag}{tag}{}",
1059                    multi_region::V0327_END_MARKER
1060                )))
1061            } else {
1062                Ok(None)
1063            }
1064        }
1065        _ => Ok(None),
1066    }
1067}
1068
1069/// Given a `ZetaPromptInput`, a format, and a patch (with cursor already
1070/// extracted), produce the expected model output string for training.
1071pub fn format_expected_output(
1072    input: &ZetaPromptInput,
1073    format: ZetaFormat,
1074    patch: &str,
1075    cursor_offset: Option<usize>,
1076) -> Result<String> {
1077    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
1078    let mut old_editable = context[editable_range].to_string();
1079    if !old_editable.is_empty() && !old_editable.ends_with('\n') {
1080        old_editable.push('\n');
1081    }
1082
1083    // Formats with their own output encoding (hashline, variable-edit,
1084    // multi-region empty patches) are handled here.
1085    if let Some(output) =
1086        encode_patch_as_output_for_format(format, &old_editable, patch, cursor_offset)?
1087    {
1088        return Ok(output);
1089    }
1090
1091    let empty_patch = patch.lines().count() <= 3;
1092
1093    match format {
1094        // Multi-region formats: non-empty patches need diff application
1095        // then marker-span encoding.
1096        ZetaFormat::V0316SeedMultiRegions => {
1097            let (new_editable, first_hunk_offset) =
1098                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
1099            let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
1100            multi_region::encode_from_old_and_new_v0316(
1101                &old_editable,
1102                &new_editable,
1103                cursor_in_new,
1104                CURSOR_MARKER,
1105                multi_region::V0316_END_MARKER,
1106            )
1107        }
1108        ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => {
1109            let (new_editable, first_hunk_offset) =
1110                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
1111            let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
1112            multi_region::encode_from_old_and_new_v0318(
1113                &old_editable,
1114                &new_editable,
1115                cursor_in_new,
1116                CURSOR_MARKER,
1117                multi_region::V0318_END_MARKER,
1118            )
1119        }
1120        ZetaFormat::V0327SingleFile => {
1121            let (new_editable, first_hunk_offset) =
1122                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
1123            let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
1124            multi_region::encode_from_old_and_new_v0318(
1125                &old_editable,
1126                &new_editable,
1127                cursor_in_new,
1128                CURSOR_MARKER,
1129                multi_region::V0327_END_MARKER,
1130            )
1131        }
1132        ZetaFormat::V0317SeedMultiRegions => {
1133            let (new_editable, first_hunk_offset) =
1134                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?;
1135            let cursor_in_new = cursor_in_new_text(cursor_offset, first_hunk_offset, &new_editable);
1136            multi_region::encode_from_old_and_new_v0317(
1137                &old_editable,
1138                &new_editable,
1139                cursor_in_new,
1140                CURSOR_MARKER,
1141                multi_region::V0317_END_MARKER,
1142            )
1143        }
1144        // V0131-style formats and fallback: produce new editable text with
1145        // cursor marker inserted, followed by the end marker.
1146        ZetaFormat::V0112MiddleAtEnd
1147        | ZetaFormat::V0113Ordered
1148        | ZetaFormat::V0114180EditableRegion
1149        | ZetaFormat::V0120GitMergeMarkers
1150        | ZetaFormat::V0131GitMergeMarkersPrefix
1151        | ZetaFormat::V0211Prefill
1152        | ZetaFormat::V0211SeedCoder
1153        | ZetaFormat::v0226Hashline
1154        | ZetaFormat::V0304VariableEdit
1155        | ZetaFormat::V0304SeedNoEdits
1156        | ZetaFormat::V0331SeedCoderModelPy
1157        | ZetaFormat::V0306SeedMultiRegions => {
1158            let (mut result, first_hunk_offset) = if empty_patch {
1159                (old_editable.clone(), None)
1160            } else {
1161                udiff::apply_diff_to_string_with_hunk_offset(patch, &old_editable)?
1162            };
1163
1164            if let Some(cursor) = cursor_offset {
1165                let hunk_start = if !empty_patch {
1166                    first_hunk_offset.unwrap_or(0)
1167                } else {
1168                    0
1169                };
1170                let offset = (hunk_start + cursor).min(result.len());
1171                result.insert_str(offset, CURSOR_MARKER);
1172            }
1173
1174            if !result.is_empty() && !result.ends_with('\n') {
1175                result.push('\n');
1176            }
1177
1178            if let Some(end_marker) = output_end_marker_for_format(format) {
1179                result.push_str(end_marker);
1180            }
1181
1182            Ok(result)
1183        }
1184    }
1185}
1186
/// Computes the cursor position within the new text after diff application.
///
/// Returns `None` when `cursor_offset` is `None`. Otherwise `cursor_offset` is
/// added to `first_hunk_offset` (treated as 0 when absent) and the sum is
/// clamped to `new_text.len()`.
///
/// The addition saturates, so oversized offsets clamp to the end of the text
/// instead of overflowing (a panic in debug builds, a wrapped and therefore
/// wrong offset in release builds).
fn cursor_in_new_text(
    cursor_offset: Option<usize>,
    first_hunk_offset: Option<usize>,
    new_text: &str,
) -> Option<usize> {
    let cursor = cursor_offset?;
    let hunk_start = first_hunk_offset.unwrap_or(0);
    Some(hunk_start.saturating_add(cursor).min(new_text.len()))
}
1198
/// The result of decoding model output: replacement text for a span of the
/// cursor excerpt, plus an optional cursor position inside that text.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct ParsedOutput {
    /// Text that should replace the editable region.
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to.
    pub range_in_excerpt: Range<usize>,
    /// Byte offset of the cursor within `new_editable_region`, if a cursor
    /// marker was present. When built by `parsed_output_from_editable_region`,
    /// the marker itself has already been removed from the text.
    pub cursor_offset_in_new_editable_region: Option<usize>,
}
1208
/// A predicted cursor location, as computed by
/// `cursor_position_from_parsed_output`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct CursorPosition {
    /// The cursor file's path, converted lossily to a string.
    pub path: String,
    /// Number of newlines in the excerpt before the replaced range plus the
    /// newlines in the new editable text before the cursor.
    pub row: usize,
    /// Number of bytes between the start of the cursor's line and the cursor.
    pub column: usize,
    /// Start of the replaced range within the excerpt plus the cursor's byte
    /// offset within the new editable text.
    pub offset: usize,
    /// The cursor's byte offset within the new editable text.
    pub editable_region_offset: usize,
}
1217
1218pub fn parsed_output_from_editable_region(
1219    range_in_excerpt: Range<usize>,
1220    mut new_editable_region: String,
1221) -> ParsedOutput {
1222    let cursor_offset_in_new_editable_region = new_editable_region.find(CURSOR_MARKER);
1223    if let Some(offset) = cursor_offset_in_new_editable_region {
1224        new_editable_region.replace_range(offset..offset + CURSOR_MARKER.len(), "");
1225    }
1226
1227    ParsedOutput {
1228        new_editable_region,
1229        range_in_excerpt,
1230        cursor_offset_in_new_editable_region,
1231    }
1232}
1233
1234/// Parse model output for the given zeta format
1235pub fn parse_zeta2_model_output(
1236    output: &str,
1237    format: ZetaFormat,
1238    prompt_inputs: &ZetaPromptInput,
1239) -> Result<ParsedOutput> {
1240    let output = match output_end_marker_for_format(format) {
1241        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
1242        None => output,
1243    };
1244
1245    let (context, editable_range_in_context, context_range, cursor_offset) =
1246        resolve_cursor_region(prompt_inputs, format);
1247    let context_start = context_range.start;
1248    let old_editable_region = &context[editable_range_in_context.clone()];
1249    let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range_in_context.start);
1250
1251    let (range_in_context, output) = match format {
1252        ZetaFormat::v0226Hashline => (
1253            editable_range_in_context,
1254            if hashline::output_has_edit_commands(output) {
1255                hashline::apply_edit_commands(old_editable_region, output)
1256            } else {
1257                output.to_string()
1258            },
1259        ),
1260        ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
1261        ZetaFormat::V0304SeedNoEdits => (
1262            editable_range_in_context,
1263            if output.starts_with(seed_coder::NO_EDITS) {
1264                old_editable_region.to_string()
1265            } else {
1266                output.to_string()
1267            },
1268        ),
1269        ZetaFormat::V0306SeedMultiRegions => (
1270            editable_range_in_context,
1271            if output.starts_with(seed_coder::NO_EDITS) {
1272                old_editable_region.to_string()
1273            } else {
1274                multi_region::apply_marker_span(old_editable_region, output)?
1275            },
1276        ),
1277        ZetaFormat::V0316SeedMultiRegions => (
1278            editable_range_in_context,
1279            multi_region::apply_marker_span_v0316(old_editable_region, output)?,
1280        ),
1281        ZetaFormat::V0318SeedMultiRegions | ZetaFormat::V0420Diagnostics => (
1282            editable_range_in_context,
1283            multi_region::apply_marker_span_v0318(old_editable_region, output)?,
1284        ),
1285        ZetaFormat::V0317SeedMultiRegions => (
1286            editable_range_in_context,
1287            multi_region::apply_marker_span_v0317(
1288                old_editable_region,
1289                output,
1290                Some(cursor_offset_in_editable),
1291            )?,
1292        ),
1293        ZetaFormat::V0327SingleFile => (
1294            editable_range_in_context,
1295            multi_region::apply_marker_span_v0318(old_editable_region, output)?,
1296        ),
1297        _ => (editable_range_in_context, output.to_string()),
1298    };
1299
1300    let range_in_excerpt =
1301        range_in_context.start + context_start..range_in_context.end + context_start;
1302
1303    Ok(parsed_output_from_editable_region(range_in_excerpt, output))
1304}
1305
1306pub fn parse_zeta2_model_output_as_patch(
1307    output: &str,
1308    format: ZetaFormat,
1309    prompt_inputs: &ZetaPromptInput,
1310) -> Result<String> {
1311    let parsed = parse_zeta2_model_output(output, format, prompt_inputs)?;
1312    parsed_output_to_patch(prompt_inputs, parsed)
1313}
1314
1315pub fn cursor_position_from_parsed_output(
1316    prompt_inputs: &ZetaPromptInput,
1317    parsed: &ParsedOutput,
1318) -> Option<CursorPosition> {
1319    let cursor_offset = parsed.cursor_offset_in_new_editable_region?;
1320    let editable_region_offset = parsed.range_in_excerpt.start;
1321    let excerpt = prompt_inputs.cursor_excerpt.as_ref();
1322
1323    let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count();
1324
1325    let new_editable_region = &parsed.new_editable_region;
1326    let prefix_end = cursor_offset.min(new_editable_region.len());
1327    let new_region_prefix = &new_editable_region[..prefix_end];
1328
1329    let row = editable_region_start_line + new_region_prefix.matches('\n').count();
1330
1331    let column = match new_region_prefix.rfind('\n') {
1332        Some(last_newline) => cursor_offset - last_newline - 1,
1333        None => {
1334            let content_prefix = &excerpt[..editable_region_offset];
1335            let content_column = match content_prefix.rfind('\n') {
1336                Some(last_newline) => editable_region_offset - last_newline - 1,
1337                None => editable_region_offset,
1338            };
1339            content_column + cursor_offset
1340        }
1341    };
1342
1343    Some(CursorPosition {
1344        path: prompt_inputs.cursor_path.to_string_lossy().into_owned(),
1345        row,
1346        column,
1347        offset: editable_region_offset + cursor_offset,
1348        editable_region_offset: cursor_offset,
1349    })
1350}
1351
1352pub fn parsed_output_to_patch(
1353    prompt_inputs: &ZetaPromptInput,
1354    parsed: ParsedOutput,
1355) -> Result<String> {
1356    let range_in_excerpt = parsed.range_in_excerpt;
1357    let excerpt = prompt_inputs.cursor_excerpt.as_ref();
1358    let old_text = excerpt[range_in_excerpt.clone()].to_string();
1359    let mut new_text = parsed.new_editable_region;
1360
1361    let mut old_text_normalized = old_text;
1362    if !new_text.is_empty() && !new_text.ends_with('\n') {
1363        new_text.push('\n');
1364    }
1365    if !old_text_normalized.is_empty() && !old_text_normalized.ends_with('\n') {
1366        old_text_normalized.push('\n');
1367    }
1368
1369    let editable_region_offset = range_in_excerpt.start;
1370    let editable_region_start_line = excerpt[..editable_region_offset].matches('\n').count() as u32;
1371    let editable_region_lines = old_text_normalized.lines().count() as u32;
1372
1373    let diff = udiff::unified_diff_with_context(
1374        &old_text_normalized,
1375        &new_text,
1376        editable_region_start_line,
1377        editable_region_start_line,
1378        editable_region_lines,
1379    );
1380
1381    let path = prompt_inputs
1382        .cursor_path
1383        .to_string_lossy()
1384        .trim_start_matches('/')
1385        .to_string();
1386    let formatted_diff = format!("--- a/{path}\n+++ b/{path}\n{diff}");
1387
1388    Ok(udiff::encode_cursor_in_patch(
1389        &formatted_diff,
1390        parsed.cursor_offset_in_new_editable_region,
1391    ))
1392}
1393
/// Public wrapper around `excerpt_ranges_for_format`: returns the pair of byte
/// ranges selected from `ranges` for `format`.
///
/// `resolve_cursor_region` treats the returned pair as
/// `(editable_range, context_range)`.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
1400
1401pub fn resolve_cursor_region(
1402    input: &ZetaPromptInput,
1403    format: ZetaFormat,
1404) -> (&str, Range<usize>, Range<usize>, usize) {
1405    let (editable_range, context_range) = if format == ZetaFormat::V0327SingleFile {
1406        let (editable_tokens, _) = token_limits_for_format(format);
1407        let context_range = 0..input.cursor_excerpt.len();
1408        let editable_range = multi_region::compute_v0327_editable_range(
1409            &input.cursor_excerpt,
1410            input.cursor_offset_in_excerpt,
1411            editable_tokens,
1412        );
1413        (editable_range, context_range)
1414    } else if let Some(syntax_ranges) = &input.syntax_ranges {
1415        let (editable_tokens, context_tokens) = token_limits_for_format(format);
1416        compute_editable_and_context_ranges(
1417            &input.cursor_excerpt,
1418            input.cursor_offset_in_excerpt,
1419            syntax_ranges,
1420            editable_tokens,
1421            context_tokens,
1422        )
1423    } else {
1424        excerpt_range_for_format(format, &input.excerpt_ranges)
1425    };
1426
1427    let context_start = context_range.start;
1428    let context_text = &input.cursor_excerpt[context_range.clone()];
1429    let adjusted_editable =
1430        (editable_range.start - context_start)..(editable_range.end - context_start);
1431    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
1432
1433    (
1434        context_text,
1435        adjusted_editable,
1436        context_range,
1437        adjusted_cursor,
1438    )
1439}
1440
1441pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
1442    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
1443    get_prefill_for_format(format, context, &editable_range)
1444}
1445
1446fn format_edit_history_within_budget(
1447    events: &[Arc<Event>],
1448    file_marker: &str,
1449    edit_history_name: &str,
1450    max_tokens: usize,
1451    max_edit_event_count: usize,
1452) -> String {
1453    let header = format!("{}{}\n", file_marker, edit_history_name);
1454    let header_tokens = estimate_tokens(header.len());
1455    if header_tokens >= max_tokens {
1456        return String::new();
1457    }
1458
1459    let mut event_strings: Vec<String> = Vec::new();
1460    let mut total_tokens = header_tokens;
1461
1462    for event in events.iter().rev().take(max_edit_event_count) {
1463        let mut event_str = String::new();
1464        write_event(&mut event_str, event);
1465        let event_tokens = estimate_tokens(event_str.len());
1466
1467        if total_tokens + event_tokens > max_tokens {
1468            break;
1469        }
1470        total_tokens += event_tokens;
1471        event_strings.push(event_str);
1472    }
1473
1474    if event_strings.is_empty() {
1475        return String::new();
1476    }
1477
1478    let mut result = header;
1479    for event_str in event_strings.iter().rev() {
1480        result.push_str(event_str);
1481    }
1482    result
1483}
1484
1485fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
1486    let needs_newline = !excerpt.text.ends_with('\n');
1487    let needs_ellipsis = excerpt.row_range.end < file_max_row;
1488    let len = excerpt.text.len()
1489        + if needs_newline { "\n".len() } else { 0 }
1490        + if needs_ellipsis { "...\n".len() } else { 0 };
1491    estimate_tokens(len)
1492}
1493
1494pub fn format_related_files_within_budget(
1495    related_files: &[RelatedFile],
1496    file_prefix: &str,
1497    file_suffix: &str,
1498    max_tokens: usize,
1499) -> String {
1500    struct ExcerptCandidate {
1501        file_ix: usize,
1502        excerpt_ix: usize,
1503        order: usize,
1504    }
1505
1506    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
1507        .iter()
1508        .enumerate()
1509        .flat_map(|(file_ix, file)| {
1510            file.excerpts
1511                .iter()
1512                .enumerate()
1513                .map(move |(excerpt_ix, e)| ExcerptCandidate {
1514                    file_ix,
1515                    excerpt_ix,
1516                    order: e.order,
1517                })
1518        })
1519        .collect();
1520
1521    // Pre-compute file header strings and their token costs.
1522    let file_headers: Vec<String> = related_files
1523        .iter()
1524        .map(|file| {
1525            let path_str = file.path.to_string_lossy();
1526            format!("{}{}\n", file_prefix, path_str)
1527        })
1528        .collect();
1529
1530    // Sort the excerpts by their order and determine how many fit within the budget.
1531    let mut total_tokens = 0;
1532    let mut included_excerpt_count = 0_usize;
1533    let mut included_file_indices = vec![false; related_files.len()];
1534    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
1535    for candidate in &excerpt_candidates {
1536        let file = &related_files[candidate.file_ix];
1537        let excerpt = &file.excerpts[candidate.excerpt_ix];
1538        let file_already_included = included_file_indices[candidate.file_ix];
1539        let header_cost = if file_already_included {
1540            0
1541        } else {
1542            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
1543        };
1544        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
1545        if total_tokens + header_cost + excerpt_cost > max_tokens {
1546            break;
1547        }
1548        total_tokens += header_cost + excerpt_cost;
1549        if !file_already_included {
1550            included_file_indices[candidate.file_ix] = true;
1551        }
1552        included_excerpt_count += 1;
1553    }
1554
1555    excerpt_candidates.truncate(included_excerpt_count);
1556    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
1557
1558    // Render all of the files that fit within the token budget, in the original order.
1559    let mut result = String::new();
1560    let mut last_file_ix = None;
1561    for candidate in &excerpt_candidates {
1562        if last_file_ix != Some(candidate.file_ix) {
1563            if last_file_ix.is_some() {
1564                result.push_str(file_suffix);
1565            }
1566            result.push_str(&file_headers[candidate.file_ix]);
1567            last_file_ix = Some(candidate.file_ix);
1568        }
1569        let file = &related_files[candidate.file_ix];
1570        let excerpt = &file.excerpts[candidate.excerpt_ix];
1571        result.push_str(&excerpt.text);
1572        if !result.ends_with('\n') {
1573            result.push('\n');
1574        }
1575        if excerpt.row_range.end < file.max_row {
1576            result.push_str("...\n");
1577        }
1578    }
1579
1580    result
1581}
1582
1583pub fn write_related_files(
1584    prompt: &mut String,
1585    related_files: &[RelatedFile],
1586) -> Vec<Range<usize>> {
1587    let mut ranges = Vec::new();
1588    for file in related_files {
1589        let start = prompt.len();
1590        let path_str = file.path.to_string_lossy();
1591        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1592        for excerpt in &file.excerpts {
1593            prompt.push_str(&excerpt.text);
1594            if !prompt.ends_with('\n') {
1595                prompt.push('\n');
1596            }
1597            if excerpt.row_range.end < file.max_row {
1598                prompt.push_str("...\n");
1599            }
1600        }
1601        let end = prompt.len();
1602        ranges.push(start..end);
1603    }
1604    ranges
1605}
1606
1607mod v0112_middle_at_end {
1608    use super::*;
1609
1610    pub fn special_tokens() -> &'static [&'static str] {
1611        &[
1612            "<|fim_prefix|>",
1613            "<|fim_suffix|>",
1614            "<|fim_middle|>",
1615            "<|file_sep|>",
1616            CURSOR_MARKER,
1617        ]
1618    }
1619
1620    pub fn write_cursor_excerpt_section(
1621        prompt: &mut String,
1622        path: &Path,
1623        context: &str,
1624        editable_range: &Range<usize>,
1625        cursor_offset: usize,
1626    ) {
1627        let path_str = path.to_string_lossy();
1628        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1629
1630        prompt.push_str("<|fim_prefix|>\n");
1631        prompt.push_str(&context[..editable_range.start]);
1632
1633        prompt.push_str("<|fim_suffix|>\n");
1634        prompt.push_str(&context[editable_range.end..]);
1635        if !prompt.ends_with('\n') {
1636            prompt.push('\n');
1637        }
1638
1639        prompt.push_str("<|fim_middle|>current\n");
1640        prompt.push_str(&context[editable_range.start..cursor_offset]);
1641        prompt.push_str(CURSOR_MARKER);
1642        prompt.push_str(&context[cursor_offset..editable_range.end]);
1643        if !prompt.ends_with('\n') {
1644            prompt.push('\n');
1645        }
1646
1647        prompt.push_str("<|fim_middle|>updated\n");
1648    }
1649}
1650
1651mod v0113_ordered {
1652    use super::*;
1653
1654    pub fn special_tokens() -> &'static [&'static str] {
1655        &[
1656            "<|fim_prefix|>",
1657            "<|fim_suffix|>",
1658            "<|fim_middle|>",
1659            "<|file_sep|>",
1660            CURSOR_MARKER,
1661        ]
1662    }
1663
1664    pub fn write_cursor_excerpt_section(
1665        prompt: &mut String,
1666        path: &Path,
1667        context: &str,
1668        editable_range: &Range<usize>,
1669        cursor_offset: usize,
1670    ) {
1671        let path_str = path.to_string_lossy();
1672        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1673
1674        prompt.push_str("<|fim_prefix|>\n");
1675        prompt.push_str(&context[..editable_range.start]);
1676        if !prompt.ends_with('\n') {
1677            prompt.push('\n');
1678        }
1679
1680        prompt.push_str("<|fim_middle|>current\n");
1681        prompt.push_str(&context[editable_range.start..cursor_offset]);
1682        prompt.push_str(CURSOR_MARKER);
1683        prompt.push_str(&context[cursor_offset..editable_range.end]);
1684        if !prompt.ends_with('\n') {
1685            prompt.push('\n');
1686        }
1687
1688        prompt.push_str("<|fim_suffix|>\n");
1689        prompt.push_str(&context[editable_range.end..]);
1690        if !prompt.ends_with('\n') {
1691            prompt.push('\n');
1692        }
1693
1694        prompt.push_str("<|fim_middle|>updated\n");
1695    }
1696}
1697
1698mod v0114180_editable_region {
1699    use super::*;
1700
1701    pub fn special_tokens() -> &'static [&'static str] {
1702        v0113_ordered::special_tokens()
1703    }
1704}
1705
1706pub mod v0120_git_merge_markers {
1707    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1708    //!
1709    //! Example prompt:
1710    //!
1711    //! <|file_sep|>path/to/target_file.py
1712    //! <|fim_prefix|>
1713    //! code before editable region
1714    //! <|fim_suffix|>
1715    //! code after editable region
1716    //! <|fim_middle|>
1717    //! <<<<<<< CURRENT
1718    //! code that
1719    //! needs to<|user_cursor|>
1720    //! be rewritten
1721    //! =======
1722    //!
1723    //! Expected output (should be generated by the model):
1724    //!
1725    //! updated
1726    //! code with
1727    //! changes applied
1728    //! >>>>>>> UPDATED
1729
1730    use super::*;
1731
1732    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1733    pub const SEPARATOR: &str = "=======\n";
1734    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1735
1736    pub fn special_tokens() -> &'static [&'static str] {
1737        &[
1738            "<|fim_prefix|>",
1739            "<|fim_suffix|>",
1740            "<|fim_middle|>",
1741            "<|file_sep|>",
1742            START_MARKER,
1743            SEPARATOR,
1744            END_MARKER,
1745            CURSOR_MARKER,
1746        ]
1747    }
1748
1749    pub fn write_cursor_excerpt_section(
1750        prompt: &mut String,
1751        path: &Path,
1752        context: &str,
1753        editable_range: &Range<usize>,
1754        cursor_offset: usize,
1755    ) {
1756        let path_str = path.to_string_lossy();
1757        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1758
1759        prompt.push_str("<|fim_prefix|>");
1760        prompt.push_str(&context[..editable_range.start]);
1761
1762        prompt.push_str("<|fim_suffix|>");
1763        prompt.push_str(&context[editable_range.end..]);
1764        if !prompt.ends_with('\n') {
1765            prompt.push('\n');
1766        }
1767
1768        prompt.push_str("<|fim_middle|>");
1769        prompt.push_str(START_MARKER);
1770        prompt.push_str(&context[editable_range.start..cursor_offset]);
1771        prompt.push_str(CURSOR_MARKER);
1772        prompt.push_str(&context[cursor_offset..editable_range.end]);
1773        if !prompt.ends_with('\n') {
1774            prompt.push('\n');
1775        }
1776        prompt.push_str(SEPARATOR);
1777    }
1778}
1779
1780pub mod v0131_git_merge_markers_prefix {
1781    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1782    //!
1783    //! Example prompt:
1784    //!
1785    //! <|file_sep|>path/to/target_file.py
1786    //! <|fim_prefix|>
1787    //! code before editable region
1788    //! <<<<<<< CURRENT
1789    //! code that
1790    //! needs to<|user_cursor|>
1791    //! be rewritten
1792    //! =======
1793    //! <|fim_suffix|>
1794    //! code after editable region
1795    //! <|fim_middle|>
1796    //!
1797    //! Expected output (should be generated by the model):
1798    //!
1799    //! updated
1800    //! code with
1801    //! changes applied
1802    //! >>>>>>> UPDATED
1803
1804    use super::*;
1805
1806    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1807    pub const SEPARATOR: &str = "=======\n";
1808    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1809
1810    pub fn special_tokens() -> &'static [&'static str] {
1811        &[
1812            "<|fim_prefix|>",
1813            "<|fim_suffix|>",
1814            "<|fim_middle|>",
1815            "<|file_sep|>",
1816            START_MARKER,
1817            SEPARATOR,
1818            END_MARKER,
1819            CURSOR_MARKER,
1820        ]
1821    }
1822
1823    pub fn write_cursor_excerpt_section(
1824        prompt: &mut String,
1825        path: &Path,
1826        context: &str,
1827        editable_range: &Range<usize>,
1828        cursor_offset: usize,
1829    ) {
1830        let path_str = path.to_string_lossy();
1831        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1832
1833        prompt.push_str("<|fim_prefix|>");
1834        prompt.push_str(&context[..editable_range.start]);
1835        prompt.push_str(START_MARKER);
1836        prompt.push_str(&context[editable_range.start..cursor_offset]);
1837        prompt.push_str(CURSOR_MARKER);
1838        prompt.push_str(&context[cursor_offset..editable_range.end]);
1839        if !prompt.ends_with('\n') {
1840            prompt.push('\n');
1841        }
1842        prompt.push_str(SEPARATOR);
1843
1844        prompt.push_str("<|fim_suffix|>");
1845        prompt.push_str(&context[editable_range.end..]);
1846        if !prompt.ends_with('\n') {
1847            prompt.push('\n');
1848        }
1849
1850        prompt.push_str("<|fim_middle|>");
1851    }
1852}
1853
1854pub mod v0211_prefill {
1855    use super::*;
1856
1857    pub fn special_tokens() -> &'static [&'static str] {
1858        v0131_git_merge_markers_prefix::special_tokens()
1859    }
1860
1861    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1862        let editable_region = &context[editable_range.start..editable_range.end];
1863
1864        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1865        let prefill_len = editable_region.floor_char_boundary(prefill_len);
1866
1867        // Find a token boundary to avoid splitting tokens in the prefill.
1868        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1869        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1870        // the \n and consume any consecutive \n characters after it.
1871        let prefill = &editable_region[..prefill_len];
1872        match prefill.rfind('\n') {
1873            Some(pos) => {
1874                let mut end = pos + 1;
1875                while end < editable_region.len()
1876                    && editable_region.as_bytes().get(end) == Some(&b'\n')
1877                {
1878                    end += 1;
1879                }
1880                editable_region[..end].to_string()
1881            }
1882            // No newline found. Fall back to splitting before the last space
1883            // (word-level boundary)
1884            None => match prefill.rfind(' ') {
1885                Some(pos) => prefill[..pos].to_string(),
1886                None => prefill.to_string(),
1887            },
1888        }
1889    }
1890}
1891
1892pub mod hashline {
1893
1894    use std::fmt::Display;
1895
1896    pub const END_MARKER: &str = "<|fim_middle|>updated";
1897    pub const START_MARKER: &str = "<|fim_middle|>current";
1898
1899    use super::*;
1900
1901    const SET_COMMAND_MARKER: &str = "<|set|>";
1902    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
1903    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1904
1905    pub fn special_tokens() -> &'static [&'static str] {
1906        return &[
1907            SET_COMMAND_MARKER,
1908            "<|set_range|>",
1909            INSERT_COMMAND_MARKER,
1910            NO_EDITS_COMMAND_MARKER,
1911            CURSOR_MARKER,
1912            "<|file_sep|>",
1913            "<|fim_prefix|>",
1914            "<|fim_suffix|>",
1915            "<|fim_middle|>",
1916        ];
1917    }
1918
1919    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1920    #[derive(Debug, Clone, PartialEq, Eq)]
1921    struct LineRef {
1922        index: usize,
1923        hash: u8,
1924    }
1925
1926    impl Display for LineRef {
1927        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1928            write!(f, "{}:{:02x}", self.index, self.hash)
1929        }
1930    }
1931
1932    pub fn hash_line(line: &[u8]) -> u8 {
1933        let mut h: u8 = 0;
1934        for &byte in line {
1935            h = h.wrapping_add(byte);
1936        }
1937        return h;
1938    }
1939
1940    /// Write the hashline-encoded editable region into `out`. Each line of
1941    /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1942    /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1943    /// to the start of `editable_text`).
1944    pub fn write_hashline_editable_region(
1945        out: &mut String,
1946        editable_text: &str,
1947        cursor_offset_in_editable: usize,
1948    ) {
1949        let mut offset = 0;
1950        for (i, line) in editable_text.lines().enumerate() {
1951            let (head, cursor, tail) = if cursor_offset_in_editable > offset
1952                && cursor_offset_in_editable < offset + line.len()
1953            {
1954                (
1955                    &line[..cursor_offset_in_editable - offset],
1956                    CURSOR_MARKER,
1957                    &line[cursor_offset_in_editable - offset..],
1958                )
1959            } else {
1960                (line, "", "")
1961            };
1962            write!(
1963                out,
1964                "\n{}|{head}{cursor}{tail}",
1965                LineRef {
1966                    index: i,
1967                    hash: hash_line(line.as_bytes())
1968                }
1969            )
1970            .unwrap();
1971            offset += line.len() + 1;
1972        }
1973    }
1974
1975    pub fn write_cursor_excerpt_section(
1976        prompt: &mut String,
1977        path: &Path,
1978        context: &str,
1979        editable_range: &Range<usize>,
1980        cursor_offset: usize,
1981    ) {
1982        let path_str = path.to_string_lossy();
1983        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1984
1985        prompt.push_str("<|fim_prefix|>\n");
1986        prompt.push_str(&context[..editable_range.start]);
1987        prompt.push_str(START_MARKER);
1988
1989        let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1990        let editable_region = &context[editable_range.clone()];
1991        write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1992
1993        if !prompt.ends_with('\n') {
1994            prompt.push('\n');
1995        }
1996
1997        prompt.push_str("<|fim_suffix|>\n");
1998        prompt.push_str(&context[editable_range.end..]);
1999        if !prompt.ends_with('\n') {
2000            prompt.push('\n');
2001        }
2002
2003        prompt.push_str(END_MARKER);
2004        prompt.push('\n');
2005    }
2006
    /// A single edit command parsed from the model output.
    ///
    /// The `'a` lifetime ties each command's `content` to the model output
    /// string it was sliced from.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            // First line of the replaced range.
            start: LineRef,
            // Last line of the replaced range (inclusive).
            end: LineRef,
            // Replacement text: the raw lines following the command header.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            // Line to insert after; `None` means insert before the first line.
            after: Option<LineRef>,
            // Text to insert: the raw lines following the command header.
            content: &'a str,
        },
    }
2024
2025    /// Parse a line reference like `3:c3` into a `LineRef`.
2026    fn parse_line_ref(s: &str) -> Option<LineRef> {
2027        let (idx_str, hash_str) = s.split_once(':')?;
2028        let index = idx_str.parse::<usize>().ok()?;
2029        let hash = u8::from_str_radix(hash_str, 16).ok()?;
2030        Some(LineRef { index, hash })
2031    }
2032
2033    /// Parse the model output into a list of `EditCommand`s.
2034    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
2035        let mut commands = Vec::new();
2036        let mut offset = 0usize;
2037
2038        while offset < model_output.len() {
2039            let next_nl = model_output[offset..]
2040                .find('\n')
2041                .map(|i| offset + i)
2042                .unwrap_or(model_output.len());
2043            let line = &model_output[offset..next_nl];
2044            let line_end = if next_nl < model_output.len() {
2045                next_nl + 1
2046            } else {
2047                next_nl
2048            };
2049
2050            let trimmed = line.trim();
2051            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
2052                (true, spec)
2053            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
2054                (false, spec)
2055            } else {
2056                offset = line_end;
2057                continue;
2058            };
2059
2060            let mut content_end = line_end;
2061            let mut scan = line_end;
2062
2063            while scan < model_output.len() {
2064                let body_nl = model_output[scan..]
2065                    .find('\n')
2066                    .map(|i| scan + i)
2067                    .unwrap_or(model_output.len());
2068                let body_line = &model_output[scan..body_nl];
2069                if body_line.trim().starts_with(SET_COMMAND_MARKER)
2070                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
2071                {
2072                    break;
2073                }
2074                scan = if body_nl < model_output.len() {
2075                    body_nl + 1
2076                } else {
2077                    body_nl
2078                };
2079                content_end = scan;
2080            }
2081
2082            let content = &model_output[line_end..content_end];
2083
2084            if is_set {
2085                if let Some((start_str, end_str)) = specifier.split_once('-') {
2086                    if let (Some(start), Some(end)) =
2087                        (parse_line_ref(start_str), parse_line_ref(end_str))
2088                    {
2089                        commands.push(EditCommand::Set {
2090                            start,
2091                            end,
2092                            content,
2093                        });
2094                    }
2095                } else if let Some(target) = parse_line_ref(specifier) {
2096                    commands.push(EditCommand::Set {
2097                        start: target.clone(),
2098                        end: target,
2099                        content,
2100                    });
2101                }
2102            } else {
2103                let after = parse_line_ref(specifier);
2104                commands.push(EditCommand::Insert { after, content });
2105            }
2106
2107            offset = scan;
2108        }
2109
2110        commands
2111    }
2112
2113    /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
2114    /// (as opposed to being a plain full-replacement output).
2115    /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
2116    /// editable region, returning the plain text content.
2117    pub fn strip_hashline_prefixes(region: &str) -> String {
2118        let mut decoded: String = region
2119            .lines()
2120            .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
2121            .collect::<Vec<_>>()
2122            .join("\n");
2123        if region.ends_with('\n') {
2124            decoded.push('\n');
2125        }
2126        decoded
2127    }
2128
2129    pub fn output_has_edit_commands(model_output: &str) -> bool {
2130        model_output.contains(SET_COMMAND_MARKER)
2131            || model_output.contains(INSERT_COMMAND_MARKER)
2132            || model_output.contains(NO_EDITS_COMMAND_MARKER)
2133    }
2134
2135    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
2136    /// original editable region text.
2137    ///
2138    /// `editable_region` is the original text of the editable region (without hash
2139    /// prefixes). `model_output` is the raw model response containing edit commands.
2140    ///
2141    /// Returns the full replacement text for the editable region.
2142    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
2143        if model_output
2144            .trim_start()
2145            .starts_with(NO_EDITS_COMMAND_MARKER)
2146        {
2147            return editable_region.to_string();
2148        }
2149
2150        let original_lines: Vec<&str> = editable_region.lines().collect();
2151        let old_hashes: Vec<u8> = original_lines
2152            .iter()
2153            .map(|line| hash_line(line.as_bytes()))
2154            .collect();
2155
2156        let commands = parse_edit_commands(model_output);
2157
2158        // For set operations: indexed by start line → Some((end line index, content))
2159        // For insert operations: indexed by line index → vec of content to insert after
2160        // Insert-before-first is tracked separately.
2161        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
2162        let mut insert_before_first: Vec<&str> = Vec::new();
2163        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
2164
2165        for command in &commands {
2166            match command {
2167                EditCommand::Set {
2168                    start,
2169                    end,
2170                    content,
2171                } => {
2172                    if start.index < old_hashes.len()
2173                        && end.index < old_hashes.len()
2174                        && start.index <= end.index
2175                        && old_hashes[start.index] == start.hash
2176                        && old_hashes[end.index] == end.hash
2177                    {
2178                        set_ops[start.index] = Some((end.index, *content));
2179                    }
2180                }
2181                EditCommand::Insert { after, content } => match after {
2182                    None => insert_before_first.push(*content),
2183                    Some(line_ref) => {
2184                        if line_ref.index < old_hashes.len()
2185                            && old_hashes[line_ref.index] == line_ref.hash
2186                        {
2187                            insert_after[line_ref.index].push(*content);
2188                        }
2189                    }
2190                },
2191            }
2192        }
2193
2194        let mut result = String::new();
2195
2196        // Emit any insertions before the first line
2197        for content in &insert_before_first {
2198            result.push_str(content);
2199            if !content.ends_with('\n') {
2200                result.push('\n');
2201            }
2202        }
2203
2204        let mut i = 0;
2205        while i < original_lines.len() {
2206            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
2207                // Replace lines i..=end_index with the replacement content
2208                result.push_str(replacement);
2209                if !replacement.is_empty() && !replacement.ends_with('\n') {
2210                    result.push('\n');
2211                }
2212                // Emit any insertions after the end of this set range
2213                if *end_index < insert_after.len() {
2214                    for content in &insert_after[*end_index] {
2215                        result.push_str(content);
2216                        if !content.ends_with('\n') {
2217                            result.push('\n');
2218                        }
2219                    }
2220                }
2221                i = end_index + 1;
2222            } else {
2223                // Keep the original line
2224                result.push_str(original_lines[i]);
2225                result.push('\n');
2226                // Emit any insertions after this line
2227                for content in &insert_after[i] {
2228                    result.push_str(content);
2229                    if !content.ends_with('\n') {
2230                        result.push('\n');
2231                    }
2232                }
2233                i += 1;
2234            }
2235        }
2236
2237        // Preserve trailing newline behavior: if the original ended with a
2238        // newline the result already has one; if it didn't, trim the extra one
2239        // we added.
2240        if !editable_region.ends_with('\n') && result.ends_with('\n') {
2241            result.pop();
2242        }
2243
2244        result
2245    }
2246
2247    /// Convert a unified diff patch into hashline edit commands.
2248    ///
2249    /// Parses the unified diff `patch` directly to determine which lines of
2250    /// `old_text` are deleted/replaced and what new lines are added, then emits
2251    /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
2252    /// `{index}:{hash}` identifiers.
2253    ///
2254    /// `cursor_offset` is an optional byte offset into the first hunk's new
2255    /// text (context + additions) where the cursor marker should be placed.
2256    pub fn patch_to_edit_commands(
2257        old_text: &str,
2258        patch: &str,
2259        cursor_offset: Option<usize>,
2260    ) -> Result<String> {
2261        let old_lines: Vec<&str> = old_text.lines().collect();
2262        let old_hashes: Vec<u8> = old_lines
2263            .iter()
2264            .map(|line| hash_line(line.as_bytes()))
2265            .collect();
2266
2267        let mut result = String::new();
2268        let mut first_hunk = true;
2269
2270        struct Hunk<'a> {
2271            line_range: Range<usize>,
2272            new_text_lines: Vec<&'a str>,
2273            cursor_line_offset_in_new_text: Option<(usize, usize)>,
2274        }
2275
2276        // Parse the patch line by line. We only care about hunk headers,
2277        // context, deletions, and additions.
2278        let mut old_line_index: usize = 0;
2279        let mut current_hunk: Option<Hunk> = None;
2280        // Byte offset tracking within the hunk's new text for cursor placement.
2281        let mut new_text_byte_offset: usize = 0;
2282        // The line index of the last old line seen before/in the current hunk
2283        // (used for insert-after reference).
2284        let mut last_old_line_before_hunk: Option<usize> = None;
2285
2286        fn flush_hunk(
2287            hunk: Hunk,
2288            last_old_line: Option<usize>,
2289            result: &mut String,
2290            old_hashes: &[u8],
2291        ) {
2292            if hunk.line_range.is_empty() {
2293                // Pure insertion — reference the old line to insert after when in bounds.
2294                if let Some(after) = last_old_line
2295                    && let Some(&hash) = old_hashes.get(after)
2296                {
2297                    write!(
2298                        result,
2299                        "{INSERT_COMMAND_MARKER}{}\n",
2300                        LineRef { index: after, hash }
2301                    )
2302                    .unwrap();
2303                } else {
2304                    result.push_str(INSERT_COMMAND_MARKER);
2305                    result.push('\n');
2306                }
2307            } else {
2308                let start = hunk.line_range.start;
2309                let end_exclusive = hunk.line_range.end;
2310                let deleted_line_count = end_exclusive.saturating_sub(start);
2311
2312                if deleted_line_count == 1 {
2313                    if let Some(&hash) = old_hashes.get(start) {
2314                        write!(
2315                            result,
2316                            "{SET_COMMAND_MARKER}{}\n",
2317                            LineRef { index: start, hash }
2318                        )
2319                        .unwrap();
2320                    } else {
2321                        result.push_str(SET_COMMAND_MARKER);
2322                        result.push('\n');
2323                    }
2324                } else {
2325                    let end_inclusive = end_exclusive - 1;
2326                    match (
2327                        old_hashes.get(start).copied(),
2328                        old_hashes.get(end_inclusive).copied(),
2329                    ) {
2330                        (Some(start_hash), Some(end_hash)) => {
2331                            write!(
2332                                result,
2333                                "{SET_COMMAND_MARKER}{}-{}\n",
2334                                LineRef {
2335                                    index: start,
2336                                    hash: start_hash
2337                                },
2338                                LineRef {
2339                                    index: end_inclusive,
2340                                    hash: end_hash
2341                                }
2342                            )
2343                            .unwrap();
2344                        }
2345                        _ => {
2346                            result.push_str(SET_COMMAND_MARKER);
2347                            result.push('\n');
2348                        }
2349                    }
2350                }
2351            }
2352            for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
2353                if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
2354                    && line_offset == cursor_line_offset
2355                {
2356                    result.push_str(&line[..char_offset]);
2357                    result.push_str(CURSOR_MARKER);
2358                    result.push_str(&line[char_offset..]);
2359                    continue;
2360                }
2361
2362                result.push_str(line);
2363            }
2364        }
2365
2366        for raw_line in patch.split_inclusive('\n') {
2367            if raw_line.starts_with("@@") {
2368                // Flush any pending change hunk from a previous patch hunk.
2369                if let Some(hunk) = current_hunk.take() {
2370                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2371                }
2372
2373                // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
2374                // We intentionally do not trust old_start as a direct local index into `old_text`,
2375                // because some patches are produced against a larger file region and carry
2376                // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
2377                if first_hunk {
2378                    new_text_byte_offset = 0;
2379                    first_hunk = false;
2380                }
2381                continue;
2382            }
2383
2384            if raw_line.starts_with("---") || raw_line.starts_with("+++") {
2385                continue;
2386            }
2387            if raw_line.starts_with("\\ No newline") {
2388                continue;
2389            }
2390
2391            if raw_line.starts_with('-') {
2392                // Extend or start a change hunk with this deleted old line.
2393                match &mut current_hunk {
2394                    Some(Hunk {
2395                        line_range: range, ..
2396                    }) => range.end = old_line_index + 1,
2397                    None => {
2398                        current_hunk = Some(Hunk {
2399                            line_range: old_line_index..old_line_index + 1,
2400                            new_text_lines: Vec::new(),
2401                            cursor_line_offset_in_new_text: None,
2402                        });
2403                    }
2404                }
2405                old_line_index += 1;
2406            } else if let Some(added_content) = raw_line.strip_prefix('+') {
2407                // Place cursor marker if cursor_offset falls within this line.
2408                let mut cursor_line_offset = None;
2409                if let Some(cursor_off) = cursor_offset
2410                    && (first_hunk
2411                        || cursor_off >= new_text_byte_offset
2412                            && cursor_off <= new_text_byte_offset + added_content.len())
2413                {
2414                    let line_offset = added_content.floor_char_boundary(
2415                        cursor_off
2416                            .saturating_sub(new_text_byte_offset)
2417                            .min(added_content.len()),
2418                    );
2419                    cursor_line_offset = Some(line_offset);
2420                }
2421
2422                new_text_byte_offset += added_content.len();
2423
2424                let hunk = current_hunk.get_or_insert(Hunk {
2425                    line_range: old_line_index..old_line_index,
2426                    new_text_lines: vec![],
2427                    cursor_line_offset_in_new_text: None,
2428                });
2429                hunk.new_text_lines.push(added_content);
2430                hunk.cursor_line_offset_in_new_text = cursor_line_offset
2431                    .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
2432            } else {
2433                // Context line (starts with ' ' or is empty).
2434                if let Some(hunk) = current_hunk.take() {
2435                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2436                }
2437                last_old_line_before_hunk = Some(old_line_index);
2438                old_line_index += 1;
2439                let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
2440                new_text_byte_offset += content.len();
2441            }
2442        }
2443
2444        // Flush final group.
2445        if let Some(hunk) = current_hunk.take() {
2446            flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
2447        }
2448
2449        // Trim a single trailing newline.
2450        if result.ends_with('\n') {
2451            result.pop();
2452        }
2453
2454        if result.is_empty() {
2455            return Ok(NO_EDITS_COMMAND_MARKER.to_string());
2456        }
2457
2458        Ok(result)
2459    }
2460
2461    #[cfg(test)]
2462    mod tests {
2463        use super::*;
2464        use indoc::indoc;
2465
2466        #[test]
2467        fn test_format_cursor_region() {
2468            struct Case {
2469                name: &'static str,
2470                context: &'static str,
2471                editable_range: Range<usize>,
2472                cursor_offset: usize,
2473                expected: &'static str,
2474            }
2475
2476            let cases = [
2477                Case {
2478                    name: "basic_cursor_placement",
2479                    context: "hello world\n",
2480                    editable_range: 0..12,
2481                    cursor_offset: 5,
2482                    expected: indoc! {"
2483                    <|file_sep|>test.rs
2484                    <|fim_prefix|>
2485                    <|fim_middle|>current
2486                    0:5c|hello<|user_cursor|> world
2487                    <|fim_suffix|>
2488                    <|fim_middle|>updated
2489                    "},
2490                },
2491                Case {
2492                    name: "multiline_cursor_on_second_line",
2493                    context: "aaa\nbbb\nccc\n",
2494                    editable_range: 0..12,
2495                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
2496                    expected: indoc! {"
2497                    <|file_sep|>test.rs
2498                    <|fim_prefix|>
2499                    <|fim_middle|>current
2500                    0:23|aaa
2501                    1:26|b<|user_cursor|>bb
2502                    2:29|ccc
2503                    <|fim_suffix|>
2504                    <|fim_middle|>updated
2505                    "},
2506                },
2507                Case {
2508                    name: "no_trailing_newline_in_context",
2509                    context: "line1\nline2",
2510                    editable_range: 0..11,
2511                    cursor_offset: 3,
2512                    expected: indoc! {"
2513                    <|file_sep|>test.rs
2514                    <|fim_prefix|>
2515                    <|fim_middle|>current
2516                    0:d9|lin<|user_cursor|>e1
2517                    1:da|line2
2518                    <|fim_suffix|>
2519                    <|fim_middle|>updated
2520                    "},
2521                },
2522                Case {
2523                    name: "leading_newline_in_editable_region",
2524                    context: "\nabc\n",
2525                    editable_range: 0..5,
2526                    cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
2527                    expected: indoc! {"
2528                    <|file_sep|>test.rs
2529                    <|fim_prefix|>
2530                    <|fim_middle|>current
2531                    0:00|
2532                    1:26|a<|user_cursor|>bc
2533                    <|fim_suffix|>
2534                    <|fim_middle|>updated
2535                    "},
2536                },
2537                Case {
2538                    name: "with_suffix",
2539                    context: "abc\ndef",
2540                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
2541                    cursor_offset: 2,
2542                    expected: indoc! {"
2543                    <|file_sep|>test.rs
2544                    <|fim_prefix|>
2545                    <|fim_middle|>current
2546                    0:26|ab<|user_cursor|>c
2547                    <|fim_suffix|>
2548                    def
2549                    <|fim_middle|>updated
2550                    "},
2551                },
2552                Case {
2553                    name: "unicode_two_byte_chars",
2554                    context: "héllo\n",
2555                    editable_range: 0..7,
2556                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
2557                    expected: indoc! {"
2558                    <|file_sep|>test.rs
2559                    <|fim_prefix|>
2560                    <|fim_middle|>current
2561                    0:1b|hé<|user_cursor|>llo
2562                    <|fim_suffix|>
2563                    <|fim_middle|>updated
2564                    "},
2565                },
2566                Case {
2567                    name: "unicode_three_byte_chars",
2568                    context: "日本語\n",
2569                    editable_range: 0..10,
2570                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
2571                    expected: indoc! {"
2572                    <|file_sep|>test.rs
2573                    <|fim_prefix|>
2574                    <|fim_middle|>current
2575                    0:80|日本<|user_cursor|>語
2576                    <|fim_suffix|>
2577                    <|fim_middle|>updated
2578                    "},
2579                },
2580                Case {
2581                    name: "unicode_four_byte_chars",
2582                    context: "a🌍b\n",
2583                    editable_range: 0..7,
2584                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
2585                    expected: indoc! {"
2586                    <|file_sep|>test.rs
2587                    <|fim_prefix|>
2588                    <|fim_middle|>current
2589                    0:6b|a🌍<|user_cursor|>b
2590                    <|fim_suffix|>
2591                    <|fim_middle|>updated
2592                    "},
2593                },
2594                Case {
2595                    name: "cursor_at_start_of_region_not_placed",
2596                    context: "abc\n",
2597                    editable_range: 0..4,
2598                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
2599                    expected: indoc! {"
2600                    <|file_sep|>test.rs
2601                    <|fim_prefix|>
2602                    <|fim_middle|>current
2603                    0:26|abc
2604                    <|fim_suffix|>
2605                    <|fim_middle|>updated
2606                    "},
2607                },
2608                Case {
2609                    name: "cursor_at_end_of_line_not_placed",
2610                    context: "abc\ndef\n",
2611                    editable_range: 0..8,
2612                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
2613                    expected: indoc! {"
2614                    <|file_sep|>test.rs
2615                    <|fim_prefix|>
2616                    <|fim_middle|>current
2617                    0:26|abc
2618                    1:2f|def
2619                    <|fim_suffix|>
2620                    <|fim_middle|>updated
2621                    "},
2622                },
2623                Case {
2624                    name: "cursor_offset_relative_to_context_not_editable_region",
2625                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
2626                    // write_cursor_excerpt_section must subtract it before comparing against
2627                    // per-line offsets within the editable region.
2628                    context: "pre\naaa\nbbb\nsuf\n",
2629                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
2630                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
2631                    expected: indoc! {"
2632                    <|file_sep|>test.rs
2633                    <|fim_prefix|>
2634                    pre
2635                    <|fim_middle|>current
2636                    0:23|aaa
2637                    1:26|b<|user_cursor|>bb
2638                    <|fim_suffix|>
2639                    suf
2640                    <|fim_middle|>updated
2641                    "},
2642                },
2643            ];
2644
2645            for case in &cases {
2646                let mut prompt = String::new();
2647                hashline::write_cursor_excerpt_section(
2648                    &mut prompt,
2649                    Path::new("test.rs"),
2650                    case.context,
2651                    &case.editable_range,
2652                    case.cursor_offset,
2653                );
2654                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
2655            }
2656        }
2657
2658        #[test]
2659        fn test_apply_edit_commands() {
2660            struct Case {
2661                name: &'static str,
2662                original: &'static str,
2663                model_output: &'static str,
2664                expected: &'static str,
2665            }
2666
2667            let cases = vec![
2668                Case {
2669                    name: "set_single_line",
2670                    original: indoc! {"
2671                    let mut total = 0;
2672                    for product in products {
2673                        total += ;
2674                    }
2675                    total
2676                "},
2677                    model_output: indoc! {"
2678                    <|set|>2:87
2679                        total += product.price;
2680                "},
2681                    expected: indoc! {"
2682                    let mut total = 0;
2683                    for product in products {
2684                        total += product.price;
2685                    }
2686                    total
2687                "},
2688                },
2689                Case {
2690                    name: "set_range",
2691                    original: indoc! {"
2692                    fn foo() {
2693                        let x = 1;
2694                        let y = 2;
2695                        let z = 3;
2696                    }
2697                "},
2698                    model_output: indoc! {"
2699                    <|set|>1:46-3:4a
2700                        let sum = 6;
2701                "},
2702                    expected: indoc! {"
2703                    fn foo() {
2704                        let sum = 6;
2705                    }
2706                "},
2707                },
2708                Case {
2709                    name: "insert_after_line",
2710                    original: indoc! {"
2711                    fn main() {
2712                        let x = 1;
2713                    }
2714                "},
2715                    model_output: indoc! {"
2716                    <|insert|>1:46
2717                        let y = 2;
2718                "},
2719                    expected: indoc! {"
2720                    fn main() {
2721                        let x = 1;
2722                        let y = 2;
2723                    }
2724                "},
2725                },
2726                Case {
2727                    name: "insert_before_first",
2728                    original: indoc! {"
2729                    let x = 1;
2730                    let y = 2;
2731                "},
2732                    model_output: indoc! {"
2733                    <|insert|>
2734                    use std::io;
2735                "},
2736                    expected: indoc! {"
2737                    use std::io;
2738                    let x = 1;
2739                    let y = 2;
2740                "},
2741                },
2742                Case {
2743                    name: "set_with_cursor_marker",
2744                    original: indoc! {"
2745                    fn main() {
2746                        println!();
2747                    }
2748                "},
2749                    model_output: indoc! {"
2750                    <|set|>1:34
2751                        eprintln!(\"<|user_cursor|>\");
2752                "},
2753                    expected: indoc! {"
2754                    fn main() {
2755                        eprintln!(\"<|user_cursor|>\");
2756                    }
2757                "},
2758                },
2759                Case {
2760                    name: "multiple_set_commands",
2761                    original: indoc! {"
2762                    aaa
2763                    bbb
2764                    ccc
2765                    ddd
2766                "},
2767                    model_output: indoc! {"
2768                    <|set|>0:23
2769                    AAA
2770                    <|set|>2:29
2771                    CCC
2772                "},
2773                    expected: indoc! {"
2774                    AAA
2775                    bbb
2776                    CCC
2777                    ddd
2778                "},
2779                },
2780                Case {
2781                    name: "set_range_multiline_replacement",
2782                    original: indoc! {"
2783                    fn handle_submit() {
2784                    }
2785
2786                    fn handle_keystroke() {
2787                "},
2788                    model_output: indoc! {"
2789                    <|set|>0:3f-1:7d
2790                    fn handle_submit(modal_state: &mut ModalState) {
2791                        <|user_cursor|>
2792                    }
2793                "},
2794                    expected: indoc! {"
2795                    fn handle_submit(modal_state: &mut ModalState) {
2796                        <|user_cursor|>
2797                    }
2798
2799                    fn handle_keystroke() {
2800                "},
2801                },
2802                Case {
2803                    name: "no_edit_commands_returns_original",
2804                    original: indoc! {"
2805                    hello
2806                    world
2807                "},
2808                    model_output: "some random text with no commands",
2809                    expected: indoc! {"
2810                    hello
2811                    world
2812                "},
2813                },
2814                Case {
2815                    name: "no_edits_command_returns_original",
2816                    original: indoc! {"
2817                    hello
2818                    world
2819                "},
2820                    model_output: "<|no_edits|>",
2821                    expected: indoc! {"
2822                    hello
2823                    world
2824                "},
2825                },
2826                Case {
2827                    name: "wrong_hash_set_ignored",
2828                    original: indoc! {"
2829                    aaa
2830                    bbb
2831                "},
2832                    model_output: indoc! {"
2833                    <|set|>0:ff
2834                    ZZZ
2835                "},
2836                    expected: indoc! {"
2837                    aaa
2838                    bbb
2839                "},
2840                },
2841                Case {
2842                    name: "insert_and_set_combined",
2843                    original: indoc! {"
2844                    alpha
2845                    beta
2846                    gamma
2847                "},
2848                    model_output: indoc! {"
2849                    <|set|>0:06
2850                    ALPHA
2851                    <|insert|>1:9c
2852                    beta_extra
2853                "},
2854                    expected: indoc! {"
2855                    ALPHA
2856                    beta
2857                    beta_extra
2858                    gamma
2859                "},
2860                },
2861                Case {
2862                    name: "no_trailing_newline_preserved",
2863                    original: "hello\nworld",
2864                    model_output: indoc! {"
2865                    <|set|>0:14
2866                    HELLO
2867                "},
2868                    expected: "HELLO\nworld",
2869                },
2870                Case {
2871                    name: "set_range_hash_mismatch_in_end_bound",
2872                    original: indoc! {"
2873                    one
2874                    two
2875                    three
2876                "},
2877                    model_output: indoc! {"
2878                    <|set|>0:42-2:ff
2879                    ONE_TWO_THREE
2880                "},
2881                    expected: indoc! {"
2882                    one
2883                    two
2884                    three
2885                "},
2886                },
2887                Case {
2888                    name: "set_range_start_greater_than_end_ignored",
2889                    original: indoc! {"
2890                    a
2891                    b
2892                    c
2893                "},
2894                    model_output: indoc! {"
2895                    <|set|>2:63-1:62
2896                    X
2897                "},
2898                    expected: indoc! {"
2899                    a
2900                    b
2901                    c
2902                "},
2903                },
2904                Case {
2905                    name: "insert_out_of_bounds_ignored",
2906                    original: indoc! {"
2907                    x
2908                    y
2909                "},
2910                    model_output: indoc! {"
2911                    <|insert|>99:aa
2912                    z
2913                "},
2914                    expected: indoc! {"
2915                    x
2916                    y
2917                "},
2918                },
2919                Case {
2920                    name: "set_out_of_bounds_ignored",
2921                    original: indoc! {"
2922                    x
2923                    y
2924                "},
2925                    model_output: indoc! {"
2926                    <|set|>99:aa
2927                    z
2928                "},
2929                    expected: indoc! {"
2930                    x
2931                    y
2932                "},
2933                },
2934                Case {
2935                    name: "malformed_set_command_ignored",
2936                    original: indoc! {"
2937                    alpha
2938                    beta
2939                "},
2940                    model_output: indoc! {"
2941                    <|set|>not-a-line-ref
2942                    UPDATED
2943                "},
2944                    expected: indoc! {"
2945                    alpha
2946                    beta
2947                "},
2948                },
2949                Case {
2950                    name: "malformed_insert_hash_treated_as_before_first",
2951                    original: indoc! {"
2952                    alpha
2953                    beta
2954                "},
2955                    model_output: indoc! {"
2956                    <|insert|>1:nothex
2957                    preamble
2958                "},
2959                    expected: indoc! {"
2960                    preamble
2961                    alpha
2962                    beta
2963                "},
2964                },
2965                Case {
2966                    name: "set_then_insert_same_target_orders_insert_after_replacement",
2967                    original: indoc! {"
2968                    cat
2969                    dog
2970                "},
2971                    model_output: indoc! {"
2972                    <|set|>0:38
2973                    CAT
2974                    <|insert|>0:38
2975                    TAIL
2976                "},
2977                    expected: indoc! {"
2978                    CAT
2979                    TAIL
2980                    dog
2981                "},
2982                },
2983                Case {
2984                    name: "overlapping_set_ranges_last_wins",
2985                    original: indoc! {"
2986                    a
2987                    b
2988                    c
2989                    d
2990                "},
2991                    model_output: indoc! {"
2992                    <|set|>0:61-2:63
2993                    FIRST
2994                    <|set|>1:62-3:64
2995                    SECOND
2996                "},
2997                    expected: indoc! {"
2998                    FIRST
2999                    d
3000                "},
3001                },
3002                Case {
3003                    name: "insert_before_first_and_after_line",
3004                    original: indoc! {"
3005                        a
3006                        b
3007                    "},
3008                    model_output: indoc! {"
3009                        <|insert|>
3010                        HEAD
3011                        <|insert|>0:61
3012                        MID
3013                    "},
3014                    expected: indoc! {"
3015                        HEAD
3016                        a
3017                        MID
3018                        b
3019                    "},
3020                },
3021            ];
3022
3023            for case in &cases {
3024                let result = hashline::apply_edit_commands(case.original, &case.model_output);
3025                assert_eq!(result, case.expected, "failed case: {}", case.name);
3026            }
3027        }
3028
3029        #[test]
3030        fn test_output_has_edit_commands() {
3031            assert!(hashline::output_has_edit_commands(&format!(
3032                "{}0:ab\nnew",
3033                SET_COMMAND_MARKER
3034            )));
3035            assert!(hashline::output_has_edit_commands(&format!(
3036                "{}0:ab\nnew",
3037                INSERT_COMMAND_MARKER
3038            )));
3039            assert!(hashline::output_has_edit_commands(&format!(
3040                "some text\n{}1:cd\nstuff",
3041                SET_COMMAND_MARKER
3042            )));
3043            assert!(!hashline::output_has_edit_commands("just plain text"));
3044            assert!(!hashline::output_has_edit_commands("NO_EDITS"));
3045            assert!(hashline::output_has_edit_commands("<|no_edits|>"));
3046        }
3047
3048        // ---- hashline::patch_to_edit_commands round-trip tests ----
3049
3050        #[test]
3051        fn test_patch_to_edit_commands() {
3052            struct Case {
3053                name: &'static str,
3054                old: &'static str,
3055                patch: &'static str,
3056                expected_new: &'static str,
3057            }
3058
3059            let cases = [
3060                Case {
3061                    name: "single_line_replacement",
3062                    old: indoc! {"
3063                    let mut total = 0;
3064                    for product in products {
3065                        total += ;
3066                    }
3067                    total
3068                "},
3069                    patch: indoc! {"
3070                    @@ -1,5 +1,5 @@
3071                     let mut total = 0;
3072                     for product in products {
3073                    -    total += ;
3074                    +    total += product.price;
3075                     }
3076                     total
3077                "},
3078                    expected_new: indoc! {"
3079                    let mut total = 0;
3080                    for product in products {
3081                        total += product.price;
3082                    }
3083                    total
3084                "},
3085                },
3086                Case {
3087                    name: "multiline_replacement",
3088                    old: indoc! {"
3089                    fn foo() {
3090                        let x = 1;
3091                        let y = 2;
3092                        let z = 3;
3093                    }
3094                "},
3095                    patch: indoc! {"
3096                    @@ -1,5 +1,3 @@
3097                     fn foo() {
3098                    -    let x = 1;
3099                    -    let y = 2;
3100                    -    let z = 3;
3101                    +    let sum = 1 + 2 + 3;
3102                     }
3103                "},
3104                    expected_new: indoc! {"
3105                    fn foo() {
3106                        let sum = 1 + 2 + 3;
3107                    }
3108                "},
3109                },
3110                Case {
3111                    name: "insertion",
3112                    old: indoc! {"
3113                    fn main() {
3114                        let x = 1;
3115                    }
3116                "},
3117                    patch: indoc! {"
3118                    @@ -1,3 +1,4 @@
3119                     fn main() {
3120                         let x = 1;
3121                    +    let y = 2;
3122                     }
3123                "},
3124                    expected_new: indoc! {"
3125                    fn main() {
3126                        let x = 1;
3127                        let y = 2;
3128                    }
3129                "},
3130                },
3131                Case {
3132                    name: "insertion_before_first",
3133                    old: indoc! {"
3134                    let x = 1;
3135                    let y = 2;
3136                "},
3137                    patch: indoc! {"
3138                    @@ -1,2 +1,3 @@
3139                    +use std::io;
3140                     let x = 1;
3141                     let y = 2;
3142                "},
3143                    expected_new: indoc! {"
3144                    use std::io;
3145                    let x = 1;
3146                    let y = 2;
3147                "},
3148                },
3149                Case {
3150                    name: "deletion",
3151                    old: indoc! {"
3152                    aaa
3153                    bbb
3154                    ccc
3155                    ddd
3156                "},
3157                    patch: indoc! {"
3158                    @@ -1,4 +1,2 @@
3159                     aaa
3160                    -bbb
3161                    -ccc
3162                     ddd
3163                "},
3164                    expected_new: indoc! {"
3165                    aaa
3166                    ddd
3167                "},
3168                },
3169                Case {
3170                    name: "multiple_changes",
3171                    old: indoc! {"
3172                    alpha
3173                    beta
3174                    gamma
3175                    delta
3176                    epsilon
3177                "},
3178                    patch: indoc! {"
3179                    @@ -1,5 +1,5 @@
3180                    -alpha
3181                    +ALPHA
3182                     beta
3183                     gamma
3184                    -delta
3185                    +DELTA
3186                     epsilon
3187                "},
3188                    expected_new: indoc! {"
3189                    ALPHA
3190                    beta
3191                    gamma
3192                    DELTA
3193                    epsilon
3194                "},
3195                },
3196                Case {
3197                    name: "replace_with_insertion",
3198                    old: indoc! {r#"
3199                    fn handle() {
3200                        modal_state.close();
3201                        modal_state.dismiss();
3202                "#},
3203                    patch: indoc! {r#"
3204                    @@ -1,3 +1,4 @@
3205                     fn handle() {
3206                         modal_state.close();
3207                    +    eprintln!("");
3208                         modal_state.dismiss();
3209                "#},
3210                    expected_new: indoc! {r#"
3211                    fn handle() {
3212                        modal_state.close();
3213                        eprintln!("");
3214                        modal_state.dismiss();
3215                "#},
3216                },
3217                Case {
3218                    name: "complete_replacement",
3219                    old: indoc! {"
3220                    aaa
3221                    bbb
3222                    ccc
3223                "},
3224                    patch: indoc! {"
3225                    @@ -1,3 +1,3 @@
3226                    -aaa
3227                    -bbb
3228                    -ccc
3229                    +xxx
3230                    +yyy
3231                    +zzz
3232                "},
3233                    expected_new: indoc! {"
3234                    xxx
3235                    yyy
3236                    zzz
3237                "},
3238                },
3239                Case {
3240                    name: "add_function_body",
3241                    old: indoc! {"
3242                    fn foo() {
3243                        modal_state.dismiss();
3244                    }
3245
3246                    fn
3247
3248                    fn handle_keystroke() {
3249                "},
3250                    patch: indoc! {"
3251                    @@ -1,6 +1,8 @@
3252                     fn foo() {
3253                         modal_state.dismiss();
3254                     }
3255
3256                    -fn
3257                    +fn handle_submit() {
3258                    +    todo()
3259                    +}
3260
3261                     fn handle_keystroke() {
3262                "},
3263                    expected_new: indoc! {"
3264                    fn foo() {
3265                        modal_state.dismiss();
3266                    }
3267
3268                    fn handle_submit() {
3269                        todo()
3270                    }
3271
3272                    fn handle_keystroke() {
3273                "},
3274                },
3275                Case {
3276                    name: "with_cursor_offset",
3277                    old: indoc! {r#"
3278                    fn main() {
3279                        println!();
3280                    }
3281                "#},
3282                    patch: indoc! {r#"
3283                        @@ -1,3 +1,3 @@
3284                        fn main() {
3285                        -    println!();
3286                        +    eprintln!("");
3287                        }
3288                    "#},
3289                    expected_new: indoc! {r#"
3290                        fn main() {
3291                            eprintln!("<|user_cursor|>");
3292                        }
3293                    "#},
3294                },
3295                Case {
3296                    name: "non_local_hunk_header_pure_insertion_repro",
3297                    old: indoc! {"
3298                        aaa
3299                        bbb
3300                    "},
3301                    patch: indoc! {"
3302                        @@ -20,2 +20,3 @@
3303                        aaa
3304                        +xxx
3305                        bbb
3306                    "},
3307                    expected_new: indoc! {"
3308                        aaa
3309                        xxx
3310                        bbb
3311                    "},
3312                },
3313                Case {
3314                    name: "empty_patch_produces_no_edits_marker",
3315                    old: indoc! {"
3316                        aaa
3317                        bbb
3318                    "},
3319                    patch: "@@ -20,2 +20,3 @@\n",
3320                    expected_new: indoc! {"
3321                        aaa
3322                        bbb
3323                    "},
3324                },
3325            ];
3326
3327            for case in &cases {
3328                // The cursor_offset for patch_to_edit_commands is relative to
3329                // the first hunk's new text (context + additions). We compute
3330                // it by finding where the marker sits in the expected output
3331                // (which mirrors the new text of the hunk).
3332                let cursor_offset = case.expected_new.find(CURSOR_MARKER);
3333
3334                let commands =
3335                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
3336                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
3337
3338                assert!(
3339                    hashline::output_has_edit_commands(&commands),
3340                    "case {}: expected edit commands, got: {commands:?}",
3341                    case.name,
3342                );
3343
3344                let applied = hashline::apply_edit_commands(case.old, &commands);
3345                assert_eq!(applied, case.expected_new, "case {}", case.name);
3346            }
3347        }
3348    }
3349}
3350
3351pub mod seed_coder {
3352    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
3353    //!
3354    //! Seed-Coder uses different FIM tokens and order than Qwen:
3355    //! - SPM order: suffix comes FIRST, then prefix, then middle
3356    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
3357    //! - File markers: StarCoder-style `<filename>path` (single token + path)
3358    //!
3359    //! All context (related files, edit history) goes in the PREFIX section.
3360    //! The suffix contains only code after the editable region.
3361    //!
3362    //! Example prompt:
3363    //!
3364    //! <[fim-suffix]>
3365    //! code after editable region
3366    //! <[fim-prefix]><filename>related/file.py
3367    //! related file content
3368    //!
3369    //! <filename>edit_history
3370    //! --- a/some_file.py
3371    //! +++ b/some_file.py
3372    //! -old
3373    //! +new
3374    //!
3375    //! <filename>path/to/target_file.py
3376    //! code before editable region
3377    //! <<<<<<< CURRENT
3378    //! code that
3379    //! needs to<|user_cursor|>
3380    //! be rewritten
3381    //! =======
3382    //! <[fim-middle]>
3383    //!
3384    //! Expected output (model generates):
3385    //!
3386    //! updated
3387    //! code with
3388    //! changes applied
3389    //! >>>>>>> UPDATED
3390
3391    use super::*;
3392
    /// Token that opens the suffix section, which is written first in the prompt.
    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
    /// Token written right after the suffix section, opening the prefix section.
    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
    /// Token written at the very end of the prompt.
    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
    /// Marker written before the cursor file's path; also passed as the marker
    /// for the edit-history and related-files sections.
    pub const FILE_MARKER: &str = "<filename>";

    /// Written immediately before the editable region of the cursor excerpt.
    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
    /// Written after the editable region, closing the cursor excerpt section.
    pub const SEPARATOR: &str = "=======\n";
    /// Terminates the expected model output (see the module-level example);
    /// `no_edits` appends it after `NO_EDITS`.
    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

    /// Output body produced by `no_edits` for a patch it treats as empty.
    pub const NO_EDITS: &str = "NO_EDITS\n";
3403
3404    pub fn special_tokens() -> &'static [&'static str] {
3405        &[
3406            FIM_SUFFIX,
3407            FIM_PREFIX,
3408            FIM_MIDDLE,
3409            FILE_MARKER,
3410            START_MARKER,
3411            SEPARATOR,
3412            END_MARKER,
3413            CURSOR_MARKER,
3414        ]
3415    }
3416
3417    pub fn write_cursor_excerpt_section(
3418        prompt: &mut String,
3419        path: &Path,
3420        context: &str,
3421        editable_range: &Range<usize>,
3422        cursor_offset: usize,
3423    ) {
3424        let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3425        prompt.push_str(&section);
3426    }
3427
3428    pub fn format_prompt_with_budget(
3429        path: &Path,
3430        context: &str,
3431        editable_range: &Range<usize>,
3432        cursor_offset: usize,
3433        events: &[Arc<Event>],
3434        related_files: &[RelatedFile],
3435        diagnostics: &[ActiveBufferDiagnostic],
3436        max_tokens: usize,
3437    ) -> String {
3438        let cursor_prefix_section =
3439            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
3440        assemble_fim_prompt(
3441            context,
3442            editable_range,
3443            &cursor_prefix_section,
3444            events,
3445            related_files,
3446            diagnostics,
3447            None,
3448            max_tokens,
3449        )
3450    }
3451
3452    pub fn assemble_fim_prompt(
3453        context: &str,
3454        editable_range: &Range<usize>,
3455        cursor_prefix_section: &str,
3456        events: &[Arc<Event>],
3457        related_files: &[RelatedFile],
3458        diagnostics: &[ActiveBufferDiagnostic],
3459        cursor_buffer_row: Option<u32>,
3460        max_tokens: usize,
3461    ) -> String {
3462        let suffix_section = build_suffix_section(context, editable_range);
3463
3464        let suffix_tokens = estimate_tokens(suffix_section.len() + FIM_PREFIX.len());
3465        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len() + FIM_MIDDLE.len());
3466        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
3467
3468        let edit_history_section = super::format_edit_history_within_budget(
3469            events,
3470            FILE_MARKER,
3471            "edit_history",
3472            budget_after_cursor,
3473            max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
3474        );
3475        let edit_history_tokens = estimate_tokens(edit_history_section.len() + "\n".len());
3476        let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
3477
3478        let diagnostics_section = super::format_active_buffer_diagnostics_with_budget(
3479            diagnostics,
3480            cursor_buffer_row,
3481            budget_after_edit_history,
3482        );
3483        let diagnostics_tokens = estimate_tokens(diagnostics_section.len() + "\n".len());
3484        let budget_after_diagnostics = budget_after_edit_history.saturating_sub(diagnostics_tokens);
3485
3486        let related_files_section = super::format_related_files_within_budget(
3487            related_files,
3488            FILE_MARKER,
3489            "",
3490            budget_after_diagnostics,
3491        );
3492
3493        let mut prompt = String::new();
3494        prompt.push_str(&suffix_section);
3495        prompt.push_str(FIM_PREFIX);
3496        prompt.push_str(&diagnostics_section);
3497        if !diagnostics_section.is_empty() {
3498            prompt.push('\n');
3499        }
3500        prompt.push_str(&related_files_section);
3501        if !related_files_section.is_empty() {
3502            prompt.push('\n');
3503        }
3504        prompt.push_str(&edit_history_section);
3505        if !edit_history_section.is_empty() {
3506            prompt.push('\n');
3507        }
3508        prompt.push_str(cursor_prefix_section);
3509        prompt.push_str(FIM_MIDDLE);
3510
3511        prompt
3512    }
3513
3514    pub(crate) fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
3515        let mut section = String::new();
3516        section.push_str(FIM_SUFFIX);
3517        section.push_str(&context[editable_range.end..]);
3518        if !section.ends_with('\n') {
3519            section.push('\n');
3520        }
3521        section
3522    }
3523
3524    fn build_cursor_prefix_section(
3525        path: &Path,
3526        context: &str,
3527        editable_range: &Range<usize>,
3528        cursor_offset: usize,
3529    ) -> String {
3530        let mut section = String::new();
3531        let path_str = path.to_string_lossy();
3532        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
3533
3534        section.push_str(&context[..editable_range.start]);
3535        section.push_str(START_MARKER);
3536        section.push_str(&context[editable_range.start..cursor_offset]);
3537        section.push_str(CURSOR_MARKER);
3538        section.push_str(&context[cursor_offset..editable_range.end]);
3539        if !section.ends_with('\n') {
3540            section.push('\n');
3541        }
3542        section.push_str(SEPARATOR);
3543        section
3544    }
3545
3546    /// Format patch as containing no changes if it's empty; otherwise return None.
3547    pub(crate) fn no_edits(patch: &str) -> Option<String> {
3548        // Count lines in the patch
3549        let empty_patch = patch.lines().count() <= 3;
3550        if empty_patch {
3551            Some(format!("{NO_EDITS}{END_MARKER}"))
3552        } else {
3553            None
3554        }
3555    }
3556}
3557
3558pub mod v0304_variable_edit {
3559    //! A prompt format with no fixed editable region. The entire context is shown
3560    //! to the model, and it chooses which text to replace by outputting surrounding
3561    //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
3562    //! text.
3563    //!
3564    //! Example prompt:
3565    //!
3566    //! <|file_sep|>path/to/file.py
3567    //! zero
3568    //! one
3569    //! two
3570    //! three<|user_cursor|>
3571    //! four
3572    //! five
3573    //! <|fim_prefix|>
    //!
3575    //! Expected output (model generates):
3576    //!
3577    //! two
3578    //! <|fim_middle|>
3579    //! THREE
3580    //! <|fim_suffix|>
3581    //! four
3582    //!
3583    //! The output means: find "two\n...\nfour" in the context, and replace
3584    //! everything between "two\n" and "four" with "THREE\n".
3585
3586    use super::*;
3587
3588    pub fn special_tokens() -> &'static [&'static str] {
3589        &[
3590            "<|fim_prefix|>",
3591            "<|fim_suffix|>",
3592            "<|fim_middle|>",
3593            "<|file_sep|>",
3594            CURSOR_MARKER,
3595        ]
3596    }
3597
3598    pub fn write_cursor_excerpt_section(
3599        prompt: &mut String,
3600        path: &Path,
3601        context: &str,
3602        cursor_offset: usize,
3603    ) {
3604        let path_str = path.to_string_lossy();
3605        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
3606
3607        prompt.push_str(&context[..cursor_offset]);
3608        prompt.push_str(CURSOR_MARKER);
3609        prompt.push_str(&context[cursor_offset..]);
3610        if !prompt.ends_with('\n') {
3611            prompt.push('\n');
3612        }
3613        prompt.push_str("<|fim_prefix|>\n")
3614    }
3615
3616    /// Apply a variable-edit model output to the original context text.
3617    ///
3618    /// The model output has the form:
3619    ///
3620    /// - prefix context lines
3621    /// - `<|fim_middle|>`
3622    /// - new text
3623    /// - `<|fim_suffix|>`
3624    /// - suffix context lines
3625    ///
3626    /// We locate the prefix/suffix context lines in the original text and replace
3627    /// everything between them with the new text.
3628    pub fn apply_variable_edit(
3629        context: &str,
3630        model_output: &str,
3631    ) -> Result<(Range<usize>, String)> {
3632        let (prefix_context, rest) = model_output
3633            .split_once("<|fim_middle|>\n")
3634            .or_else(|| model_output.split_once("<|fim_middle|>"))
3635            .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
3636
3637        let (new_text, suffix_context) = rest
3638            .split_once("<|fim_suffix|>\n")
3639            .or_else(|| rest.split_once("<|fim_suffix|>"))
3640            .unwrap_or((rest, ""));
3641
3642        let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
3643            suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
3644        } else {
3645            suffix_context
3646        };
3647
3648        let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
3649            .ok_or_else(|| anyhow!("could not locate prefix lines"))?
3650            + prefix_context.len();
3651        let suffix_offset = if suffix_context.is_empty() {
3652            context.len()
3653        } else {
3654            find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
3655                .ok_or_else(|| anyhow!("could not locate suffix lines"))?
3656                + prefix_offset
3657        };
3658
3659        let edit_range = prefix_offset..suffix_offset;
3660        return Ok((edit_range, new_text.to_string()));
3661    }
3662
3663    fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
3664        if needle.is_empty() {
3665            return Some(0);
3666        }
3667
3668        haystack.match_indices(needle).find_map(|(offset, _)| {
3669            let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
3670            matched_line_start.then_some(offset)
3671        })
3672    }
3673
3674    /// Convert a unified diff patch into the variable-edit output format.
3675    ///
3676    /// Parses `patch` as a unified diff against `old_text` and produces model
3677    /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
3678    /// delimiters. The diff is resolved by content matching rather than line
3679    /// numbers.
3680    pub fn patch_to_variable_edit_output(
3681        old_text: &str,
3682        patch: &str,
3683        cursor_offset: Option<usize>,
3684    ) -> Result<String> {
3685        // Parse the unified diff into hunks. Each hunk has an `old_context`
3686        // string (context + deleted lines interleaved in order) and a list of
3687        // edits expressed as byte ranges within that context plus replacement
3688        // text.
3689        let hunks = parse_hunks(patch);
3690        if hunks.is_empty() {
3691            return Ok(String::new());
3692        }
3693
3694        // Apply each hunk by finding its old_context in the text and
3695        // performing the edits. We search forward from where the previous
3696        // hunk ended so that hunks are applied in order.
3697        let mut new_text = old_text.to_string();
3698        let mut search_from: usize = 0;
3699        let mut first_hunk_pos: Option<usize> = None;
3700
3701        for hunk in &hunks {
3702            let context_pos = new_text[search_from..]
3703                .find(&hunk.old_context)
3704                .map(|pos| pos + search_from)
3705                .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
3706
3707            if first_hunk_pos.is_none() {
3708                first_hunk_pos = Some(context_pos);
3709            }
3710
3711            // Apply edits in reverse order so byte offsets remain valid.
3712            for edit in hunk.edits.iter().rev() {
3713                let abs_start = context_pos + edit.range.start;
3714                let abs_end = context_pos + edit.range.end;
3715                new_text.replace_range(abs_start..abs_end, &edit.text);
3716            }
3717
3718            // Advance past this hunk's region in the (now modified) text.
3719            let new_region_len: usize =
3720                hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
3721                    len + edit.text.len() - (edit.range.end - edit.range.start)
3722                });
3723            search_from = context_pos + new_region_len;
3724        }
3725
3726        // Now we have old_text and new_text. Find the changed line range by
3727        // comparing them.
3728        let old_lines: Vec<&str> = old_text.lines().collect();
3729        let new_lines: Vec<&str> = new_text.lines().collect();
3730
3731        // Find first differing line.
3732        let first_changed_row = old_lines
3733            .iter()
3734            .zip(new_lines.iter())
3735            .position(|(a, b)| a != b)
3736            .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
3737
3738        // Find last differing line (from the end).
3739        let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
3740        let common_suffix = old_lines
3741            .iter()
3742            .rev()
3743            .zip(new_lines.iter().rev())
3744            .take(max_suffix)
3745            .take_while(|(a, b)| a == b)
3746            .count();
3747
3748        let old_end = old_lines.len() - common_suffix;
3749        let new_end = new_lines.len() - common_suffix;
3750
3751        if first_changed_row == old_end && first_changed_row == new_end {
3752            return Ok(String::new());
3753        }
3754
3755        // Build the replacement text from new_lines[first_diff..new_end].
3756        let mut merged_new_text = String::new();
3757        for line in &new_lines[first_changed_row..new_end] {
3758            merged_new_text.push_str(line);
3759            merged_new_text.push('\n');
3760        }
3761
3762        // cursor_offset is relative to the first hunk's new content in
3763        // new_text. Translate it to an offset within merged_new_text, which
3764        // only contains lines first_diff..new_end of new_text.
3765        if let Some(hunk_offset) = cursor_offset {
3766            let hunk_start = first_hunk_pos.unwrap_or(0);
3767            let absolute_pos = hunk_start + hunk_offset;
3768
3769            // Byte offset where first_diff starts in new_text.
3770            let merged_start: usize = new_lines[..first_changed_row]
3771                .iter()
3772                .map(|line| line.len() + 1)
3773                .sum();
3774
3775            if absolute_pos >= merged_start {
3776                let relative_offset = absolute_pos - merged_start;
3777                if relative_offset <= merged_new_text.len() {
3778                    merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
3779                }
3780            }
3781        }
3782
3783        // Build output with 2 lines of context above and below.
3784        let context_lines_count = 2;
3785        let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
3786        let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
3787
3788        fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
3789            let pattern = &lines[line_range];
3790            let pattern_len = pattern.len();
3791
3792            let mut count = 0;
3793            for offset in 0..=lines.len() - pattern_len {
3794                if &lines[offset..offset + pattern_len] == pattern {
3795                    count += 1;
3796                }
3797            }
3798            count
3799        }
3800
3801        // Expand prefix and suffix until they are unique
3802        while prefix_start > 0 {
3803            if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
3804                prefix_start -= 1;
3805            } else {
3806                break;
3807            }
3808        }
3809        while suffix_end < old_lines.len() {
3810            if count_matches(old_end..suffix_end, &old_lines) > 1 {
3811                suffix_end += 1;
3812            } else {
3813                break;
3814            }
3815        }
3816
3817        let mut output = String::new();
3818        for line in &old_lines[prefix_start..first_changed_row] {
3819            output.push_str(line);
3820            output.push('\n');
3821        }
3822        output.push_str("<|fim_middle|>\n");
3823        output.push_str(&merged_new_text);
3824        output.push_str("<|fim_suffix|>\n");
3825        for line in &old_lines[old_end..suffix_end] {
3826            output.push_str(line);
3827            output.push('\n');
3828        }
3829
3830        Ok(output)
3831    }
3832
    /// One hunk of a unified diff, described by its content rather than by
    /// line numbers.
    struct ParsedHunk {
        /// The hunk's context and removed lines in patch order, each terminated
        /// by `\n`. This is the text that gets searched for in the original.
        old_context: String,
        /// Edits to apply, as byte ranges into `old_context`, in the order they
        /// appear in the patch.
        edits: Vec<ParsedEdit>,
    }
3837
    /// A single replacement inside a hunk.
    struct ParsedEdit {
        /// Byte range within the hunk's `old_context` that is replaced; empty
        /// for a pure insertion.
        range: Range<usize>,
        /// Replacement text: the added lines, each terminated by `\n`; empty
        /// for a pure deletion.
        text: String,
    }
3842
3843    /// Parse a unified diff into content-based hunks. Each hunk contains an
3844    /// `old_context` string (context lines + deleted lines, which together
3845    /// form the text that should be found in the original) and a list of edits
3846    /// expressed as byte ranges within that context.
3847    fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
3848        let mut hunks = Vec::new();
3849        let mut current: Option<ParsedHunk> = None;
3850
3851        for line in patch.lines() {
3852            if line.starts_with("@@") {
3853                if let Some(hunk) = current.take() {
3854                    if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3855                        hunks.push(hunk);
3856                    }
3857                }
3858                current = Some(ParsedHunk {
3859                    old_context: String::new(),
3860                    edits: Vec::new(),
3861                });
3862            } else if line.starts_with("---") || line.starts_with("+++") {
3863                continue;
3864            } else if let Some(hunk) = &mut current {
3865                if let Some(added) = line.strip_prefix('+') {
3866                    let pos = hunk.old_context.len();
3867                    if let Some(last_edit) = hunk.edits.last_mut() {
3868                        if last_edit.range.end == pos {
3869                            writeln!(&mut last_edit.text, "{added}").ok();
3870                            continue;
3871                        }
3872                    }
3873                    hunk.edits.push(ParsedEdit {
3874                        range: pos..pos,
3875                        text: format!("{added}\n"),
3876                    });
3877                } else if let Some(removed) = line.strip_prefix('-') {
3878                    let start = hunk.old_context.len();
3879                    writeln!(&mut hunk.old_context, "{removed}").ok();
3880                    let end = hunk.old_context.len();
3881                    if let Some(last_edit) = hunk.edits.last_mut() {
3882                        if last_edit.range.end == start {
3883                            last_edit.range.end = end;
3884                            continue;
3885                        }
3886                    }
3887                    hunk.edits.push(ParsedEdit {
3888                        range: start..end,
3889                        text: String::new(),
3890                    });
3891                } else {
3892                    let ctx = line.strip_prefix(' ').unwrap_or(line);
3893                    writeln!(&mut hunk.old_context, "{ctx}").ok();
3894                }
3895            }
3896        }
3897
3898        if let Some(hunk) = current {
3899            if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3900                hunks.push(hunk);
3901            }
3902        }
3903
3904        hunks
3905    }
3906
3907    #[cfg(test)]
3908    mod tests {
3909        use super::*;
3910        use indoc::indoc;
3911
3912        #[test]
3913        fn test_apply_variable_edit() {
3914            struct Case {
3915                name: &'static str,
3916                original: &'static str,
3917                model_output: &'static str,
3918                expected: &'static str,
3919            }
3920
3921            let cases = [
3922                Case {
3923                    name: "simple_single_line_replacement",
3924                    original: indoc! {"
3925                        zero
3926                        one
3927                        two
3928                        three
3929                        four
3930                        five
3931                    "},
3932                    model_output: indoc! {"
3933                        two
3934                        <|fim_middle|>
3935                        THREE
3936                        <|fim_suffix|>
3937                        four
3938                    "},
3939                    expected: indoc! {"
3940                        zero
3941                        one
3942                        two
3943                        THREE
3944                        four
3945                        five
3946                    "},
3947                },
3948                Case {
3949                    name: "multi_line_replacement",
3950                    original: indoc! {"
3951                        a
3952                        b
3953                        c
3954                        d
3955                        e
3956                    "},
3957                    model_output: indoc! {"
3958                        a
3959                        <|fim_middle|>
3960                        B
3961                        C
3962                        D
3963                        <|fim_suffix|>
3964                        e
3965                    "},
3966                    expected: indoc! {"
3967                        a
3968                        B
3969                        C
3970                        D
3971                        e
3972                    "},
3973                },
3974                Case {
3975                    name: "insertion_between_existing_lines",
3976                    original: indoc! {"
3977                        a
3978                        b
3979                        c
3980                    "},
3981                    model_output: indoc! {"
3982                        a
3983                        <|fim_middle|>
3984                        X
3985                        <|fim_suffix|>
3986                        b
3987                    "},
3988                    expected: indoc! {"
3989                        a
3990                        X
3991                        b
3992                        c
3993                    "},
3994                },
3995                Case {
3996                    name: "deletion",
3997                    original: indoc! {"
3998                        a
3999                        b
4000                        c
4001                        d
4002                    "},
4003                    model_output: indoc! {"
4004                        a
4005                        <|fim_middle|>
4006                        <|fim_suffix|>
4007                        c
4008                    "},
4009                    expected: indoc! {"
4010                        a
4011                        c
4012                        d
4013                    "},
4014                },
4015                Case {
4016                    name: "replacement_at_start_no_prefix_context",
4017                    original: indoc! {"
4018                        a
4019                        b
4020                        c
4021                    "},
4022                    model_output: indoc! {"
4023                        <|fim_middle|>
4024                        X
4025                        <|fim_suffix|>
4026                        b
4027                    "},
4028                    expected: indoc! {"
4029                        X
4030                        b
4031                        c
4032                    "},
4033                },
4034                Case {
4035                    name: "replacement_at_end_no_suffix_context",
4036                    original: indoc! {"
4037                        a
4038                        b
4039                        c
4040                    "},
4041                    model_output: indoc! {"
4042                        b
4043                        <|fim_middle|>
4044                        Z
4045                        <|fim_suffix|>
4046                    "},
4047                    expected: indoc! {"
4048                        a
4049                        b
4050                        Z
4051                    "},
4052                },
4053                Case {
4054                    name: "context_with_trailing_newline_is_preserved",
4055                    original: indoc! {"
4056                        a
4057                        b
4058                        c
4059                    "},
4060                    model_output: indoc! {"
4061                        a
4062                        <|fim_middle|>
4063                        B
4064                        <|fim_suffix|>
4065                        c
4066                    "},
4067                    expected: indoc! {"
4068                        a
4069                        B
4070                        c
4071                    "},
4072                },
4073                Case {
4074                    name: "cursor_marker_passes_through_untouched",
4075                    original: indoc! {"
4076                        a
4077                        b
4078                        c
4079                    "},
4080                    model_output: indoc! {"
4081                        a
4082                        <|fim_middle|>
4083                        B<|user_cursor|>B
4084                        <|fim_suffix|>
4085                        c
4086                    "},
4087                    expected: indoc! {"
4088                        a
4089                        B<|user_cursor|>B
4090                        c
4091                    "},
4092                },
4093                Case {
4094                    name: "multiple_prefix_context_lines",
4095                    original: indoc! {"
4096                        a
4097                        b
4098                        c
4099                        d
4100                        e
4101                    "},
4102                    model_output: indoc! {"
4103                        b
4104                        c
4105                        <|fim_middle|>
4106                        D
4107                        <|fim_suffix|>
4108                        e
4109                    "},
4110                    expected: indoc! {"
4111                        a
4112                        b
4113                        c
4114                        D
4115                        e
4116                    "},
4117                },
4118            ];
4119
4120            for case in cases {
4121                let (edit_range, replacement) =
4122                    apply_variable_edit(case.original, case.model_output).unwrap();
4123                let mut edited = case.original.to_string();
4124                edited.replace_range(edit_range, &replacement);
4125                assert_eq!(edited, case.expected, "{}", case.name);
4126            }
4127        }
4128
4129        #[test]
4130        fn test_patch_to_variable_edit() {
4131            struct Case {
4132                name: &'static str,
4133                old: &'static str,
4134                patch: &'static str,
4135                cursor_offset: Option<usize>,
4136                expected_variable_edit: &'static str,
4137                expected_after_apply: &'static str,
4138            }
4139
4140            let cases = [
4141                Case {
4142                    name: "simple_replacement",
4143                    old: indoc! {"
4144                        zero
4145                        one
4146                        two
4147                        three
4148                        four
4149                        five
4150                    "},
4151                    patch: indoc! {"
4152                        @@ -3,3 +3,3 @@
4153                         two
4154                        -three
4155                        +THREE
4156                         four
4157                    "},
4158                    cursor_offset: None,
4159                    expected_variable_edit: indoc! {"
4160                        one
4161                        two
4162                        <|fim_middle|>
4163                        THREE
4164                        <|fim_suffix|>
4165                        four
4166                        five
4167                    "},
4168                    expected_after_apply: indoc! {"
4169                        zero
4170                        one
4171                        two
4172                        THREE
4173                        four
4174                        five
4175                    "},
4176                },
4177                Case {
4178                    name: "insertion",
4179                    old: indoc! {"
4180                        a
4181                        b
4182                        c
4183                        d
4184                        e
4185                    "},
4186                    patch: indoc! {"
4187                        @@ -2,0 +3,1 @@
4188                         b
4189                        +X
4190                         c
4191                    "},
4192                    cursor_offset: None,
4193                    expected_variable_edit: indoc! {"
4194                        a
4195                        b
4196                        <|fim_middle|>
4197                        X
4198                        <|fim_suffix|>
4199                        c
4200                        d
4201                    "},
4202                    expected_after_apply: indoc! {"
4203                        a
4204                        b
4205                        X
4206                        c
4207                        d
4208                        e
4209                    "},
4210                },
4211                Case {
4212                    name: "deletion",
4213                    old: indoc! {"
4214                        a
4215                        b
4216                        c
4217                        d
4218                        e
4219                    "},
4220                    patch: indoc! {"
4221                        @@ -2,3 +2,2 @@
4222                         b
4223                        -c
4224                         d
4225                    "},
4226                    cursor_offset: None,
4227                    expected_variable_edit: indoc! {"
4228                        a
4229                        b
4230                        <|fim_middle|>
4231                        <|fim_suffix|>
4232                        d
4233                        e
4234                    "},
4235                    expected_after_apply: indoc! {"
4236                        a
4237                        b
4238                        d
4239                        e
4240                    "},
4241                },
4242                Case {
4243                    name: "edit_near_start",
4244                    old: indoc! {"
4245                        first
4246                        second
4247                        third
4248                        fourth
4249                    "},
4250                    patch: indoc! {"
4251                        @@ -1,1 +1,1 @@
4252                        -first
4253                        +FIRST
4254                    "},
4255                    cursor_offset: None,
4256                    expected_variable_edit: indoc! {"
4257                        <|fim_middle|>
4258                        FIRST
4259                        <|fim_suffix|>
4260                        second
4261                        third
4262                    "},
4263                    expected_after_apply: indoc! {"
4264                        FIRST
4265                        second
4266                        third
4267                        fourth
4268                    "},
4269                },
4270                Case {
4271                    name: "edit_near_end",
4272                    old: indoc! {"
4273                        first
4274                        second
4275                        third
4276                        fourth
4277                    "},
4278                    patch: indoc! {"
4279                        @@ -4,1 +4,1 @@
4280                        -fourth
4281                        +FOURTH
4282                    "},
4283                    cursor_offset: None,
4284                    expected_variable_edit: indoc! {"
4285                        second
4286                        third
4287                        <|fim_middle|>
4288                        FOURTH
4289                        <|fim_suffix|>
4290                    "},
4291                    expected_after_apply: indoc! {"
4292                        first
4293                        second
4294                        third
4295                        FOURTH
4296                    "},
4297                },
4298                Case {
4299                    name: "cursor_at_start_of_replacement",
4300                    old: indoc! {"
4301                        zero
4302                        one
4303                        two
4304                        three
4305                        four
4306                        five
4307                    "},
4308                    patch: indoc! {"
4309                        @@ -3,3 +3,3 @@
4310                         two
4311                        -three
4312                        +THREE
4313                         four
4314                    "},
4315                    cursor_offset: Some(4),
4316                    expected_variable_edit: indoc! {"
4317                        one
4318                        two
4319                        <|fim_middle|>
4320                        <|user_cursor|>THREE
4321                        <|fim_suffix|>
4322                        four
4323                        five
4324                    "},
4325                    expected_after_apply: indoc! {"
4326                        zero
4327                        one
4328                        two
4329                        <|user_cursor|>THREE
4330                        four
4331                        five
4332                    "},
4333                },
4334                Case {
4335                    name: "cursor_in_middle_of_replacement",
4336                    old: indoc! {"
4337                        zero
4338                        one
4339                        two
4340                        three
4341                        four
4342                        five
4343                    "},
4344                    patch: indoc! {"
4345                        @@ -3,3 +3,3 @@
4346                         two
4347                        -three
4348                        +THREE
4349                         four
4350                    "},
4351                    cursor_offset: Some(6),
4352                    expected_variable_edit: indoc! {"
4353                        one
4354                        two
4355                        <|fim_middle|>
4356                        TH<|user_cursor|>REE
4357                        <|fim_suffix|>
4358                        four
4359                        five
4360                    "},
4361                    expected_after_apply: indoc! {"
4362                        zero
4363                        one
4364                        two
4365                        TH<|user_cursor|>REE
4366                        four
4367                        five
4368                    "},
4369                },
4370                Case {
4371                    name: "expands_context_when_two_lines_not_unique_before_and_after",
4372                    old: indoc! {"
4373                        one
4374                        a
4375                        b
4376                        c
4377                        d
4378                        two
4379                        a
4380                        b
4381                        c
4382                        d
4383                        three
4384                        a
4385                        b
4386                        c
4387                        d
4388                        four
4389                    "},
4390                    patch: indoc! {"
4391                        @@ -4,5 +4,5 @@
4392                         two
4393                         a
4394                         b
4395                        -c
4396                        +C
4397                         d
4398                         three
4399                    "},
4400                    cursor_offset: None,
4401                    expected_variable_edit: indoc! {"
4402                        two
4403                        a
4404                        b
4405                        <|fim_middle|>
4406                        C
4407                        <|fim_suffix|>
4408                        d
4409                        three
4410                    "},
4411                    expected_after_apply: indoc! {"
4412                        one
4413                        a
4414                        b
4415                        c
4416                        d
4417                        two
4418                        a
4419                        b
4420                        C
4421                        d
4422                        three
4423                        a
4424                        b
4425                        c
4426                        d
4427                        four
4428                    "},
4429                },
4430                Case {
4431                    name: "expands_context_when_two_lines_not_unique_before_and_after",
4432                    old: indoc! {"
4433                        {
4434                            {
4435                                one();
4436                            }
4437                        }
4438                        {
4439                            {
4440                                two();
4441                            }
4442                        }
4443                        {
4444                            {
4445                                three();
4446                            }
4447                        }
4448                        {
4449                            {
4450                                four();
4451                            }
4452                        }
4453                    "},
4454                    patch: indoc! {"
4455                        @@ -4,5 +4,5 @@
4456                             {
4457                        -        two();
4458                        +        TWO();
4459                             }
4460                    "},
4461                    cursor_offset: None,
4462                    expected_variable_edit: indoc! {"
4463                                one();
4464                            }
4465                        }
4466                        {
4467                            {
4468                        <|fim_middle|>
4469                                TWO();
4470                        <|fim_suffix|>
4471                            }
4472                        }
4473                        {
4474                            {
4475                                three();
4476                    "},
4477                    expected_after_apply: indoc! {"
4478                        {
4479                            {
4480                                one();
4481                            }
4482                        }
4483                        {
4484                            {
4485                                TWO();
4486                            }
4487                        }
4488                        {
4489                            {
4490                                three();
4491                            }
4492                        }
4493                        {
4494                            {
4495                                four();
4496                            }
4497                        }
4498                    "},
4499                },
4500            ];
4501
4502            for case in cases {
4503                let output =
4504                    patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
4505                        .unwrap_or_else(|error| {
4506                            panic!("failed converting patch for {}: {error}", case.name)
4507                        });
4508                assert_eq!(
4509                    output, case.expected_variable_edit,
4510                    "patch->variable_edit mismatch for {}",
4511                    case.name
4512                );
4513
4514                let (edit_range, replacement) = apply_variable_edit(case.old, &output)
4515                    .unwrap_or_else(|error| {
4516                        panic!("failed applying variable_edit for {}: {error}", case.name)
4517                    });
4518                let mut edited_by_variable_edit = case.old.to_string();
4519                edited_by_variable_edit.replace_range(edit_range, &replacement);
4520                assert_eq!(
4521                    edited_by_variable_edit, case.expected_after_apply,
4522                    "variable_edit apply mismatch for {}",
4523                    case.name
4524                );
4525
4526                let (expected_edit_range, expected_replacement) =
4527                    apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
4528                        |error| {
4529                            panic!(
4530                                "failed applying expected variable_edit for {}: {error}",
4531                                case.name
4532                            )
4533                        },
4534                    );
4535                let mut edited_by_expected_variable_edit = case.old.to_string();
4536                edited_by_expected_variable_edit
4537                    .replace_range(expected_edit_range, &expected_replacement);
4538                assert_eq!(
4539                    edited_by_expected_variable_edit, case.expected_after_apply,
4540                    "expected variable_edit apply mismatch for {}",
4541                    case.name
4542                );
4543            }
4544        }
4545
4546        #[test]
4547        fn test_write_cursor_excerpt_section() {
4548            let path = Path::new("test.rs");
4549            let context = "fn main() {\n    hello();\n}\n";
4550            let cursor_offset = 17;
4551            let mut prompt = String::new();
4552            write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
4553            assert_eq!(
4554                prompt,
4555                "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
4556            );
4557        }
4558    }
4559}
4560
4561/// The zeta1 prompt format
4562pub mod zeta1 {
4563    use super::*;
4564    use std::fmt::Write;
4565
4566    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
4567    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
4568    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
4569    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
4570
4571    const INSTRUCTION_HEADER: &str = concat!(
4572        "### Instruction:\n",
4573        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4574        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4575        "into account the cursor location.\n\n",
4576        "### User Edits:\n\n"
4577    );
4578    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
4579    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
4580
4581    /// Formats a complete zeta1 prompt from the input events and excerpt.
4582    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
4583        let mut prompt = String::with_capacity(
4584            INSTRUCTION_HEADER.len()
4585                + input_events.len()
4586                + EXCERPT_HEADER.len()
4587                + input_excerpt.len()
4588                + RESPONSE_HEADER.len(),
4589        );
4590        prompt.push_str(INSTRUCTION_HEADER);
4591        prompt.push_str(input_events);
4592        prompt.push_str(EXCERPT_HEADER);
4593        prompt.push_str(input_excerpt);
4594        prompt.push_str(RESPONSE_HEADER);
4595        prompt
4596    }
4597
4598    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
4599    /// editable and context byte-offset ranges within `cursor_excerpt`.
4600    pub fn format_zeta1_from_input(
4601        input: &ZetaPromptInput,
4602        editable_range: Range<usize>,
4603        context_range: Range<usize>,
4604    ) -> String {
4605        let events = format_zeta1_events(&input.events);
4606        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
4607        format_zeta1_prompt(&events, &excerpt)
4608    }
4609
4610    /// Formats events in zeta1 style (oldest first).
4611    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
4612        let mut result = String::new();
4613        for event in
4614            events
4615                .iter()
4616                .skip(events.len().saturating_sub(max_edit_event_count_for_format(
4617                    &ZetaFormat::V0114180EditableRegion,
4618                )))
4619        {
4620            let event_string = format_zeta1_event(event);
4621            if event_string.is_empty() {
4622                continue;
4623            }
4624            if !result.is_empty() {
4625                result.push_str("\n\n");
4626            }
4627            result.push_str(&event_string);
4628        }
4629        result
4630    }
4631
4632    fn format_zeta1_event(event: &Event) -> String {
4633        match event {
4634            Event::BufferChange {
4635                path,
4636                old_path,
4637                diff,
4638                ..
4639            } => {
4640                let mut prompt = String::new();
4641                if old_path != path {
4642                    writeln!(
4643                        prompt,
4644                        "User renamed {} to {}\n",
4645                        old_path.display(),
4646                        path.display()
4647                    )
4648                    .ok();
4649                }
4650                if !diff.is_empty() {
4651                    write!(
4652                        prompt,
4653                        "User edited {}:\n```diff\n{}\n```",
4654                        path.display(),
4655                        diff
4656                    )
4657                    .ok();
4658                }
4659                prompt
4660            }
4661        }
4662    }
4663
4664    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
4665    /// within `cursor_excerpt`.
4666    fn format_zeta1_excerpt(
4667        input: &ZetaPromptInput,
4668        editable_range: Range<usize>,
4669        context_range: Range<usize>,
4670    ) -> String {
4671        let path_str = input.cursor_path.to_string_lossy();
4672        let excerpt = &*input.cursor_excerpt;
4673        let cursor_offset = input.cursor_offset_in_excerpt;
4674
4675        let mut prompt = String::new();
4676        writeln!(&mut prompt, "```{path_str}").ok();
4677
4678        let starts_at_file_beginning =
4679            input.excerpt_start_row == Some(0) && context_range.start == 0;
4680        if starts_at_file_beginning {
4681            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
4682        }
4683
4684        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
4685
4686        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
4687        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
4688        prompt.push_str(CURSOR_MARKER);
4689        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
4690        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
4691
4692        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
4693        write!(prompt, "\n```").ok();
4694
4695        prompt
4696    }
4697
4698    /// Cleans zeta1 model output by extracting content between editable region
4699    /// markers and converting the zeta1 cursor marker to the universal one.
4700    /// Returns `None` if the output doesn't contain the expected markers.
4701    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
4702        let content = output.replace(CURSOR_MARKER, "");
4703
4704        let content_start = content
4705            .find(EDITABLE_REGION_START_MARKER)
4706            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
4707            .map(|pos| {
4708                if content.as_bytes().get(pos) == Some(&b'\n') {
4709                    pos + 1
4710                } else {
4711                    pos
4712                }
4713            })
4714            .unwrap_or(0);
4715
4716        let content_end = content
4717            .find(EDITABLE_REGION_END_MARKER)
4718            .map(|pos| {
4719                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
4720                    pos - 1
4721                } else {
4722                    pos
4723                }
4724            })
4725            .unwrap_or(content.len());
4726
4727        if content_start > content_end {
4728            return Some(String::new());
4729        }
4730
4731        let extracted = &content[content_start..content_end];
4732
4733        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
4734            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
4735            let text_before_cursor = text_before_cursor
4736                .find(EDITABLE_REGION_START_MARKER)
4737                .map(|pos| {
4738                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
4739                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
4740                        after_marker + 1
4741                    } else {
4742                        after_marker
4743                    }
4744                })
4745                .unwrap_or(0);
4746            let offset_in_extracted = zeta1_cursor_pos
4747                .saturating_sub(text_before_cursor)
4748                .min(extracted.len());
4749            offset_in_extracted
4750        });
4751
4752        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
4753        if let Some(offset) = cursor_offset {
4754            result.push_str(&extracted[..offset]);
4755            result.push_str(super::CURSOR_MARKER);
4756            result.push_str(&extracted[offset..]);
4757        } else {
4758            result.push_str(extracted);
4759        }
4760
4761        Some(result)
4762    }
4763}
4764
4765#[cfg(test)]
4766mod tests {
4767    use super::*;
4768    use indoc::indoc;
4769
4770    fn make_input(
4771        cursor_excerpt: &str,
4772        editable_range: Range<usize>,
4773        cursor_offset: usize,
4774        events: Vec<Event>,
4775        related_files: Vec<RelatedFile>,
4776    ) -> ZetaPromptInput {
4777        let context_range = 0..cursor_excerpt.len();
4778        ZetaPromptInput {
4779            cursor_path: Path::new("test.rs").into(),
4780            cursor_excerpt: cursor_excerpt.into(),
4781            cursor_offset_in_excerpt: cursor_offset,
4782            excerpt_start_row: None,
4783            events: events.into_iter().map(Arc::new).collect(),
4784            related_files: Some(related_files),
4785            active_buffer_diagnostics: vec![],
4786            excerpt_ranges: ExcerptRanges {
4787                editable_150: editable_range.clone(),
4788                editable_180: editable_range.clone(),
4789                editable_350: editable_range,
4790                editable_150_context_350: context_range.clone(),
4791                editable_180_context_350: context_range.clone(),
4792                editable_350_context_150: context_range,
4793                ..Default::default()
4794            },
4795            syntax_ranges: None,
4796            in_open_source_repo: false,
4797            can_collect_data: false,
4798            repo_url: None,
4799        }
4800    }
4801
4802    fn make_input_with_context_range(
4803        excerpt: &str,
4804        editable_range: Range<usize>,
4805        context_range: Range<usize>,
4806        cursor_offset: usize,
4807    ) -> ZetaPromptInput {
4808        ZetaPromptInput {
4809            cursor_path: Path::new("test.rs").into(),
4810            cursor_excerpt: excerpt.into(),
4811            cursor_offset_in_excerpt: cursor_offset,
4812            excerpt_start_row: None,
4813            events: vec![],
4814            related_files: Some(vec![]),
4815            active_buffer_diagnostics: vec![],
4816            excerpt_ranges: ExcerptRanges {
4817                editable_150: editable_range.clone(),
4818                editable_180: editable_range.clone(),
4819                editable_350: editable_range,
4820                editable_150_context_350: context_range.clone(),
4821                editable_180_context_350: context_range.clone(),
4822                editable_350_context_150: context_range,
4823                ..Default::default()
4824            },
4825            syntax_ranges: None,
4826            in_open_source_repo: false,
4827            can_collect_data: false,
4828            repo_url: None,
4829        }
4830    }
4831
4832    fn make_event(path: &str, diff: &str) -> Event {
4833        Event::BufferChange {
4834            path: Path::new(path).into(),
4835            old_path: Path::new(path).into(),
4836            diff: diff.to_string(),
4837            predicted: false,
4838            in_open_source_repo: false,
4839        }
4840    }
4841
4842    fn make_related_file(path: &str, content: &str) -> RelatedFile {
4843        RelatedFile {
4844            path: Path::new(path).into(),
4845            max_row: content.lines().count() as u32,
4846            excerpts: vec![RelatedExcerpt {
4847                row_range: 0..content.lines().count() as u32,
4848                text: content.into(),
4849                order: 0,
4850            }],
4851            in_open_source_repo: false,
4852        }
4853    }
4854
4855    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> Option<String> {
4856        format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4857    }
4858
4859    fn budget_with_margin(requested_tokens: usize) -> usize {
4860        ((requested_tokens as f64) / 0.9).ceil() as usize
4861    }
4862
4863    #[test]
4864    fn test_no_truncation_when_within_budget() {
4865        let input = make_input(
4866            "prefix\neditable\nsuffix",
4867            7..15,
4868            10,
4869            vec![make_event("a.rs", "-old\n+new\n")],
4870            vec![make_related_file("related.rs", "fn helper() {}\n")],
4871        );
4872
4873        assert_eq!(
4874            format_with_budget(&input, 10000).unwrap(),
4875            indoc! {r#"
4876                <|file_sep|>related.rs
4877                fn helper() {}
4878                <|file_sep|>edit history
4879                --- a/a.rs
4880                +++ b/a.rs
4881                -old
4882                +new
4883                <|file_sep|>test.rs
4884                <|fim_prefix|>
4885                prefix
4886                <|fim_middle|>current
4887                edi<|user_cursor|>table
4888                <|fim_suffix|>
4889
4890                suffix
4891                <|fim_middle|>updated
4892            "#}
4893            .to_string()
4894        );
4895    }
4896
4897    #[test]
4898    fn test_truncation_drops_edit_history_when_budget_tight() {
4899        let input = make_input(
4900            "code",
4901            0..4,
4902            2,
4903            vec![make_event("a.rs", "-x\n+y\n")],
4904            vec![
4905                make_related_file("r1.rs", "aaaaaaa\n"),
4906                make_related_file("r2.rs", "bbbbbbb\n"),
4907            ],
4908        );
4909
4910        assert_eq!(
4911            format_with_budget(&input, 10000).unwrap(),
4912            indoc! {r#"
4913                <|file_sep|>r1.rs
4914                aaaaaaa
4915                <|file_sep|>r2.rs
4916                bbbbbbb
4917                <|file_sep|>edit history
4918                --- a/a.rs
4919                +++ b/a.rs
4920                -x
4921                +y
4922                <|file_sep|>test.rs
4923                <|fim_prefix|>
4924                <|fim_middle|>current
4925                co<|user_cursor|>de
4926                <|fim_suffix|>
4927                <|fim_middle|>updated
4928            "#}
4929            .to_string()
4930        );
4931
4932        assert_eq!(
4933            format_with_budget(&input, budget_with_margin(55)),
4934            Some(
4935                indoc! {r#"
4936                <|file_sep|>edit history
4937                --- a/a.rs
4938                +++ b/a.rs
4939                -x
4940                +y
4941                <|file_sep|>test.rs
4942                <|fim_prefix|>
4943                <|fim_middle|>current
4944                co<|user_cursor|>de
4945                <|fim_suffix|>
4946                <|fim_middle|>updated
4947            "#}
4948                .to_string()
4949            )
4950        );
4951    }
4952
4953    #[test]
4954    fn test_truncation_includes_partial_excerpts() {
4955        let input = make_input(
4956            "x",
4957            0..1,
4958            0,
4959            vec![],
4960            vec![RelatedFile {
4961                path: Path::new("big.rs").into(),
4962                max_row: 30,
4963                in_open_source_repo: false,
4964                excerpts: vec![
4965                    RelatedExcerpt {
4966                        row_range: 0..10,
4967                        text: "first excerpt\n".into(),
4968                        order: 0,
4969                    },
4970                    RelatedExcerpt {
4971                        row_range: 10..20,
4972                        text: "second excerpt\n".into(),
4973                        order: 0,
4974                    },
4975                    RelatedExcerpt {
4976                        row_range: 20..30,
4977                        text: "third excerpt\n".into(),
4978                        order: 0,
4979                    },
4980                ],
4981            }],
4982        );
4983
4984        assert_eq!(
4985            format_with_budget(&input, 10000).unwrap(),
4986            indoc! {r#"
4987                <|file_sep|>big.rs
4988                first excerpt
4989                ...
4990                second excerpt
4991                ...
4992                third excerpt
4993                <|file_sep|>test.rs
4994                <|fim_prefix|>
4995                <|fim_middle|>current
4996                <|user_cursor|>x
4997                <|fim_suffix|>
4998                <|fim_middle|>updated
4999            "#}
5000            .to_string()
5001        );
5002
5003        assert_eq!(
5004            format_with_budget(&input, budget_with_margin(50)).unwrap(),
5005            indoc! {r#"
5006                <|file_sep|>big.rs
5007                first excerpt
5008                ...
5009                <|file_sep|>test.rs
5010                <|fim_prefix|>
5011                <|fim_middle|>current
5012                <|user_cursor|>x
5013                <|fim_suffix|>
5014                <|fim_middle|>updated
5015            "#}
5016            .to_string()
5017        );
5018    }
5019
5020    #[test]
5021    fn test_truncation_prioritizes_lower_order_excerpts() {
5022        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
5023        // With tight budget, only the lower-order excerpt from file_b should be included.
5024        let input = make_input(
5025            "x",
5026            0..1,
5027            0,
5028            vec![],
5029            vec![
5030                RelatedFile {
5031                    path: Path::new("file_a.rs").into(),
5032                    max_row: 10,
5033                    in_open_source_repo: false,
5034                    excerpts: vec![RelatedExcerpt {
5035                        row_range: 0..10,
5036                        text: "low priority content\n".into(),
5037                        order: 5,
5038                    }],
5039                },
5040                RelatedFile {
5041                    path: Path::new("file_b.rs").into(),
5042                    max_row: 10,
5043                    in_open_source_repo: false,
5044                    excerpts: vec![RelatedExcerpt {
5045                        row_range: 0..10,
5046                        text: "high priority content\n".into(),
5047                        order: 1,
5048                    }],
5049                },
5050            ],
5051        );
5052
5053        // With large budget, both files included; rendered in stable lexicographic order.
5054        assert_eq!(
5055            format_with_budget(&input, 10000).unwrap(),
5056            indoc! {r#"
5057                <|file_sep|>file_a.rs
5058                low priority content
5059                <|file_sep|>file_b.rs
5060                high priority content
5061                <|file_sep|>test.rs
5062                <|fim_prefix|>
5063                <|fim_middle|>current
5064                <|user_cursor|>x
5065                <|fim_suffix|>
5066                <|fim_middle|>updated
5067            "#}
5068            .to_string()
5069        );
5070
5071        // With tight budget, only file_b (lower order) fits.
5072        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
5073        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
5074        // file_a would need another 14 tokens, which doesn't fit.
5075        assert_eq!(
5076            format_with_budget(&input, budget_with_margin(52)).unwrap(),
5077            indoc! {r#"
5078                <|file_sep|>file_b.rs
5079                high priority content
5080                <|file_sep|>test.rs
5081                <|fim_prefix|>
5082                <|fim_middle|>current
5083                <|user_cursor|>x
5084                <|fim_suffix|>
5085                <|fim_middle|>updated
5086            "#}
5087            .to_string()
5088        );
5089    }
5090
5091    #[test]
5092    fn test_truncation_drops_high_order_excerpts_within_file() {
5093        // A single file has excerpts at order 1 and order 3. With a tight budget,
5094        // only the order-1 excerpts are included while the order-3 excerpt is
5095        // dropped — even though they belong to the same file. This also preserves
5096        // the parent invariant: parent outline items have order ≤ their best
5097        // child, so they're always included when any child is.
5098        let input = make_input(
5099            "x",
5100            0..1,
5101            0,
5102            vec![],
5103            vec![RelatedFile {
5104                path: Path::new("mod.rs").into(),
5105                max_row: 30,
5106                in_open_source_repo: false,
5107                excerpts: vec![
5108                    RelatedExcerpt {
5109                        row_range: 0..5,
5110                        text: "mod header\n".into(),
5111                        order: 1,
5112                    },
5113                    RelatedExcerpt {
5114                        row_range: 5..15,
5115                        text: "important fn\n".into(),
5116                        order: 1,
5117                    },
5118                    RelatedExcerpt {
5119                        row_range: 15..30,
5120                        text: "less important fn\n".into(),
5121                        order: 3,
5122                    },
5123                ],
5124            }],
5125        );
5126
5127        // With large budget, all three excerpts included.
5128        assert_eq!(
5129            format_with_budget(&input, 10000).unwrap(),
5130            indoc! {r#"
5131                <|file_sep|>mod.rs
5132                mod header
5133                ...
5134                important fn
5135                ...
5136                less important fn
5137                <|file_sep|>test.rs
5138                <|fim_prefix|>
5139                <|fim_middle|>current
5140                <|user_cursor|>x
5141                <|fim_suffix|>
5142                <|fim_middle|>updated
5143            "#}
5144            .to_string()
5145        );
5146
5147        // With tight budget, only order<=1 excerpts included (header + important fn).
5148        assert_eq!(
5149            format_with_budget(&input, budget_with_margin(55)).unwrap(),
5150            indoc! {r#"
5151                <|file_sep|>mod.rs
5152                mod header
5153                ...
5154                important fn
5155                ...
5156                <|file_sep|>test.rs
5157                <|fim_prefix|>
5158                <|fim_middle|>current
5159                <|user_cursor|>x
5160                <|fim_suffix|>
5161                <|fim_middle|>updated
5162            "#}
5163            .to_string()
5164        );
5165    }
5166
5167    #[test]
5168    fn test_truncation_drops_older_events_first() {
5169        let input = make_input(
5170            "x",
5171            0..1,
5172            0,
5173            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
5174            vec![],
5175        );
5176
5177        assert_eq!(
5178            format_with_budget(&input, 10000).unwrap(),
5179            indoc! {r#"
5180                <|file_sep|>edit history
5181                --- a/old.rs
5182                +++ b/old.rs
5183                -1
5184                --- a/new.rs
5185                +++ b/new.rs
5186                -2
5187                <|file_sep|>test.rs
5188                <|fim_prefix|>
5189                <|fim_middle|>current
5190                <|user_cursor|>x
5191                <|fim_suffix|>
5192                <|fim_middle|>updated
5193            "#}
5194            .to_string()
5195        );
5196
5197        assert_eq!(
5198            format_with_budget(&input, 60).unwrap(),
5199            indoc! {r#"
5200                <|file_sep|>edit history
5201                --- a/new.rs
5202                +++ b/new.rs
5203                -2
5204                <|file_sep|>test.rs
5205                <|fim_prefix|>
5206                <|fim_middle|>current
5207                <|user_cursor|>x
5208                <|fim_suffix|>
5209                <|fim_middle|>updated
5210            "#}
5211            .to_string()
5212        );
5213    }
5214
5215    #[test]
5216    fn test_cursor_excerpt_always_included_with_minimal_budget() {
5217        let input = make_input(
5218            "fn main() {}",
5219            0..12,
5220            3,
5221            vec![make_event("a.rs", "-old\n+new\n")],
5222            vec![make_related_file("related.rs", "helper\n")],
5223        );
5224
5225        assert!(format_with_budget(&input, 30).is_none())
5226    }
5227
5228    #[track_caller]
5229    fn format_seed_coder(input: &ZetaPromptInput) -> String {
5230        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
5231            .expect("seed coder prompt formatting should succeed")
5232    }
5233
5234    #[track_caller]
5235    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
5236        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
5237            .expect("seed coder prompt formatting should succeed")
5238    }
5239
5240    #[test]
5241    fn test_seed_coder_alias_matches_v0211_seed_coder() {
5242        let input = make_input(
5243            "prefix\neditable\nsuffix",
5244            7..15,
5245            10,
5246            vec![make_event("a.rs", "-old\n+new\n")],
5247            vec![make_related_file("related.rs", "fn helper() {}\n")],
5248        );
5249
5250        assert_eq!(
5251            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 10000),
5252            format_prompt_with_budget_for_format(&input, ZetaFormat::V0331SeedCoderModelPy, 10000)
5253        );
5254        assert_eq!(
5255            ZetaFormat::parse("V0331SeedCoderModelPy").unwrap(),
5256            ZetaFormat::V0331SeedCoderModelPy
5257        );
5258    }
5259
5260    #[test]
5261    fn test_seed_coder_basic_format() {
5262        let input = make_input(
5263            "prefix\neditable\nsuffix",
5264            7..15,
5265            10,
5266            vec![make_event("a.rs", "-old\n+new\n")],
5267            vec![make_related_file("related.rs", "fn helper() {}\n")],
5268        );
5269
5270        assert_eq!(
5271            format_seed_coder(&input),
5272            indoc! {r#"
5273                <[fim-suffix]>
5274                suffix
5275                <[fim-prefix]><filename>related.rs
5276                fn helper() {}
5277
5278                <filename>edit_history
5279                --- a/a.rs
5280                +++ b/a.rs
5281                -old
5282                +new
5283
5284                <filename>test.rs
5285                prefix
5286                <<<<<<< CURRENT
5287                edi<|user_cursor|>table
5288                =======
5289                <[fim-middle]>"#}
5290        );
5291    }
5292
5293    #[test]
5294    fn test_v0420_formats_diagnostics_before_related_files() {
5295        let mut input = make_input(
5296            "prefix\neditable\nsuffix",
5297            7..15,
5298            10,
5299            vec![],
5300            vec![make_related_file("related.rs", "fn helper() {}\n")],
5301        );
5302        input.active_buffer_diagnostics = vec![ActiveBufferDiagnostic {
5303            severity: Some(1),
5304            message: "missing semicolon".to_string(),
5305            snippet: "let value = 1".to_string(),
5306            snippet_buffer_row_range: 1..2,
5307            diagnostic_range_in_snippet: 12..13,
5308        }];
5309
5310        let prompt =
5311            format_prompt_with_budget_for_format(&input, ZetaFormat::V0420Diagnostics, 10000)
5312                .expect("v0420 prompt formatting should succeed");
5313
5314        assert_eq!(
5315            prompt,
5316            indoc! {r#"
5317                <[fim-suffix]>
5318                suffix
5319                <[fim-prefix]><filename>diagnostics
5320                *missing semicolon*:
5321                ```
5322                let value = 1
5323                ```
5324
5325                <filename>related.rs
5326                fn helper() {}
5327
5328                <filename>test.rs
5329                prefix
5330                <|marker_1|>edi<|user_cursor|>table<|marker_2|>
5331                <[fim-middle]>"#}
5332        );
5333    }
5334
5335    #[test]
5336    fn test_v0317_formats_prompt_with_many_related_files() {
5337        let related_files = (0..900)
5338            .map(|index| {
5339                make_related_file(
5340                    &format!("related_{index}.rs"),
5341                    "fn helper() {\n    let value = 1;\n}\n",
5342                )
5343            })
5344            .collect();
5345
5346        let input = make_input(
5347            "code",
5348            0..4,
5349            2,
5350            vec![make_event("a.rs", "-x\n+y\n")],
5351            related_files,
5352        );
5353
5354        let prompt =
5355            format_prompt_with_budget_for_format(&input, ZetaFormat::V0317SeedMultiRegions, 4096);
5356
5357        assert!(prompt.is_some());
5358        let prompt = prompt.expect("v0317 should produce a prompt under high related-file count");
5359        assert!(prompt.contains("test.rs"));
5360        assert!(prompt.contains(CURSOR_MARKER));
5361    }
5362
5363    #[test]
5364    fn test_v0327_formats_single_file_prompt_without_related_files() {
5365        let excerpt = indoc! {"
5366            line01
5367            line02
5368            line03
5369            line04
5370            line05
5371            line06
5372            line07
5373            line08
5374            line09
5375            line10
5376            line11
5377            line12
5378            line13
5379            line14
5380            line15
5381            line16
5382            line17
5383            line18
5384            line19
5385            line20
5386        "};
5387        let cursor_offset = excerpt.find("line10").expect("cursor line exists");
5388        let input = make_input(
5389            excerpt,
5390            0..excerpt.len(),
5391            cursor_offset,
5392            vec![make_event("a.rs", "-x\n+y\n")],
5393            vec![make_related_file("related.rs", "fn helper() {}\n")],
5394        );
5395
5396        let prompt =
5397            format_prompt_with_budget_for_format(&input, ZetaFormat::V0327SingleFile, 4096)
5398                .expect("v0327 prompt should fit");
5399
5400        assert!(prompt.contains("line01"));
5401        assert!(prompt.contains("line20"));
5402        assert!(prompt.contains("<filename>edit_history"));
5403        assert!(prompt.contains("<filename>test.rs"));
5404        assert!(prompt.contains(CURSOR_MARKER));
5405        assert!(!prompt.contains("related.rs"));
5406        assert!(!prompt.contains("fn helper() {}"));
5407    }
5408
5409    #[test]
5410    fn test_v0327_resolve_cursor_region_uses_full_excerpt_context() {
5411        let excerpt = (0..80)
5412            .map(|index| format!("l{index:02}\n"))
5413            .collect::<String>();
5414        let cursor_offset = excerpt.find("l40").expect("cursor line exists");
5415        let input = make_input(&excerpt, 0..excerpt.len(), cursor_offset, vec![], vec![]);
5416
5417        let (context, editable_range, context_range, adjusted_cursor) =
5418            resolve_cursor_region(&input, ZetaFormat::V0327SingleFile);
5419
5420        assert_eq!(context, excerpt);
5421        assert_eq!(context_range, 0..excerpt.len());
5422        assert_eq!(adjusted_cursor, cursor_offset);
5423        assert!(editable_range.start < adjusted_cursor);
5424        assert!(editable_range.end > adjusted_cursor);
5425        assert!(editable_range.end < excerpt.len());
5426    }
5427
5428    #[test]
5429    fn test_seed_coder_no_context() {
5430        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
5431
5432        assert_eq!(
5433            format_seed_coder(&input),
5434            indoc! {r#"
5435                <[fim-suffix]>
5436                after
5437                <[fim-prefix]><filename>test.rs
5438                before
5439                <<<<<<< CURRENT
5440                mid<|user_cursor|>dle
5441                =======
5442                <[fim-middle]>"#}
5443        );
5444    }
5445
5446    #[test]
5447    fn test_seed_coder_truncation_drops_context() {
5448        let input = make_input(
5449            "code",
5450            0..4,
5451            2,
5452            vec![make_event("a.rs", "-x\n+y\n")],
5453            vec![make_related_file("r1.rs", "content\n")],
5454        );
5455
5456        // With large budget, everything is included
5457        assert_eq!(
5458            format_seed_coder(&input),
5459            indoc! {r#"
5460                <[fim-suffix]>
5461                <[fim-prefix]><filename>r1.rs
5462                content
5463
5464                <filename>edit_history
5465                --- a/a.rs
5466                +++ b/a.rs
5467                -x
5468                +y
5469
5470                <filename>test.rs
5471                <<<<<<< CURRENT
5472                co<|user_cursor|>de
5473                =======
5474                <[fim-middle]>"#}
5475        );
5476
5477        assert_eq!(
5478            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 24),
5479            None
5480        );
5481
5482        assert_eq!(
5483            format_seed_coder_with_budget(&input, 40),
5484            indoc! {r#"
5485                <[fim-suffix]>
5486                <[fim-prefix]><filename>test.rs
5487                <<<<<<< CURRENT
5488                co<|user_cursor|>de
5489                =======
5490                <[fim-middle]>"#
5491            }
5492        )
5493    }
5494
5495    #[test]
5496    fn test_seed_coder_truncation_prioritizes_lower_order() {
5497        let input = make_input(
5498            "code",
5499            0..4,
5500            2,
5501            vec![],
5502            vec![
5503                RelatedFile {
5504                    path: Path::new("low_prio.rs").into(),
5505                    max_row: 5,
5506                    in_open_source_repo: false,
5507                    excerpts: vec![RelatedExcerpt {
5508                        row_range: 0..5,
5509                        text: "low prio\n".into(),
5510                        order: 10,
5511                    }],
5512                },
5513                RelatedFile {
5514                    path: Path::new("high_prio.rs").into(),
5515                    max_row: 5,
5516                    in_open_source_repo: false,
5517                    excerpts: vec![RelatedExcerpt {
5518                        row_range: 0..5,
5519                        text: "high prio\n".into(),
5520                        order: 1,
5521                    }],
5522                },
5523            ],
5524        );
5525
5526        // With large budget, both included; rendered in stable lexicographic order.
5527        assert_eq!(
5528            format_seed_coder(&input),
5529            indoc! {r#"
5530                <[fim-suffix]>
5531                <[fim-prefix]><filename>low_prio.rs
5532                low prio
5533                <filename>high_prio.rs
5534                high prio
5535
5536                <filename>test.rs
5537                <<<<<<< CURRENT
5538                co<|user_cursor|>de
5539                =======
5540                <[fim-middle]>"#}
5541        );
5542
5543        // With tight budget under the generic heuristic, context is dropped but the
5544        // minimal cursor section still fits.
5545        assert_eq!(
5546            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 44),
5547            Some(
5548                indoc! {r#"
5549                    <[fim-suffix]>
5550                    <[fim-prefix]><filename>test.rs
5551                    <<<<<<< CURRENT
5552                    co<|user_cursor|>de
5553                    =======
5554                    <[fim-middle]>"#}
5555                .to_string()
5556            )
5557        );
5558    }
5559
5560    #[test]
5561    fn test_format_zeta1_from_input_basic() {
5562        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
5563        let input = ZetaPromptInput {
5564            cursor_path: Path::new("src/main.rs").into(),
5565            cursor_excerpt: excerpt.into(),
5566            cursor_offset_in_excerpt: 30,
5567            excerpt_start_row: Some(0),
5568            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
5569            related_files: Some(vec![]),
5570            active_buffer_diagnostics: vec![],
5571            excerpt_ranges: ExcerptRanges {
5572                editable_150: 15..41,
5573                editable_180: 15..41,
5574                editable_350: 15..41,
5575                editable_150_context_350: 0..excerpt.len(),
5576                editable_180_context_350: 0..excerpt.len(),
5577                editable_350_context_150: 0..excerpt.len(),
5578                ..Default::default()
5579            },
5580            syntax_ranges: None,
5581            in_open_source_repo: false,
5582            can_collect_data: false,
5583            repo_url: None,
5584        };
5585
5586        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
5587
5588        assert_eq!(
5589            prompt,
5590            concat!(
5591                "### Instruction:\n",
5592                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5593                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5594                "into account the cursor location.\n",
5595                "\n",
5596                "### User Edits:\n",
5597                "\n",
5598                "User edited other.rs:\n",
5599                "```diff\n",
5600                "-old\n",
5601                "+new\n",
5602                "\n",
5603                "```\n",
5604                "\n",
5605                "### User Excerpt:\n",
5606                "\n",
5607                "```src/main.rs\n",
5608                "<|start_of_file|>\n",
5609                "fn before() {}\n",
5610                "<|editable_region_start|>\n",
5611                "fn foo() {\n",
5612                "    <|user_cursor_is_here|>let x = 1;\n",
5613                "\n",
5614                "<|editable_region_end|>}\n",
5615                "fn after() {}\n",
5616                "\n",
5617                "```\n",
5618                "\n",
5619                "### Response:\n",
5620            ),
5621        );
5622    }
5623
5624    #[test]
5625    fn test_format_zeta1_from_input_no_start_of_file() {
5626        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
5627        let input = ZetaPromptInput {
5628            cursor_path: Path::new("src/main.rs").into(),
5629            cursor_excerpt: excerpt.into(),
5630            cursor_offset_in_excerpt: 15,
5631            excerpt_start_row: Some(10),
5632            events: vec![],
5633            related_files: Some(vec![]),
5634            active_buffer_diagnostics: vec![],
5635            excerpt_ranges: ExcerptRanges {
5636                editable_150: 0..28,
5637                editable_180: 0..28,
5638                editable_350: 0..28,
5639                editable_150_context_350: 0..28,
5640                editable_180_context_350: 0..28,
5641                editable_350_context_150: 0..28,
5642                ..Default::default()
5643            },
5644            syntax_ranges: None,
5645            in_open_source_repo: false,
5646            can_collect_data: false,
5647            repo_url: None,
5648        };
5649
5650        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
5651
5652        assert_eq!(
5653            prompt,
5654            concat!(
5655                "### Instruction:\n",
5656                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5657                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5658                "into account the cursor location.\n",
5659                "\n",
5660                "### User Edits:\n",
5661                "\n",
5662                "\n",
5663                "\n",
5664                "### User Excerpt:\n",
5665                "\n",
5666                "```src/main.rs\n",
5667                "<|editable_region_start|>\n",
5668                "fn foo() {\n",
5669                "    <|user_cursor_is_here|>let x = 1;\n",
5670                "}\n",
5671                "\n",
5672                "<|editable_region_end|>\n",
5673                "```\n",
5674                "\n",
5675                "### Response:\n",
5676            ),
5677        );
5678    }
5679
5680    #[test]
5681    fn test_format_zeta1_from_input_with_sub_ranges() {
5682        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
5683        let editable_range = 10..37;
5684        let context_range = 0..excerpt.len();
5685
5686        let input = ZetaPromptInput {
5687            cursor_path: Path::new("test.rs").into(),
5688            cursor_excerpt: excerpt.into(),
5689            cursor_offset_in_excerpt: 25,
5690            excerpt_start_row: Some(0),
5691            events: vec![],
5692            related_files: Some(vec![]),
5693            active_buffer_diagnostics: vec![],
5694            excerpt_ranges: ExcerptRanges {
5695                editable_150: editable_range.clone(),
5696                editable_180: editable_range.clone(),
5697                editable_350: editable_range.clone(),
5698                editable_150_context_350: context_range.clone(),
5699                editable_180_context_350: context_range.clone(),
5700                editable_350_context_150: context_range.clone(),
5701                ..Default::default()
5702            },
5703            syntax_ranges: None,
5704            in_open_source_repo: false,
5705            can_collect_data: false,
5706            repo_url: None,
5707        };
5708
5709        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
5710
5711        assert_eq!(
5712            prompt,
5713            concat!(
5714                "### Instruction:\n",
5715                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
5716                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
5717                "into account the cursor location.\n",
5718                "\n",
5719                "### User Edits:\n",
5720                "\n",
5721                "\n",
5722                "\n",
5723                "### User Excerpt:\n",
5724                "\n",
5725                "```test.rs\n",
5726                "<|start_of_file|>\n",
5727                "// prefix\n",
5728                "<|editable_region_start|>\n",
5729                "fn foo() {\n",
5730                "    <|user_cursor_is_here|>let x = 1;\n",
5731                "}\n",
5732                "<|editable_region_end|>\n",
5733                "// suffix\n",
5734                "\n",
5735                "```\n",
5736                "\n",
5737                "### Response:\n",
5738            ),
5739        );
5740    }
5741
5742    #[test]
5743    fn test_max_event_count() {
5744        fn make_numbered_event(index: usize) -> Event {
5745            return make_event(
5746                &format!("event-{index}.rs"),
5747                &format!("-old-{index}\n+new-{index}\n"),
5748            );
5749        }
5750        let input = make_input(
5751            "x",
5752            0..1,
5753            0,
5754            (0..3).map(make_numbered_event).collect(),
5755            vec![],
5756        );
5757
5758        let edit_history_section = format_edit_history_within_budget(
5759            &input.events,
5760            "<|file_sep|>",
5761            "edit history",
5762            usize::MAX,
5763            5,
5764        );
5765
5766        assert_eq!(
5767            &edit_history_section,
5768            indoc!(
5769                "
5770                <|file_sep|>edit history
5771                --- a/event-0.rs
5772                +++ b/event-0.rs
5773                -old-0
5774                +new-0
5775                --- a/event-1.rs
5776                +++ b/event-1.rs
5777                -old-1
5778                +new-1
5779                --- a/event-2.rs
5780                +++ b/event-2.rs
5781                -old-2
5782                +new-2
5783            "
5784            )
5785        );
5786
5787        let edit_history_section = format_edit_history_within_budget(
5788            &input.events,
5789            "<|file_sep|>",
5790            "edit history",
5791            usize::MAX,
5792            2,
5793        );
5794
5795        assert_eq!(
5796            &edit_history_section,
5797            indoc!(
5798                "
5799                <|file_sep|>edit history
5800                --- a/event-1.rs
5801                +++ b/event-1.rs
5802                -old-1
5803                +new-1
5804                --- a/event-2.rs
5805                +++ b/event-2.rs
5806                -old-2
5807                +new-2
5808            "
5809            )
5810        );
5811
5812        let edit_history_section = format_edit_history_within_budget(
5813            &input.events,
5814            "<|file_sep|>",
5815            "edit history",
5816            usize::MAX,
5817            0,
5818        );
5819
5820        assert_eq!(&edit_history_section, "");
5821    }
5822
5823    #[test]
5824    fn test_clean_zeta1_model_output_basic() {
5825        let output = indoc! {"
5826            <|editable_region_start|>
5827            fn main() {
5828                println!(\"hello\");
5829            }
5830            <|editable_region_end|>
5831        "};
5832
5833        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5834        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
5835    }
5836
5837    #[test]
5838    fn test_clean_zeta1_model_output_with_cursor() {
5839        let output = indoc! {"
5840            <|editable_region_start|>
5841            fn main() {
5842                <|user_cursor_is_here|>println!(\"hello\");
5843            }
5844            <|editable_region_end|>
5845        "};
5846
5847        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5848        assert_eq!(
5849            cleaned,
5850            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
5851        );
5852    }
5853
5854    #[test]
5855    fn test_clean_zeta1_model_output_no_markers() {
5856        let output = "fn main() {}\n";
5857        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5858        assert_eq!(cleaned, "fn main() {}\n");
5859    }
5860
5861    #[test]
5862    fn test_clean_zeta1_model_output_empty_region() {
5863        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
5864        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
5865        assert_eq!(cleaned, "");
5866    }
5867
5868    fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
5869        let mut result = excerpt.to_string();
5870        result.replace_range(
5871            parsed_output.range_in_excerpt.clone(),
5872            &parsed_output.new_editable_region,
5873        );
5874        result
5875    }
5876
5877    #[test]
5878    fn test_parse_zeta2_model_output() {
5879        let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
5880        let context_start = excerpt.find("ctx start").unwrap();
5881        let context_end = excerpt.find("after ctx").unwrap();
5882        let editable_start = excerpt.find("editable old").unwrap();
5883        let editable_end = editable_start + "editable old\n".len();
5884        let input = make_input_with_context_range(
5885            excerpt,
5886            editable_start..editable_end,
5887            context_start..context_end,
5888            editable_start,
5889        );
5890
5891        let output = parse_zeta2_model_output(
5892            "editable new\n>>>>>>> UPDATED\n",
5893            ZetaFormat::V0131GitMergeMarkersPrefix,
5894            &input,
5895        )
5896        .unwrap();
5897
5898        assert_eq!(
5899            apply_edit(excerpt, &output),
5900            "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
5901        );
5902    }
5903
5904    #[test]
5905    fn test_parse_zeta2_model_output_identity() {
5906        let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
5907        let editable_start = excerpt.find("bbb").unwrap();
5908        let editable_end = excerpt.find("ddd").unwrap();
5909        let input = make_input_with_context_range(
5910            excerpt,
5911            editable_start..editable_end,
5912            0..excerpt.len(),
5913            editable_start,
5914        );
5915
5916        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5917        let output =
5918            parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
5919
5920        assert_eq!(apply_edit(excerpt, &output), excerpt);
5921    }
5922
5923    #[test]
5924    fn test_parse_zeta2_model_output_strips_end_marker() {
5925        let excerpt = "hello\nworld\n";
5926        let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
5927
5928        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5929        let output1 =
5930            parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
5931        let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();
5932
5933        assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
5934        assert_eq!(apply_edit(excerpt, &output1), "new content\n");
5935    }
5936
5937    #[test]
5938    fn test_parsed_output_to_patch_round_trips_through_udiff_application() {
5939        let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
5940        let context_start = excerpt.find("ctx start").unwrap();
5941        let context_end = excerpt.find("after ctx").unwrap();
5942        let editable_start = excerpt.find("editable old").unwrap();
5943        let editable_end = editable_start + "editable old\n".len();
5944        let input = make_input_with_context_range(
5945            excerpt,
5946            editable_start..editable_end,
5947            context_start..context_end,
5948            editable_start,
5949        );
5950
5951        let parsed = parse_zeta2_model_output(
5952            "editable new\n>>>>>>> UPDATED\n",
5953            ZetaFormat::V0131GitMergeMarkersPrefix,
5954            &input,
5955        )
5956        .unwrap();
5957        let expected = apply_edit(excerpt, &parsed);
5958        let patch = parsed_output_to_patch(&input, parsed).unwrap();
5959        let patched = udiff::apply_diff_to_string(&patch, excerpt).unwrap();
5960
5961        assert_eq!(patched, expected);
5962    }
5963
5964    #[test]
5965    fn test_special_tokens_not_triggered_by_comment_separator() {
5966        // Regression test for https://github.com/zed-industries/zed/issues/52489
5967        let excerpt = "fn main() {\n    // =======\n    println!(\"hello\");\n}\n";
5968        let input = make_input(excerpt, 0..excerpt.len(), 0, vec![], vec![]);
5969        assert!(
5970            !prompt_input_contains_special_tokens(&input, ZetaFormat::V0131GitMergeMarkersPrefix),
5971            "comment containing ======= should not trigger special token detection"
5972        );
5973    }
5974}