zeta_prompt.rs

   1pub mod excerpt_ranges;
   2pub mod multi_region;
   3
   4use anyhow::{Result, anyhow};
   5use serde::{Deserialize, Serialize};
   6use std::fmt::Write;
   7use std::ops::Range;
   8use std::path::Path;
   9use std::sync::Arc;
  10use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
  11
  12pub use crate::excerpt_ranges::{
  13    ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
  14};
  15
/// Marker text written into the prompt at the position of the user's cursor.
/// It is also listed among the special tokens of the formats in this file.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes to
/// `format_prompt_with_budget_for_format`.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  23
  24fn estimate_tokens(bytes: usize) -> usize {
  25    bytes / 3
  26}
  27
/// Everything needed to build an edit-prediction prompt around the user's cursor.
///
/// Offsets and ranges that refer to `cursor_excerpt` are byte positions; the
/// code in this file uses them directly to slice the excerpt string.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor. It is rendered in the prompt and
    /// compared against related-file paths when filtering redundant excerpts.
    pub cursor_path: Arc<Path>,
    /// Text surrounding the cursor; the editable and context regions are
    /// sub-ranges of this string.
    pub cursor_excerpt: Arc<str>,
    /// Position of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Row that is added to excerpt-relative rows to obtain the row range of the
    /// cursor context. When `None`, related files are not filtered against the
    /// cursor context.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit history; only the most recent events (up to the per-format limit)
    /// are rendered into the prompt.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other locations to include in the prompt. `None` is treated
    /// like an empty list when formatting.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    // NOTE(review): not read by the formatting code visible in this part of the
    // file; presumably diagnostics for the buffer containing the cursor.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    // NOTE(review): the three fields below are not read in the visible part of
    // this file; their meaning is inferred from their names only.
    #[serde(default)]
    pub in_open_source_repo: bool,
    #[serde(default)]
    pub can_collect_data: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
  58
/// Identifies which prompt/output format to use.
///
/// The variant name doubles as the format's textual name: `Display` and
/// `ZetaFormat::parse` both go through the `IntoStaticStr` derive.
/// `V0131GitMergeMarkersPrefix` is the default.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Needed because `v0226Hashline` starts with a lowercase letter. Renaming the
// variant would change the name the derives above produce for it.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
    V0306SeedMultiRegions,
}
  87
  88impl std::fmt::Display for ZetaFormat {
  89    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  90        write!(f, "{}", <&'static str>::from(self))
  91    }
  92}
  93
  94impl ZetaFormat {
  95    pub fn parse(format_name: &str) -> Result<Self> {
  96        let mut results = ZetaFormat::iter().filter(|version| {
  97            <&'static str>::from(version)
  98                .to_lowercase()
  99                .contains(&format_name.to_lowercase())
 100        });
 101        let Some(result) = results.next() else {
 102            anyhow::bail!(
 103                "`{format_name}` did not match any of:\n{}",
 104                Self::options_as_string()
 105            );
 106        };
 107        if results.next().is_some() {
 108            anyhow::bail!(
 109                "`{format_name}` matched more than one of:\n{}",
 110                Self::options_as_string()
 111            );
 112        }
 113        Ok(result)
 114    }
 115
 116    pub fn options_as_string() -> String {
 117        ZetaFormat::iter()
 118            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 119            .collect::<Vec<_>>()
 120            .concat()
 121    }
 122}
 123
/// An entry of the edit history that is rendered into the prompt.
///
/// Serialized with an `event` tag identifying the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, carried as a diff.
    BufferChange {
        /// Path rendered on the `+++ b` line by `write_event`.
        path: Arc<Path>,
        /// Path rendered on the `--- a` line by `write_event`.
        old_path: Arc<Path>,
        /// Diff text, appended verbatim after the two path lines.
        diff: String,
        /// When true, `write_event` prefixes the entry with a
        /// "User accepted prediction" comment line.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; ignored by `write_event`.
        in_open_source_repo: bool,
    },
}
 135
 136impl Event {
 137    pub fn in_open_source_repo(&self) -> bool {
 138        match self {
 139            Event::BufferChange {
 140                in_open_source_repo,
 141                ..
 142            } => *in_open_source_repo,
 143        }
 144    }
 145}
 146
 147pub fn write_event(prompt: &mut String, event: &Event) {
 148    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 149        for component in path.components() {
 150            prompt.push('/');
 151            write!(prompt, "{}", component.as_os_str().display()).ok();
 152        }
 153    }
 154    match event {
 155        Event::BufferChange {
 156            path,
 157            old_path,
 158            diff,
 159            predicted,
 160            in_open_source_repo: _,
 161        } => {
 162            if *predicted {
 163                prompt.push_str("// User accepted prediction:\n");
 164            }
 165            prompt.push_str("--- a");
 166            write_path_as_unix_str(prompt, old_path.as_ref());
 167            prompt.push_str("\n+++ b");
 168            write_path_as_unix_str(prompt, path.as_ref());
 169            prompt.push('\n');
 170            prompt.push_str(diff);
 171        }
 172    }
 173}
 174
/// A diagnostic attached to `ZetaPromptInput::active_buffer_diagnostics`.
///
/// NOTE(review): none of these fields are read in the visible part of this
/// file; the field notes below are inferred from names and types only and
/// should be confirmed against the producer.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    // Presumably a numeric severity level; the numbering scheme is not shown here.
    pub severity: Option<i32>,
    pub message: String,
    // Presumably the source text surrounding the diagnostic.
    pub snippet: String,
    // Presumably the buffer rows covered by `snippet`.
    pub snippet_buffer_row_range: Range<u32>,
    // Presumably the diagnostic's range expressed relative to `snippet`.
    pub diagnostic_range_in_snippet: Range<usize>,
}
 183
/// A file whose excerpts may be included in the prompt alongside the cursor excerpt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path rendered in the file header line of the prompt.
    pub path: Arc<Path>,
    /// Excerpts whose row range ends before this row are followed by a `...`
    /// line when rendered.
    pub max_row: u32,
    /// The excerpts taken from this file.
    pub excerpts: Vec<RelatedExcerpt>,
    // NOTE(review): not read in the visible part of this file.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 192
/// A contiguous piece of text taken from a `RelatedFile`.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file covered by this excerpt.
    pub row_range: Range<u32>,
    /// The excerpt's text, rendered verbatim into the prompt.
    pub text: Arc<str>,
    /// Priority used when the token budget cannot fit every excerpt:
    /// excerpts with lower values are considered first.
    #[serde(default)]
    pub order: usize,
}
 200
 201pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 202    special_tokens_for_format(format)
 203        .iter()
 204        .any(|token| input.cursor_excerpt.contains(token))
 205}
 206
/// Formats the prompt for `input` in the given `format`, using
/// `MAX_PROMPT_TOKENS` as the token budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
 210
 211pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 212    match format {
 213        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
 214        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
 215        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
 216        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 217        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
 218        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
 219        ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 220        ZetaFormat::v0226Hashline => hashline::special_tokens(),
 221        ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
 222        ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
 223        ZetaFormat::V0306SeedMultiRegions => {
 224            static TOKENS: &[&str] = &[
 225                seed_coder::FIM_SUFFIX,
 226                seed_coder::FIM_PREFIX,
 227                seed_coder::FIM_MIDDLE,
 228                seed_coder::FILE_MARKER,
 229                seed_coder::START_MARKER,
 230                seed_coder::SEPARATOR,
 231                seed_coder::END_MARKER,
 232                CURSOR_MARKER,
 233                multi_region::MARKER_TAG_PREFIX,
 234            ];
 235            TOKENS
 236        }
 237    }
 238}
 239
/// Returns the (editable_token_limit, context_token_limit) for a given format.
///
/// `resolve_cursor_region` passes these limits to
/// `compute_editable_and_context_ranges` when the input carries syntax ranges.
pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
    match format {
        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
        ZetaFormat::V0114180EditableRegion => (180, 350),
        ZetaFormat::V0120GitMergeMarkers
        | ZetaFormat::V0131GitMergeMarkersPrefix
        | ZetaFormat::V0211Prefill
        | ZetaFormat::V0211SeedCoder
        | ZetaFormat::v0226Hashline
        | ZetaFormat::V0306SeedMultiRegions
        | ZetaFormat::V0304SeedNoEdits => (350, 150),
        // The variable-edit format requests no separate context budget.
        ZetaFormat::V0304VariableEdit => (1024, 0),
    }
}
 255
/// Returns the stop tokens for `format`.
///
/// Only the hashline format defines one (`hashline::NO_EDITS_COMMAND_MARKER`);
/// every other format returns an empty slice. The match is spelled out
/// exhaustively so that adding a variant forces a decision here.
pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
    match format {
        ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
        ZetaFormat::V0112MiddleAtEnd
        | ZetaFormat::V0113Ordered
        | ZetaFormat::V0114180EditableRegion
        | ZetaFormat::V0120GitMergeMarkers
        | ZetaFormat::V0131GitMergeMarkersPrefix
        | ZetaFormat::V0211Prefill
        | ZetaFormat::V0211SeedCoder
        | ZetaFormat::V0304VariableEdit
        | ZetaFormat::V0306SeedMultiRegions
        | ZetaFormat::V0304SeedNoEdits => &[],
    }
}
 271
 272pub fn excerpt_ranges_for_format(
 273    format: ZetaFormat,
 274    ranges: &ExcerptRanges,
 275) -> (Range<usize>, Range<usize>) {
 276    match format {
 277        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 278            ranges.editable_150.clone(),
 279            ranges.editable_150_context_350.clone(),
 280        ),
 281        ZetaFormat::V0114180EditableRegion => (
 282            ranges.editable_180.clone(),
 283            ranges.editable_180_context_350.clone(),
 284        ),
 285        ZetaFormat::V0120GitMergeMarkers
 286        | ZetaFormat::V0131GitMergeMarkersPrefix
 287        | ZetaFormat::V0211Prefill
 288        | ZetaFormat::V0211SeedCoder
 289        | ZetaFormat::v0226Hashline
 290        | ZetaFormat::V0304SeedNoEdits
 291        | ZetaFormat::V0306SeedMultiRegions => (
 292            ranges.editable_350.clone(),
 293            ranges.editable_350_context_150.clone(),
 294        ),
 295        ZetaFormat::V0304VariableEdit => {
 296            let context = ranges
 297                .editable_350_context_1024
 298                .clone()
 299                .or(ranges.editable_350_context_512.clone())
 300                .unwrap_or_else(|| ranges.editable_350_context_150.clone());
 301            (context.clone(), context)
 302        }
 303    }
 304}
 305
/// Appends the cursor-excerpt section for `format` to `prompt`.
///
/// This dispatches to the per-format writer. `context` is the context text,
/// `editable_range` and `cursor_offset` are positions within `context`.
/// Several formats share a writer, as noted on the arms below.
pub fn write_cursor_excerpt_section_for_format(
    format: ZetaFormat,
    prompt: &mut String,
    path: &Path,
    context: &str,
    editable_range: &Range<usize>,
    cursor_offset: usize,
) {
    match format {
        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // The 180-token editable-region format reuses the ordered writer.
        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
            v0113_ordered::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // The prefill format reuses the merge-markers-prefix writer.
        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        // Both seed formats reuse the seed-coder writer.
        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
            seed_coder::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // The variable-edit writer does not take the editable range.
        ZetaFormat::V0304VariableEdit => {
            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
        }
        ZetaFormat::V0306SeedMultiRegions => {
            prompt.push_str(&build_v0306_cursor_prefix(
                path,
                context,
                editable_range,
                cursor_offset,
            ));
        }
    }
}
 376
 377fn build_v0306_cursor_prefix(
 378    path: &Path,
 379    context: &str,
 380    editable_range: &Range<usize>,
 381    cursor_offset: usize,
 382) -> String {
 383    let mut section = String::new();
 384    let path_str = path.to_string_lossy();
 385    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 386
 387    section.push_str(&context[..editable_range.start]);
 388    section.push_str(seed_coder::START_MARKER);
 389
 390    let editable_text = &context[editable_range.clone()];
 391    let cursor_in_editable = cursor_offset - editable_range.start;
 392    multi_region::write_editable_with_markers(
 393        &mut section,
 394        editable_text,
 395        cursor_in_editable,
 396        CURSOR_MARKER,
 397    );
 398
 399    if !section.ends_with('\n') {
 400        section.push('\n');
 401    }
 402    section.push_str(seed_coder::SEPARATOR);
 403    section
 404}
 405
 406fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
 407    let start_row = text[0..range.start].matches('\n').count() as u32;
 408    let mut end_row = start_row + text[range.clone()].matches('\n').count() as u32;
 409    if !text[..range.end].ends_with('\n') {
 410        end_row += 1;
 411    }
 412    return start_row..end_row;
 413}
 414
/// Builds the full prompt for `input` in the given `format`, keeping the
/// estimated size within `max_tokens`.
///
/// The seed formats delegate prompt assembly to the `seed_coder` module. For
/// all other formats the cursor section is always included; the edit history
/// is then fitted into what remains of the budget, and related files get
/// whatever is left after that. The sections are emitted in the order
/// related files, edit history, cursor section.
pub fn format_prompt_with_budget_for_format(
    input: &ZetaPromptInput,
    format: ZetaFormat,
    max_tokens: usize,
) -> String {
    let (context, editable_range, context_range, cursor_offset) =
        resolve_cursor_region(input, format);
    let path = &*input.cursor_path;

    // Missing related files are treated as an empty list.
    let empty_files = Vec::new();
    let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
    // When the excerpt's starting row is known, drop related excerpts from the
    // cursor's own file that lie entirely inside the cursor context rows.
    let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
        let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
        // Shift excerpt-relative rows by the excerpt's starting row.
        let row_range = relative_row_range.start + cursor_excerpt_start_row
            ..relative_row_range.end + cursor_excerpt_start_row;
        &filter_redundant_excerpts(
            input_related_files.to_vec(),
            input.cursor_path.as_ref(),
            row_range,
        )
    } else {
        input_related_files
    };

    match format {
        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
            seed_coder::format_prompt_with_budget(
                path,
                context,
                &editable_range,
                cursor_offset,
                &input.events,
                related_files,
                max_tokens,
            )
        }
        ZetaFormat::V0306SeedMultiRegions => {
            let cursor_prefix =
                build_v0306_cursor_prefix(path, context, &editable_range, cursor_offset);
            seed_coder::assemble_fim_prompt(
                context,
                &editable_range,
                &cursor_prefix,
                &input.events,
                related_files,
                max_tokens,
            )
        }
        _ => {
            // The cursor section is rendered first because it is never dropped;
            // the other sections share the remaining budget.
            let mut cursor_section = String::new();
            write_cursor_excerpt_section_for_format(
                format,
                &mut cursor_section,
                path,
                context,
                &editable_range,
                cursor_offset,
            );

            let cursor_tokens = estimate_tokens(cursor_section.len());
            let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);

            let edit_history_section = format_edit_history_within_budget(
                &input.events,
                "<|file_sep|>",
                "edit history",
                budget_after_cursor,
                max_edit_event_count_for_format(&format),
            );
            let edit_history_tokens = estimate_tokens(edit_history_section.len());
            let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);

            let related_files_section = format_related_files_within_budget(
                &related_files,
                "<|file_sep|>",
                "",
                budget_after_edit_history,
            );

            // Output order differs from budgeting order: related files come
            // first and the cursor section comes last.
            let mut prompt = String::new();
            prompt.push_str(&related_files_section);
            prompt.push_str(&edit_history_section);
            prompt.push_str(&cursor_section);
            prompt
        }
    }
}
 502
 503pub fn filter_redundant_excerpts(
 504    mut related_files: Vec<RelatedFile>,
 505    cursor_path: &Path,
 506    cursor_row_range: Range<u32>,
 507) -> Vec<RelatedFile> {
 508    for file in &mut related_files {
 509        if file.path.as_ref() == cursor_path {
 510            file.excerpts.retain(|excerpt| {
 511                excerpt.row_range.start < cursor_row_range.start
 512                    || excerpt.row_range.end > cursor_row_range.end
 513            });
 514        }
 515    }
 516    related_files.retain(|file| !file.excerpts.is_empty());
 517    related_files
 518}
 519
/// Returns the maximum number of edit-history events rendered for `format`.
///
/// Every format currently uses the same limit. The match is kept exhaustive so
/// that a new variant has to pick its limit explicitly.
pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
    match format {
        ZetaFormat::V0112MiddleAtEnd
        | ZetaFormat::V0113Ordered
        | ZetaFormat::V0114180EditableRegion
        | ZetaFormat::V0120GitMergeMarkers
        | ZetaFormat::V0131GitMergeMarkersPrefix
        | ZetaFormat::V0211Prefill
        | ZetaFormat::V0211SeedCoder
        | ZetaFormat::v0226Hashline
        | ZetaFormat::V0304SeedNoEdits
        | ZetaFormat::V0304VariableEdit
        | ZetaFormat::V0306SeedMultiRegions => 6,
    }
}
 535
 536pub fn get_prefill_for_format(
 537    format: ZetaFormat,
 538    context: &str,
 539    editable_range: &Range<usize>,
 540) -> String {
 541    match format {
 542        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
 543        ZetaFormat::V0112MiddleAtEnd
 544        | ZetaFormat::V0113Ordered
 545        | ZetaFormat::V0114180EditableRegion
 546        | ZetaFormat::V0120GitMergeMarkers
 547        | ZetaFormat::V0131GitMergeMarkersPrefix
 548        | ZetaFormat::V0211SeedCoder
 549        | ZetaFormat::v0226Hashline
 550        | ZetaFormat::V0304VariableEdit => String::new(),
 551        ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => String::new(),
 552    }
 553}
 554
 555pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
 556    match format {
 557        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
 558        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
 559        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
 560        ZetaFormat::V0211SeedCoder
 561        | ZetaFormat::V0304SeedNoEdits
 562        | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
 563        ZetaFormat::V0112MiddleAtEnd
 564        | ZetaFormat::V0113Ordered
 565        | ZetaFormat::V0114180EditableRegion
 566        | ZetaFormat::v0226Hashline
 567        | ZetaFormat::V0304VariableEdit => None,
 568    }
 569}
 570
/// Encodes `patch` in the output representation of `format`.
///
/// Returns `Ok(None)` for formats that have no dedicated encoding here.
/// The hashline and variable-edit formats always return `Some` on success;
/// the seed "no edits" formats return whatever `seed_coder::no_edits` yields.
///
/// # Errors
///
/// Propagates errors from the hashline and variable-edit encoders.
pub fn encode_patch_as_output_for_format(
    format: ZetaFormat,
    old_editable_region: &str,
    patch: &str,
    cursor_offset: Option<usize>,
) -> Result<Option<String>> {
    match format {
        ZetaFormat::v0226Hashline => {
            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
        }
        ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
            old_editable_region,
            patch,
            cursor_offset,
        )
        .map(Some),
        ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
            Ok(seed_coder::no_edits(patch))
        }
        _ => Ok(None),
    }
}
 593
/// Result of `parse_zeta2_model_output`: replacement text together with the
/// excerpt range it replaces.
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
}
 600
/// Parse model output for the given zeta format
///
/// The format's end marker, if any, is stripped from the end of `output`.
/// The remaining text is decoded per format into replacement text plus the
/// range it replaces, and that range is translated from context-relative to
/// excerpt-relative offsets.
///
/// # Errors
///
/// Propagates errors from `v0304_variable_edit::apply_variable_edit` and
/// `multi_region::apply_marker_span`.
pub fn parse_zeta2_model_output(
    output: &str,
    format: ZetaFormat,
    prompt_inputs: &ZetaPromptInput,
) -> Result<ParsedOutput> {
    // Output without the end marker is accepted unchanged.
    let output = match output_end_marker_for_format(format) {
        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
        None => output,
    };

    let (context, editable_range_in_context, context_range, _) =
        resolve_cursor_region(prompt_inputs, format);
    let context_start = context_range.start;
    let old_editable_region = &context[editable_range_in_context.clone()];

    let (range_in_context, output) = match format {
        // Hashline output is either a list of edit commands to apply to the
        // old text, or the replacement text itself.
        ZetaFormat::v0226Hashline => (
            editable_range_in_context,
            if hashline::output_has_edit_commands(output) {
                hashline::apply_edit_commands(old_editable_region, output)
            } else {
                output.to_string()
            },
        ),
        // The variable-edit decoder determines its own range within the context.
        ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
        // A leading "no edits" marker means the editable region stays as it was.
        ZetaFormat::V0304SeedNoEdits => (
            editable_range_in_context,
            if output.starts_with(seed_coder::NO_EDITS) {
                old_editable_region.to_string()
            } else {
                output.to_string()
            },
        ),
        ZetaFormat::V0306SeedMultiRegions => (
            editable_range_in_context,
            if output.starts_with(seed_coder::NO_EDITS) {
                old_editable_region.to_string()
            } else {
                multi_region::apply_marker_span(old_editable_region, output)?
            },
        ),
        // All remaining formats emit the new editable region verbatim.
        _ => (editable_range_in_context, output.to_string()),
    };

    // Shift from context-relative to excerpt-relative offsets.
    let range_in_excerpt =
        range_in_context.start + context_start..range_in_context.end + context_start;

    Ok(ParsedOutput {
        new_editable_region: output,
        range_in_excerpt,
    })
}
 654
/// Singular-named alias of `excerpt_ranges_for_format`; it forwards its
/// arguments unchanged and returns the same `(editable, context)` pair.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
 661
 662pub fn resolve_cursor_region(
 663    input: &ZetaPromptInput,
 664    format: ZetaFormat,
 665) -> (&str, Range<usize>, Range<usize>, usize) {
 666    let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges {
 667        let (editable_tokens, context_tokens) = token_limits_for_format(format);
 668        compute_editable_and_context_ranges(
 669            &input.cursor_excerpt,
 670            input.cursor_offset_in_excerpt,
 671            syntax_ranges,
 672            editable_tokens,
 673            context_tokens,
 674        )
 675    } else {
 676        excerpt_range_for_format(format, &input.excerpt_ranges)
 677    };
 678    let context_start = context_range.start;
 679    let context_text = &input.cursor_excerpt[context_range.clone()];
 680    let adjusted_editable =
 681        (editable_range.start - context_start)..(editable_range.end - context_start);
 682    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 683
 684    (
 685        context_text,
 686        adjusted_editable,
 687        context_range,
 688        adjusted_cursor,
 689    )
 690}
 691
/// Returns the prefill text for `input` in the given `format`.
///
/// Resolves the cursor region first, then defers to `get_prefill_for_format`
/// with the resulting context text and editable range.
pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
    get_prefill_for_format(format, context, &editable_range)
}
 696
 697fn format_edit_history_within_budget(
 698    events: &[Arc<Event>],
 699    file_marker: &str,
 700    edit_history_name: &str,
 701    max_tokens: usize,
 702    max_edit_event_count: usize,
 703) -> String {
 704    let header = format!("{}{}\n", file_marker, edit_history_name);
 705    let header_tokens = estimate_tokens(header.len());
 706    if header_tokens >= max_tokens {
 707        return String::new();
 708    }
 709
 710    let mut event_strings: Vec<String> = Vec::new();
 711    let mut total_tokens = header_tokens;
 712
 713    for event in events.iter().rev().take(max_edit_event_count) {
 714        let mut event_str = String::new();
 715        write_event(&mut event_str, event);
 716        let event_tokens = estimate_tokens(event_str.len());
 717
 718        if total_tokens + event_tokens > max_tokens {
 719            break;
 720        }
 721        total_tokens += event_tokens;
 722        event_strings.push(event_str);
 723    }
 724
 725    if event_strings.is_empty() {
 726        return String::new();
 727    }
 728
 729    let mut result = header;
 730    for event_str in event_strings.iter().rev() {
 731        result.push_str(event_str);
 732    }
 733    result
 734}
 735
 736fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
 737    let needs_newline = !excerpt.text.ends_with('\n');
 738    let needs_ellipsis = excerpt.row_range.end < file_max_row;
 739    let len = excerpt.text.len()
 740        + if needs_newline { "\n".len() } else { 0 }
 741        + if needs_ellipsis { "...\n".len() } else { 0 };
 742    estimate_tokens(len)
 743}
 744
/// Renders as many related-file excerpts as fit within `max_tokens`.
///
/// Excerpts are selected in priority order (ascending `order`, ties broken by
/// file and excerpt position). Selection stops at the first excerpt that would
/// exceed the budget. The selected excerpts are then rendered in their
/// original file/excerpt order.
///
/// Each file starts with a header line of `file_prefix` followed by the path.
/// `file_suffix` is written between consecutive files, not after the last one.
pub fn format_related_files_within_budget(
    related_files: &[RelatedFile],
    file_prefix: &str,
    file_suffix: &str,
    max_tokens: usize,
) -> String {
    // Identifies one excerpt by position, together with its priority.
    struct ExcerptCandidate {
        file_ix: usize,
        excerpt_ix: usize,
        order: usize,
    }

    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
        .iter()
        .enumerate()
        .flat_map(|(file_ix, file)| {
            file.excerpts
                .iter()
                .enumerate()
                .map(move |(excerpt_ix, e)| ExcerptCandidate {
                    file_ix,
                    excerpt_ix,
                    order: e.order,
                })
        })
        .collect();

    // Pre-compute file header strings and their token costs.
    let file_headers: Vec<String> = related_files
        .iter()
        .map(|file| {
            let path_str = file.path.to_string_lossy();
            format!("{}{}\n", file_prefix, path_str)
        })
        .collect();

    // Sort the excerpts by their order and determine how many fit within the budget.
    let mut total_tokens = 0;
    let mut included_excerpt_count = 0_usize;
    let mut included_file_indices = vec![false; related_files.len()];
    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
    for candidate in &excerpt_candidates {
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        let file_already_included = included_file_indices[candidate.file_ix];
        // The header (plus suffix) is charged once, with the file's first excerpt.
        let header_cost = if file_already_included {
            0
        } else {
            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
        };
        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
        // Stop at the first excerpt that does not fit; lower-priority
        // excerpts are not tried even if they are smaller.
        if total_tokens + header_cost + excerpt_cost > max_tokens {
            break;
        }
        total_tokens += header_cost + excerpt_cost;
        if !file_already_included {
            included_file_indices[candidate.file_ix] = true;
        }
        included_excerpt_count += 1;
    }

    // Keep only the selected prefix, then restore file/excerpt order for rendering.
    excerpt_candidates.truncate(included_excerpt_count);
    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));

    // Render all of the files that fit within the token budget, in the original order.
    let mut result = String::new();
    let mut last_file_ix = None;
    for candidate in &excerpt_candidates {
        if last_file_ix != Some(candidate.file_ix) {
            // Close the previous file before starting the next one.
            if last_file_ix.is_some() {
                result.push_str(file_suffix);
            }
            result.push_str(&file_headers[candidate.file_ix]);
            last_file_ix = Some(candidate.file_ix);
        }
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        result.push_str(&excerpt.text);
        if !result.ends_with('\n') {
            result.push('\n');
        }
        // Mark that the file continues beyond this excerpt.
        if excerpt.row_range.end < file.max_row {
            result.push_str("...\n");
        }
    }

    result
}
 833
 834pub fn write_related_files(
 835    prompt: &mut String,
 836    related_files: &[RelatedFile],
 837) -> Vec<Range<usize>> {
 838    let mut ranges = Vec::new();
 839    for file in related_files {
 840        let start = prompt.len();
 841        let path_str = file.path.to_string_lossy();
 842        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 843        for excerpt in &file.excerpts {
 844            prompt.push_str(&excerpt.text);
 845            if !prompt.ends_with('\n') {
 846                prompt.push('\n');
 847            }
 848            if excerpt.row_range.end < file.max_row {
 849                prompt.push_str("...\n");
 850            }
 851        }
 852        let end = prompt.len();
 853        ranges.push(start..end);
 854    }
 855    ranges
 856}
 857
 858mod v0112_middle_at_end {
 859    use super::*;
 860
 861    pub fn special_tokens() -> &'static [&'static str] {
 862        &[
 863            "<|fim_prefix|>",
 864            "<|fim_suffix|>",
 865            "<|fim_middle|>",
 866            "<|file_sep|>",
 867            CURSOR_MARKER,
 868        ]
 869    }
 870
 871    pub fn write_cursor_excerpt_section(
 872        prompt: &mut String,
 873        path: &Path,
 874        context: &str,
 875        editable_range: &Range<usize>,
 876        cursor_offset: usize,
 877    ) {
 878        let path_str = path.to_string_lossy();
 879        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 880
 881        prompt.push_str("<|fim_prefix|>\n");
 882        prompt.push_str(&context[..editable_range.start]);
 883
 884        prompt.push_str("<|fim_suffix|>\n");
 885        prompt.push_str(&context[editable_range.end..]);
 886        if !prompt.ends_with('\n') {
 887            prompt.push('\n');
 888        }
 889
 890        prompt.push_str("<|fim_middle|>current\n");
 891        prompt.push_str(&context[editable_range.start..cursor_offset]);
 892        prompt.push_str(CURSOR_MARKER);
 893        prompt.push_str(&context[cursor_offset..editable_range.end]);
 894        if !prompt.ends_with('\n') {
 895            prompt.push('\n');
 896        }
 897
 898        prompt.push_str("<|fim_middle|>updated\n");
 899    }
 900}
 901
 902mod v0113_ordered {
 903    use super::*;
 904
 905    pub fn special_tokens() -> &'static [&'static str] {
 906        &[
 907            "<|fim_prefix|>",
 908            "<|fim_suffix|>",
 909            "<|fim_middle|>",
 910            "<|file_sep|>",
 911            CURSOR_MARKER,
 912        ]
 913    }
 914
 915    pub fn write_cursor_excerpt_section(
 916        prompt: &mut String,
 917        path: &Path,
 918        context: &str,
 919        editable_range: &Range<usize>,
 920        cursor_offset: usize,
 921    ) {
 922        let path_str = path.to_string_lossy();
 923        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 924
 925        prompt.push_str("<|fim_prefix|>\n");
 926        prompt.push_str(&context[..editable_range.start]);
 927        if !prompt.ends_with('\n') {
 928            prompt.push('\n');
 929        }
 930
 931        prompt.push_str("<|fim_middle|>current\n");
 932        prompt.push_str(&context[editable_range.start..cursor_offset]);
 933        prompt.push_str(CURSOR_MARKER);
 934        prompt.push_str(&context[cursor_offset..editable_range.end]);
 935        if !prompt.ends_with('\n') {
 936            prompt.push('\n');
 937        }
 938
 939        prompt.push_str("<|fim_suffix|>\n");
 940        prompt.push_str(&context[editable_range.end..]);
 941        if !prompt.ends_with('\n') {
 942            prompt.push('\n');
 943        }
 944
 945        prompt.push_str("<|fim_middle|>updated\n");
 946    }
 947}
 948
/// Prompt format `v0114180_editable_region`.
///
/// This module only defines the format's special-token list, which is the
/// same list as `v0113_ordered`.
mod v0114180_editable_region {
    use super::*;

    /// Returns the special tokens of this format by delegating to
    /// `v0113_ordered::special_tokens`.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
 956
 957pub mod v0120_git_merge_markers {
 958    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 959    //!
 960    //! Example prompt:
 961    //!
 962    //! <|file_sep|>path/to/target_file.py
 963    //! <|fim_prefix|>
 964    //! code before editable region
 965    //! <|fim_suffix|>
 966    //! code after editable region
 967    //! <|fim_middle|>
 968    //! <<<<<<< CURRENT
 969    //! code that
 970    //! needs to<|user_cursor|>
 971    //! be rewritten
 972    //! =======
 973    //!
 974    //! Expected output (should be generated by the model):
 975    //!
 976    //! updated
 977    //! code with
 978    //! changes applied
 979    //! >>>>>>> UPDATED
 980
 981    use super::*;
 982
 983    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 984    pub const SEPARATOR: &str = "=======\n";
 985    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 986
 987    pub fn special_tokens() -> &'static [&'static str] {
 988        &[
 989            "<|fim_prefix|>",
 990            "<|fim_suffix|>",
 991            "<|fim_middle|>",
 992            "<|file_sep|>",
 993            START_MARKER,
 994            SEPARATOR,
 995            END_MARKER,
 996            CURSOR_MARKER,
 997        ]
 998    }
 999
1000    pub fn write_cursor_excerpt_section(
1001        prompt: &mut String,
1002        path: &Path,
1003        context: &str,
1004        editable_range: &Range<usize>,
1005        cursor_offset: usize,
1006    ) {
1007        let path_str = path.to_string_lossy();
1008        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1009
1010        prompt.push_str("<|fim_prefix|>");
1011        prompt.push_str(&context[..editable_range.start]);
1012
1013        prompt.push_str("<|fim_suffix|>");
1014        prompt.push_str(&context[editable_range.end..]);
1015        if !prompt.ends_with('\n') {
1016            prompt.push('\n');
1017        }
1018
1019        prompt.push_str("<|fim_middle|>");
1020        prompt.push_str(START_MARKER);
1021        prompt.push_str(&context[editable_range.start..cursor_offset]);
1022        prompt.push_str(CURSOR_MARKER);
1023        prompt.push_str(&context[cursor_offset..editable_range.end]);
1024        if !prompt.ends_with('\n') {
1025            prompt.push('\n');
1026        }
1027        prompt.push_str(SEPARATOR);
1028    }
1029}
1030
1031pub mod v0131_git_merge_markers_prefix {
1032    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1033    //!
1034    //! Example prompt:
1035    //!
1036    //! <|file_sep|>path/to/target_file.py
1037    //! <|fim_prefix|>
1038    //! code before editable region
1039    //! <<<<<<< CURRENT
1040    //! code that
1041    //! needs to<|user_cursor|>
1042    //! be rewritten
1043    //! =======
1044    //! <|fim_suffix|>
1045    //! code after editable region
1046    //! <|fim_middle|>
1047    //!
1048    //! Expected output (should be generated by the model):
1049    //!
1050    //! updated
1051    //! code with
1052    //! changes applied
1053    //! >>>>>>> UPDATED
1054
1055    use super::*;
1056
1057    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1058    pub const SEPARATOR: &str = "=======\n";
1059    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1060
1061    pub fn special_tokens() -> &'static [&'static str] {
1062        &[
1063            "<|fim_prefix|>",
1064            "<|fim_suffix|>",
1065            "<|fim_middle|>",
1066            "<|file_sep|>",
1067            START_MARKER,
1068            SEPARATOR,
1069            END_MARKER,
1070            CURSOR_MARKER,
1071        ]
1072    }
1073
1074    pub fn write_cursor_excerpt_section(
1075        prompt: &mut String,
1076        path: &Path,
1077        context: &str,
1078        editable_range: &Range<usize>,
1079        cursor_offset: usize,
1080    ) {
1081        let path_str = path.to_string_lossy();
1082        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1083
1084        prompt.push_str("<|fim_prefix|>");
1085        prompt.push_str(&context[..editable_range.start]);
1086        prompt.push_str(START_MARKER);
1087        prompt.push_str(&context[editable_range.start..cursor_offset]);
1088        prompt.push_str(CURSOR_MARKER);
1089        prompt.push_str(&context[cursor_offset..editable_range.end]);
1090        if !prompt.ends_with('\n') {
1091            prompt.push('\n');
1092        }
1093        prompt.push_str(SEPARATOR);
1094
1095        prompt.push_str("<|fim_suffix|>");
1096        prompt.push_str(&context[editable_range.end..]);
1097        if !prompt.ends_with('\n') {
1098            prompt.push('\n');
1099        }
1100
1101        prompt.push_str("<|fim_middle|>");
1102    }
1103}
1104
1105pub mod v0211_prefill {
1106    use super::*;
1107
1108    pub fn special_tokens() -> &'static [&'static str] {
1109        v0131_git_merge_markers_prefix::special_tokens()
1110    }
1111
1112    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1113        let editable_region = &context[editable_range.start..editable_range.end];
1114
1115        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1116        let prefill_len = editable_region.floor_char_boundary(prefill_len);
1117
1118        // Find a token boundary to avoid splitting tokens in the prefill.
1119        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1120        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1121        // the \n and consume any consecutive \n characters after it.
1122        let prefill = &editable_region[..prefill_len];
1123        match prefill.rfind('\n') {
1124            Some(pos) => {
1125                let mut end = pos + 1;
1126                while end < editable_region.len()
1127                    && editable_region.as_bytes().get(end) == Some(&b'\n')
1128                {
1129                    end += 1;
1130                }
1131                editable_region[..end].to_string()
1132            }
1133            // No newline found. Fall back to splitting before the last space
1134            // (word-level boundary)
1135            None => match prefill.rfind(' ') {
1136                Some(pos) => prefill[..pos].to_string(),
1137                None => prefill.to_string(),
1138            },
1139        }
1140    }
1141}
1142
1143pub mod hashline {
1144
1145    use std::fmt::Display;
1146
    /// Header that `write_cursor_excerpt_section` writes at the very end of the
    /// prompt (followed by a newline).
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Header that `write_cursor_excerpt_section` writes immediately before the
    /// hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";

    use super::*;

    /// Prefix of a command line that replaces a single line or an inclusive
    /// range of lines.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a command line that inserts content after a referenced line
    /// (or before the first line when no line reference is given).
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
    /// Model output that starts with this marker leaves the editable region
    /// unchanged; `patch_to_edit_commands` emits it when a patch yields no
    /// commands.
    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1155
1156    pub fn special_tokens() -> &'static [&'static str] {
1157        return &[
1158            SET_COMMAND_MARKER,
1159            "<|set_range|>",
1160            INSERT_COMMAND_MARKER,
1161            NO_EDITS_COMMAND_MARKER,
1162            CURSOR_MARKER,
1163            "<|file_sep|>",
1164            "<|fim_prefix|>",
1165            "<|fim_suffix|>",
1166            "<|fim_middle|>",
1167        ];
1168    }
1169
1170    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1171    #[derive(Debug, Clone, PartialEq, Eq)]
1172    struct LineRef {
1173        index: usize,
1174        hash: u8,
1175    }
1176
1177    impl Display for LineRef {
1178        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1179            write!(f, "{}:{:02x}", self.index, self.hash)
1180        }
1181    }
1182
1183    pub fn hash_line(line: &[u8]) -> u8 {
1184        let mut h: u8 = 0;
1185        for &byte in line {
1186            h = h.wrapping_add(byte);
1187        }
1188        return h;
1189    }
1190
1191    /// Write the hashline-encoded editable region into `out`. Each line of
1192    /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1193    /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1194    /// to the start of `editable_text`).
1195    pub fn write_hashline_editable_region(
1196        out: &mut String,
1197        editable_text: &str,
1198        cursor_offset_in_editable: usize,
1199    ) {
1200        let mut offset = 0;
1201        for (i, line) in editable_text.lines().enumerate() {
1202            let (head, cursor, tail) = if cursor_offset_in_editable > offset
1203                && cursor_offset_in_editable < offset + line.len()
1204            {
1205                (
1206                    &line[..cursor_offset_in_editable - offset],
1207                    CURSOR_MARKER,
1208                    &line[cursor_offset_in_editable - offset..],
1209                )
1210            } else {
1211                (line, "", "")
1212            };
1213            write!(
1214                out,
1215                "\n{}|{head}{cursor}{tail}",
1216                LineRef {
1217                    index: i,
1218                    hash: hash_line(line.as_bytes())
1219                }
1220            )
1221            .unwrap();
1222            offset += line.len() + 1;
1223        }
1224    }
1225
1226    pub fn write_cursor_excerpt_section(
1227        prompt: &mut String,
1228        path: &Path,
1229        context: &str,
1230        editable_range: &Range<usize>,
1231        cursor_offset: usize,
1232    ) {
1233        let path_str = path.to_string_lossy();
1234        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1235
1236        prompt.push_str("<|fim_prefix|>\n");
1237        prompt.push_str(&context[..editable_range.start]);
1238        prompt.push_str(START_MARKER);
1239
1240        let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1241        let editable_region = &context[editable_range.clone()];
1242        write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1243
1244        if !prompt.ends_with('\n') {
1245            prompt.push('\n');
1246        }
1247
1248        prompt.push_str("<|fim_suffix|>\n");
1249        prompt.push_str(&context[editable_range.end..]);
1250        if !prompt.ends_with('\n') {
1251            prompt.push('\n');
1252        }
1253
1254        prompt.push_str(END_MARKER);
1255        prompt.push('\n');
1256    }
1257
    /// A single edit command parsed from the model output.
    ///
    /// `content` borrows from the model output: it is the text between the
    /// command's header line and the next command header (or the end of the
    /// output).
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            /// First line to replace.
            start: LineRef,
            /// Last line to replace (inclusive).
            end: LineRef,
            /// Replacement text for the whole range.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            /// Line to insert after; `None` means "before the first line".
            after: Option<LineRef>,
            /// Text to insert.
            content: &'a str,
        },
    }
1275
1276    /// Parse a line reference like `3:c3` into a `LineRef`.
1277    fn parse_line_ref(s: &str) -> Option<LineRef> {
1278        let (idx_str, hash_str) = s.split_once(':')?;
1279        let index = idx_str.parse::<usize>().ok()?;
1280        let hash = u8::from_str_radix(hash_str, 16).ok()?;
1281        Some(LineRef { index, hash })
1282    }
1283
1284    /// Parse the model output into a list of `EditCommand`s.
1285    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1286        let mut commands = Vec::new();
1287        let mut offset = 0usize;
1288
1289        while offset < model_output.len() {
1290            let next_nl = model_output[offset..]
1291                .find('\n')
1292                .map(|i| offset + i)
1293                .unwrap_or(model_output.len());
1294            let line = &model_output[offset..next_nl];
1295            let line_end = if next_nl < model_output.len() {
1296                next_nl + 1
1297            } else {
1298                next_nl
1299            };
1300
1301            let trimmed = line.trim();
1302            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1303                (true, spec)
1304            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1305                (false, spec)
1306            } else {
1307                offset = line_end;
1308                continue;
1309            };
1310
1311            let mut content_end = line_end;
1312            let mut scan = line_end;
1313
1314            while scan < model_output.len() {
1315                let body_nl = model_output[scan..]
1316                    .find('\n')
1317                    .map(|i| scan + i)
1318                    .unwrap_or(model_output.len());
1319                let body_line = &model_output[scan..body_nl];
1320                if body_line.trim().starts_with(SET_COMMAND_MARKER)
1321                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1322                {
1323                    break;
1324                }
1325                scan = if body_nl < model_output.len() {
1326                    body_nl + 1
1327                } else {
1328                    body_nl
1329                };
1330                content_end = scan;
1331            }
1332
1333            let content = &model_output[line_end..content_end];
1334
1335            if is_set {
1336                if let Some((start_str, end_str)) = specifier.split_once('-') {
1337                    if let (Some(start), Some(end)) =
1338                        (parse_line_ref(start_str), parse_line_ref(end_str))
1339                    {
1340                        commands.push(EditCommand::Set {
1341                            start,
1342                            end,
1343                            content,
1344                        });
1345                    }
1346                } else if let Some(target) = parse_line_ref(specifier) {
1347                    commands.push(EditCommand::Set {
1348                        start: target.clone(),
1349                        end: target,
1350                        content,
1351                    });
1352                }
1353            } else {
1354                let after = parse_line_ref(specifier);
1355                commands.push(EditCommand::Insert { after, content });
1356            }
1357
1358            offset = scan;
1359        }
1360
1361        commands
1362    }
1363
1364    /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1365    /// (as opposed to being a plain full-replacement output).
1366    /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1367    /// editable region, returning the plain text content.
1368    pub fn strip_hashline_prefixes(region: &str) -> String {
1369        let mut decoded: String = region
1370            .lines()
1371            .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1372            .collect::<Vec<_>>()
1373            .join("\n");
1374        if region.ends_with('\n') {
1375            decoded.push('\n');
1376        }
1377        decoded
1378    }
1379
1380    pub fn output_has_edit_commands(model_output: &str) -> bool {
1381        model_output.contains(SET_COMMAND_MARKER)
1382            || model_output.contains(INSERT_COMMAND_MARKER)
1383            || model_output.contains(NO_EDITS_COMMAND_MARKER)
1384    }
1385
1386    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1387    /// original editable region text.
1388    ///
1389    /// `editable_region` is the original text of the editable region (without hash
1390    /// prefixes). `model_output` is the raw model response containing edit commands.
1391    ///
1392    /// Returns the full replacement text for the editable region.
1393    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1394        if model_output
1395            .trim_start()
1396            .starts_with(NO_EDITS_COMMAND_MARKER)
1397        {
1398            return editable_region.to_string();
1399        }
1400
1401        let original_lines: Vec<&str> = editable_region.lines().collect();
1402        let old_hashes: Vec<u8> = original_lines
1403            .iter()
1404            .map(|line| hash_line(line.as_bytes()))
1405            .collect();
1406
1407        let commands = parse_edit_commands(model_output);
1408
1409        // For set operations: indexed by start line → Some((end line index, content))
1410        // For insert operations: indexed by line index → vec of content to insert after
1411        // Insert-before-first is tracked separately.
1412        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1413        let mut insert_before_first: Vec<&str> = Vec::new();
1414        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1415
1416        for command in &commands {
1417            match command {
1418                EditCommand::Set {
1419                    start,
1420                    end,
1421                    content,
1422                } => {
1423                    if start.index < old_hashes.len()
1424                        && end.index < old_hashes.len()
1425                        && start.index <= end.index
1426                        && old_hashes[start.index] == start.hash
1427                        && old_hashes[end.index] == end.hash
1428                    {
1429                        set_ops[start.index] = Some((end.index, *content));
1430                    }
1431                }
1432                EditCommand::Insert { after, content } => match after {
1433                    None => insert_before_first.push(*content),
1434                    Some(line_ref) => {
1435                        if line_ref.index < old_hashes.len()
1436                            && old_hashes[line_ref.index] == line_ref.hash
1437                        {
1438                            insert_after[line_ref.index].push(*content);
1439                        }
1440                    }
1441                },
1442            }
1443        }
1444
1445        let mut result = String::new();
1446
1447        // Emit any insertions before the first line
1448        for content in &insert_before_first {
1449            result.push_str(content);
1450            if !content.ends_with('\n') {
1451                result.push('\n');
1452            }
1453        }
1454
1455        let mut i = 0;
1456        while i < original_lines.len() {
1457            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1458                // Replace lines i..=end_index with the replacement content
1459                result.push_str(replacement);
1460                if !replacement.is_empty() && !replacement.ends_with('\n') {
1461                    result.push('\n');
1462                }
1463                // Emit any insertions after the end of this set range
1464                if *end_index < insert_after.len() {
1465                    for content in &insert_after[*end_index] {
1466                        result.push_str(content);
1467                        if !content.ends_with('\n') {
1468                            result.push('\n');
1469                        }
1470                    }
1471                }
1472                i = end_index + 1;
1473            } else {
1474                // Keep the original line
1475                result.push_str(original_lines[i]);
1476                result.push('\n');
1477                // Emit any insertions after this line
1478                for content in &insert_after[i] {
1479                    result.push_str(content);
1480                    if !content.ends_with('\n') {
1481                        result.push('\n');
1482                    }
1483                }
1484                i += 1;
1485            }
1486        }
1487
1488        // Preserve trailing newline behavior: if the original ended with a
1489        // newline the result already has one; if it didn't, trim the extra one
1490        // we added.
1491        if !editable_region.ends_with('\n') && result.ends_with('\n') {
1492            result.pop();
1493        }
1494
1495        result
1496    }
1497
1498    /// Convert a unified diff patch into hashline edit commands.
1499    ///
1500    /// Parses the unified diff `patch` directly to determine which lines of
1501    /// `old_text` are deleted/replaced and what new lines are added, then emits
1502    /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1503    /// `{index}:{hash}` identifiers.
1504    ///
1505    /// `cursor_offset` is an optional byte offset into the first hunk's new
1506    /// text (context + additions) where the cursor marker should be placed.
1507    pub fn patch_to_edit_commands(
1508        old_text: &str,
1509        patch: &str,
1510        cursor_offset: Option<usize>,
1511    ) -> Result<String> {
1512        let old_lines: Vec<&str> = old_text.lines().collect();
1513        let old_hashes: Vec<u8> = old_lines
1514            .iter()
1515            .map(|line| hash_line(line.as_bytes()))
1516            .collect();
1517
1518        let mut result = String::new();
1519        let mut first_hunk = true;
1520
1521        struct Hunk<'a> {
1522            line_range: Range<usize>,
1523            new_text_lines: Vec<&'a str>,
1524            cursor_line_offset_in_new_text: Option<(usize, usize)>,
1525        }
1526
1527        // Parse the patch line by line. We only care about hunk headers,
1528        // context, deletions, and additions.
1529        let mut old_line_index: usize = 0;
1530        let mut current_hunk: Option<Hunk> = None;
1531        // Byte offset tracking within the hunk's new text for cursor placement.
1532        let mut new_text_byte_offset: usize = 0;
1533        // The line index of the last old line seen before/in the current hunk
1534        // (used for insert-after reference).
1535        let mut last_old_line_before_hunk: Option<usize> = None;
1536
1537        fn flush_hunk(
1538            hunk: Hunk,
1539            last_old_line: Option<usize>,
1540            result: &mut String,
1541            old_hashes: &[u8],
1542        ) {
1543            if hunk.line_range.is_empty() {
1544                // Pure insertion — reference the old line to insert after when in bounds.
1545                if let Some(after) = last_old_line
1546                    && let Some(&hash) = old_hashes.get(after)
1547                {
1548                    write!(
1549                        result,
1550                        "{INSERT_COMMAND_MARKER}{}\n",
1551                        LineRef { index: after, hash }
1552                    )
1553                    .unwrap();
1554                } else {
1555                    result.push_str(INSERT_COMMAND_MARKER);
1556                    result.push('\n');
1557                }
1558            } else {
1559                let start = hunk.line_range.start;
1560                let end_exclusive = hunk.line_range.end;
1561                let deleted_line_count = end_exclusive.saturating_sub(start);
1562
1563                if deleted_line_count == 1 {
1564                    if let Some(&hash) = old_hashes.get(start) {
1565                        write!(
1566                            result,
1567                            "{SET_COMMAND_MARKER}{}\n",
1568                            LineRef { index: start, hash }
1569                        )
1570                        .unwrap();
1571                    } else {
1572                        result.push_str(SET_COMMAND_MARKER);
1573                        result.push('\n');
1574                    }
1575                } else {
1576                    let end_inclusive = end_exclusive - 1;
1577                    match (
1578                        old_hashes.get(start).copied(),
1579                        old_hashes.get(end_inclusive).copied(),
1580                    ) {
1581                        (Some(start_hash), Some(end_hash)) => {
1582                            write!(
1583                                result,
1584                                "{SET_COMMAND_MARKER}{}-{}\n",
1585                                LineRef {
1586                                    index: start,
1587                                    hash: start_hash
1588                                },
1589                                LineRef {
1590                                    index: end_inclusive,
1591                                    hash: end_hash
1592                                }
1593                            )
1594                            .unwrap();
1595                        }
1596                        _ => {
1597                            result.push_str(SET_COMMAND_MARKER);
1598                            result.push('\n');
1599                        }
1600                    }
1601                }
1602            }
1603            for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1604                if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1605                    && line_offset == cursor_line_offset
1606                {
1607                    result.push_str(&line[..char_offset]);
1608                    result.push_str(CURSOR_MARKER);
1609                    result.push_str(&line[char_offset..]);
1610                    continue;
1611                }
1612
1613                result.push_str(line);
1614            }
1615        }
1616
1617        for raw_line in patch.split_inclusive('\n') {
1618            if raw_line.starts_with("@@") {
1619                // Flush any pending change hunk from a previous patch hunk.
1620                if let Some(hunk) = current_hunk.take() {
1621                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1622                }
1623
1624                // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1625                // We intentionally do not trust old_start as a direct local index into `old_text`,
1626                // because some patches are produced against a larger file region and carry
1627                // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1628                if first_hunk {
1629                    new_text_byte_offset = 0;
1630                    first_hunk = false;
1631                }
1632                continue;
1633            }
1634
1635            if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1636                continue;
1637            }
1638            if raw_line.starts_with("\\ No newline") {
1639                continue;
1640            }
1641
1642            if raw_line.starts_with('-') {
1643                // Extend or start a change hunk with this deleted old line.
1644                match &mut current_hunk {
1645                    Some(Hunk {
1646                        line_range: range, ..
1647                    }) => range.end = old_line_index + 1,
1648                    None => {
1649                        current_hunk = Some(Hunk {
1650                            line_range: old_line_index..old_line_index + 1,
1651                            new_text_lines: Vec::new(),
1652                            cursor_line_offset_in_new_text: None,
1653                        });
1654                    }
1655                }
1656                old_line_index += 1;
1657            } else if let Some(added_content) = raw_line.strip_prefix('+') {
1658                // Place cursor marker if cursor_offset falls within this line.
1659                let mut cursor_line_offset = None;
1660                if let Some(cursor_off) = cursor_offset
1661                    && (first_hunk
1662                        || cursor_off >= new_text_byte_offset
1663                            && cursor_off <= new_text_byte_offset + added_content.len())
1664                {
1665                    let line_offset = added_content.floor_char_boundary(
1666                        cursor_off
1667                            .saturating_sub(new_text_byte_offset)
1668                            .min(added_content.len()),
1669                    );
1670                    cursor_line_offset = Some(line_offset);
1671                }
1672
1673                new_text_byte_offset += added_content.len();
1674
1675                let hunk = current_hunk.get_or_insert(Hunk {
1676                    line_range: old_line_index..old_line_index,
1677                    new_text_lines: vec![],
1678                    cursor_line_offset_in_new_text: None,
1679                });
1680                hunk.new_text_lines.push(added_content);
1681                hunk.cursor_line_offset_in_new_text = cursor_line_offset
1682                    .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1683            } else {
1684                // Context line (starts with ' ' or is empty).
1685                if let Some(hunk) = current_hunk.take() {
1686                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1687                }
1688                last_old_line_before_hunk = Some(old_line_index);
1689                old_line_index += 1;
1690                let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1691                new_text_byte_offset += content.len();
1692            }
1693        }
1694
1695        // Flush final group.
1696        if let Some(hunk) = current_hunk.take() {
1697            flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1698        }
1699
1700        // Trim a single trailing newline.
1701        if result.ends_with('\n') {
1702            result.pop();
1703        }
1704
1705        if result.is_empty() {
1706            return Ok(NO_EDITS_COMMAND_MARKER.to_string());
1707        }
1708
1709        Ok(result)
1710    }
1711
1712    #[cfg(test)]
1713    mod tests {
1714        use super::*;
1715        use indoc::indoc;
1716
1717        #[test]
1718        fn test_format_cursor_region() {
1719            struct Case {
1720                name: &'static str,
1721                context: &'static str,
1722                editable_range: Range<usize>,
1723                cursor_offset: usize,
1724                expected: &'static str,
1725            }
1726
1727            let cases = [
1728                Case {
1729                    name: "basic_cursor_placement",
1730                    context: "hello world\n",
1731                    editable_range: 0..12,
1732                    cursor_offset: 5,
1733                    expected: indoc! {"
1734                    <|file_sep|>test.rs
1735                    <|fim_prefix|>
1736                    <|fim_middle|>current
1737                    0:5c|hello<|user_cursor|> world
1738                    <|fim_suffix|>
1739                    <|fim_middle|>updated
1740                    "},
1741                },
1742                Case {
1743                    name: "multiline_cursor_on_second_line",
1744                    context: "aaa\nbbb\nccc\n",
1745                    editable_range: 0..12,
1746                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
1747                    expected: indoc! {"
1748                    <|file_sep|>test.rs
1749                    <|fim_prefix|>
1750                    <|fim_middle|>current
1751                    0:23|aaa
1752                    1:26|b<|user_cursor|>bb
1753                    2:29|ccc
1754                    <|fim_suffix|>
1755                    <|fim_middle|>updated
1756                    "},
1757                },
1758                Case {
1759                    name: "no_trailing_newline_in_context",
1760                    context: "line1\nline2",
1761                    editable_range: 0..11,
1762                    cursor_offset: 3,
1763                    expected: indoc! {"
1764                    <|file_sep|>test.rs
1765                    <|fim_prefix|>
1766                    <|fim_middle|>current
1767                    0:d9|lin<|user_cursor|>e1
1768                    1:da|line2
1769                    <|fim_suffix|>
1770                    <|fim_middle|>updated
1771                    "},
1772                },
1773                Case {
1774                    name: "leading_newline_in_editable_region",
1775                    context: "\nabc\n",
1776                    editable_range: 0..5,
1777                    cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
1778                    expected: indoc! {"
1779                    <|file_sep|>test.rs
1780                    <|fim_prefix|>
1781                    <|fim_middle|>current
1782                    0:00|
1783                    1:26|a<|user_cursor|>bc
1784                    <|fim_suffix|>
1785                    <|fim_middle|>updated
1786                    "},
1787                },
1788                Case {
1789                    name: "with_suffix",
1790                    context: "abc\ndef",
1791                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
1792                    cursor_offset: 2,
1793                    expected: indoc! {"
1794                    <|file_sep|>test.rs
1795                    <|fim_prefix|>
1796                    <|fim_middle|>current
1797                    0:26|ab<|user_cursor|>c
1798                    <|fim_suffix|>
1799                    def
1800                    <|fim_middle|>updated
1801                    "},
1802                },
1803                Case {
1804                    name: "unicode_two_byte_chars",
1805                    context: "héllo\n",
1806                    editable_range: 0..7,
1807                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
1808                    expected: indoc! {"
1809                    <|file_sep|>test.rs
1810                    <|fim_prefix|>
1811                    <|fim_middle|>current
1812                    0:1b|hé<|user_cursor|>llo
1813                    <|fim_suffix|>
1814                    <|fim_middle|>updated
1815                    "},
1816                },
1817                Case {
1818                    name: "unicode_three_byte_chars",
1819                    context: "日本語\n",
1820                    editable_range: 0..10,
1821                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
1822                    expected: indoc! {"
1823                    <|file_sep|>test.rs
1824                    <|fim_prefix|>
1825                    <|fim_middle|>current
1826                    0:80|日本<|user_cursor|>語
1827                    <|fim_suffix|>
1828                    <|fim_middle|>updated
1829                    "},
1830                },
1831                Case {
1832                    name: "unicode_four_byte_chars",
1833                    context: "a🌍b\n",
1834                    editable_range: 0..7,
1835                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
1836                    expected: indoc! {"
1837                    <|file_sep|>test.rs
1838                    <|fim_prefix|>
1839                    <|fim_middle|>current
1840                    0:6b|a🌍<|user_cursor|>b
1841                    <|fim_suffix|>
1842                    <|fim_middle|>updated
1843                    "},
1844                },
1845                Case {
1846                    name: "cursor_at_start_of_region_not_placed",
1847                    context: "abc\n",
1848                    editable_range: 0..4,
1849                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
1850                    expected: indoc! {"
1851                    <|file_sep|>test.rs
1852                    <|fim_prefix|>
1853                    <|fim_middle|>current
1854                    0:26|abc
1855                    <|fim_suffix|>
1856                    <|fim_middle|>updated
1857                    "},
1858                },
1859                Case {
1860                    name: "cursor_at_end_of_line_not_placed",
1861                    context: "abc\ndef\n",
1862                    editable_range: 0..8,
1863                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
1864                    expected: indoc! {"
1865                    <|file_sep|>test.rs
1866                    <|fim_prefix|>
1867                    <|fim_middle|>current
1868                    0:26|abc
1869                    1:2f|def
1870                    <|fim_suffix|>
1871                    <|fim_middle|>updated
1872                    "},
1873                },
1874                Case {
1875                    name: "cursor_offset_relative_to_context_not_editable_region",
1876                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
1877                    // write_cursor_excerpt_section must subtract it before comparing against
1878                    // per-line offsets within the editable region.
1879                    context: "pre\naaa\nbbb\nsuf\n",
1880                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
1881                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
1882                    expected: indoc! {"
1883                    <|file_sep|>test.rs
1884                    <|fim_prefix|>
1885                    pre
1886                    <|fim_middle|>current
1887                    0:23|aaa
1888                    1:26|b<|user_cursor|>bb
1889                    <|fim_suffix|>
1890                    suf
1891                    <|fim_middle|>updated
1892                    "},
1893                },
1894            ];
1895
1896            for case in &cases {
1897                let mut prompt = String::new();
1898                hashline::write_cursor_excerpt_section(
1899                    &mut prompt,
1900                    Path::new("test.rs"),
1901                    case.context,
1902                    &case.editable_range,
1903                    case.cursor_offset,
1904                );
1905                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
1906            }
1907        }
1908
1909        #[test]
1910        fn test_apply_edit_commands() {
1911            struct Case {
1912                name: &'static str,
1913                original: &'static str,
1914                model_output: &'static str,
1915                expected: &'static str,
1916            }
1917
1918            let cases = vec![
1919                Case {
1920                    name: "set_single_line",
1921                    original: indoc! {"
1922                    let mut total = 0;
1923                    for product in products {
1924                        total += ;
1925                    }
1926                    total
1927                "},
1928                    model_output: indoc! {"
1929                    <|set|>2:87
1930                        total += product.price;
1931                "},
1932                    expected: indoc! {"
1933                    let mut total = 0;
1934                    for product in products {
1935                        total += product.price;
1936                    }
1937                    total
1938                "},
1939                },
1940                Case {
1941                    name: "set_range",
1942                    original: indoc! {"
1943                    fn foo() {
1944                        let x = 1;
1945                        let y = 2;
1946                        let z = 3;
1947                    }
1948                "},
1949                    model_output: indoc! {"
1950                    <|set|>1:46-3:4a
1951                        let sum = 6;
1952                "},
1953                    expected: indoc! {"
1954                    fn foo() {
1955                        let sum = 6;
1956                    }
1957                "},
1958                },
1959                Case {
1960                    name: "insert_after_line",
1961                    original: indoc! {"
1962                    fn main() {
1963                        let x = 1;
1964                    }
1965                "},
1966                    model_output: indoc! {"
1967                    <|insert|>1:46
1968                        let y = 2;
1969                "},
1970                    expected: indoc! {"
1971                    fn main() {
1972                        let x = 1;
1973                        let y = 2;
1974                    }
1975                "},
1976                },
1977                Case {
1978                    name: "insert_before_first",
1979                    original: indoc! {"
1980                    let x = 1;
1981                    let y = 2;
1982                "},
1983                    model_output: indoc! {"
1984                    <|insert|>
1985                    use std::io;
1986                "},
1987                    expected: indoc! {"
1988                    use std::io;
1989                    let x = 1;
1990                    let y = 2;
1991                "},
1992                },
1993                Case {
1994                    name: "set_with_cursor_marker",
1995                    original: indoc! {"
1996                    fn main() {
1997                        println!();
1998                    }
1999                "},
2000                    model_output: indoc! {"
2001                    <|set|>1:34
2002                        eprintln!(\"<|user_cursor|>\");
2003                "},
2004                    expected: indoc! {"
2005                    fn main() {
2006                        eprintln!(\"<|user_cursor|>\");
2007                    }
2008                "},
2009                },
2010                Case {
2011                    name: "multiple_set_commands",
2012                    original: indoc! {"
2013                    aaa
2014                    bbb
2015                    ccc
2016                    ddd
2017                "},
2018                    model_output: indoc! {"
2019                    <|set|>0:23
2020                    AAA
2021                    <|set|>2:29
2022                    CCC
2023                "},
2024                    expected: indoc! {"
2025                    AAA
2026                    bbb
2027                    CCC
2028                    ddd
2029                "},
2030                },
2031                Case {
2032                    name: "set_range_multiline_replacement",
2033                    original: indoc! {"
2034                    fn handle_submit() {
2035                    }
2036
2037                    fn handle_keystroke() {
2038                "},
2039                    model_output: indoc! {"
2040                    <|set|>0:3f-1:7d
2041                    fn handle_submit(modal_state: &mut ModalState) {
2042                        <|user_cursor|>
2043                    }
2044                "},
2045                    expected: indoc! {"
2046                    fn handle_submit(modal_state: &mut ModalState) {
2047                        <|user_cursor|>
2048                    }
2049
2050                    fn handle_keystroke() {
2051                "},
2052                },
2053                Case {
2054                    name: "no_edit_commands_returns_original",
2055                    original: indoc! {"
2056                    hello
2057                    world
2058                "},
2059                    model_output: "some random text with no commands",
2060                    expected: indoc! {"
2061                    hello
2062                    world
2063                "},
2064                },
2065                Case {
2066                    name: "no_edits_command_returns_original",
2067                    original: indoc! {"
2068                    hello
2069                    world
2070                "},
2071                    model_output: "<|no_edits|>",
2072                    expected: indoc! {"
2073                    hello
2074                    world
2075                "},
2076                },
2077                Case {
2078                    name: "wrong_hash_set_ignored",
2079                    original: indoc! {"
2080                    aaa
2081                    bbb
2082                "},
2083                    model_output: indoc! {"
2084                    <|set|>0:ff
2085                    ZZZ
2086                "},
2087                    expected: indoc! {"
2088                    aaa
2089                    bbb
2090                "},
2091                },
2092                Case {
2093                    name: "insert_and_set_combined",
2094                    original: indoc! {"
2095                    alpha
2096                    beta
2097                    gamma
2098                "},
2099                    model_output: indoc! {"
2100                    <|set|>0:06
2101                    ALPHA
2102                    <|insert|>1:9c
2103                    beta_extra
2104                "},
2105                    expected: indoc! {"
2106                    ALPHA
2107                    beta
2108                    beta_extra
2109                    gamma
2110                "},
2111                },
2112                Case {
2113                    name: "no_trailing_newline_preserved",
2114                    original: "hello\nworld",
2115                    model_output: indoc! {"
2116                    <|set|>0:14
2117                    HELLO
2118                "},
2119                    expected: "HELLO\nworld",
2120                },
2121                Case {
2122                    name: "set_range_hash_mismatch_in_end_bound",
2123                    original: indoc! {"
2124                    one
2125                    two
2126                    three
2127                "},
2128                    model_output: indoc! {"
2129                    <|set|>0:42-2:ff
2130                    ONE_TWO_THREE
2131                "},
2132                    expected: indoc! {"
2133                    one
2134                    two
2135                    three
2136                "},
2137                },
2138                Case {
2139                    name: "set_range_start_greater_than_end_ignored",
2140                    original: indoc! {"
2141                    a
2142                    b
2143                    c
2144                "},
2145                    model_output: indoc! {"
2146                    <|set|>2:63-1:62
2147                    X
2148                "},
2149                    expected: indoc! {"
2150                    a
2151                    b
2152                    c
2153                "},
2154                },
2155                Case {
2156                    name: "insert_out_of_bounds_ignored",
2157                    original: indoc! {"
2158                    x
2159                    y
2160                "},
2161                    model_output: indoc! {"
2162                    <|insert|>99:aa
2163                    z
2164                "},
2165                    expected: indoc! {"
2166                    x
2167                    y
2168                "},
2169                },
2170                Case {
2171                    name: "set_out_of_bounds_ignored",
2172                    original: indoc! {"
2173                    x
2174                    y
2175                "},
2176                    model_output: indoc! {"
2177                    <|set|>99:aa
2178                    z
2179                "},
2180                    expected: indoc! {"
2181                    x
2182                    y
2183                "},
2184                },
2185                Case {
2186                    name: "malformed_set_command_ignored",
2187                    original: indoc! {"
2188                    alpha
2189                    beta
2190                "},
2191                    model_output: indoc! {"
2192                    <|set|>not-a-line-ref
2193                    UPDATED
2194                "},
2195                    expected: indoc! {"
2196                    alpha
2197                    beta
2198                "},
2199                },
2200                Case {
2201                    name: "malformed_insert_hash_treated_as_before_first",
2202                    original: indoc! {"
2203                    alpha
2204                    beta
2205                "},
2206                    model_output: indoc! {"
2207                    <|insert|>1:nothex
2208                    preamble
2209                "},
2210                    expected: indoc! {"
2211                    preamble
2212                    alpha
2213                    beta
2214                "},
2215                },
2216                Case {
2217                    name: "set_then_insert_same_target_orders_insert_after_replacement",
2218                    original: indoc! {"
2219                    cat
2220                    dog
2221                "},
2222                    model_output: indoc! {"
2223                    <|set|>0:38
2224                    CAT
2225                    <|insert|>0:38
2226                    TAIL
2227                "},
2228                    expected: indoc! {"
2229                    CAT
2230                    TAIL
2231                    dog
2232                "},
2233                },
2234                Case {
2235                    name: "overlapping_set_ranges_last_wins",
2236                    original: indoc! {"
2237                    a
2238                    b
2239                    c
2240                    d
2241                "},
2242                    model_output: indoc! {"
2243                    <|set|>0:61-2:63
2244                    FIRST
2245                    <|set|>1:62-3:64
2246                    SECOND
2247                "},
2248                    expected: indoc! {"
2249                    FIRST
2250                    d
2251                "},
2252                },
2253                Case {
2254                    name: "insert_before_first_and_after_line",
2255                    original: indoc! {"
2256                    a
2257                    b
2258                "},
2259                    model_output: indoc! {"
2260                    <|insert|>
2261                    HEAD
2262                    <|insert|>0:61
2263                    MID
2264                "},
2265                    expected: indoc! {"
2266                    HEAD
2267                    a
2268                    MID
2269                    b
2270                "},
2271                },
2272            ];
2273
2274            for case in &cases {
2275                let result = hashline::apply_edit_commands(case.original, &case.model_output);
2276                assert_eq!(result, case.expected, "failed case: {}", case.name);
2277            }
2278        }
2279
2280        #[test]
2281        fn test_output_has_edit_commands() {
2282            assert!(hashline::output_has_edit_commands(&format!(
2283                "{}0:ab\nnew",
2284                SET_COMMAND_MARKER
2285            )));
2286            assert!(hashline::output_has_edit_commands(&format!(
2287                "{}0:ab\nnew",
2288                INSERT_COMMAND_MARKER
2289            )));
2290            assert!(hashline::output_has_edit_commands(&format!(
2291                "some text\n{}1:cd\nstuff",
2292                SET_COMMAND_MARKER
2293            )));
2294            assert!(!hashline::output_has_edit_commands("just plain text"));
2295            assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2296            assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2297        }
2298
2299        // ---- hashline::patch_to_edit_commands round-trip tests ----
2300
2301        #[test]
2302        fn test_patch_to_edit_commands() {
2303            struct Case {
2304                name: &'static str,
2305                old: &'static str,
2306                patch: &'static str,
2307                expected_new: &'static str,
2308            }
2309
2310            let cases = [
2311                Case {
2312                    name: "single_line_replacement",
2313                    old: indoc! {"
2314                    let mut total = 0;
2315                    for product in products {
2316                        total += ;
2317                    }
2318                    total
2319                "},
2320                    patch: indoc! {"
2321                    @@ -1,5 +1,5 @@
2322                     let mut total = 0;
2323                     for product in products {
2324                    -    total += ;
2325                    +    total += product.price;
2326                     }
2327                     total
2328                "},
2329                    expected_new: indoc! {"
2330                    let mut total = 0;
2331                    for product in products {
2332                        total += product.price;
2333                    }
2334                    total
2335                "},
2336                },
2337                Case {
2338                    name: "multiline_replacement",
2339                    old: indoc! {"
2340                    fn foo() {
2341                        let x = 1;
2342                        let y = 2;
2343                        let z = 3;
2344                    }
2345                "},
2346                    patch: indoc! {"
2347                    @@ -1,5 +1,3 @@
2348                     fn foo() {
2349                    -    let x = 1;
2350                    -    let y = 2;
2351                    -    let z = 3;
2352                    +    let sum = 1 + 2 + 3;
2353                     }
2354                "},
2355                    expected_new: indoc! {"
2356                    fn foo() {
2357                        let sum = 1 + 2 + 3;
2358                    }
2359                "},
2360                },
2361                Case {
2362                    name: "insertion",
2363                    old: indoc! {"
2364                    fn main() {
2365                        let x = 1;
2366                    }
2367                "},
2368                    patch: indoc! {"
2369                    @@ -1,3 +1,4 @@
2370                     fn main() {
2371                         let x = 1;
2372                    +    let y = 2;
2373                     }
2374                "},
2375                    expected_new: indoc! {"
2376                    fn main() {
2377                        let x = 1;
2378                        let y = 2;
2379                    }
2380                "},
2381                },
2382                Case {
2383                    name: "insertion_before_first",
2384                    old: indoc! {"
2385                    let x = 1;
2386                    let y = 2;
2387                "},
2388                    patch: indoc! {"
2389                    @@ -1,2 +1,3 @@
2390                    +use std::io;
2391                     let x = 1;
2392                     let y = 2;
2393                "},
2394                    expected_new: indoc! {"
2395                    use std::io;
2396                    let x = 1;
2397                    let y = 2;
2398                "},
2399                },
2400                Case {
2401                    name: "deletion",
2402                    old: indoc! {"
2403                    aaa
2404                    bbb
2405                    ccc
2406                    ddd
2407                "},
2408                    patch: indoc! {"
2409                    @@ -1,4 +1,2 @@
2410                     aaa
2411                    -bbb
2412                    -ccc
2413                     ddd
2414                "},
2415                    expected_new: indoc! {"
2416                    aaa
2417                    ddd
2418                "},
2419                },
2420                Case {
2421                    name: "multiple_changes",
2422                    old: indoc! {"
2423                    alpha
2424                    beta
2425                    gamma
2426                    delta
2427                    epsilon
2428                "},
2429                    patch: indoc! {"
2430                    @@ -1,5 +1,5 @@
2431                    -alpha
2432                    +ALPHA
2433                     beta
2434                     gamma
2435                    -delta
2436                    +DELTA
2437                     epsilon
2438                "},
2439                    expected_new: indoc! {"
2440                    ALPHA
2441                    beta
2442                    gamma
2443                    DELTA
2444                    epsilon
2445                "},
2446                },
2447                Case {
2448                    name: "replace_with_insertion",
2449                    old: indoc! {r#"
2450                    fn handle() {
2451                        modal_state.close();
2452                        modal_state.dismiss();
2453                "#},
2454                    patch: indoc! {r#"
2455                    @@ -1,3 +1,4 @@
2456                     fn handle() {
2457                         modal_state.close();
2458                    +    eprintln!("");
2459                         modal_state.dismiss();
2460                "#},
2461                    expected_new: indoc! {r#"
2462                    fn handle() {
2463                        modal_state.close();
2464                        eprintln!("");
2465                        modal_state.dismiss();
2466                "#},
2467                },
2468                Case {
2469                    name: "complete_replacement",
2470                    old: indoc! {"
2471                    aaa
2472                    bbb
2473                    ccc
2474                "},
2475                    patch: indoc! {"
2476                    @@ -1,3 +1,3 @@
2477                    -aaa
2478                    -bbb
2479                    -ccc
2480                    +xxx
2481                    +yyy
2482                    +zzz
2483                "},
2484                    expected_new: indoc! {"
2485                    xxx
2486                    yyy
2487                    zzz
2488                "},
2489                },
2490                Case {
2491                    name: "add_function_body",
2492                    old: indoc! {"
2493                    fn foo() {
2494                        modal_state.dismiss();
2495                    }
2496
2497                    fn
2498
2499                    fn handle_keystroke() {
2500                "},
2501                    patch: indoc! {"
2502                    @@ -1,6 +1,8 @@
2503                     fn foo() {
2504                         modal_state.dismiss();
2505                     }
2506
2507                    -fn
2508                    +fn handle_submit() {
2509                    +    todo()
2510                    +}
2511
2512                     fn handle_keystroke() {
2513                "},
2514                    expected_new: indoc! {"
2515                    fn foo() {
2516                        modal_state.dismiss();
2517                    }
2518
2519                    fn handle_submit() {
2520                        todo()
2521                    }
2522
2523                    fn handle_keystroke() {
2524                "},
2525                },
2526                Case {
2527                    name: "with_cursor_offset",
2528                    old: indoc! {r#"
2529                    fn main() {
2530                        println!();
2531                    }
2532                "#},
2533                    patch: indoc! {r#"
2534                        @@ -1,3 +1,3 @@
2535                        fn main() {
2536                        -    println!();
2537                        +    eprintln!("");
2538                        }
2539                    "#},
2540                    expected_new: indoc! {r#"
2541                        fn main() {
2542                            eprintln!("<|user_cursor|>");
2543                        }
2544                    "#},
2545                },
2546                Case {
2547                    name: "non_local_hunk_header_pure_insertion_repro",
2548                    old: indoc! {"
2549                        aaa
2550                        bbb
2551                    "},
2552                    patch: indoc! {"
2553                        @@ -20,2 +20,3 @@
2554                        aaa
2555                        +xxx
2556                        bbb
2557                    "},
2558                    expected_new: indoc! {"
2559                        aaa
2560                        xxx
2561                        bbb
2562                    "},
2563                },
2564                Case {
2565                    name: "empty_patch_produces_no_edits_marker",
2566                    old: indoc! {"
2567                        aaa
2568                        bbb
2569                    "},
2570                    patch: "@@ -20,2 +20,3 @@\n",
2571                    expected_new: indoc! {"
2572                        aaa
2573                        bbb
2574                    "},
2575                },
2576            ];
2577
2578            for case in &cases {
2579                // The cursor_offset for patch_to_edit_commands is relative to
2580                // the first hunk's new text (context + additions). We compute
2581                // it by finding where the marker sits in the expected output
2582                // (which mirrors the new text of the hunk).
2583                let cursor_offset = case.expected_new.find(CURSOR_MARKER);
2584
2585                let commands =
2586                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
2587                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
2588
2589                assert!(
2590                    hashline::output_has_edit_commands(&commands),
2591                    "case {}: expected edit commands, got: {commands:?}",
2592                    case.name,
2593                );
2594
2595                let applied = hashline::apply_edit_commands(case.old, &commands);
2596                assert_eq!(applied, case.expected_new, "case {}", case.name);
2597            }
2598        }
2599    }
2600}
2601
2602pub mod seed_coder {
2603    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2604    //!
2605    //! Seed-Coder uses different FIM tokens and order than Qwen:
2606    //! - SPM order: suffix comes FIRST, then prefix, then middle
2607    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2608    //! - File markers: StarCoder-style `<filename>path` (single token + path)
2609    //!
2610    //! All context (related files, edit history) goes in the PREFIX section.
2611    //! The suffix contains only code after the editable region.
2612    //!
2613    //! Example prompt:
2614    //!
2615    //! <[fim-suffix]>
2616    //! code after editable region
2617    //! <[fim-prefix]><filename>related/file.py
2618    //! related file content
2619    //!
2620    //! <filename>edit_history
2621    //! --- a/some_file.py
2622    //! +++ b/some_file.py
2623    //! -old
2624    //! +new
2625    //!
2626    //! <filename>path/to/target_file.py
2627    //! code before editable region
2628    //! <<<<<<< CURRENT
2629    //! code that
2630    //! needs to<|user_cursor|>
2631    //! be rewritten
2632    //! =======
2633    //! <[fim-middle]>
2634    //!
2635    //! Expected output (model generates):
2636    //!
2637    //! updated
2638    //! code with
2639    //! changes applied
2640    //! >>>>>>> UPDATED
2641
2642    use super::*;
2643
2644    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
2645    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
2646    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
2647    pub const FILE_MARKER: &str = "<filename>";
2648
2649    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
2650    pub const SEPARATOR: &str = "=======\n";
2651    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
2652
2653    pub const NO_EDITS: &str = "NO_EDITS\n";
2654
2655    pub fn special_tokens() -> &'static [&'static str] {
2656        &[
2657            FIM_SUFFIX,
2658            FIM_PREFIX,
2659            FIM_MIDDLE,
2660            FILE_MARKER,
2661            START_MARKER,
2662            SEPARATOR,
2663            END_MARKER,
2664            CURSOR_MARKER,
2665        ]
2666    }
2667
2668    pub fn write_cursor_excerpt_section(
2669        prompt: &mut String,
2670        path: &Path,
2671        context: &str,
2672        editable_range: &Range<usize>,
2673        cursor_offset: usize,
2674    ) {
2675        let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2676        prompt.push_str(&section);
2677    }
2678
2679    pub fn format_prompt_with_budget(
2680        path: &Path,
2681        context: &str,
2682        editable_range: &Range<usize>,
2683        cursor_offset: usize,
2684        events: &[Arc<Event>],
2685        related_files: &[RelatedFile],
2686        max_tokens: usize,
2687    ) -> String {
2688        let cursor_prefix_section =
2689            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2690        assemble_fim_prompt(
2691            context,
2692            editable_range,
2693            &cursor_prefix_section,
2694            events,
2695            related_files,
2696            max_tokens,
2697        )
2698    }
2699
2700    pub fn assemble_fim_prompt(
2701        context: &str,
2702        editable_range: &Range<usize>,
2703        cursor_prefix_section: &str,
2704        events: &[Arc<Event>],
2705        related_files: &[RelatedFile],
2706        max_tokens: usize,
2707    ) -> String {
2708        let suffix_section = build_suffix_section(context, editable_range);
2709
2710        let suffix_tokens = estimate_tokens(suffix_section.len());
2711        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
2712        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2713
2714        let edit_history_section = super::format_edit_history_within_budget(
2715            events,
2716            FILE_MARKER,
2717            "edit_history",
2718            budget_after_cursor,
2719            max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
2720        );
2721        let edit_history_tokens = estimate_tokens(edit_history_section.len());
2722        let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
2723
2724        let related_files_section = super::format_related_files_within_budget(
2725            related_files,
2726            FILE_MARKER,
2727            "",
2728            budget_after_edit_history,
2729        );
2730
2731        let mut prompt = String::new();
2732        prompt.push_str(&suffix_section);
2733        prompt.push_str(FIM_PREFIX);
2734        prompt.push_str(&related_files_section);
2735        if !related_files_section.is_empty() {
2736            prompt.push('\n');
2737        }
2738        prompt.push_str(&edit_history_section);
2739        if !edit_history_section.is_empty() {
2740            prompt.push('\n');
2741        }
2742        prompt.push_str(cursor_prefix_section);
2743        prompt.push_str(FIM_MIDDLE);
2744        prompt
2745    }
2746
2747    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2748        let mut section = String::new();
2749        section.push_str(FIM_SUFFIX);
2750        section.push_str(&context[editable_range.end..]);
2751        if !section.ends_with('\n') {
2752            section.push('\n');
2753        }
2754        section
2755    }
2756
2757    fn build_cursor_prefix_section(
2758        path: &Path,
2759        context: &str,
2760        editable_range: &Range<usize>,
2761        cursor_offset: usize,
2762    ) -> String {
2763        let mut section = String::new();
2764        let path_str = path.to_string_lossy();
2765        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2766
2767        section.push_str(&context[..editable_range.start]);
2768        section.push_str(START_MARKER);
2769        section.push_str(&context[editable_range.start..cursor_offset]);
2770        section.push_str(CURSOR_MARKER);
2771        section.push_str(&context[cursor_offset..editable_range.end]);
2772        if !section.ends_with('\n') {
2773            section.push('\n');
2774        }
2775        section.push_str(SEPARATOR);
2776        section
2777    }
2778
2779    /// Format patch as containing no changes if it's empty; otherwise return None.
2780    pub(crate) fn no_edits(patch: &str) -> Option<String> {
2781        // Count lines in the patch
2782        let empty_patch = patch.lines().count() <= 3;
2783        if empty_patch {
2784            Some(format!("{NO_EDITS}{END_MARKER}"))
2785        } else {
2786            None
2787        }
2788    }
2789}
2790
2791pub mod v0304_variable_edit {
2792    //! A prompt format with no fixed editable region. The entire context is shown
2793    //! to the model, and it chooses which text to replace by outputting surrounding
2794    //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
2795    //! text.
2796    //!
2797    //! Example prompt:
2798    //!
2799    //! <|file_sep|>path/to/file.py
2800    //! zero
2801    //! one
2802    //! two
2803    //! three<|user_cursor|>
2804    //! four
2805    //! five
2806    //! <|fim_prefix|>
    //!
2808    //! Expected output (model generates):
2809    //!
2810    //! two
2811    //! <|fim_middle|>
2812    //! THREE
2813    //! <|fim_suffix|>
2814    //! four
2815    //!
2816    //! The output means: find "two\n...\nfour" in the context, and replace
2817    //! everything between "two\n" and "four" with "THREE\n".
2818
2819    use super::*;
2820
2821    pub fn special_tokens() -> &'static [&'static str] {
2822        &[
2823            "<|fim_prefix|>",
2824            "<|fim_suffix|>",
2825            "<|fim_middle|>",
2826            "<|file_sep|>",
2827            CURSOR_MARKER,
2828        ]
2829    }
2830
2831    pub fn write_cursor_excerpt_section(
2832        prompt: &mut String,
2833        path: &Path,
2834        context: &str,
2835        cursor_offset: usize,
2836    ) {
2837        let path_str = path.to_string_lossy();
2838        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
2839
2840        prompt.push_str(&context[..cursor_offset]);
2841        prompt.push_str(CURSOR_MARKER);
2842        prompt.push_str(&context[cursor_offset..]);
2843        if !prompt.ends_with('\n') {
2844            prompt.push('\n');
2845        }
2846        prompt.push_str("<|fim_prefix|>\n")
2847    }
2848
2849    /// Apply a variable-edit model output to the original context text.
2850    ///
2851    /// The model output has the form:
2852    ///
2853    /// - prefix context lines
2854    /// - `<|fim_middle|>`
2855    /// - new text
2856    /// - `<|fim_suffix|>`
2857    /// - suffix context lines
2858    ///
2859    /// We locate the prefix/suffix context lines in the original text and replace
2860    /// everything between them with the new text.
2861    pub fn apply_variable_edit(
2862        context: &str,
2863        model_output: &str,
2864    ) -> Result<(Range<usize>, String)> {
2865        let (prefix_context, rest) = model_output
2866            .split_once("<|fim_middle|>\n")
2867            .or_else(|| model_output.split_once("<|fim_middle|>"))
2868            .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
2869
2870        let (new_text, suffix_context) = rest
2871            .split_once("<|fim_suffix|>\n")
2872            .or_else(|| rest.split_once("<|fim_suffix|>"))
2873            .unwrap_or((rest, ""));
2874
2875        let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
2876            suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
2877        } else {
2878            suffix_context
2879        };
2880
2881        let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
2882            .ok_or_else(|| anyhow!("could not locate prefix lines"))?
2883            + prefix_context.len();
2884        let suffix_offset = if suffix_context.is_empty() {
2885            context.len()
2886        } else {
2887            find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
2888                .ok_or_else(|| anyhow!("could not locate suffix lines"))?
2889                + prefix_offset
2890        };
2891
2892        let edit_range = prefix_offset..suffix_offset;
2893        return Ok((edit_range, new_text.to_string()));
2894    }
2895
2896    fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
2897        if needle.is_empty() {
2898            return Some(0);
2899        }
2900
2901        haystack.match_indices(needle).find_map(|(offset, _)| {
2902            let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
2903            matched_line_start.then_some(offset)
2904        })
2905    }
2906
2907    /// Convert a unified diff patch into the variable-edit output format.
2908    ///
2909    /// Parses `patch` as a unified diff against `old_text` and produces model
2910    /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
2911    /// delimiters. The diff is resolved by content matching rather than line
2912    /// numbers.
2913    pub fn patch_to_variable_edit_output(
2914        old_text: &str,
2915        patch: &str,
2916        cursor_offset: Option<usize>,
2917    ) -> Result<String> {
2918        // Parse the unified diff into hunks. Each hunk has an `old_context`
2919        // string (context + deleted lines interleaved in order) and a list of
2920        // edits expressed as byte ranges within that context plus replacement
2921        // text.
2922        let hunks = parse_hunks(patch);
2923        if hunks.is_empty() {
2924            return Ok(String::new());
2925        }
2926
2927        // Apply each hunk by finding its old_context in the text and
2928        // performing the edits. We search forward from where the previous
2929        // hunk ended so that hunks are applied in order.
2930        let mut new_text = old_text.to_string();
2931        let mut search_from: usize = 0;
2932        let mut first_hunk_pos: Option<usize> = None;
2933
2934        for hunk in &hunks {
2935            let context_pos = new_text[search_from..]
2936                .find(&hunk.old_context)
2937                .map(|pos| pos + search_from)
2938                .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
2939
2940            if first_hunk_pos.is_none() {
2941                first_hunk_pos = Some(context_pos);
2942            }
2943
2944            // Apply edits in reverse order so byte offsets remain valid.
2945            for edit in hunk.edits.iter().rev() {
2946                let abs_start = context_pos + edit.range.start;
2947                let abs_end = context_pos + edit.range.end;
2948                new_text.replace_range(abs_start..abs_end, &edit.text);
2949            }
2950
2951            // Advance past this hunk's region in the (now modified) text.
2952            let new_region_len: usize =
2953                hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
2954                    len + edit.text.len() - (edit.range.end - edit.range.start)
2955                });
2956            search_from = context_pos + new_region_len;
2957        }
2958
2959        // Now we have old_text and new_text. Find the changed line range by
2960        // comparing them.
2961        let old_lines: Vec<&str> = old_text.lines().collect();
2962        let new_lines: Vec<&str> = new_text.lines().collect();
2963
2964        // Find first differing line.
2965        let first_changed_row = old_lines
2966            .iter()
2967            .zip(new_lines.iter())
2968            .position(|(a, b)| a != b)
2969            .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
2970
2971        // Find last differing line (from the end).
2972        let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
2973        let common_suffix = old_lines
2974            .iter()
2975            .rev()
2976            .zip(new_lines.iter().rev())
2977            .take(max_suffix)
2978            .take_while(|(a, b)| a == b)
2979            .count();
2980
2981        let old_end = old_lines.len() - common_suffix;
2982        let new_end = new_lines.len() - common_suffix;
2983
2984        if first_changed_row == old_end && first_changed_row == new_end {
2985            return Ok(String::new());
2986        }
2987
2988        // Build the replacement text from new_lines[first_diff..new_end].
2989        let mut merged_new_text = String::new();
2990        for line in &new_lines[first_changed_row..new_end] {
2991            merged_new_text.push_str(line);
2992            merged_new_text.push('\n');
2993        }
2994
2995        // cursor_offset is relative to the first hunk's new content in
2996        // new_text. Translate it to an offset within merged_new_text, which
2997        // only contains lines first_diff..new_end of new_text.
2998        if let Some(hunk_offset) = cursor_offset {
2999            let hunk_start = first_hunk_pos.unwrap_or(0);
3000            let absolute_pos = hunk_start + hunk_offset;
3001
3002            // Byte offset where first_diff starts in new_text.
3003            let merged_start: usize = new_lines[..first_changed_row]
3004                .iter()
3005                .map(|line| line.len() + 1)
3006                .sum();
3007
3008            if absolute_pos >= merged_start {
3009                let relative_offset = absolute_pos - merged_start;
3010                if relative_offset <= merged_new_text.len() {
3011                    merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
3012                }
3013            }
3014        }
3015
3016        // Build output with 2 lines of context above and below.
3017        let context_lines_count = 2;
3018        let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
3019        let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
3020
3021        fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
3022            let pattern = &lines[line_range];
3023            let pattern_len = pattern.len();
3024
3025            let mut count = 0;
3026            for offset in 0..=lines.len() - pattern_len {
3027                if &lines[offset..offset + pattern_len] == pattern {
3028                    count += 1;
3029                }
3030            }
3031            count
3032        }
3033
3034        // Expand prefix and suffix until they are unique
3035        while prefix_start > 0 {
3036            if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
3037                prefix_start -= 1;
3038            } else {
3039                break;
3040            }
3041        }
3042        while suffix_end < old_lines.len() {
3043            if count_matches(old_end..suffix_end, &old_lines) > 1 {
3044                suffix_end += 1;
3045            } else {
3046                break;
3047            }
3048        }
3049
3050        let mut output = String::new();
3051        for line in &old_lines[prefix_start..first_changed_row] {
3052            output.push_str(line);
3053            output.push('\n');
3054        }
3055        output.push_str("<|fim_middle|>\n");
3056        output.push_str(&merged_new_text);
3057        output.push_str("<|fim_suffix|>\n");
3058        for line in &old_lines[old_end..suffix_end] {
3059            output.push_str(line);
3060            output.push('\n');
3061        }
3062
3063        Ok(output)
3064    }
3065
3066    struct ParsedHunk {
3067        old_context: String,
3068        edits: Vec<ParsedEdit>,
3069    }
3070
3071    struct ParsedEdit {
3072        range: Range<usize>,
3073        text: String,
3074    }
3075
3076    /// Parse a unified diff into content-based hunks. Each hunk contains an
3077    /// `old_context` string (context lines + deleted lines, which together
3078    /// form the text that should be found in the original) and a list of edits
3079    /// expressed as byte ranges within that context.
3080    fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
3081        let mut hunks = Vec::new();
3082        let mut current: Option<ParsedHunk> = None;
3083
3084        for line in patch.lines() {
3085            if line.starts_with("@@") {
3086                if let Some(hunk) = current.take() {
3087                    if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3088                        hunks.push(hunk);
3089                    }
3090                }
3091                current = Some(ParsedHunk {
3092                    old_context: String::new(),
3093                    edits: Vec::new(),
3094                });
3095            } else if line.starts_with("---") || line.starts_with("+++") {
3096                continue;
3097            } else if let Some(hunk) = &mut current {
3098                if let Some(added) = line.strip_prefix('+') {
3099                    let pos = hunk.old_context.len();
3100                    if let Some(last_edit) = hunk.edits.last_mut() {
3101                        if last_edit.range.end == pos {
3102                            writeln!(&mut last_edit.text, "{added}").ok();
3103                            continue;
3104                        }
3105                    }
3106                    hunk.edits.push(ParsedEdit {
3107                        range: pos..pos,
3108                        text: format!("{added}\n"),
3109                    });
3110                } else if let Some(removed) = line.strip_prefix('-') {
3111                    let start = hunk.old_context.len();
3112                    writeln!(&mut hunk.old_context, "{removed}").ok();
3113                    let end = hunk.old_context.len();
3114                    if let Some(last_edit) = hunk.edits.last_mut() {
3115                        if last_edit.range.end == start {
3116                            last_edit.range.end = end;
3117                            continue;
3118                        }
3119                    }
3120                    hunk.edits.push(ParsedEdit {
3121                        range: start..end,
3122                        text: String::new(),
3123                    });
3124                } else {
3125                    let ctx = line.strip_prefix(' ').unwrap_or(line);
3126                    writeln!(&mut hunk.old_context, "{ctx}").ok();
3127                }
3128            }
3129        }
3130
3131        if let Some(hunk) = current {
3132            if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3133                hunks.push(hunk);
3134            }
3135        }
3136
3137        hunks
3138    }
3139
3140    #[cfg(test)]
3141    mod tests {
3142        use super::*;
3143        use indoc::indoc;
3144
3145        #[test]
3146        fn test_apply_variable_edit() {
3147            struct Case {
3148                name: &'static str,
3149                original: &'static str,
3150                model_output: &'static str,
3151                expected: &'static str,
3152            }
3153
3154            let cases = [
3155                Case {
3156                    name: "simple_single_line_replacement",
3157                    original: indoc! {"
3158                        zero
3159                        one
3160                        two
3161                        three
3162                        four
3163                        five
3164                    "},
3165                    model_output: indoc! {"
3166                        two
3167                        <|fim_middle|>
3168                        THREE
3169                        <|fim_suffix|>
3170                        four
3171                    "},
3172                    expected: indoc! {"
3173                        zero
3174                        one
3175                        two
3176                        THREE
3177                        four
3178                        five
3179                    "},
3180                },
3181                Case {
3182                    name: "multi_line_replacement",
3183                    original: indoc! {"
3184                        a
3185                        b
3186                        c
3187                        d
3188                        e
3189                    "},
3190                    model_output: indoc! {"
3191                        a
3192                        <|fim_middle|>
3193                        B
3194                        C
3195                        D
3196                        <|fim_suffix|>
3197                        e
3198                    "},
3199                    expected: indoc! {"
3200                        a
3201                        B
3202                        C
3203                        D
3204                        e
3205                    "},
3206                },
3207                Case {
3208                    name: "insertion_between_existing_lines",
3209                    original: indoc! {"
3210                        a
3211                        b
3212                        c
3213                    "},
3214                    model_output: indoc! {"
3215                        a
3216                        <|fim_middle|>
3217                        X
3218                        <|fim_suffix|>
3219                        b
3220                    "},
3221                    expected: indoc! {"
3222                        a
3223                        X
3224                        b
3225                        c
3226                    "},
3227                },
3228                Case {
3229                    name: "deletion",
3230                    original: indoc! {"
3231                        a
3232                        b
3233                        c
3234                        d
3235                    "},
3236                    model_output: indoc! {"
3237                        a
3238                        <|fim_middle|>
3239                        <|fim_suffix|>
3240                        c
3241                    "},
3242                    expected: indoc! {"
3243                        a
3244                        c
3245                        d
3246                    "},
3247                },
3248                Case {
3249                    name: "replacement_at_start_no_prefix_context",
3250                    original: indoc! {"
3251                        a
3252                        b
3253                        c
3254                    "},
3255                    model_output: indoc! {"
3256                        <|fim_middle|>
3257                        X
3258                        <|fim_suffix|>
3259                        b
3260                    "},
3261                    expected: indoc! {"
3262                        X
3263                        b
3264                        c
3265                    "},
3266                },
3267                Case {
3268                    name: "replacement_at_end_no_suffix_context",
3269                    original: indoc! {"
3270                        a
3271                        b
3272                        c
3273                    "},
3274                    model_output: indoc! {"
3275                        b
3276                        <|fim_middle|>
3277                        Z
3278                        <|fim_suffix|>
3279                    "},
3280                    expected: indoc! {"
3281                        a
3282                        b
3283                        Z
3284                    "},
3285                },
3286                Case {
3287                    name: "context_with_trailing_newline_is_preserved",
3288                    original: indoc! {"
3289                        a
3290                        b
3291                        c
3292                    "},
3293                    model_output: indoc! {"
3294                        a
3295                        <|fim_middle|>
3296                        B
3297                        <|fim_suffix|>
3298                        c
3299                    "},
3300                    expected: indoc! {"
3301                        a
3302                        B
3303                        c
3304                    "},
3305                },
3306                Case {
3307                    name: "cursor_marker_passes_through_untouched",
3308                    original: indoc! {"
3309                        a
3310                        b
3311                        c
3312                    "},
3313                    model_output: indoc! {"
3314                        a
3315                        <|fim_middle|>
3316                        B<|user_cursor|>B
3317                        <|fim_suffix|>
3318                        c
3319                    "},
3320                    expected: indoc! {"
3321                        a
3322                        B<|user_cursor|>B
3323                        c
3324                    "},
3325                },
3326                Case {
3327                    name: "multiple_prefix_context_lines",
3328                    original: indoc! {"
3329                        a
3330                        b
3331                        c
3332                        d
3333                        e
3334                    "},
3335                    model_output: indoc! {"
3336                        b
3337                        c
3338                        <|fim_middle|>
3339                        D
3340                        <|fim_suffix|>
3341                        e
3342                    "},
3343                    expected: indoc! {"
3344                        a
3345                        b
3346                        c
3347                        D
3348                        e
3349                    "},
3350                },
3351            ];
3352
3353            for case in cases {
3354                let (edit_range, replacement) =
3355                    apply_variable_edit(case.original, case.model_output).unwrap();
3356                let mut edited = case.original.to_string();
3357                edited.replace_range(edit_range, &replacement);
3358                assert_eq!(edited, case.expected, "{}", case.name);
3359            }
3360        }
3361
3362        #[test]
3363        fn test_patch_to_variable_edit() {
3364            struct Case {
3365                name: &'static str,
3366                old: &'static str,
3367                patch: &'static str,
3368                cursor_offset: Option<usize>,
3369                expected_variable_edit: &'static str,
3370                expected_after_apply: &'static str,
3371            }
3372
3373            let cases = [
3374                Case {
3375                    name: "simple_replacement",
3376                    old: indoc! {"
3377                        zero
3378                        one
3379                        two
3380                        three
3381                        four
3382                        five
3383                    "},
3384                    patch: indoc! {"
3385                        @@ -3,3 +3,3 @@
3386                         two
3387                        -three
3388                        +THREE
3389                         four
3390                    "},
3391                    cursor_offset: None,
3392                    expected_variable_edit: indoc! {"
3393                        one
3394                        two
3395                        <|fim_middle|>
3396                        THREE
3397                        <|fim_suffix|>
3398                        four
3399                        five
3400                    "},
3401                    expected_after_apply: indoc! {"
3402                        zero
3403                        one
3404                        two
3405                        THREE
3406                        four
3407                        five
3408                    "},
3409                },
3410                Case {
3411                    name: "insertion",
3412                    old: indoc! {"
3413                        a
3414                        b
3415                        c
3416                        d
3417                        e
3418                    "},
3419                    patch: indoc! {"
3420                        @@ -2,0 +3,1 @@
3421                         b
3422                        +X
3423                         c
3424                    "},
3425                    cursor_offset: None,
3426                    expected_variable_edit: indoc! {"
3427                        a
3428                        b
3429                        <|fim_middle|>
3430                        X
3431                        <|fim_suffix|>
3432                        c
3433                        d
3434                    "},
3435                    expected_after_apply: indoc! {"
3436                        a
3437                        b
3438                        X
3439                        c
3440                        d
3441                        e
3442                    "},
3443                },
3444                Case {
3445                    name: "deletion",
3446                    old: indoc! {"
3447                        a
3448                        b
3449                        c
3450                        d
3451                        e
3452                    "},
3453                    patch: indoc! {"
3454                        @@ -2,3 +2,2 @@
3455                         b
3456                        -c
3457                         d
3458                    "},
3459                    cursor_offset: None,
3460                    expected_variable_edit: indoc! {"
3461                        a
3462                        b
3463                        <|fim_middle|>
3464                        <|fim_suffix|>
3465                        d
3466                        e
3467                    "},
3468                    expected_after_apply: indoc! {"
3469                        a
3470                        b
3471                        d
3472                        e
3473                    "},
3474                },
3475                Case {
3476                    name: "edit_near_start",
3477                    old: indoc! {"
3478                        first
3479                        second
3480                        third
3481                        fourth
3482                    "},
3483                    patch: indoc! {"
3484                        @@ -1,1 +1,1 @@
3485                        -first
3486                        +FIRST
3487                    "},
3488                    cursor_offset: None,
3489                    expected_variable_edit: indoc! {"
3490                        <|fim_middle|>
3491                        FIRST
3492                        <|fim_suffix|>
3493                        second
3494                        third
3495                    "},
3496                    expected_after_apply: indoc! {"
3497                        FIRST
3498                        second
3499                        third
3500                        fourth
3501                    "},
3502                },
3503                Case {
3504                    name: "edit_near_end",
3505                    old: indoc! {"
3506                        first
3507                        second
3508                        third
3509                        fourth
3510                    "},
3511                    patch: indoc! {"
3512                        @@ -4,1 +4,1 @@
3513                        -fourth
3514                        +FOURTH
3515                    "},
3516                    cursor_offset: None,
3517                    expected_variable_edit: indoc! {"
3518                        second
3519                        third
3520                        <|fim_middle|>
3521                        FOURTH
3522                        <|fim_suffix|>
3523                    "},
3524                    expected_after_apply: indoc! {"
3525                        first
3526                        second
3527                        third
3528                        FOURTH
3529                    "},
3530                },
3531                Case {
3532                    name: "cursor_at_start_of_replacement",
3533                    old: indoc! {"
3534                        zero
3535                        one
3536                        two
3537                        three
3538                        four
3539                        five
3540                    "},
3541                    patch: indoc! {"
3542                        @@ -3,3 +3,3 @@
3543                         two
3544                        -three
3545                        +THREE
3546                         four
3547                    "},
3548                    cursor_offset: Some(4),
3549                    expected_variable_edit: indoc! {"
3550                        one
3551                        two
3552                        <|fim_middle|>
3553                        <|user_cursor|>THREE
3554                        <|fim_suffix|>
3555                        four
3556                        five
3557                    "},
3558                    expected_after_apply: indoc! {"
3559                        zero
3560                        one
3561                        two
3562                        <|user_cursor|>THREE
3563                        four
3564                        five
3565                    "},
3566                },
3567                Case {
3568                    name: "cursor_in_middle_of_replacement",
3569                    old: indoc! {"
3570                        zero
3571                        one
3572                        two
3573                        three
3574                        four
3575                        five
3576                    "},
3577                    patch: indoc! {"
3578                        @@ -3,3 +3,3 @@
3579                         two
3580                        -three
3581                        +THREE
3582                         four
3583                    "},
3584                    cursor_offset: Some(6),
3585                    expected_variable_edit: indoc! {"
3586                        one
3587                        two
3588                        <|fim_middle|>
3589                        TH<|user_cursor|>REE
3590                        <|fim_suffix|>
3591                        four
3592                        five
3593                    "},
3594                    expected_after_apply: indoc! {"
3595                        zero
3596                        one
3597                        two
3598                        TH<|user_cursor|>REE
3599                        four
3600                        five
3601                    "},
3602                },
3603                Case {
3604                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3605                    old: indoc! {"
3606                        one
3607                        a
3608                        b
3609                        c
3610                        d
3611                        two
3612                        a
3613                        b
3614                        c
3615                        d
3616                        three
3617                        a
3618                        b
3619                        c
3620                        d
3621                        four
3622                    "},
3623                    patch: indoc! {"
3624                        @@ -4,5 +4,5 @@
3625                         two
3626                         a
3627                         b
3628                        -c
3629                        +C
3630                         d
3631                         three
3632                    "},
3633                    cursor_offset: None,
3634                    expected_variable_edit: indoc! {"
3635                        two
3636                        a
3637                        b
3638                        <|fim_middle|>
3639                        C
3640                        <|fim_suffix|>
3641                        d
3642                        three
3643                    "},
3644                    expected_after_apply: indoc! {"
3645                        one
3646                        a
3647                        b
3648                        c
3649                        d
3650                        two
3651                        a
3652                        b
3653                        C
3654                        d
3655                        three
3656                        a
3657                        b
3658                        c
3659                        d
3660                        four
3661                    "},
3662                },
3663                Case {
3664                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3665                    old: indoc! {"
3666                        {
3667                            {
3668                                one();
3669                            }
3670                        }
3671                        {
3672                            {
3673                                two();
3674                            }
3675                        }
3676                        {
3677                            {
3678                                three();
3679                            }
3680                        }
3681                        {
3682                            {
3683                                four();
3684                            }
3685                        }
3686                    "},
3687                    patch: indoc! {"
3688                        @@ -4,5 +4,5 @@
3689                             {
3690                        -        two();
3691                        +        TWO();
3692                             }
3693                    "},
3694                    cursor_offset: None,
3695                    expected_variable_edit: indoc! {"
3696                                one();
3697                            }
3698                        }
3699                        {
3700                            {
3701                        <|fim_middle|>
3702                                TWO();
3703                        <|fim_suffix|>
3704                            }
3705                        }
3706                        {
3707                            {
3708                                three();
3709                    "},
3710                    expected_after_apply: indoc! {"
3711                        {
3712                            {
3713                                one();
3714                            }
3715                        }
3716                        {
3717                            {
3718                                TWO();
3719                            }
3720                        }
3721                        {
3722                            {
3723                                three();
3724                            }
3725                        }
3726                        {
3727                            {
3728                                four();
3729                            }
3730                        }
3731                    "},
3732                },
3733            ];
3734
3735            for case in cases {
3736                let output =
3737                    patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
3738                        .unwrap_or_else(|error| {
3739                            panic!("failed converting patch for {}: {error}", case.name)
3740                        });
3741                assert_eq!(
3742                    output, case.expected_variable_edit,
3743                    "patch->variable_edit mismatch for {}",
3744                    case.name
3745                );
3746
3747                let (edit_range, replacement) = apply_variable_edit(case.old, &output)
3748                    .unwrap_or_else(|error| {
3749                        panic!("failed applying variable_edit for {}: {error}", case.name)
3750                    });
3751                let mut edited_by_variable_edit = case.old.to_string();
3752                edited_by_variable_edit.replace_range(edit_range, &replacement);
3753                assert_eq!(
3754                    edited_by_variable_edit, case.expected_after_apply,
3755                    "variable_edit apply mismatch for {}",
3756                    case.name
3757                );
3758
3759                let (expected_edit_range, expected_replacement) =
3760                    apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
3761                        |error| {
3762                            panic!(
3763                                "failed applying expected variable_edit for {}: {error}",
3764                                case.name
3765                            )
3766                        },
3767                    );
3768                let mut edited_by_expected_variable_edit = case.old.to_string();
3769                edited_by_expected_variable_edit
3770                    .replace_range(expected_edit_range, &expected_replacement);
3771                assert_eq!(
3772                    edited_by_expected_variable_edit, case.expected_after_apply,
3773                    "expected variable_edit apply mismatch for {}",
3774                    case.name
3775                );
3776            }
3777        }
3778
3779        #[test]
3780        fn test_write_cursor_excerpt_section() {
3781            let path = Path::new("test.rs");
3782            let context = "fn main() {\n    hello();\n}\n";
3783            let cursor_offset = 17;
3784            let mut prompt = String::new();
3785            write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
3786            assert_eq!(
3787                prompt,
3788                "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
3789            );
3790        }
3791    }
3792}
3793
3794/// The zeta1 prompt format
3795pub mod zeta1 {
3796    use super::*;
3797    use std::fmt::Write;
3798
3799    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
3800    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
3801    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
3802    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
3803
3804    const INSTRUCTION_HEADER: &str = concat!(
3805        "### Instruction:\n",
3806        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3807        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3808        "into account the cursor location.\n\n",
3809        "### User Edits:\n\n"
3810    );
3811    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
3812    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
3813
3814    /// Formats a complete zeta1 prompt from the input events and excerpt.
3815    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
3816        let mut prompt = String::with_capacity(
3817            INSTRUCTION_HEADER.len()
3818                + input_events.len()
3819                + EXCERPT_HEADER.len()
3820                + input_excerpt.len()
3821                + RESPONSE_HEADER.len(),
3822        );
3823        prompt.push_str(INSTRUCTION_HEADER);
3824        prompt.push_str(input_events);
3825        prompt.push_str(EXCERPT_HEADER);
3826        prompt.push_str(input_excerpt);
3827        prompt.push_str(RESPONSE_HEADER);
3828        prompt
3829    }
3830
3831    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
3832    /// editable and context byte-offset ranges within `cursor_excerpt`.
3833    pub fn format_zeta1_from_input(
3834        input: &ZetaPromptInput,
3835        editable_range: Range<usize>,
3836        context_range: Range<usize>,
3837    ) -> String {
3838        let events = format_zeta1_events(&input.events);
3839        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
3840        format_zeta1_prompt(&events, &excerpt)
3841    }
3842
3843    /// Formats events in zeta1 style (oldest first).
3844    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
3845        let mut result = String::new();
3846        for event in
3847            events
3848                .iter()
3849                .skip(events.len().saturating_sub(max_edit_event_count_for_format(
3850                    &ZetaFormat::V0114180EditableRegion,
3851                )))
3852        {
3853            let event_string = format_zeta1_event(event);
3854            if event_string.is_empty() {
3855                continue;
3856            }
3857            if !result.is_empty() {
3858                result.push_str("\n\n");
3859            }
3860            result.push_str(&event_string);
3861        }
3862        result
3863    }
3864
3865    fn format_zeta1_event(event: &Event) -> String {
3866        match event {
3867            Event::BufferChange {
3868                path,
3869                old_path,
3870                diff,
3871                ..
3872            } => {
3873                let mut prompt = String::new();
3874                if old_path != path {
3875                    writeln!(
3876                        prompt,
3877                        "User renamed {} to {}\n",
3878                        old_path.display(),
3879                        path.display()
3880                    )
3881                    .ok();
3882                }
3883                if !diff.is_empty() {
3884                    write!(
3885                        prompt,
3886                        "User edited {}:\n```diff\n{}\n```",
3887                        path.display(),
3888                        diff
3889                    )
3890                    .ok();
3891                }
3892                prompt
3893            }
3894        }
3895    }
3896
3897    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
3898    /// within `cursor_excerpt`.
3899    fn format_zeta1_excerpt(
3900        input: &ZetaPromptInput,
3901        editable_range: Range<usize>,
3902        context_range: Range<usize>,
3903    ) -> String {
3904        let path_str = input.cursor_path.to_string_lossy();
3905        let excerpt = &*input.cursor_excerpt;
3906        let cursor_offset = input.cursor_offset_in_excerpt;
3907
3908        let mut prompt = String::new();
3909        writeln!(&mut prompt, "```{path_str}").ok();
3910
3911        let starts_at_file_beginning =
3912            input.excerpt_start_row == Some(0) && context_range.start == 0;
3913        if starts_at_file_beginning {
3914            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
3915        }
3916
3917        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
3918
3919        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
3920        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
3921        prompt.push_str(CURSOR_MARKER);
3922        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
3923        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
3924
3925        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
3926        write!(prompt, "\n```").ok();
3927
3928        prompt
3929    }
3930
3931    /// Cleans zeta1 model output by extracting content between editable region
3932    /// markers and converting the zeta1 cursor marker to the universal one.
3933    /// Returns `None` if the output doesn't contain the expected markers.
3934    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
3935        let content = output.replace(CURSOR_MARKER, "");
3936
3937        let content_start = content
3938            .find(EDITABLE_REGION_START_MARKER)
3939            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
3940            .map(|pos| {
3941                if content.as_bytes().get(pos) == Some(&b'\n') {
3942                    pos + 1
3943                } else {
3944                    pos
3945                }
3946            })
3947            .unwrap_or(0);
3948
3949        let content_end = content
3950            .find(EDITABLE_REGION_END_MARKER)
3951            .map(|pos| {
3952                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
3953                    pos - 1
3954                } else {
3955                    pos
3956                }
3957            })
3958            .unwrap_or(content.len());
3959
3960        if content_start > content_end {
3961            return Some(String::new());
3962        }
3963
3964        let extracted = &content[content_start..content_end];
3965
3966        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
3967            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
3968            let text_before_cursor = text_before_cursor
3969                .find(EDITABLE_REGION_START_MARKER)
3970                .map(|pos| {
3971                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
3972                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
3973                        after_marker + 1
3974                    } else {
3975                        after_marker
3976                    }
3977                })
3978                .unwrap_or(0);
3979            let offset_in_extracted = zeta1_cursor_pos
3980                .saturating_sub(text_before_cursor)
3981                .min(extracted.len());
3982            offset_in_extracted
3983        });
3984
3985        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
3986        if let Some(offset) = cursor_offset {
3987            result.push_str(&extracted[..offset]);
3988            result.push_str(super::CURSOR_MARKER);
3989            result.push_str(&extracted[offset..]);
3990        } else {
3991            result.push_str(extracted);
3992        }
3993
3994        Some(result)
3995    }
3996}
3997
3998#[cfg(test)]
3999mod tests {
4000    use super::*;
4001    use indoc::indoc;
4002
4003    fn make_input(
4004        cursor_excerpt: &str,
4005        editable_range: Range<usize>,
4006        cursor_offset: usize,
4007        events: Vec<Event>,
4008        related_files: Vec<RelatedFile>,
4009    ) -> ZetaPromptInput {
4010        let context_range = 0..cursor_excerpt.len();
4011        ZetaPromptInput {
4012            cursor_path: Path::new("test.rs").into(),
4013            cursor_excerpt: cursor_excerpt.into(),
4014            cursor_offset_in_excerpt: cursor_offset,
4015            excerpt_start_row: None,
4016            events: events.into_iter().map(Arc::new).collect(),
4017            related_files: Some(related_files),
4018            active_buffer_diagnostics: vec![],
4019            excerpt_ranges: ExcerptRanges {
4020                editable_150: editable_range.clone(),
4021                editable_180: editable_range.clone(),
4022                editable_350: editable_range,
4023                editable_150_context_350: context_range.clone(),
4024                editable_180_context_350: context_range.clone(),
4025                editable_350_context_150: context_range,
4026                ..Default::default()
4027            },
4028            syntax_ranges: None,
4029            experiment: None,
4030            in_open_source_repo: false,
4031            can_collect_data: false,
4032            repo_url: None,
4033        }
4034    }
4035
4036    fn make_input_with_context_range(
4037        excerpt: &str,
4038        editable_range: Range<usize>,
4039        context_range: Range<usize>,
4040        cursor_offset: usize,
4041    ) -> ZetaPromptInput {
4042        ZetaPromptInput {
4043            cursor_path: Path::new("test.rs").into(),
4044            cursor_excerpt: excerpt.into(),
4045            cursor_offset_in_excerpt: cursor_offset,
4046            excerpt_start_row: None,
4047            events: vec![],
4048            related_files: Some(vec![]),
4049            active_buffer_diagnostics: vec![],
4050            excerpt_ranges: ExcerptRanges {
4051                editable_150: editable_range.clone(),
4052                editable_180: editable_range.clone(),
4053                editable_350: editable_range,
4054                editable_150_context_350: context_range.clone(),
4055                editable_180_context_350: context_range.clone(),
4056                editable_350_context_150: context_range,
4057                ..Default::default()
4058            },
4059            syntax_ranges: None,
4060            experiment: None,
4061            in_open_source_repo: false,
4062            can_collect_data: false,
4063            repo_url: None,
4064        }
4065    }
4066
4067    fn make_event(path: &str, diff: &str) -> Event {
4068        Event::BufferChange {
4069            path: Path::new(path).into(),
4070            old_path: Path::new(path).into(),
4071            diff: diff.to_string(),
4072            predicted: false,
4073            in_open_source_repo: false,
4074        }
4075    }
4076
4077    fn make_related_file(path: &str, content: &str) -> RelatedFile {
4078        RelatedFile {
4079            path: Path::new(path).into(),
4080            max_row: content.lines().count() as u32,
4081            excerpts: vec![RelatedExcerpt {
4082                row_range: 0..content.lines().count() as u32,
4083                text: content.into(),
4084                order: 0,
4085            }],
4086            in_open_source_repo: false,
4087        }
4088    }
4089
4090    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4091        format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4092    }
4093
4094    #[test]
4095    fn test_no_truncation_when_within_budget() {
4096        let input = make_input(
4097            "prefix\neditable\nsuffix",
4098            7..15,
4099            10,
4100            vec![make_event("a.rs", "-old\n+new\n")],
4101            vec![make_related_file("related.rs", "fn helper() {}\n")],
4102        );
4103
4104        assert_eq!(
4105            format_with_budget(&input, 10000),
4106            indoc! {r#"
4107                <|file_sep|>related.rs
4108                fn helper() {}
4109                <|file_sep|>edit history
4110                --- a/a.rs
4111                +++ b/a.rs
4112                -old
4113                +new
4114                <|file_sep|>test.rs
4115                <|fim_prefix|>
4116                prefix
4117                <|fim_middle|>current
4118                edi<|user_cursor|>table
4119                <|fim_suffix|>
4120
4121                suffix
4122                <|fim_middle|>updated
4123            "#}
4124        );
4125    }
4126
4127    #[test]
4128    fn test_truncation_drops_edit_history_when_budget_tight() {
4129        let input = make_input(
4130            "code",
4131            0..4,
4132            2,
4133            vec![make_event("a.rs", "-x\n+y\n")],
4134            vec![
4135                make_related_file("r1.rs", "a\n"),
4136                make_related_file("r2.rs", "b\n"),
4137            ],
4138        );
4139
4140        assert_eq!(
4141            format_with_budget(&input, 10000),
4142            indoc! {r#"
4143                <|file_sep|>r1.rs
4144                a
4145                <|file_sep|>r2.rs
4146                b
4147                <|file_sep|>edit history
4148                --- a/a.rs
4149                +++ b/a.rs
4150                -x
4151                +y
4152                <|file_sep|>test.rs
4153                <|fim_prefix|>
4154                <|fim_middle|>current
4155                co<|user_cursor|>de
4156                <|fim_suffix|>
4157                <|fim_middle|>updated
4158            "#}
4159        );
4160
4161        assert_eq!(
4162            format_with_budget(&input, 50),
4163            indoc! {r#"
4164                <|file_sep|>r1.rs
4165                a
4166                <|file_sep|>r2.rs
4167                b
4168                <|file_sep|>test.rs
4169                <|fim_prefix|>
4170                <|fim_middle|>current
4171                co<|user_cursor|>de
4172                <|fim_suffix|>
4173                <|fim_middle|>updated
4174            "#}
4175        );
4176    }
4177
4178    #[test]
4179    fn test_truncation_includes_partial_excerpts() {
4180        let input = make_input(
4181            "x",
4182            0..1,
4183            0,
4184            vec![],
4185            vec![RelatedFile {
4186                path: Path::new("big.rs").into(),
4187                max_row: 30,
4188                in_open_source_repo: false,
4189                excerpts: vec![
4190                    RelatedExcerpt {
4191                        row_range: 0..10,
4192                        text: "first excerpt\n".into(),
4193                        order: 0,
4194                    },
4195                    RelatedExcerpt {
4196                        row_range: 10..20,
4197                        text: "second excerpt\n".into(),
4198                        order: 0,
4199                    },
4200                    RelatedExcerpt {
4201                        row_range: 20..30,
4202                        text: "third excerpt\n".into(),
4203                        order: 0,
4204                    },
4205                ],
4206            }],
4207        );
4208
4209        assert_eq!(
4210            format_with_budget(&input, 10000),
4211            indoc! {r#"
4212                <|file_sep|>big.rs
4213                first excerpt
4214                ...
4215                second excerpt
4216                ...
4217                third excerpt
4218                <|file_sep|>test.rs
4219                <|fim_prefix|>
4220                <|fim_middle|>current
4221                <|user_cursor|>x
4222                <|fim_suffix|>
4223                <|fim_middle|>updated
4224            "#}
4225        );
4226
4227        assert_eq!(
4228            format_with_budget(&input, 50),
4229            indoc! {r#"
4230                <|file_sep|>big.rs
4231                first excerpt
4232                ...
4233                <|file_sep|>test.rs
4234                <|fim_prefix|>
4235                <|fim_middle|>current
4236                <|user_cursor|>x
4237                <|fim_suffix|>
4238                <|fim_middle|>updated
4239            "#}
4240        );
4241    }
4242
4243    #[test]
4244    fn test_truncation_prioritizes_lower_order_excerpts() {
4245        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
4246        // With tight budget, only the lower-order excerpt from file_b should be included.
4247        let input = make_input(
4248            "x",
4249            0..1,
4250            0,
4251            vec![],
4252            vec![
4253                RelatedFile {
4254                    path: Path::new("file_a.rs").into(),
4255                    max_row: 10,
4256                    in_open_source_repo: false,
4257                    excerpts: vec![RelatedExcerpt {
4258                        row_range: 0..10,
4259                        text: "low priority content\n".into(),
4260                        order: 5,
4261                    }],
4262                },
4263                RelatedFile {
4264                    path: Path::new("file_b.rs").into(),
4265                    max_row: 10,
4266                    in_open_source_repo: false,
4267                    excerpts: vec![RelatedExcerpt {
4268                        row_range: 0..10,
4269                        text: "high priority content\n".into(),
4270                        order: 1,
4271                    }],
4272                },
4273            ],
4274        );
4275
4276        // With large budget, both files included; rendered in stable lexicographic order.
4277        assert_eq!(
4278            format_with_budget(&input, 10000),
4279            indoc! {r#"
4280                <|file_sep|>file_a.rs
4281                low priority content
4282                <|file_sep|>file_b.rs
4283                high priority content
4284                <|file_sep|>test.rs
4285                <|fim_prefix|>
4286                <|fim_middle|>current
4287                <|user_cursor|>x
4288                <|fim_suffix|>
4289                <|fim_middle|>updated
4290            "#}
4291        );
4292
4293        // With tight budget, only file_b (lower order) fits.
4294        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
4295        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
4296        // file_a would need another 14 tokens, which doesn't fit.
4297        assert_eq!(
4298            format_with_budget(&input, 52),
4299            indoc! {r#"
4300                <|file_sep|>file_b.rs
4301                high priority content
4302                <|file_sep|>test.rs
4303                <|fim_prefix|>
4304                <|fim_middle|>current
4305                <|user_cursor|>x
4306                <|fim_suffix|>
4307                <|fim_middle|>updated
4308            "#}
4309        );
4310    }
4311
4312    #[test]
4313    fn test_truncation_drops_high_order_excerpts_within_file() {
4314        // A single file has excerpts at order 1 and order 3. With a tight budget,
4315        // only the order-1 excerpts are included while the order-3 excerpt is
4316        // dropped — even though they belong to the same file. This also preserves
4317        // the parent invariant: parent outline items have order ≤ their best
4318        // child, so they're always included when any child is.
4319        let input = make_input(
4320            "x",
4321            0..1,
4322            0,
4323            vec![],
4324            vec![RelatedFile {
4325                path: Path::new("mod.rs").into(),
4326                max_row: 30,
4327                in_open_source_repo: false,
4328                excerpts: vec![
4329                    RelatedExcerpt {
4330                        row_range: 0..5,
4331                        text: "mod header\n".into(),
4332                        order: 1,
4333                    },
4334                    RelatedExcerpt {
4335                        row_range: 5..15,
4336                        text: "important fn\n".into(),
4337                        order: 1,
4338                    },
4339                    RelatedExcerpt {
4340                        row_range: 15..30,
4341                        text: "less important fn\n".into(),
4342                        order: 3,
4343                    },
4344                ],
4345            }],
4346        );
4347
4348        // With large budget, all three excerpts included.
4349        assert_eq!(
4350            format_with_budget(&input, 10000),
4351            indoc! {r#"
4352                <|file_sep|>mod.rs
4353                mod header
4354                ...
4355                important fn
4356                ...
4357                less important fn
4358                <|file_sep|>test.rs
4359                <|fim_prefix|>
4360                <|fim_middle|>current
4361                <|user_cursor|>x
4362                <|fim_suffix|>
4363                <|fim_middle|>updated
4364            "#}
4365        );
4366
4367        // With tight budget, only order<=1 excerpts included (header + important fn).
4368        assert_eq!(
4369            format_with_budget(&input, 55),
4370            indoc! {r#"
4371                <|file_sep|>mod.rs
4372                mod header
4373                ...
4374                important fn
4375                ...
4376                <|file_sep|>test.rs
4377                <|fim_prefix|>
4378                <|fim_middle|>current
4379                <|user_cursor|>x
4380                <|fim_suffix|>
4381                <|fim_middle|>updated
4382            "#}
4383        );
4384    }
4385
4386    #[test]
4387    fn test_truncation_drops_older_events_first() {
4388        let input = make_input(
4389            "x",
4390            0..1,
4391            0,
4392            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
4393            vec![],
4394        );
4395
4396        assert_eq!(
4397            format_with_budget(&input, 10000),
4398            indoc! {r#"
4399                <|file_sep|>edit history
4400                --- a/old.rs
4401                +++ b/old.rs
4402                -1
4403                --- a/new.rs
4404                +++ b/new.rs
4405                -2
4406                <|file_sep|>test.rs
4407                <|fim_prefix|>
4408                <|fim_middle|>current
4409                <|user_cursor|>x
4410                <|fim_suffix|>
4411                <|fim_middle|>updated
4412            "#}
4413        );
4414
4415        assert_eq!(
4416            format_with_budget(&input, 55),
4417            indoc! {r#"
4418                <|file_sep|>edit history
4419                --- a/new.rs
4420                +++ b/new.rs
4421                -2
4422                <|file_sep|>test.rs
4423                <|fim_prefix|>
4424                <|fim_middle|>current
4425                <|user_cursor|>x
4426                <|fim_suffix|>
4427                <|fim_middle|>updated
4428            "#}
4429        );
4430    }
4431
4432    #[test]
4433    fn test_cursor_excerpt_always_included_with_minimal_budget() {
4434        let input = make_input(
4435            "fn main() {}",
4436            0..12,
4437            3,
4438            vec![make_event("a.rs", "-old\n+new\n")],
4439            vec![make_related_file("related.rs", "helper\n")],
4440        );
4441
4442        assert_eq!(
4443            format_with_budget(&input, 30),
4444            indoc! {r#"
4445                <|file_sep|>test.rs
4446                <|fim_prefix|>
4447                <|fim_middle|>current
4448                fn <|user_cursor|>main() {}
4449                <|fim_suffix|>
4450                <|fim_middle|>updated
4451            "#}
4452        );
4453    }
4454
4455    fn format_seed_coder(input: &ZetaPromptInput) -> String {
4456        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4457    }
4458
4459    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4460        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4461    }
4462
4463    #[test]
4464    fn test_seed_coder_basic_format() {
4465        let input = make_input(
4466            "prefix\neditable\nsuffix",
4467            7..15,
4468            10,
4469            vec![make_event("a.rs", "-old\n+new\n")],
4470            vec![make_related_file("related.rs", "fn helper() {}\n")],
4471        );
4472
4473        assert_eq!(
4474            format_seed_coder(&input),
4475            indoc! {r#"
4476                <[fim-suffix]>
4477                suffix
4478                <[fim-prefix]><filename>related.rs
4479                fn helper() {}
4480
4481                <filename>edit_history
4482                --- a/a.rs
4483                +++ b/a.rs
4484                -old
4485                +new
4486
4487                <filename>test.rs
4488                prefix
4489                <<<<<<< CURRENT
4490                edi<|user_cursor|>table
4491                =======
4492                <[fim-middle]>"#}
4493        );
4494    }
4495
4496    #[test]
4497    fn test_seed_coder_no_context() {
4498        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
4499
4500        assert_eq!(
4501            format_seed_coder(&input),
4502            indoc! {r#"
4503                <[fim-suffix]>
4504                after
4505                <[fim-prefix]><filename>test.rs
4506                before
4507                <<<<<<< CURRENT
4508                mid<|user_cursor|>dle
4509                =======
4510                <[fim-middle]>"#}
4511        );
4512    }
4513
4514    #[test]
4515    fn test_seed_coder_truncation_drops_context() {
4516        let input = make_input(
4517            "code",
4518            0..4,
4519            2,
4520            vec![make_event("a.rs", "-x\n+y\n")],
4521            vec![make_related_file("r1.rs", "content\n")],
4522        );
4523
4524        // With large budget, everything is included
4525        assert_eq!(
4526            format_seed_coder(&input),
4527            indoc! {r#"
4528                <[fim-suffix]>
4529                <[fim-prefix]><filename>r1.rs
4530                content
4531
4532                <filename>edit_history
4533                --- a/a.rs
4534                +++ b/a.rs
4535                -x
4536                +y
4537
4538                <filename>test.rs
4539                <<<<<<< CURRENT
4540                co<|user_cursor|>de
4541                =======
4542                <[fim-middle]>"#}
4543        );
4544
4545        // With tight budget, context is dropped but cursor section remains
4546        assert_eq!(
4547            format_seed_coder_with_budget(&input, 30),
4548            indoc! {r#"
4549                <[fim-suffix]>
4550                <[fim-prefix]><filename>test.rs
4551                <<<<<<< CURRENT
4552                co<|user_cursor|>de
4553                =======
4554                <[fim-middle]>"#}
4555        );
4556    }
4557
4558    #[test]
4559    fn test_seed_coder_truncation_prioritizes_lower_order() {
4560        let input = make_input(
4561            "code",
4562            0..4,
4563            2,
4564            vec![],
4565            vec![
4566                RelatedFile {
4567                    path: Path::new("low_prio.rs").into(),
4568                    max_row: 5,
4569                    in_open_source_repo: false,
4570                    excerpts: vec![RelatedExcerpt {
4571                        row_range: 0..5,
4572                        text: "low prio\n".into(),
4573                        order: 10,
4574                    }],
4575                },
4576                RelatedFile {
4577                    path: Path::new("high_prio.rs").into(),
4578                    max_row: 5,
4579                    in_open_source_repo: false,
4580                    excerpts: vec![RelatedExcerpt {
4581                        row_range: 0..5,
4582                        text: "high prio\n".into(),
4583                        order: 1,
4584                    }],
4585                },
4586            ],
4587        );
4588
4589        // With large budget, both included; rendered in stable lexicographic order.
4590        assert_eq!(
4591            format_seed_coder(&input),
4592            indoc! {r#"
4593                <[fim-suffix]>
4594                <[fim-prefix]><filename>low_prio.rs
4595                low prio
4596                <filename>high_prio.rs
4597                high prio
4598
4599                <filename>test.rs
4600                <<<<<<< CURRENT
4601                co<|user_cursor|>de
4602                =======
4603                <[fim-middle]>"#}
4604        );
4605
4606        // With tight budget, only high_prio included.
4607        // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
4608        // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
4609        assert_eq!(
4610            format_seed_coder_with_budget(&input, 44),
4611            indoc! {r#"
4612                <[fim-suffix]>
4613                <[fim-prefix]><filename>high_prio.rs
4614                high prio
4615
4616                <filename>test.rs
4617                <<<<<<< CURRENT
4618                co<|user_cursor|>de
4619                =======
4620                <[fim-middle]>"#}
4621        );
4622    }
4623
4624    #[test]
4625    fn test_format_zeta1_from_input_basic() {
4626        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
4627        let input = ZetaPromptInput {
4628            cursor_path: Path::new("src/main.rs").into(),
4629            cursor_excerpt: excerpt.into(),
4630            cursor_offset_in_excerpt: 30,
4631            excerpt_start_row: Some(0),
4632            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
4633            related_files: Some(vec![]),
4634            active_buffer_diagnostics: vec![],
4635            excerpt_ranges: ExcerptRanges {
4636                editable_150: 15..41,
4637                editable_180: 15..41,
4638                editable_350: 15..41,
4639                editable_150_context_350: 0..excerpt.len(),
4640                editable_180_context_350: 0..excerpt.len(),
4641                editable_350_context_150: 0..excerpt.len(),
4642                ..Default::default()
4643            },
4644            syntax_ranges: None,
4645            experiment: None,
4646            in_open_source_repo: false,
4647            can_collect_data: false,
4648            repo_url: None,
4649        };
4650
4651        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
4652
4653        assert_eq!(
4654            prompt,
4655            concat!(
4656                "### Instruction:\n",
4657                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4658                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4659                "into account the cursor location.\n",
4660                "\n",
4661                "### User Edits:\n",
4662                "\n",
4663                "User edited other.rs:\n",
4664                "```diff\n",
4665                "-old\n",
4666                "+new\n",
4667                "\n",
4668                "```\n",
4669                "\n",
4670                "### User Excerpt:\n",
4671                "\n",
4672                "```src/main.rs\n",
4673                "<|start_of_file|>\n",
4674                "fn before() {}\n",
4675                "<|editable_region_start|>\n",
4676                "fn foo() {\n",
4677                "    <|user_cursor_is_here|>let x = 1;\n",
4678                "\n",
4679                "<|editable_region_end|>}\n",
4680                "fn after() {}\n",
4681                "\n",
4682                "```\n",
4683                "\n",
4684                "### Response:\n",
4685            ),
4686        );
4687    }
4688
4689    #[test]
4690    fn test_format_zeta1_from_input_no_start_of_file() {
4691        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
4692        let input = ZetaPromptInput {
4693            cursor_path: Path::new("src/main.rs").into(),
4694            cursor_excerpt: excerpt.into(),
4695            cursor_offset_in_excerpt: 15,
4696            excerpt_start_row: Some(10),
4697            events: vec![],
4698            related_files: Some(vec![]),
4699            active_buffer_diagnostics: vec![],
4700            excerpt_ranges: ExcerptRanges {
4701                editable_150: 0..28,
4702                editable_180: 0..28,
4703                editable_350: 0..28,
4704                editable_150_context_350: 0..28,
4705                editable_180_context_350: 0..28,
4706                editable_350_context_150: 0..28,
4707                ..Default::default()
4708            },
4709            syntax_ranges: None,
4710            experiment: None,
4711            in_open_source_repo: false,
4712            can_collect_data: false,
4713            repo_url: None,
4714        };
4715
4716        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
4717
4718        assert_eq!(
4719            prompt,
4720            concat!(
4721                "### Instruction:\n",
4722                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4723                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4724                "into account the cursor location.\n",
4725                "\n",
4726                "### User Edits:\n",
4727                "\n",
4728                "\n",
4729                "\n",
4730                "### User Excerpt:\n",
4731                "\n",
4732                "```src/main.rs\n",
4733                "<|editable_region_start|>\n",
4734                "fn foo() {\n",
4735                "    <|user_cursor_is_here|>let x = 1;\n",
4736                "}\n",
4737                "\n",
4738                "<|editable_region_end|>\n",
4739                "```\n",
4740                "\n",
4741                "### Response:\n",
4742            ),
4743        );
4744    }
4745
4746    #[test]
4747    fn test_format_zeta1_from_input_with_sub_ranges() {
4748        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
4749        let editable_range = 10..37;
4750        let context_range = 0..excerpt.len();
4751
4752        let input = ZetaPromptInput {
4753            cursor_path: Path::new("test.rs").into(),
4754            cursor_excerpt: excerpt.into(),
4755            cursor_offset_in_excerpt: 25,
4756            excerpt_start_row: Some(0),
4757            events: vec![],
4758            related_files: Some(vec![]),
4759            active_buffer_diagnostics: vec![],
4760            excerpt_ranges: ExcerptRanges {
4761                editable_150: editable_range.clone(),
4762                editable_180: editable_range.clone(),
4763                editable_350: editable_range.clone(),
4764                editable_150_context_350: context_range.clone(),
4765                editable_180_context_350: context_range.clone(),
4766                editable_350_context_150: context_range.clone(),
4767                ..Default::default()
4768            },
4769            syntax_ranges: None,
4770            experiment: None,
4771            in_open_source_repo: false,
4772            can_collect_data: false,
4773            repo_url: None,
4774        };
4775
4776        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
4777
4778        assert_eq!(
4779            prompt,
4780            concat!(
4781                "### Instruction:\n",
4782                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4783                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4784                "into account the cursor location.\n",
4785                "\n",
4786                "### User Edits:\n",
4787                "\n",
4788                "\n",
4789                "\n",
4790                "### User Excerpt:\n",
4791                "\n",
4792                "```test.rs\n",
4793                "<|start_of_file|>\n",
4794                "// prefix\n",
4795                "<|editable_region_start|>\n",
4796                "fn foo() {\n",
4797                "    <|user_cursor_is_here|>let x = 1;\n",
4798                "}\n",
4799                "<|editable_region_end|>\n",
4800                "// suffix\n",
4801                "\n",
4802                "```\n",
4803                "\n",
4804                "### Response:\n",
4805            ),
4806        );
4807    }
4808
4809    #[test]
4810    fn test_max_event_count() {
4811        fn make_numbered_event(index: usize) -> Event {
4812            return make_event(
4813                &format!("event-{index}.rs"),
4814                &format!("-old-{index}\n+new-{index}\n"),
4815            );
4816        }
4817        let input = make_input(
4818            "x",
4819            0..1,
4820            0,
4821            (0..3).map(make_numbered_event).collect(),
4822            vec![],
4823        );
4824
4825        let edit_history_section = format_edit_history_within_budget(
4826            &input.events,
4827            "<|file_sep|>",
4828            "edit history",
4829            usize::MAX,
4830            5,
4831        );
4832
4833        assert_eq!(
4834            &edit_history_section,
4835            indoc!(
4836                "
4837                <|file_sep|>edit history
4838                --- a/event-0.rs
4839                +++ b/event-0.rs
4840                -old-0
4841                +new-0
4842                --- a/event-1.rs
4843                +++ b/event-1.rs
4844                -old-1
4845                +new-1
4846                --- a/event-2.rs
4847                +++ b/event-2.rs
4848                -old-2
4849                +new-2
4850            "
4851            )
4852        );
4853
4854        let edit_history_section = format_edit_history_within_budget(
4855            &input.events,
4856            "<|file_sep|>",
4857            "edit history",
4858            usize::MAX,
4859            2,
4860        );
4861
4862        assert_eq!(
4863            &edit_history_section,
4864            indoc!(
4865                "
4866                <|file_sep|>edit history
4867                --- a/event-1.rs
4868                +++ b/event-1.rs
4869                -old-1
4870                +new-1
4871                --- a/event-2.rs
4872                +++ b/event-2.rs
4873                -old-2
4874                +new-2
4875            "
4876            )
4877        );
4878
4879        let edit_history_section = format_edit_history_within_budget(
4880            &input.events,
4881            "<|file_sep|>",
4882            "edit history",
4883            usize::MAX,
4884            0,
4885        );
4886
4887        assert_eq!(&edit_history_section, "");
4888    }
4889
4890    #[test]
4891    fn test_clean_zeta1_model_output_basic() {
4892        let output = indoc! {"
4893            <|editable_region_start|>
4894            fn main() {
4895                println!(\"hello\");
4896            }
4897            <|editable_region_end|>
4898        "};
4899
4900        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4901        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
4902    }
4903
4904    #[test]
4905    fn test_clean_zeta1_model_output_with_cursor() {
4906        let output = indoc! {"
4907            <|editable_region_start|>
4908            fn main() {
4909                <|user_cursor_is_here|>println!(\"hello\");
4910            }
4911            <|editable_region_end|>
4912        "};
4913
4914        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4915        assert_eq!(
4916            cleaned,
4917            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
4918        );
4919    }
4920
4921    #[test]
4922    fn test_clean_zeta1_model_output_no_markers() {
4923        let output = "fn main() {}\n";
4924        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4925        assert_eq!(cleaned, "fn main() {}\n");
4926    }
4927
4928    #[test]
4929    fn test_clean_zeta1_model_output_empty_region() {
4930        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
4931        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4932        assert_eq!(cleaned, "");
4933    }
4934
4935    fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
4936        let mut result = excerpt.to_string();
4937        result.replace_range(
4938            parsed_output.range_in_excerpt.clone(),
4939            &parsed_output.new_editable_region,
4940        );
4941        result
4942    }
4943
4944    #[test]
4945    fn test_parse_zeta2_model_output() {
4946        let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
4947        let context_start = excerpt.find("ctx start").unwrap();
4948        let context_end = excerpt.find("after ctx").unwrap();
4949        let editable_start = excerpt.find("editable old").unwrap();
4950        let editable_end = editable_start + "editable old\n".len();
4951        let input = make_input_with_context_range(
4952            excerpt,
4953            editable_start..editable_end,
4954            context_start..context_end,
4955            editable_start,
4956        );
4957
4958        let output = parse_zeta2_model_output(
4959            "editable new\n>>>>>>> UPDATED\n",
4960            ZetaFormat::V0131GitMergeMarkersPrefix,
4961            &input,
4962        )
4963        .unwrap();
4964
4965        assert_eq!(
4966            apply_edit(excerpt, &output),
4967            "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
4968        );
4969    }
4970
4971    #[test]
4972    fn test_parse_zeta2_model_output_identity() {
4973        let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
4974        let editable_start = excerpt.find("bbb").unwrap();
4975        let editable_end = excerpt.find("ddd").unwrap();
4976        let input = make_input_with_context_range(
4977            excerpt,
4978            editable_start..editable_end,
4979            0..excerpt.len(),
4980            editable_start,
4981        );
4982
4983        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
4984        let output =
4985            parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
4986
4987        assert_eq!(apply_edit(excerpt, &output), excerpt);
4988    }
4989
4990    #[test]
4991    fn test_parse_zeta2_model_output_strips_end_marker() {
4992        let excerpt = "hello\nworld\n";
4993        let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
4994
4995        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
4996        let output1 =
4997            parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
4998        let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();
4999
5000        assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
5001        assert_eq!(apply_edit(excerpt, &output1), "new content\n");
5002    }
5003}