zeta_prompt.rs

   1pub mod excerpt_ranges;
   2pub mod multi_region;
   3
   4use anyhow::{Result, anyhow};
   5use serde::{Deserialize, Serialize};
   6use std::fmt::Write;
   7use std::ops::Range;
   8use std::path::Path;
   9use std::sync::Arc;
  10use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
  11
  12pub use crate::excerpt_ranges::{
  13    ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
  14};
  15
/// Marker text written into the prompt at the user's cursor position.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that [`format_zeta_prompt`] passes to
/// [`format_prompt_with_budget_for_format`].
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this fraction of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  23
  24fn estimate_tokens(bytes: usize) -> usize {
  25    bytes / 3
  26}
  27
/// Everything needed to build an edit-prediction prompt for one cursor position.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written into the prompt's file header.
    pub cursor_path: Arc<Path>,
    /// Text surrounding the cursor. Editable and context ranges are byte ranges into this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Row offset added to rows computed relative to `cursor_excerpt` when
    /// filtering related-file excerpts; when `None`, no filtering is done.
    /// NOTE(review): presumably the buffer row at which `cursor_excerpt` starts — confirm with callers.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit-history events. When formatting, events are taken from the end of
    /// this list first, so later entries are preferred under a tight budget.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other files to include in the prompt; `None` is treated as empty.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    /// Diagnostics for the active buffer.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    /// NOTE(review): presumably whether the cursor file belongs to an open-source repository — confirm.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// NOTE(review): presumably whether this input may be retained for data collection — confirm.
    #[serde(default)]
    pub can_collect_data: bool,
    /// NOTE(review): presumably the URL of the repository containing the cursor file — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
  58
/// Identifies which prompt/output format to use.
///
/// The variant names double as the user-facing format names: `Display` and
/// [`ZetaFormat::parse`] both use the `IntoStaticStr` string for each variant,
/// so renaming a variant changes those strings.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// `v0226Hashline` starts with a lowercase letter, which trips the camel-case lint.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
    V0306SeedMultiRegions,
}
  87
  88impl std::fmt::Display for ZetaFormat {
  89    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  90        write!(f, "{}", <&'static str>::from(self))
  91    }
  92}
  93
  94impl ZetaFormat {
  95    pub fn parse(format_name: &str) -> Result<Self> {
  96        let mut results = ZetaFormat::iter().filter(|version| {
  97            <&'static str>::from(version)
  98                .to_lowercase()
  99                .contains(&format_name.to_lowercase())
 100        });
 101        let Some(result) = results.next() else {
 102            anyhow::bail!(
 103                "`{format_name}` did not match any of:\n{}",
 104                Self::options_as_string()
 105            );
 106        };
 107        if results.next().is_some() {
 108            anyhow::bail!(
 109                "`{format_name}` matched more than one of:\n{}",
 110                Self::options_as_string()
 111            );
 112        }
 113        Ok(result)
 114    }
 115
 116    pub fn options_as_string() -> String {
 117        ZetaFormat::iter()
 118            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 119            .collect::<Vec<_>>()
 120            .concat()
 121    }
 122}
 123
/// An edit-history event included in the prompt.
///
/// Serialized with an `"event"` tag field naming the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, rendered by [`write_event`] as a diff with
    /// `--- a<old_path>` / `+++ b<path>` header lines.
    BufferChange {
        /// Path written on the `+++ b` header line.
        path: Arc<Path>,
        /// Path written on the `--- a` header line.
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the header lines.
        diff: String,
        /// When true, the rendered event is prefixed with a
        /// "User accepted prediction" comment line.
        predicted: bool,
        /// Not rendered into the prompt; exposed via [`Event::in_open_source_repo`].
        in_open_source_repo: bool,
    },
}
 135
 136impl Event {
 137    pub fn in_open_source_repo(&self) -> bool {
 138        match self {
 139            Event::BufferChange {
 140                in_open_source_repo,
 141                ..
 142            } => *in_open_source_repo,
 143        }
 144    }
 145}
 146
 147pub fn write_event(prompt: &mut String, event: &Event) {
 148    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 149        for component in path.components() {
 150            prompt.push('/');
 151            write!(prompt, "{}", component.as_os_str().display()).ok();
 152        }
 153    }
 154    match event {
 155        Event::BufferChange {
 156            path,
 157            old_path,
 158            diff,
 159            predicted,
 160            in_open_source_repo: _,
 161        } => {
 162            if *predicted {
 163                prompt.push_str("// User accepted prediction:\n");
 164            }
 165            prompt.push_str("--- a");
 166            write_path_as_unix_str(prompt, old_path.as_ref());
 167            prompt.push_str("\n+++ b");
 168            write_path_as_unix_str(prompt, path.as_ref());
 169            prompt.push('\n');
 170            prompt.push_str(diff);
 171        }
 172    }
 173}
 174
/// A diagnostic reported for the active buffer, together with a snippet of
/// the surrounding text.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    /// NOTE(review): numeric severity; the encoding is not defined in this file — confirm with the producer.
    pub severity: Option<i32>,
    /// The diagnostic message text.
    pub message: String,
    /// Text snippet associated with the diagnostic.
    pub snippet: String,
    /// NOTE(review): presumably the buffer rows covered by `snippet` — confirm.
    pub snippet_buffer_row_range: Range<u32>,
    /// NOTE(review): presumably the diagnostic's byte range within `snippet` — confirm.
    pub diagnostic_range_in_snippet: Range<usize>,
}
 183
/// A file with excerpts to include in the prompt as additional context.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written on the file's header line in the prompt.
    pub path: Arc<Path>,
    /// Row bound for the file: an excerpt whose `row_range.end` is below this
    /// value is followed by a `...` line when rendered.
    pub max_row: u32,
    /// Excerpts of this file, rendered in this order under the file header.
    pub excerpts: Vec<RelatedExcerpt>,
    /// NOTE(review): presumably whether this file belongs to an open-source repository — confirm.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 192
/// A contiguous piece of a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file covered by this excerpt.
    pub row_range: Range<u32>,
    /// The excerpt's text, written into the prompt verbatim.
    pub text: Arc<str>,
    /// Priority used when fitting excerpts into a token budget: excerpts with
    /// a lower `order` are considered first.
    #[serde(default)]
    pub order: usize,
}
 200
 201pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 202    special_tokens_for_format(format)
 203        .iter()
 204        .any(|token| input.cursor_excerpt.contains(token))
 205}
 206
/// Formats the prompt for `input` in the given `format`, using
/// [`MAX_PROMPT_TOKENS`] as the token budget.
///
/// Returns `None` when the formatted prompt exceeds that budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> Option<String> {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
 210
 211pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 212    match format {
 213        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
 214        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
 215        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
 216        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 217        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
 218        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
 219        ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 220        ZetaFormat::v0226Hashline => hashline::special_tokens(),
 221        ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
 222        ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
 223        ZetaFormat::V0306SeedMultiRegions => {
 224            static TOKENS: &[&str] = &[
 225                seed_coder::FIM_SUFFIX,
 226                seed_coder::FIM_PREFIX,
 227                seed_coder::FIM_MIDDLE,
 228                seed_coder::FILE_MARKER,
 229                seed_coder::START_MARKER,
 230                seed_coder::SEPARATOR,
 231                seed_coder::END_MARKER,
 232                CURSOR_MARKER,
 233                multi_region::MARKER_TAG_PREFIX,
 234            ];
 235            TOKENS
 236        }
 237    }
 238}
 239
 240/// Returns the (editable_token_limit, context_token_limit) for a given format.
 241pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
 242    match format {
 243        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
 244        ZetaFormat::V0114180EditableRegion => (180, 350),
 245        ZetaFormat::V0120GitMergeMarkers
 246        | ZetaFormat::V0131GitMergeMarkersPrefix
 247        | ZetaFormat::V0211Prefill
 248        | ZetaFormat::V0211SeedCoder
 249        | ZetaFormat::v0226Hashline
 250        | ZetaFormat::V0306SeedMultiRegions
 251        | ZetaFormat::V0304SeedNoEdits => (350, 150),
 252        ZetaFormat::V0304VariableEdit => (1024, 0),
 253    }
 254}
 255
 256pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 257    match format {
 258        ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
 259        ZetaFormat::V0112MiddleAtEnd
 260        | ZetaFormat::V0113Ordered
 261        | ZetaFormat::V0114180EditableRegion
 262        | ZetaFormat::V0120GitMergeMarkers
 263        | ZetaFormat::V0131GitMergeMarkersPrefix
 264        | ZetaFormat::V0211Prefill
 265        | ZetaFormat::V0211SeedCoder
 266        | ZetaFormat::V0304VariableEdit
 267        | ZetaFormat::V0306SeedMultiRegions
 268        | ZetaFormat::V0304SeedNoEdits => &[],
 269    }
 270}
 271
 272pub fn excerpt_ranges_for_format(
 273    format: ZetaFormat,
 274    ranges: &ExcerptRanges,
 275) -> (Range<usize>, Range<usize>) {
 276    match format {
 277        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 278            ranges.editable_150.clone(),
 279            ranges.editable_150_context_350.clone(),
 280        ),
 281        ZetaFormat::V0114180EditableRegion => (
 282            ranges.editable_180.clone(),
 283            ranges.editable_180_context_350.clone(),
 284        ),
 285        ZetaFormat::V0120GitMergeMarkers
 286        | ZetaFormat::V0131GitMergeMarkersPrefix
 287        | ZetaFormat::V0211Prefill
 288        | ZetaFormat::V0211SeedCoder
 289        | ZetaFormat::v0226Hashline
 290        | ZetaFormat::V0304SeedNoEdits
 291        | ZetaFormat::V0306SeedMultiRegions => (
 292            ranges.editable_350.clone(),
 293            ranges.editable_350_context_150.clone(),
 294        ),
 295        ZetaFormat::V0304VariableEdit => {
 296            let context = ranges
 297                .editable_350_context_1024
 298                .clone()
 299                .or(ranges.editable_350_context_512.clone())
 300                .unwrap_or_else(|| ranges.editable_350_context_150.clone());
 301            (context.clone(), context)
 302        }
 303    }
 304}
 305
/// Appends the cursor-excerpt section of the prompt to `prompt`, delegating
/// to the writer that belongs to `format`.
///
/// `context` is the context text; `editable_range` and `cursor_offset` are
/// byte positions within `context`. Several formats share a writer:
/// `V0114180EditableRegion` uses the `v0113_ordered` writer, `V0211Prefill`
/// uses the `v0131_git_merge_markers_prefix` writer, and `V0304SeedNoEdits`
/// uses the `seed_coder` writer.
pub fn write_cursor_excerpt_section_for_format(
    format: ZetaFormat,
    prompt: &mut String,
    path: &Path,
    context: &str,
    editable_range: &Range<usize>,
    cursor_offset: usize,
) {
    match format {
        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
            v0113_ordered::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
            seed_coder::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // The variable-edit writer is not given an editable range.
        ZetaFormat::V0304VariableEdit => {
            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
        }
        // The multi-region section is built as a separate string and appended.
        ZetaFormat::V0306SeedMultiRegions => {
            prompt.push_str(&build_v0306_cursor_prefix(
                path,
                context,
                editable_range,
                cursor_offset,
            ));
        }
    }
}
 376
 377fn build_v0306_cursor_prefix(
 378    path: &Path,
 379    context: &str,
 380    editable_range: &Range<usize>,
 381    cursor_offset: usize,
 382) -> String {
 383    let mut section = String::new();
 384    let path_str = path.to_string_lossy();
 385    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 386
 387    section.push_str(&context[..editable_range.start]);
 388    section.push_str(seed_coder::START_MARKER);
 389
 390    let editable_text = &context[editable_range.clone()];
 391    let cursor_in_editable = cursor_offset - editable_range.start;
 392    multi_region::write_editable_with_markers(
 393        &mut section,
 394        editable_text,
 395        cursor_in_editable,
 396        CURSOR_MARKER,
 397    );
 398
 399    if !section.ends_with('\n') {
 400        section.push('\n');
 401    }
 402    section.push_str(seed_coder::SEPARATOR);
 403    section
 404}
 405
 406fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
 407    let start_row = text[0..range.start].matches('\n').count() as u32;
 408    let mut end_row = start_row + text[range.clone()].matches('\n').count() as u32;
 409    if !text[..range.end].ends_with('\n') {
 410        end_row += 1;
 411    }
 412    return start_row..end_row;
 413}
 414
 415pub fn format_prompt_with_budget_for_format(
 416    input: &ZetaPromptInput,
 417    format: ZetaFormat,
 418    max_tokens: usize,
 419) -> Option<String> {
 420    let (context, editable_range, context_range, cursor_offset) =
 421        resolve_cursor_region(input, format);
 422    let path = &*input.cursor_path;
 423
 424    let empty_files = Vec::new();
 425    let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
 426    let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
 427        let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
 428        let row_range = relative_row_range.start + cursor_excerpt_start_row
 429            ..relative_row_range.end + cursor_excerpt_start_row;
 430        &filter_redundant_excerpts(
 431            input_related_files.to_vec(),
 432            input.cursor_path.as_ref(),
 433            row_range,
 434        )
 435    } else {
 436        input_related_files
 437    };
 438
 439    let prompt = match format {
 440        ZetaFormat::V0211SeedCoder
 441        | ZetaFormat::V0304SeedNoEdits
 442        | ZetaFormat::V0306SeedMultiRegions => {
 443            let mut cursor_section = String::new();
 444            write_cursor_excerpt_section_for_format(
 445                format,
 446                &mut cursor_section,
 447                path,
 448                context,
 449                &editable_range,
 450                cursor_offset,
 451            );
 452
 453            seed_coder::assemble_fim_prompt(
 454                context,
 455                &editable_range,
 456                &cursor_section,
 457                &input.events,
 458                related_files,
 459                max_tokens,
 460            )
 461        }
 462        _ => {
 463            let mut cursor_section = String::new();
 464            write_cursor_excerpt_section_for_format(
 465                format,
 466                &mut cursor_section,
 467                path,
 468                context,
 469                &editable_range,
 470                cursor_offset,
 471            );
 472
 473            let cursor_tokens = estimate_tokens(cursor_section.len());
 474            let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
 475
 476            let edit_history_section = format_edit_history_within_budget(
 477                &input.events,
 478                "<|file_sep|>",
 479                "edit history",
 480                budget_after_cursor,
 481                max_edit_event_count_for_format(&format),
 482            );
 483            let edit_history_tokens = estimate_tokens(edit_history_section.len());
 484            let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 485
 486            let related_files_section = format_related_files_within_budget(
 487                &related_files,
 488                "<|file_sep|>",
 489                "",
 490                budget_after_edit_history,
 491            );
 492
 493            let mut prompt = String::new();
 494            prompt.push_str(&related_files_section);
 495            prompt.push_str(&edit_history_section);
 496            prompt.push_str(&cursor_section);
 497            prompt
 498        }
 499    };
 500    let prompt_tokens = estimate_tokens(prompt.len());
 501    if prompt_tokens > max_tokens {
 502        return None;
 503    }
 504    return Some(prompt);
 505}
 506
 507pub fn filter_redundant_excerpts(
 508    mut related_files: Vec<RelatedFile>,
 509    cursor_path: &Path,
 510    cursor_row_range: Range<u32>,
 511) -> Vec<RelatedFile> {
 512    for file in &mut related_files {
 513        if file.path.as_ref() == cursor_path {
 514            file.excerpts.retain(|excerpt| {
 515                excerpt.row_range.start < cursor_row_range.start
 516                    || excerpt.row_range.end > cursor_row_range.end
 517            });
 518        }
 519    }
 520    related_files.retain(|file| !file.excerpts.is_empty());
 521    related_files
 522}
 523
 524pub fn max_edit_event_count_for_format(format: &ZetaFormat) -> usize {
 525    match format {
 526        ZetaFormat::V0112MiddleAtEnd
 527        | ZetaFormat::V0113Ordered
 528        | ZetaFormat::V0114180EditableRegion
 529        | ZetaFormat::V0120GitMergeMarkers
 530        | ZetaFormat::V0131GitMergeMarkersPrefix
 531        | ZetaFormat::V0211Prefill
 532        | ZetaFormat::V0211SeedCoder
 533        | ZetaFormat::v0226Hashline
 534        | ZetaFormat::V0304SeedNoEdits
 535        | ZetaFormat::V0304VariableEdit
 536        | ZetaFormat::V0306SeedMultiRegions => 6,
 537    }
 538}
 539
 540pub fn get_prefill_for_format(
 541    format: ZetaFormat,
 542    context: &str,
 543    editable_range: &Range<usize>,
 544) -> String {
 545    match format {
 546        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
 547        ZetaFormat::V0112MiddleAtEnd
 548        | ZetaFormat::V0113Ordered
 549        | ZetaFormat::V0114180EditableRegion
 550        | ZetaFormat::V0120GitMergeMarkers
 551        | ZetaFormat::V0131GitMergeMarkersPrefix
 552        | ZetaFormat::V0211SeedCoder
 553        | ZetaFormat::v0226Hashline
 554        | ZetaFormat::V0304VariableEdit => String::new(),
 555        ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => String::new(),
 556    }
 557}
 558
 559pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
 560    match format {
 561        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
 562        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
 563        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
 564        ZetaFormat::V0211SeedCoder
 565        | ZetaFormat::V0304SeedNoEdits
 566        | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
 567        ZetaFormat::V0112MiddleAtEnd
 568        | ZetaFormat::V0113Ordered
 569        | ZetaFormat::V0114180EditableRegion
 570        | ZetaFormat::v0226Hashline
 571        | ZetaFormat::V0304VariableEdit => None,
 572    }
 573}
 574
 575pub fn encode_patch_as_output_for_format(
 576    format: ZetaFormat,
 577    old_editable_region: &str,
 578    patch: &str,
 579    cursor_offset: Option<usize>,
 580) -> Result<Option<String>> {
 581    match format {
 582        ZetaFormat::v0226Hashline => {
 583            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
 584        }
 585        ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
 586            old_editable_region,
 587            patch,
 588            cursor_offset,
 589        )
 590        .map(Some),
 591        ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
 592            Ok(seed_coder::no_edits(patch))
 593        }
 594        _ => Ok(None),
 595    }
 596}
 597
/// Result of parsing model output with [`parse_zeta2_model_output`].
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
}
 604
 605/// Parse model output for the given zeta format
 606pub fn parse_zeta2_model_output(
 607    output: &str,
 608    format: ZetaFormat,
 609    prompt_inputs: &ZetaPromptInput,
 610) -> Result<ParsedOutput> {
 611    let output = match output_end_marker_for_format(format) {
 612        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
 613        None => output,
 614    };
 615
 616    let (context, editable_range_in_context, context_range, _) =
 617        resolve_cursor_region(prompt_inputs, format);
 618    let context_start = context_range.start;
 619    let old_editable_region = &context[editable_range_in_context.clone()];
 620
 621    let (range_in_context, output) = match format {
 622        ZetaFormat::v0226Hashline => (
 623            editable_range_in_context,
 624            if hashline::output_has_edit_commands(output) {
 625                hashline::apply_edit_commands(old_editable_region, output)
 626            } else {
 627                output.to_string()
 628            },
 629        ),
 630        ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
 631        ZetaFormat::V0304SeedNoEdits => (
 632            editable_range_in_context,
 633            if output.starts_with(seed_coder::NO_EDITS) {
 634                old_editable_region.to_string()
 635            } else {
 636                output.to_string()
 637            },
 638        ),
 639        ZetaFormat::V0306SeedMultiRegions => (
 640            editable_range_in_context,
 641            if output.starts_with(seed_coder::NO_EDITS) {
 642                old_editable_region.to_string()
 643            } else {
 644                multi_region::apply_marker_span(old_editable_region, output)?
 645            },
 646        ),
 647        _ => (editable_range_in_context, output.to_string()),
 648    };
 649
 650    let range_in_excerpt =
 651        range_in_context.start + context_start..range_in_context.end + context_start;
 652
 653    Ok(ParsedOutput {
 654        new_editable_region: output,
 655        range_in_excerpt,
 656    })
 657}
 658
/// Singular-named alias for [`excerpt_ranges_for_format`]; returns the same
/// `(editable_range, context_range)` pair.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
 665
 666pub fn resolve_cursor_region(
 667    input: &ZetaPromptInput,
 668    format: ZetaFormat,
 669) -> (&str, Range<usize>, Range<usize>, usize) {
 670    let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges {
 671        let (editable_tokens, context_tokens) = token_limits_for_format(format);
 672        compute_editable_and_context_ranges(
 673            &input.cursor_excerpt,
 674            input.cursor_offset_in_excerpt,
 675            syntax_ranges,
 676            editable_tokens,
 677            context_tokens,
 678        )
 679    } else {
 680        excerpt_range_for_format(format, &input.excerpt_ranges)
 681    };
 682    let context_start = context_range.start;
 683    let context_text = &input.cursor_excerpt[context_range.clone()];
 684    let adjusted_editable =
 685        (editable_range.start - context_start)..(editable_range.end - context_start);
 686    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 687
 688    (
 689        context_text,
 690        adjusted_editable,
 691        context_range,
 692        adjusted_cursor,
 693    )
 694}
 695
 696pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 697    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
 698    get_prefill_for_format(format, context, &editable_range)
 699}
 700
 701fn format_edit_history_within_budget(
 702    events: &[Arc<Event>],
 703    file_marker: &str,
 704    edit_history_name: &str,
 705    max_tokens: usize,
 706    max_edit_event_count: usize,
 707) -> String {
 708    let header = format!("{}{}\n", file_marker, edit_history_name);
 709    let header_tokens = estimate_tokens(header.len());
 710    if header_tokens >= max_tokens {
 711        return String::new();
 712    }
 713
 714    let mut event_strings: Vec<String> = Vec::new();
 715    let mut total_tokens = header_tokens;
 716
 717    for event in events.iter().rev().take(max_edit_event_count) {
 718        let mut event_str = String::new();
 719        write_event(&mut event_str, event);
 720        let event_tokens = estimate_tokens(event_str.len());
 721
 722        if total_tokens + event_tokens > max_tokens {
 723            break;
 724        }
 725        total_tokens += event_tokens;
 726        event_strings.push(event_str);
 727    }
 728
 729    if event_strings.is_empty() {
 730        return String::new();
 731    }
 732
 733    let mut result = header;
 734    for event_str in event_strings.iter().rev() {
 735        result.push_str(event_str);
 736    }
 737    result
 738}
 739
 740fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
 741    let needs_newline = !excerpt.text.ends_with('\n');
 742    let needs_ellipsis = excerpt.row_range.end < file_max_row;
 743    let len = excerpt.text.len()
 744        + if needs_newline { "\n".len() } else { 0 }
 745        + if needs_ellipsis { "...\n".len() } else { 0 };
 746    estimate_tokens(len)
 747}
 748
/// Renders as many related-file excerpts as fit within `max_tokens`.
///
/// Excerpts are admitted in ascending `order` (ties broken by file index,
/// then excerpt index) until the first one that would exceed the budget.
/// The admitted excerpts are then rendered grouped by file, in the files'
/// and excerpts' original order. Each file starts with a header line of
/// `file_prefix` followed by the path; `file_suffix` is written between
/// consecutive files.
pub fn format_related_files_within_budget(
    related_files: &[RelatedFile],
    file_prefix: &str,
    file_suffix: &str,
    max_tokens: usize,
) -> String {
    // Identifies one excerpt by position, along with its priority.
    struct ExcerptCandidate {
        file_ix: usize,
        excerpt_ix: usize,
        order: usize,
    }

    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
        .iter()
        .enumerate()
        .flat_map(|(file_ix, file)| {
            file.excerpts
                .iter()
                .enumerate()
                .map(move |(excerpt_ix, e)| ExcerptCandidate {
                    file_ix,
                    excerpt_ix,
                    order: e.order,
                })
        })
        .collect();

    // Pre-compute file header strings and their token costs.
    let file_headers: Vec<String> = related_files
        .iter()
        .map(|file| {
            let path_str = file.path.to_string_lossy();
            format!("{}{}\n", file_prefix, path_str)
        })
        .collect();

    // Sort the excerpts by their order and determine how many fit within the budget.
    let mut total_tokens = 0;
    let mut included_excerpt_count = 0_usize;
    let mut included_file_indices = vec![false; related_files.len()];
    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
    for candidate in &excerpt_candidates {
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        let file_already_included = included_file_indices[candidate.file_ix];
        // The header (plus suffix) is charged once, with the file's first admitted excerpt.
        // NOTE(review): the suffix is charged for every file but rendered only
        // between files, so the estimate can slightly exceed the rendered size.
        let header_cost = if file_already_included {
            0
        } else {
            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
        };
        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
        // Stop at the first excerpt that does not fit; later ones are not tried.
        if total_tokens + header_cost + excerpt_cost > max_tokens {
            break;
        }
        total_tokens += header_cost + excerpt_cost;
        if !file_already_included {
            included_file_indices[candidate.file_ix] = true;
        }
        included_excerpt_count += 1;
    }

    // Keep only the admitted excerpts, then restore original file/excerpt order.
    excerpt_candidates.truncate(included_excerpt_count);
    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));

    // Render all of the files that fit within the token budget, in the original order.
    let mut result = String::new();
    let mut last_file_ix = None;
    for candidate in &excerpt_candidates {
        // A change of file index starts a new file section.
        if last_file_ix != Some(candidate.file_ix) {
            if last_file_ix.is_some() {
                result.push_str(file_suffix);
            }
            result.push_str(&file_headers[candidate.file_ix]);
            last_file_ix = Some(candidate.file_ix);
        }
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        result.push_str(&excerpt.text);
        if !result.ends_with('\n') {
            result.push('\n');
        }
        // Mark that the file continues past this excerpt.
        if excerpt.row_range.end < file.max_row {
            result.push_str("...\n");
        }
    }

    result
}
 837
 838pub fn write_related_files(
 839    prompt: &mut String,
 840    related_files: &[RelatedFile],
 841) -> Vec<Range<usize>> {
 842    let mut ranges = Vec::new();
 843    for file in related_files {
 844        let start = prompt.len();
 845        let path_str = file.path.to_string_lossy();
 846        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 847        for excerpt in &file.excerpts {
 848            prompt.push_str(&excerpt.text);
 849            if !prompt.ends_with('\n') {
 850                prompt.push('\n');
 851            }
 852            if excerpt.row_range.end < file.max_row {
 853                prompt.push_str("...\n");
 854            }
 855        }
 856        let end = prompt.len();
 857        ranges.push(start..end);
 858    }
 859    ranges
 860}
 861
 862mod v0112_middle_at_end {
 863    use super::*;
 864
 865    pub fn special_tokens() -> &'static [&'static str] {
 866        &[
 867            "<|fim_prefix|>",
 868            "<|fim_suffix|>",
 869            "<|fim_middle|>",
 870            "<|file_sep|>",
 871            CURSOR_MARKER,
 872        ]
 873    }
 874
 875    pub fn write_cursor_excerpt_section(
 876        prompt: &mut String,
 877        path: &Path,
 878        context: &str,
 879        editable_range: &Range<usize>,
 880        cursor_offset: usize,
 881    ) {
 882        let path_str = path.to_string_lossy();
 883        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 884
 885        prompt.push_str("<|fim_prefix|>\n");
 886        prompt.push_str(&context[..editable_range.start]);
 887
 888        prompt.push_str("<|fim_suffix|>\n");
 889        prompt.push_str(&context[editable_range.end..]);
 890        if !prompt.ends_with('\n') {
 891            prompt.push('\n');
 892        }
 893
 894        prompt.push_str("<|fim_middle|>current\n");
 895        prompt.push_str(&context[editable_range.start..cursor_offset]);
 896        prompt.push_str(CURSOR_MARKER);
 897        prompt.push_str(&context[cursor_offset..editable_range.end]);
 898        if !prompt.ends_with('\n') {
 899            prompt.push('\n');
 900        }
 901
 902        prompt.push_str("<|fim_middle|>updated\n");
 903    }
 904}
 905
 906mod v0113_ordered {
 907    use super::*;
 908
 909    pub fn special_tokens() -> &'static [&'static str] {
 910        &[
 911            "<|fim_prefix|>",
 912            "<|fim_suffix|>",
 913            "<|fim_middle|>",
 914            "<|file_sep|>",
 915            CURSOR_MARKER,
 916        ]
 917    }
 918
 919    pub fn write_cursor_excerpt_section(
 920        prompt: &mut String,
 921        path: &Path,
 922        context: &str,
 923        editable_range: &Range<usize>,
 924        cursor_offset: usize,
 925    ) {
 926        let path_str = path.to_string_lossy();
 927        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 928
 929        prompt.push_str("<|fim_prefix|>\n");
 930        prompt.push_str(&context[..editable_range.start]);
 931        if !prompt.ends_with('\n') {
 932            prompt.push('\n');
 933        }
 934
 935        prompt.push_str("<|fim_middle|>current\n");
 936        prompt.push_str(&context[editable_range.start..cursor_offset]);
 937        prompt.push_str(CURSOR_MARKER);
 938        prompt.push_str(&context[cursor_offset..editable_range.end]);
 939        if !prompt.ends_with('\n') {
 940            prompt.push('\n');
 941        }
 942
 943        prompt.push_str("<|fim_suffix|>\n");
 944        prompt.push_str(&context[editable_range.end..]);
 945        if !prompt.ends_with('\n') {
 946            prompt.push('\n');
 947        }
 948
 949        prompt.push_str("<|fim_middle|>updated\n");
 950    }
 951}
 952
mod v0114180_editable_region {
    //! Shares its special tokens with `v0113_ordered`; this module defines no
    //! prompt writer of its own.
    use super::*;

    /// Returns the same special-token list as `v0113_ordered::special_tokens`.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
 960
 961pub mod v0120_git_merge_markers {
 962    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 963    //!
 964    //! Example prompt:
 965    //!
 966    //! <|file_sep|>path/to/target_file.py
 967    //! <|fim_prefix|>
 968    //! code before editable region
 969    //! <|fim_suffix|>
 970    //! code after editable region
 971    //! <|fim_middle|>
 972    //! <<<<<<< CURRENT
 973    //! code that
 974    //! needs to<|user_cursor|>
 975    //! be rewritten
 976    //! =======
 977    //!
 978    //! Expected output (should be generated by the model):
 979    //!
 980    //! updated
 981    //! code with
 982    //! changes applied
 983    //! >>>>>>> UPDATED
 984
 985    use super::*;
 986
 987    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 988    pub const SEPARATOR: &str = "=======\n";
 989    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 990
 991    pub fn special_tokens() -> &'static [&'static str] {
 992        &[
 993            "<|fim_prefix|>",
 994            "<|fim_suffix|>",
 995            "<|fim_middle|>",
 996            "<|file_sep|>",
 997            START_MARKER,
 998            SEPARATOR,
 999            END_MARKER,
1000            CURSOR_MARKER,
1001        ]
1002    }
1003
1004    pub fn write_cursor_excerpt_section(
1005        prompt: &mut String,
1006        path: &Path,
1007        context: &str,
1008        editable_range: &Range<usize>,
1009        cursor_offset: usize,
1010    ) {
1011        let path_str = path.to_string_lossy();
1012        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1013
1014        prompt.push_str("<|fim_prefix|>");
1015        prompt.push_str(&context[..editable_range.start]);
1016
1017        prompt.push_str("<|fim_suffix|>");
1018        prompt.push_str(&context[editable_range.end..]);
1019        if !prompt.ends_with('\n') {
1020            prompt.push('\n');
1021        }
1022
1023        prompt.push_str("<|fim_middle|>");
1024        prompt.push_str(START_MARKER);
1025        prompt.push_str(&context[editable_range.start..cursor_offset]);
1026        prompt.push_str(CURSOR_MARKER);
1027        prompt.push_str(&context[cursor_offset..editable_range.end]);
1028        if !prompt.ends_with('\n') {
1029            prompt.push('\n');
1030        }
1031        prompt.push_str(SEPARATOR);
1032    }
1033}
1034
1035pub mod v0131_git_merge_markers_prefix {
1036    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1037    //!
1038    //! Example prompt:
1039    //!
1040    //! <|file_sep|>path/to/target_file.py
1041    //! <|fim_prefix|>
1042    //! code before editable region
1043    //! <<<<<<< CURRENT
1044    //! code that
1045    //! needs to<|user_cursor|>
1046    //! be rewritten
1047    //! =======
1048    //! <|fim_suffix|>
1049    //! code after editable region
1050    //! <|fim_middle|>
1051    //!
1052    //! Expected output (should be generated by the model):
1053    //!
1054    //! updated
1055    //! code with
1056    //! changes applied
1057    //! >>>>>>> UPDATED
1058
1059    use super::*;
1060
1061    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1062    pub const SEPARATOR: &str = "=======\n";
1063    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1064
1065    pub fn special_tokens() -> &'static [&'static str] {
1066        &[
1067            "<|fim_prefix|>",
1068            "<|fim_suffix|>",
1069            "<|fim_middle|>",
1070            "<|file_sep|>",
1071            START_MARKER,
1072            SEPARATOR,
1073            END_MARKER,
1074            CURSOR_MARKER,
1075        ]
1076    }
1077
1078    pub fn write_cursor_excerpt_section(
1079        prompt: &mut String,
1080        path: &Path,
1081        context: &str,
1082        editable_range: &Range<usize>,
1083        cursor_offset: usize,
1084    ) {
1085        let path_str = path.to_string_lossy();
1086        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1087
1088        prompt.push_str("<|fim_prefix|>");
1089        prompt.push_str(&context[..editable_range.start]);
1090        prompt.push_str(START_MARKER);
1091        prompt.push_str(&context[editable_range.start..cursor_offset]);
1092        prompt.push_str(CURSOR_MARKER);
1093        prompt.push_str(&context[cursor_offset..editable_range.end]);
1094        if !prompt.ends_with('\n') {
1095            prompt.push('\n');
1096        }
1097        prompt.push_str(SEPARATOR);
1098
1099        prompt.push_str("<|fim_suffix|>");
1100        prompt.push_str(&context[editable_range.end..]);
1101        if !prompt.ends_with('\n') {
1102            prompt.push('\n');
1103        }
1104
1105        prompt.push_str("<|fim_middle|>");
1106    }
1107}
1108
1109pub mod v0211_prefill {
1110    use super::*;
1111
1112    pub fn special_tokens() -> &'static [&'static str] {
1113        v0131_git_merge_markers_prefix::special_tokens()
1114    }
1115
1116    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1117        let editable_region = &context[editable_range.start..editable_range.end];
1118
1119        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1120        let prefill_len = editable_region.floor_char_boundary(prefill_len);
1121
1122        // Find a token boundary to avoid splitting tokens in the prefill.
1123        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1124        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1125        // the \n and consume any consecutive \n characters after it.
1126        let prefill = &editable_region[..prefill_len];
1127        match prefill.rfind('\n') {
1128            Some(pos) => {
1129                let mut end = pos + 1;
1130                while end < editable_region.len()
1131                    && editable_region.as_bytes().get(end) == Some(&b'\n')
1132                {
1133                    end += 1;
1134                }
1135                editable_region[..end].to_string()
1136            }
1137            // No newline found. Fall back to splitting before the last space
1138            // (word-level boundary)
1139            None => match prefill.rfind(' ') {
1140                Some(pos) => prefill[..pos].to_string(),
1141                None => prefill.to_string(),
1142            },
1143        }
1144    }
1145}
1146
1147pub mod hashline {
1148
1149    use std::fmt::Display;
1150
    /// Marker written at the very end of the hashline prompt, followed by a newline.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Marker written immediately before the hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";
1153
1154    use super::*;
1155
    /// Prefix of a model-output line that replaces one line or an inclusive line range.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a model-output line that inserts content after a referenced line.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
    /// Model output that starts with this marker leaves the editable region unchanged.
    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1159
1160    pub fn special_tokens() -> &'static [&'static str] {
1161        return &[
1162            SET_COMMAND_MARKER,
1163            "<|set_range|>",
1164            INSERT_COMMAND_MARKER,
1165            NO_EDITS_COMMAND_MARKER,
1166            CURSOR_MARKER,
1167            "<|file_sep|>",
1168            "<|fim_prefix|>",
1169            "<|fim_suffix|>",
1170            "<|fim_middle|>",
1171        ];
1172    }
1173
1174    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1175    #[derive(Debug, Clone, PartialEq, Eq)]
1176    struct LineRef {
1177        index: usize,
1178        hash: u8,
1179    }
1180
1181    impl Display for LineRef {
1182        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1183            write!(f, "{}:{:02x}", self.index, self.hash)
1184        }
1185    }
1186
1187    pub fn hash_line(line: &[u8]) -> u8 {
1188        let mut h: u8 = 0;
1189        for &byte in line {
1190            h = h.wrapping_add(byte);
1191        }
1192        return h;
1193    }
1194
1195    /// Write the hashline-encoded editable region into `out`. Each line of
1196    /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1197    /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1198    /// to the start of `editable_text`).
1199    pub fn write_hashline_editable_region(
1200        out: &mut String,
1201        editable_text: &str,
1202        cursor_offset_in_editable: usize,
1203    ) {
1204        let mut offset = 0;
1205        for (i, line) in editable_text.lines().enumerate() {
1206            let (head, cursor, tail) = if cursor_offset_in_editable > offset
1207                && cursor_offset_in_editable < offset + line.len()
1208            {
1209                (
1210                    &line[..cursor_offset_in_editable - offset],
1211                    CURSOR_MARKER,
1212                    &line[cursor_offset_in_editable - offset..],
1213                )
1214            } else {
1215                (line, "", "")
1216            };
1217            write!(
1218                out,
1219                "\n{}|{head}{cursor}{tail}",
1220                LineRef {
1221                    index: i,
1222                    hash: hash_line(line.as_bytes())
1223                }
1224            )
1225            .unwrap();
1226            offset += line.len() + 1;
1227        }
1228    }
1229
1230    pub fn write_cursor_excerpt_section(
1231        prompt: &mut String,
1232        path: &Path,
1233        context: &str,
1234        editable_range: &Range<usize>,
1235        cursor_offset: usize,
1236    ) {
1237        let path_str = path.to_string_lossy();
1238        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1239
1240        prompt.push_str("<|fim_prefix|>\n");
1241        prompt.push_str(&context[..editable_range.start]);
1242        prompt.push_str(START_MARKER);
1243
1244        let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1245        let editable_region = &context[editable_range.clone()];
1246        write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1247
1248        if !prompt.ends_with('\n') {
1249            prompt.push('\n');
1250        }
1251
1252        prompt.push_str("<|fim_suffix|>\n");
1253        prompt.push_str(&context[editable_range.end..]);
1254        if !prompt.ends_with('\n') {
1255            prompt.push('\n');
1256        }
1257
1258        prompt.push_str(END_MARKER);
1259        prompt.push('\n');
1260    }
1261
    /// A single edit command parsed from the model output. `content` borrows
    /// from the model output string and holds the raw text lines that followed
    /// the command line.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            /// First line of the replaced range.
            start: LineRef,
            /// Last line of the replaced range (inclusive).
            end: LineRef,
            /// Replacement text; may be empty.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            /// Line to insert after; `None` when the specifier did not parse
            /// as a line reference.
            after: Option<LineRef>,
            /// Text to insert.
            content: &'a str,
        },
    }
1279
1280    /// Parse a line reference like `3:c3` into a `LineRef`.
1281    fn parse_line_ref(s: &str) -> Option<LineRef> {
1282        let (idx_str, hash_str) = s.split_once(':')?;
1283        let index = idx_str.parse::<usize>().ok()?;
1284        let hash = u8::from_str_radix(hash_str, 16).ok()?;
1285        Some(LineRef { index, hash })
1286    }
1287
    /// Parse the model output into a list of `EditCommand`s.
    ///
    /// The output is scanned line by line. A line whose trimmed text starts with
    /// the set or insert marker opens a command; the remainder of that line is
    /// the line-reference specifier. All following lines, up to (but excluding)
    /// the next command line or the end of the output, form the command's
    /// `content`. Lines that appear before the first command are ignored.
    ///
    /// Set commands whose specifier fails to parse are dropped. Insert commands
    /// are always kept; an unparsable specifier yields `after: None`.
    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
        let mut commands = Vec::new();
        // Byte offset of the start of the line currently being examined.
        let mut offset = 0usize;

        while offset < model_output.len() {
            let next_nl = model_output[offset..]
                .find('\n')
                .map(|i| offset + i)
                .unwrap_or(model_output.len());
            let line = &model_output[offset..next_nl];
            // Start of the following line (just past the newline, if there is one).
            let line_end = if next_nl < model_output.len() {
                next_nl + 1
            } else {
                next_nl
            };

            let trimmed = line.trim();
            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
                (true, spec)
            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
                (false, spec)
            } else {
                // Not a command line: skip it.
                offset = line_end;
                continue;
            };

            // Collect the command body: every line until the next command line.
            let mut content_end = line_end;
            let mut scan = line_end;

            while scan < model_output.len() {
                let body_nl = model_output[scan..]
                    .find('\n')
                    .map(|i| scan + i)
                    .unwrap_or(model_output.len());
                let body_line = &model_output[scan..body_nl];
                if body_line.trim().starts_with(SET_COMMAND_MARKER)
                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
                {
                    break;
                }
                scan = if body_nl < model_output.len() {
                    body_nl + 1
                } else {
                    body_nl
                };
                content_end = scan;
            }

            // Body text, including the trailing newline of each body line.
            let content = &model_output[line_end..content_end];

            if is_set {
                // `start-end` denotes an inclusive range; otherwise a single line.
                if let Some((start_str, end_str)) = specifier.split_once('-') {
                    if let (Some(start), Some(end)) =
                        (parse_line_ref(start_str), parse_line_ref(end_str))
                    {
                        commands.push(EditCommand::Set {
                            start,
                            end,
                            content,
                        });
                    }
                } else if let Some(target) = parse_line_ref(specifier) {
                    commands.push(EditCommand::Set {
                        start: target.clone(),
                        end: target,
                        content,
                    });
                }
            } else {
                let after = parse_line_ref(specifier);
                commands.push(EditCommand::Insert { after, content });
            }

            // Resume at the next command line (or at the end of the output).
            offset = scan;
        }

        commands
    }
1367
1368    /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1369    /// (as opposed to being a plain full-replacement output).
1370    /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1371    /// editable region, returning the plain text content.
1372    pub fn strip_hashline_prefixes(region: &str) -> String {
1373        let mut decoded: String = region
1374            .lines()
1375            .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1376            .collect::<Vec<_>>()
1377            .join("\n");
1378        if region.ends_with('\n') {
1379            decoded.push('\n');
1380        }
1381        decoded
1382    }
1383
1384    pub fn output_has_edit_commands(model_output: &str) -> bool {
1385        model_output.contains(SET_COMMAND_MARKER)
1386            || model_output.contains(INSERT_COMMAND_MARKER)
1387            || model_output.contains(NO_EDITS_COMMAND_MARKER)
1388    }
1389
1390    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1391    /// original editable region text.
1392    ///
1393    /// `editable_region` is the original text of the editable region (without hash
1394    /// prefixes). `model_output` is the raw model response containing edit commands.
1395    ///
1396    /// Returns the full replacement text for the editable region.
1397    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1398        if model_output
1399            .trim_start()
1400            .starts_with(NO_EDITS_COMMAND_MARKER)
1401        {
1402            return editable_region.to_string();
1403        }
1404
1405        let original_lines: Vec<&str> = editable_region.lines().collect();
1406        let old_hashes: Vec<u8> = original_lines
1407            .iter()
1408            .map(|line| hash_line(line.as_bytes()))
1409            .collect();
1410
1411        let commands = parse_edit_commands(model_output);
1412
1413        // For set operations: indexed by start line → Some((end line index, content))
1414        // For insert operations: indexed by line index → vec of content to insert after
1415        // Insert-before-first is tracked separately.
1416        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1417        let mut insert_before_first: Vec<&str> = Vec::new();
1418        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1419
1420        for command in &commands {
1421            match command {
1422                EditCommand::Set {
1423                    start,
1424                    end,
1425                    content,
1426                } => {
1427                    if start.index < old_hashes.len()
1428                        && end.index < old_hashes.len()
1429                        && start.index <= end.index
1430                        && old_hashes[start.index] == start.hash
1431                        && old_hashes[end.index] == end.hash
1432                    {
1433                        set_ops[start.index] = Some((end.index, *content));
1434                    }
1435                }
1436                EditCommand::Insert { after, content } => match after {
1437                    None => insert_before_first.push(*content),
1438                    Some(line_ref) => {
1439                        if line_ref.index < old_hashes.len()
1440                            && old_hashes[line_ref.index] == line_ref.hash
1441                        {
1442                            insert_after[line_ref.index].push(*content);
1443                        }
1444                    }
1445                },
1446            }
1447        }
1448
1449        let mut result = String::new();
1450
1451        // Emit any insertions before the first line
1452        for content in &insert_before_first {
1453            result.push_str(content);
1454            if !content.ends_with('\n') {
1455                result.push('\n');
1456            }
1457        }
1458
1459        let mut i = 0;
1460        while i < original_lines.len() {
1461            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1462                // Replace lines i..=end_index with the replacement content
1463                result.push_str(replacement);
1464                if !replacement.is_empty() && !replacement.ends_with('\n') {
1465                    result.push('\n');
1466                }
1467                // Emit any insertions after the end of this set range
1468                if *end_index < insert_after.len() {
1469                    for content in &insert_after[*end_index] {
1470                        result.push_str(content);
1471                        if !content.ends_with('\n') {
1472                            result.push('\n');
1473                        }
1474                    }
1475                }
1476                i = end_index + 1;
1477            } else {
1478                // Keep the original line
1479                result.push_str(original_lines[i]);
1480                result.push('\n');
1481                // Emit any insertions after this line
1482                for content in &insert_after[i] {
1483                    result.push_str(content);
1484                    if !content.ends_with('\n') {
1485                        result.push('\n');
1486                    }
1487                }
1488                i += 1;
1489            }
1490        }
1491
1492        // Preserve trailing newline behavior: if the original ended with a
1493        // newline the result already has one; if it didn't, trim the extra one
1494        // we added.
1495        if !editable_region.ends_with('\n') && result.ends_with('\n') {
1496            result.pop();
1497        }
1498
1499        result
1500    }
1501
1502    /// Convert a unified diff patch into hashline edit commands.
1503    ///
1504    /// Parses the unified diff `patch` directly to determine which lines of
1505    /// `old_text` are deleted/replaced and what new lines are added, then emits
1506    /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1507    /// `{index}:{hash}` identifiers.
1508    ///
1509    /// `cursor_offset` is an optional byte offset into the first hunk's new
1510    /// text (context + additions) where the cursor marker should be placed.
1511    pub fn patch_to_edit_commands(
1512        old_text: &str,
1513        patch: &str,
1514        cursor_offset: Option<usize>,
1515    ) -> Result<String> {
1516        let old_lines: Vec<&str> = old_text.lines().collect();
1517        let old_hashes: Vec<u8> = old_lines
1518            .iter()
1519            .map(|line| hash_line(line.as_bytes()))
1520            .collect();
1521
1522        let mut result = String::new();
1523        let mut first_hunk = true;
1524
1525        struct Hunk<'a> {
1526            line_range: Range<usize>,
1527            new_text_lines: Vec<&'a str>,
1528            cursor_line_offset_in_new_text: Option<(usize, usize)>,
1529        }
1530
1531        // Parse the patch line by line. We only care about hunk headers,
1532        // context, deletions, and additions.
1533        let mut old_line_index: usize = 0;
1534        let mut current_hunk: Option<Hunk> = None;
1535        // Byte offset tracking within the hunk's new text for cursor placement.
1536        let mut new_text_byte_offset: usize = 0;
1537        // The line index of the last old line seen before/in the current hunk
1538        // (used for insert-after reference).
1539        let mut last_old_line_before_hunk: Option<usize> = None;
1540
1541        fn flush_hunk(
1542            hunk: Hunk,
1543            last_old_line: Option<usize>,
1544            result: &mut String,
1545            old_hashes: &[u8],
1546        ) {
1547            if hunk.line_range.is_empty() {
1548                // Pure insertion — reference the old line to insert after when in bounds.
1549                if let Some(after) = last_old_line
1550                    && let Some(&hash) = old_hashes.get(after)
1551                {
1552                    write!(
1553                        result,
1554                        "{INSERT_COMMAND_MARKER}{}\n",
1555                        LineRef { index: after, hash }
1556                    )
1557                    .unwrap();
1558                } else {
1559                    result.push_str(INSERT_COMMAND_MARKER);
1560                    result.push('\n');
1561                }
1562            } else {
1563                let start = hunk.line_range.start;
1564                let end_exclusive = hunk.line_range.end;
1565                let deleted_line_count = end_exclusive.saturating_sub(start);
1566
1567                if deleted_line_count == 1 {
1568                    if let Some(&hash) = old_hashes.get(start) {
1569                        write!(
1570                            result,
1571                            "{SET_COMMAND_MARKER}{}\n",
1572                            LineRef { index: start, hash }
1573                        )
1574                        .unwrap();
1575                    } else {
1576                        result.push_str(SET_COMMAND_MARKER);
1577                        result.push('\n');
1578                    }
1579                } else {
1580                    let end_inclusive = end_exclusive - 1;
1581                    match (
1582                        old_hashes.get(start).copied(),
1583                        old_hashes.get(end_inclusive).copied(),
1584                    ) {
1585                        (Some(start_hash), Some(end_hash)) => {
1586                            write!(
1587                                result,
1588                                "{SET_COMMAND_MARKER}{}-{}\n",
1589                                LineRef {
1590                                    index: start,
1591                                    hash: start_hash
1592                                },
1593                                LineRef {
1594                                    index: end_inclusive,
1595                                    hash: end_hash
1596                                }
1597                            )
1598                            .unwrap();
1599                        }
1600                        _ => {
1601                            result.push_str(SET_COMMAND_MARKER);
1602                            result.push('\n');
1603                        }
1604                    }
1605                }
1606            }
1607            for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1608                if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1609                    && line_offset == cursor_line_offset
1610                {
1611                    result.push_str(&line[..char_offset]);
1612                    result.push_str(CURSOR_MARKER);
1613                    result.push_str(&line[char_offset..]);
1614                    continue;
1615                }
1616
1617                result.push_str(line);
1618            }
1619        }
1620
1621        for raw_line in patch.split_inclusive('\n') {
1622            if raw_line.starts_with("@@") {
1623                // Flush any pending change hunk from a previous patch hunk.
1624                if let Some(hunk) = current_hunk.take() {
1625                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1626                }
1627
1628                // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1629                // We intentionally do not trust old_start as a direct local index into `old_text`,
1630                // because some patches are produced against a larger file region and carry
1631                // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1632                if first_hunk {
1633                    new_text_byte_offset = 0;
1634                    first_hunk = false;
1635                }
1636                continue;
1637            }
1638
1639            if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1640                continue;
1641            }
1642            if raw_line.starts_with("\\ No newline") {
1643                continue;
1644            }
1645
1646            if raw_line.starts_with('-') {
1647                // Extend or start a change hunk with this deleted old line.
1648                match &mut current_hunk {
1649                    Some(Hunk {
1650                        line_range: range, ..
1651                    }) => range.end = old_line_index + 1,
1652                    None => {
1653                        current_hunk = Some(Hunk {
1654                            line_range: old_line_index..old_line_index + 1,
1655                            new_text_lines: Vec::new(),
1656                            cursor_line_offset_in_new_text: None,
1657                        });
1658                    }
1659                }
1660                old_line_index += 1;
1661            } else if let Some(added_content) = raw_line.strip_prefix('+') {
1662                // Place cursor marker if cursor_offset falls within this line.
1663                let mut cursor_line_offset = None;
1664                if let Some(cursor_off) = cursor_offset
1665                    && (first_hunk
1666                        || cursor_off >= new_text_byte_offset
1667                            && cursor_off <= new_text_byte_offset + added_content.len())
1668                {
1669                    let line_offset = added_content.floor_char_boundary(
1670                        cursor_off
1671                            .saturating_sub(new_text_byte_offset)
1672                            .min(added_content.len()),
1673                    );
1674                    cursor_line_offset = Some(line_offset);
1675                }
1676
1677                new_text_byte_offset += added_content.len();
1678
1679                let hunk = current_hunk.get_or_insert(Hunk {
1680                    line_range: old_line_index..old_line_index,
1681                    new_text_lines: vec![],
1682                    cursor_line_offset_in_new_text: None,
1683                });
1684                hunk.new_text_lines.push(added_content);
1685                hunk.cursor_line_offset_in_new_text = cursor_line_offset
1686                    .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1687            } else {
1688                // Context line (starts with ' ' or is empty).
1689                if let Some(hunk) = current_hunk.take() {
1690                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1691                }
1692                last_old_line_before_hunk = Some(old_line_index);
1693                old_line_index += 1;
1694                let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1695                new_text_byte_offset += content.len();
1696            }
1697        }
1698
1699        // Flush final group.
1700        if let Some(hunk) = current_hunk.take() {
1701            flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1702        }
1703
1704        // Trim a single trailing newline.
1705        if result.ends_with('\n') {
1706            result.pop();
1707        }
1708
1709        if result.is_empty() {
1710            return Ok(NO_EDITS_COMMAND_MARKER.to_string());
1711        }
1712
1713        Ok(result)
1714    }
1715
1716    #[cfg(test)]
1717    mod tests {
1718        use super::*;
1719        use indoc::indoc;
1720
1721        #[test]
1722        fn test_format_cursor_region() {
1723            struct Case {
1724                name: &'static str,
1725                context: &'static str,
1726                editable_range: Range<usize>,
1727                cursor_offset: usize,
1728                expected: &'static str,
1729            }
1730
1731            let cases = [
1732                Case {
1733                    name: "basic_cursor_placement",
1734                    context: "hello world\n",
1735                    editable_range: 0..12,
1736                    cursor_offset: 5,
1737                    expected: indoc! {"
1738                    <|file_sep|>test.rs
1739                    <|fim_prefix|>
1740                    <|fim_middle|>current
1741                    0:5c|hello<|user_cursor|> world
1742                    <|fim_suffix|>
1743                    <|fim_middle|>updated
1744                    "},
1745                },
1746                Case {
1747                    name: "multiline_cursor_on_second_line",
1748                    context: "aaa\nbbb\nccc\n",
1749                    editable_range: 0..12,
1750                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
1751                    expected: indoc! {"
1752                    <|file_sep|>test.rs
1753                    <|fim_prefix|>
1754                    <|fim_middle|>current
1755                    0:23|aaa
1756                    1:26|b<|user_cursor|>bb
1757                    2:29|ccc
1758                    <|fim_suffix|>
1759                    <|fim_middle|>updated
1760                    "},
1761                },
1762                Case {
1763                    name: "no_trailing_newline_in_context",
1764                    context: "line1\nline2",
1765                    editable_range: 0..11,
1766                    cursor_offset: 3,
1767                    expected: indoc! {"
1768                    <|file_sep|>test.rs
1769                    <|fim_prefix|>
1770                    <|fim_middle|>current
1771                    0:d9|lin<|user_cursor|>e1
1772                    1:da|line2
1773                    <|fim_suffix|>
1774                    <|fim_middle|>updated
1775                    "},
1776                },
1777                Case {
1778                    name: "leading_newline_in_editable_region",
1779                    context: "\nabc\n",
1780                    editable_range: 0..5,
1781                    cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
1782                    expected: indoc! {"
1783                    <|file_sep|>test.rs
1784                    <|fim_prefix|>
1785                    <|fim_middle|>current
1786                    0:00|
1787                    1:26|a<|user_cursor|>bc
1788                    <|fim_suffix|>
1789                    <|fim_middle|>updated
1790                    "},
1791                },
1792                Case {
1793                    name: "with_suffix",
1794                    context: "abc\ndef",
1795                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
1796                    cursor_offset: 2,
1797                    expected: indoc! {"
1798                    <|file_sep|>test.rs
1799                    <|fim_prefix|>
1800                    <|fim_middle|>current
1801                    0:26|ab<|user_cursor|>c
1802                    <|fim_suffix|>
1803                    def
1804                    <|fim_middle|>updated
1805                    "},
1806                },
1807                Case {
1808                    name: "unicode_two_byte_chars",
1809                    context: "héllo\n",
1810                    editable_range: 0..7,
1811                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
1812                    expected: indoc! {"
1813                    <|file_sep|>test.rs
1814                    <|fim_prefix|>
1815                    <|fim_middle|>current
1816                    0:1b|hé<|user_cursor|>llo
1817                    <|fim_suffix|>
1818                    <|fim_middle|>updated
1819                    "},
1820                },
1821                Case {
1822                    name: "unicode_three_byte_chars",
1823                    context: "日本語\n",
1824                    editable_range: 0..10,
1825                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
1826                    expected: indoc! {"
1827                    <|file_sep|>test.rs
1828                    <|fim_prefix|>
1829                    <|fim_middle|>current
1830                    0:80|日本<|user_cursor|>語
1831                    <|fim_suffix|>
1832                    <|fim_middle|>updated
1833                    "},
1834                },
1835                Case {
1836                    name: "unicode_four_byte_chars",
1837                    context: "a🌍b\n",
1838                    editable_range: 0..7,
1839                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
1840                    expected: indoc! {"
1841                    <|file_sep|>test.rs
1842                    <|fim_prefix|>
1843                    <|fim_middle|>current
1844                    0:6b|a🌍<|user_cursor|>b
1845                    <|fim_suffix|>
1846                    <|fim_middle|>updated
1847                    "},
1848                },
1849                Case {
1850                    name: "cursor_at_start_of_region_not_placed",
1851                    context: "abc\n",
1852                    editable_range: 0..4,
1853                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
1854                    expected: indoc! {"
1855                    <|file_sep|>test.rs
1856                    <|fim_prefix|>
1857                    <|fim_middle|>current
1858                    0:26|abc
1859                    <|fim_suffix|>
1860                    <|fim_middle|>updated
1861                    "},
1862                },
1863                Case {
1864                    name: "cursor_at_end_of_line_not_placed",
1865                    context: "abc\ndef\n",
1866                    editable_range: 0..8,
1867                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
1868                    expected: indoc! {"
1869                    <|file_sep|>test.rs
1870                    <|fim_prefix|>
1871                    <|fim_middle|>current
1872                    0:26|abc
1873                    1:2f|def
1874                    <|fim_suffix|>
1875                    <|fim_middle|>updated
1876                    "},
1877                },
1878                Case {
1879                    name: "cursor_offset_relative_to_context_not_editable_region",
1880                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
1881                    // write_cursor_excerpt_section must subtract it before comparing against
1882                    // per-line offsets within the editable region.
1883                    context: "pre\naaa\nbbb\nsuf\n",
1884                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
1885                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
1886                    expected: indoc! {"
1887                    <|file_sep|>test.rs
1888                    <|fim_prefix|>
1889                    pre
1890                    <|fim_middle|>current
1891                    0:23|aaa
1892                    1:26|b<|user_cursor|>bb
1893                    <|fim_suffix|>
1894                    suf
1895                    <|fim_middle|>updated
1896                    "},
1897                },
1898            ];
1899
1900            for case in &cases {
1901                let mut prompt = String::new();
1902                hashline::write_cursor_excerpt_section(
1903                    &mut prompt,
1904                    Path::new("test.rs"),
1905                    case.context,
1906                    &case.editable_range,
1907                    case.cursor_offset,
1908                );
1909                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
1910            }
1911        }
1912
1913        #[test]
1914        fn test_apply_edit_commands() {
1915            struct Case {
1916                name: &'static str,
1917                original: &'static str,
1918                model_output: &'static str,
1919                expected: &'static str,
1920            }
1921
1922            let cases = vec![
1923                Case {
1924                    name: "set_single_line",
1925                    original: indoc! {"
1926                    let mut total = 0;
1927                    for product in products {
1928                        total += ;
1929                    }
1930                    total
1931                "},
1932                    model_output: indoc! {"
1933                    <|set|>2:87
1934                        total += product.price;
1935                "},
1936                    expected: indoc! {"
1937                    let mut total = 0;
1938                    for product in products {
1939                        total += product.price;
1940                    }
1941                    total
1942                "},
1943                },
1944                Case {
1945                    name: "set_range",
1946                    original: indoc! {"
1947                    fn foo() {
1948                        let x = 1;
1949                        let y = 2;
1950                        let z = 3;
1951                    }
1952                "},
1953                    model_output: indoc! {"
1954                    <|set|>1:46-3:4a
1955                        let sum = 6;
1956                "},
1957                    expected: indoc! {"
1958                    fn foo() {
1959                        let sum = 6;
1960                    }
1961                "},
1962                },
1963                Case {
1964                    name: "insert_after_line",
1965                    original: indoc! {"
1966                    fn main() {
1967                        let x = 1;
1968                    }
1969                "},
1970                    model_output: indoc! {"
1971                    <|insert|>1:46
1972                        let y = 2;
1973                "},
1974                    expected: indoc! {"
1975                    fn main() {
1976                        let x = 1;
1977                        let y = 2;
1978                    }
1979                "},
1980                },
1981                Case {
1982                    name: "insert_before_first",
1983                    original: indoc! {"
1984                    let x = 1;
1985                    let y = 2;
1986                "},
1987                    model_output: indoc! {"
1988                    <|insert|>
1989                    use std::io;
1990                "},
1991                    expected: indoc! {"
1992                    use std::io;
1993                    let x = 1;
1994                    let y = 2;
1995                "},
1996                },
1997                Case {
1998                    name: "set_with_cursor_marker",
1999                    original: indoc! {"
2000                    fn main() {
2001                        println!();
2002                    }
2003                "},
2004                    model_output: indoc! {"
2005                    <|set|>1:34
2006                        eprintln!(\"<|user_cursor|>\");
2007                "},
2008                    expected: indoc! {"
2009                    fn main() {
2010                        eprintln!(\"<|user_cursor|>\");
2011                    }
2012                "},
2013                },
2014                Case {
2015                    name: "multiple_set_commands",
2016                    original: indoc! {"
2017                    aaa
2018                    bbb
2019                    ccc
2020                    ddd
2021                "},
2022                    model_output: indoc! {"
2023                    <|set|>0:23
2024                    AAA
2025                    <|set|>2:29
2026                    CCC
2027                "},
2028                    expected: indoc! {"
2029                    AAA
2030                    bbb
2031                    CCC
2032                    ddd
2033                "},
2034                },
2035                Case {
2036                    name: "set_range_multiline_replacement",
2037                    original: indoc! {"
2038                    fn handle_submit() {
2039                    }
2040
2041                    fn handle_keystroke() {
2042                "},
2043                    model_output: indoc! {"
2044                    <|set|>0:3f-1:7d
2045                    fn handle_submit(modal_state: &mut ModalState) {
2046                        <|user_cursor|>
2047                    }
2048                "},
2049                    expected: indoc! {"
2050                    fn handle_submit(modal_state: &mut ModalState) {
2051                        <|user_cursor|>
2052                    }
2053
2054                    fn handle_keystroke() {
2055                "},
2056                },
2057                Case {
2058                    name: "no_edit_commands_returns_original",
2059                    original: indoc! {"
2060                    hello
2061                    world
2062                "},
2063                    model_output: "some random text with no commands",
2064                    expected: indoc! {"
2065                    hello
2066                    world
2067                "},
2068                },
2069                Case {
2070                    name: "no_edits_command_returns_original",
2071                    original: indoc! {"
2072                    hello
2073                    world
2074                "},
2075                    model_output: "<|no_edits|>",
2076                    expected: indoc! {"
2077                    hello
2078                    world
2079                "},
2080                },
2081                Case {
2082                    name: "wrong_hash_set_ignored",
2083                    original: indoc! {"
2084                    aaa
2085                    bbb
2086                "},
2087                    model_output: indoc! {"
2088                    <|set|>0:ff
2089                    ZZZ
2090                "},
2091                    expected: indoc! {"
2092                    aaa
2093                    bbb
2094                "},
2095                },
2096                Case {
2097                    name: "insert_and_set_combined",
2098                    original: indoc! {"
2099                    alpha
2100                    beta
2101                    gamma
2102                "},
2103                    model_output: indoc! {"
2104                    <|set|>0:06
2105                    ALPHA
2106                    <|insert|>1:9c
2107                    beta_extra
2108                "},
2109                    expected: indoc! {"
2110                    ALPHA
2111                    beta
2112                    beta_extra
2113                    gamma
2114                "},
2115                },
2116                Case {
2117                    name: "no_trailing_newline_preserved",
2118                    original: "hello\nworld",
2119                    model_output: indoc! {"
2120                    <|set|>0:14
2121                    HELLO
2122                "},
2123                    expected: "HELLO\nworld",
2124                },
2125                Case {
2126                    name: "set_range_hash_mismatch_in_end_bound",
2127                    original: indoc! {"
2128                    one
2129                    two
2130                    three
2131                "},
2132                    model_output: indoc! {"
2133                    <|set|>0:42-2:ff
2134                    ONE_TWO_THREE
2135                "},
2136                    expected: indoc! {"
2137                    one
2138                    two
2139                    three
2140                "},
2141                },
2142                Case {
2143                    name: "set_range_start_greater_than_end_ignored",
2144                    original: indoc! {"
2145                    a
2146                    b
2147                    c
2148                "},
2149                    model_output: indoc! {"
2150                    <|set|>2:63-1:62
2151                    X
2152                "},
2153                    expected: indoc! {"
2154                    a
2155                    b
2156                    c
2157                "},
2158                },
2159                Case {
2160                    name: "insert_out_of_bounds_ignored",
2161                    original: indoc! {"
2162                    x
2163                    y
2164                "},
2165                    model_output: indoc! {"
2166                    <|insert|>99:aa
2167                    z
2168                "},
2169                    expected: indoc! {"
2170                    x
2171                    y
2172                "},
2173                },
2174                Case {
2175                    name: "set_out_of_bounds_ignored",
2176                    original: indoc! {"
2177                    x
2178                    y
2179                "},
2180                    model_output: indoc! {"
2181                    <|set|>99:aa
2182                    z
2183                "},
2184                    expected: indoc! {"
2185                    x
2186                    y
2187                "},
2188                },
2189                Case {
2190                    name: "malformed_set_command_ignored",
2191                    original: indoc! {"
2192                    alpha
2193                    beta
2194                "},
2195                    model_output: indoc! {"
2196                    <|set|>not-a-line-ref
2197                    UPDATED
2198                "},
2199                    expected: indoc! {"
2200                    alpha
2201                    beta
2202                "},
2203                },
2204                Case {
2205                    name: "malformed_insert_hash_treated_as_before_first",
2206                    original: indoc! {"
2207                    alpha
2208                    beta
2209                "},
2210                    model_output: indoc! {"
2211                    <|insert|>1:nothex
2212                    preamble
2213                "},
2214                    expected: indoc! {"
2215                    preamble
2216                    alpha
2217                    beta
2218                "},
2219                },
2220                Case {
2221                    name: "set_then_insert_same_target_orders_insert_after_replacement",
2222                    original: indoc! {"
2223                    cat
2224                    dog
2225                "},
2226                    model_output: indoc! {"
2227                    <|set|>0:38
2228                    CAT
2229                    <|insert|>0:38
2230                    TAIL
2231                "},
2232                    expected: indoc! {"
2233                    CAT
2234                    TAIL
2235                    dog
2236                "},
2237                },
2238                Case {
2239                    name: "overlapping_set_ranges_last_wins",
2240                    original: indoc! {"
2241                    a
2242                    b
2243                    c
2244                    d
2245                "},
2246                    model_output: indoc! {"
2247                    <|set|>0:61-2:63
2248                    FIRST
2249                    <|set|>1:62-3:64
2250                    SECOND
2251                "},
2252                    expected: indoc! {"
2253                    FIRST
2254                    d
2255                "},
2256                },
2257                Case {
2258                    name: "insert_before_first_and_after_line",
2259                    original: indoc! {"
2260                        a
2261                        b
2262                    "},
2263                    model_output: indoc! {"
2264                        <|insert|>
2265                        HEAD
2266                        <|insert|>0:61
2267                        MID
2268                    "},
2269                    expected: indoc! {"
2270                        HEAD
2271                        a
2272                        MID
2273                        b
2274                    "},
2275                },
2276            ];
2277
2278            for case in &cases {
2279                let result = hashline::apply_edit_commands(case.original, &case.model_output);
2280                assert_eq!(result, case.expected, "failed case: {}", case.name);
2281            }
2282        }
2283
2284        #[test]
2285        fn test_output_has_edit_commands() {
2286            assert!(hashline::output_has_edit_commands(&format!(
2287                "{}0:ab\nnew",
2288                SET_COMMAND_MARKER
2289            )));
2290            assert!(hashline::output_has_edit_commands(&format!(
2291                "{}0:ab\nnew",
2292                INSERT_COMMAND_MARKER
2293            )));
2294            assert!(hashline::output_has_edit_commands(&format!(
2295                "some text\n{}1:cd\nstuff",
2296                SET_COMMAND_MARKER
2297            )));
2298            assert!(!hashline::output_has_edit_commands("just plain text"));
2299            assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2300            assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2301        }
2302
2303        // ---- hashline::patch_to_edit_commands round-trip tests ----
2304
2305        #[test]
2306        fn test_patch_to_edit_commands() {
2307            struct Case {
2308                name: &'static str,
2309                old: &'static str,
2310                patch: &'static str,
2311                expected_new: &'static str,
2312            }
2313
2314            let cases = [
2315                Case {
2316                    name: "single_line_replacement",
2317                    old: indoc! {"
2318                    let mut total = 0;
2319                    for product in products {
2320                        total += ;
2321                    }
2322                    total
2323                "},
2324                    patch: indoc! {"
2325                    @@ -1,5 +1,5 @@
2326                     let mut total = 0;
2327                     for product in products {
2328                    -    total += ;
2329                    +    total += product.price;
2330                     }
2331                     total
2332                "},
2333                    expected_new: indoc! {"
2334                    let mut total = 0;
2335                    for product in products {
2336                        total += product.price;
2337                    }
2338                    total
2339                "},
2340                },
2341                Case {
2342                    name: "multiline_replacement",
2343                    old: indoc! {"
2344                    fn foo() {
2345                        let x = 1;
2346                        let y = 2;
2347                        let z = 3;
2348                    }
2349                "},
2350                    patch: indoc! {"
2351                    @@ -1,5 +1,3 @@
2352                     fn foo() {
2353                    -    let x = 1;
2354                    -    let y = 2;
2355                    -    let z = 3;
2356                    +    let sum = 1 + 2 + 3;
2357                     }
2358                "},
2359                    expected_new: indoc! {"
2360                    fn foo() {
2361                        let sum = 1 + 2 + 3;
2362                    }
2363                "},
2364                },
2365                Case {
2366                    name: "insertion",
2367                    old: indoc! {"
2368                    fn main() {
2369                        let x = 1;
2370                    }
2371                "},
2372                    patch: indoc! {"
2373                    @@ -1,3 +1,4 @@
2374                     fn main() {
2375                         let x = 1;
2376                    +    let y = 2;
2377                     }
2378                "},
2379                    expected_new: indoc! {"
2380                    fn main() {
2381                        let x = 1;
2382                        let y = 2;
2383                    }
2384                "},
2385                },
2386                Case {
2387                    name: "insertion_before_first",
2388                    old: indoc! {"
2389                    let x = 1;
2390                    let y = 2;
2391                "},
2392                    patch: indoc! {"
2393                    @@ -1,2 +1,3 @@
2394                    +use std::io;
2395                     let x = 1;
2396                     let y = 2;
2397                "},
2398                    expected_new: indoc! {"
2399                    use std::io;
2400                    let x = 1;
2401                    let y = 2;
2402                "},
2403                },
2404                Case {
2405                    name: "deletion",
2406                    old: indoc! {"
2407                    aaa
2408                    bbb
2409                    ccc
2410                    ddd
2411                "},
2412                    patch: indoc! {"
2413                    @@ -1,4 +1,2 @@
2414                     aaa
2415                    -bbb
2416                    -ccc
2417                     ddd
2418                "},
2419                    expected_new: indoc! {"
2420                    aaa
2421                    ddd
2422                "},
2423                },
2424                Case {
2425                    name: "multiple_changes",
2426                    old: indoc! {"
2427                    alpha
2428                    beta
2429                    gamma
2430                    delta
2431                    epsilon
2432                "},
2433                    patch: indoc! {"
2434                    @@ -1,5 +1,5 @@
2435                    -alpha
2436                    +ALPHA
2437                     beta
2438                     gamma
2439                    -delta
2440                    +DELTA
2441                     epsilon
2442                "},
2443                    expected_new: indoc! {"
2444                    ALPHA
2445                    beta
2446                    gamma
2447                    DELTA
2448                    epsilon
2449                "},
2450                },
2451                Case {
2452                    name: "replace_with_insertion",
2453                    old: indoc! {r#"
2454                    fn handle() {
2455                        modal_state.close();
2456                        modal_state.dismiss();
2457                "#},
2458                    patch: indoc! {r#"
2459                    @@ -1,3 +1,4 @@
2460                     fn handle() {
2461                         modal_state.close();
2462                    +    eprintln!("");
2463                         modal_state.dismiss();
2464                "#},
2465                    expected_new: indoc! {r#"
2466                    fn handle() {
2467                        modal_state.close();
2468                        eprintln!("");
2469                        modal_state.dismiss();
2470                "#},
2471                },
2472                Case {
2473                    name: "complete_replacement",
2474                    old: indoc! {"
2475                    aaa
2476                    bbb
2477                    ccc
2478                "},
2479                    patch: indoc! {"
2480                    @@ -1,3 +1,3 @@
2481                    -aaa
2482                    -bbb
2483                    -ccc
2484                    +xxx
2485                    +yyy
2486                    +zzz
2487                "},
2488                    expected_new: indoc! {"
2489                    xxx
2490                    yyy
2491                    zzz
2492                "},
2493                },
2494                Case {
2495                    name: "add_function_body",
2496                    old: indoc! {"
2497                    fn foo() {
2498                        modal_state.dismiss();
2499                    }
2500
2501                    fn
2502
2503                    fn handle_keystroke() {
2504                "},
2505                    patch: indoc! {"
2506                    @@ -1,6 +1,8 @@
2507                     fn foo() {
2508                         modal_state.dismiss();
2509                     }
2510
2511                    -fn
2512                    +fn handle_submit() {
2513                    +    todo()
2514                    +}
2515
2516                     fn handle_keystroke() {
2517                "},
2518                    expected_new: indoc! {"
2519                    fn foo() {
2520                        modal_state.dismiss();
2521                    }
2522
2523                    fn handle_submit() {
2524                        todo()
2525                    }
2526
2527                    fn handle_keystroke() {
2528                "},
2529                },
2530                Case {
2531                    name: "with_cursor_offset",
2532                    old: indoc! {r#"
2533                    fn main() {
2534                        println!();
2535                    }
2536                "#},
2537                    patch: indoc! {r#"
2538                        @@ -1,3 +1,3 @@
2539                        fn main() {
2540                        -    println!();
2541                        +    eprintln!("");
2542                        }
2543                    "#},
2544                    expected_new: indoc! {r#"
2545                        fn main() {
2546                            eprintln!("<|user_cursor|>");
2547                        }
2548                    "#},
2549                },
2550                Case {
2551                    name: "non_local_hunk_header_pure_insertion_repro",
2552                    old: indoc! {"
2553                        aaa
2554                        bbb
2555                    "},
2556                    patch: indoc! {"
2557                        @@ -20,2 +20,3 @@
2558                        aaa
2559                        +xxx
2560                        bbb
2561                    "},
2562                    expected_new: indoc! {"
2563                        aaa
2564                        xxx
2565                        bbb
2566                    "},
2567                },
2568                Case {
2569                    name: "empty_patch_produces_no_edits_marker",
2570                    old: indoc! {"
2571                        aaa
2572                        bbb
2573                    "},
2574                    patch: "@@ -20,2 +20,3 @@\n",
2575                    expected_new: indoc! {"
2576                        aaa
2577                        bbb
2578                    "},
2579                },
2580            ];
2581
2582            for case in &cases {
2583                // The cursor_offset for patch_to_edit_commands is relative to
2584                // the first hunk's new text (context + additions). We compute
2585                // it by finding where the marker sits in the expected output
2586                // (which mirrors the new text of the hunk).
2587                let cursor_offset = case.expected_new.find(CURSOR_MARKER);
2588
2589                let commands =
2590                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
2591                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
2592
2593                assert!(
2594                    hashline::output_has_edit_commands(&commands),
2595                    "case {}: expected edit commands, got: {commands:?}",
2596                    case.name,
2597                );
2598
2599                let applied = hashline::apply_edit_commands(case.old, &commands);
2600                assert_eq!(applied, case.expected_new, "case {}", case.name);
2601            }
2602        }
2603    }
2604}
2605
2606pub mod seed_coder {
2607    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2608    //!
2609    //! Seed-Coder uses different FIM tokens and order than Qwen:
2610    //! - SPM order: suffix comes FIRST, then prefix, then middle
2611    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2612    //! - File markers: StarCoder-style `<filename>path` (single token + path)
2613    //!
2614    //! All context (related files, edit history) goes in the PREFIX section.
2615    //! The suffix contains only code after the editable region.
2616    //!
2617    //! Example prompt:
2618    //!
2619    //! <[fim-suffix]>
2620    //! code after editable region
2621    //! <[fim-prefix]><filename>related/file.py
2622    //! related file content
2623    //!
2624    //! <filename>edit_history
2625    //! --- a/some_file.py
2626    //! +++ b/some_file.py
2627    //! -old
2628    //! +new
2629    //!
2630    //! <filename>path/to/target_file.py
2631    //! code before editable region
2632    //! <<<<<<< CURRENT
2633    //! code that
2634    //! needs to<|user_cursor|>
2635    //! be rewritten
2636    //! =======
2637    //! <[fim-middle]>
2638    //!
2639    //! Expected output (model generates):
2640    //!
2641    //! updated
2642    //! code with
2643    //! changes applied
2644    //! >>>>>>> UPDATED
2645
2646    use super::*;
2647
2648    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
2649    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
2650    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
2651    pub const FILE_MARKER: &str = "<filename>";
2652
2653    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
2654    pub const SEPARATOR: &str = "=======\n";
2655    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
2656
2657    pub const NO_EDITS: &str = "NO_EDITS\n";
2658
2659    pub fn special_tokens() -> &'static [&'static str] {
2660        &[
2661            FIM_SUFFIX,
2662            FIM_PREFIX,
2663            FIM_MIDDLE,
2664            FILE_MARKER,
2665            START_MARKER,
2666            SEPARATOR,
2667            END_MARKER,
2668            CURSOR_MARKER,
2669        ]
2670    }
2671
2672    pub fn write_cursor_excerpt_section(
2673        prompt: &mut String,
2674        path: &Path,
2675        context: &str,
2676        editable_range: &Range<usize>,
2677        cursor_offset: usize,
2678    ) {
2679        let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2680        prompt.push_str(&section);
2681    }
2682
2683    pub fn format_prompt_with_budget(
2684        path: &Path,
2685        context: &str,
2686        editable_range: &Range<usize>,
2687        cursor_offset: usize,
2688        events: &[Arc<Event>],
2689        related_files: &[RelatedFile],
2690        max_tokens: usize,
2691    ) -> String {
2692        let cursor_prefix_section =
2693            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2694        assemble_fim_prompt(
2695            context,
2696            editable_range,
2697            &cursor_prefix_section,
2698            events,
2699            related_files,
2700            max_tokens,
2701        )
2702    }
2703
2704    pub fn assemble_fim_prompt(
2705        context: &str,
2706        editable_range: &Range<usize>,
2707        cursor_prefix_section: &str,
2708        events: &[Arc<Event>],
2709        related_files: &[RelatedFile],
2710        max_tokens: usize,
2711    ) -> String {
2712        let suffix_section = build_suffix_section(context, editable_range);
2713
2714        let suffix_tokens = estimate_tokens(suffix_section.len() + FIM_PREFIX.len());
2715        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len() + FIM_MIDDLE.len());
2716        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2717
2718        let edit_history_section = super::format_edit_history_within_budget(
2719            events,
2720            FILE_MARKER,
2721            "edit_history",
2722            budget_after_cursor,
2723            max_edit_event_count_for_format(&ZetaFormat::V0211SeedCoder),
2724        );
2725        let edit_history_tokens = estimate_tokens(edit_history_section.len() + "\n".len());
2726        let budget_after_edit_history =
2727            budget_after_cursor.saturating_sub(edit_history_tokens + "\n".len());
2728
2729        let related_files_section = super::format_related_files_within_budget(
2730            related_files,
2731            FILE_MARKER,
2732            "",
2733            budget_after_edit_history,
2734        );
2735
2736        let mut prompt = String::new();
2737        prompt.push_str(&suffix_section);
2738        prompt.push_str(FIM_PREFIX);
2739        prompt.push_str(&related_files_section);
2740        if !related_files_section.is_empty() {
2741            prompt.push('\n');
2742        }
2743        prompt.push_str(&edit_history_section);
2744        if !edit_history_section.is_empty() {
2745            prompt.push('\n');
2746        }
2747        prompt.push_str(cursor_prefix_section);
2748        prompt.push_str(FIM_MIDDLE);
2749
2750        prompt
2751    }
2752
2753    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2754        let mut section = String::new();
2755        section.push_str(FIM_SUFFIX);
2756        section.push_str(&context[editable_range.end..]);
2757        if !section.ends_with('\n') {
2758            section.push('\n');
2759        }
2760        section
2761    }
2762
2763    fn build_cursor_prefix_section(
2764        path: &Path,
2765        context: &str,
2766        editable_range: &Range<usize>,
2767        cursor_offset: usize,
2768    ) -> String {
2769        let mut section = String::new();
2770        let path_str = path.to_string_lossy();
2771        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2772
2773        section.push_str(&context[..editable_range.start]);
2774        section.push_str(START_MARKER);
2775        section.push_str(&context[editable_range.start..cursor_offset]);
2776        section.push_str(CURSOR_MARKER);
2777        section.push_str(&context[cursor_offset..editable_range.end]);
2778        if !section.ends_with('\n') {
2779            section.push('\n');
2780        }
2781        section.push_str(SEPARATOR);
2782        section
2783    }
2784
2785    /// Format patch as containing no changes if it's empty; otherwise return None.
2786    pub(crate) fn no_edits(patch: &str) -> Option<String> {
2787        // Count lines in the patch
2788        let empty_patch = patch.lines().count() <= 3;
2789        if empty_patch {
2790            Some(format!("{NO_EDITS}{END_MARKER}"))
2791        } else {
2792            None
2793        }
2794    }
2795}
2796
2797pub mod v0304_variable_edit {
2798    //! A prompt format with no fixed editable region. The entire context is shown
2799    //! to the model, and it chooses which text to replace by outputting surrounding
2800    //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
2801    //! text.
2802    //!
2803    //! Example prompt:
2804    //!
2805    //! <|file_sep|>path/to/file.py
2806    //! zero
2807    //! one
2808    //! two
2809    //! three<|user_cursor|>
2810    //! four
2811    //! five
2812    //! <|fim_prefix|>
2813    //!
2814    //! Expected output (model generates):
2815    //!
2816    //! two
2817    //! <|fim_middle|>
2818    //! THREE
2819    //! <|fim_suffix|>
2820    //! four
2821    //!
2822    //! The output means: find "two\n...\nfour" in the context, and replace
2823    //! everything between "two\n" and "four" with "THREE\n".
2824
2825    use super::*;
2826
2827    pub fn special_tokens() -> &'static [&'static str] {
2828        &[
2829            "<|fim_prefix|>",
2830            "<|fim_suffix|>",
2831            "<|fim_middle|>",
2832            "<|file_sep|>",
2833            CURSOR_MARKER,
2834        ]
2835    }
2836
2837    pub fn write_cursor_excerpt_section(
2838        prompt: &mut String,
2839        path: &Path,
2840        context: &str,
2841        cursor_offset: usize,
2842    ) {
2843        let path_str = path.to_string_lossy();
2844        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
2845
2846        prompt.push_str(&context[..cursor_offset]);
2847        prompt.push_str(CURSOR_MARKER);
2848        prompt.push_str(&context[cursor_offset..]);
2849        if !prompt.ends_with('\n') {
2850            prompt.push('\n');
2851        }
2852        prompt.push_str("<|fim_prefix|>\n")
2853    }
2854
2855    /// Apply a variable-edit model output to the original context text.
2856    ///
2857    /// The model output has the form:
2858    ///
2859    /// - prefix context lines
2860    /// - `<|fim_middle|>`
2861    /// - new text
2862    /// - `<|fim_suffix|>`
2863    /// - suffix context lines
2864    ///
2865    /// We locate the prefix/suffix context lines in the original text and replace
2866    /// everything between them with the new text.
2867    pub fn apply_variable_edit(
2868        context: &str,
2869        model_output: &str,
2870    ) -> Result<(Range<usize>, String)> {
2871        let (prefix_context, rest) = model_output
2872            .split_once("<|fim_middle|>\n")
2873            .or_else(|| model_output.split_once("<|fim_middle|>"))
2874            .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
2875
2876        let (new_text, suffix_context) = rest
2877            .split_once("<|fim_suffix|>\n")
2878            .or_else(|| rest.split_once("<|fim_suffix|>"))
2879            .unwrap_or((rest, ""));
2880
2881        let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
2882            suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
2883        } else {
2884            suffix_context
2885        };
2886
2887        let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
2888            .ok_or_else(|| anyhow!("could not locate prefix lines"))?
2889            + prefix_context.len();
2890        let suffix_offset = if suffix_context.is_empty() {
2891            context.len()
2892        } else {
2893            find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
2894                .ok_or_else(|| anyhow!("could not locate suffix lines"))?
2895                + prefix_offset
2896        };
2897
2898        let edit_range = prefix_offset..suffix_offset;
2899        return Ok((edit_range, new_text.to_string()));
2900    }
2901
/// Returns the byte offset of the first occurrence of `needle` in `haystack`
/// that begins at the start of a line (offset 0 or directly after a `\n`).
///
/// An empty `needle` matches at offset 0. Returns `None` when no occurrence
/// starts a line.
fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }

    // Test every line start directly. Scanning with `match_indices(needle)`
    // only reports non-overlapping matches, so a match that begins mid-line
    // could swallow the start of a later match that does begin a line
    // (e.g. "}\n}\n" inside "    }\n}\n}\n").
    std::iter::once(0)
        .chain(haystack.match_indices('\n').map(|(newline, _)| newline + 1))
        .find(|&line_start| haystack[line_start..].starts_with(needle))
}
2912
2913    /// Convert a unified diff patch into the variable-edit output format.
2914    ///
2915    /// Parses `patch` as a unified diff against `old_text` and produces model
2916    /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
2917    /// delimiters. The diff is resolved by content matching rather than line
2918    /// numbers.
2919    pub fn patch_to_variable_edit_output(
2920        old_text: &str,
2921        patch: &str,
2922        cursor_offset: Option<usize>,
2923    ) -> Result<String> {
2924        // Parse the unified diff into hunks. Each hunk has an `old_context`
2925        // string (context + deleted lines interleaved in order) and a list of
2926        // edits expressed as byte ranges within that context plus replacement
2927        // text.
2928        let hunks = parse_hunks(patch);
2929        if hunks.is_empty() {
2930            return Ok(String::new());
2931        }
2932
2933        // Apply each hunk by finding its old_context in the text and
2934        // performing the edits. We search forward from where the previous
2935        // hunk ended so that hunks are applied in order.
2936        let mut new_text = old_text.to_string();
2937        let mut search_from: usize = 0;
2938        let mut first_hunk_pos: Option<usize> = None;
2939
2940        for hunk in &hunks {
2941            let context_pos = new_text[search_from..]
2942                .find(&hunk.old_context)
2943                .map(|pos| pos + search_from)
2944                .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
2945
2946            if first_hunk_pos.is_none() {
2947                first_hunk_pos = Some(context_pos);
2948            }
2949
2950            // Apply edits in reverse order so byte offsets remain valid.
2951            for edit in hunk.edits.iter().rev() {
2952                let abs_start = context_pos + edit.range.start;
2953                let abs_end = context_pos + edit.range.end;
2954                new_text.replace_range(abs_start..abs_end, &edit.text);
2955            }
2956
2957            // Advance past this hunk's region in the (now modified) text.
2958            let new_region_len: usize =
2959                hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
2960                    len + edit.text.len() - (edit.range.end - edit.range.start)
2961                });
2962            search_from = context_pos + new_region_len;
2963        }
2964
2965        // Now we have old_text and new_text. Find the changed line range by
2966        // comparing them.
2967        let old_lines: Vec<&str> = old_text.lines().collect();
2968        let new_lines: Vec<&str> = new_text.lines().collect();
2969
2970        // Find first differing line.
2971        let first_changed_row = old_lines
2972            .iter()
2973            .zip(new_lines.iter())
2974            .position(|(a, b)| a != b)
2975            .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
2976
2977        // Find last differing line (from the end).
2978        let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
2979        let common_suffix = old_lines
2980            .iter()
2981            .rev()
2982            .zip(new_lines.iter().rev())
2983            .take(max_suffix)
2984            .take_while(|(a, b)| a == b)
2985            .count();
2986
2987        let old_end = old_lines.len() - common_suffix;
2988        let new_end = new_lines.len() - common_suffix;
2989
2990        if first_changed_row == old_end && first_changed_row == new_end {
2991            return Ok(String::new());
2992        }
2993
2994        // Build the replacement text from new_lines[first_diff..new_end].
2995        let mut merged_new_text = String::new();
2996        for line in &new_lines[first_changed_row..new_end] {
2997            merged_new_text.push_str(line);
2998            merged_new_text.push('\n');
2999        }
3000
3001        // cursor_offset is relative to the first hunk's new content in
3002        // new_text. Translate it to an offset within merged_new_text, which
3003        // only contains lines first_diff..new_end of new_text.
3004        if let Some(hunk_offset) = cursor_offset {
3005            let hunk_start = first_hunk_pos.unwrap_or(0);
3006            let absolute_pos = hunk_start + hunk_offset;
3007
3008            // Byte offset where first_diff starts in new_text.
3009            let merged_start: usize = new_lines[..first_changed_row]
3010                .iter()
3011                .map(|line| line.len() + 1)
3012                .sum();
3013
3014            if absolute_pos >= merged_start {
3015                let relative_offset = absolute_pos - merged_start;
3016                if relative_offset <= merged_new_text.len() {
3017                    merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
3018                }
3019            }
3020        }
3021
3022        // Build output with 2 lines of context above and below.
3023        let context_lines_count = 2;
3024        let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
3025        let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
3026
3027        fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
3028            let pattern = &lines[line_range];
3029            let pattern_len = pattern.len();
3030
3031            let mut count = 0;
3032            for offset in 0..=lines.len() - pattern_len {
3033                if &lines[offset..offset + pattern_len] == pattern {
3034                    count += 1;
3035                }
3036            }
3037            count
3038        }
3039
3040        // Expand prefix and suffix until they are unique
3041        while prefix_start > 0 {
3042            if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
3043                prefix_start -= 1;
3044            } else {
3045                break;
3046            }
3047        }
3048        while suffix_end < old_lines.len() {
3049            if count_matches(old_end..suffix_end, &old_lines) > 1 {
3050                suffix_end += 1;
3051            } else {
3052                break;
3053            }
3054        }
3055
3056        let mut output = String::new();
3057        for line in &old_lines[prefix_start..first_changed_row] {
3058            output.push_str(line);
3059            output.push('\n');
3060        }
3061        output.push_str("<|fim_middle|>\n");
3062        output.push_str(&merged_new_text);
3063        output.push_str("<|fim_suffix|>\n");
3064        for line in &old_lines[old_end..suffix_end] {
3065            output.push_str(line);
3066            output.push('\n');
3067        }
3068
3069        Ok(output)
3070    }
3071
/// One hunk of a unified diff, described by content instead of line numbers.
struct ParsedHunk {
    /// Context lines and deleted lines in patch order, each followed by `\n`.
    old_context: String,
    /// Edits as byte ranges into `old_context`, in the order they appear.
    edits: Vec<ParsedEdit>,
}

/// A single replacement within a hunk.
struct ParsedEdit {
    /// Byte range of the hunk's `old_context` to replace; empty for a pure
    /// insertion.
    range: Range<usize>,
    /// Replacement text; empty for a pure deletion.
    text: String,
}

/// Parse a unified diff into content-based hunks. Each hunk contains an
/// `old_context` string (context lines + deleted lines, which together
/// form the text that should be found in the original) and a list of edits
/// expressed as byte ranges within that context.
///
/// A new hunk starts at every line beginning with `@@`; the line numbers in
/// that header are ignored. Lines before the first hunk header are ignored,
/// and hunks without any content are dropped. Adjacent deletions and
/// additions are merged into a single edit.
///
/// A `---` line is treated as a file header only when the line right after it
/// starts with `+++`; the pair is then skipped. Any other line beginning with
/// `---` or `+++` inside a hunk is an ordinary deletion or addition whose
/// content happens to start with `--` or `++`.
fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
    // Stores a finished hunk unless it has neither context nor edits.
    fn finish_hunk(hunks: &mut Vec<ParsedHunk>, hunk: Option<ParsedHunk>) {
        if let Some(hunk) = hunk {
            if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
                hunks.push(hunk);
            }
        }
    }

    let mut hunks = Vec::new();
    let mut current: Option<ParsedHunk> = None;
    let mut lines = patch.lines().peekable();

    while let Some(line) = lines.next() {
        if line.starts_with("@@") {
            finish_hunk(&mut hunks, current.take());
            current = Some(ParsedHunk {
                old_context: String::new(),
                edits: Vec::new(),
            });
            continue;
        }

        // File headers always come as a `---` / `+++` pair. Requiring the
        // pair keeps content lines such as a deleted `-- comment` (patch line
        // `--- comment`) or an added `++i;` (patch line `+++i;`) from being
        // mistaken for headers and dropped.
        let is_file_header = line.starts_with("---")
            && matches!(lines.peek(), Some(next) if next.starts_with("+++"));
        if is_file_header {
            lines.next();
            continue;
        }

        let hunk = match current.as_mut() {
            Some(hunk) => hunk,
            None => continue,
        };

        if let Some(added) = line.strip_prefix('+') {
            let pos = hunk.old_context.len();
            // Extend the previous edit when this addition directly follows it.
            if let Some(last_edit) = hunk.edits.last_mut() {
                if last_edit.range.end == pos {
                    writeln!(&mut last_edit.text, "{added}").ok();
                    continue;
                }
            }
            hunk.edits.push(ParsedEdit {
                range: pos..pos,
                text: format!("{added}\n"),
            });
        } else if let Some(removed) = line.strip_prefix('-') {
            let start = hunk.old_context.len();
            writeln!(&mut hunk.old_context, "{removed}").ok();
            let end = hunk.old_context.len();
            // Extend the previous edit when this deletion directly follows it.
            if let Some(last_edit) = hunk.edits.last_mut() {
                if last_edit.range.end == start {
                    last_edit.range.end = end;
                    continue;
                }
            }
            hunk.edits.push(ParsedEdit {
                range: start..end,
                text: String::new(),
            });
        } else {
            // Context line; tolerate a missing leading space.
            let ctx = line.strip_prefix(' ').unwrap_or(line);
            writeln!(&mut hunk.old_context, "{ctx}").ok();
        }
    }

    finish_hunk(&mut hunks, current);
    hunks
}
3145
3146    #[cfg(test)]
3147    mod tests {
3148        use super::*;
3149        use indoc::indoc;
3150
#[test]
fn test_apply_variable_edit() {
    // Table-driven: each case applies `model_output` to `original` and
    // compares the edited text with `expected`.
    struct Case {
        name: &'static str,
        original: &'static str,
        model_output: &'static str,
        expected: &'static str,
    }

    let cases = [
        Case {
            name: "simple_single_line_replacement",
            original: indoc! {"
                zero
                one
                two
                three
                four
                five
            "},
            model_output: indoc! {"
                two
                <|fim_middle|>
                THREE
                <|fim_suffix|>
                four
            "},
            expected: indoc! {"
                zero
                one
                two
                THREE
                four
                five
            "},
        },
        Case {
            name: "multi_line_replacement",
            original: indoc! {"
                a
                b
                c
                d
                e
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                B
                C
                D
                <|fim_suffix|>
                e
            "},
            expected: indoc! {"
                a
                B
                C
                D
                e
            "},
        },
        Case {
            name: "insertion_between_existing_lines",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                X
                <|fim_suffix|>
                b
            "},
            expected: indoc! {"
                a
                X
                b
                c
            "},
        },
        Case {
            name: "deletion",
            original: indoc! {"
                a
                b
                c
                d
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                <|fim_suffix|>
                c
            "},
            expected: indoc! {"
                a
                c
                d
            "},
        },
        Case {
            name: "replacement_at_start_no_prefix_context",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                <|fim_middle|>
                X
                <|fim_suffix|>
                b
            "},
            expected: indoc! {"
                X
                b
                c
            "},
        },
        Case {
            name: "replacement_at_end_no_suffix_context",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                b
                <|fim_middle|>
                Z
                <|fim_suffix|>
            "},
            expected: indoc! {"
                a
                b
                Z
            "},
        },
        Case {
            name: "context_with_trailing_newline_is_preserved",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                B
                <|fim_suffix|>
                c
            "},
            expected: indoc! {"
                a
                B
                c
            "},
        },
        Case {
            name: "cursor_marker_passes_through_untouched",
            original: indoc! {"
                a
                b
                c
            "},
            model_output: indoc! {"
                a
                <|fim_middle|>
                B<|user_cursor|>B
                <|fim_suffix|>
                c
            "},
            expected: indoc! {"
                a
                B<|user_cursor|>B
                c
            "},
        },
        Case {
            name: "multiple_prefix_context_lines",
            original: indoc! {"
                a
                b
                c
                d
                e
            "},
            model_output: indoc! {"
                b
                c
                <|fim_middle|>
                D
                <|fim_suffix|>
                e
            "},
            expected: indoc! {"
                a
                b
                c
                D
                e
            "},
        },
    ];

    for case in cases {
        // Name the case on failure so a broken table entry can be identified,
        // matching the other table-driven tests in this file.
        let (edit_range, replacement) = apply_variable_edit(case.original, case.model_output)
            .unwrap_or_else(|error| panic!("failed case {}: {error}", case.name));
        let mut edited = case.original.to_string();
        edited.replace_range(edit_range, &replacement);
        assert_eq!(edited, case.expected, "{}", case.name);
    }
}
3367
3368        #[test]
3369        fn test_patch_to_variable_edit() {
3370            struct Case {
3371                name: &'static str,
3372                old: &'static str,
3373                patch: &'static str,
3374                cursor_offset: Option<usize>,
3375                expected_variable_edit: &'static str,
3376                expected_after_apply: &'static str,
3377            }
3378
3379            let cases = [
3380                Case {
3381                    name: "simple_replacement",
3382                    old: indoc! {"
3383                        zero
3384                        one
3385                        two
3386                        three
3387                        four
3388                        five
3389                    "},
3390                    patch: indoc! {"
3391                        @@ -3,3 +3,3 @@
3392                         two
3393                        -three
3394                        +THREE
3395                         four
3396                    "},
3397                    cursor_offset: None,
3398                    expected_variable_edit: indoc! {"
3399                        one
3400                        two
3401                        <|fim_middle|>
3402                        THREE
3403                        <|fim_suffix|>
3404                        four
3405                        five
3406                    "},
3407                    expected_after_apply: indoc! {"
3408                        zero
3409                        one
3410                        two
3411                        THREE
3412                        four
3413                        five
3414                    "},
3415                },
3416                Case {
3417                    name: "insertion",
3418                    old: indoc! {"
3419                        a
3420                        b
3421                        c
3422                        d
3423                        e
3424                    "},
3425                    patch: indoc! {"
3426                        @@ -2,0 +3,1 @@
3427                         b
3428                        +X
3429                         c
3430                    "},
3431                    cursor_offset: None,
3432                    expected_variable_edit: indoc! {"
3433                        a
3434                        b
3435                        <|fim_middle|>
3436                        X
3437                        <|fim_suffix|>
3438                        c
3439                        d
3440                    "},
3441                    expected_after_apply: indoc! {"
3442                        a
3443                        b
3444                        X
3445                        c
3446                        d
3447                        e
3448                    "},
3449                },
3450                Case {
3451                    name: "deletion",
3452                    old: indoc! {"
3453                        a
3454                        b
3455                        c
3456                        d
3457                        e
3458                    "},
3459                    patch: indoc! {"
3460                        @@ -2,3 +2,2 @@
3461                         b
3462                        -c
3463                         d
3464                    "},
3465                    cursor_offset: None,
3466                    expected_variable_edit: indoc! {"
3467                        a
3468                        b
3469                        <|fim_middle|>
3470                        <|fim_suffix|>
3471                        d
3472                        e
3473                    "},
3474                    expected_after_apply: indoc! {"
3475                        a
3476                        b
3477                        d
3478                        e
3479                    "},
3480                },
3481                Case {
3482                    name: "edit_near_start",
3483                    old: indoc! {"
3484                        first
3485                        second
3486                        third
3487                        fourth
3488                    "},
3489                    patch: indoc! {"
3490                        @@ -1,1 +1,1 @@
3491                        -first
3492                        +FIRST
3493                    "},
3494                    cursor_offset: None,
3495                    expected_variable_edit: indoc! {"
3496                        <|fim_middle|>
3497                        FIRST
3498                        <|fim_suffix|>
3499                        second
3500                        third
3501                    "},
3502                    expected_after_apply: indoc! {"
3503                        FIRST
3504                        second
3505                        third
3506                        fourth
3507                    "},
3508                },
3509                Case {
3510                    name: "edit_near_end",
3511                    old: indoc! {"
3512                        first
3513                        second
3514                        third
3515                        fourth
3516                    "},
3517                    patch: indoc! {"
3518                        @@ -4,1 +4,1 @@
3519                        -fourth
3520                        +FOURTH
3521                    "},
3522                    cursor_offset: None,
3523                    expected_variable_edit: indoc! {"
3524                        second
3525                        third
3526                        <|fim_middle|>
3527                        FOURTH
3528                        <|fim_suffix|>
3529                    "},
3530                    expected_after_apply: indoc! {"
3531                        first
3532                        second
3533                        third
3534                        FOURTH
3535                    "},
3536                },
3537                Case {
3538                    name: "cursor_at_start_of_replacement",
3539                    old: indoc! {"
3540                        zero
3541                        one
3542                        two
3543                        three
3544                        four
3545                        five
3546                    "},
3547                    patch: indoc! {"
3548                        @@ -3,3 +3,3 @@
3549                         two
3550                        -three
3551                        +THREE
3552                         four
3553                    "},
3554                    cursor_offset: Some(4),
3555                    expected_variable_edit: indoc! {"
3556                        one
3557                        two
3558                        <|fim_middle|>
3559                        <|user_cursor|>THREE
3560                        <|fim_suffix|>
3561                        four
3562                        five
3563                    "},
3564                    expected_after_apply: indoc! {"
3565                        zero
3566                        one
3567                        two
3568                        <|user_cursor|>THREE
3569                        four
3570                        five
3571                    "},
3572                },
3573                Case {
3574                    name: "cursor_in_middle_of_replacement",
3575                    old: indoc! {"
3576                        zero
3577                        one
3578                        two
3579                        three
3580                        four
3581                        five
3582                    "},
3583                    patch: indoc! {"
3584                        @@ -3,3 +3,3 @@
3585                         two
3586                        -three
3587                        +THREE
3588                         four
3589                    "},
3590                    cursor_offset: Some(6),
3591                    expected_variable_edit: indoc! {"
3592                        one
3593                        two
3594                        <|fim_middle|>
3595                        TH<|user_cursor|>REE
3596                        <|fim_suffix|>
3597                        four
3598                        five
3599                    "},
3600                    expected_after_apply: indoc! {"
3601                        zero
3602                        one
3603                        two
3604                        TH<|user_cursor|>REE
3605                        four
3606                        five
3607                    "},
3608                },
3609                Case {
3610                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3611                    old: indoc! {"
3612                        one
3613                        a
3614                        b
3615                        c
3616                        d
3617                        two
3618                        a
3619                        b
3620                        c
3621                        d
3622                        three
3623                        a
3624                        b
3625                        c
3626                        d
3627                        four
3628                    "},
3629                    patch: indoc! {"
3630                        @@ -4,5 +4,5 @@
3631                         two
3632                         a
3633                         b
3634                        -c
3635                        +C
3636                         d
3637                         three
3638                    "},
3639                    cursor_offset: None,
3640                    expected_variable_edit: indoc! {"
3641                        two
3642                        a
3643                        b
3644                        <|fim_middle|>
3645                        C
3646                        <|fim_suffix|>
3647                        d
3648                        three
3649                    "},
3650                    expected_after_apply: indoc! {"
3651                        one
3652                        a
3653                        b
3654                        c
3655                        d
3656                        two
3657                        a
3658                        b
3659                        C
3660                        d
3661                        three
3662                        a
3663                        b
3664                        c
3665                        d
3666                        four
3667                    "},
3668                },
3669                Case {
3670                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3671                    old: indoc! {"
3672                        {
3673                            {
3674                                one();
3675                            }
3676                        }
3677                        {
3678                            {
3679                                two();
3680                            }
3681                        }
3682                        {
3683                            {
3684                                three();
3685                            }
3686                        }
3687                        {
3688                            {
3689                                four();
3690                            }
3691                        }
3692                    "},
3693                    patch: indoc! {"
3694                        @@ -4,5 +4,5 @@
3695                             {
3696                        -        two();
3697                        +        TWO();
3698                             }
3699                    "},
3700                    cursor_offset: None,
3701                    expected_variable_edit: indoc! {"
3702                                one();
3703                            }
3704                        }
3705                        {
3706                            {
3707                        <|fim_middle|>
3708                                TWO();
3709                        <|fim_suffix|>
3710                            }
3711                        }
3712                        {
3713                            {
3714                                three();
3715                    "},
3716                    expected_after_apply: indoc! {"
3717                        {
3718                            {
3719                                one();
3720                            }
3721                        }
3722                        {
3723                            {
3724                                TWO();
3725                            }
3726                        }
3727                        {
3728                            {
3729                                three();
3730                            }
3731                        }
3732                        {
3733                            {
3734                                four();
3735                            }
3736                        }
3737                    "},
3738                },
3739            ];
3740
3741            for case in cases {
3742                let output =
3743                    patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
3744                        .unwrap_or_else(|error| {
3745                            panic!("failed converting patch for {}: {error}", case.name)
3746                        });
3747                assert_eq!(
3748                    output, case.expected_variable_edit,
3749                    "patch->variable_edit mismatch for {}",
3750                    case.name
3751                );
3752
3753                let (edit_range, replacement) = apply_variable_edit(case.old, &output)
3754                    .unwrap_or_else(|error| {
3755                        panic!("failed applying variable_edit for {}: {error}", case.name)
3756                    });
3757                let mut edited_by_variable_edit = case.old.to_string();
3758                edited_by_variable_edit.replace_range(edit_range, &replacement);
3759                assert_eq!(
3760                    edited_by_variable_edit, case.expected_after_apply,
3761                    "variable_edit apply mismatch for {}",
3762                    case.name
3763                );
3764
3765                let (expected_edit_range, expected_replacement) =
3766                    apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
3767                        |error| {
3768                            panic!(
3769                                "failed applying expected variable_edit for {}: {error}",
3770                                case.name
3771                            )
3772                        },
3773                    );
3774                let mut edited_by_expected_variable_edit = case.old.to_string();
3775                edited_by_expected_variable_edit
3776                    .replace_range(expected_edit_range, &expected_replacement);
3777                assert_eq!(
3778                    edited_by_expected_variable_edit, case.expected_after_apply,
3779                    "expected variable_edit apply mismatch for {}",
3780                    case.name
3781                );
3782            }
3783        }
3784
3785        #[test]
3786        fn test_write_cursor_excerpt_section() {
3787            let path = Path::new("test.rs");
3788            let context = "fn main() {\n    hello();\n}\n";
3789            let cursor_offset = 17;
3790            let mut prompt = String::new();
3791            write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
3792            assert_eq!(
3793                prompt,
3794                "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
3795            );
3796        }
3797    }
3798}
3799
3800/// The zeta1 prompt format
3801pub mod zeta1 {
3802    use super::*;
3803    use std::fmt::Write;
3804
    /// Marker written at the cursor position inside the editable region of a
    /// zeta1 prompt, and looked for in the model output.
    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
    /// Marker written at the top of the excerpt when it starts at the beginning
    /// of the file.
    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
    /// Marker written immediately before the editable region.
    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
    /// Marker written immediately after the editable region.
    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";

    // Fixed text that opens every zeta1 prompt; the formatted user edits are
    // appended directly after it.
    const INSTRUCTION_HEADER: &str = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n\n",
        "### User Edits:\n\n"
    );
    // Separates the user edits from the excerpt.
    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
    // Closes the prompt; the model's response is expected to follow it.
    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
3819
3820    /// Formats a complete zeta1 prompt from the input events and excerpt.
3821    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
3822        let mut prompt = String::with_capacity(
3823            INSTRUCTION_HEADER.len()
3824                + input_events.len()
3825                + EXCERPT_HEADER.len()
3826                + input_excerpt.len()
3827                + RESPONSE_HEADER.len(),
3828        );
3829        prompt.push_str(INSTRUCTION_HEADER);
3830        prompt.push_str(input_events);
3831        prompt.push_str(EXCERPT_HEADER);
3832        prompt.push_str(input_excerpt);
3833        prompt.push_str(RESPONSE_HEADER);
3834        prompt
3835    }
3836
3837    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
3838    /// editable and context byte-offset ranges within `cursor_excerpt`.
3839    pub fn format_zeta1_from_input(
3840        input: &ZetaPromptInput,
3841        editable_range: Range<usize>,
3842        context_range: Range<usize>,
3843    ) -> String {
3844        let events = format_zeta1_events(&input.events);
3845        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
3846        format_zeta1_prompt(&events, &excerpt)
3847    }
3848
3849    /// Formats events in zeta1 style (oldest first).
3850    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
3851        let mut result = String::new();
3852        for event in
3853            events
3854                .iter()
3855                .skip(events.len().saturating_sub(max_edit_event_count_for_format(
3856                    &ZetaFormat::V0114180EditableRegion,
3857                )))
3858        {
3859            let event_string = format_zeta1_event(event);
3860            if event_string.is_empty() {
3861                continue;
3862            }
3863            if !result.is_empty() {
3864                result.push_str("\n\n");
3865            }
3866            result.push_str(&event_string);
3867        }
3868        result
3869    }
3870
3871    fn format_zeta1_event(event: &Event) -> String {
3872        match event {
3873            Event::BufferChange {
3874                path,
3875                old_path,
3876                diff,
3877                ..
3878            } => {
3879                let mut prompt = String::new();
3880                if old_path != path {
3881                    writeln!(
3882                        prompt,
3883                        "User renamed {} to {}\n",
3884                        old_path.display(),
3885                        path.display()
3886                    )
3887                    .ok();
3888                }
3889                if !diff.is_empty() {
3890                    write!(
3891                        prompt,
3892                        "User edited {}:\n```diff\n{}\n```",
3893                        path.display(),
3894                        diff
3895                    )
3896                    .ok();
3897                }
3898                prompt
3899            }
3900        }
3901    }
3902
3903    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
3904    /// within `cursor_excerpt`.
3905    fn format_zeta1_excerpt(
3906        input: &ZetaPromptInput,
3907        editable_range: Range<usize>,
3908        context_range: Range<usize>,
3909    ) -> String {
3910        let path_str = input.cursor_path.to_string_lossy();
3911        let excerpt = &*input.cursor_excerpt;
3912        let cursor_offset = input.cursor_offset_in_excerpt;
3913
3914        let mut prompt = String::new();
3915        writeln!(&mut prompt, "```{path_str}").ok();
3916
3917        let starts_at_file_beginning =
3918            input.excerpt_start_row == Some(0) && context_range.start == 0;
3919        if starts_at_file_beginning {
3920            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
3921        }
3922
3923        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
3924
3925        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
3926        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
3927        prompt.push_str(CURSOR_MARKER);
3928        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
3929        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
3930
3931        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
3932        write!(prompt, "\n```").ok();
3933
3934        prompt
3935    }
3936
3937    /// Cleans zeta1 model output by extracting content between editable region
3938    /// markers and converting the zeta1 cursor marker to the universal one.
3939    /// Returns `None` if the output doesn't contain the expected markers.
3940    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
3941        let content = output.replace(CURSOR_MARKER, "");
3942
3943        let content_start = content
3944            .find(EDITABLE_REGION_START_MARKER)
3945            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
3946            .map(|pos| {
3947                if content.as_bytes().get(pos) == Some(&b'\n') {
3948                    pos + 1
3949                } else {
3950                    pos
3951                }
3952            })
3953            .unwrap_or(0);
3954
3955        let content_end = content
3956            .find(EDITABLE_REGION_END_MARKER)
3957            .map(|pos| {
3958                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
3959                    pos - 1
3960                } else {
3961                    pos
3962                }
3963            })
3964            .unwrap_or(content.len());
3965
3966        if content_start > content_end {
3967            return Some(String::new());
3968        }
3969
3970        let extracted = &content[content_start..content_end];
3971
3972        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
3973            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
3974            let text_before_cursor = text_before_cursor
3975                .find(EDITABLE_REGION_START_MARKER)
3976                .map(|pos| {
3977                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
3978                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
3979                        after_marker + 1
3980                    } else {
3981                        after_marker
3982                    }
3983                })
3984                .unwrap_or(0);
3985            let offset_in_extracted = zeta1_cursor_pos
3986                .saturating_sub(text_before_cursor)
3987                .min(extracted.len());
3988            offset_in_extracted
3989        });
3990
3991        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
3992        if let Some(offset) = cursor_offset {
3993            result.push_str(&extracted[..offset]);
3994            result.push_str(super::CURSOR_MARKER);
3995            result.push_str(&extracted[offset..]);
3996        } else {
3997            result.push_str(extracted);
3998        }
3999
4000        Some(result)
4001    }
4002}
4003
4004#[cfg(test)]
4005mod tests {
4006    use super::*;
4007    use indoc::indoc;
4008
4009    fn make_input(
4010        cursor_excerpt: &str,
4011        editable_range: Range<usize>,
4012        cursor_offset: usize,
4013        events: Vec<Event>,
4014        related_files: Vec<RelatedFile>,
4015    ) -> ZetaPromptInput {
4016        let context_range = 0..cursor_excerpt.len();
4017        ZetaPromptInput {
4018            cursor_path: Path::new("test.rs").into(),
4019            cursor_excerpt: cursor_excerpt.into(),
4020            cursor_offset_in_excerpt: cursor_offset,
4021            excerpt_start_row: None,
4022            events: events.into_iter().map(Arc::new).collect(),
4023            related_files: Some(related_files),
4024            active_buffer_diagnostics: vec![],
4025            excerpt_ranges: ExcerptRanges {
4026                editable_150: editable_range.clone(),
4027                editable_180: editable_range.clone(),
4028                editable_350: editable_range,
4029                editable_150_context_350: context_range.clone(),
4030                editable_180_context_350: context_range.clone(),
4031                editable_350_context_150: context_range,
4032                ..Default::default()
4033            },
4034            syntax_ranges: None,
4035            experiment: None,
4036            in_open_source_repo: false,
4037            can_collect_data: false,
4038            repo_url: None,
4039        }
4040    }
4041
4042    fn make_input_with_context_range(
4043        excerpt: &str,
4044        editable_range: Range<usize>,
4045        context_range: Range<usize>,
4046        cursor_offset: usize,
4047    ) -> ZetaPromptInput {
4048        ZetaPromptInput {
4049            cursor_path: Path::new("test.rs").into(),
4050            cursor_excerpt: excerpt.into(),
4051            cursor_offset_in_excerpt: cursor_offset,
4052            excerpt_start_row: None,
4053            events: vec![],
4054            related_files: Some(vec![]),
4055            active_buffer_diagnostics: vec![],
4056            excerpt_ranges: ExcerptRanges {
4057                editable_150: editable_range.clone(),
4058                editable_180: editable_range.clone(),
4059                editable_350: editable_range,
4060                editable_150_context_350: context_range.clone(),
4061                editable_180_context_350: context_range.clone(),
4062                editable_350_context_150: context_range,
4063                ..Default::default()
4064            },
4065            syntax_ranges: None,
4066            experiment: None,
4067            in_open_source_repo: false,
4068            can_collect_data: false,
4069            repo_url: None,
4070        }
4071    }
4072
4073    fn make_event(path: &str, diff: &str) -> Event {
4074        Event::BufferChange {
4075            path: Path::new(path).into(),
4076            old_path: Path::new(path).into(),
4077            diff: diff.to_string(),
4078            predicted: false,
4079            in_open_source_repo: false,
4080        }
4081    }
4082
4083    fn make_related_file(path: &str, content: &str) -> RelatedFile {
4084        RelatedFile {
4085            path: Path::new(path).into(),
4086            max_row: content.lines().count() as u32,
4087            excerpts: vec![RelatedExcerpt {
4088                row_range: 0..content.lines().count() as u32,
4089                text: content.into(),
4090                order: 0,
4091            }],
4092            in_open_source_repo: false,
4093        }
4094    }
4095
4096    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> Option<String> {
4097        format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4098    }
4099
4100    #[test]
4101    fn test_no_truncation_when_within_budget() {
4102        let input = make_input(
4103            "prefix\neditable\nsuffix",
4104            7..15,
4105            10,
4106            vec![make_event("a.rs", "-old\n+new\n")],
4107            vec![make_related_file("related.rs", "fn helper() {}\n")],
4108        );
4109
4110        assert_eq!(
4111            format_with_budget(&input, 10000).unwrap(),
4112            indoc! {r#"
4113                <|file_sep|>related.rs
4114                fn helper() {}
4115                <|file_sep|>edit history
4116                --- a/a.rs
4117                +++ b/a.rs
4118                -old
4119                +new
4120                <|file_sep|>test.rs
4121                <|fim_prefix|>
4122                prefix
4123                <|fim_middle|>current
4124                edi<|user_cursor|>table
4125                <|fim_suffix|>
4126
4127                suffix
4128                <|fim_middle|>updated
4129            "#}
4130            .to_string()
4131        );
4132    }
4133
4134    #[test]
4135    fn test_truncation_drops_edit_history_when_budget_tight() {
4136        let input = make_input(
4137            "code",
4138            0..4,
4139            2,
4140            vec![make_event("a.rs", "-x\n+y\n")],
4141            vec![
4142                make_related_file("r1.rs", "aaaaaaa\n"),
4143                make_related_file("r2.rs", "bbbbbbb\n"),
4144            ],
4145        );
4146
4147        assert_eq!(
4148            format_with_budget(&input, 10000).unwrap(),
4149            indoc! {r#"
4150                <|file_sep|>r1.rs
4151                aaaaaaa
4152                <|file_sep|>r2.rs
4153                bbbbbbb
4154                <|file_sep|>edit history
4155                --- a/a.rs
4156                +++ b/a.rs
4157                -x
4158                +y
4159                <|file_sep|>test.rs
4160                <|fim_prefix|>
4161                <|fim_middle|>current
4162                co<|user_cursor|>de
4163                <|fim_suffix|>
4164                <|fim_middle|>updated
4165            "#}
4166            .to_string()
4167        );
4168
4169        assert_eq!(
4170            format_with_budget(&input, 55),
4171            Some(
4172                indoc! {r#"
4173                <|file_sep|>edit history
4174                --- a/a.rs
4175                +++ b/a.rs
4176                -x
4177                +y
4178                <|file_sep|>test.rs
4179                <|fim_prefix|>
4180                <|fim_middle|>current
4181                co<|user_cursor|>de
4182                <|fim_suffix|>
4183                <|fim_middle|>updated
4184            "#}
4185                .to_string()
4186            )
4187        );
4188    }
4189
4190    #[test]
4191    fn test_truncation_includes_partial_excerpts() {
4192        let input = make_input(
4193            "x",
4194            0..1,
4195            0,
4196            vec![],
4197            vec![RelatedFile {
4198                path: Path::new("big.rs").into(),
4199                max_row: 30,
4200                in_open_source_repo: false,
4201                excerpts: vec![
4202                    RelatedExcerpt {
4203                        row_range: 0..10,
4204                        text: "first excerpt\n".into(),
4205                        order: 0,
4206                    },
4207                    RelatedExcerpt {
4208                        row_range: 10..20,
4209                        text: "second excerpt\n".into(),
4210                        order: 0,
4211                    },
4212                    RelatedExcerpt {
4213                        row_range: 20..30,
4214                        text: "third excerpt\n".into(),
4215                        order: 0,
4216                    },
4217                ],
4218            }],
4219        );
4220
4221        assert_eq!(
4222            format_with_budget(&input, 10000).unwrap(),
4223            indoc! {r#"
4224                <|file_sep|>big.rs
4225                first excerpt
4226                ...
4227                second excerpt
4228                ...
4229                third excerpt
4230                <|file_sep|>test.rs
4231                <|fim_prefix|>
4232                <|fim_middle|>current
4233                <|user_cursor|>x
4234                <|fim_suffix|>
4235                <|fim_middle|>updated
4236            "#}
4237            .to_string()
4238        );
4239
4240        assert_eq!(
4241            format_with_budget(&input, 50).unwrap(),
4242            indoc! {r#"
4243                <|file_sep|>big.rs
4244                first excerpt
4245                ...
4246                <|file_sep|>test.rs
4247                <|fim_prefix|>
4248                <|fim_middle|>current
4249                <|user_cursor|>x
4250                <|fim_suffix|>
4251                <|fim_middle|>updated
4252            "#}
4253            .to_string()
4254        );
4255    }
4256
4257    #[test]
4258    fn test_truncation_prioritizes_lower_order_excerpts() {
4259        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
4260        // With tight budget, only the lower-order excerpt from file_b should be included.
4261        let input = make_input(
4262            "x",
4263            0..1,
4264            0,
4265            vec![],
4266            vec![
4267                RelatedFile {
4268                    path: Path::new("file_a.rs").into(),
4269                    max_row: 10,
4270                    in_open_source_repo: false,
4271                    excerpts: vec![RelatedExcerpt {
4272                        row_range: 0..10,
4273                        text: "low priority content\n".into(),
4274                        order: 5,
4275                    }],
4276                },
4277                RelatedFile {
4278                    path: Path::new("file_b.rs").into(),
4279                    max_row: 10,
4280                    in_open_source_repo: false,
4281                    excerpts: vec![RelatedExcerpt {
4282                        row_range: 0..10,
4283                        text: "high priority content\n".into(),
4284                        order: 1,
4285                    }],
4286                },
4287            ],
4288        );
4289
4290        // With large budget, both files included; rendered in stable lexicographic order.
4291        assert_eq!(
4292            format_with_budget(&input, 10000).unwrap(),
4293            indoc! {r#"
4294                <|file_sep|>file_a.rs
4295                low priority content
4296                <|file_sep|>file_b.rs
4297                high priority content
4298                <|file_sep|>test.rs
4299                <|fim_prefix|>
4300                <|fim_middle|>current
4301                <|user_cursor|>x
4302                <|fim_suffix|>
4303                <|fim_middle|>updated
4304            "#}
4305            .to_string()
4306        );
4307
4308        // With tight budget, only file_b (lower order) fits.
4309        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
4310        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
4311        // file_a would need another 14 tokens, which doesn't fit.
4312        assert_eq!(
4313            format_with_budget(&input, 52).unwrap(),
4314            indoc! {r#"
4315                <|file_sep|>file_b.rs
4316                high priority content
4317                <|file_sep|>test.rs
4318                <|fim_prefix|>
4319                <|fim_middle|>current
4320                <|user_cursor|>x
4321                <|fim_suffix|>
4322                <|fim_middle|>updated
4323            "#}
4324            .to_string()
4325        );
4326    }
4327
4328    #[test]
4329    fn test_truncation_drops_high_order_excerpts_within_file() {
4330        // A single file has excerpts at order 1 and order 3. With a tight budget,
4331        // only the order-1 excerpts are included while the order-3 excerpt is
4332        // dropped — even though they belong to the same file. This also preserves
4333        // the parent invariant: parent outline items have order ≤ their best
4334        // child, so they're always included when any child is.
4335        let input = make_input(
4336            "x",
4337            0..1,
4338            0,
4339            vec![],
4340            vec![RelatedFile {
4341                path: Path::new("mod.rs").into(),
4342                max_row: 30,
4343                in_open_source_repo: false,
4344                excerpts: vec![
4345                    RelatedExcerpt {
4346                        row_range: 0..5,
4347                        text: "mod header\n".into(),
4348                        order: 1,
4349                    },
4350                    RelatedExcerpt {
4351                        row_range: 5..15,
4352                        text: "important fn\n".into(),
4353                        order: 1,
4354                    },
4355                    RelatedExcerpt {
4356                        row_range: 15..30,
4357                        text: "less important fn\n".into(),
4358                        order: 3,
4359                    },
4360                ],
4361            }],
4362        );
4363
4364        // With large budget, all three excerpts included.
4365        assert_eq!(
4366            format_with_budget(&input, 10000).unwrap(),
4367            indoc! {r#"
4368                <|file_sep|>mod.rs
4369                mod header
4370                ...
4371                important fn
4372                ...
4373                less important fn
4374                <|file_sep|>test.rs
4375                <|fim_prefix|>
4376                <|fim_middle|>current
4377                <|user_cursor|>x
4378                <|fim_suffix|>
4379                <|fim_middle|>updated
4380            "#}
4381            .to_string()
4382        );
4383
4384        // With tight budget, only order<=1 excerpts included (header + important fn).
4385        assert_eq!(
4386            format_with_budget(&input, 55).unwrap(),
4387            indoc! {r#"
4388                <|file_sep|>mod.rs
4389                mod header
4390                ...
4391                important fn
4392                ...
4393                <|file_sep|>test.rs
4394                <|fim_prefix|>
4395                <|fim_middle|>current
4396                <|user_cursor|>x
4397                <|fim_suffix|>
4398                <|fim_middle|>updated
4399            "#}
4400            .to_string()
4401        );
4402    }
4403
4404    #[test]
4405    fn test_truncation_drops_older_events_first() {
4406        let input = make_input(
4407            "x",
4408            0..1,
4409            0,
4410            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
4411            vec![],
4412        );
4413
4414        assert_eq!(
4415            format_with_budget(&input, 10000).unwrap(),
4416            indoc! {r#"
4417                <|file_sep|>edit history
4418                --- a/old.rs
4419                +++ b/old.rs
4420                -1
4421                --- a/new.rs
4422                +++ b/new.rs
4423                -2
4424                <|file_sep|>test.rs
4425                <|fim_prefix|>
4426                <|fim_middle|>current
4427                <|user_cursor|>x
4428                <|fim_suffix|>
4429                <|fim_middle|>updated
4430            "#}
4431            .to_string()
4432        );
4433
4434        assert_eq!(
4435            format_with_budget(&input, 60).unwrap(),
4436            indoc! {r#"
4437                <|file_sep|>edit history
4438                --- a/new.rs
4439                +++ b/new.rs
4440                -2
4441                <|file_sep|>test.rs
4442                <|fim_prefix|>
4443                <|fim_middle|>current
4444                <|user_cursor|>x
4445                <|fim_suffix|>
4446                <|fim_middle|>updated
4447            "#}
4448            .to_string()
4449        );
4450    }
4451
4452    #[test]
4453    fn test_cursor_excerpt_always_included_with_minimal_budget() {
4454        let input = make_input(
4455            "fn main() {}",
4456            0..12,
4457            3,
4458            vec![make_event("a.rs", "-old\n+new\n")],
4459            vec![make_related_file("related.rs", "helper\n")],
4460        );
4461
4462        assert!(format_with_budget(&input, 30).is_none())
4463    }
4464
4465    #[track_caller]
4466    fn format_seed_coder(input: &ZetaPromptInput) -> String {
4467        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4468            .expect("seed coder prompt formatting should succeed")
4469    }
4470
4471    #[track_caller]
4472    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4473        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4474            .expect("seed coder prompt formatting should succeed")
4475    }
4476
4477    #[test]
4478    fn test_seed_coder_basic_format() {
4479        let input = make_input(
4480            "prefix\neditable\nsuffix",
4481            7..15,
4482            10,
4483            vec![make_event("a.rs", "-old\n+new\n")],
4484            vec![make_related_file("related.rs", "fn helper() {}\n")],
4485        );
4486
4487        assert_eq!(
4488            format_seed_coder(&input),
4489            indoc! {r#"
4490                <[fim-suffix]>
4491                suffix
4492                <[fim-prefix]><filename>related.rs
4493                fn helper() {}
4494
4495                <filename>edit_history
4496                --- a/a.rs
4497                +++ b/a.rs
4498                -old
4499                +new
4500
4501                <filename>test.rs
4502                prefix
4503                <<<<<<< CURRENT
4504                edi<|user_cursor|>table
4505                =======
4506                <[fim-middle]>"#}
4507        );
4508    }
4509
4510    #[test]
4511    fn test_seed_coder_no_context() {
4512        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
4513
4514        assert_eq!(
4515            format_seed_coder(&input),
4516            indoc! {r#"
4517                <[fim-suffix]>
4518                after
4519                <[fim-prefix]><filename>test.rs
4520                before
4521                <<<<<<< CURRENT
4522                mid<|user_cursor|>dle
4523                =======
4524                <[fim-middle]>"#}
4525        );
4526    }
4527
4528    #[test]
4529    fn test_seed_coder_truncation_drops_context() {
4530        let input = make_input(
4531            "code",
4532            0..4,
4533            2,
4534            vec![make_event("a.rs", "-x\n+y\n")],
4535            vec![make_related_file("r1.rs", "content\n")],
4536        );
4537
4538        // With large budget, everything is included
4539        assert_eq!(
4540            format_seed_coder(&input),
4541            indoc! {r#"
4542                <[fim-suffix]>
4543                <[fim-prefix]><filename>r1.rs
4544                content
4545
4546                <filename>edit_history
4547                --- a/a.rs
4548                +++ b/a.rs
4549                -x
4550                +y
4551
4552                <filename>test.rs
4553                <<<<<<< CURRENT
4554                co<|user_cursor|>de
4555                =======
4556                <[fim-middle]>"#}
4557        );
4558
4559        assert_eq!(
4560            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 24),
4561            None
4562        );
4563
4564        assert_eq!(
4565            format_seed_coder_with_budget(&input, 40),
4566            indoc! {r#"
4567                <[fim-suffix]>
4568                <[fim-prefix]><filename>test.rs
4569                <<<<<<< CURRENT
4570                co<|user_cursor|>de
4571                =======
4572                <[fim-middle]>"#
4573            }
4574        )
4575    }
4576
4577    #[test]
4578    fn test_seed_coder_truncation_prioritizes_lower_order() {
4579        let input = make_input(
4580            "code",
4581            0..4,
4582            2,
4583            vec![],
4584            vec![
4585                RelatedFile {
4586                    path: Path::new("low_prio.rs").into(),
4587                    max_row: 5,
4588                    in_open_source_repo: false,
4589                    excerpts: vec![RelatedExcerpt {
4590                        row_range: 0..5,
4591                        text: "low prio\n".into(),
4592                        order: 10,
4593                    }],
4594                },
4595                RelatedFile {
4596                    path: Path::new("high_prio.rs").into(),
4597                    max_row: 5,
4598                    in_open_source_repo: false,
4599                    excerpts: vec![RelatedExcerpt {
4600                        row_range: 0..5,
4601                        text: "high prio\n".into(),
4602                        order: 1,
4603                    }],
4604                },
4605            ],
4606        );
4607
4608        // With large budget, both included; rendered in stable lexicographic order.
4609        assert_eq!(
4610            format_seed_coder(&input),
4611            indoc! {r#"
4612                <[fim-suffix]>
4613                <[fim-prefix]><filename>low_prio.rs
4614                low prio
4615                <filename>high_prio.rs
4616                high prio
4617
4618                <filename>test.rs
4619                <<<<<<< CURRENT
4620                co<|user_cursor|>de
4621                =======
4622                <[fim-middle]>"#}
4623        );
4624
4625        // With tight budget under the generic heuristic, context is dropped but the
4626        // minimal cursor section still fits.
4627        assert_eq!(
4628            format_prompt_with_budget_for_format(&input, ZetaFormat::V0211SeedCoder, 44),
4629            Some(
4630                indoc! {r#"
4631                    <[fim-suffix]>
4632                    <[fim-prefix]><filename>test.rs
4633                    <<<<<<< CURRENT
4634                    co<|user_cursor|>de
4635                    =======
4636                    <[fim-middle]>"#}
4637                .to_string()
4638            )
4639        );
4640    }
4641
4642    #[test]
4643    fn test_format_zeta1_from_input_basic() {
4644        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
4645        let input = ZetaPromptInput {
4646            cursor_path: Path::new("src/main.rs").into(),
4647            cursor_excerpt: excerpt.into(),
4648            cursor_offset_in_excerpt: 30,
4649            excerpt_start_row: Some(0),
4650            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
4651            related_files: Some(vec![]),
4652            active_buffer_diagnostics: vec![],
4653            excerpt_ranges: ExcerptRanges {
4654                editable_150: 15..41,
4655                editable_180: 15..41,
4656                editable_350: 15..41,
4657                editable_150_context_350: 0..excerpt.len(),
4658                editable_180_context_350: 0..excerpt.len(),
4659                editable_350_context_150: 0..excerpt.len(),
4660                ..Default::default()
4661            },
4662            syntax_ranges: None,
4663            experiment: None,
4664            in_open_source_repo: false,
4665            can_collect_data: false,
4666            repo_url: None,
4667        };
4668
4669        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
4670
4671        assert_eq!(
4672            prompt,
4673            concat!(
4674                "### Instruction:\n",
4675                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4676                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4677                "into account the cursor location.\n",
4678                "\n",
4679                "### User Edits:\n",
4680                "\n",
4681                "User edited other.rs:\n",
4682                "```diff\n",
4683                "-old\n",
4684                "+new\n",
4685                "\n",
4686                "```\n",
4687                "\n",
4688                "### User Excerpt:\n",
4689                "\n",
4690                "```src/main.rs\n",
4691                "<|start_of_file|>\n",
4692                "fn before() {}\n",
4693                "<|editable_region_start|>\n",
4694                "fn foo() {\n",
4695                "    <|user_cursor_is_here|>let x = 1;\n",
4696                "\n",
4697                "<|editable_region_end|>}\n",
4698                "fn after() {}\n",
4699                "\n",
4700                "```\n",
4701                "\n",
4702                "### Response:\n",
4703            ),
4704        );
4705    }
4706
4707    #[test]
4708    fn test_format_zeta1_from_input_no_start_of_file() {
4709        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
4710        let input = ZetaPromptInput {
4711            cursor_path: Path::new("src/main.rs").into(),
4712            cursor_excerpt: excerpt.into(),
4713            cursor_offset_in_excerpt: 15,
4714            excerpt_start_row: Some(10),
4715            events: vec![],
4716            related_files: Some(vec![]),
4717            active_buffer_diagnostics: vec![],
4718            excerpt_ranges: ExcerptRanges {
4719                editable_150: 0..28,
4720                editable_180: 0..28,
4721                editable_350: 0..28,
4722                editable_150_context_350: 0..28,
4723                editable_180_context_350: 0..28,
4724                editable_350_context_150: 0..28,
4725                ..Default::default()
4726            },
4727            syntax_ranges: None,
4728            experiment: None,
4729            in_open_source_repo: false,
4730            can_collect_data: false,
4731            repo_url: None,
4732        };
4733
4734        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
4735
4736        assert_eq!(
4737            prompt,
4738            concat!(
4739                "### Instruction:\n",
4740                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4741                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4742                "into account the cursor location.\n",
4743                "\n",
4744                "### User Edits:\n",
4745                "\n",
4746                "\n",
4747                "\n",
4748                "### User Excerpt:\n",
4749                "\n",
4750                "```src/main.rs\n",
4751                "<|editable_region_start|>\n",
4752                "fn foo() {\n",
4753                "    <|user_cursor_is_here|>let x = 1;\n",
4754                "}\n",
4755                "\n",
4756                "<|editable_region_end|>\n",
4757                "```\n",
4758                "\n",
4759                "### Response:\n",
4760            ),
4761        );
4762    }
4763
4764    #[test]
4765    fn test_format_zeta1_from_input_with_sub_ranges() {
4766        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
4767        let editable_range = 10..37;
4768        let context_range = 0..excerpt.len();
4769
4770        let input = ZetaPromptInput {
4771            cursor_path: Path::new("test.rs").into(),
4772            cursor_excerpt: excerpt.into(),
4773            cursor_offset_in_excerpt: 25,
4774            excerpt_start_row: Some(0),
4775            events: vec![],
4776            related_files: Some(vec![]),
4777            active_buffer_diagnostics: vec![],
4778            excerpt_ranges: ExcerptRanges {
4779                editable_150: editable_range.clone(),
4780                editable_180: editable_range.clone(),
4781                editable_350: editable_range.clone(),
4782                editable_150_context_350: context_range.clone(),
4783                editable_180_context_350: context_range.clone(),
4784                editable_350_context_150: context_range.clone(),
4785                ..Default::default()
4786            },
4787            syntax_ranges: None,
4788            experiment: None,
4789            in_open_source_repo: false,
4790            can_collect_data: false,
4791            repo_url: None,
4792        };
4793
4794        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
4795
4796        assert_eq!(
4797            prompt,
4798            concat!(
4799                "### Instruction:\n",
4800                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4801                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4802                "into account the cursor location.\n",
4803                "\n",
4804                "### User Edits:\n",
4805                "\n",
4806                "\n",
4807                "\n",
4808                "### User Excerpt:\n",
4809                "\n",
4810                "```test.rs\n",
4811                "<|start_of_file|>\n",
4812                "// prefix\n",
4813                "<|editable_region_start|>\n",
4814                "fn foo() {\n",
4815                "    <|user_cursor_is_here|>let x = 1;\n",
4816                "}\n",
4817                "<|editable_region_end|>\n",
4818                "// suffix\n",
4819                "\n",
4820                "```\n",
4821                "\n",
4822                "### Response:\n",
4823            ),
4824        );
4825    }
4826
4827    #[test]
4828    fn test_max_event_count() {
4829        fn make_numbered_event(index: usize) -> Event {
4830            return make_event(
4831                &format!("event-{index}.rs"),
4832                &format!("-old-{index}\n+new-{index}\n"),
4833            );
4834        }
4835        let input = make_input(
4836            "x",
4837            0..1,
4838            0,
4839            (0..3).map(make_numbered_event).collect(),
4840            vec![],
4841        );
4842
4843        let edit_history_section = format_edit_history_within_budget(
4844            &input.events,
4845            "<|file_sep|>",
4846            "edit history",
4847            usize::MAX,
4848            5,
4849        );
4850
4851        assert_eq!(
4852            &edit_history_section,
4853            indoc!(
4854                "
4855                <|file_sep|>edit history
4856                --- a/event-0.rs
4857                +++ b/event-0.rs
4858                -old-0
4859                +new-0
4860                --- a/event-1.rs
4861                +++ b/event-1.rs
4862                -old-1
4863                +new-1
4864                --- a/event-2.rs
4865                +++ b/event-2.rs
4866                -old-2
4867                +new-2
4868            "
4869            )
4870        );
4871
4872        let edit_history_section = format_edit_history_within_budget(
4873            &input.events,
4874            "<|file_sep|>",
4875            "edit history",
4876            usize::MAX,
4877            2,
4878        );
4879
4880        assert_eq!(
4881            &edit_history_section,
4882            indoc!(
4883                "
4884                <|file_sep|>edit history
4885                --- a/event-1.rs
4886                +++ b/event-1.rs
4887                -old-1
4888                +new-1
4889                --- a/event-2.rs
4890                +++ b/event-2.rs
4891                -old-2
4892                +new-2
4893            "
4894            )
4895        );
4896
4897        let edit_history_section = format_edit_history_within_budget(
4898            &input.events,
4899            "<|file_sep|>",
4900            "edit history",
4901            usize::MAX,
4902            0,
4903        );
4904
4905        assert_eq!(&edit_history_section, "");
4906    }
4907
4908    #[test]
4909    fn test_clean_zeta1_model_output_basic() {
4910        let output = indoc! {"
4911            <|editable_region_start|>
4912            fn main() {
4913                println!(\"hello\");
4914            }
4915            <|editable_region_end|>
4916        "};
4917
4918        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4919        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
4920    }
4921
4922    #[test]
4923    fn test_clean_zeta1_model_output_with_cursor() {
4924        let output = indoc! {"
4925            <|editable_region_start|>
4926            fn main() {
4927                <|user_cursor_is_here|>println!(\"hello\");
4928            }
4929            <|editable_region_end|>
4930        "};
4931
4932        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4933        assert_eq!(
4934            cleaned,
4935            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
4936        );
4937    }
4938
4939    #[test]
4940    fn test_clean_zeta1_model_output_no_markers() {
4941        let output = "fn main() {}\n";
4942        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4943        assert_eq!(cleaned, "fn main() {}\n");
4944    }
4945
4946    #[test]
4947    fn test_clean_zeta1_model_output_empty_region() {
4948        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
4949        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4950        assert_eq!(cleaned, "");
4951    }
4952
4953    fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
4954        let mut result = excerpt.to_string();
4955        result.replace_range(
4956            parsed_output.range_in_excerpt.clone(),
4957            &parsed_output.new_editable_region,
4958        );
4959        result
4960    }
4961
4962    #[test]
4963    fn test_parse_zeta2_model_output() {
4964        let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
4965        let context_start = excerpt.find("ctx start").unwrap();
4966        let context_end = excerpt.find("after ctx").unwrap();
4967        let editable_start = excerpt.find("editable old").unwrap();
4968        let editable_end = editable_start + "editable old\n".len();
4969        let input = make_input_with_context_range(
4970            excerpt,
4971            editable_start..editable_end,
4972            context_start..context_end,
4973            editable_start,
4974        );
4975
4976        let output = parse_zeta2_model_output(
4977            "editable new\n>>>>>>> UPDATED\n",
4978            ZetaFormat::V0131GitMergeMarkersPrefix,
4979            &input,
4980        )
4981        .unwrap();
4982
4983        assert_eq!(
4984            apply_edit(excerpt, &output),
4985            "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
4986        );
4987    }
4988
4989    #[test]
4990    fn test_parse_zeta2_model_output_identity() {
4991        let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
4992        let editable_start = excerpt.find("bbb").unwrap();
4993        let editable_end = excerpt.find("ddd").unwrap();
4994        let input = make_input_with_context_range(
4995            excerpt,
4996            editable_start..editable_end,
4997            0..excerpt.len(),
4998            editable_start,
4999        );
5000
5001        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5002        let output =
5003            parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
5004
5005        assert_eq!(apply_edit(excerpt, &output), excerpt);
5006    }
5007
5008    #[test]
5009    fn test_parse_zeta2_model_output_strips_end_marker() {
5010        let excerpt = "hello\nworld\n";
5011        let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
5012
5013        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
5014        let output1 =
5015            parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
5016        let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();
5017
5018        assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
5019        assert_eq!(apply_edit(excerpt, &output1), "new content\n");
5020    }
5021}