zeta_prompt.rs

   1pub mod excerpt_ranges;
   2pub mod multi_region;
   3
   4use anyhow::{Result, anyhow};
   5use serde::{Deserialize, Serialize};
   6use std::fmt::Write;
   7use std::ops::Range;
   8use std::path::Path;
   9use std::sync::Arc;
  10use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
  11
  12pub use crate::excerpt_ranges::{
  13    ExcerptRanges, compute_editable_and_context_ranges, compute_legacy_excerpt_ranges,
  14};
  15
/// Marker text written into the prompt at the user's cursor position.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that [`format_zeta_prompt`] passes to the prompt formatter.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  23
  24fn estimate_tokens(bytes: usize) -> usize {
  25    bytes / 3
  26}
  27
/// Everything needed to build a zeta prompt for one cursor position and to
/// map the model's output back onto the excerpt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written into the cursor
    /// excerpt section of the prompt.
    pub cursor_path: Arc<Path>,
    /// Text surrounding the cursor. The editable and context regions are
    /// sliced out of this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Row offset added to row numbers computed within `cursor_excerpt`.
    /// When present, related-file excerpts from the cursor's own file that
    /// fall inside the context rows are dropped from the prompt.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit-history events rendered into the prompt, budget permitting.
    pub events: Vec<Arc<Event>>,
    /// Files whose excerpts may be included in the prompt, budget permitting.
    #[serde(default)]
    pub related_files: Option<Vec<RelatedFile>>,
    /// Diagnostics for the active buffer.
    // NOTE(review): not read by any code visible in this part of the file.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub active_buffer_diagnostics: Vec<ActiveBufferDiagnostic>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// Byte offset ranges within `cursor_excerpt` for all syntax nodes that
    /// contain `cursor_offset_in_excerpt`, ordered from innermost to outermost.
    /// When present, the server uses these to compute editable/context ranges
    /// instead of `excerpt_ranges`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub syntax_ranges: Option<Vec<Range<usize>>>,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    // NOTE(review): the three fields below are not read by any code visible
    // here; presumably they are consumed by the server — confirm.
    #[serde(default)]
    pub in_open_source_repo: bool,
    #[serde(default)]
    pub can_collect_data: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
  58
/// Identifies a prompt/output format. Each variant selects its own special
/// tokens, token limits, excerpt ranges and cursor-section layout through the
/// `*_for_format` functions in this file.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Required because `v0226Hashline` starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    /// The format used when none is specified.
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
    V0306SeedMultiRegions,
}
  87
  88impl std::fmt::Display for ZetaFormat {
  89    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  90        write!(f, "{}", <&'static str>::from(self))
  91    }
  92}
  93
  94impl ZetaFormat {
  95    pub fn parse(format_name: &str) -> Result<Self> {
  96        let mut results = ZetaFormat::iter().filter(|version| {
  97            <&'static str>::from(version)
  98                .to_lowercase()
  99                .contains(&format_name.to_lowercase())
 100        });
 101        let Some(result) = results.next() else {
 102            anyhow::bail!(
 103                "`{format_name}` did not match any of:\n{}",
 104                Self::options_as_string()
 105            );
 106        };
 107        if results.next().is_some() {
 108            anyhow::bail!(
 109                "`{format_name}` matched more than one of:\n{}",
 110                Self::options_as_string()
 111            );
 112        }
 113        Ok(result)
 114    }
 115
 116    pub fn options_as_string() -> String {
 117        ZetaFormat::iter()
 118            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 119            .collect::<Vec<_>>()
 120            .concat()
 121    }
 122}
 123
/// An edit-history event that can be rendered into the prompt.
///
/// Serialized with an `event` field naming the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, described by a diff.
    BufferChange {
        /// Path rendered on the `+++ b` line of the event.
        path: Arc<Path>,
        /// Path rendered on the `--- a` line of the event.
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the two path lines.
        diff: String,
        /// When true, the event is prefixed with a
        /// "User accepted prediction" comment line.
        predicted: bool,
        /// Not rendered into the prompt; exposed via
        /// [`Event::in_open_source_repo`].
        in_open_source_repo: bool,
    },
}
 135
 136impl Event {
 137    pub fn in_open_source_repo(&self) -> bool {
 138        match self {
 139            Event::BufferChange {
 140                in_open_source_repo,
 141                ..
 142            } => *in_open_source_repo,
 143        }
 144    }
 145}
 146
 147pub fn write_event(prompt: &mut String, event: &Event) {
 148    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 149        for component in path.components() {
 150            prompt.push('/');
 151            write!(prompt, "{}", component.as_os_str().display()).ok();
 152        }
 153    }
 154    match event {
 155        Event::BufferChange {
 156            path,
 157            old_path,
 158            diff,
 159            predicted,
 160            in_open_source_repo: _,
 161        } => {
 162            if *predicted {
 163                prompt.push_str("// User accepted prediction:\n");
 164            }
 165            prompt.push_str("--- a");
 166            write_path_as_unix_str(prompt, old_path.as_ref());
 167            prompt.push_str("\n+++ b");
 168            write_path_as_unix_str(prompt, path.as_ref());
 169            prompt.push('\n');
 170            prompt.push_str(diff);
 171        }
 172    }
 173}
 174
/// A diagnostic attached to the active buffer, together with a snippet of
/// surrounding text.
// NOTE(review): no code visible in this part of the file reads these fields;
// the field notes below are inferred from names and types — confirm.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ActiveBufferDiagnostic {
    /// Severity level, if known. The numeric encoding is not defined here.
    pub severity: Option<i32>,
    /// The diagnostic message text.
    pub message: String,
    /// Text snippet associated with the diagnostic.
    pub snippet: String,
    /// Presumably the buffer rows covered by `snippet` — TODO confirm.
    pub snippet_buffer_row_range: Range<u32>,
    /// Presumably the diagnostic's range within `snippet` — TODO confirm
    /// whether these are byte offsets.
    pub diagnostic_range_in_snippet: Range<usize>,
}
 183
/// A file related to the cursor location, with the excerpts of it that may be
/// included in the prompt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path rendered in the file's header line.
    pub path: Arc<Path>,
    /// Compared against each excerpt's end row: an excerpt ending before this
    /// row is followed by a `...` line when rendered.
    pub max_row: u32,
    /// Excerpts of this file that are candidates for inclusion.
    pub excerpts: Vec<RelatedExcerpt>,
    // NOTE(review): not read by the formatting code visible here.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 192
/// One excerpt of a [`RelatedFile`].
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file covered by this excerpt.
    pub row_range: Range<u32>,
    /// The excerpt's text, rendered verbatim into the prompt.
    pub text: Arc<str>,
    /// Budget priority: excerpts with a lower `order` are admitted first when
    /// fitting related files into the token budget. Defaults to 0.
    #[serde(default)]
    pub order: usize,
}
 200
 201pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 202    special_tokens_for_format(format)
 203        .iter()
 204        .any(|token| input.cursor_excerpt.contains(token))
 205}
 206
 207pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 208    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
 209}
 210
 211pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 212    match format {
 213        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
 214        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
 215        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
 216        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 217        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
 218        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
 219        ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 220        ZetaFormat::v0226Hashline => hashline::special_tokens(),
 221        ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
 222        ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
 223        ZetaFormat::V0306SeedMultiRegions => {
 224            static TOKENS: &[&str] = &[
 225                seed_coder::FIM_SUFFIX,
 226                seed_coder::FIM_PREFIX,
 227                seed_coder::FIM_MIDDLE,
 228                seed_coder::FILE_MARKER,
 229                seed_coder::START_MARKER,
 230                seed_coder::SEPARATOR,
 231                seed_coder::END_MARKER,
 232                CURSOR_MARKER,
 233                multi_region::MARKER_TAG_PREFIX,
 234            ];
 235            TOKENS
 236        }
 237    }
 238}
 239
 240/// Returns the (editable_token_limit, context_token_limit) for a given format.
 241pub fn token_limits_for_format(format: ZetaFormat) -> (usize, usize) {
 242    match format {
 243        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (150, 350),
 244        ZetaFormat::V0114180EditableRegion => (180, 350),
 245        ZetaFormat::V0120GitMergeMarkers
 246        | ZetaFormat::V0131GitMergeMarkersPrefix
 247        | ZetaFormat::V0211Prefill
 248        | ZetaFormat::V0211SeedCoder
 249        | ZetaFormat::v0226Hashline
 250        | ZetaFormat::V0306SeedMultiRegions
 251        | ZetaFormat::V0304SeedNoEdits => (350, 150),
 252        ZetaFormat::V0304VariableEdit => (1024, 0),
 253    }
 254}
 255
 256pub fn stop_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 257    match format {
 258        ZetaFormat::v0226Hashline => &[hashline::NO_EDITS_COMMAND_MARKER],
 259        ZetaFormat::V0112MiddleAtEnd
 260        | ZetaFormat::V0113Ordered
 261        | ZetaFormat::V0114180EditableRegion
 262        | ZetaFormat::V0120GitMergeMarkers
 263        | ZetaFormat::V0131GitMergeMarkersPrefix
 264        | ZetaFormat::V0211Prefill
 265        | ZetaFormat::V0211SeedCoder
 266        | ZetaFormat::V0304VariableEdit
 267        | ZetaFormat::V0306SeedMultiRegions
 268        | ZetaFormat::V0304SeedNoEdits => &[],
 269    }
 270}
 271
 272pub fn excerpt_ranges_for_format(
 273    format: ZetaFormat,
 274    ranges: &ExcerptRanges,
 275) -> (Range<usize>, Range<usize>) {
 276    match format {
 277        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 278            ranges.editable_150.clone(),
 279            ranges.editable_150_context_350.clone(),
 280        ),
 281        ZetaFormat::V0114180EditableRegion => (
 282            ranges.editable_180.clone(),
 283            ranges.editable_180_context_350.clone(),
 284        ),
 285        ZetaFormat::V0120GitMergeMarkers
 286        | ZetaFormat::V0131GitMergeMarkersPrefix
 287        | ZetaFormat::V0211Prefill
 288        | ZetaFormat::V0211SeedCoder
 289        | ZetaFormat::v0226Hashline
 290        | ZetaFormat::V0304SeedNoEdits
 291        | ZetaFormat::V0306SeedMultiRegions => (
 292            ranges.editable_350.clone(),
 293            ranges.editable_350_context_150.clone(),
 294        ),
 295        ZetaFormat::V0304VariableEdit => {
 296            let context = ranges
 297                .editable_350_context_1024
 298                .clone()
 299                .or(ranges.editable_350_context_512.clone())
 300                .unwrap_or_else(|| ranges.editable_350_context_150.clone());
 301            (context.clone(), context)
 302        }
 303    }
 304}
 305
 306pub fn write_cursor_excerpt_section_for_format(
 307    format: ZetaFormat,
 308    prompt: &mut String,
 309    path: &Path,
 310    context: &str,
 311    editable_range: &Range<usize>,
 312    cursor_offset: usize,
 313) {
 314    match format {
 315        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
 316            prompt,
 317            path,
 318            context,
 319            editable_range,
 320            cursor_offset,
 321        ),
 322        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 323            v0113_ordered::write_cursor_excerpt_section(
 324                prompt,
 325                path,
 326                context,
 327                editable_range,
 328                cursor_offset,
 329            )
 330        }
 331        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 332            prompt,
 333            path,
 334            context,
 335            editable_range,
 336            cursor_offset,
 337        ),
 338        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 339            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 340                prompt,
 341                path,
 342                context,
 343                editable_range,
 344                cursor_offset,
 345            )
 346        }
 347        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
 348            seed_coder::write_cursor_excerpt_section(
 349                prompt,
 350                path,
 351                context,
 352                editable_range,
 353                cursor_offset,
 354            )
 355        }
 356        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
 357            prompt,
 358            path,
 359            context,
 360            editable_range,
 361            cursor_offset,
 362        ),
 363        ZetaFormat::V0304VariableEdit => {
 364            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
 365        }
 366        ZetaFormat::V0306SeedMultiRegions => {
 367            prompt.push_str(&build_v0306_cursor_prefix(
 368                path,
 369                context,
 370                editable_range,
 371                cursor_offset,
 372            ));
 373        }
 374    }
 375}
 376
 377fn build_v0306_cursor_prefix(
 378    path: &Path,
 379    context: &str,
 380    editable_range: &Range<usize>,
 381    cursor_offset: usize,
 382) -> String {
 383    let mut section = String::new();
 384    let path_str = path.to_string_lossy();
 385    write!(section, "{}{}\n", seed_coder::FILE_MARKER, path_str).ok();
 386
 387    section.push_str(&context[..editable_range.start]);
 388    section.push_str(seed_coder::START_MARKER);
 389
 390    let editable_text = &context[editable_range.clone()];
 391    let cursor_in_editable = cursor_offset - editable_range.start;
 392    multi_region::write_editable_with_markers(
 393        &mut section,
 394        editable_text,
 395        cursor_in_editable,
 396        CURSOR_MARKER,
 397    );
 398
 399    if !section.ends_with('\n') {
 400        section.push('\n');
 401    }
 402    section.push_str(seed_coder::SEPARATOR);
 403    section
 404}
 405
 406fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
 407    let start_row = text[0..range.start].matches('\n').count() as u32;
 408    let mut end_row = start_row + text[range.clone()].matches('\n').count() as u32;
 409    if !text[..range.end].ends_with('\n') {
 410        end_row += 1;
 411    }
 412    return start_row..end_row;
 413}
 414
 415pub fn format_prompt_with_budget_for_format(
 416    input: &ZetaPromptInput,
 417    format: ZetaFormat,
 418    max_tokens: usize,
 419) -> String {
 420    let (context, editable_range, context_range, cursor_offset) =
 421        resolve_cursor_region(input, format);
 422    let path = &*input.cursor_path;
 423
 424    let empty_files = Vec::new();
 425    let input_related_files = input.related_files.as_deref().unwrap_or(&empty_files);
 426    let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
 427        let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
 428        let row_range = relative_row_range.start + cursor_excerpt_start_row
 429            ..relative_row_range.end + cursor_excerpt_start_row;
 430        &filter_redundant_excerpts(
 431            input_related_files.to_vec(),
 432            input.cursor_path.as_ref(),
 433            row_range,
 434        )
 435    } else {
 436        input_related_files
 437    };
 438
 439    match format {
 440        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
 441            seed_coder::format_prompt_with_budget(
 442                path,
 443                context,
 444                &editable_range,
 445                cursor_offset,
 446                &input.events,
 447                related_files,
 448                max_tokens,
 449            )
 450        }
 451        ZetaFormat::V0306SeedMultiRegions => {
 452            let cursor_prefix =
 453                build_v0306_cursor_prefix(path, context, &editable_range, cursor_offset);
 454            seed_coder::assemble_fim_prompt(
 455                context,
 456                &editable_range,
 457                &cursor_prefix,
 458                &input.events,
 459                related_files,
 460                max_tokens,
 461            )
 462        }
 463        _ => {
 464            let mut cursor_section = String::new();
 465            write_cursor_excerpt_section_for_format(
 466                format,
 467                &mut cursor_section,
 468                path,
 469                context,
 470                &editable_range,
 471                cursor_offset,
 472            );
 473
 474            let cursor_tokens = estimate_tokens(cursor_section.len());
 475            let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
 476
 477            let edit_history_section = format_edit_history_within_budget(
 478                &input.events,
 479                "<|file_sep|>",
 480                "edit history",
 481                budget_after_cursor,
 482            );
 483            let edit_history_tokens = estimate_tokens(edit_history_section.len());
 484            let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 485
 486            let related_files_section = format_related_files_within_budget(
 487                &related_files,
 488                "<|file_sep|>",
 489                "",
 490                budget_after_edit_history,
 491            );
 492
 493            let mut prompt = String::new();
 494            prompt.push_str(&related_files_section);
 495            prompt.push_str(&edit_history_section);
 496            prompt.push_str(&cursor_section);
 497            prompt
 498        }
 499    }
 500}
 501
 502pub fn filter_redundant_excerpts(
 503    mut related_files: Vec<RelatedFile>,
 504    cursor_path: &Path,
 505    cursor_row_range: Range<u32>,
 506) -> Vec<RelatedFile> {
 507    for file in &mut related_files {
 508        if file.path.as_ref() == cursor_path {
 509            file.excerpts.retain(|excerpt| {
 510                excerpt.row_range.start < cursor_row_range.start
 511                    || excerpt.row_range.end > cursor_row_range.end
 512            });
 513        }
 514    }
 515    related_files.retain(|file| !file.excerpts.is_empty());
 516    related_files
 517}
 518
 519pub fn get_prefill_for_format(
 520    format: ZetaFormat,
 521    context: &str,
 522    editable_range: &Range<usize>,
 523) -> String {
 524    match format {
 525        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
 526        ZetaFormat::V0112MiddleAtEnd
 527        | ZetaFormat::V0113Ordered
 528        | ZetaFormat::V0114180EditableRegion
 529        | ZetaFormat::V0120GitMergeMarkers
 530        | ZetaFormat::V0131GitMergeMarkersPrefix
 531        | ZetaFormat::V0211SeedCoder
 532        | ZetaFormat::v0226Hashline
 533        | ZetaFormat::V0304VariableEdit => String::new(),
 534        ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => String::new(),
 535    }
 536}
 537
 538pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
 539    match format {
 540        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
 541        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
 542        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
 543        ZetaFormat::V0211SeedCoder
 544        | ZetaFormat::V0304SeedNoEdits
 545        | ZetaFormat::V0306SeedMultiRegions => Some(seed_coder::END_MARKER),
 546        ZetaFormat::V0112MiddleAtEnd
 547        | ZetaFormat::V0113Ordered
 548        | ZetaFormat::V0114180EditableRegion
 549        | ZetaFormat::v0226Hashline
 550        | ZetaFormat::V0304VariableEdit => None,
 551    }
 552}
 553
 554pub fn encode_patch_as_output_for_format(
 555    format: ZetaFormat,
 556    old_editable_region: &str,
 557    patch: &str,
 558    cursor_offset: Option<usize>,
 559) -> Result<Option<String>> {
 560    match format {
 561        ZetaFormat::v0226Hashline => {
 562            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
 563        }
 564        ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
 565            old_editable_region,
 566            patch,
 567            cursor_offset,
 568        )
 569        .map(Some),
 570        ZetaFormat::V0304SeedNoEdits | ZetaFormat::V0306SeedMultiRegions => {
 571            Ok(seed_coder::no_edits(patch))
 572        }
 573        _ => Ok(None),
 574    }
 575}
 576
/// The result of parsing model output: replacement text plus the range of
/// the cursor excerpt it replaces. Produced by [`parse_zeta2_model_output`].
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
}
 583
 584/// Parse model output for the given zeta format
 585pub fn parse_zeta2_model_output(
 586    output: &str,
 587    format: ZetaFormat,
 588    prompt_inputs: &ZetaPromptInput,
 589) -> Result<ParsedOutput> {
 590    let output = match output_end_marker_for_format(format) {
 591        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
 592        None => output,
 593    };
 594
 595    let (context, editable_range_in_context, context_range, _) =
 596        resolve_cursor_region(prompt_inputs, format);
 597    let context_start = context_range.start;
 598    let old_editable_region = &context[editable_range_in_context.clone()];
 599
 600    let (range_in_context, output) = match format {
 601        ZetaFormat::v0226Hashline => (
 602            editable_range_in_context,
 603            if hashline::output_has_edit_commands(output) {
 604                hashline::apply_edit_commands(old_editable_region, output)
 605            } else {
 606                output.to_string()
 607            },
 608        ),
 609        ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
 610        ZetaFormat::V0304SeedNoEdits => (
 611            editable_range_in_context,
 612            if output.starts_with(seed_coder::NO_EDITS) {
 613                old_editable_region.to_string()
 614            } else {
 615                output.to_string()
 616            },
 617        ),
 618        ZetaFormat::V0306SeedMultiRegions => (
 619            editable_range_in_context,
 620            if output.starts_with(seed_coder::NO_EDITS) {
 621                old_editable_region.to_string()
 622            } else {
 623                multi_region::apply_marker_span(old_editable_region, output)?
 624            },
 625        ),
 626        _ => (editable_range_in_context, output.to_string()),
 627    };
 628
 629    let range_in_excerpt =
 630        range_in_context.start + context_start..range_in_context.end + context_start;
 631
 632    Ok(ParsedOutput {
 633        new_editable_region: output,
 634        range_in_excerpt,
 635    })
 636}
 637
 638pub fn excerpt_range_for_format(
 639    format: ZetaFormat,
 640    ranges: &ExcerptRanges,
 641) -> (Range<usize>, Range<usize>) {
 642    excerpt_ranges_for_format(format, ranges)
 643}
 644
 645pub fn resolve_cursor_region(
 646    input: &ZetaPromptInput,
 647    format: ZetaFormat,
 648) -> (&str, Range<usize>, Range<usize>, usize) {
 649    let (editable_range, context_range) = if let Some(syntax_ranges) = &input.syntax_ranges {
 650        let (editable_tokens, context_tokens) = token_limits_for_format(format);
 651        compute_editable_and_context_ranges(
 652            &input.cursor_excerpt,
 653            input.cursor_offset_in_excerpt,
 654            syntax_ranges,
 655            editable_tokens,
 656            context_tokens,
 657        )
 658    } else {
 659        excerpt_range_for_format(format, &input.excerpt_ranges)
 660    };
 661    let context_start = context_range.start;
 662    let context_text = &input.cursor_excerpt[context_range.clone()];
 663    let adjusted_editable =
 664        (editable_range.start - context_start)..(editable_range.end - context_start);
 665    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 666
 667    (
 668        context_text,
 669        adjusted_editable,
 670        context_range,
 671        adjusted_cursor,
 672    )
 673}
 674
 675pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 676    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
 677    get_prefill_for_format(format, context, &editable_range)
 678}
 679
 680fn format_edit_history_within_budget(
 681    events: &[Arc<Event>],
 682    file_marker: &str,
 683    edit_history_name: &str,
 684    max_tokens: usize,
 685) -> String {
 686    let header = format!("{}{}\n", file_marker, edit_history_name);
 687    let header_tokens = estimate_tokens(header.len());
 688    if header_tokens >= max_tokens {
 689        return String::new();
 690    }
 691
 692    let mut event_strings: Vec<String> = Vec::new();
 693    let mut total_tokens = header_tokens;
 694
 695    for event in events.iter().rev() {
 696        let mut event_str = String::new();
 697        write_event(&mut event_str, event);
 698        let event_tokens = estimate_tokens(event_str.len());
 699
 700        if total_tokens + event_tokens > max_tokens {
 701            break;
 702        }
 703        total_tokens += event_tokens;
 704        event_strings.push(event_str);
 705    }
 706
 707    if event_strings.is_empty() {
 708        return String::new();
 709    }
 710
 711    let mut result = header;
 712    for event_str in event_strings.iter().rev() {
 713        result.push_str(event_str);
 714    }
 715    result
 716}
 717
 718fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
 719    let needs_newline = !excerpt.text.ends_with('\n');
 720    let needs_ellipsis = excerpt.row_range.end < file_max_row;
 721    let len = excerpt.text.len()
 722        + if needs_newline { "\n".len() } else { 0 }
 723        + if needs_ellipsis { "...\n".len() } else { 0 };
 724    estimate_tokens(len)
 725}
 726
 727pub fn format_related_files_within_budget(
 728    related_files: &[RelatedFile],
 729    file_prefix: &str,
 730    file_suffix: &str,
 731    max_tokens: usize,
 732) -> String {
 733    struct ExcerptCandidate {
 734        file_ix: usize,
 735        excerpt_ix: usize,
 736        order: usize,
 737    }
 738
 739    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
 740        .iter()
 741        .enumerate()
 742        .flat_map(|(file_ix, file)| {
 743            file.excerpts
 744                .iter()
 745                .enumerate()
 746                .map(move |(excerpt_ix, e)| ExcerptCandidate {
 747                    file_ix,
 748                    excerpt_ix,
 749                    order: e.order,
 750                })
 751        })
 752        .collect();
 753
 754    // Pre-compute file header strings and their token costs.
 755    let file_headers: Vec<String> = related_files
 756        .iter()
 757        .map(|file| {
 758            let path_str = file.path.to_string_lossy();
 759            format!("{}{}\n", file_prefix, path_str)
 760        })
 761        .collect();
 762
 763    // Sort the excerpts by their order and determine how many fit within the budget.
 764    let mut total_tokens = 0;
 765    let mut included_excerpt_count = 0_usize;
 766    let mut included_file_indices = vec![false; related_files.len()];
 767    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
 768    for candidate in &excerpt_candidates {
 769        let file = &related_files[candidate.file_ix];
 770        let excerpt = &file.excerpts[candidate.excerpt_ix];
 771        let file_already_included = included_file_indices[candidate.file_ix];
 772        let header_cost = if file_already_included {
 773            0
 774        } else {
 775            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
 776        };
 777        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
 778        if total_tokens + header_cost + excerpt_cost > max_tokens {
 779            break;
 780        }
 781        total_tokens += header_cost + excerpt_cost;
 782        if !file_already_included {
 783            included_file_indices[candidate.file_ix] = true;
 784        }
 785        included_excerpt_count += 1;
 786    }
 787
 788    excerpt_candidates.truncate(included_excerpt_count);
 789    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
 790
 791    // Render all of the files that fit within the token budget, in the original order.
 792    let mut result = String::new();
 793    let mut last_file_ix = None;
 794    for candidate in &excerpt_candidates {
 795        if last_file_ix != Some(candidate.file_ix) {
 796            if last_file_ix.is_some() {
 797                result.push_str(file_suffix);
 798            }
 799            result.push_str(&file_headers[candidate.file_ix]);
 800            last_file_ix = Some(candidate.file_ix);
 801        }
 802        let file = &related_files[candidate.file_ix];
 803        let excerpt = &file.excerpts[candidate.excerpt_ix];
 804        result.push_str(&excerpt.text);
 805        if !result.ends_with('\n') {
 806            result.push('\n');
 807        }
 808        if excerpt.row_range.end < file.max_row {
 809            result.push_str("...\n");
 810        }
 811    }
 812
 813    result
 814}
 815
 816pub fn write_related_files(
 817    prompt: &mut String,
 818    related_files: &[RelatedFile],
 819) -> Vec<Range<usize>> {
 820    let mut ranges = Vec::new();
 821    for file in related_files {
 822        let start = prompt.len();
 823        let path_str = file.path.to_string_lossy();
 824        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 825        for excerpt in &file.excerpts {
 826            prompt.push_str(&excerpt.text);
 827            if !prompt.ends_with('\n') {
 828                prompt.push('\n');
 829            }
 830            if excerpt.row_range.end < file.max_row {
 831                prompt.push_str("...\n");
 832            }
 833        }
 834        let end = prompt.len();
 835        ranges.push(start..end);
 836    }
 837    ranges
 838}
 839
 840mod v0112_middle_at_end {
 841    use super::*;
 842
 843    pub fn special_tokens() -> &'static [&'static str] {
 844        &[
 845            "<|fim_prefix|>",
 846            "<|fim_suffix|>",
 847            "<|fim_middle|>",
 848            "<|file_sep|>",
 849            CURSOR_MARKER,
 850        ]
 851    }
 852
 853    pub fn write_cursor_excerpt_section(
 854        prompt: &mut String,
 855        path: &Path,
 856        context: &str,
 857        editable_range: &Range<usize>,
 858        cursor_offset: usize,
 859    ) {
 860        let path_str = path.to_string_lossy();
 861        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 862
 863        prompt.push_str("<|fim_prefix|>\n");
 864        prompt.push_str(&context[..editable_range.start]);
 865
 866        prompt.push_str("<|fim_suffix|>\n");
 867        prompt.push_str(&context[editable_range.end..]);
 868        if !prompt.ends_with('\n') {
 869            prompt.push('\n');
 870        }
 871
 872        prompt.push_str("<|fim_middle|>current\n");
 873        prompt.push_str(&context[editable_range.start..cursor_offset]);
 874        prompt.push_str(CURSOR_MARKER);
 875        prompt.push_str(&context[cursor_offset..editable_range.end]);
 876        if !prompt.ends_with('\n') {
 877            prompt.push('\n');
 878        }
 879
 880        prompt.push_str("<|fim_middle|>updated\n");
 881    }
 882}
 883
 884mod v0113_ordered {
 885    use super::*;
 886
 887    pub fn special_tokens() -> &'static [&'static str] {
 888        &[
 889            "<|fim_prefix|>",
 890            "<|fim_suffix|>",
 891            "<|fim_middle|>",
 892            "<|file_sep|>",
 893            CURSOR_MARKER,
 894        ]
 895    }
 896
 897    pub fn write_cursor_excerpt_section(
 898        prompt: &mut String,
 899        path: &Path,
 900        context: &str,
 901        editable_range: &Range<usize>,
 902        cursor_offset: usize,
 903    ) {
 904        let path_str = path.to_string_lossy();
 905        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 906
 907        prompt.push_str("<|fim_prefix|>\n");
 908        prompt.push_str(&context[..editable_range.start]);
 909        if !prompt.ends_with('\n') {
 910            prompt.push('\n');
 911        }
 912
 913        prompt.push_str("<|fim_middle|>current\n");
 914        prompt.push_str(&context[editable_range.start..cursor_offset]);
 915        prompt.push_str(CURSOR_MARKER);
 916        prompt.push_str(&context[cursor_offset..editable_range.end]);
 917        if !prompt.ends_with('\n') {
 918            prompt.push('\n');
 919        }
 920
 921        prompt.push_str("<|fim_suffix|>\n");
 922        prompt.push_str(&context[editable_range.end..]);
 923        if !prompt.ends_with('\n') {
 924            prompt.push('\n');
 925        }
 926
 927        prompt.push_str("<|fim_middle|>updated\n");
 928    }
 929}
 930
mod v0114180_editable_region {
    use super::*;

    /// Special tokens for this format; identical to those returned by
    /// `v0113_ordered::special_tokens`, to which this call is forwarded.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
 938
 939pub mod v0120_git_merge_markers {
 940    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 941    //!
 942    //! Example prompt:
 943    //!
 944    //! <|file_sep|>path/to/target_file.py
 945    //! <|fim_prefix|>
 946    //! code before editable region
 947    //! <|fim_suffix|>
 948    //! code after editable region
 949    //! <|fim_middle|>
 950    //! <<<<<<< CURRENT
 951    //! code that
 952    //! needs to<|user_cursor|>
 953    //! be rewritten
 954    //! =======
 955    //!
 956    //! Expected output (should be generated by the model):
 957    //!
 958    //! updated
 959    //! code with
 960    //! changes applied
 961    //! >>>>>>> UPDATED
 962
 963    use super::*;
 964
 965    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 966    pub const SEPARATOR: &str = "=======\n";
 967    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 968
 969    pub fn special_tokens() -> &'static [&'static str] {
 970        &[
 971            "<|fim_prefix|>",
 972            "<|fim_suffix|>",
 973            "<|fim_middle|>",
 974            "<|file_sep|>",
 975            START_MARKER,
 976            SEPARATOR,
 977            END_MARKER,
 978            CURSOR_MARKER,
 979        ]
 980    }
 981
 982    pub fn write_cursor_excerpt_section(
 983        prompt: &mut String,
 984        path: &Path,
 985        context: &str,
 986        editable_range: &Range<usize>,
 987        cursor_offset: usize,
 988    ) {
 989        let path_str = path.to_string_lossy();
 990        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 991
 992        prompt.push_str("<|fim_prefix|>");
 993        prompt.push_str(&context[..editable_range.start]);
 994
 995        prompt.push_str("<|fim_suffix|>");
 996        prompt.push_str(&context[editable_range.end..]);
 997        if !prompt.ends_with('\n') {
 998            prompt.push('\n');
 999        }
1000
1001        prompt.push_str("<|fim_middle|>");
1002        prompt.push_str(START_MARKER);
1003        prompt.push_str(&context[editable_range.start..cursor_offset]);
1004        prompt.push_str(CURSOR_MARKER);
1005        prompt.push_str(&context[cursor_offset..editable_range.end]);
1006        if !prompt.ends_with('\n') {
1007            prompt.push('\n');
1008        }
1009        prompt.push_str(SEPARATOR);
1010    }
1011}
1012
1013pub mod v0131_git_merge_markers_prefix {
1014    //! A prompt that uses git-style merge conflict markers to represent the editable region.
1015    //!
1016    //! Example prompt:
1017    //!
1018    //! <|file_sep|>path/to/target_file.py
1019    //! <|fim_prefix|>
1020    //! code before editable region
1021    //! <<<<<<< CURRENT
1022    //! code that
1023    //! needs to<|user_cursor|>
1024    //! be rewritten
1025    //! =======
1026    //! <|fim_suffix|>
1027    //! code after editable region
1028    //! <|fim_middle|>
1029    //!
1030    //! Expected output (should be generated by the model):
1031    //!
1032    //! updated
1033    //! code with
1034    //! changes applied
1035    //! >>>>>>> UPDATED
1036
1037    use super::*;
1038
1039    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
1040    pub const SEPARATOR: &str = "=======\n";
1041    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
1042
1043    pub fn special_tokens() -> &'static [&'static str] {
1044        &[
1045            "<|fim_prefix|>",
1046            "<|fim_suffix|>",
1047            "<|fim_middle|>",
1048            "<|file_sep|>",
1049            START_MARKER,
1050            SEPARATOR,
1051            END_MARKER,
1052            CURSOR_MARKER,
1053        ]
1054    }
1055
1056    pub fn write_cursor_excerpt_section(
1057        prompt: &mut String,
1058        path: &Path,
1059        context: &str,
1060        editable_range: &Range<usize>,
1061        cursor_offset: usize,
1062    ) {
1063        let path_str = path.to_string_lossy();
1064        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1065
1066        prompt.push_str("<|fim_prefix|>");
1067        prompt.push_str(&context[..editable_range.start]);
1068        prompt.push_str(START_MARKER);
1069        prompt.push_str(&context[editable_range.start..cursor_offset]);
1070        prompt.push_str(CURSOR_MARKER);
1071        prompt.push_str(&context[cursor_offset..editable_range.end]);
1072        if !prompt.ends_with('\n') {
1073            prompt.push('\n');
1074        }
1075        prompt.push_str(SEPARATOR);
1076
1077        prompt.push_str("<|fim_suffix|>");
1078        prompt.push_str(&context[editable_range.end..]);
1079        if !prompt.ends_with('\n') {
1080            prompt.push('\n');
1081        }
1082
1083        prompt.push_str("<|fim_middle|>");
1084    }
1085}
1086
1087pub mod v0211_prefill {
1088    use super::*;
1089
1090    pub fn special_tokens() -> &'static [&'static str] {
1091        v0131_git_merge_markers_prefix::special_tokens()
1092    }
1093
1094    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
1095        let editable_region = &context[editable_range.start..editable_range.end];
1096
1097        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
1098        let prefill_len = editable_region.floor_char_boundary(prefill_len);
1099
1100        // Find a token boundary to avoid splitting tokens in the prefill.
1101        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
1102        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
1103        // the \n and consume any consecutive \n characters after it.
1104        let prefill = &editable_region[..prefill_len];
1105        match prefill.rfind('\n') {
1106            Some(pos) => {
1107                let mut end = pos + 1;
1108                while end < editable_region.len()
1109                    && editable_region.as_bytes().get(end) == Some(&b'\n')
1110                {
1111                    end += 1;
1112                }
1113                editable_region[..end].to_string()
1114            }
1115            // No newline found. Fall back to splitting before the last space
1116            // (word-level boundary)
1117            None => match prefill.rfind(' ') {
1118                Some(pos) => prefill[..pos].to_string(),
1119                None => prefill.to_string(),
1120            },
1121        }
1122    }
1123}
1124
1125pub mod hashline {
1126
1127    use std::fmt::Display;
1128
    /// Written (followed by a newline) after the suffix section by
    /// `write_cursor_excerpt_section`; it is the last marker in the prompt.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Written by `write_cursor_excerpt_section` directly before the
    /// hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";
1131
1132    use super::*;
1133
    /// Prefix of a command line that replaces one line or an inclusive range
    /// of lines.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a command line that inserts new lines.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
    /// Output marker meaning the editable region is to be left unchanged.
    pub const NO_EDITS_COMMAND_MARKER: &str = "<|no_edits|>";
1137
1138    pub fn special_tokens() -> &'static [&'static str] {
1139        return &[
1140            SET_COMMAND_MARKER,
1141            "<|set_range|>",
1142            INSERT_COMMAND_MARKER,
1143            NO_EDITS_COMMAND_MARKER,
1144            CURSOR_MARKER,
1145            "<|file_sep|>",
1146            "<|fim_prefix|>",
1147            "<|fim_suffix|>",
1148            "<|fim_middle|>",
1149        ];
1150    }
1151
1152    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1153    #[derive(Debug, Clone, PartialEq, Eq)]
1154    struct LineRef {
1155        index: usize,
1156        hash: u8,
1157    }
1158
1159    impl Display for LineRef {
1160        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1161            write!(f, "{}:{:02x}", self.index, self.hash)
1162        }
1163    }
1164
1165    pub fn hash_line(line: &[u8]) -> u8 {
1166        let mut h: u8 = 0;
1167        for &byte in line {
1168            h = h.wrapping_add(byte);
1169        }
1170        return h;
1171    }
1172
1173    /// Write the hashline-encoded editable region into `out`. Each line of
1174    /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1175    /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1176    /// to the start of `editable_text`).
1177    pub fn write_hashline_editable_region(
1178        out: &mut String,
1179        editable_text: &str,
1180        cursor_offset_in_editable: usize,
1181    ) {
1182        let mut offset = 0;
1183        for (i, line) in editable_text.lines().enumerate() {
1184            let (head, cursor, tail) = if cursor_offset_in_editable > offset
1185                && cursor_offset_in_editable < offset + line.len()
1186            {
1187                (
1188                    &line[..cursor_offset_in_editable - offset],
1189                    CURSOR_MARKER,
1190                    &line[cursor_offset_in_editable - offset..],
1191                )
1192            } else {
1193                (line, "", "")
1194            };
1195            write!(
1196                out,
1197                "\n{}|{head}{cursor}{tail}",
1198                LineRef {
1199                    index: i,
1200                    hash: hash_line(line.as_bytes())
1201                }
1202            )
1203            .unwrap();
1204            offset += line.len() + 1;
1205        }
1206    }
1207
1208    pub fn write_cursor_excerpt_section(
1209        prompt: &mut String,
1210        path: &Path,
1211        context: &str,
1212        editable_range: &Range<usize>,
1213        cursor_offset: usize,
1214    ) {
1215        let path_str = path.to_string_lossy();
1216        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1217
1218        prompt.push_str("<|fim_prefix|>\n");
1219        prompt.push_str(&context[..editable_range.start]);
1220        prompt.push_str(START_MARKER);
1221
1222        let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1223        let editable_region = &context[editable_range.clone()];
1224        write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1225
1226        if !prompt.ends_with('\n') {
1227            prompt.push('\n');
1228        }
1229
1230        prompt.push_str("<|fim_suffix|>\n");
1231        prompt.push_str(&context[editable_range.end..]);
1232        if !prompt.ends_with('\n') {
1233            prompt.push('\n');
1234        }
1235
1236        prompt.push_str(END_MARKER);
1237        prompt.push('\n');
1238    }
1239
    /// A single edit command parsed from the model output.
    ///
    /// `content` borrows from the model output passed to
    /// `parse_edit_commands`: it is the text between the command line and the
    /// next command line (or the end of the output).
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            start: LineRef,
            end: LineRef,
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`. `parse_edit_commands` yields `None` whenever the
        /// text after the insert marker does not parse as a line reference.
        Insert {
            after: Option<LineRef>,
            content: &'a str,
        },
    }
1257
1258    /// Parse a line reference like `3:c3` into a `LineRef`.
1259    fn parse_line_ref(s: &str) -> Option<LineRef> {
1260        let (idx_str, hash_str) = s.split_once(':')?;
1261        let index = idx_str.parse::<usize>().ok()?;
1262        let hash = u8::from_str_radix(hash_str, 16).ok()?;
1263        Some(LineRef { index, hash })
1264    }
1265
1266    /// Parse the model output into a list of `EditCommand`s.
1267    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1268        let mut commands = Vec::new();
1269        let mut offset = 0usize;
1270
1271        while offset < model_output.len() {
1272            let next_nl = model_output[offset..]
1273                .find('\n')
1274                .map(|i| offset + i)
1275                .unwrap_or(model_output.len());
1276            let line = &model_output[offset..next_nl];
1277            let line_end = if next_nl < model_output.len() {
1278                next_nl + 1
1279            } else {
1280                next_nl
1281            };
1282
1283            let trimmed = line.trim();
1284            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1285                (true, spec)
1286            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1287                (false, spec)
1288            } else {
1289                offset = line_end;
1290                continue;
1291            };
1292
1293            let mut content_end = line_end;
1294            let mut scan = line_end;
1295
1296            while scan < model_output.len() {
1297                let body_nl = model_output[scan..]
1298                    .find('\n')
1299                    .map(|i| scan + i)
1300                    .unwrap_or(model_output.len());
1301                let body_line = &model_output[scan..body_nl];
1302                if body_line.trim().starts_with(SET_COMMAND_MARKER)
1303                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1304                {
1305                    break;
1306                }
1307                scan = if body_nl < model_output.len() {
1308                    body_nl + 1
1309                } else {
1310                    body_nl
1311                };
1312                content_end = scan;
1313            }
1314
1315            let content = &model_output[line_end..content_end];
1316
1317            if is_set {
1318                if let Some((start_str, end_str)) = specifier.split_once('-') {
1319                    if let (Some(start), Some(end)) =
1320                        (parse_line_ref(start_str), parse_line_ref(end_str))
1321                    {
1322                        commands.push(EditCommand::Set {
1323                            start,
1324                            end,
1325                            content,
1326                        });
1327                    }
1328                } else if let Some(target) = parse_line_ref(specifier) {
1329                    commands.push(EditCommand::Set {
1330                        start: target.clone(),
1331                        end: target,
1332                        content,
1333                    });
1334                }
1335            } else {
1336                let after = parse_line_ref(specifier);
1337                commands.push(EditCommand::Insert { after, content });
1338            }
1339
1340            offset = scan;
1341        }
1342
1343        commands
1344    }
1345
1346    /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1347    /// (as opposed to being a plain full-replacement output).
1348    /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1349    /// editable region, returning the plain text content.
1350    pub fn strip_hashline_prefixes(region: &str) -> String {
1351        let mut decoded: String = region
1352            .lines()
1353            .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1354            .collect::<Vec<_>>()
1355            .join("\n");
1356        if region.ends_with('\n') {
1357            decoded.push('\n');
1358        }
1359        decoded
1360    }
1361
1362    pub fn output_has_edit_commands(model_output: &str) -> bool {
1363        model_output.contains(SET_COMMAND_MARKER)
1364            || model_output.contains(INSERT_COMMAND_MARKER)
1365            || model_output.contains(NO_EDITS_COMMAND_MARKER)
1366    }
1367
1368    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1369    /// original editable region text.
1370    ///
1371    /// `editable_region` is the original text of the editable region (without hash
1372    /// prefixes). `model_output` is the raw model response containing edit commands.
1373    ///
1374    /// Returns the full replacement text for the editable region.
1375    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1376        if model_output
1377            .trim_start()
1378            .starts_with(NO_EDITS_COMMAND_MARKER)
1379        {
1380            return editable_region.to_string();
1381        }
1382
1383        let original_lines: Vec<&str> = editable_region.lines().collect();
1384        let old_hashes: Vec<u8> = original_lines
1385            .iter()
1386            .map(|line| hash_line(line.as_bytes()))
1387            .collect();
1388
1389        let commands = parse_edit_commands(model_output);
1390
1391        // For set operations: indexed by start line → Some((end line index, content))
1392        // For insert operations: indexed by line index → vec of content to insert after
1393        // Insert-before-first is tracked separately.
1394        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1395        let mut insert_before_first: Vec<&str> = Vec::new();
1396        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1397
1398        for command in &commands {
1399            match command {
1400                EditCommand::Set {
1401                    start,
1402                    end,
1403                    content,
1404                } => {
1405                    if start.index < old_hashes.len()
1406                        && end.index < old_hashes.len()
1407                        && start.index <= end.index
1408                        && old_hashes[start.index] == start.hash
1409                        && old_hashes[end.index] == end.hash
1410                    {
1411                        set_ops[start.index] = Some((end.index, *content));
1412                    }
1413                }
1414                EditCommand::Insert { after, content } => match after {
1415                    None => insert_before_first.push(*content),
1416                    Some(line_ref) => {
1417                        if line_ref.index < old_hashes.len()
1418                            && old_hashes[line_ref.index] == line_ref.hash
1419                        {
1420                            insert_after[line_ref.index].push(*content);
1421                        }
1422                    }
1423                },
1424            }
1425        }
1426
1427        let mut result = String::new();
1428
1429        // Emit any insertions before the first line
1430        for content in &insert_before_first {
1431            result.push_str(content);
1432            if !content.ends_with('\n') {
1433                result.push('\n');
1434            }
1435        }
1436
1437        let mut i = 0;
1438        while i < original_lines.len() {
1439            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1440                // Replace lines i..=end_index with the replacement content
1441                result.push_str(replacement);
1442                if !replacement.is_empty() && !replacement.ends_with('\n') {
1443                    result.push('\n');
1444                }
1445                // Emit any insertions after the end of this set range
1446                if *end_index < insert_after.len() {
1447                    for content in &insert_after[*end_index] {
1448                        result.push_str(content);
1449                        if !content.ends_with('\n') {
1450                            result.push('\n');
1451                        }
1452                    }
1453                }
1454                i = end_index + 1;
1455            } else {
1456                // Keep the original line
1457                result.push_str(original_lines[i]);
1458                result.push('\n');
1459                // Emit any insertions after this line
1460                for content in &insert_after[i] {
1461                    result.push_str(content);
1462                    if !content.ends_with('\n') {
1463                        result.push('\n');
1464                    }
1465                }
1466                i += 1;
1467            }
1468        }
1469
1470        // Preserve trailing newline behavior: if the original ended with a
1471        // newline the result already has one; if it didn't, trim the extra one
1472        // we added.
1473        if !editable_region.ends_with('\n') && result.ends_with('\n') {
1474            result.pop();
1475        }
1476
1477        result
1478    }
1479
1480    /// Convert a unified diff patch into hashline edit commands.
1481    ///
1482    /// Parses the unified diff `patch` directly to determine which lines of
1483    /// `old_text` are deleted/replaced and what new lines are added, then emits
1484    /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1485    /// `{index}:{hash}` identifiers.
1486    ///
1487    /// `cursor_offset` is an optional byte offset into the first hunk's new
1488    /// text (context + additions) where the cursor marker should be placed.
1489    pub fn patch_to_edit_commands(
1490        old_text: &str,
1491        patch: &str,
1492        cursor_offset: Option<usize>,
1493    ) -> Result<String> {
1494        let old_lines: Vec<&str> = old_text.lines().collect();
1495        let old_hashes: Vec<u8> = old_lines
1496            .iter()
1497            .map(|line| hash_line(line.as_bytes()))
1498            .collect();
1499
1500        let mut result = String::new();
1501        let mut first_hunk = true;
1502
1503        struct Hunk<'a> {
1504            line_range: Range<usize>,
1505            new_text_lines: Vec<&'a str>,
1506            cursor_line_offset_in_new_text: Option<(usize, usize)>,
1507        }
1508
1509        // Parse the patch line by line. We only care about hunk headers,
1510        // context, deletions, and additions.
1511        let mut old_line_index: usize = 0;
1512        let mut current_hunk: Option<Hunk> = None;
1513        // Byte offset tracking within the hunk's new text for cursor placement.
1514        let mut new_text_byte_offset: usize = 0;
1515        // The line index of the last old line seen before/in the current hunk
1516        // (used for insert-after reference).
1517        let mut last_old_line_before_hunk: Option<usize> = None;
1518
1519        fn flush_hunk(
1520            hunk: Hunk,
1521            last_old_line: Option<usize>,
1522            result: &mut String,
1523            old_hashes: &[u8],
1524        ) {
1525            if hunk.line_range.is_empty() {
1526                // Pure insertion — reference the old line to insert after when in bounds.
1527                if let Some(after) = last_old_line
1528                    && let Some(&hash) = old_hashes.get(after)
1529                {
1530                    write!(
1531                        result,
1532                        "{INSERT_COMMAND_MARKER}{}\n",
1533                        LineRef { index: after, hash }
1534                    )
1535                    .unwrap();
1536                } else {
1537                    result.push_str(INSERT_COMMAND_MARKER);
1538                    result.push('\n');
1539                }
1540            } else {
1541                let start = hunk.line_range.start;
1542                let end_exclusive = hunk.line_range.end;
1543                let deleted_line_count = end_exclusive.saturating_sub(start);
1544
1545                if deleted_line_count == 1 {
1546                    if let Some(&hash) = old_hashes.get(start) {
1547                        write!(
1548                            result,
1549                            "{SET_COMMAND_MARKER}{}\n",
1550                            LineRef { index: start, hash }
1551                        )
1552                        .unwrap();
1553                    } else {
1554                        result.push_str(SET_COMMAND_MARKER);
1555                        result.push('\n');
1556                    }
1557                } else {
1558                    let end_inclusive = end_exclusive - 1;
1559                    match (
1560                        old_hashes.get(start).copied(),
1561                        old_hashes.get(end_inclusive).copied(),
1562                    ) {
1563                        (Some(start_hash), Some(end_hash)) => {
1564                            write!(
1565                                result,
1566                                "{SET_COMMAND_MARKER}{}-{}\n",
1567                                LineRef {
1568                                    index: start,
1569                                    hash: start_hash
1570                                },
1571                                LineRef {
1572                                    index: end_inclusive,
1573                                    hash: end_hash
1574                                }
1575                            )
1576                            .unwrap();
1577                        }
1578                        _ => {
1579                            result.push_str(SET_COMMAND_MARKER);
1580                            result.push('\n');
1581                        }
1582                    }
1583                }
1584            }
1585            for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1586                if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1587                    && line_offset == cursor_line_offset
1588                {
1589                    result.push_str(&line[..char_offset]);
1590                    result.push_str(CURSOR_MARKER);
1591                    result.push_str(&line[char_offset..]);
1592                    continue;
1593                }
1594
1595                result.push_str(line);
1596            }
1597        }
1598
1599        for raw_line in patch.split_inclusive('\n') {
1600            if raw_line.starts_with("@@") {
1601                // Flush any pending change hunk from a previous patch hunk.
1602                if let Some(hunk) = current_hunk.take() {
1603                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1604                }
1605
1606                // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1607                // We intentionally do not trust old_start as a direct local index into `old_text`,
1608                // because some patches are produced against a larger file region and carry
1609                // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1610                if first_hunk {
1611                    new_text_byte_offset = 0;
1612                    first_hunk = false;
1613                }
1614                continue;
1615            }
1616
1617            if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1618                continue;
1619            }
1620            if raw_line.starts_with("\\ No newline") {
1621                continue;
1622            }
1623
1624            if raw_line.starts_with('-') {
1625                // Extend or start a change hunk with this deleted old line.
1626                match &mut current_hunk {
1627                    Some(Hunk {
1628                        line_range: range, ..
1629                    }) => range.end = old_line_index + 1,
1630                    None => {
1631                        current_hunk = Some(Hunk {
1632                            line_range: old_line_index..old_line_index + 1,
1633                            new_text_lines: Vec::new(),
1634                            cursor_line_offset_in_new_text: None,
1635                        });
1636                    }
1637                }
1638                old_line_index += 1;
1639            } else if let Some(added_content) = raw_line.strip_prefix('+') {
1640                // Place cursor marker if cursor_offset falls within this line.
1641                let mut cursor_line_offset = None;
1642                if let Some(cursor_off) = cursor_offset
1643                    && (first_hunk
1644                        || cursor_off >= new_text_byte_offset
1645                            && cursor_off <= new_text_byte_offset + added_content.len())
1646                {
1647                    let line_offset = added_content.floor_char_boundary(
1648                        cursor_off
1649                            .saturating_sub(new_text_byte_offset)
1650                            .min(added_content.len()),
1651                    );
1652                    cursor_line_offset = Some(line_offset);
1653                }
1654
1655                new_text_byte_offset += added_content.len();
1656
1657                let hunk = current_hunk.get_or_insert(Hunk {
1658                    line_range: old_line_index..old_line_index,
1659                    new_text_lines: vec![],
1660                    cursor_line_offset_in_new_text: None,
1661                });
1662                hunk.new_text_lines.push(added_content);
1663                hunk.cursor_line_offset_in_new_text = cursor_line_offset
1664                    .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1665            } else {
1666                // Context line (starts with ' ' or is empty).
1667                if let Some(hunk) = current_hunk.take() {
1668                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1669                }
1670                last_old_line_before_hunk = Some(old_line_index);
1671                old_line_index += 1;
1672                let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1673                new_text_byte_offset += content.len();
1674            }
1675        }
1676
1677        // Flush final group.
1678        if let Some(hunk) = current_hunk.take() {
1679            flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1680        }
1681
1682        // Trim a single trailing newline.
1683        if result.ends_with('\n') {
1684            result.pop();
1685        }
1686
1687        if result.is_empty() {
1688            return Ok(NO_EDITS_COMMAND_MARKER.to_string());
1689        }
1690
1691        Ok(result)
1692    }
1693
1694    #[cfg(test)]
1695    mod tests {
1696        use super::*;
1697        use indoc::indoc;
1698
1699        #[test]
            // Table-driven test of `hashline::write_cursor_excerpt_section`: every case renders
            // one excerpt for "test.rs" and compares the whole output string with `expected`.
1700        fn test_format_cursor_region() {
                // Inputs and expected output for one rendering scenario.
1701            struct Case {
                    // Label printed in the assertion message when the case fails.
1702                name: &'static str,
                    // Excerpt text handed to the function under test.
1703                context: &'static str,
                    // Byte range within `context`, passed by reference as the editable range.
1704                editable_range: Range<usize>,
                    // Cursor byte offset; relative to `context`, not to the editable range
                    // (see the last case).
1705                cursor_offset: usize,
                    // Exact prompt text the call is expected to append to an empty buffer.
1706                expected: &'static str,
1707            }
1708
                // In `expected`, each editable line is written as `<index>:<two hex digits>|<text>`.
                // NOTE(review): the hex values below are consistent with the byte sum of the line
                // text (cursor marker excluded) modulo 256, e.g. "aaa" -> 23 and "abc"/"bbb" -> 26;
                // confirm against the hashing helper before relying on this.
1709            let cases = [
1710                Case {
1711                    name: "basic_cursor_placement",
1712                    context: "hello world\n",
1713                    editable_range: 0..12,
1714                    cursor_offset: 5,
1715                    expected: indoc! {"
1716                    <|file_sep|>test.rs
1717                    <|fim_prefix|>
1718                    <|fim_middle|>current
1719                    0:5c|hello<|user_cursor|> world
1720                    <|fim_suffix|>
1721                    <|fim_middle|>updated
1722                    "},
1723                },
1724                Case {
1725                    name: "multiline_cursor_on_second_line",
1726                    context: "aaa\nbbb\nccc\n",
1727                    editable_range: 0..12,
1728                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
1729                    expected: indoc! {"
1730                    <|file_sep|>test.rs
1731                    <|fim_prefix|>
1732                    <|fim_middle|>current
1733                    0:23|aaa
1734                    1:26|b<|user_cursor|>bb
1735                    2:29|ccc
1736                    <|fim_suffix|>
1737                    <|fim_middle|>updated
1738                    "},
1739                },
1740                Case {
1741                    name: "no_trailing_newline_in_context",
1742                    context: "line1\nline2",
1743                    editable_range: 0..11,
1744                    cursor_offset: 3,
1745                    expected: indoc! {"
1746                    <|file_sep|>test.rs
1747                    <|fim_prefix|>
1748                    <|fim_middle|>current
1749                    0:d9|lin<|user_cursor|>e1
1750                    1:da|line2
1751                    <|fim_suffix|>
1752                    <|fim_middle|>updated
1753                    "},
1754                },
1755                Case {
1756                    name: "leading_newline_in_editable_region",
1757                    context: "\nabc\n",
1758                    editable_range: 0..5,
1759                    cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
1760                    expected: indoc! {"
1761                    <|file_sep|>test.rs
1762                    <|fim_prefix|>
1763                    <|fim_middle|>current
1764                    0:00|
1765                    1:26|a<|user_cursor|>bc
1766                    <|fim_suffix|>
1767                    <|fim_middle|>updated
1768                    "},
1769                },
1770                Case {
1771                    name: "with_suffix",
1772                    context: "abc\ndef",
1773                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
1774                    cursor_offset: 2,
1775                    expected: indoc! {"
1776                    <|file_sep|>test.rs
1777                    <|fim_prefix|>
1778                    <|fim_middle|>current
1779                    0:26|ab<|user_cursor|>c
1780                    <|fim_suffix|>
1781                    def
1782                    <|fim_middle|>updated
1783                    "},
1784                },
                    // The three unicode cases place the cursor on a character boundary after
                    // two-, three- and four-byte characters respectively.
1785                Case {
1786                    name: "unicode_two_byte_chars",
1787                    context: "héllo\n",
1788                    editable_range: 0..7,
1789                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
1790                    expected: indoc! {"
1791                    <|file_sep|>test.rs
1792                    <|fim_prefix|>
1793                    <|fim_middle|>current
1794                    0:1b|hé<|user_cursor|>llo
1795                    <|fim_suffix|>
1796                    <|fim_middle|>updated
1797                    "},
1798                },
1799                Case {
1800                    name: "unicode_three_byte_chars",
1801                    context: "日本語\n",
1802                    editable_range: 0..10,
1803                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
1804                    expected: indoc! {"
1805                    <|file_sep|>test.rs
1806                    <|fim_prefix|>
1807                    <|fim_middle|>current
1808                    0:80|日本<|user_cursor|>語
1809                    <|fim_suffix|>
1810                    <|fim_middle|>updated
1811                    "},
1812                },
1813                Case {
1814                    name: "unicode_four_byte_chars",
1815                    context: "a🌍b\n",
1816                    editable_range: 0..7,
1817                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
1818                    expected: indoc! {"
1819                    <|file_sep|>test.rs
1820                    <|fim_prefix|>
1821                    <|fim_middle|>current
1822                    0:6b|a🌍<|user_cursor|>b
1823                    <|fim_suffix|>
1824                    <|fim_middle|>updated
1825                    "},
1826                },
                    // The next two cases pin outputs in which no cursor marker is emitted at all.
1827                Case {
1828                    name: "cursor_at_start_of_region_not_placed",
1829                    context: "abc\n",
1830                    editable_range: 0..4,
1831                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
1832                    expected: indoc! {"
1833                    <|file_sep|>test.rs
1834                    <|fim_prefix|>
1835                    <|fim_middle|>current
1836                    0:26|abc
1837                    <|fim_suffix|>
1838                    <|fim_middle|>updated
1839                    "},
1840                },
1841                Case {
1842                    name: "cursor_at_end_of_line_not_placed",
1843                    context: "abc\ndef\n",
1844                    editable_range: 0..8,
1845                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
1846                    expected: indoc! {"
1847                    <|file_sep|>test.rs
1848                    <|fim_prefix|>
1849                    <|fim_middle|>current
1850                    0:26|abc
1851                    1:2f|def
1852                    <|fim_suffix|>
1853                    <|fim_middle|>updated
1854                    "},
1855                },
1856                Case {
1857                    name: "cursor_offset_relative_to_context_not_editable_region",
1858                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
1859                    // write_cursor_excerpt_section must subtract it before comparing against
1860                    // per-line offsets within the editable region.
1861                    context: "pre\naaa\nbbb\nsuf\n",
1862                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
1863                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
1864                    expected: indoc! {"
1865                    <|file_sep|>test.rs
1866                    <|fim_prefix|>
1867                    pre
1868                    <|fim_middle|>current
1869                    0:23|aaa
1870                    1:26|b<|user_cursor|>bb
1871                    <|fim_suffix|>
1872                    suf
1873                    <|fim_middle|>updated
1874                    "},
1875                },
1876            ];
1877
                // Render each case into a fresh buffer so outputs cannot leak between cases,
                // then compare the complete prompt text.
1878            for case in &cases {
1879                let mut prompt = String::new();
1880                hashline::write_cursor_excerpt_section(
1881                    &mut prompt,
1882                    Path::new("test.rs"),
1883                    case.context,
1884                    &case.editable_range,
1885                    case.cursor_offset,
1886                );
1887                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
1888            }
1889        }
1890
1891        #[test]
            // Table-driven test of `hashline::apply_edit_commands`: each case applies
            // `model_output` to `original` and expects exactly `expected`. Cases whose line
            // references are invalid (wrong hash, out-of-range index, reversed or malformed
            // range) expect `original` back unchanged.
1892        fn test_apply_edit_commands() {
                // Inputs and expected result for one command-application scenario.
1893            struct Case {
                    // Label printed in the assertion message when the case fails.
1894                name: &'static str,
                    // Text the commands are applied to.
1895                original: &'static str,
                    // Command text: `<|set|>` / `<|insert|>` header lines, each followed by the
                    // replacement or inserted lines.
1896                model_output: &'static str,
                    // Text expected after applying the commands.
1897                expected: &'static str,
1898            }
1899
                // Line references in the commands use the `<index>:<two hex digits>` form; a range
                // is written as `<start ref>-<end ref>`.
1900            let cases = vec![
1901                Case {
1902                    name: "set_single_line",
1903                    original: indoc! {"
1904                    let mut total = 0;
1905                    for product in products {
1906                        total += ;
1907                    }
1908                    total
1909                "},
1910                    model_output: indoc! {"
1911                    <|set|>2:87
1912                        total += product.price;
1913                "},
1914                    expected: indoc! {"
1915                    let mut total = 0;
1916                    for product in products {
1917                        total += product.price;
1918                    }
1919                    total
1920                "},
1921                },
1922                Case {
1923                    name: "set_range",
1924                    original: indoc! {"
1925                    fn foo() {
1926                        let x = 1;
1927                        let y = 2;
1928                        let z = 3;
1929                    }
1930                "},
1931                    model_output: indoc! {"
1932                    <|set|>1:46-3:4a
1933                        let sum = 6;
1934                "},
1935                    expected: indoc! {"
1936                    fn foo() {
1937                        let sum = 6;
1938                    }
1939                "},
1940                },
1941                Case {
1942                    name: "insert_after_line",
1943                    original: indoc! {"
1944                    fn main() {
1945                        let x = 1;
1946                    }
1947                "},
1948                    model_output: indoc! {"
1949                    <|insert|>1:46
1950                        let y = 2;
1951                "},
1952                    expected: indoc! {"
1953                    fn main() {
1954                        let x = 1;
1955                        let y = 2;
1956                    }
1957                "},
1958                },
                    // An `<|insert|>` header without a line reference inserts before the first line.
1959                Case {
1960                    name: "insert_before_first",
1961                    original: indoc! {"
1962                    let x = 1;
1963                    let y = 2;
1964                "},
1965                    model_output: indoc! {"
1966                    <|insert|>
1967                    use std::io;
1968                "},
1969                    expected: indoc! {"
1970                    use std::io;
1971                    let x = 1;
1972                    let y = 2;
1973                "},
1974                },
                    // The cursor marker inside replacement text is carried through verbatim.
1975                Case {
1976                    name: "set_with_cursor_marker",
1977                    original: indoc! {"
1978                    fn main() {
1979                        println!();
1980                    }
1981                "},
1982                    model_output: indoc! {"
1983                    <|set|>1:34
1984                        eprintln!(\"<|user_cursor|>\");
1985                "},
1986                    expected: indoc! {"
1987                    fn main() {
1988                        eprintln!(\"<|user_cursor|>\");
1989                    }
1990                "},
1991                },
1992                Case {
1993                    name: "multiple_set_commands",
1994                    original: indoc! {"
1995                    aaa
1996                    bbb
1997                    ccc
1998                    ddd
1999                "},
2000                    model_output: indoc! {"
2001                    <|set|>0:23
2002                    AAA
2003                    <|set|>2:29
2004                    CCC
2005                "},
2006                    expected: indoc! {"
2007                    AAA
2008                    bbb
2009                    CCC
2010                    ddd
2011                "},
2012                },
2013                Case {
2014                    name: "set_range_multiline_replacement",
2015                    original: indoc! {"
2016                    fn handle_submit() {
2017                    }
2018
2019                    fn handle_keystroke() {
2020                "},
2021                    model_output: indoc! {"
2022                    <|set|>0:3f-1:7d
2023                    fn handle_submit(modal_state: &mut ModalState) {
2024                        <|user_cursor|>
2025                    }
2026                "},
2027                    expected: indoc! {"
2028                    fn handle_submit(modal_state: &mut ModalState) {
2029                        <|user_cursor|>
2030                    }
2031
2032                    fn handle_keystroke() {
2033                "},
2034                },
                    // From here on, most cases feed output that must leave `original` untouched.
2035                Case {
2036                    name: "no_edit_commands_returns_original",
2037                    original: indoc! {"
2038                    hello
2039                    world
2040                "},
2041                    model_output: "some random text with no commands",
2042                    expected: indoc! {"
2043                    hello
2044                    world
2045                "},
2046                },
2047                Case {
2048                    name: "no_edits_command_returns_original",
2049                    original: indoc! {"
2050                    hello
2051                    world
2052                "},
2053                    model_output: "<|no_edits|>",
2054                    expected: indoc! {"
2055                    hello
2056                    world
2057                "},
2058                },
2059                Case {
2060                    name: "wrong_hash_set_ignored",
2061                    original: indoc! {"
2062                    aaa
2063                    bbb
2064                "},
2065                    model_output: indoc! {"
2066                    <|set|>0:ff
2067                    ZZZ
2068                "},
2069                    expected: indoc! {"
2070                    aaa
2071                    bbb
2072                "},
2073                },
2074                Case {
2075                    name: "insert_and_set_combined",
2076                    original: indoc! {"
2077                    alpha
2078                    beta
2079                    gamma
2080                "},
2081                    model_output: indoc! {"
2082                    <|set|>0:06
2083                    ALPHA
2084                    <|insert|>1:9c
2085                    beta_extra
2086                "},
2087                    expected: indoc! {"
2088                    ALPHA
2089                    beta
2090                    beta_extra
2091                    gamma
2092                "},
2093                },
                    // `original` has no final newline and neither does `expected`.
2094                Case {
2095                    name: "no_trailing_newline_preserved",
2096                    original: "hello\nworld",
2097                    model_output: indoc! {"
2098                    <|set|>0:14
2099                    HELLO
2100                "},
2101                    expected: "HELLO\nworld",
2102                },
                    // Only the end reference of the range carries a wrong hash; the whole
                    // command is expected to have no effect.
2103                Case {
2104                    name: "set_range_hash_mismatch_in_end_bound",
2105                    original: indoc! {"
2106                    one
2107                    two
2108                    three
2109                "},
2110                    model_output: indoc! {"
2111                    <|set|>0:42-2:ff
2112                    ONE_TWO_THREE
2113                "},
2114                    expected: indoc! {"
2115                    one
2116                    two
2117                    three
2118                "},
2119                },
2120                Case {
2121                    name: "set_range_start_greater_than_end_ignored",
2122                    original: indoc! {"
2123                    a
2124                    b
2125                    c
2126                "},
2127                    model_output: indoc! {"
2128                    <|set|>2:63-1:62
2129                    X
2130                "},
2131                    expected: indoc! {"
2132                    a
2133                    b
2134                    c
2135                "},
2136                },
2137                Case {
2138                    name: "insert_out_of_bounds_ignored",
2139                    original: indoc! {"
2140                    x
2141                    y
2142                "},
2143                    model_output: indoc! {"
2144                    <|insert|>99:aa
2145                    z
2146                "},
2147                    expected: indoc! {"
2148                    x
2149                    y
2150                "},
2151                },
2152                Case {
2153                    name: "set_out_of_bounds_ignored",
2154                    original: indoc! {"
2155                    x
2156                    y
2157                "},
2158                    model_output: indoc! {"
2159                    <|set|>99:aa
2160                    z
2161                "},
2162                    expected: indoc! {"
2163                    x
2164                    y
2165                "},
2166                },
2167                Case {
2168                    name: "malformed_set_command_ignored",
2169                    original: indoc! {"
2170                    alpha
2171                    beta
2172                "},
2173                    model_output: indoc! {"
2174                    <|set|>not-a-line-ref
2175                    UPDATED
2176                "},
2177                    expected: indoc! {"
2178                    alpha
2179                    beta
2180                "},
2181                },
                    // Unlike a malformed set, an insert whose reference cannot be parsed is not
                    // dropped: the expected output places its text before the first line.
2182                Case {
2183                    name: "malformed_insert_hash_treated_as_before_first",
2184                    original: indoc! {"
2185                    alpha
2186                    beta
2187                "},
2188                    model_output: indoc! {"
2189                    <|insert|>1:nothex
2190                    preamble
2191                "},
2192                    expected: indoc! {"
2193                    preamble
2194                    alpha
2195                    beta
2196                "},
2197                },
2198                Case {
2199                    name: "set_then_insert_same_target_orders_insert_after_replacement",
2200                    original: indoc! {"
2201                    cat
2202                    dog
2203                "},
2204                    model_output: indoc! {"
2205                    <|set|>0:38
2206                    CAT
2207                    <|insert|>0:38
2208                    TAIL
2209                "},
2210                    expected: indoc! {"
2211                    CAT
2212                    TAIL
2213                    dog
2214                "},
2215                },
                    // NOTE(review): despite the "last_wins" name, the expected output keeps the
                    // text of the first command ("FIRST") and leaves line "d" untouched, i.e. the
                    // second, overlapping command has no visible effect. Confirm which behavior is
                    // intended and rename the case if the expectation is the correct one.
2216                Case {
2217                    name: "overlapping_set_ranges_last_wins",
2218                    original: indoc! {"
2219                    a
2220                    b
2221                    c
2222                    d
2223                "},
2224                    model_output: indoc! {"
2225                    <|set|>0:61-2:63
2226                    FIRST
2227                    <|set|>1:62-3:64
2228                    SECOND
2229                "},
2230                    expected: indoc! {"
2231                    FIRST
2232                    d
2233                "},
2234                },
2235                Case {
2236                    name: "insert_before_first_and_after_line",
2237                    original: indoc! {"
2238                    a
2239                    b
2240                "},
2241                    model_output: indoc! {"
2242                    <|insert|>
2243                    HEAD
2244                    <|insert|>0:61
2245                    MID
2246                "},
2247                    expected: indoc! {"
2248                    HEAD
2249                    a
2250                    MID
2251                    b
2252                "},
2253                },
2254            ];
2255
                // Apply each case's commands and compare the resulting text with `expected`.
2256            for case in &cases {
2257                let result = hashline::apply_edit_commands(case.original, &case.model_output);
2258                assert_eq!(result, case.expected, "failed case: {}", case.name);
2259            }
2260        }
2261
2262        #[test]
            // Pins which model outputs `hashline::output_has_edit_commands` reports as
            // containing edit commands.
2263        fn test_output_has_edit_commands() {
                // Output that starts with the set marker is reported as having commands.
2264            assert!(hashline::output_has_edit_commands(&format!(
2265                "{}0:ab\nnew",
2266                SET_COMMAND_MARKER
2267            )));
                // Same for output that starts with the insert marker.
2268            assert!(hashline::output_has_edit_commands(&format!(
2269                "{}0:ab\nnew",
2270                INSERT_COMMAND_MARKER
2271            )));
                // A marker on a later line, after unrelated leading text, is still found.
2272            assert!(hashline::output_has_edit_commands(&format!(
2273                "some text\n{}1:cd\nstuff",
2274                SET_COMMAND_MARKER
2275            )));
                // Plain text and the bare word "NO_EDITS" are not commands, whereas the literal
                // "<|no_edits|>" marker is reported as one.
2276            assert!(!hashline::output_has_edit_commands("just plain text"));
2277            assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2278            assert!(hashline::output_has_edit_commands("<|no_edits|>"));
2279        }
2280
2281        // ---- hashline::patch_to_edit_commands round-trip tests ----
2282
2283        #[test]
            // Round-trip test: each case converts `patch` into edit commands with
            // `hashline::patch_to_edit_commands`, applies those commands to `old` with
            // `hashline::apply_edit_commands`, and expects `expected_new`.
2284        fn test_patch_to_edit_commands() {
                // Inputs and expected result for one round-trip scenario.
2285            struct Case {
                    // Label printed in panic and assertion messages when the case fails.
2286                name: &'static str,
                    // Text the patch was made against and the commands are applied to.
2287                old: &'static str,
                    // Hunk text: an `@@` header followed by `-` (removed), `+` (added) and
                    // context lines.
2288                patch: &'static str,
                    // Text expected after the generated commands are applied to `old`. When it
                    // contains the cursor marker, the marker position also supplies the cursor
                    // offset passed to the conversion (see the loop below).
2289                expected_new: &'static str,
2290            }
2291
2292            let cases = [
2293                Case {
2294                    name: "single_line_replacement",
2295                    old: indoc! {"
2296                    let mut total = 0;
2297                    for product in products {
2298                        total += ;
2299                    }
2300                    total
2301                "},
2302                    patch: indoc! {"
2303                    @@ -1,5 +1,5 @@
2304                     let mut total = 0;
2305                     for product in products {
2306                    -    total += ;
2307                    +    total += product.price;
2308                     }
2309                     total
2310                "},
2311                    expected_new: indoc! {"
2312                    let mut total = 0;
2313                    for product in products {
2314                        total += product.price;
2315                    }
2316                    total
2317                "},
2318                },
2319                Case {
2320                    name: "multiline_replacement",
2321                    old: indoc! {"
2322                    fn foo() {
2323                        let x = 1;
2324                        let y = 2;
2325                        let z = 3;
2326                    }
2327                "},
2328                    patch: indoc! {"
2329                    @@ -1,5 +1,3 @@
2330                     fn foo() {
2331                    -    let x = 1;
2332                    -    let y = 2;
2333                    -    let z = 3;
2334                    +    let sum = 1 + 2 + 3;
2335                     }
2336                "},
2337                    expected_new: indoc! {"
2338                    fn foo() {
2339                        let sum = 1 + 2 + 3;
2340                    }
2341                "},
2342                },
2343                Case {
2344                    name: "insertion",
2345                    old: indoc! {"
2346                    fn main() {
2347                        let x = 1;
2348                    }
2349                "},
2350                    patch: indoc! {"
2351                    @@ -1,3 +1,4 @@
2352                     fn main() {
2353                         let x = 1;
2354                    +    let y = 2;
2355                     }
2356                "},
2357                    expected_new: indoc! {"
2358                    fn main() {
2359                        let x = 1;
2360                        let y = 2;
2361                    }
2362                "},
2363                },
2364                Case {
2365                    name: "insertion_before_first",
2366                    old: indoc! {"
2367                    let x = 1;
2368                    let y = 2;
2369                "},
2370                    patch: indoc! {"
2371                    @@ -1,2 +1,3 @@
2372                    +use std::io;
2373                     let x = 1;
2374                     let y = 2;
2375                "},
2376                    expected_new: indoc! {"
2377                    use std::io;
2378                    let x = 1;
2379                    let y = 2;
2380                "},
2381                },
2382                Case {
2383                    name: "deletion",
2384                    old: indoc! {"
2385                    aaa
2386                    bbb
2387                    ccc
2388                    ddd
2389                "},
2390                    patch: indoc! {"
2391                    @@ -1,4 +1,2 @@
2392                     aaa
2393                    -bbb
2394                    -ccc
2395                     ddd
2396                "},
2397                    expected_new: indoc! {"
2398                    aaa
2399                    ddd
2400                "},
2401                },
                    // Two separate change groups inside a single `@@` hunk.
2402                Case {
2403                    name: "multiple_changes",
2404                    old: indoc! {"
2405                    alpha
2406                    beta
2407                    gamma
2408                    delta
2409                    epsilon
2410                "},
2411                    patch: indoc! {"
2412                    @@ -1,5 +1,5 @@
2413                    -alpha
2414                    +ALPHA
2415                     beta
2416                     gamma
2417                    -delta
2418                    +DELTA
2419                     epsilon
2420                "},
2421                    expected_new: indoc! {"
2422                    ALPHA
2423                    beta
2424                    gamma
2425                    DELTA
2426                    epsilon
2427                "},
2428                },
2429                Case {
2430                    name: "replace_with_insertion",
2431                    old: indoc! {r#"
2432                    fn handle() {
2433                        modal_state.close();
2434                        modal_state.dismiss();
2435                "#},
2436                    patch: indoc! {r#"
2437                    @@ -1,3 +1,4 @@
2438                     fn handle() {
2439                         modal_state.close();
2440                    +    eprintln!("");
2441                         modal_state.dismiss();
2442                "#},
2443                    expected_new: indoc! {r#"
2444                    fn handle() {
2445                        modal_state.close();
2446                        eprintln!("");
2447                        modal_state.dismiss();
2448                "#},
2449                },
2450                Case {
2451                    name: "complete_replacement",
2452                    old: indoc! {"
2453                    aaa
2454                    bbb
2455                    ccc
2456                "},
2457                    patch: indoc! {"
2458                    @@ -1,3 +1,3 @@
2459                    -aaa
2460                    -bbb
2461                    -ccc
2462                    +xxx
2463                    +yyy
2464                    +zzz
2465                "},
2466                    expected_new: indoc! {"
2467                    xxx
2468                    yyy
2469                    zzz
2470                "},
2471                },
2472                Case {
2473                    name: "add_function_body",
2474                    old: indoc! {"
2475                    fn foo() {
2476                        modal_state.dismiss();
2477                    }
2478
2479                    fn
2480
2481                    fn handle_keystroke() {
2482                "},
2483                    patch: indoc! {"
2484                    @@ -1,6 +1,8 @@
2485                     fn foo() {
2486                         modal_state.dismiss();
2487                     }
2488
2489                    -fn
2490                    +fn handle_submit() {
2491                    +    todo()
2492                    +}
2493
2494                     fn handle_keystroke() {
2495                "},
2496                    expected_new: indoc! {"
2497                    fn foo() {
2498                        modal_state.dismiss();
2499                    }
2500
2501                    fn handle_submit() {
2502                        todo()
2503                    }
2504
2505                    fn handle_keystroke() {
2506                "},
2507                },
                    // The patch itself carries no cursor marker; `expected_new` does. The loop
                    // below derives the cursor offset from the marker's position in `expected_new`
                    // and the generated commands are expected to reproduce the marker.
2508                Case {
2509                    name: "with_cursor_offset",
2510                    old: indoc! {r#"
2511                    fn main() {
2512                        println!();
2513                    }
2514                "#},
2515                    patch: indoc! {r#"
2516                        @@ -1,3 +1,3 @@
2517                        fn main() {
2518                        -    println!();
2519                        +    eprintln!("");
2520                        }
2521                    "#},
2522                    expected_new: indoc! {r#"
2523                        fn main() {
2524                            eprintln!("<|user_cursor|>");
2525                        }
2526                    "#},
2527                },
                    // The `@@` header names line 20 although `old` has only two lines; the
                    // expected result still places the insertion directly after "aaa".
2528                Case {
2529                    name: "non_local_hunk_header_pure_insertion_repro",
2530                    old: indoc! {"
2531                        aaa
2532                        bbb
2533                    "},
2534                    patch: indoc! {"
2535                        @@ -20,2 +20,3 @@
2536                        aaa
2537                        +xxx
2538                        bbb
2539                    "},
2540                    expected_new: indoc! {"
2541                        aaa
2542                        xxx
2543                        bbb
2544                    "},
2545                },
                    // Header-only patch: `expected_new` equals `old`, and the
                    // `output_has_edit_commands` assertion in the loop must still hold for the
                    // returned text.
                    // NOTE(review): presumably the conversion returns the no-edits marker here,
                    // as the case name suggests — confirm against `patch_to_edit_commands`.
2546                Case {
2547                    name: "empty_patch_produces_no_edits_marker",
2548                    old: indoc! {"
2549                        aaa
2550                        bbb
2551                    "},
2552                    patch: "@@ -20,2 +20,3 @@\n",
2553                    expected_new: indoc! {"
2554                        aaa
2555                        bbb
2556                    "},
2557                },
2558            ];
2559
2560            for case in &cases {
2561                // The cursor_offset for patch_to_edit_commands is relative to
2562                // the first hunk's new text (context + additions). We compute
2563                // it by finding where the marker sits in the expected output
2564                // (which mirrors the new text of the hunk).
                    // `find` yields `None` for cases without a marker, so no cursor is requested.
2565                let cursor_offset = case.expected_new.find(CURSOR_MARKER);
2566
                    // A conversion error fails the test immediately, naming the case.
2567                let commands =
2568                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
2569                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
2570
                    // The generated text must itself be recognized as command output.
2571                assert!(
2572                    hashline::output_has_edit_commands(&commands),
2573                    "case {}: expected edit commands, got: {commands:?}",
2574                    case.name,
2575                );
2576
                    // Close the loop: applying the commands to `old` must give `expected_new`.
2577                let applied = hashline::apply_edit_commands(case.old, &commands);
2578                assert_eq!(applied, case.expected_new, "case {}", case.name);
2579            }
2580        }
2581    }
2582}
2583
2584pub mod seed_coder {
2585    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2586    //!
2587    //! Seed-Coder uses different FIM tokens and order than Qwen:
2588    //! - SPM order: suffix comes FIRST, then prefix, then middle
2589    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2590    //! - File markers: StarCoder-style `<filename>path` (single token + path)
2591    //!
2592    //! All context (related files, edit history) goes in the PREFIX section.
2593    //! The suffix contains only code after the editable region.
2594    //!
2595    //! Example prompt:
2596    //!
2597    //! <[fim-suffix]>
2598    //! code after editable region
2599    //! <[fim-prefix]><filename>related/file.py
2600    //! related file content
2601    //!
2602    //! <filename>edit_history
2603    //! --- a/some_file.py
2604    //! +++ b/some_file.py
2605    //! -old
2606    //! +new
2607    //!
2608    //! <filename>path/to/target_file.py
2609    //! code before editable region
2610    //! <<<<<<< CURRENT
2611    //! code that
2612    //! needs to<|user_cursor|>
2613    //! be rewritten
2614    //! =======
2615    //! <[fim-middle]>
2616    //!
2617    //! Expected output (model generates):
2618    //!
2619    //! updated
2620    //! code with
2621    //! changes applied
2622    //! >>>>>>> UPDATED
2623
2624    use super::*;
2625
    /// Token that opens the suffix section, which comes first in the prompt.
    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
    /// Token that opens the prefix section, written right after the suffix section.
    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
    /// Token that ends the prompt; the model's output follows it.
    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
    /// Marker written directly before a file path at the start of a file section.
    pub const FILE_MARKER: &str = "<filename>";

    /// Line that opens the editable region inside the cursor file section.
    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
    /// Line that closes the editable region inside the cursor file section.
    pub const SEPARATOR: &str = "=======\n";
    /// Line that terminates the model's output.
    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";

    /// Output body used in place of replacement text when a patch makes no edits.
    pub const NO_EDITS: &str = "NO_EDITS\n";
2636
2637    pub fn special_tokens() -> &'static [&'static str] {
2638        &[
2639            FIM_SUFFIX,
2640            FIM_PREFIX,
2641            FIM_MIDDLE,
2642            FILE_MARKER,
2643            START_MARKER,
2644            SEPARATOR,
2645            END_MARKER,
2646            CURSOR_MARKER,
2647        ]
2648    }
2649
2650    pub fn write_cursor_excerpt_section(
2651        prompt: &mut String,
2652        path: &Path,
2653        context: &str,
2654        editable_range: &Range<usize>,
2655        cursor_offset: usize,
2656    ) {
2657        let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2658        prompt.push_str(&section);
2659    }
2660
2661    pub fn format_prompt_with_budget(
2662        path: &Path,
2663        context: &str,
2664        editable_range: &Range<usize>,
2665        cursor_offset: usize,
2666        events: &[Arc<Event>],
2667        related_files: &[RelatedFile],
2668        max_tokens: usize,
2669    ) -> String {
2670        let cursor_prefix_section =
2671            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2672        assemble_fim_prompt(
2673            context,
2674            editable_range,
2675            &cursor_prefix_section,
2676            events,
2677            related_files,
2678            max_tokens,
2679        )
2680    }
2681
2682    pub fn assemble_fim_prompt(
2683        context: &str,
2684        editable_range: &Range<usize>,
2685        cursor_prefix_section: &str,
2686        events: &[Arc<Event>],
2687        related_files: &[RelatedFile],
2688        max_tokens: usize,
2689    ) -> String {
2690        let suffix_section = build_suffix_section(context, editable_range);
2691
2692        let suffix_tokens = estimate_tokens(suffix_section.len());
2693        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
2694        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2695
2696        let edit_history_section = super::format_edit_history_within_budget(
2697            events,
2698            FILE_MARKER,
2699            "edit_history",
2700            budget_after_cursor,
2701        );
2702        let edit_history_tokens = estimate_tokens(edit_history_section.len());
2703        let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
2704
2705        let related_files_section = super::format_related_files_within_budget(
2706            related_files,
2707            FILE_MARKER,
2708            "",
2709            budget_after_edit_history,
2710        );
2711
2712        let mut prompt = String::new();
2713        prompt.push_str(&suffix_section);
2714        prompt.push_str(FIM_PREFIX);
2715        prompt.push_str(&related_files_section);
2716        if !related_files_section.is_empty() {
2717            prompt.push('\n');
2718        }
2719        prompt.push_str(&edit_history_section);
2720        if !edit_history_section.is_empty() {
2721            prompt.push('\n');
2722        }
2723        prompt.push_str(cursor_prefix_section);
2724        prompt.push_str(FIM_MIDDLE);
2725        prompt
2726    }
2727
2728    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2729        let mut section = String::new();
2730        section.push_str(FIM_SUFFIX);
2731        section.push_str(&context[editable_range.end..]);
2732        if !section.ends_with('\n') {
2733            section.push('\n');
2734        }
2735        section
2736    }
2737
2738    fn build_cursor_prefix_section(
2739        path: &Path,
2740        context: &str,
2741        editable_range: &Range<usize>,
2742        cursor_offset: usize,
2743    ) -> String {
2744        let mut section = String::new();
2745        let path_str = path.to_string_lossy();
2746        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2747
2748        section.push_str(&context[..editable_range.start]);
2749        section.push_str(START_MARKER);
2750        section.push_str(&context[editable_range.start..cursor_offset]);
2751        section.push_str(CURSOR_MARKER);
2752        section.push_str(&context[cursor_offset..editable_range.end]);
2753        if !section.ends_with('\n') {
2754            section.push('\n');
2755        }
2756        section.push_str(SEPARATOR);
2757        section
2758    }
2759
2760    /// Format patch as containing no changes if it's empty; otherwise return None.
2761    pub(crate) fn no_edits(patch: &str) -> Option<String> {
2762        // Count lines in the patch
2763        let empty_patch = patch.lines().count() <= 3;
2764        if empty_patch {
2765            Some(format!("{NO_EDITS}{END_MARKER}"))
2766        } else {
2767            None
2768        }
2769    }
2770}
2771
2772pub mod v0304_variable_edit {
2773    //! A prompt format with no fixed editable region. The entire context is shown
2774    //! to the model, and it chooses which text to replace by outputting surrounding
2775    //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
2776    //! text.
2777    //!
2778    //! Example prompt:
2779    //!
2780    //! <|file_sep|>path/to/file.py
2781    //! zero
2782    //! one
2783    //! two
2784    //! three<|user_cursor|>
2785    //! four
2786    //! five
2787    //! <|fim_prefix|>
2788    //
2789    //! Expected output (model generates):
2790    //!
2791    //! two
2792    //! <|fim_middle|>
2793    //! THREE
2794    //! <|fim_suffix|>
2795    //! four
2796    //!
2797    //! The output means: find "two\n...\nfour" in the context, and replace
2798    //! everything between "two\n" and "four" with "THREE\n".
2799
2800    use super::*;
2801
2802    pub fn special_tokens() -> &'static [&'static str] {
2803        &[
2804            "<|fim_prefix|>",
2805            "<|fim_suffix|>",
2806            "<|fim_middle|>",
2807            "<|file_sep|>",
2808            CURSOR_MARKER,
2809        ]
2810    }
2811
2812    pub fn write_cursor_excerpt_section(
2813        prompt: &mut String,
2814        path: &Path,
2815        context: &str,
2816        cursor_offset: usize,
2817    ) {
2818        let path_str = path.to_string_lossy();
2819        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
2820
2821        prompt.push_str(&context[..cursor_offset]);
2822        prompt.push_str(CURSOR_MARKER);
2823        prompt.push_str(&context[cursor_offset..]);
2824        if !prompt.ends_with('\n') {
2825            prompt.push('\n');
2826        }
2827        prompt.push_str("<|fim_prefix|>\n")
2828    }
2829
2830    /// Apply a variable-edit model output to the original context text.
2831    ///
2832    /// The model output has the form:
2833    ///
2834    /// - prefix context lines
2835    /// - `<|fim_middle|>`
2836    /// - new text
2837    /// - `<|fim_suffix|>`
2838    /// - suffix context lines
2839    ///
2840    /// We locate the prefix/suffix context lines in the original text and replace
2841    /// everything between them with the new text.
2842    pub fn apply_variable_edit(
2843        context: &str,
2844        model_output: &str,
2845    ) -> Result<(Range<usize>, String)> {
2846        let (prefix_context, rest) = model_output
2847            .split_once("<|fim_middle|>\n")
2848            .or_else(|| model_output.split_once("<|fim_middle|>"))
2849            .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
2850
2851        let (new_text, suffix_context) = rest
2852            .split_once("<|fim_suffix|>\n")
2853            .or_else(|| rest.split_once("<|fim_suffix|>"))
2854            .unwrap_or((rest, ""));
2855
2856        let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
2857            suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
2858        } else {
2859            suffix_context
2860        };
2861
2862        let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
2863            .ok_or_else(|| anyhow!("could not locate prefix lines"))?
2864            + prefix_context.len();
2865        let suffix_offset = if suffix_context.is_empty() {
2866            context.len()
2867        } else {
2868            find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
2869                .ok_or_else(|| anyhow!("could not locate suffix lines"))?
2870                + prefix_offset
2871        };
2872
2873        let edit_range = prefix_offset..suffix_offset;
2874        return Ok((edit_range, new_text.to_string()));
2875    }
2876
2877    fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
2878        if needle.is_empty() {
2879            return Some(0);
2880        }
2881
2882        haystack.match_indices(needle).find_map(|(offset, _)| {
2883            let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
2884            matched_line_start.then_some(offset)
2885        })
2886    }
2887
2888    /// Convert a unified diff patch into the variable-edit output format.
2889    ///
2890    /// Parses `patch` as a unified diff against `old_text` and produces model
2891    /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
2892    /// delimiters. The diff is resolved by content matching rather than line
2893    /// numbers.
2894    pub fn patch_to_variable_edit_output(
2895        old_text: &str,
2896        patch: &str,
2897        cursor_offset: Option<usize>,
2898    ) -> Result<String> {
2899        // Parse the unified diff into hunks. Each hunk has an `old_context`
2900        // string (context + deleted lines interleaved in order) and a list of
2901        // edits expressed as byte ranges within that context plus replacement
2902        // text.
2903        let hunks = parse_hunks(patch);
2904        if hunks.is_empty() {
2905            return Ok(String::new());
2906        }
2907
2908        // Apply each hunk by finding its old_context in the text and
2909        // performing the edits. We search forward from where the previous
2910        // hunk ended so that hunks are applied in order.
2911        let mut new_text = old_text.to_string();
2912        let mut search_from: usize = 0;
2913        let mut first_hunk_pos: Option<usize> = None;
2914
2915        for hunk in &hunks {
2916            let context_pos = new_text[search_from..]
2917                .find(&hunk.old_context)
2918                .map(|pos| pos + search_from)
2919                .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
2920
2921            if first_hunk_pos.is_none() {
2922                first_hunk_pos = Some(context_pos);
2923            }
2924
2925            // Apply edits in reverse order so byte offsets remain valid.
2926            for edit in hunk.edits.iter().rev() {
2927                let abs_start = context_pos + edit.range.start;
2928                let abs_end = context_pos + edit.range.end;
2929                new_text.replace_range(abs_start..abs_end, &edit.text);
2930            }
2931
2932            // Advance past this hunk's region in the (now modified) text.
2933            let new_region_len: usize =
2934                hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
2935                    len + edit.text.len() - (edit.range.end - edit.range.start)
2936                });
2937            search_from = context_pos + new_region_len;
2938        }
2939
2940        // Now we have old_text and new_text. Find the changed line range by
2941        // comparing them.
2942        let old_lines: Vec<&str> = old_text.lines().collect();
2943        let new_lines: Vec<&str> = new_text.lines().collect();
2944
2945        // Find first differing line.
2946        let first_changed_row = old_lines
2947            .iter()
2948            .zip(new_lines.iter())
2949            .position(|(a, b)| a != b)
2950            .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
2951
2952        // Find last differing line (from the end).
2953        let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
2954        let common_suffix = old_lines
2955            .iter()
2956            .rev()
2957            .zip(new_lines.iter().rev())
2958            .take(max_suffix)
2959            .take_while(|(a, b)| a == b)
2960            .count();
2961
2962        let old_end = old_lines.len() - common_suffix;
2963        let new_end = new_lines.len() - common_suffix;
2964
2965        if first_changed_row == old_end && first_changed_row == new_end {
2966            return Ok(String::new());
2967        }
2968
2969        // Build the replacement text from new_lines[first_diff..new_end].
2970        let mut merged_new_text = String::new();
2971        for line in &new_lines[first_changed_row..new_end] {
2972            merged_new_text.push_str(line);
2973            merged_new_text.push('\n');
2974        }
2975
2976        // cursor_offset is relative to the first hunk's new content in
2977        // new_text. Translate it to an offset within merged_new_text, which
2978        // only contains lines first_diff..new_end of new_text.
2979        if let Some(hunk_offset) = cursor_offset {
2980            let hunk_start = first_hunk_pos.unwrap_or(0);
2981            let absolute_pos = hunk_start + hunk_offset;
2982
2983            // Byte offset where first_diff starts in new_text.
2984            let merged_start: usize = new_lines[..first_changed_row]
2985                .iter()
2986                .map(|line| line.len() + 1)
2987                .sum();
2988
2989            if absolute_pos >= merged_start {
2990                let relative_offset = absolute_pos - merged_start;
2991                if relative_offset <= merged_new_text.len() {
2992                    merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
2993                }
2994            }
2995        }
2996
2997        // Build output with 2 lines of context above and below.
2998        let context_lines_count = 2;
2999        let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
3000        let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
3001
3002        fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
3003            let pattern = &lines[line_range];
3004            let pattern_len = pattern.len();
3005
3006            let mut count = 0;
3007            for offset in 0..=lines.len() - pattern_len {
3008                if &lines[offset..offset + pattern_len] == pattern {
3009                    count += 1;
3010                }
3011            }
3012            count
3013        }
3014
3015        // Expand prefix and suffix until they are unique
3016        while prefix_start > 0 {
3017            if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
3018                prefix_start -= 1;
3019            } else {
3020                break;
3021            }
3022        }
3023        while suffix_end < old_lines.len() {
3024            if count_matches(old_end..suffix_end, &old_lines) > 1 {
3025                suffix_end += 1;
3026            } else {
3027                break;
3028            }
3029        }
3030
3031        let mut output = String::new();
3032        for line in &old_lines[prefix_start..first_changed_row] {
3033            output.push_str(line);
3034            output.push('\n');
3035        }
3036        output.push_str("<|fim_middle|>\n");
3037        output.push_str(&merged_new_text);
3038        output.push_str("<|fim_suffix|>\n");
3039        for line in &old_lines[old_end..suffix_end] {
3040            output.push_str(line);
3041            output.push('\n');
3042        }
3043
3044        Ok(output)
3045    }
3046
    /// One hunk of a unified diff, expressed by content, not by line numbers.
    struct ParsedHunk {
        /// The hunk's context and removed lines concatenated in patch order,
        /// i.e. the text the hunk expects to find in the original.
        old_context: String,
        /// Replacements to perform inside `old_context`, in ascending order of
        /// position.
        edits: Vec<ParsedEdit>,
    }
3051
    /// A single replacement within a hunk.
    struct ParsedEdit {
        /// Byte range within the owning hunk's `old_context` that is replaced.
        /// Empty for a pure insertion.
        range: Range<usize>,
        /// Text that replaces `range`. Empty for a pure deletion.
        text: String,
    }
3056
3057    /// Parse a unified diff into content-based hunks. Each hunk contains an
3058    /// `old_context` string (context lines + deleted lines, which together
3059    /// form the text that should be found in the original) and a list of edits
3060    /// expressed as byte ranges within that context.
3061    fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
3062        let mut hunks = Vec::new();
3063        let mut current: Option<ParsedHunk> = None;
3064
3065        for line in patch.lines() {
3066            if line.starts_with("@@") {
3067                if let Some(hunk) = current.take() {
3068                    if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3069                        hunks.push(hunk);
3070                    }
3071                }
3072                current = Some(ParsedHunk {
3073                    old_context: String::new(),
3074                    edits: Vec::new(),
3075                });
3076            } else if line.starts_with("---") || line.starts_with("+++") {
3077                continue;
3078            } else if let Some(hunk) = &mut current {
3079                if let Some(added) = line.strip_prefix('+') {
3080                    let pos = hunk.old_context.len();
3081                    if let Some(last_edit) = hunk.edits.last_mut() {
3082                        if last_edit.range.end == pos {
3083                            writeln!(&mut last_edit.text, "{added}").ok();
3084                            continue;
3085                        }
3086                    }
3087                    hunk.edits.push(ParsedEdit {
3088                        range: pos..pos,
3089                        text: format!("{added}\n"),
3090                    });
3091                } else if let Some(removed) = line.strip_prefix('-') {
3092                    let start = hunk.old_context.len();
3093                    writeln!(&mut hunk.old_context, "{removed}").ok();
3094                    let end = hunk.old_context.len();
3095                    if let Some(last_edit) = hunk.edits.last_mut() {
3096                        if last_edit.range.end == start {
3097                            last_edit.range.end = end;
3098                            continue;
3099                        }
3100                    }
3101                    hunk.edits.push(ParsedEdit {
3102                        range: start..end,
3103                        text: String::new(),
3104                    });
3105                } else {
3106                    let ctx = line.strip_prefix(' ').unwrap_or(line);
3107                    writeln!(&mut hunk.old_context, "{ctx}").ok();
3108                }
3109            }
3110        }
3111
3112        if let Some(hunk) = current {
3113            if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
3114                hunks.push(hunk);
3115            }
3116        }
3117
3118        hunks
3119    }
3120
3121    #[cfg(test)]
3122    mod tests {
3123        use super::*;
3124        use indoc::indoc;
3125
3126        #[test]
3127        fn test_apply_variable_edit() {
3128            struct Case {
3129                name: &'static str,
3130                original: &'static str,
3131                model_output: &'static str,
3132                expected: &'static str,
3133            }
3134
3135            let cases = [
3136                Case {
3137                    name: "simple_single_line_replacement",
3138                    original: indoc! {"
3139                        zero
3140                        one
3141                        two
3142                        three
3143                        four
3144                        five
3145                    "},
3146                    model_output: indoc! {"
3147                        two
3148                        <|fim_middle|>
3149                        THREE
3150                        <|fim_suffix|>
3151                        four
3152                    "},
3153                    expected: indoc! {"
3154                        zero
3155                        one
3156                        two
3157                        THREE
3158                        four
3159                        five
3160                    "},
3161                },
3162                Case {
3163                    name: "multi_line_replacement",
3164                    original: indoc! {"
3165                        a
3166                        b
3167                        c
3168                        d
3169                        e
3170                    "},
3171                    model_output: indoc! {"
3172                        a
3173                        <|fim_middle|>
3174                        B
3175                        C
3176                        D
3177                        <|fim_suffix|>
3178                        e
3179                    "},
3180                    expected: indoc! {"
3181                        a
3182                        B
3183                        C
3184                        D
3185                        e
3186                    "},
3187                },
3188                Case {
3189                    name: "insertion_between_existing_lines",
3190                    original: indoc! {"
3191                        a
3192                        b
3193                        c
3194                    "},
3195                    model_output: indoc! {"
3196                        a
3197                        <|fim_middle|>
3198                        X
3199                        <|fim_suffix|>
3200                        b
3201                    "},
3202                    expected: indoc! {"
3203                        a
3204                        X
3205                        b
3206                        c
3207                    "},
3208                },
3209                Case {
3210                    name: "deletion",
3211                    original: indoc! {"
3212                        a
3213                        b
3214                        c
3215                        d
3216                    "},
3217                    model_output: indoc! {"
3218                        a
3219                        <|fim_middle|>
3220                        <|fim_suffix|>
3221                        c
3222                    "},
3223                    expected: indoc! {"
3224                        a
3225                        c
3226                        d
3227                    "},
3228                },
3229                Case {
3230                    name: "replacement_at_start_no_prefix_context",
3231                    original: indoc! {"
3232                        a
3233                        b
3234                        c
3235                    "},
3236                    model_output: indoc! {"
3237                        <|fim_middle|>
3238                        X
3239                        <|fim_suffix|>
3240                        b
3241                    "},
3242                    expected: indoc! {"
3243                        X
3244                        b
3245                        c
3246                    "},
3247                },
3248                Case {
3249                    name: "replacement_at_end_no_suffix_context",
3250                    original: indoc! {"
3251                        a
3252                        b
3253                        c
3254                    "},
3255                    model_output: indoc! {"
3256                        b
3257                        <|fim_middle|>
3258                        Z
3259                        <|fim_suffix|>
3260                    "},
3261                    expected: indoc! {"
3262                        a
3263                        b
3264                        Z
3265                    "},
3266                },
3267                Case {
3268                    name: "context_with_trailing_newline_is_preserved",
3269                    original: indoc! {"
3270                        a
3271                        b
3272                        c
3273                    "},
3274                    model_output: indoc! {"
3275                        a
3276                        <|fim_middle|>
3277                        B
3278                        <|fim_suffix|>
3279                        c
3280                    "},
3281                    expected: indoc! {"
3282                        a
3283                        B
3284                        c
3285                    "},
3286                },
3287                Case {
3288                    name: "cursor_marker_passes_through_untouched",
3289                    original: indoc! {"
3290                        a
3291                        b
3292                        c
3293                    "},
3294                    model_output: indoc! {"
3295                        a
3296                        <|fim_middle|>
3297                        B<|user_cursor|>B
3298                        <|fim_suffix|>
3299                        c
3300                    "},
3301                    expected: indoc! {"
3302                        a
3303                        B<|user_cursor|>B
3304                        c
3305                    "},
3306                },
3307                Case {
3308                    name: "multiple_prefix_context_lines",
3309                    original: indoc! {"
3310                        a
3311                        b
3312                        c
3313                        d
3314                        e
3315                    "},
3316                    model_output: indoc! {"
3317                        b
3318                        c
3319                        <|fim_middle|>
3320                        D
3321                        <|fim_suffix|>
3322                        e
3323                    "},
3324                    expected: indoc! {"
3325                        a
3326                        b
3327                        c
3328                        D
3329                        e
3330                    "},
3331                },
3332            ];
3333
3334            for case in cases {
3335                let (edit_range, replacement) =
3336                    apply_variable_edit(case.original, case.model_output).unwrap();
3337                let mut edited = case.original.to_string();
3338                edited.replace_range(edit_range, &replacement);
3339                assert_eq!(edited, case.expected, "{}", case.name);
3340            }
3341        }
3342
3343        #[test]
3344        fn test_patch_to_variable_edit() {
3345            struct Case {
3346                name: &'static str,
3347                old: &'static str,
3348                patch: &'static str,
3349                cursor_offset: Option<usize>,
3350                expected_variable_edit: &'static str,
3351                expected_after_apply: &'static str,
3352            }
3353
3354            let cases = [
3355                Case {
3356                    name: "simple_replacement",
3357                    old: indoc! {"
3358                        zero
3359                        one
3360                        two
3361                        three
3362                        four
3363                        five
3364                    "},
3365                    patch: indoc! {"
3366                        @@ -3,3 +3,3 @@
3367                         two
3368                        -three
3369                        +THREE
3370                         four
3371                    "},
3372                    cursor_offset: None,
3373                    expected_variable_edit: indoc! {"
3374                        one
3375                        two
3376                        <|fim_middle|>
3377                        THREE
3378                        <|fim_suffix|>
3379                        four
3380                        five
3381                    "},
3382                    expected_after_apply: indoc! {"
3383                        zero
3384                        one
3385                        two
3386                        THREE
3387                        four
3388                        five
3389                    "},
3390                },
3391                Case {
3392                    name: "insertion",
3393                    old: indoc! {"
3394                        a
3395                        b
3396                        c
3397                        d
3398                        e
3399                    "},
3400                    patch: indoc! {"
3401                        @@ -2,0 +3,1 @@
3402                         b
3403                        +X
3404                         c
3405                    "},
3406                    cursor_offset: None,
3407                    expected_variable_edit: indoc! {"
3408                        a
3409                        b
3410                        <|fim_middle|>
3411                        X
3412                        <|fim_suffix|>
3413                        c
3414                        d
3415                    "},
3416                    expected_after_apply: indoc! {"
3417                        a
3418                        b
3419                        X
3420                        c
3421                        d
3422                        e
3423                    "},
3424                },
3425                Case {
3426                    name: "deletion",
3427                    old: indoc! {"
3428                        a
3429                        b
3430                        c
3431                        d
3432                        e
3433                    "},
3434                    patch: indoc! {"
3435                        @@ -2,3 +2,2 @@
3436                         b
3437                        -c
3438                         d
3439                    "},
3440                    cursor_offset: None,
3441                    expected_variable_edit: indoc! {"
3442                        a
3443                        b
3444                        <|fim_middle|>
3445                        <|fim_suffix|>
3446                        d
3447                        e
3448                    "},
3449                    expected_after_apply: indoc! {"
3450                        a
3451                        b
3452                        d
3453                        e
3454                    "},
3455                },
3456                Case {
3457                    name: "edit_near_start",
3458                    old: indoc! {"
3459                        first
3460                        second
3461                        third
3462                        fourth
3463                    "},
3464                    patch: indoc! {"
3465                        @@ -1,1 +1,1 @@
3466                        -first
3467                        +FIRST
3468                    "},
3469                    cursor_offset: None,
3470                    expected_variable_edit: indoc! {"
3471                        <|fim_middle|>
3472                        FIRST
3473                        <|fim_suffix|>
3474                        second
3475                        third
3476                    "},
3477                    expected_after_apply: indoc! {"
3478                        FIRST
3479                        second
3480                        third
3481                        fourth
3482                    "},
3483                },
3484                Case {
3485                    name: "edit_near_end",
3486                    old: indoc! {"
3487                        first
3488                        second
3489                        third
3490                        fourth
3491                    "},
3492                    patch: indoc! {"
3493                        @@ -4,1 +4,1 @@
3494                        -fourth
3495                        +FOURTH
3496                    "},
3497                    cursor_offset: None,
3498                    expected_variable_edit: indoc! {"
3499                        second
3500                        third
3501                        <|fim_middle|>
3502                        FOURTH
3503                        <|fim_suffix|>
3504                    "},
3505                    expected_after_apply: indoc! {"
3506                        first
3507                        second
3508                        third
3509                        FOURTH
3510                    "},
3511                },
3512                Case {
3513                    name: "cursor_at_start_of_replacement",
3514                    old: indoc! {"
3515                        zero
3516                        one
3517                        two
3518                        three
3519                        four
3520                        five
3521                    "},
3522                    patch: indoc! {"
3523                        @@ -3,3 +3,3 @@
3524                         two
3525                        -three
3526                        +THREE
3527                         four
3528                    "},
3529                    cursor_offset: Some(4),
3530                    expected_variable_edit: indoc! {"
3531                        one
3532                        two
3533                        <|fim_middle|>
3534                        <|user_cursor|>THREE
3535                        <|fim_suffix|>
3536                        four
3537                        five
3538                    "},
3539                    expected_after_apply: indoc! {"
3540                        zero
3541                        one
3542                        two
3543                        <|user_cursor|>THREE
3544                        four
3545                        five
3546                    "},
3547                },
3548                Case {
3549                    name: "cursor_in_middle_of_replacement",
3550                    old: indoc! {"
3551                        zero
3552                        one
3553                        two
3554                        three
3555                        four
3556                        five
3557                    "},
3558                    patch: indoc! {"
3559                        @@ -3,3 +3,3 @@
3560                         two
3561                        -three
3562                        +THREE
3563                         four
3564                    "},
3565                    cursor_offset: Some(6),
3566                    expected_variable_edit: indoc! {"
3567                        one
3568                        two
3569                        <|fim_middle|>
3570                        TH<|user_cursor|>REE
3571                        <|fim_suffix|>
3572                        four
3573                        five
3574                    "},
3575                    expected_after_apply: indoc! {"
3576                        zero
3577                        one
3578                        two
3579                        TH<|user_cursor|>REE
3580                        four
3581                        five
3582                    "},
3583                },
3584                Case {
3585                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3586                    old: indoc! {"
3587                        one
3588                        a
3589                        b
3590                        c
3591                        d
3592                        two
3593                        a
3594                        b
3595                        c
3596                        d
3597                        three
3598                        a
3599                        b
3600                        c
3601                        d
3602                        four
3603                    "},
3604                    patch: indoc! {"
3605                        @@ -4,5 +4,5 @@
3606                         two
3607                         a
3608                         b
3609                        -c
3610                        +C
3611                         d
3612                         three
3613                    "},
3614                    cursor_offset: None,
3615                    expected_variable_edit: indoc! {"
3616                        two
3617                        a
3618                        b
3619                        <|fim_middle|>
3620                        C
3621                        <|fim_suffix|>
3622                        d
3623                        three
3624                    "},
3625                    expected_after_apply: indoc! {"
3626                        one
3627                        a
3628                        b
3629                        c
3630                        d
3631                        two
3632                        a
3633                        b
3634                        C
3635                        d
3636                        three
3637                        a
3638                        b
3639                        c
3640                        d
3641                        four
3642                    "},
3643                },
3644                Case {
3645                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3646                    old: indoc! {"
3647                        {
3648                            {
3649                                one();
3650                            }
3651                        }
3652                        {
3653                            {
3654                                two();
3655                            }
3656                        }
3657                        {
3658                            {
3659                                three();
3660                            }
3661                        }
3662                        {
3663                            {
3664                                four();
3665                            }
3666                        }
3667                    "},
3668                    patch: indoc! {"
3669                        @@ -4,5 +4,5 @@
3670                             {
3671                        -        two();
3672                        +        TWO();
3673                             }
3674                    "},
3675                    cursor_offset: None,
3676                    expected_variable_edit: indoc! {"
3677                                one();
3678                            }
3679                        }
3680                        {
3681                            {
3682                        <|fim_middle|>
3683                                TWO();
3684                        <|fim_suffix|>
3685                            }
3686                        }
3687                        {
3688                            {
3689                                three();
3690                    "},
3691                    expected_after_apply: indoc! {"
3692                        {
3693                            {
3694                                one();
3695                            }
3696                        }
3697                        {
3698                            {
3699                                TWO();
3700                            }
3701                        }
3702                        {
3703                            {
3704                                three();
3705                            }
3706                        }
3707                        {
3708                            {
3709                                four();
3710                            }
3711                        }
3712                    "},
3713                },
3714            ];
3715
3716            for case in cases {
3717                let output =
3718                    patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
3719                        .unwrap_or_else(|error| {
3720                            panic!("failed converting patch for {}: {error}", case.name)
3721                        });
3722                assert_eq!(
3723                    output, case.expected_variable_edit,
3724                    "patch->variable_edit mismatch for {}",
3725                    case.name
3726                );
3727
3728                let (edit_range, replacement) = apply_variable_edit(case.old, &output)
3729                    .unwrap_or_else(|error| {
3730                        panic!("failed applying variable_edit for {}: {error}", case.name)
3731                    });
3732                let mut edited_by_variable_edit = case.old.to_string();
3733                edited_by_variable_edit.replace_range(edit_range, &replacement);
3734                assert_eq!(
3735                    edited_by_variable_edit, case.expected_after_apply,
3736                    "variable_edit apply mismatch for {}",
3737                    case.name
3738                );
3739
3740                let (expected_edit_range, expected_replacement) =
3741                    apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
3742                        |error| {
3743                            panic!(
3744                                "failed applying expected variable_edit for {}: {error}",
3745                                case.name
3746                            )
3747                        },
3748                    );
3749                let mut edited_by_expected_variable_edit = case.old.to_string();
3750                edited_by_expected_variable_edit
3751                    .replace_range(expected_edit_range, &expected_replacement);
3752                assert_eq!(
3753                    edited_by_expected_variable_edit, case.expected_after_apply,
3754                    "expected variable_edit apply mismatch for {}",
3755                    case.name
3756                );
3757            }
3758        }
3759
3760        #[test]
3761        fn test_write_cursor_excerpt_section() {
3762            let path = Path::new("test.rs");
3763            let context = "fn main() {\n    hello();\n}\n";
3764            let cursor_offset = 17;
3765            let mut prompt = String::new();
3766            write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
3767            assert_eq!(
3768                prompt,
3769                "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
3770            );
3771        }
3772    }
3773}
3774
3775/// The zeta1 prompt format
3776pub mod zeta1 {
3777    use super::*;
3778    use std::fmt::Write;
3779
3780    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
3781    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
3782    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
3783    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
3784
3785    const INSTRUCTION_HEADER: &str = concat!(
3786        "### Instruction:\n",
3787        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3788        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3789        "into account the cursor location.\n\n",
3790        "### User Edits:\n\n"
3791    );
3792    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
3793    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
3794
3795    /// Formats a complete zeta1 prompt from the input events and excerpt.
3796    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
3797        let mut prompt = String::with_capacity(
3798            INSTRUCTION_HEADER.len()
3799                + input_events.len()
3800                + EXCERPT_HEADER.len()
3801                + input_excerpt.len()
3802                + RESPONSE_HEADER.len(),
3803        );
3804        prompt.push_str(INSTRUCTION_HEADER);
3805        prompt.push_str(input_events);
3806        prompt.push_str(EXCERPT_HEADER);
3807        prompt.push_str(input_excerpt);
3808        prompt.push_str(RESPONSE_HEADER);
3809        prompt
3810    }
3811
3812    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
3813    /// editable and context byte-offset ranges within `cursor_excerpt`.
3814    pub fn format_zeta1_from_input(
3815        input: &ZetaPromptInput,
3816        editable_range: Range<usize>,
3817        context_range: Range<usize>,
3818    ) -> String {
3819        let events = format_zeta1_events(&input.events);
3820        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
3821        format_zeta1_prompt(&events, &excerpt)
3822    }
3823
3824    /// Formats events in zeta1 style (oldest first).
3825    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
3826        let mut result = String::new();
3827        for event in events {
3828            let event_string = format_zeta1_event(event);
3829            if event_string.is_empty() {
3830                continue;
3831            }
3832            if !result.is_empty() {
3833                result.push_str("\n\n");
3834            }
3835            result.push_str(&event_string);
3836        }
3837        result
3838    }
3839
3840    fn format_zeta1_event(event: &Event) -> String {
3841        match event {
3842            Event::BufferChange {
3843                path,
3844                old_path,
3845                diff,
3846                ..
3847            } => {
3848                let mut prompt = String::new();
3849                if old_path != path {
3850                    writeln!(
3851                        prompt,
3852                        "User renamed {} to {}\n",
3853                        old_path.display(),
3854                        path.display()
3855                    )
3856                    .ok();
3857                }
3858                if !diff.is_empty() {
3859                    write!(
3860                        prompt,
3861                        "User edited {}:\n```diff\n{}\n```",
3862                        path.display(),
3863                        diff
3864                    )
3865                    .ok();
3866                }
3867                prompt
3868            }
3869        }
3870    }
3871
3872    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
3873    /// within `cursor_excerpt`.
3874    fn format_zeta1_excerpt(
3875        input: &ZetaPromptInput,
3876        editable_range: Range<usize>,
3877        context_range: Range<usize>,
3878    ) -> String {
3879        let path_str = input.cursor_path.to_string_lossy();
3880        let excerpt = &*input.cursor_excerpt;
3881        let cursor_offset = input.cursor_offset_in_excerpt;
3882
3883        let mut prompt = String::new();
3884        writeln!(&mut prompt, "```{path_str}").ok();
3885
3886        let starts_at_file_beginning =
3887            input.excerpt_start_row == Some(0) && context_range.start == 0;
3888        if starts_at_file_beginning {
3889            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
3890        }
3891
3892        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
3893
3894        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
3895        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
3896        prompt.push_str(CURSOR_MARKER);
3897        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
3898        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
3899
3900        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
3901        write!(prompt, "\n```").ok();
3902
3903        prompt
3904    }
3905
3906    /// Cleans zeta1 model output by extracting content between editable region
3907    /// markers and converting the zeta1 cursor marker to the universal one.
3908    /// Returns `None` if the output doesn't contain the expected markers.
3909    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
3910        let content = output.replace(CURSOR_MARKER, "");
3911
3912        let content_start = content
3913            .find(EDITABLE_REGION_START_MARKER)
3914            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
3915            .map(|pos| {
3916                if content.as_bytes().get(pos) == Some(&b'\n') {
3917                    pos + 1
3918                } else {
3919                    pos
3920                }
3921            })
3922            .unwrap_or(0);
3923
3924        let content_end = content
3925            .find(EDITABLE_REGION_END_MARKER)
3926            .map(|pos| {
3927                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
3928                    pos - 1
3929                } else {
3930                    pos
3931                }
3932            })
3933            .unwrap_or(content.len());
3934
3935        if content_start > content_end {
3936            return Some(String::new());
3937        }
3938
3939        let extracted = &content[content_start..content_end];
3940
3941        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
3942            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
3943            let text_before_cursor = text_before_cursor
3944                .find(EDITABLE_REGION_START_MARKER)
3945                .map(|pos| {
3946                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
3947                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
3948                        after_marker + 1
3949                    } else {
3950                        after_marker
3951                    }
3952                })
3953                .unwrap_or(0);
3954            let offset_in_extracted = zeta1_cursor_pos
3955                .saturating_sub(text_before_cursor)
3956                .min(extracted.len());
3957            offset_in_extracted
3958        });
3959
3960        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
3961        if let Some(offset) = cursor_offset {
3962            result.push_str(&extracted[..offset]);
3963            result.push_str(super::CURSOR_MARKER);
3964            result.push_str(&extracted[offset..]);
3965        } else {
3966            result.push_str(extracted);
3967        }
3968
3969        Some(result)
3970    }
3971}
3972
3973#[cfg(test)]
3974mod tests {
3975    use super::*;
3976    use indoc::indoc;
3977
3978    fn make_input(
3979        cursor_excerpt: &str,
3980        editable_range: Range<usize>,
3981        cursor_offset: usize,
3982        events: Vec<Event>,
3983        related_files: Vec<RelatedFile>,
3984    ) -> ZetaPromptInput {
3985        let context_range = 0..cursor_excerpt.len();
3986        ZetaPromptInput {
3987            cursor_path: Path::new("test.rs").into(),
3988            cursor_excerpt: cursor_excerpt.into(),
3989            cursor_offset_in_excerpt: cursor_offset,
3990            excerpt_start_row: None,
3991            events: events.into_iter().map(Arc::new).collect(),
3992            related_files: Some(related_files),
3993            active_buffer_diagnostics: vec![],
3994            excerpt_ranges: ExcerptRanges {
3995                editable_150: editable_range.clone(),
3996                editable_180: editable_range.clone(),
3997                editable_350: editable_range,
3998                editable_150_context_350: context_range.clone(),
3999                editable_180_context_350: context_range.clone(),
4000                editable_350_context_150: context_range,
4001                ..Default::default()
4002            },
4003            syntax_ranges: None,
4004            experiment: None,
4005            in_open_source_repo: false,
4006            can_collect_data: false,
4007            repo_url: None,
4008        }
4009    }
4010
4011    fn make_input_with_context_range(
4012        excerpt: &str,
4013        editable_range: Range<usize>,
4014        context_range: Range<usize>,
4015        cursor_offset: usize,
4016    ) -> ZetaPromptInput {
4017        ZetaPromptInput {
4018            cursor_path: Path::new("test.rs").into(),
4019            cursor_excerpt: excerpt.into(),
4020            cursor_offset_in_excerpt: cursor_offset,
4021            excerpt_start_row: None,
4022            events: vec![],
4023            related_files: Some(vec![]),
4024            active_buffer_diagnostics: vec![],
4025            excerpt_ranges: ExcerptRanges {
4026                editable_150: editable_range.clone(),
4027                editable_180: editable_range.clone(),
4028                editable_350: editable_range,
4029                editable_150_context_350: context_range.clone(),
4030                editable_180_context_350: context_range.clone(),
4031                editable_350_context_150: context_range,
4032                ..Default::default()
4033            },
4034            syntax_ranges: None,
4035            experiment: None,
4036            in_open_source_repo: false,
4037            can_collect_data: false,
4038            repo_url: None,
4039        }
4040    }
4041
4042    fn make_event(path: &str, diff: &str) -> Event {
4043        Event::BufferChange {
4044            path: Path::new(path).into(),
4045            old_path: Path::new(path).into(),
4046            diff: diff.to_string(),
4047            predicted: false,
4048            in_open_source_repo: false,
4049        }
4050    }
4051
4052    fn make_related_file(path: &str, content: &str) -> RelatedFile {
4053        RelatedFile {
4054            path: Path::new(path).into(),
4055            max_row: content.lines().count() as u32,
4056            excerpts: vec![RelatedExcerpt {
4057                row_range: 0..content.lines().count() as u32,
4058                text: content.into(),
4059                order: 0,
4060            }],
4061            in_open_source_repo: false,
4062        }
4063    }
4064
4065    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4066        format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
4067    }
4068
4069    #[test]
4070    fn test_no_truncation_when_within_budget() {
4071        let input = make_input(
4072            "prefix\neditable\nsuffix",
4073            7..15,
4074            10,
4075            vec![make_event("a.rs", "-old\n+new\n")],
4076            vec![make_related_file("related.rs", "fn helper() {}\n")],
4077        );
4078
4079        assert_eq!(
4080            format_with_budget(&input, 10000),
4081            indoc! {r#"
4082                <|file_sep|>related.rs
4083                fn helper() {}
4084                <|file_sep|>edit history
4085                --- a/a.rs
4086                +++ b/a.rs
4087                -old
4088                +new
4089                <|file_sep|>test.rs
4090                <|fim_prefix|>
4091                prefix
4092                <|fim_middle|>current
4093                edi<|user_cursor|>table
4094                <|fim_suffix|>
4095
4096                suffix
4097                <|fim_middle|>updated
4098            "#}
4099        );
4100    }
4101
4102    #[test]
4103    fn test_truncation_drops_edit_history_when_budget_tight() {
4104        let input = make_input(
4105            "code",
4106            0..4,
4107            2,
4108            vec![make_event("a.rs", "-x\n+y\n")],
4109            vec![
4110                make_related_file("r1.rs", "a\n"),
4111                make_related_file("r2.rs", "b\n"),
4112            ],
4113        );
4114
4115        assert_eq!(
4116            format_with_budget(&input, 10000),
4117            indoc! {r#"
4118                <|file_sep|>r1.rs
4119                a
4120                <|file_sep|>r2.rs
4121                b
4122                <|file_sep|>edit history
4123                --- a/a.rs
4124                +++ b/a.rs
4125                -x
4126                +y
4127                <|file_sep|>test.rs
4128                <|fim_prefix|>
4129                <|fim_middle|>current
4130                co<|user_cursor|>de
4131                <|fim_suffix|>
4132                <|fim_middle|>updated
4133            "#}
4134        );
4135
4136        assert_eq!(
4137            format_with_budget(&input, 50),
4138            indoc! {r#"
4139                <|file_sep|>r1.rs
4140                a
4141                <|file_sep|>r2.rs
4142                b
4143                <|file_sep|>test.rs
4144                <|fim_prefix|>
4145                <|fim_middle|>current
4146                co<|user_cursor|>de
4147                <|fim_suffix|>
4148                <|fim_middle|>updated
4149            "#}
4150        );
4151    }
4152
4153    #[test]
4154    fn test_truncation_includes_partial_excerpts() {
4155        let input = make_input(
4156            "x",
4157            0..1,
4158            0,
4159            vec![],
4160            vec![RelatedFile {
4161                path: Path::new("big.rs").into(),
4162                max_row: 30,
4163                in_open_source_repo: false,
4164                excerpts: vec![
4165                    RelatedExcerpt {
4166                        row_range: 0..10,
4167                        text: "first excerpt\n".into(),
4168                        order: 0,
4169                    },
4170                    RelatedExcerpt {
4171                        row_range: 10..20,
4172                        text: "second excerpt\n".into(),
4173                        order: 0,
4174                    },
4175                    RelatedExcerpt {
4176                        row_range: 20..30,
4177                        text: "third excerpt\n".into(),
4178                        order: 0,
4179                    },
4180                ],
4181            }],
4182        );
4183
4184        assert_eq!(
4185            format_with_budget(&input, 10000),
4186            indoc! {r#"
4187                <|file_sep|>big.rs
4188                first excerpt
4189                ...
4190                second excerpt
4191                ...
4192                third excerpt
4193                <|file_sep|>test.rs
4194                <|fim_prefix|>
4195                <|fim_middle|>current
4196                <|user_cursor|>x
4197                <|fim_suffix|>
4198                <|fim_middle|>updated
4199            "#}
4200        );
4201
4202        assert_eq!(
4203            format_with_budget(&input, 50),
4204            indoc! {r#"
4205                <|file_sep|>big.rs
4206                first excerpt
4207                ...
4208                <|file_sep|>test.rs
4209                <|fim_prefix|>
4210                <|fim_middle|>current
4211                <|user_cursor|>x
4212                <|fim_suffix|>
4213                <|fim_middle|>updated
4214            "#}
4215        );
4216    }
4217
4218    #[test]
4219    fn test_truncation_prioritizes_lower_order_excerpts() {
4220        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
4221        // With tight budget, only the lower-order excerpt from file_b should be included.
4222        let input = make_input(
4223            "x",
4224            0..1,
4225            0,
4226            vec![],
4227            vec![
4228                RelatedFile {
4229                    path: Path::new("file_a.rs").into(),
4230                    max_row: 10,
4231                    in_open_source_repo: false,
4232                    excerpts: vec![RelatedExcerpt {
4233                        row_range: 0..10,
4234                        text: "low priority content\n".into(),
4235                        order: 5,
4236                    }],
4237                },
4238                RelatedFile {
4239                    path: Path::new("file_b.rs").into(),
4240                    max_row: 10,
4241                    in_open_source_repo: false,
4242                    excerpts: vec![RelatedExcerpt {
4243                        row_range: 0..10,
4244                        text: "high priority content\n".into(),
4245                        order: 1,
4246                    }],
4247                },
4248            ],
4249        );
4250
4251        // With large budget, both files included; rendered in stable lexicographic order.
4252        assert_eq!(
4253            format_with_budget(&input, 10000),
4254            indoc! {r#"
4255                <|file_sep|>file_a.rs
4256                low priority content
4257                <|file_sep|>file_b.rs
4258                high priority content
4259                <|file_sep|>test.rs
4260                <|fim_prefix|>
4261                <|fim_middle|>current
4262                <|user_cursor|>x
4263                <|fim_suffix|>
4264                <|fim_middle|>updated
4265            "#}
4266        );
4267
4268        // With tight budget, only file_b (lower order) fits.
4269        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
4270        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
4271        // file_a would need another 14 tokens, which doesn't fit.
4272        assert_eq!(
4273            format_with_budget(&input, 52),
4274            indoc! {r#"
4275                <|file_sep|>file_b.rs
4276                high priority content
4277                <|file_sep|>test.rs
4278                <|fim_prefix|>
4279                <|fim_middle|>current
4280                <|user_cursor|>x
4281                <|fim_suffix|>
4282                <|fim_middle|>updated
4283            "#}
4284        );
4285    }
4286
4287    #[test]
4288    fn test_truncation_drops_high_order_excerpts_within_file() {
4289        // A single file has excerpts at order 1 and order 3. With a tight budget,
4290        // only the order-1 excerpts are included while the order-3 excerpt is
4291        // dropped — even though they belong to the same file. This also preserves
4292        // the parent invariant: parent outline items have order ≤ their best
4293        // child, so they're always included when any child is.
4294        let input = make_input(
4295            "x",
4296            0..1,
4297            0,
4298            vec![],
4299            vec![RelatedFile {
4300                path: Path::new("mod.rs").into(),
4301                max_row: 30,
4302                in_open_source_repo: false,
4303                excerpts: vec![
4304                    RelatedExcerpt {
4305                        row_range: 0..5,
4306                        text: "mod header\n".into(),
4307                        order: 1,
4308                    },
4309                    RelatedExcerpt {
4310                        row_range: 5..15,
4311                        text: "important fn\n".into(),
4312                        order: 1,
4313                    },
4314                    RelatedExcerpt {
4315                        row_range: 15..30,
4316                        text: "less important fn\n".into(),
4317                        order: 3,
4318                    },
4319                ],
4320            }],
4321        );
4322
4323        // With large budget, all three excerpts included.
4324        assert_eq!(
4325            format_with_budget(&input, 10000),
4326            indoc! {r#"
4327                <|file_sep|>mod.rs
4328                mod header
4329                ...
4330                important fn
4331                ...
4332                less important fn
4333                <|file_sep|>test.rs
4334                <|fim_prefix|>
4335                <|fim_middle|>current
4336                <|user_cursor|>x
4337                <|fim_suffix|>
4338                <|fim_middle|>updated
4339            "#}
4340        );
4341
4342        // With tight budget, only order<=1 excerpts included (header + important fn).
4343        assert_eq!(
4344            format_with_budget(&input, 55),
4345            indoc! {r#"
4346                <|file_sep|>mod.rs
4347                mod header
4348                ...
4349                important fn
4350                ...
4351                <|file_sep|>test.rs
4352                <|fim_prefix|>
4353                <|fim_middle|>current
4354                <|user_cursor|>x
4355                <|fim_suffix|>
4356                <|fim_middle|>updated
4357            "#}
4358        );
4359    }
4360
4361    #[test]
4362    fn test_truncation_drops_older_events_first() {
4363        let input = make_input(
4364            "x",
4365            0..1,
4366            0,
4367            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
4368            vec![],
4369        );
4370
4371        assert_eq!(
4372            format_with_budget(&input, 10000),
4373            indoc! {r#"
4374                <|file_sep|>edit history
4375                --- a/old.rs
4376                +++ b/old.rs
4377                -1
4378                --- a/new.rs
4379                +++ b/new.rs
4380                -2
4381                <|file_sep|>test.rs
4382                <|fim_prefix|>
4383                <|fim_middle|>current
4384                <|user_cursor|>x
4385                <|fim_suffix|>
4386                <|fim_middle|>updated
4387            "#}
4388        );
4389
4390        assert_eq!(
4391            format_with_budget(&input, 55),
4392            indoc! {r#"
4393                <|file_sep|>edit history
4394                --- a/new.rs
4395                +++ b/new.rs
4396                -2
4397                <|file_sep|>test.rs
4398                <|fim_prefix|>
4399                <|fim_middle|>current
4400                <|user_cursor|>x
4401                <|fim_suffix|>
4402                <|fim_middle|>updated
4403            "#}
4404        );
4405    }
4406
4407    #[test]
4408    fn test_cursor_excerpt_always_included_with_minimal_budget() {
4409        let input = make_input(
4410            "fn main() {}",
4411            0..12,
4412            3,
4413            vec![make_event("a.rs", "-old\n+new\n")],
4414            vec![make_related_file("related.rs", "helper\n")],
4415        );
4416
4417        assert_eq!(
4418            format_with_budget(&input, 30),
4419            indoc! {r#"
4420                <|file_sep|>test.rs
4421                <|fim_prefix|>
4422                <|fim_middle|>current
4423                fn <|user_cursor|>main() {}
4424                <|fim_suffix|>
4425                <|fim_middle|>updated
4426            "#}
4427        );
4428    }
4429
4430    fn format_seed_coder(input: &ZetaPromptInput) -> String {
4431        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4432    }
4433
4434    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4435        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4436    }
4437
4438    #[test]
4439    fn test_seed_coder_basic_format() {
4440        let input = make_input(
4441            "prefix\neditable\nsuffix",
4442            7..15,
4443            10,
4444            vec![make_event("a.rs", "-old\n+new\n")],
4445            vec![make_related_file("related.rs", "fn helper() {}\n")],
4446        );
4447
4448        assert_eq!(
4449            format_seed_coder(&input),
4450            indoc! {r#"
4451                <[fim-suffix]>
4452                suffix
4453                <[fim-prefix]><filename>related.rs
4454                fn helper() {}
4455
4456                <filename>edit_history
4457                --- a/a.rs
4458                +++ b/a.rs
4459                -old
4460                +new
4461
4462                <filename>test.rs
4463                prefix
4464                <<<<<<< CURRENT
4465                edi<|user_cursor|>table
4466                =======
4467                <[fim-middle]>"#}
4468        );
4469    }
4470
4471    #[test]
4472    fn test_seed_coder_no_context() {
4473        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
4474
4475        assert_eq!(
4476            format_seed_coder(&input),
4477            indoc! {r#"
4478                <[fim-suffix]>
4479                after
4480                <[fim-prefix]><filename>test.rs
4481                before
4482                <<<<<<< CURRENT
4483                mid<|user_cursor|>dle
4484                =======
4485                <[fim-middle]>"#}
4486        );
4487    }
4488
4489    #[test]
4490    fn test_seed_coder_truncation_drops_context() {
4491        let input = make_input(
4492            "code",
4493            0..4,
4494            2,
4495            vec![make_event("a.rs", "-x\n+y\n")],
4496            vec![make_related_file("r1.rs", "content\n")],
4497        );
4498
4499        // With large budget, everything is included
4500        assert_eq!(
4501            format_seed_coder(&input),
4502            indoc! {r#"
4503                <[fim-suffix]>
4504                <[fim-prefix]><filename>r1.rs
4505                content
4506
4507                <filename>edit_history
4508                --- a/a.rs
4509                +++ b/a.rs
4510                -x
4511                +y
4512
4513                <filename>test.rs
4514                <<<<<<< CURRENT
4515                co<|user_cursor|>de
4516                =======
4517                <[fim-middle]>"#}
4518        );
4519
4520        // With tight budget, context is dropped but cursor section remains
4521        assert_eq!(
4522            format_seed_coder_with_budget(&input, 30),
4523            indoc! {r#"
4524                <[fim-suffix]>
4525                <[fim-prefix]><filename>test.rs
4526                <<<<<<< CURRENT
4527                co<|user_cursor|>de
4528                =======
4529                <[fim-middle]>"#}
4530        );
4531    }
4532
4533    #[test]
4534    fn test_seed_coder_truncation_prioritizes_lower_order() {
4535        let input = make_input(
4536            "code",
4537            0..4,
4538            2,
4539            vec![],
4540            vec![
4541                RelatedFile {
4542                    path: Path::new("low_prio.rs").into(),
4543                    max_row: 5,
4544                    in_open_source_repo: false,
4545                    excerpts: vec![RelatedExcerpt {
4546                        row_range: 0..5,
4547                        text: "low prio\n".into(),
4548                        order: 10,
4549                    }],
4550                },
4551                RelatedFile {
4552                    path: Path::new("high_prio.rs").into(),
4553                    max_row: 5,
4554                    in_open_source_repo: false,
4555                    excerpts: vec![RelatedExcerpt {
4556                        row_range: 0..5,
4557                        text: "high prio\n".into(),
4558                        order: 1,
4559                    }],
4560                },
4561            ],
4562        );
4563
4564        // With large budget, both included; rendered in stable lexicographic order.
4565        assert_eq!(
4566            format_seed_coder(&input),
4567            indoc! {r#"
4568                <[fim-suffix]>
4569                <[fim-prefix]><filename>low_prio.rs
4570                low prio
4571                <filename>high_prio.rs
4572                high prio
4573
4574                <filename>test.rs
4575                <<<<<<< CURRENT
4576                co<|user_cursor|>de
4577                =======
4578                <[fim-middle]>"#}
4579        );
4580
4581        // With tight budget, only high_prio included.
4582        // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
4583        // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
4584        assert_eq!(
4585            format_seed_coder_with_budget(&input, 44),
4586            indoc! {r#"
4587                <[fim-suffix]>
4588                <[fim-prefix]><filename>high_prio.rs
4589                high prio
4590
4591                <filename>test.rs
4592                <<<<<<< CURRENT
4593                co<|user_cursor|>de
4594                =======
4595                <[fim-middle]>"#}
4596        );
4597    }
4598
4599    #[test]
4600    fn test_format_zeta1_from_input_basic() {
4601        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
4602        let input = ZetaPromptInput {
4603            cursor_path: Path::new("src/main.rs").into(),
4604            cursor_excerpt: excerpt.into(),
4605            cursor_offset_in_excerpt: 30,
4606            excerpt_start_row: Some(0),
4607            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
4608            related_files: Some(vec![]),
4609            active_buffer_diagnostics: vec![],
4610            excerpt_ranges: ExcerptRanges {
4611                editable_150: 15..41,
4612                editable_180: 15..41,
4613                editable_350: 15..41,
4614                editable_150_context_350: 0..excerpt.len(),
4615                editable_180_context_350: 0..excerpt.len(),
4616                editable_350_context_150: 0..excerpt.len(),
4617                ..Default::default()
4618            },
4619            syntax_ranges: None,
4620            experiment: None,
4621            in_open_source_repo: false,
4622            can_collect_data: false,
4623            repo_url: None,
4624        };
4625
4626        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
4627
4628        assert_eq!(
4629            prompt,
4630            concat!(
4631                "### Instruction:\n",
4632                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4633                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4634                "into account the cursor location.\n",
4635                "\n",
4636                "### User Edits:\n",
4637                "\n",
4638                "User edited other.rs:\n",
4639                "```diff\n",
4640                "-old\n",
4641                "+new\n",
4642                "\n",
4643                "```\n",
4644                "\n",
4645                "### User Excerpt:\n",
4646                "\n",
4647                "```src/main.rs\n",
4648                "<|start_of_file|>\n",
4649                "fn before() {}\n",
4650                "<|editable_region_start|>\n",
4651                "fn foo() {\n",
4652                "    <|user_cursor_is_here|>let x = 1;\n",
4653                "\n",
4654                "<|editable_region_end|>}\n",
4655                "fn after() {}\n",
4656                "\n",
4657                "```\n",
4658                "\n",
4659                "### Response:\n",
4660            ),
4661        );
4662    }
4663
4664    #[test]
4665    fn test_format_zeta1_from_input_no_start_of_file() {
4666        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
4667        let input = ZetaPromptInput {
4668            cursor_path: Path::new("src/main.rs").into(),
4669            cursor_excerpt: excerpt.into(),
4670            cursor_offset_in_excerpt: 15,
4671            excerpt_start_row: Some(10),
4672            events: vec![],
4673            related_files: Some(vec![]),
4674            active_buffer_diagnostics: vec![],
4675            excerpt_ranges: ExcerptRanges {
4676                editable_150: 0..28,
4677                editable_180: 0..28,
4678                editable_350: 0..28,
4679                editable_150_context_350: 0..28,
4680                editable_180_context_350: 0..28,
4681                editable_350_context_150: 0..28,
4682                ..Default::default()
4683            },
4684            syntax_ranges: None,
4685            experiment: None,
4686            in_open_source_repo: false,
4687            can_collect_data: false,
4688            repo_url: None,
4689        };
4690
4691        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
4692
4693        assert_eq!(
4694            prompt,
4695            concat!(
4696                "### Instruction:\n",
4697                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4698                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4699                "into account the cursor location.\n",
4700                "\n",
4701                "### User Edits:\n",
4702                "\n",
4703                "\n",
4704                "\n",
4705                "### User Excerpt:\n",
4706                "\n",
4707                "```src/main.rs\n",
4708                "<|editable_region_start|>\n",
4709                "fn foo() {\n",
4710                "    <|user_cursor_is_here|>let x = 1;\n",
4711                "}\n",
4712                "\n",
4713                "<|editable_region_end|>\n",
4714                "```\n",
4715                "\n",
4716                "### Response:\n",
4717            ),
4718        );
4719    }
4720
4721    #[test]
4722    fn test_format_zeta1_from_input_with_sub_ranges() {
4723        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
4724        let editable_range = 10..37;
4725        let context_range = 0..excerpt.len();
4726
4727        let input = ZetaPromptInput {
4728            cursor_path: Path::new("test.rs").into(),
4729            cursor_excerpt: excerpt.into(),
4730            cursor_offset_in_excerpt: 25,
4731            excerpt_start_row: Some(0),
4732            events: vec![],
4733            related_files: Some(vec![]),
4734            active_buffer_diagnostics: vec![],
4735            excerpt_ranges: ExcerptRanges {
4736                editable_150: editable_range.clone(),
4737                editable_180: editable_range.clone(),
4738                editable_350: editable_range.clone(),
4739                editable_150_context_350: context_range.clone(),
4740                editable_180_context_350: context_range.clone(),
4741                editable_350_context_150: context_range.clone(),
4742                ..Default::default()
4743            },
4744            syntax_ranges: None,
4745            experiment: None,
4746            in_open_source_repo: false,
4747            can_collect_data: false,
4748            repo_url: None,
4749        };
4750
4751        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
4752
4753        assert_eq!(
4754            prompt,
4755            concat!(
4756                "### Instruction:\n",
4757                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4758                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4759                "into account the cursor location.\n",
4760                "\n",
4761                "### User Edits:\n",
4762                "\n",
4763                "\n",
4764                "\n",
4765                "### User Excerpt:\n",
4766                "\n",
4767                "```test.rs\n",
4768                "<|start_of_file|>\n",
4769                "// prefix\n",
4770                "<|editable_region_start|>\n",
4771                "fn foo() {\n",
4772                "    <|user_cursor_is_here|>let x = 1;\n",
4773                "}\n",
4774                "<|editable_region_end|>\n",
4775                "// suffix\n",
4776                "\n",
4777                "```\n",
4778                "\n",
4779                "### Response:\n",
4780            ),
4781        );
4782    }
4783
4784    #[test]
4785    fn test_clean_zeta1_model_output_basic() {
4786        let output = indoc! {"
4787            <|editable_region_start|>
4788            fn main() {
4789                println!(\"hello\");
4790            }
4791            <|editable_region_end|>
4792        "};
4793
4794        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4795        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
4796    }
4797
4798    #[test]
4799    fn test_clean_zeta1_model_output_with_cursor() {
4800        let output = indoc! {"
4801            <|editable_region_start|>
4802            fn main() {
4803                <|user_cursor_is_here|>println!(\"hello\");
4804            }
4805            <|editable_region_end|>
4806        "};
4807
4808        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4809        assert_eq!(
4810            cleaned,
4811            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
4812        );
4813    }
4814
4815    #[test]
4816    fn test_clean_zeta1_model_output_no_markers() {
4817        let output = "fn main() {}\n";
4818        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4819        assert_eq!(cleaned, "fn main() {}\n");
4820    }
4821
4822    #[test]
4823    fn test_clean_zeta1_model_output_empty_region() {
4824        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
4825        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4826        assert_eq!(cleaned, "");
4827    }
4828
4829    fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
4830        let mut result = excerpt.to_string();
4831        result.replace_range(
4832            parsed_output.range_in_excerpt.clone(),
4833            &parsed_output.new_editable_region,
4834        );
4835        result
4836    }
4837
4838    #[test]
4839    fn test_parse_zeta2_model_output() {
4840        let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
4841        let context_start = excerpt.find("ctx start").unwrap();
4842        let context_end = excerpt.find("after ctx").unwrap();
4843        let editable_start = excerpt.find("editable old").unwrap();
4844        let editable_end = editable_start + "editable old\n".len();
4845        let input = make_input_with_context_range(
4846            excerpt,
4847            editable_start..editable_end,
4848            context_start..context_end,
4849            editable_start,
4850        );
4851
4852        let output = parse_zeta2_model_output(
4853            "editable new\n>>>>>>> UPDATED\n",
4854            ZetaFormat::V0131GitMergeMarkersPrefix,
4855            &input,
4856        )
4857        .unwrap();
4858
4859        assert_eq!(
4860            apply_edit(excerpt, &output),
4861            "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
4862        );
4863    }
4864
4865    #[test]
4866    fn test_parse_zeta2_model_output_identity() {
4867        let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
4868        let editable_start = excerpt.find("bbb").unwrap();
4869        let editable_end = excerpt.find("ddd").unwrap();
4870        let input = make_input_with_context_range(
4871            excerpt,
4872            editable_start..editable_end,
4873            0..excerpt.len(),
4874            editable_start,
4875        );
4876
4877        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
4878        let output =
4879            parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
4880
4881        assert_eq!(apply_edit(excerpt, &output), excerpt);
4882    }
4883
4884    #[test]
4885    fn test_parse_zeta2_model_output_strips_end_marker() {
4886        let excerpt = "hello\nworld\n";
4887        let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
4888
4889        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
4890        let output1 =
4891            parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
4892        let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();
4893
4894        assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
4895        assert_eq!(apply_edit(excerpt, &output1), "new content\n");
4896    }
4897}