zeta_prompt.rs

   1use anyhow::{Result, anyhow};
   2use serde::{Deserialize, Serialize};
   3use std::fmt::Write;
   4use std::ops::Range;
   5use std::path::Path;
   6use std::sync::Arc;
   7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
   8
/// Marker string that the prompt writers in this file insert at the user's
/// cursor position inside the editable region.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes to
/// `format_prompt_with_budget_for_format`.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  16
  17fn estimate_tokens(bytes: usize) -> usize {
  18    bytes / 3
  19}
  20
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
///
/// The ranges are offsets into the full excerpt; `resolve_cursor_region`
/// rebases the editable range onto the selected context range.
#[derive(Clone, Debug, Default, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Editable region computed with a 512-token budget.
    pub editable_512: Option<Range<usize>>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
    /// Presumably the context boundary when using editable_350 with 512 tokens of
    /// additional context (inferred from the sibling field names; confirm with the producer).
    pub editable_350_context_512: Option<Range<usize>>,
    /// Presumably the context boundary when using editable_350 with 1024 tokens of
    /// additional context (inferred from the sibling field names; confirm with the producer).
    pub editable_350_context_1024: Option<Range<usize>>,
    /// Presumably a context range computed with a 4096-token budget (inferred from the name).
    pub context_4096: Option<Range<usize>>,
    /// Presumably a context range computed with an 8192-token budget (inferred from the name).
    /// When present, `excerpt_ranges_for_format` uses it as both the editable and
    /// the context range for `ZetaFormat::V0304VariableEdit`.
    pub context_8192: Option<Range<usize>>,
}
  45
/// Everything needed to render a prompt for one prediction request.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written into the prompt header
    /// of the cursor excerpt section.
    pub cursor_path: Arc<Path>,
    /// Text surrounding the cursor. The ranges in `excerpt_ranges` are byte
    /// offsets into this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Row at which `cursor_excerpt` starts within its file. When `Some`,
    /// `format_prompt_with_budget_for_format` offsets the context's row range by
    /// this value and drops related excerpts from the same file that lie entirely
    /// inside that range.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit-history events. When the token budget is tight, entries at the end of
    /// this list are kept in preference to earlier ones.
    pub events: Vec<Arc<Event>>,
    /// Additional files/excerpts rendered into the prompt as budget allows.
    pub related_files: Vec<RelatedFile>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    // NOTE(review): the three fields below are not read by the formatting
    // functions visible in this part of the file; presumably consumed elsewhere.
    #[serde(default)]
    pub in_open_source_repo: bool,
    #[serde(default)]
    pub can_collect_data: bool,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
  67
/// Identifies which prompt/output format a request uses.
///
/// The variant name doubles as its string form (via `IntoStaticStr`), which is
/// what `Display` prints and what `ZetaFormat::parse` matches against.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Required because `v0226Hashline` starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    /// The format used when none is specified.
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
}
  95
  96impl std::fmt::Display for ZetaFormat {
  97    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  98        write!(f, "{}", <&'static str>::from(self))
  99    }
 100}
 101
 102impl ZetaFormat {
 103    pub fn parse(format_name: &str) -> Result<Self> {
 104        let mut results = ZetaFormat::iter().filter(|version| {
 105            <&'static str>::from(version)
 106                .to_lowercase()
 107                .contains(&format_name.to_lowercase())
 108        });
 109        let Some(result) = results.next() else {
 110            anyhow::bail!(
 111                "`{format_name}` did not match any of:\n{}",
 112                Self::options_as_string()
 113            );
 114        };
 115        if results.next().is_some() {
 116            anyhow::bail!(
 117                "`{format_name}` matched more than one of:\n{}",
 118                Self::options_as_string()
 119            );
 120        }
 121        Ok(result)
 122    }
 123
 124    pub fn options_as_string() -> String {
 125        ZetaFormat::iter()
 126            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 127            .collect::<Vec<_>>()
 128            .concat()
 129    }
 130}
 131
/// An edit-history event. Serialized with an `"event"` tag field.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, rendered by `write_event` as a diff with
    /// `--- a/<old_path>` / `+++ b/<path>` header lines.
    BufferChange {
        /// Path of the file after the change (the `+++ b` side).
        path: Arc<Path>,
        /// Path of the file before the change (the `--- a` side).
        old_path: Arc<Path>,
        /// Diff text; appended verbatim after the header lines.
        diff: String,
        /// When true, `write_event` prefixes the event with a
        /// `// User accepted prediction:` line.
        predicted: bool,
        /// Not used when rendering the event; exposed via `Event::in_open_source_repo`.
        in_open_source_repo: bool,
    },
}
 143
 144impl Event {
 145    pub fn in_open_source_repo(&self) -> bool {
 146        match self {
 147            Event::BufferChange {
 148                in_open_source_repo,
 149                ..
 150            } => *in_open_source_repo,
 151        }
 152    }
 153}
 154
 155pub fn write_event(prompt: &mut String, event: &Event) {
 156    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 157        for component in path.components() {
 158            prompt.push('/');
 159            write!(prompt, "{}", component.as_os_str().display()).ok();
 160        }
 161    }
 162    match event {
 163        Event::BufferChange {
 164            path,
 165            old_path,
 166            diff,
 167            predicted,
 168            in_open_source_repo: _,
 169        } => {
 170            if *predicted {
 171                prompt.push_str("// User accepted prediction:\n");
 172            }
 173            prompt.push_str("--- a");
 174            write_path_as_unix_str(prompt, old_path.as_ref());
 175            prompt.push_str("\n+++ b");
 176            write_path_as_unix_str(prompt, path.as_ref());
 177            prompt.push('\n');
 178            prompt.push_str(diff);
 179        }
 180    }
 181}
 182
/// A file with excerpts that may be included in the prompt as extra context.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written into the file's header line in the prompt.
    pub path: Arc<Path>,
    /// Compared against each excerpt's `row_range.end`: when the excerpt ends
    /// before this row, a `...` line is rendered after it.
    pub max_row: u32,
    /// Excerpts of this file. Files left with no excerpts are dropped by
    /// `filter_redundant_excerpts`.
    pub excerpts: Vec<RelatedExcerpt>,
    // NOTE(review): not read by the formatting code visible in this part of the file.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 191
/// A contiguous piece of a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file covered by `text`.
    pub row_range: Range<u32>,
    /// The excerpt's text, rendered verbatim (a trailing newline is added if missing).
    pub text: Arc<str>,
    /// Selection priority used by `format_related_files_within_budget`:
    /// excerpts with lower values are considered first when fitting the budget.
    #[serde(default)]
    pub order: usize,
}
 199
 200pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 201    special_tokens_for_format(format)
 202        .iter()
 203        .any(|token| input.cursor_excerpt.contains(token))
 204}
 205
/// Renders the prompt for `input` in the given `format`, using
/// `MAX_PROMPT_TOKENS` as the token budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
 209
 210pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 211    match format {
 212        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
 213        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
 214        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
 215        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 216        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
 217        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
 218        ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 219        ZetaFormat::v0226Hashline => hashline::special_tokens(),
 220        ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
 221        ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
 222    }
 223}
 224
 225pub fn excerpt_ranges_for_format(
 226    format: ZetaFormat,
 227    ranges: &ExcerptRanges,
 228) -> (Range<usize>, Range<usize>) {
 229    match format {
 230        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 231            ranges.editable_150.clone(),
 232            ranges.editable_150_context_350.clone(),
 233        ),
 234        ZetaFormat::V0114180EditableRegion => (
 235            ranges.editable_180.clone(),
 236            ranges.editable_180_context_350.clone(),
 237        ),
 238        ZetaFormat::V0120GitMergeMarkers
 239        | ZetaFormat::V0131GitMergeMarkersPrefix
 240        | ZetaFormat::V0211Prefill
 241        | ZetaFormat::V0211SeedCoder
 242        | ZetaFormat::v0226Hashline
 243        | ZetaFormat::V0304SeedNoEdits => (
 244            ranges.editable_350.clone(),
 245            ranges.editable_350_context_150.clone(),
 246        ),
 247        ZetaFormat::V0304VariableEdit => {
 248            let context = ranges
 249                .context_8192
 250                .clone()
 251                .unwrap_or_else(|| ranges.editable_350_context_150.clone());
 252            (context.clone(), context)
 253        }
 254    }
 255}
 256
/// Appends the cursor-excerpt section of the prompt for `format` to `prompt`
/// by delegating to the writer of the module that implements that format.
///
/// `context` is the context text, `editable_range` the editable region as
/// byte offsets into `context`, and `cursor_offset` the cursor position as a
/// byte offset into `context`. Several formats share a writer; the
/// `V0304VariableEdit` writer does not take `editable_range`.
pub fn write_cursor_excerpt_section_for_format(
    format: ZetaFormat,
    prompt: &mut String,
    path: &Path,
    context: &str,
    editable_range: &Range<usize>,
    cursor_offset: usize,
) {
    match format {
        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // V0114180EditableRegion reuses the V0113Ordered layout.
        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
            v0113_ordered::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // V0211Prefill reuses the V0131GitMergeMarkersPrefix layout.
        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        // Both seed-coder formats share one writer.
        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
            seed_coder::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        // This writer is not given `editable_range`.
        ZetaFormat::V0304VariableEdit => {
            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
        }
    }
}
 319
 320fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
 321    let start_row = text[0..range.start].matches('\n').count() as u32;
 322    let mut end_row = start_row + text[range.clone()].matches('\n').count() as u32;
 323    if !text[..range.end].ends_with('\n') {
 324        end_row += 1;
 325    }
 326    return start_row..end_row;
 327}
 328
 329pub fn format_prompt_with_budget_for_format(
 330    input: &ZetaPromptInput,
 331    format: ZetaFormat,
 332    max_tokens: usize,
 333) -> String {
 334    let (context, editable_range, context_range, cursor_offset) =
 335        resolve_cursor_region(input, format);
 336    let path = &*input.cursor_path;
 337
 338    let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
 339        let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
 340        let row_range = relative_row_range.start + cursor_excerpt_start_row
 341            ..relative_row_range.end + cursor_excerpt_start_row;
 342        &filter_redundant_excerpts(
 343            input.related_files.clone(),
 344            input.cursor_path.as_ref(),
 345            row_range,
 346        )
 347    } else {
 348        &input.related_files
 349    };
 350
 351    match format {
 352        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
 353            seed_coder::format_prompt_with_budget(
 354                path,
 355                context,
 356                &editable_range,
 357                cursor_offset,
 358                &input.events,
 359                related_files,
 360                max_tokens,
 361            )
 362        }
 363        _ => {
 364            let mut cursor_section = String::new();
 365            write_cursor_excerpt_section_for_format(
 366                format,
 367                &mut cursor_section,
 368                path,
 369                context,
 370                &editable_range,
 371                cursor_offset,
 372            );
 373
 374            let cursor_tokens = estimate_tokens(cursor_section.len());
 375            let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
 376
 377            let edit_history_section = format_edit_history_within_budget(
 378                &input.events,
 379                "<|file_sep|>",
 380                "edit history",
 381                budget_after_cursor,
 382            );
 383            let edit_history_tokens = estimate_tokens(edit_history_section.len());
 384            let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 385
 386            let related_files_section = format_related_files_within_budget(
 387                &related_files,
 388                "<|file_sep|>",
 389                "",
 390                budget_after_edit_history,
 391            );
 392
 393            let mut prompt = String::new();
 394            prompt.push_str(&related_files_section);
 395            prompt.push_str(&edit_history_section);
 396            prompt.push_str(&cursor_section);
 397            prompt
 398        }
 399    }
 400}
 401
 402pub fn filter_redundant_excerpts(
 403    mut related_files: Vec<RelatedFile>,
 404    cursor_path: &Path,
 405    cursor_row_range: Range<u32>,
 406) -> Vec<RelatedFile> {
 407    for file in &mut related_files {
 408        if file.path.as_ref() == cursor_path {
 409            file.excerpts.retain(|excerpt| {
 410                excerpt.row_range.start < cursor_row_range.start
 411                    || excerpt.row_range.end > cursor_row_range.end
 412            });
 413        }
 414    }
 415    related_files.retain(|file| !file.excerpts.is_empty());
 416    related_files
 417}
 418
 419pub fn get_prefill_for_format(
 420    format: ZetaFormat,
 421    context: &str,
 422    editable_range: &Range<usize>,
 423) -> String {
 424    match format {
 425        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
 426        ZetaFormat::V0112MiddleAtEnd
 427        | ZetaFormat::V0113Ordered
 428        | ZetaFormat::V0114180EditableRegion
 429        | ZetaFormat::V0120GitMergeMarkers
 430        | ZetaFormat::V0131GitMergeMarkersPrefix
 431        | ZetaFormat::V0211SeedCoder
 432        | ZetaFormat::v0226Hashline
 433        | ZetaFormat::V0304VariableEdit => String::new(),
 434        ZetaFormat::V0304SeedNoEdits => String::new(),
 435    }
 436}
 437
 438pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
 439    match format {
 440        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
 441        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
 442        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
 443        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => Some(seed_coder::END_MARKER),
 444        ZetaFormat::V0112MiddleAtEnd
 445        | ZetaFormat::V0113Ordered
 446        | ZetaFormat::V0114180EditableRegion
 447        | ZetaFormat::v0226Hashline
 448        | ZetaFormat::V0304VariableEdit => None,
 449    }
 450}
 451
 452pub fn encode_patch_as_output_for_format(
 453    format: ZetaFormat,
 454    old_editable_region: &str,
 455    patch: &str,
 456    cursor_offset: Option<usize>,
 457) -> Result<Option<String>> {
 458    match format {
 459        ZetaFormat::v0226Hashline => {
 460            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
 461        }
 462        ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
 463            old_editable_region,
 464            patch,
 465            cursor_offset,
 466        )
 467        .map(Some),
 468        ZetaFormat::V0304SeedNoEdits => Ok(seed_coder::no_edits(patch)),
 469        _ => Ok(None),
 470    }
 471}
 472
 473/// Parse model output for the given zeta format
 474pub fn parse_zeta2_model_output(
 475    output: &str,
 476    format: ZetaFormat,
 477    prompt_inputs: &ZetaPromptInput,
 478) -> Result<(Range<usize>, String)> {
 479    let output = match output_end_marker_for_format(format) {
 480        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
 481        None => output,
 482    };
 483
 484    let (context, editable_range_in_context, context_range, _) =
 485        resolve_cursor_region(prompt_inputs, format);
 486    let context_start = context_range.start;
 487    let old_editable_region = &context[editable_range_in_context.clone()];
 488
 489    let (range_in_context, output) = match format {
 490        ZetaFormat::v0226Hashline => (
 491            editable_range_in_context,
 492            if hashline::output_has_edit_commands(output) {
 493                hashline::apply_edit_commands(old_editable_region, output)
 494            } else {
 495                output.to_string()
 496            },
 497        ),
 498        ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
 499        ZetaFormat::V0304SeedNoEdits => (
 500            editable_range_in_context,
 501            if output.starts_with(seed_coder::NO_EDITS) {
 502                old_editable_region.to_string()
 503            } else {
 504                output.to_string()
 505            },
 506        ),
 507        _ => (editable_range_in_context, output.to_string()),
 508    };
 509
 510    let range_in_excerpt =
 511        range_in_context.start + context_start..range_in_context.end + context_start;
 512    Ok((range_in_excerpt, output))
 513}
 514
/// Singular-named alias of `excerpt_ranges_for_format`; forwards its arguments
/// unchanged and returns the same `(editable, context)` pair.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
 521
 522pub fn resolve_cursor_region(
 523    input: &ZetaPromptInput,
 524    format: ZetaFormat,
 525) -> (&str, Range<usize>, Range<usize>, usize) {
 526    let (editable_range, context_range) = excerpt_range_for_format(format, &input.excerpt_ranges);
 527    let context_start = context_range.start;
 528    let context_text = &input.cursor_excerpt[context_range.clone()];
 529    let adjusted_editable =
 530        (editable_range.start - context_start)..(editable_range.end - context_start);
 531    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 532
 533    (
 534        context_text,
 535        adjusted_editable,
 536        context_range,
 537        adjusted_cursor,
 538    )
 539}
 540
/// Computes the prefill for `input` in `format`: resolves the cursor region
/// and passes its context text and editable range to `get_prefill_for_format`.
pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
    get_prefill_for_format(format, context, &editable_range)
}
 545
 546fn format_edit_history_within_budget(
 547    events: &[Arc<Event>],
 548    file_marker: &str,
 549    edit_history_name: &str,
 550    max_tokens: usize,
 551) -> String {
 552    let header = format!("{}{}\n", file_marker, edit_history_name);
 553    let header_tokens = estimate_tokens(header.len());
 554    if header_tokens >= max_tokens {
 555        return String::new();
 556    }
 557
 558    let mut event_strings: Vec<String> = Vec::new();
 559    let mut total_tokens = header_tokens;
 560
 561    for event in events.iter().rev() {
 562        let mut event_str = String::new();
 563        write_event(&mut event_str, event);
 564        let event_tokens = estimate_tokens(event_str.len());
 565
 566        if total_tokens + event_tokens > max_tokens {
 567            break;
 568        }
 569        total_tokens += event_tokens;
 570        event_strings.push(event_str);
 571    }
 572
 573    if event_strings.is_empty() {
 574        return String::new();
 575    }
 576
 577    let mut result = header;
 578    for event_str in event_strings.iter().rev() {
 579        result.push_str(event_str);
 580    }
 581    result
 582}
 583
 584fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
 585    let needs_newline = !excerpt.text.ends_with('\n');
 586    let needs_ellipsis = excerpt.row_range.end < file_max_row;
 587    let len = excerpt.text.len()
 588        + if needs_newline { "\n".len() } else { 0 }
 589        + if needs_ellipsis { "...\n".len() } else { 0 };
 590    estimate_tokens(len)
 591}
 592
/// Renders as many related-file excerpts as fit within `max_tokens`.
///
/// Excerpts are selected in ascending `order` (ties broken by file index, then
/// excerpt index) until the first one that does not fit; selection stops
/// there, so later, smaller excerpts are not considered. The selected excerpts
/// are then rendered grouped by file, in the original file/excerpt order. Each
/// file starts with a `{file_prefix}{path}` header line; an excerpt that ends
/// before the file's `max_row` is followed by a `...` line.
///
/// NOTE(review): `file_suffix` is included in every file's budgeted header
/// cost, but it is only written between two consecutive files, never after the
/// last rendered file. Confirm whether a trailing suffix is intended.
pub fn format_related_files_within_budget(
    related_files: &[RelatedFile],
    file_prefix: &str,
    file_suffix: &str,
    max_tokens: usize,
) -> String {
    // Identifies one excerpt by position, along with its selection priority.
    struct ExcerptCandidate {
        file_ix: usize,
        excerpt_ix: usize,
        order: usize,
    }

    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
        .iter()
        .enumerate()
        .flat_map(|(file_ix, file)| {
            file.excerpts
                .iter()
                .enumerate()
                .map(move |(excerpt_ix, e)| ExcerptCandidate {
                    file_ix,
                    excerpt_ix,
                    order: e.order,
                })
        })
        .collect();

    // Pre-compute file header strings and their token costs.
    let file_headers: Vec<String> = related_files
        .iter()
        .map(|file| {
            let path_str = file.path.to_string_lossy();
            format!("{}{}\n", file_prefix, path_str)
        })
        .collect();

    // Sort the excerpts by their order and determine how many fit within the budget.
    let mut total_tokens = 0;
    let mut included_excerpt_count = 0_usize;
    let mut included_file_indices = vec![false; related_files.len()];
    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
    for candidate in &excerpt_candidates {
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        let file_already_included = included_file_indices[candidate.file_ix];
        // The header (plus suffix) is charged only for a file's first included excerpt.
        let header_cost = if file_already_included {
            0
        } else {
            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
        };
        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
        // Stop at the first excerpt that does not fit; nothing after it is tried.
        if total_tokens + header_cost + excerpt_cost > max_tokens {
            break;
        }
        total_tokens += header_cost + excerpt_cost;
        if !file_already_included {
            included_file_indices[candidate.file_ix] = true;
        }
        included_excerpt_count += 1;
    }

    // Keep only the selected prefix, then restore the original file/excerpt order.
    excerpt_candidates.truncate(included_excerpt_count);
    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));

    // Render all of the files that fit within the token budget, in the original order.
    let mut result = String::new();
    let mut last_file_ix = None;
    for candidate in &excerpt_candidates {
        if last_file_ix != Some(candidate.file_ix) {
            // Moving to a new file: close the previous one, then write the new header.
            if last_file_ix.is_some() {
                result.push_str(file_suffix);
            }
            result.push_str(&file_headers[candidate.file_ix]);
            last_file_ix = Some(candidate.file_ix);
        }
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        result.push_str(&excerpt.text);
        // Checked against the whole output so far, not just this excerpt's text.
        if !result.ends_with('\n') {
            result.push('\n');
        }
        if excerpt.row_range.end < file.max_row {
            result.push_str("...\n");
        }
    }

    result
}
 681
 682pub fn write_related_files(
 683    prompt: &mut String,
 684    related_files: &[RelatedFile],
 685) -> Vec<Range<usize>> {
 686    let mut ranges = Vec::new();
 687    for file in related_files {
 688        let start = prompt.len();
 689        let path_str = file.path.to_string_lossy();
 690        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 691        for excerpt in &file.excerpts {
 692            prompt.push_str(&excerpt.text);
 693            if !prompt.ends_with('\n') {
 694                prompt.push('\n');
 695            }
 696            if excerpt.row_range.end < file.max_row {
 697                prompt.push_str("...\n");
 698            }
 699        }
 700        let end = prompt.len();
 701        ranges.push(start..end);
 702    }
 703    ranges
 704}
 705
 706mod v0112_middle_at_end {
 707    use super::*;
 708
 709    pub fn special_tokens() -> &'static [&'static str] {
 710        &[
 711            "<|fim_prefix|>",
 712            "<|fim_suffix|>",
 713            "<|fim_middle|>",
 714            "<|file_sep|>",
 715            CURSOR_MARKER,
 716        ]
 717    }
 718
 719    pub fn write_cursor_excerpt_section(
 720        prompt: &mut String,
 721        path: &Path,
 722        context: &str,
 723        editable_range: &Range<usize>,
 724        cursor_offset: usize,
 725    ) {
 726        let path_str = path.to_string_lossy();
 727        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 728
 729        prompt.push_str("<|fim_prefix|>\n");
 730        prompt.push_str(&context[..editable_range.start]);
 731
 732        prompt.push_str("<|fim_suffix|>\n");
 733        prompt.push_str(&context[editable_range.end..]);
 734        if !prompt.ends_with('\n') {
 735            prompt.push('\n');
 736        }
 737
 738        prompt.push_str("<|fim_middle|>current\n");
 739        prompt.push_str(&context[editable_range.start..cursor_offset]);
 740        prompt.push_str(CURSOR_MARKER);
 741        prompt.push_str(&context[cursor_offset..editable_range.end]);
 742        if !prompt.ends_with('\n') {
 743            prompt.push('\n');
 744        }
 745
 746        prompt.push_str("<|fim_middle|>updated\n");
 747    }
 748}
 749
 750mod v0113_ordered {
 751    use super::*;
 752
 753    pub fn special_tokens() -> &'static [&'static str] {
 754        &[
 755            "<|fim_prefix|>",
 756            "<|fim_suffix|>",
 757            "<|fim_middle|>",
 758            "<|file_sep|>",
 759            CURSOR_MARKER,
 760        ]
 761    }
 762
 763    pub fn write_cursor_excerpt_section(
 764        prompt: &mut String,
 765        path: &Path,
 766        context: &str,
 767        editable_range: &Range<usize>,
 768        cursor_offset: usize,
 769    ) {
 770        let path_str = path.to_string_lossy();
 771        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 772
 773        prompt.push_str("<|fim_prefix|>\n");
 774        prompt.push_str(&context[..editable_range.start]);
 775        if !prompt.ends_with('\n') {
 776            prompt.push('\n');
 777        }
 778
 779        prompt.push_str("<|fim_middle|>current\n");
 780        prompt.push_str(&context[editable_range.start..cursor_offset]);
 781        prompt.push_str(CURSOR_MARKER);
 782        prompt.push_str(&context[cursor_offset..editable_range.end]);
 783        if !prompt.ends_with('\n') {
 784            prompt.push('\n');
 785        }
 786
 787        prompt.push_str("<|fim_suffix|>\n");
 788        prompt.push_str(&context[editable_range.end..]);
 789        if !prompt.ends_with('\n') {
 790            prompt.push('\n');
 791        }
 792
 793        prompt.push_str("<|fim_middle|>updated\n");
 794    }
 795}
 796
mod v0114180_editable_region {
    //! Shares its special tokens with `v0113_ordered`. This module defines no
    //! section writer of its own; `write_cursor_excerpt_section_for_format`
    //! routes this format to the `v0113_ordered` writer.

    use super::*;

    /// Returns the same special tokens as `v0113_ordered`.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
 804
 805pub mod v0120_git_merge_markers {
 806    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 807    //!
 808    //! Example prompt:
 809    //!
 810    //! <|file_sep|>path/to/target_file.py
 811    //! <|fim_prefix|>
 812    //! code before editable region
 813    //! <|fim_suffix|>
 814    //! code after editable region
 815    //! <|fim_middle|>
 816    //! <<<<<<< CURRENT
 817    //! code that
 818    //! needs to<|user_cursor|>
 819    //! be rewritten
 820    //! =======
 821    //!
 822    //! Expected output (should be generated by the model):
 823    //!
 824    //! updated
 825    //! code with
 826    //! changes applied
 827    //! >>>>>>> UPDATED
 828
 829    use super::*;
 830
 831    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 832    pub const SEPARATOR: &str = "=======\n";
 833    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 834
 835    pub fn special_tokens() -> &'static [&'static str] {
 836        &[
 837            "<|fim_prefix|>",
 838            "<|fim_suffix|>",
 839            "<|fim_middle|>",
 840            "<|file_sep|>",
 841            START_MARKER,
 842            SEPARATOR,
 843            END_MARKER,
 844            CURSOR_MARKER,
 845        ]
 846    }
 847
 848    pub fn write_cursor_excerpt_section(
 849        prompt: &mut String,
 850        path: &Path,
 851        context: &str,
 852        editable_range: &Range<usize>,
 853        cursor_offset: usize,
 854    ) {
 855        let path_str = path.to_string_lossy();
 856        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 857
 858        prompt.push_str("<|fim_prefix|>");
 859        prompt.push_str(&context[..editable_range.start]);
 860
 861        prompt.push_str("<|fim_suffix|>");
 862        prompt.push_str(&context[editable_range.end..]);
 863        if !prompt.ends_with('\n') {
 864            prompt.push('\n');
 865        }
 866
 867        prompt.push_str("<|fim_middle|>");
 868        prompt.push_str(START_MARKER);
 869        prompt.push_str(&context[editable_range.start..cursor_offset]);
 870        prompt.push_str(CURSOR_MARKER);
 871        prompt.push_str(&context[cursor_offset..editable_range.end]);
 872        if !prompt.ends_with('\n') {
 873            prompt.push('\n');
 874        }
 875        prompt.push_str(SEPARATOR);
 876    }
 877}
 878
 879pub mod v0131_git_merge_markers_prefix {
 880    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 881    //!
 882    //! Example prompt:
 883    //!
 884    //! <|file_sep|>path/to/target_file.py
 885    //! <|fim_prefix|>
 886    //! code before editable region
 887    //! <<<<<<< CURRENT
 888    //! code that
 889    //! needs to<|user_cursor|>
 890    //! be rewritten
 891    //! =======
 892    //! <|fim_suffix|>
 893    //! code after editable region
 894    //! <|fim_middle|>
 895    //!
 896    //! Expected output (should be generated by the model):
 897    //!
 898    //! updated
 899    //! code with
 900    //! changes applied
 901    //! >>>>>>> UPDATED
 902
 903    use super::*;
 904
 905    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 906    pub const SEPARATOR: &str = "=======\n";
 907    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 908
 909    pub fn special_tokens() -> &'static [&'static str] {
 910        &[
 911            "<|fim_prefix|>",
 912            "<|fim_suffix|>",
 913            "<|fim_middle|>",
 914            "<|file_sep|>",
 915            START_MARKER,
 916            SEPARATOR,
 917            END_MARKER,
 918            CURSOR_MARKER,
 919        ]
 920    }
 921
 922    pub fn write_cursor_excerpt_section(
 923        prompt: &mut String,
 924        path: &Path,
 925        context: &str,
 926        editable_range: &Range<usize>,
 927        cursor_offset: usize,
 928    ) {
 929        let path_str = path.to_string_lossy();
 930        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 931
 932        prompt.push_str("<|fim_prefix|>");
 933        prompt.push_str(&context[..editable_range.start]);
 934        prompt.push_str(START_MARKER);
 935        prompt.push_str(&context[editable_range.start..cursor_offset]);
 936        prompt.push_str(CURSOR_MARKER);
 937        prompt.push_str(&context[cursor_offset..editable_range.end]);
 938        if !prompt.ends_with('\n') {
 939            prompt.push('\n');
 940        }
 941        prompt.push_str(SEPARATOR);
 942
 943        prompt.push_str("<|fim_suffix|>");
 944        prompt.push_str(&context[editable_range.end..]);
 945        if !prompt.ends_with('\n') {
 946            prompt.push('\n');
 947        }
 948
 949        prompt.push_str("<|fim_middle|>");
 950    }
 951}
 952
 953pub mod v0211_prefill {
 954    use super::*;
 955
 956    pub fn special_tokens() -> &'static [&'static str] {
 957        v0131_git_merge_markers_prefix::special_tokens()
 958    }
 959
 960    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
 961        let editable_region = &context[editable_range.start..editable_range.end];
 962
 963        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
 964        let prefill_len = editable_region.floor_char_boundary(prefill_len);
 965
 966        // Find a token boundary to avoid splitting tokens in the prefill.
 967        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
 968        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
 969        // the \n and consume any consecutive \n characters after it.
 970        let prefill = &editable_region[..prefill_len];
 971        match prefill.rfind('\n') {
 972            Some(pos) => {
 973                let mut end = pos + 1;
 974                while end < editable_region.len()
 975                    && editable_region.as_bytes().get(end) == Some(&b'\n')
 976                {
 977                    end += 1;
 978                }
 979                editable_region[..end].to_string()
 980            }
 981            // No newline found. Fall back to splitting before the last space
 982            // (word-level boundary)
 983            None => match prefill.rfind(' ') {
 984                Some(pos) => prefill[..pos].to_string(),
 985                None => prefill.to_string(),
 986            },
 987        }
 988    }
 989}
 990
 991pub mod hashline {
 992
 993    use std::fmt::Display;
 994
    /// Written at the very end of the cursor-excerpt section, after the
    /// suffix text.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Written right before the hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";
 997
 998    use super::*;
 999
    /// Prefix of a model-output line that starts a "set" (replace) command.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a model-output line that starts an "insert" command.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
1002
1003    pub fn special_tokens() -> &'static [&'static str] {
1004        return &[
1005            SET_COMMAND_MARKER,
1006            "<|set_range|>",
1007            INSERT_COMMAND_MARKER,
1008            CURSOR_MARKER,
1009            "<|file_sep|>",
1010            "<|fim_prefix|>",
1011            "<|fim_suffix|>",
1012            "<|fim_middle|>",
1013        ];
1014    }
1015
1016    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1017    #[derive(Debug, Clone, PartialEq, Eq)]
1018    struct LineRef {
1019        index: usize,
1020        hash: u8,
1021    }
1022
1023    impl Display for LineRef {
1024        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1025            write!(f, "{}:{:02x}", self.index, self.hash)
1026        }
1027    }
1028
1029    pub fn hash_line(line: &[u8]) -> u8 {
1030        let mut h: u8 = 0;
1031        for &byte in line {
1032            h = h.wrapping_add(byte);
1033        }
1034        return h;
1035    }
1036
1037    /// Write the hashline-encoded editable region into `out`. Each line of
1038    /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1039    /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1040    /// to the start of `editable_text`).
1041    pub fn write_hashline_editable_region(
1042        out: &mut String,
1043        editable_text: &str,
1044        cursor_offset_in_editable: usize,
1045    ) {
1046        let mut offset = 0;
1047        for (i, line) in editable_text.lines().enumerate() {
1048            let (head, cursor, tail) = if cursor_offset_in_editable > offset
1049                && cursor_offset_in_editable < offset + line.len()
1050            {
1051                (
1052                    &line[..cursor_offset_in_editable - offset],
1053                    CURSOR_MARKER,
1054                    &line[cursor_offset_in_editable - offset..],
1055                )
1056            } else {
1057                (line, "", "")
1058            };
1059            write!(
1060                out,
1061                "\n{}|{head}{cursor}{tail}",
1062                LineRef {
1063                    index: i,
1064                    hash: hash_line(line.as_bytes())
1065                }
1066            )
1067            .unwrap();
1068            offset += line.len() + 1;
1069        }
1070    }
1071
1072    pub fn write_cursor_excerpt_section(
1073        prompt: &mut String,
1074        path: &Path,
1075        context: &str,
1076        editable_range: &Range<usize>,
1077        cursor_offset: usize,
1078    ) {
1079        let path_str = path.to_string_lossy();
1080        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1081
1082        prompt.push_str("<|fim_prefix|>\n");
1083        prompt.push_str(&context[..editable_range.start]);
1084        prompt.push_str(START_MARKER);
1085
1086        let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1087        let editable_region = &context[editable_range.clone()];
1088        write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1089
1090        if !prompt.ends_with('\n') {
1091            prompt.push('\n');
1092        }
1093
1094        prompt.push_str("<|fim_suffix|>\n");
1095        prompt.push_str(&context[editable_range.end..]);
1096        if !prompt.ends_with('\n') {
1097            prompt.push('\n');
1098        }
1099
1100        prompt.push_str(END_MARKER);
1101    }
1102
    /// A single edit command parsed from the model output.
    ///
    /// `content` borrows from the model output and holds the lines that
    /// follow the command's header line, up to the next command header.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            // First line of the replaced range.
            start: LineRef,
            // Last line of the replaced range (inclusive).
            end: LineRef,
            // Replacement text for the whole range.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            // Line to insert after; `None` means "before the first line".
            after: Option<LineRef>,
            // Text to insert.
            content: &'a str,
        },
    }
1120
1121    /// Parse a line reference like `3:c3` into a `LineRef`.
1122    fn parse_line_ref(s: &str) -> Option<LineRef> {
1123        let (idx_str, hash_str) = s.split_once(':')?;
1124        let index = idx_str.parse::<usize>().ok()?;
1125        let hash = u8::from_str_radix(hash_str, 16).ok()?;
1126        Some(LineRef { index, hash })
1127    }
1128
1129    /// Parse the model output into a list of `EditCommand`s.
1130    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1131        let mut commands = Vec::new();
1132        let mut offset = 0usize;
1133
1134        while offset < model_output.len() {
1135            let next_nl = model_output[offset..]
1136                .find('\n')
1137                .map(|i| offset + i)
1138                .unwrap_or(model_output.len());
1139            let line = &model_output[offset..next_nl];
1140            let line_end = if next_nl < model_output.len() {
1141                next_nl + 1
1142            } else {
1143                next_nl
1144            };
1145
1146            let trimmed = line.trim();
1147            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1148                (true, spec)
1149            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1150                (false, spec)
1151            } else {
1152                offset = line_end;
1153                continue;
1154            };
1155
1156            let mut content_end = line_end;
1157            let mut scan = line_end;
1158
1159            while scan < model_output.len() {
1160                let body_nl = model_output[scan..]
1161                    .find('\n')
1162                    .map(|i| scan + i)
1163                    .unwrap_or(model_output.len());
1164                let body_line = &model_output[scan..body_nl];
1165                if body_line.trim().starts_with(SET_COMMAND_MARKER)
1166                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1167                {
1168                    break;
1169                }
1170                scan = if body_nl < model_output.len() {
1171                    body_nl + 1
1172                } else {
1173                    body_nl
1174                };
1175                content_end = scan;
1176            }
1177
1178            let content = &model_output[line_end..content_end];
1179
1180            if is_set {
1181                if let Some((start_str, end_str)) = specifier.split_once('-') {
1182                    if let (Some(start), Some(end)) =
1183                        (parse_line_ref(start_str), parse_line_ref(end_str))
1184                    {
1185                        commands.push(EditCommand::Set {
1186                            start,
1187                            end,
1188                            content,
1189                        });
1190                    }
1191                } else if let Some(target) = parse_line_ref(specifier) {
1192                    commands.push(EditCommand::Set {
1193                        start: target.clone(),
1194                        end: target,
1195                        content,
1196                    });
1197                }
1198            } else {
1199                let after = parse_line_ref(specifier);
1200                commands.push(EditCommand::Insert { after, content });
1201            }
1202
1203            offset = scan;
1204        }
1205
1206        commands
1207    }
1208
1209    /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1210    /// (as opposed to being a plain full-replacement output).
1211    /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1212    /// editable region, returning the plain text content.
1213    pub fn strip_hashline_prefixes(region: &str) -> String {
1214        let mut decoded: String = region
1215            .lines()
1216            .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1217            .collect::<Vec<_>>()
1218            .join("\n");
1219        if region.ends_with('\n') {
1220            decoded.push('\n');
1221        }
1222        decoded
1223    }
1224
1225    pub fn output_has_edit_commands(model_output: &str) -> bool {
1226        model_output.contains(SET_COMMAND_MARKER) || model_output.contains(INSERT_COMMAND_MARKER)
1227    }
1228
1229    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1230    /// original editable region text.
1231    ///
1232    /// `editable_region` is the original text of the editable region (without hash
1233    /// prefixes). `model_output` is the raw model response containing edit commands.
1234    ///
1235    /// Returns the full replacement text for the editable region.
1236    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1237        let original_lines: Vec<&str> = editable_region.lines().collect();
1238        let old_hashes: Vec<u8> = original_lines
1239            .iter()
1240            .map(|line| hash_line(line.as_bytes()))
1241            .collect();
1242
1243        let commands = parse_edit_commands(model_output);
1244
1245        // For set operations: indexed by start line → Some((end line index, content))
1246        // For insert operations: indexed by line index → vec of content to insert after
1247        // Insert-before-first is tracked separately.
1248        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1249        let mut insert_before_first: Vec<&str> = Vec::new();
1250        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1251
1252        for command in &commands {
1253            match command {
1254                EditCommand::Set {
1255                    start,
1256                    end,
1257                    content,
1258                } => {
1259                    if start.index < old_hashes.len()
1260                        && end.index < old_hashes.len()
1261                        && start.index <= end.index
1262                        && old_hashes[start.index] == start.hash
1263                        && old_hashes[end.index] == end.hash
1264                    {
1265                        set_ops[start.index] = Some((end.index, *content));
1266                    }
1267                }
1268                EditCommand::Insert { after, content } => match after {
1269                    None => insert_before_first.push(*content),
1270                    Some(line_ref) => {
1271                        if line_ref.index < old_hashes.len()
1272                            && old_hashes[line_ref.index] == line_ref.hash
1273                        {
1274                            insert_after[line_ref.index].push(*content);
1275                        }
1276                    }
1277                },
1278            }
1279        }
1280
1281        let mut result = String::new();
1282
1283        // Emit any insertions before the first line
1284        for content in &insert_before_first {
1285            result.push_str(content);
1286            if !content.ends_with('\n') {
1287                result.push('\n');
1288            }
1289        }
1290
1291        let mut i = 0;
1292        while i < original_lines.len() {
1293            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1294                // Replace lines i..=end_index with the replacement content
1295                result.push_str(replacement);
1296                if !replacement.is_empty() && !replacement.ends_with('\n') {
1297                    result.push('\n');
1298                }
1299                // Emit any insertions after the end of this set range
1300                if *end_index < insert_after.len() {
1301                    for content in &insert_after[*end_index] {
1302                        result.push_str(content);
1303                        if !content.ends_with('\n') {
1304                            result.push('\n');
1305                        }
1306                    }
1307                }
1308                i = end_index + 1;
1309            } else {
1310                // Keep the original line
1311                result.push_str(original_lines[i]);
1312                result.push('\n');
1313                // Emit any insertions after this line
1314                for content in &insert_after[i] {
1315                    result.push_str(content);
1316                    if !content.ends_with('\n') {
1317                        result.push('\n');
1318                    }
1319                }
1320                i += 1;
1321            }
1322        }
1323
1324        // Preserve trailing newline behavior: if the original ended with a
1325        // newline the result already has one; if it didn't, trim the extra one
1326        // we added.
1327        if !editable_region.ends_with('\n') && result.ends_with('\n') {
1328            result.pop();
1329        }
1330
1331        result
1332    }
1333
1334    /// Convert a unified diff patch into hashline edit commands.
1335    ///
1336    /// Parses the unified diff `patch` directly to determine which lines of
1337    /// `old_text` are deleted/replaced and what new lines are added, then emits
1338    /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1339    /// `{index}:{hash}` identifiers.
1340    ///
1341    /// `cursor_offset` is an optional byte offset into the first hunk's new
1342    /// text (context + additions) where the cursor marker should be placed.
1343    pub fn patch_to_edit_commands(
1344        old_text: &str,
1345        patch: &str,
1346        cursor_offset: Option<usize>,
1347    ) -> Result<String> {
1348        let old_lines: Vec<&str> = old_text.lines().collect();
1349        let old_hashes: Vec<u8> = old_lines
1350            .iter()
1351            .map(|line| hash_line(line.as_bytes()))
1352            .collect();
1353
1354        let mut result = String::new();
1355        let mut first_hunk = true;
1356
1357        struct Hunk<'a> {
1358            line_range: Range<usize>,
1359            new_text_lines: Vec<&'a str>,
1360            cursor_line_offset_in_new_text: Option<(usize, usize)>,
1361        }
1362
1363        // Parse the patch line by line. We only care about hunk headers,
1364        // context, deletions, and additions.
1365        let mut old_line_index: usize = 0;
1366        let mut current_hunk: Option<Hunk> = None;
1367        // Byte offset tracking within the hunk's new text for cursor placement.
1368        let mut new_text_byte_offset: usize = 0;
1369        // The line index of the last old line seen before/in the current hunk
1370        // (used for insert-after reference).
1371        let mut last_old_line_before_hunk: Option<usize> = None;
1372
1373        fn flush_hunk(
1374            hunk: Hunk,
1375            last_old_line: Option<usize>,
1376            result: &mut String,
1377            old_hashes: &[u8],
1378        ) {
1379            if hunk.line_range.is_empty() {
1380                // Pure insertion — reference the old line to insert after when in bounds.
1381                if let Some(after) = last_old_line
1382                    && let Some(&hash) = old_hashes.get(after)
1383                {
1384                    write!(
1385                        result,
1386                        "{INSERT_COMMAND_MARKER}{}\n",
1387                        LineRef { index: after, hash }
1388                    )
1389                    .unwrap();
1390                } else {
1391                    result.push_str(INSERT_COMMAND_MARKER);
1392                    result.push('\n');
1393                }
1394            } else {
1395                let start = hunk.line_range.start;
1396                let end_exclusive = hunk.line_range.end;
1397                let deleted_line_count = end_exclusive.saturating_sub(start);
1398
1399                if deleted_line_count == 1 {
1400                    if let Some(&hash) = old_hashes.get(start) {
1401                        write!(
1402                            result,
1403                            "{SET_COMMAND_MARKER}{}\n",
1404                            LineRef { index: start, hash }
1405                        )
1406                        .unwrap();
1407                    } else {
1408                        result.push_str(SET_COMMAND_MARKER);
1409                        result.push('\n');
1410                    }
1411                } else {
1412                    let end_inclusive = end_exclusive - 1;
1413                    match (
1414                        old_hashes.get(start).copied(),
1415                        old_hashes.get(end_inclusive).copied(),
1416                    ) {
1417                        (Some(start_hash), Some(end_hash)) => {
1418                            write!(
1419                                result,
1420                                "{SET_COMMAND_MARKER}{}-{}\n",
1421                                LineRef {
1422                                    index: start,
1423                                    hash: start_hash
1424                                },
1425                                LineRef {
1426                                    index: end_inclusive,
1427                                    hash: end_hash
1428                                }
1429                            )
1430                            .unwrap();
1431                        }
1432                        _ => {
1433                            result.push_str(SET_COMMAND_MARKER);
1434                            result.push('\n');
1435                        }
1436                    }
1437                }
1438            }
1439            for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1440                if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1441                    && line_offset == cursor_line_offset
1442                {
1443                    result.push_str(&line[..char_offset]);
1444                    result.push_str(CURSOR_MARKER);
1445                    result.push_str(&line[char_offset..]);
1446                    continue;
1447                }
1448
1449                result.push_str(line);
1450            }
1451        }
1452
1453        for raw_line in patch.split_inclusive('\n') {
1454            if raw_line.starts_with("@@") {
1455                // Flush any pending change hunk from a previous patch hunk.
1456                if let Some(hunk) = current_hunk.take() {
1457                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1458                }
1459
1460                // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1461                // We intentionally do not trust old_start as a direct local index into `old_text`,
1462                // because some patches are produced against a larger file region and carry
1463                // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1464                if first_hunk {
1465                    new_text_byte_offset = 0;
1466                    first_hunk = false;
1467                }
1468                continue;
1469            }
1470
1471            if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1472                continue;
1473            }
1474            if raw_line.starts_with("\\ No newline") {
1475                continue;
1476            }
1477
1478            if raw_line.starts_with('-') {
1479                // Extend or start a change hunk with this deleted old line.
1480                match &mut current_hunk {
1481                    Some(Hunk {
1482                        line_range: range, ..
1483                    }) => range.end = old_line_index + 1,
1484                    None => {
1485                        current_hunk = Some(Hunk {
1486                            line_range: old_line_index..old_line_index + 1,
1487                            new_text_lines: Vec::new(),
1488                            cursor_line_offset_in_new_text: None,
1489                        });
1490                    }
1491                }
1492                old_line_index += 1;
1493            } else if let Some(added_content) = raw_line.strip_prefix('+') {
1494                // Place cursor marker if cursor_offset falls within this line.
1495                let mut cursor_line_offset = None;
1496                if let Some(cursor_off) = cursor_offset
1497                    && (first_hunk
1498                        || cursor_off >= new_text_byte_offset
1499                            && cursor_off <= new_text_byte_offset + added_content.len())
1500                {
1501                    let line_offset = added_content.floor_char_boundary(
1502                        cursor_off
1503                            .saturating_sub(new_text_byte_offset)
1504                            .min(added_content.len()),
1505                    );
1506                    cursor_line_offset = Some(line_offset);
1507                }
1508
1509                new_text_byte_offset += added_content.len();
1510
1511                let hunk = current_hunk.get_or_insert(Hunk {
1512                    line_range: old_line_index..old_line_index,
1513                    new_text_lines: vec![],
1514                    cursor_line_offset_in_new_text: None,
1515                });
1516                hunk.new_text_lines.push(added_content);
1517                hunk.cursor_line_offset_in_new_text = cursor_line_offset
1518                    .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1519            } else {
1520                // Context line (starts with ' ' or is empty).
1521                if let Some(hunk) = current_hunk.take() {
1522                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1523                }
1524                last_old_line_before_hunk = Some(old_line_index);
1525                old_line_index += 1;
1526                let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1527                new_text_byte_offset += content.len();
1528            }
1529        }
1530
1531        // Flush final group.
1532        if let Some(hunk) = current_hunk.take() {
1533            flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1534        }
1535
1536        // Trim a single trailing newline.
1537        if result.ends_with('\n') {
1538            result.pop();
1539        }
1540
1541        Ok(result)
1542    }
1543
1544    #[cfg(test)]
1545    mod tests {
1546        use super::*;
1547        use indoc::indoc;
1548
1549        #[test]
1550        fn test_format_cursor_region() {
1551            struct Case {
1552                name: &'static str,
1553                context: &'static str,
1554                editable_range: Range<usize>,
1555                cursor_offset: usize,
1556                expected: &'static str,
1557            }
1558
1559            let cases = [
1560                Case {
1561                    name: "basic_cursor_placement",
1562                    context: "hello world\n",
1563                    editable_range: 0..12,
1564                    cursor_offset: 5,
1565                    expected: indoc! {"
1566                    <|file_sep|>test.rs
1567                    <|fim_prefix|>
1568                    <|fim_middle|>current
1569                    0:5c|hello<|user_cursor|> world
1570                    <|fim_suffix|>
1571                    <|fim_middle|>updated"},
1572                },
1573                Case {
1574                    name: "multiline_cursor_on_second_line",
1575                    context: "aaa\nbbb\nccc\n",
1576                    editable_range: 0..12,
1577                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
1578                    expected: indoc! {"
1579                    <|file_sep|>test.rs
1580                    <|fim_prefix|>
1581                    <|fim_middle|>current
1582                    0:23|aaa
1583                    1:26|b<|user_cursor|>bb
1584                    2:29|ccc
1585                    <|fim_suffix|>
1586                    <|fim_middle|>updated"},
1587                },
1588                Case {
1589                    name: "no_trailing_newline_in_context",
1590                    context: "line1\nline2",
1591                    editable_range: 0..11,
1592                    cursor_offset: 3,
1593                    expected: indoc! {"
1594                    <|file_sep|>test.rs
1595                    <|fim_prefix|>
1596                    <|fim_middle|>current
1597                    0:d9|lin<|user_cursor|>e1
1598                    1:da|line2
1599                    <|fim_suffix|>
1600                    <|fim_middle|>updated"},
1601                },
1602                Case {
1603                    name: "leading_newline_in_editable_region",
1604                    context: "\nabc\n",
1605                    editable_range: 0..5,
1606                    cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
1607                    expected: indoc! {"
1608                    <|file_sep|>test.rs
1609                    <|fim_prefix|>
1610                    <|fim_middle|>current
1611                    0:00|
1612                    1:26|a<|user_cursor|>bc
1613                    <|fim_suffix|>
1614                    <|fim_middle|>updated"},
1615                },
1616                Case {
1617                    name: "with_suffix",
1618                    context: "abc\ndef",
1619                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
1620                    cursor_offset: 2,
1621                    expected: indoc! {"
1622                    <|file_sep|>test.rs
1623                    <|fim_prefix|>
1624                    <|fim_middle|>current
1625                    0:26|ab<|user_cursor|>c
1626                    <|fim_suffix|>
1627                    def
1628                    <|fim_middle|>updated"},
1629                },
1630                Case {
1631                    name: "unicode_two_byte_chars",
1632                    context: "héllo\n",
1633                    editable_range: 0..7,
1634                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
1635                    expected: indoc! {"
1636                    <|file_sep|>test.rs
1637                    <|fim_prefix|>
1638                    <|fim_middle|>current
1639                    0:1b|hé<|user_cursor|>llo
1640                    <|fim_suffix|>
1641                    <|fim_middle|>updated"},
1642                },
1643                Case {
1644                    name: "unicode_three_byte_chars",
1645                    context: "日本語\n",
1646                    editable_range: 0..10,
1647                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
1648                    expected: indoc! {"
1649                    <|file_sep|>test.rs
1650                    <|fim_prefix|>
1651                    <|fim_middle|>current
1652                    0:80|日本<|user_cursor|>語
1653                    <|fim_suffix|>
1654                    <|fim_middle|>updated"},
1655                },
1656                Case {
1657                    name: "unicode_four_byte_chars",
1658                    context: "a🌍b\n",
1659                    editable_range: 0..7,
1660                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
1661                    expected: indoc! {"
1662                    <|file_sep|>test.rs
1663                    <|fim_prefix|>
1664                    <|fim_middle|>current
1665                    0:6b|a🌍<|user_cursor|>b
1666                    <|fim_suffix|>
1667                    <|fim_middle|>updated"},
1668                },
1669                Case {
1670                    name: "cursor_at_start_of_region_not_placed",
1671                    context: "abc\n",
1672                    editable_range: 0..4,
1673                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
1674                    expected: indoc! {"
1675                    <|file_sep|>test.rs
1676                    <|fim_prefix|>
1677                    <|fim_middle|>current
1678                    0:26|abc
1679                    <|fim_suffix|>
1680                    <|fim_middle|>updated"},
1681                },
1682                Case {
1683                    name: "cursor_at_end_of_line_not_placed",
1684                    context: "abc\ndef\n",
1685                    editable_range: 0..8,
1686                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
1687                    expected: indoc! {"
1688                    <|file_sep|>test.rs
1689                    <|fim_prefix|>
1690                    <|fim_middle|>current
1691                    0:26|abc
1692                    1:2f|def
1693                    <|fim_suffix|>
1694                    <|fim_middle|>updated"},
1695                },
1696                Case {
1697                    name: "cursor_offset_relative_to_context_not_editable_region",
1698                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
1699                    // write_cursor_excerpt_section must subtract it before comparing against
1700                    // per-line offsets within the editable region.
1701                    context: "pre\naaa\nbbb\nsuf\n",
1702                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
1703                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
1704                    expected: indoc! {"
1705                    <|file_sep|>test.rs
1706                    <|fim_prefix|>
1707                    pre
1708                    <|fim_middle|>current
1709                    0:23|aaa
1710                    1:26|b<|user_cursor|>bb
1711                    <|fim_suffix|>
1712                    suf
1713                    <|fim_middle|>updated"},
1714                },
1715            ];
1716
1717            for case in &cases {
1718                let mut prompt = String::new();
1719                hashline::write_cursor_excerpt_section(
1720                    &mut prompt,
1721                    Path::new("test.rs"),
1722                    case.context,
1723                    &case.editable_range,
1724                    case.cursor_offset,
1725                );
1726                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
1727            }
1728        }
1729
1730        #[test]
1731        fn test_apply_edit_commands() {
1732            struct Case {
1733                name: &'static str,
1734                original: &'static str,
1735                model_output: &'static str,
1736                expected: &'static str,
1737            }
1738
1739            let cases = vec![
1740                Case {
1741                    name: "set_single_line",
1742                    original: indoc! {"
1743                    let mut total = 0;
1744                    for product in products {
1745                        total += ;
1746                    }
1747                    total
1748                "},
1749                    model_output: indoc! {"
1750                    <|set|>2:87
1751                        total += product.price;
1752                "},
1753                    expected: indoc! {"
1754                    let mut total = 0;
1755                    for product in products {
1756                        total += product.price;
1757                    }
1758                    total
1759                "},
1760                },
1761                Case {
1762                    name: "set_range",
1763                    original: indoc! {"
1764                    fn foo() {
1765                        let x = 1;
1766                        let y = 2;
1767                        let z = 3;
1768                    }
1769                "},
1770                    model_output: indoc! {"
1771                    <|set|>1:46-3:4a
1772                        let sum = 6;
1773                "},
1774                    expected: indoc! {"
1775                    fn foo() {
1776                        let sum = 6;
1777                    }
1778                "},
1779                },
1780                Case {
1781                    name: "insert_after_line",
1782                    original: indoc! {"
1783                    fn main() {
1784                        let x = 1;
1785                    }
1786                "},
1787                    model_output: indoc! {"
1788                    <|insert|>1:46
1789                        let y = 2;
1790                "},
1791                    expected: indoc! {"
1792                    fn main() {
1793                        let x = 1;
1794                        let y = 2;
1795                    }
1796                "},
1797                },
1798                Case {
1799                    name: "insert_before_first",
1800                    original: indoc! {"
1801                    let x = 1;
1802                    let y = 2;
1803                "},
1804                    model_output: indoc! {"
1805                    <|insert|>
1806                    use std::io;
1807                "},
1808                    expected: indoc! {"
1809                    use std::io;
1810                    let x = 1;
1811                    let y = 2;
1812                "},
1813                },
1814                Case {
1815                    name: "set_with_cursor_marker",
1816                    original: indoc! {"
1817                    fn main() {
1818                        println!();
1819                    }
1820                "},
1821                    model_output: indoc! {"
1822                    <|set|>1:34
1823                        eprintln!(\"<|user_cursor|>\");
1824                "},
1825                    expected: indoc! {"
1826                    fn main() {
1827                        eprintln!(\"<|user_cursor|>\");
1828                    }
1829                "},
1830                },
1831                Case {
1832                    name: "multiple_set_commands",
1833                    original: indoc! {"
1834                    aaa
1835                    bbb
1836                    ccc
1837                    ddd
1838                "},
1839                    model_output: indoc! {"
1840                    <|set|>0:23
1841                    AAA
1842                    <|set|>2:29
1843                    CCC
1844                "},
1845                    expected: indoc! {"
1846                    AAA
1847                    bbb
1848                    CCC
1849                    ddd
1850                "},
1851                },
1852                Case {
1853                    name: "set_range_multiline_replacement",
1854                    original: indoc! {"
1855                    fn handle_submit() {
1856                    }
1857
1858                    fn handle_keystroke() {
1859                "},
1860                    model_output: indoc! {"
1861                    <|set|>0:3f-1:7d
1862                    fn handle_submit(modal_state: &mut ModalState) {
1863                        <|user_cursor|>
1864                    }
1865                "},
1866                    expected: indoc! {"
1867                    fn handle_submit(modal_state: &mut ModalState) {
1868                        <|user_cursor|>
1869                    }
1870
1871                    fn handle_keystroke() {
1872                "},
1873                },
1874                Case {
1875                    name: "no_edit_commands_returns_original",
1876                    original: indoc! {"
1877                    hello
1878                    world
1879                "},
1880                    model_output: "some random text with no commands",
1881                    expected: indoc! {"
1882                    hello
1883                    world
1884                "},
1885                },
1886                Case {
1887                    name: "wrong_hash_set_ignored",
1888                    original: indoc! {"
1889                    aaa
1890                    bbb
1891                "},
1892                    model_output: indoc! {"
1893                    <|set|>0:ff
1894                    ZZZ
1895                "},
1896                    expected: indoc! {"
1897                    aaa
1898                    bbb
1899                "},
1900                },
1901                Case {
1902                    name: "insert_and_set_combined",
1903                    original: indoc! {"
1904                    alpha
1905                    beta
1906                    gamma
1907                "},
1908                    model_output: indoc! {"
1909                    <|set|>0:06
1910                    ALPHA
1911                    <|insert|>1:9c
1912                    beta_extra
1913                "},
1914                    expected: indoc! {"
1915                    ALPHA
1916                    beta
1917                    beta_extra
1918                    gamma
1919                "},
1920                },
1921                Case {
1922                    name: "no_trailing_newline_preserved",
1923                    original: "hello\nworld",
1924                    model_output: indoc! {"
1925                    <|set|>0:14
1926                    HELLO
1927                "},
1928                    expected: "HELLO\nworld",
1929                },
1930                Case {
1931                    name: "set_range_hash_mismatch_in_end_bound",
1932                    original: indoc! {"
1933                    one
1934                    two
1935                    three
1936                "},
1937                    model_output: indoc! {"
1938                    <|set|>0:42-2:ff
1939                    ONE_TWO_THREE
1940                "},
1941                    expected: indoc! {"
1942                    one
1943                    two
1944                    three
1945                "},
1946                },
1947                Case {
1948                    name: "set_range_start_greater_than_end_ignored",
1949                    original: indoc! {"
1950                    a
1951                    b
1952                    c
1953                "},
1954                    model_output: indoc! {"
1955                    <|set|>2:63-1:62
1956                    X
1957                "},
1958                    expected: indoc! {"
1959                    a
1960                    b
1961                    c
1962                "},
1963                },
1964                Case {
1965                    name: "insert_out_of_bounds_ignored",
1966                    original: indoc! {"
1967                    x
1968                    y
1969                "},
1970                    model_output: indoc! {"
1971                    <|insert|>99:aa
1972                    z
1973                "},
1974                    expected: indoc! {"
1975                    x
1976                    y
1977                "},
1978                },
1979                Case {
1980                    name: "set_out_of_bounds_ignored",
1981                    original: indoc! {"
1982                    x
1983                    y
1984                "},
1985                    model_output: indoc! {"
1986                    <|set|>99:aa
1987                    z
1988                "},
1989                    expected: indoc! {"
1990                    x
1991                    y
1992                "},
1993                },
1994                Case {
1995                    name: "malformed_set_command_ignored",
1996                    original: indoc! {"
1997                    alpha
1998                    beta
1999                "},
2000                    model_output: indoc! {"
2001                    <|set|>not-a-line-ref
2002                    UPDATED
2003                "},
2004                    expected: indoc! {"
2005                    alpha
2006                    beta
2007                "},
2008                },
2009                Case {
2010                    name: "malformed_insert_hash_treated_as_before_first",
2011                    original: indoc! {"
2012                    alpha
2013                    beta
2014                "},
2015                    model_output: indoc! {"
2016                    <|insert|>1:nothex
2017                    preamble
2018                "},
2019                    expected: indoc! {"
2020                    preamble
2021                    alpha
2022                    beta
2023                "},
2024                },
2025                Case {
2026                    name: "set_then_insert_same_target_orders_insert_after_replacement",
2027                    original: indoc! {"
2028                    cat
2029                    dog
2030                "},
2031                    model_output: indoc! {"
2032                    <|set|>0:38
2033                    CAT
2034                    <|insert|>0:38
2035                    TAIL
2036                "},
2037                    expected: indoc! {"
2038                    CAT
2039                    TAIL
2040                    dog
2041                "},
2042                },
2043                Case {
2044                    name: "overlapping_set_ranges_last_wins",
2045                    original: indoc! {"
2046                    a
2047                    b
2048                    c
2049                    d
2050                "},
2051                    model_output: indoc! {"
2052                    <|set|>0:61-2:63
2053                    FIRST
2054                    <|set|>1:62-3:64
2055                    SECOND
2056                "},
2057                    expected: indoc! {"
2058                    FIRST
2059                    d
2060                "},
2061                },
2062                Case {
2063                    name: "insert_before_first_and_after_line",
2064                    original: indoc! {"
2065                    a
2066                    b
2067                "},
2068                    model_output: indoc! {"
2069                    <|insert|>
2070                    HEAD
2071                    <|insert|>0:61
2072                    MID
2073                "},
2074                    expected: indoc! {"
2075                    HEAD
2076                    a
2077                    MID
2078                    b
2079                "},
2080                },
2081            ];
2082
2083            for case in &cases {
2084                let result = hashline::apply_edit_commands(case.original, &case.model_output);
2085                assert_eq!(result, case.expected, "failed case: {}", case.name);
2086            }
2087        }
2088
2089        #[test]
2090        fn test_output_has_edit_commands() {
2091            assert!(hashline::output_has_edit_commands(&format!(
2092                "{}0:ab\nnew",
2093                SET_COMMAND_MARKER
2094            )));
2095            assert!(hashline::output_has_edit_commands(&format!(
2096                "{}0:ab\nnew",
2097                INSERT_COMMAND_MARKER
2098            )));
2099            assert!(hashline::output_has_edit_commands(&format!(
2100                "some text\n{}1:cd\nstuff",
2101                SET_COMMAND_MARKER
2102            )));
2103            assert!(!hashline::output_has_edit_commands("just plain text"));
2104            assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2105        }
2106
2107        // ---- hashline::patch_to_edit_commands round-trip tests ----
2108
2109        #[test]
2110        fn test_patch_to_edit_commands() {
2111            struct Case {
2112                name: &'static str,
2113                old: &'static str,
2114                patch: &'static str,
2115                expected_new: &'static str,
2116            }
2117
2118            let cases = [
2119                Case {
2120                    name: "single_line_replacement",
2121                    old: indoc! {"
2122                    let mut total = 0;
2123                    for product in products {
2124                        total += ;
2125                    }
2126                    total
2127                "},
2128                    patch: indoc! {"
2129                    @@ -1,5 +1,5 @@
2130                     let mut total = 0;
2131                     for product in products {
2132                    -    total += ;
2133                    +    total += product.price;
2134                     }
2135                     total
2136                "},
2137                    expected_new: indoc! {"
2138                    let mut total = 0;
2139                    for product in products {
2140                        total += product.price;
2141                    }
2142                    total
2143                "},
2144                },
2145                Case {
2146                    name: "multiline_replacement",
2147                    old: indoc! {"
2148                    fn foo() {
2149                        let x = 1;
2150                        let y = 2;
2151                        let z = 3;
2152                    }
2153                "},
2154                    patch: indoc! {"
2155                    @@ -1,5 +1,3 @@
2156                     fn foo() {
2157                    -    let x = 1;
2158                    -    let y = 2;
2159                    -    let z = 3;
2160                    +    let sum = 1 + 2 + 3;
2161                     }
2162                "},
2163                    expected_new: indoc! {"
2164                    fn foo() {
2165                        let sum = 1 + 2 + 3;
2166                    }
2167                "},
2168                },
2169                Case {
2170                    name: "insertion",
2171                    old: indoc! {"
2172                    fn main() {
2173                        let x = 1;
2174                    }
2175                "},
2176                    patch: indoc! {"
2177                    @@ -1,3 +1,4 @@
2178                     fn main() {
2179                         let x = 1;
2180                    +    let y = 2;
2181                     }
2182                "},
2183                    expected_new: indoc! {"
2184                    fn main() {
2185                        let x = 1;
2186                        let y = 2;
2187                    }
2188                "},
2189                },
2190                Case {
2191                    name: "insertion_before_first",
2192                    old: indoc! {"
2193                    let x = 1;
2194                    let y = 2;
2195                "},
2196                    patch: indoc! {"
2197                    @@ -1,2 +1,3 @@
2198                    +use std::io;
2199                     let x = 1;
2200                     let y = 2;
2201                "},
2202                    expected_new: indoc! {"
2203                    use std::io;
2204                    let x = 1;
2205                    let y = 2;
2206                "},
2207                },
2208                Case {
2209                    name: "deletion",
2210                    old: indoc! {"
2211                    aaa
2212                    bbb
2213                    ccc
2214                    ddd
2215                "},
2216                    patch: indoc! {"
2217                    @@ -1,4 +1,2 @@
2218                     aaa
2219                    -bbb
2220                    -ccc
2221                     ddd
2222                "},
2223                    expected_new: indoc! {"
2224                    aaa
2225                    ddd
2226                "},
2227                },
2228                Case {
2229                    name: "multiple_changes",
2230                    old: indoc! {"
2231                    alpha
2232                    beta
2233                    gamma
2234                    delta
2235                    epsilon
2236                "},
2237                    patch: indoc! {"
2238                    @@ -1,5 +1,5 @@
2239                    -alpha
2240                    +ALPHA
2241                     beta
2242                     gamma
2243                    -delta
2244                    +DELTA
2245                     epsilon
2246                "},
2247                    expected_new: indoc! {"
2248                    ALPHA
2249                    beta
2250                    gamma
2251                    DELTA
2252                    epsilon
2253                "},
2254                },
2255                Case {
2256                    name: "replace_with_insertion",
2257                    old: indoc! {r#"
2258                    fn handle() {
2259                        modal_state.close();
2260                        modal_state.dismiss();
2261                "#},
2262                    patch: indoc! {r#"
2263                    @@ -1,3 +1,4 @@
2264                     fn handle() {
2265                         modal_state.close();
2266                    +    eprintln!("");
2267                         modal_state.dismiss();
2268                "#},
2269                    expected_new: indoc! {r#"
2270                    fn handle() {
2271                        modal_state.close();
2272                        eprintln!("");
2273                        modal_state.dismiss();
2274                "#},
2275                },
2276                Case {
2277                    name: "complete_replacement",
2278                    old: indoc! {"
2279                    aaa
2280                    bbb
2281                    ccc
2282                "},
2283                    patch: indoc! {"
2284                    @@ -1,3 +1,3 @@
2285                    -aaa
2286                    -bbb
2287                    -ccc
2288                    +xxx
2289                    +yyy
2290                    +zzz
2291                "},
2292                    expected_new: indoc! {"
2293                    xxx
2294                    yyy
2295                    zzz
2296                "},
2297                },
2298                Case {
2299                    name: "add_function_body",
2300                    old: indoc! {"
2301                    fn foo() {
2302                        modal_state.dismiss();
2303                    }
2304
2305                    fn
2306
2307                    fn handle_keystroke() {
2308                "},
2309                    patch: indoc! {"
2310                    @@ -1,6 +1,8 @@
2311                     fn foo() {
2312                         modal_state.dismiss();
2313                     }
2314
2315                    -fn
2316                    +fn handle_submit() {
2317                    +    todo()
2318                    +}
2319
2320                     fn handle_keystroke() {
2321                "},
2322                    expected_new: indoc! {"
2323                    fn foo() {
2324                        modal_state.dismiss();
2325                    }
2326
2327                    fn handle_submit() {
2328                        todo()
2329                    }
2330
2331                    fn handle_keystroke() {
2332                "},
2333                },
2334                Case {
2335                    name: "with_cursor_offset",
2336                    old: indoc! {r#"
2337                    fn main() {
2338                        println!();
2339                    }
2340                "#},
2341                    patch: indoc! {r#"
2342                    @@ -1,3 +1,3 @@
2343                     fn main() {
2344                    -    println!();
2345                    +    eprintln!("");
2346                     }
2347                "#},
2348                    expected_new: indoc! {r#"
2349                    fn main() {
2350                        eprintln!("<|user_cursor|>");
2351                    }
2352                "#},
2353                },
2354                Case {
2355                    name: "non_local_hunk_header_pure_insertion_repro",
2356                    old: indoc! {"
2357                    aaa
2358                    bbb
2359                "},
2360                    patch: indoc! {"
2361                    @@ -20,2 +20,3 @@
2362                     aaa
2363                    +xxx
2364                     bbb
2365                "},
2366                    expected_new: indoc! {"
2367                    aaa
2368                    xxx
2369                    bbb
2370                "},
2371                },
2372            ];
2373
2374            for case in &cases {
2375                // The cursor_offset for patch_to_edit_commands is relative to
2376                // the first hunk's new text (context + additions). We compute
2377                // it by finding where the marker sits in the expected output
2378                // (which mirrors the new text of the hunk).
2379                let cursor_offset = case.expected_new.find(CURSOR_MARKER);
2380
2381                let commands =
2382                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
2383                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
2384
2385                assert!(
2386                    hashline::output_has_edit_commands(&commands),
2387                    "case {}: expected edit commands, got: {commands:?}",
2388                    case.name,
2389                );
2390
2391                let applied = hashline::apply_edit_commands(case.old, &commands);
2392                assert_eq!(applied, case.expected_new, "case {}", case.name);
2393            }
2394        }
2395    }
2396}
2397
2398pub mod seed_coder {
2399    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2400    //!
2401    //! Seed-Coder uses different FIM tokens and order than Qwen:
2402    //! - SPM order: suffix comes FIRST, then prefix, then middle
2403    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2404    //! - File markers: StarCoder-style `<filename>path` (single token + path)
2405    //!
2406    //! All context (related files, edit history) goes in the PREFIX section.
2407    //! The suffix contains only code after the editable region.
2408    //!
2409    //! Example prompt:
2410    //!
2411    //! <[fim-suffix]>
2412    //! code after editable region
2413    //! <[fim-prefix]><filename>related/file.py
2414    //! related file content
2415    //!
2416    //! <filename>edit_history
2417    //! --- a/some_file.py
2418    //! +++ b/some_file.py
2419    //! -old
2420    //! +new
2421    //!
2422    //! <filename>path/to/target_file.py
2423    //! code before editable region
2424    //! <<<<<<< CURRENT
2425    //! code that
2426    //! needs to<|user_cursor|>
2427    //! be rewritten
2428    //! =======
2429    //! <[fim-middle]>
2430    //!
2431    //! Expected output (model generates):
2432    //!
2433    //! updated
2434    //! code with
2435    //! changes applied
2436    //! >>>>>>> UPDATED
2437
2438    use super::*;
2439
2440    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
2441    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
2442    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
2443    pub const FILE_MARKER: &str = "<filename>";
2444
2445    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
2446    pub const SEPARATOR: &str = "=======\n";
2447    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
2448
2449    pub const NO_EDITS: &str = "NO_EDITS\n";
2450
2451    pub fn special_tokens() -> &'static [&'static str] {
2452        &[
2453            FIM_SUFFIX,
2454            FIM_PREFIX,
2455            FIM_MIDDLE,
2456            FILE_MARKER,
2457            START_MARKER,
2458            SEPARATOR,
2459            END_MARKER,
2460            CURSOR_MARKER,
2461        ]
2462    }
2463
2464    pub fn write_cursor_excerpt_section(
2465        prompt: &mut String,
2466        path: &Path,
2467        context: &str,
2468        editable_range: &Range<usize>,
2469        cursor_offset: usize,
2470    ) {
2471        let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2472        prompt.push_str(&section);
2473    }
2474
2475    pub fn format_prompt_with_budget(
2476        path: &Path,
2477        context: &str,
2478        editable_range: &Range<usize>,
2479        cursor_offset: usize,
2480        events: &[Arc<Event>],
2481        related_files: &[RelatedFile],
2482        max_tokens: usize,
2483    ) -> String {
2484        let suffix_section = build_suffix_section(context, editable_range);
2485        let cursor_prefix_section =
2486            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2487
2488        let suffix_tokens = estimate_tokens(suffix_section.len());
2489        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
2490        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2491
2492        let edit_history_section = super::format_edit_history_within_budget(
2493            events,
2494            FILE_MARKER,
2495            "edit_history",
2496            budget_after_cursor,
2497        );
2498        let edit_history_tokens = estimate_tokens(edit_history_section.len());
2499        let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
2500
2501        let related_files_section = super::format_related_files_within_budget(
2502            related_files,
2503            FILE_MARKER,
2504            "",
2505            budget_after_edit_history,
2506        );
2507
2508        let mut prompt = String::new();
2509        prompt.push_str(&suffix_section);
2510        prompt.push_str(FIM_PREFIX);
2511        prompt.push_str(&related_files_section);
2512        if !related_files_section.is_empty() {
2513            prompt.push('\n');
2514        }
2515        prompt.push_str(&edit_history_section);
2516        if !edit_history_section.is_empty() {
2517            prompt.push('\n');
2518        }
2519        prompt.push_str(&cursor_prefix_section);
2520        prompt.push_str(FIM_MIDDLE);
2521        prompt
2522    }
2523
2524    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2525        let mut section = String::new();
2526        section.push_str(FIM_SUFFIX);
2527        section.push_str(&context[editable_range.end..]);
2528        if !section.ends_with('\n') {
2529            section.push('\n');
2530        }
2531        section
2532    }
2533
2534    fn build_cursor_prefix_section(
2535        path: &Path,
2536        context: &str,
2537        editable_range: &Range<usize>,
2538        cursor_offset: usize,
2539    ) -> String {
2540        let mut section = String::new();
2541        let path_str = path.to_string_lossy();
2542        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2543
2544        section.push_str(&context[..editable_range.start]);
2545        section.push_str(START_MARKER);
2546        section.push_str(&context[editable_range.start..cursor_offset]);
2547        section.push_str(CURSOR_MARKER);
2548        section.push_str(&context[cursor_offset..editable_range.end]);
2549        if !section.ends_with('\n') {
2550            section.push('\n');
2551        }
2552        section.push_str(SEPARATOR);
2553        section
2554    }
2555
2556    /// Format patch as containing no changes if it's empty; otherwise return None.
2557    pub(crate) fn no_edits(patch: &str) -> Option<String> {
2558        // Count lines in the patch
2559        let empty_patch = patch.lines().count() <= 3;
2560        if empty_patch {
2561            Some(format!("{NO_EDITS}{END_MARKER}"))
2562        } else {
2563            None
2564        }
2565    }
2566}
2567
2568pub mod v0304_variable_edit {
2569    //! A prompt format with no fixed editable region. The entire context is shown
2570    //! to the model, and it chooses which text to replace by outputting surrounding
2571    //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
2572    //! text.
2573    //!
2574    //! Example prompt:
2575    //!
2576    //! <|file_sep|>path/to/file.py
2577    //! zero
2578    //! one
2579    //! two
2580    //! three<|user_cursor|>
2581    //! four
2582    //! five
2583    //! <|fim_prefix|>
    //!
2585    //! Expected output (model generates):
2586    //!
2587    //! two
2588    //! <|fim_middle|>
2589    //! THREE
2590    //! <|fim_suffix|>
2591    //! four
2592    //!
2593    //! The output means: find "two\n...\nfour" in the context, and replace
2594    //! everything between "two\n" and "four" with "THREE\n".
2595
2596    use super::*;
2597
2598    pub fn special_tokens() -> &'static [&'static str] {
2599        &[
2600            "<|fim_prefix|>",
2601            "<|fim_suffix|>",
2602            "<|fim_middle|>",
2603            "<|file_sep|>",
2604            CURSOR_MARKER,
2605        ]
2606    }
2607
2608    pub fn write_cursor_excerpt_section(
2609        prompt: &mut String,
2610        path: &Path,
2611        context: &str,
2612        cursor_offset: usize,
2613    ) {
2614        let path_str = path.to_string_lossy();
2615        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
2616
2617        prompt.push_str(&context[..cursor_offset]);
2618        prompt.push_str(CURSOR_MARKER);
2619        prompt.push_str(&context[cursor_offset..]);
2620        if !prompt.ends_with('\n') {
2621            prompt.push('\n');
2622        }
2623        prompt.push_str("<|fim_prefix|>\n")
2624    }
2625
2626    /// Apply a variable-edit model output to the original context text.
2627    ///
2628    /// The model output has the form:
2629    ///
2630    /// - prefix context lines
2631    /// - `<|fim_middle|>`
2632    /// - new text
2633    /// - `<|fim_suffix|>`
2634    /// - suffix context lines
2635    ///
2636    /// We locate the prefix/suffix context lines in the original text and replace
2637    /// everything between them with the new text.
2638    pub fn apply_variable_edit(
2639        context: &str,
2640        model_output: &str,
2641    ) -> Result<(Range<usize>, String)> {
2642        let (prefix_context, rest) = model_output
2643            .split_once("<|fim_middle|>\n")
2644            .or_else(|| model_output.split_once("<|fim_middle|>"))
2645            .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
2646
2647        let (new_text, suffix_context) = rest
2648            .split_once("<|fim_suffix|>\n")
2649            .or_else(|| rest.split_once("<|fim_suffix|>"))
2650            .unwrap_or((rest, ""));
2651
2652        let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
2653            suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
2654        } else {
2655            suffix_context
2656        };
2657
2658        let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
2659            .ok_or_else(|| anyhow!("could not locate prefix lines"))?
2660            + prefix_context.len();
2661        let suffix_offset = if suffix_context.is_empty() {
2662            context.len()
2663        } else {
2664            find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
2665                .ok_or_else(|| anyhow!("could not locate suffix lines"))?
2666                + prefix_offset
2667        };
2668
2669        let edit_range = prefix_offset..suffix_offset;
2670        return Ok((edit_range, new_text.to_string()));
2671    }
2672
2673    fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
2674        if needle.is_empty() {
2675            return Some(0);
2676        }
2677
2678        haystack.match_indices(needle).find_map(|(offset, _)| {
2679            let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
2680            matched_line_start.then_some(offset)
2681        })
2682    }
2683
2684    /// Convert a unified diff patch into the variable-edit output format.
2685    ///
2686    /// Parses `patch` as a unified diff against `old_text` and produces model
2687    /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
2688    /// delimiters. The diff is resolved by content matching rather than line
2689    /// numbers.
2690    pub fn patch_to_variable_edit_output(
2691        old_text: &str,
2692        patch: &str,
2693        cursor_offset: Option<usize>,
2694    ) -> Result<String> {
2695        // Parse the unified diff into hunks. Each hunk has an `old_context`
2696        // string (context + deleted lines interleaved in order) and a list of
2697        // edits expressed as byte ranges within that context plus replacement
2698        // text.
2699        let hunks = parse_hunks(patch);
2700        if hunks.is_empty() {
2701            return Ok(String::new());
2702        }
2703
2704        // Apply each hunk by finding its old_context in the text and
2705        // performing the edits. We search forward from where the previous
2706        // hunk ended so that hunks are applied in order.
2707        let mut new_text = old_text.to_string();
2708        let mut search_from: usize = 0;
2709        let mut first_hunk_pos: Option<usize> = None;
2710
2711        for hunk in &hunks {
2712            let context_pos = new_text[search_from..]
2713                .find(&hunk.old_context)
2714                .map(|pos| pos + search_from)
2715                .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
2716
2717            if first_hunk_pos.is_none() {
2718                first_hunk_pos = Some(context_pos);
2719            }
2720
2721            // Apply edits in reverse order so byte offsets remain valid.
2722            for edit in hunk.edits.iter().rev() {
2723                let abs_start = context_pos + edit.range.start;
2724                let abs_end = context_pos + edit.range.end;
2725                new_text.replace_range(abs_start..abs_end, &edit.text);
2726            }
2727
2728            // Advance past this hunk's region in the (now modified) text.
2729            let new_region_len: usize =
2730                hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
2731                    len + edit.text.len() - (edit.range.end - edit.range.start)
2732                });
2733            search_from = context_pos + new_region_len;
2734        }
2735
2736        // Now we have old_text and new_text. Find the changed line range by
2737        // comparing them.
2738        let old_lines: Vec<&str> = old_text.lines().collect();
2739        let new_lines: Vec<&str> = new_text.lines().collect();
2740
2741        // Find first differing line.
2742        let first_changed_row = old_lines
2743            .iter()
2744            .zip(new_lines.iter())
2745            .position(|(a, b)| a != b)
2746            .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
2747
2748        // Find last differing line (from the end).
2749        let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
2750        let common_suffix = old_lines
2751            .iter()
2752            .rev()
2753            .zip(new_lines.iter().rev())
2754            .take(max_suffix)
2755            .take_while(|(a, b)| a == b)
2756            .count();
2757
2758        let old_end = old_lines.len() - common_suffix;
2759        let new_end = new_lines.len() - common_suffix;
2760
2761        if first_changed_row == old_end && first_changed_row == new_end {
2762            return Ok(String::new());
2763        }
2764
2765        // Build the replacement text from new_lines[first_diff..new_end].
2766        let mut merged_new_text = String::new();
2767        for line in &new_lines[first_changed_row..new_end] {
2768            merged_new_text.push_str(line);
2769            merged_new_text.push('\n');
2770        }
2771
2772        // cursor_offset is relative to the first hunk's new content in
2773        // new_text. Translate it to an offset within merged_new_text, which
2774        // only contains lines first_diff..new_end of new_text.
2775        if let Some(hunk_offset) = cursor_offset {
2776            let hunk_start = first_hunk_pos.unwrap_or(0);
2777            let absolute_pos = hunk_start + hunk_offset;
2778
2779            // Byte offset where first_diff starts in new_text.
2780            let merged_start: usize = new_lines[..first_changed_row]
2781                .iter()
2782                .map(|line| line.len() + 1)
2783                .sum();
2784
2785            if absolute_pos >= merged_start {
2786                let relative_offset = absolute_pos - merged_start;
2787                if relative_offset <= merged_new_text.len() {
2788                    merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
2789                }
2790            }
2791        }
2792
2793        // Build output with 2 lines of context above and below.
2794        let context_lines_count = 2;
2795        let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
2796        let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
2797
2798        fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
2799            let pattern = &lines[line_range];
2800            let pattern_len = pattern.len();
2801
2802            let mut count = 0;
2803            for offset in 0..=lines.len() - pattern_len {
2804                if &lines[offset..offset + pattern_len] == pattern {
2805                    count += 1;
2806                }
2807            }
2808            count
2809        }
2810
2811        // Expand prefix and suffix until they are unique
2812        while prefix_start > 0 {
2813            if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
2814                prefix_start -= 1;
2815            } else {
2816                break;
2817            }
2818        }
2819        while suffix_end < old_lines.len() {
2820            if count_matches(old_end..suffix_end, &old_lines) > 1 {
2821                suffix_end += 1;
2822            } else {
2823                break;
2824            }
2825        }
2826
2827        let mut output = String::new();
2828        for line in &old_lines[prefix_start..first_changed_row] {
2829            output.push_str(line);
2830            output.push('\n');
2831        }
2832        output.push_str("<|fim_middle|>\n");
2833        output.push_str(&merged_new_text);
2834        output.push_str("<|fim_suffix|>\n");
2835        for line in &old_lines[old_end..suffix_end] {
2836            output.push_str(line);
2837            output.push('\n');
2838        }
2839
2840        Ok(output)
2841    }
2842
    /// One hunk of a unified diff, stored by content rather than by line
    /// numbers. Produced by `parse_hunks`.
    struct ParsedHunk {
        /// The hunk's context lines and deleted lines in their original order,
        /// each terminated by `'\n'`. This is the text searched for when the
        /// hunk is applied.
        old_context: String,
        /// Edits to perform, as byte ranges into `old_context` plus
        /// replacement text, in the order they appear in the hunk.
        edits: Vec<ParsedEdit>,
    }
2847
    /// A single replacement inside a `ParsedHunk`.
    struct ParsedEdit {
        /// Byte range within the owning hunk's `old_context` that is replaced.
        /// Empty (`start == end`) for a pure insertion.
        range: Range<usize>,
        /// Text that replaces `range`, made of the added lines each terminated
        /// by `'\n'`. Empty for a pure deletion.
        text: String,
    }
2852
2853    /// Parse a unified diff into content-based hunks. Each hunk contains an
2854    /// `old_context` string (context lines + deleted lines, which together
2855    /// form the text that should be found in the original) and a list of edits
2856    /// expressed as byte ranges within that context.
2857    fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
2858        let mut hunks = Vec::new();
2859        let mut current: Option<ParsedHunk> = None;
2860
2861        for line in patch.lines() {
2862            if line.starts_with("@@") {
2863                if let Some(hunk) = current.take() {
2864                    if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
2865                        hunks.push(hunk);
2866                    }
2867                }
2868                current = Some(ParsedHunk {
2869                    old_context: String::new(),
2870                    edits: Vec::new(),
2871                });
2872            } else if line.starts_with("---") || line.starts_with("+++") {
2873                continue;
2874            } else if let Some(hunk) = &mut current {
2875                if let Some(added) = line.strip_prefix('+') {
2876                    let pos = hunk.old_context.len();
2877                    if let Some(last_edit) = hunk.edits.last_mut() {
2878                        if last_edit.range.end == pos {
2879                            writeln!(&mut last_edit.text, "{added}").ok();
2880                            continue;
2881                        }
2882                    }
2883                    hunk.edits.push(ParsedEdit {
2884                        range: pos..pos,
2885                        text: format!("{added}\n"),
2886                    });
2887                } else if let Some(removed) = line.strip_prefix('-') {
2888                    let start = hunk.old_context.len();
2889                    writeln!(&mut hunk.old_context, "{removed}").ok();
2890                    let end = hunk.old_context.len();
2891                    if let Some(last_edit) = hunk.edits.last_mut() {
2892                        if last_edit.range.end == start {
2893                            last_edit.range.end = end;
2894                            continue;
2895                        }
2896                    }
2897                    hunk.edits.push(ParsedEdit {
2898                        range: start..end,
2899                        text: String::new(),
2900                    });
2901                } else {
2902                    let ctx = line.strip_prefix(' ').unwrap_or(line);
2903                    writeln!(&mut hunk.old_context, "{ctx}").ok();
2904                }
2905            }
2906        }
2907
2908        if let Some(hunk) = current {
2909            if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
2910                hunks.push(hunk);
2911            }
2912        }
2913
2914        hunks
2915    }
2916
2917    #[cfg(test)]
2918    mod tests {
2919        use super::*;
2920        use indoc::indoc;
2921
        // Table-driven test for `apply_variable_edit`: each case feeds a model
        // output to the function, applies the returned edit to `original`, and
        // compares the result with `expected`.
        #[test]
        fn test_apply_variable_edit() {
            struct Case {
                // Label reported when the assertion fails.
                name: &'static str,
                // Context text the edit is applied to.
                original: &'static str,
                // Variable-edit output as the model would produce it.
                model_output: &'static str,
                // `original` after the edit has been applied.
                expected: &'static str,
            }

            let cases = [
                Case {
                    name: "simple_single_line_replacement",
                    original: indoc! {"
                        zero
                        one
                        two
                        three
                        four
                        five
                    "},
                    model_output: indoc! {"
                        two
                        <|fim_middle|>
                        THREE
                        <|fim_suffix|>
                        four
                    "},
                    expected: indoc! {"
                        zero
                        one
                        two
                        THREE
                        four
                        five
                    "},
                },
                Case {
                    name: "multi_line_replacement",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                        e
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B
                        C
                        D
                        <|fim_suffix|>
                        e
                    "},
                    expected: indoc! {"
                        a
                        B
                        C
                        D
                        e
                    "},
                },
                Case {
                    name: "insertion_between_existing_lines",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        X
                        <|fim_suffix|>
                        b
                    "},
                    expected: indoc! {"
                        a
                        X
                        b
                        c
                    "},
                },
                Case {
                    name: "deletion",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        c
                        d
                    "},
                },
                Case {
                    name: "replacement_at_start_no_prefix_context",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        <|fim_middle|>
                        X
                        <|fim_suffix|>
                        b
                    "},
                    expected: indoc! {"
                        X
                        b
                        c
                    "},
                },
                Case {
                    name: "replacement_at_end_no_suffix_context",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        b
                        <|fim_middle|>
                        Z
                        <|fim_suffix|>
                    "},
                    expected: indoc! {"
                        a
                        b
                        Z
                    "},
                },
                Case {
                    name: "context_with_trailing_newline_is_preserved",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        B
                        c
                    "},
                },
                Case {
                    name: "cursor_marker_passes_through_untouched",
                    original: indoc! {"
                        a
                        b
                        c
                    "},
                    model_output: indoc! {"
                        a
                        <|fim_middle|>
                        B<|user_cursor|>B
                        <|fim_suffix|>
                        c
                    "},
                    expected: indoc! {"
                        a
                        B<|user_cursor|>B
                        c
                    "},
                },
                Case {
                    name: "multiple_prefix_context_lines",
                    original: indoc! {"
                        a
                        b
                        c
                        d
                        e
                    "},
                    model_output: indoc! {"
                        b
                        c
                        <|fim_middle|>
                        D
                        <|fim_suffix|>
                        e
                    "},
                    expected: indoc! {"
                        a
                        b
                        c
                        D
                        e
                    "},
                },
            ];

            for case in cases {
                // The function only returns the edit; apply it here to check
                // the final text.
                let (edit_range, replacement) =
                    apply_variable_edit(case.original, case.model_output).unwrap();
                let mut edited = case.original.to_string();
                edited.replace_range(edit_range, &replacement);
                assert_eq!(edited, case.expected, "{}", case.name);
            }
        }
3138
3139        #[test]
3140        fn test_patch_to_variable_edit() {
3141            struct Case {
3142                name: &'static str,
3143                old: &'static str,
3144                patch: &'static str,
3145                cursor_offset: Option<usize>,
3146                expected_variable_edit: &'static str,
3147                expected_after_apply: &'static str,
3148            }
3149
3150            let cases = [
3151                Case {
3152                    name: "simple_replacement",
3153                    old: indoc! {"
3154                        zero
3155                        one
3156                        two
3157                        three
3158                        four
3159                        five
3160                    "},
3161                    patch: indoc! {"
3162                        @@ -3,3 +3,3 @@
3163                         two
3164                        -three
3165                        +THREE
3166                         four
3167                    "},
3168                    cursor_offset: None,
3169                    expected_variable_edit: indoc! {"
3170                        one
3171                        two
3172                        <|fim_middle|>
3173                        THREE
3174                        <|fim_suffix|>
3175                        four
3176                        five
3177                    "},
3178                    expected_after_apply: indoc! {"
3179                        zero
3180                        one
3181                        two
3182                        THREE
3183                        four
3184                        five
3185                    "},
3186                },
3187                Case {
3188                    name: "insertion",
3189                    old: indoc! {"
3190                        a
3191                        b
3192                        c
3193                        d
3194                        e
3195                    "},
3196                    patch: indoc! {"
3197                        @@ -2,0 +3,1 @@
3198                         b
3199                        +X
3200                         c
3201                    "},
3202                    cursor_offset: None,
3203                    expected_variable_edit: indoc! {"
3204                        a
3205                        b
3206                        <|fim_middle|>
3207                        X
3208                        <|fim_suffix|>
3209                        c
3210                        d
3211                    "},
3212                    expected_after_apply: indoc! {"
3213                        a
3214                        b
3215                        X
3216                        c
3217                        d
3218                        e
3219                    "},
3220                },
3221                Case {
3222                    name: "deletion",
3223                    old: indoc! {"
3224                        a
3225                        b
3226                        c
3227                        d
3228                        e
3229                    "},
3230                    patch: indoc! {"
3231                        @@ -2,3 +2,2 @@
3232                         b
3233                        -c
3234                         d
3235                    "},
3236                    cursor_offset: None,
3237                    expected_variable_edit: indoc! {"
3238                        a
3239                        b
3240                        <|fim_middle|>
3241                        <|fim_suffix|>
3242                        d
3243                        e
3244                    "},
3245                    expected_after_apply: indoc! {"
3246                        a
3247                        b
3248                        d
3249                        e
3250                    "},
3251                },
3252                Case {
3253                    name: "edit_near_start",
3254                    old: indoc! {"
3255                        first
3256                        second
3257                        third
3258                        fourth
3259                    "},
3260                    patch: indoc! {"
3261                        @@ -1,1 +1,1 @@
3262                        -first
3263                        +FIRST
3264                    "},
3265                    cursor_offset: None,
3266                    expected_variable_edit: indoc! {"
3267                        <|fim_middle|>
3268                        FIRST
3269                        <|fim_suffix|>
3270                        second
3271                        third
3272                    "},
3273                    expected_after_apply: indoc! {"
3274                        FIRST
3275                        second
3276                        third
3277                        fourth
3278                    "},
3279                },
3280                Case {
3281                    name: "edit_near_end",
3282                    old: indoc! {"
3283                        first
3284                        second
3285                        third
3286                        fourth
3287                    "},
3288                    patch: indoc! {"
3289                        @@ -4,1 +4,1 @@
3290                        -fourth
3291                        +FOURTH
3292                    "},
3293                    cursor_offset: None,
3294                    expected_variable_edit: indoc! {"
3295                        second
3296                        third
3297                        <|fim_middle|>
3298                        FOURTH
3299                        <|fim_suffix|>
3300                    "},
3301                    expected_after_apply: indoc! {"
3302                        first
3303                        second
3304                        third
3305                        FOURTH
3306                    "},
3307                },
3308                Case {
3309                    name: "cursor_at_start_of_replacement",
3310                    old: indoc! {"
3311                        zero
3312                        one
3313                        two
3314                        three
3315                        four
3316                        five
3317                    "},
3318                    patch: indoc! {"
3319                        @@ -3,3 +3,3 @@
3320                         two
3321                        -three
3322                        +THREE
3323                         four
3324                    "},
3325                    cursor_offset: Some(4),
3326                    expected_variable_edit: indoc! {"
3327                        one
3328                        two
3329                        <|fim_middle|>
3330                        <|user_cursor|>THREE
3331                        <|fim_suffix|>
3332                        four
3333                        five
3334                    "},
3335                    expected_after_apply: indoc! {"
3336                        zero
3337                        one
3338                        two
3339                        <|user_cursor|>THREE
3340                        four
3341                        five
3342                    "},
3343                },
3344                Case {
3345                    name: "cursor_in_middle_of_replacement",
3346                    old: indoc! {"
3347                        zero
3348                        one
3349                        two
3350                        three
3351                        four
3352                        five
3353                    "},
3354                    patch: indoc! {"
3355                        @@ -3,3 +3,3 @@
3356                         two
3357                        -three
3358                        +THREE
3359                         four
3360                    "},
3361                    cursor_offset: Some(6),
3362                    expected_variable_edit: indoc! {"
3363                        one
3364                        two
3365                        <|fim_middle|>
3366                        TH<|user_cursor|>REE
3367                        <|fim_suffix|>
3368                        four
3369                        five
3370                    "},
3371                    expected_after_apply: indoc! {"
3372                        zero
3373                        one
3374                        two
3375                        TH<|user_cursor|>REE
3376                        four
3377                        five
3378                    "},
3379                },
3380                Case {
3381                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3382                    old: indoc! {"
3383                        one
3384                        a
3385                        b
3386                        c
3387                        d
3388                        two
3389                        a
3390                        b
3391                        c
3392                        d
3393                        three
3394                        a
3395                        b
3396                        c
3397                        d
3398                        four
3399                    "},
3400                    patch: indoc! {"
3401                        @@ -4,5 +4,5 @@
3402                         two
3403                         a
3404                         b
3405                        -c
3406                        +C
3407                         d
3408                         three
3409                    "},
3410                    cursor_offset: None,
3411                    expected_variable_edit: indoc! {"
3412                        two
3413                        a
3414                        b
3415                        <|fim_middle|>
3416                        C
3417                        <|fim_suffix|>
3418                        d
3419                        three
3420                    "},
3421                    expected_after_apply: indoc! {"
3422                        one
3423                        a
3424                        b
3425                        c
3426                        d
3427                        two
3428                        a
3429                        b
3430                        C
3431                        d
3432                        three
3433                        a
3434                        b
3435                        c
3436                        d
3437                        four
3438                    "},
3439                },
3440                Case {
3441                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3442                    old: indoc! {"
3443                        {
3444                            {
3445                                one();
3446                            }
3447                        }
3448                        {
3449                            {
3450                                two();
3451                            }
3452                        }
3453                        {
3454                            {
3455                                three();
3456                            }
3457                        }
3458                        {
3459                            {
3460                                four();
3461                            }
3462                        }
3463                    "},
3464                    patch: indoc! {"
3465                        @@ -4,5 +4,5 @@
3466                             {
3467                        -        two();
3468                        +        TWO();
3469                             }
3470                    "},
3471                    cursor_offset: None,
3472                    expected_variable_edit: indoc! {"
3473                                one();
3474                            }
3475                        }
3476                        {
3477                            {
3478                        <|fim_middle|>
3479                                TWO();
3480                        <|fim_suffix|>
3481                            }
3482                        }
3483                        {
3484                            {
3485                                three();
3486                    "},
3487                    expected_after_apply: indoc! {"
3488                        {
3489                            {
3490                                one();
3491                            }
3492                        }
3493                        {
3494                            {
3495                                TWO();
3496                            }
3497                        }
3498                        {
3499                            {
3500                                three();
3501                            }
3502                        }
3503                        {
3504                            {
3505                                four();
3506                            }
3507                        }
3508                    "},
3509                },
3510            ];
3511
3512            for case in cases {
3513                let output =
3514                    patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
3515                        .unwrap_or_else(|error| {
3516                            panic!("failed converting patch for {}: {error}", case.name)
3517                        });
3518                assert_eq!(
3519                    output, case.expected_variable_edit,
3520                    "patch->variable_edit mismatch for {}",
3521                    case.name
3522                );
3523
3524                let (edit_range, replacement) = apply_variable_edit(case.old, &output)
3525                    .unwrap_or_else(|error| {
3526                        panic!("failed applying variable_edit for {}: {error}", case.name)
3527                    });
3528                let mut edited_by_variable_edit = case.old.to_string();
3529                edited_by_variable_edit.replace_range(edit_range, &replacement);
3530                assert_eq!(
3531                    edited_by_variable_edit, case.expected_after_apply,
3532                    "variable_edit apply mismatch for {}",
3533                    case.name
3534                );
3535
3536                let (expected_edit_range, expected_replacement) =
3537                    apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
3538                        |error| {
3539                            panic!(
3540                                "failed applying expected variable_edit for {}: {error}",
3541                                case.name
3542                            )
3543                        },
3544                    );
3545                let mut edited_by_expected_variable_edit = case.old.to_string();
3546                edited_by_expected_variable_edit
3547                    .replace_range(expected_edit_range, &expected_replacement);
3548                assert_eq!(
3549                    edited_by_expected_variable_edit, case.expected_after_apply,
3550                    "expected variable_edit apply mismatch for {}",
3551                    case.name
3552                );
3553            }
3554        }
3555
3556        #[test]
3557        fn test_write_cursor_excerpt_section() {
3558            let path = Path::new("test.rs");
3559            let context = "fn main() {\n    hello();\n}\n";
3560            let cursor_offset = 17;
3561            let mut prompt = String::new();
3562            write_cursor_excerpt_section(&mut prompt, path, context, cursor_offset);
3563            assert_eq!(
3564                prompt,
3565                "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n"
3566            );
3567        }
3568    }
3569}
3570
3571/// The zeta1 prompt format
3572pub mod zeta1 {
3573    use super::*;
3574    use std::fmt::Write;
3575
3576    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
3577    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
3578    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
3579    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
3580
3581    const INSTRUCTION_HEADER: &str = concat!(
3582        "### Instruction:\n",
3583        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3584        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3585        "into account the cursor location.\n\n",
3586        "### User Edits:\n\n"
3587    );
3588    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
3589    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
3590
3591    /// Formats a complete zeta1 prompt from the input events and excerpt.
3592    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
3593        let mut prompt = String::with_capacity(
3594            INSTRUCTION_HEADER.len()
3595                + input_events.len()
3596                + EXCERPT_HEADER.len()
3597                + input_excerpt.len()
3598                + RESPONSE_HEADER.len(),
3599        );
3600        prompt.push_str(INSTRUCTION_HEADER);
3601        prompt.push_str(input_events);
3602        prompt.push_str(EXCERPT_HEADER);
3603        prompt.push_str(input_excerpt);
3604        prompt.push_str(RESPONSE_HEADER);
3605        prompt
3606    }
3607
3608    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
3609    /// editable and context byte-offset ranges within `cursor_excerpt`.
3610    pub fn format_zeta1_from_input(
3611        input: &ZetaPromptInput,
3612        editable_range: Range<usize>,
3613        context_range: Range<usize>,
3614    ) -> String {
3615        let events = format_zeta1_events(&input.events);
3616        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
3617        format_zeta1_prompt(&events, &excerpt)
3618    }
3619
3620    /// Formats events in zeta1 style (oldest first).
3621    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
3622        let mut result = String::new();
3623        for event in events {
3624            let event_string = format_zeta1_event(event);
3625            if event_string.is_empty() {
3626                continue;
3627            }
3628            if !result.is_empty() {
3629                result.push_str("\n\n");
3630            }
3631            result.push_str(&event_string);
3632        }
3633        result
3634    }
3635
3636    fn format_zeta1_event(event: &Event) -> String {
3637        match event {
3638            Event::BufferChange {
3639                path,
3640                old_path,
3641                diff,
3642                ..
3643            } => {
3644                let mut prompt = String::new();
3645                if old_path != path {
3646                    writeln!(
3647                        prompt,
3648                        "User renamed {} to {}\n",
3649                        old_path.display(),
3650                        path.display()
3651                    )
3652                    .ok();
3653                }
3654                if !diff.is_empty() {
3655                    write!(
3656                        prompt,
3657                        "User edited {}:\n```diff\n{}\n```",
3658                        path.display(),
3659                        diff
3660                    )
3661                    .ok();
3662                }
3663                prompt
3664            }
3665        }
3666    }
3667
3668    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
3669    /// within `cursor_excerpt`.
3670    fn format_zeta1_excerpt(
3671        input: &ZetaPromptInput,
3672        editable_range: Range<usize>,
3673        context_range: Range<usize>,
3674    ) -> String {
3675        let path_str = input.cursor_path.to_string_lossy();
3676        let excerpt = &*input.cursor_excerpt;
3677        let cursor_offset = input.cursor_offset_in_excerpt;
3678
3679        let mut prompt = String::new();
3680        writeln!(&mut prompt, "```{path_str}").ok();
3681
3682        let starts_at_file_beginning =
3683            input.excerpt_start_row == Some(0) && context_range.start == 0;
3684        if starts_at_file_beginning {
3685            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
3686        }
3687
3688        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
3689
3690        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
3691        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
3692        prompt.push_str(CURSOR_MARKER);
3693        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
3694        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
3695
3696        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
3697        write!(prompt, "\n```").ok();
3698
3699        prompt
3700    }
3701
3702    /// Cleans zeta1 model output by extracting content between editable region
3703    /// markers and converting the zeta1 cursor marker to the universal one.
3704    /// Returns `None` if the output doesn't contain the expected markers.
3705    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
3706        let content = output.replace(CURSOR_MARKER, "");
3707
3708        let content_start = content
3709            .find(EDITABLE_REGION_START_MARKER)
3710            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
3711            .map(|pos| {
3712                if content.as_bytes().get(pos) == Some(&b'\n') {
3713                    pos + 1
3714                } else {
3715                    pos
3716                }
3717            })
3718            .unwrap_or(0);
3719
3720        let content_end = content
3721            .find(EDITABLE_REGION_END_MARKER)
3722            .map(|pos| {
3723                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
3724                    pos - 1
3725                } else {
3726                    pos
3727                }
3728            })
3729            .unwrap_or(content.len());
3730
3731        if content_start > content_end {
3732            return Some(String::new());
3733        }
3734
3735        let extracted = &content[content_start..content_end];
3736
3737        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
3738            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
3739            let text_before_cursor = text_before_cursor
3740                .find(EDITABLE_REGION_START_MARKER)
3741                .map(|pos| {
3742                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
3743                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
3744                        after_marker + 1
3745                    } else {
3746                        after_marker
3747                    }
3748                })
3749                .unwrap_or(0);
3750            let offset_in_extracted = zeta1_cursor_pos
3751                .saturating_sub(text_before_cursor)
3752                .min(extracted.len());
3753            offset_in_extracted
3754        });
3755
3756        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
3757        if let Some(offset) = cursor_offset {
3758            result.push_str(&extracted[..offset]);
3759            result.push_str(super::CURSOR_MARKER);
3760            result.push_str(&extracted[offset..]);
3761        } else {
3762            result.push_str(extracted);
3763        }
3764
3765        Some(result)
3766    }
3767}
3768
3769#[cfg(test)]
3770mod tests {
3771    use super::*;
3772    use indoc::indoc;
3773
3774    fn make_input(
3775        cursor_excerpt: &str,
3776        editable_range: Range<usize>,
3777        cursor_offset: usize,
3778        events: Vec<Event>,
3779        related_files: Vec<RelatedFile>,
3780    ) -> ZetaPromptInput {
3781        let context_range = 0..cursor_excerpt.len();
3782        ZetaPromptInput {
3783            cursor_path: Path::new("test.rs").into(),
3784            cursor_excerpt: cursor_excerpt.into(),
3785            cursor_offset_in_excerpt: cursor_offset,
3786            excerpt_start_row: None,
3787            events: events.into_iter().map(Arc::new).collect(),
3788            related_files,
3789            excerpt_ranges: ExcerptRanges {
3790                editable_150: editable_range.clone(),
3791                editable_180: editable_range.clone(),
3792                editable_350: editable_range,
3793                editable_150_context_350: context_range.clone(),
3794                editable_180_context_350: context_range.clone(),
3795                editable_350_context_150: context_range,
3796                ..Default::default()
3797            },
3798            experiment: None,
3799            in_open_source_repo: false,
3800            can_collect_data: false,
3801            repo_url: None,
3802        }
3803    }
3804
3805    fn make_input_with_context_range(
3806        excerpt: &str,
3807        editable_range: Range<usize>,
3808        context_range: Range<usize>,
3809        cursor_offset: usize,
3810    ) -> ZetaPromptInput {
3811        ZetaPromptInput {
3812            cursor_path: Path::new("test.rs").into(),
3813            cursor_excerpt: excerpt.into(),
3814            cursor_offset_in_excerpt: cursor_offset,
3815            excerpt_start_row: None,
3816            events: vec![],
3817            related_files: vec![],
3818            excerpt_ranges: ExcerptRanges {
3819                editable_150: editable_range.clone(),
3820                editable_180: editable_range.clone(),
3821                editable_350: editable_range,
3822                editable_150_context_350: context_range.clone(),
3823                editable_180_context_350: context_range.clone(),
3824                editable_350_context_150: context_range,
3825                ..Default::default()
3826            },
3827            experiment: None,
3828            in_open_source_repo: false,
3829            can_collect_data: false,
3830            repo_url: None,
3831        }
3832    }
3833
3834    fn make_event(path: &str, diff: &str) -> Event {
3835        Event::BufferChange {
3836            path: Path::new(path).into(),
3837            old_path: Path::new(path).into(),
3838            diff: diff.to_string(),
3839            predicted: false,
3840            in_open_source_repo: false,
3841        }
3842    }
3843
3844    fn make_related_file(path: &str, content: &str) -> RelatedFile {
3845        RelatedFile {
3846            path: Path::new(path).into(),
3847            max_row: content.lines().count() as u32,
3848            excerpts: vec![RelatedExcerpt {
3849                row_range: 0..content.lines().count() as u32,
3850                text: content.into(),
3851                order: 0,
3852            }],
3853            in_open_source_repo: false,
3854        }
3855    }
3856
3857    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
3858        format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
3859    }
3860
3861    #[test]
3862    fn test_no_truncation_when_within_budget() {
3863        let input = make_input(
3864            "prefix\neditable\nsuffix",
3865            7..15,
3866            10,
3867            vec![make_event("a.rs", "-old\n+new\n")],
3868            vec![make_related_file("related.rs", "fn helper() {}\n")],
3869        );
3870
3871        assert_eq!(
3872            format_with_budget(&input, 10000),
3873            indoc! {r#"
3874                <|file_sep|>related.rs
3875                fn helper() {}
3876                <|file_sep|>edit history
3877                --- a/a.rs
3878                +++ b/a.rs
3879                -old
3880                +new
3881                <|file_sep|>test.rs
3882                <|fim_prefix|>
3883                prefix
3884                <|fim_middle|>current
3885                edi<|user_cursor|>table
3886                <|fim_suffix|>
3887
3888                suffix
3889                <|fim_middle|>updated
3890            "#}
3891        );
3892    }
3893
3894    #[test]
3895    fn test_truncation_drops_edit_history_when_budget_tight() {
3896        let input = make_input(
3897            "code",
3898            0..4,
3899            2,
3900            vec![make_event("a.rs", "-x\n+y\n")],
3901            vec![
3902                make_related_file("r1.rs", "a\n"),
3903                make_related_file("r2.rs", "b\n"),
3904            ],
3905        );
3906
3907        assert_eq!(
3908            format_with_budget(&input, 10000),
3909            indoc! {r#"
3910                <|file_sep|>r1.rs
3911                a
3912                <|file_sep|>r2.rs
3913                b
3914                <|file_sep|>edit history
3915                --- a/a.rs
3916                +++ b/a.rs
3917                -x
3918                +y
3919                <|file_sep|>test.rs
3920                <|fim_prefix|>
3921                <|fim_middle|>current
3922                co<|user_cursor|>de
3923                <|fim_suffix|>
3924                <|fim_middle|>updated
3925            "#}
3926        );
3927
3928        assert_eq!(
3929            format_with_budget(&input, 50),
3930            indoc! {r#"
3931                <|file_sep|>r1.rs
3932                a
3933                <|file_sep|>r2.rs
3934                b
3935                <|file_sep|>test.rs
3936                <|fim_prefix|>
3937                <|fim_middle|>current
3938                co<|user_cursor|>de
3939                <|fim_suffix|>
3940                <|fim_middle|>updated
3941            "#}
3942        );
3943    }
3944
3945    #[test]
3946    fn test_truncation_includes_partial_excerpts() {
3947        let input = make_input(
3948            "x",
3949            0..1,
3950            0,
3951            vec![],
3952            vec![RelatedFile {
3953                path: Path::new("big.rs").into(),
3954                max_row: 30,
3955                in_open_source_repo: false,
3956                excerpts: vec![
3957                    RelatedExcerpt {
3958                        row_range: 0..10,
3959                        text: "first excerpt\n".into(),
3960                        order: 0,
3961                    },
3962                    RelatedExcerpt {
3963                        row_range: 10..20,
3964                        text: "second excerpt\n".into(),
3965                        order: 0,
3966                    },
3967                    RelatedExcerpt {
3968                        row_range: 20..30,
3969                        text: "third excerpt\n".into(),
3970                        order: 0,
3971                    },
3972                ],
3973            }],
3974        );
3975
3976        assert_eq!(
3977            format_with_budget(&input, 10000),
3978            indoc! {r#"
3979                <|file_sep|>big.rs
3980                first excerpt
3981                ...
3982                second excerpt
3983                ...
3984                third excerpt
3985                <|file_sep|>test.rs
3986                <|fim_prefix|>
3987                <|fim_middle|>current
3988                <|user_cursor|>x
3989                <|fim_suffix|>
3990                <|fim_middle|>updated
3991            "#}
3992        );
3993
3994        assert_eq!(
3995            format_with_budget(&input, 50),
3996            indoc! {r#"
3997                <|file_sep|>big.rs
3998                first excerpt
3999                ...
4000                <|file_sep|>test.rs
4001                <|fim_prefix|>
4002                <|fim_middle|>current
4003                <|user_cursor|>x
4004                <|fim_suffix|>
4005                <|fim_middle|>updated
4006            "#}
4007        );
4008    }
4009
4010    #[test]
4011    fn test_truncation_prioritizes_lower_order_excerpts() {
4012        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
4013        // With tight budget, only the lower-order excerpt from file_b should be included.
4014        let input = make_input(
4015            "x",
4016            0..1,
4017            0,
4018            vec![],
4019            vec![
4020                RelatedFile {
4021                    path: Path::new("file_a.rs").into(),
4022                    max_row: 10,
4023                    in_open_source_repo: false,
4024                    excerpts: vec![RelatedExcerpt {
4025                        row_range: 0..10,
4026                        text: "low priority content\n".into(),
4027                        order: 5,
4028                    }],
4029                },
4030                RelatedFile {
4031                    path: Path::new("file_b.rs").into(),
4032                    max_row: 10,
4033                    in_open_source_repo: false,
4034                    excerpts: vec![RelatedExcerpt {
4035                        row_range: 0..10,
4036                        text: "high priority content\n".into(),
4037                        order: 1,
4038                    }],
4039                },
4040            ],
4041        );
4042
4043        // With large budget, both files included; rendered in stable lexicographic order.
4044        assert_eq!(
4045            format_with_budget(&input, 10000),
4046            indoc! {r#"
4047                <|file_sep|>file_a.rs
4048                low priority content
4049                <|file_sep|>file_b.rs
4050                high priority content
4051                <|file_sep|>test.rs
4052                <|fim_prefix|>
4053                <|fim_middle|>current
4054                <|user_cursor|>x
4055                <|fim_suffix|>
4056                <|fim_middle|>updated
4057            "#}
4058        );
4059
4060        // With tight budget, only file_b (lower order) fits.
4061        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
4062        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
4063        // file_a would need another 14 tokens, which doesn't fit.
4064        assert_eq!(
4065            format_with_budget(&input, 52),
4066            indoc! {r#"
4067                <|file_sep|>file_b.rs
4068                high priority content
4069                <|file_sep|>test.rs
4070                <|fim_prefix|>
4071                <|fim_middle|>current
4072                <|user_cursor|>x
4073                <|fim_suffix|>
4074                <|fim_middle|>updated
4075            "#}
4076        );
4077    }
4078
4079    #[test]
4080    fn test_truncation_drops_high_order_excerpts_within_file() {
4081        // A single file has excerpts at order 1 and order 3. With a tight budget,
4082        // only the order-1 excerpts are included while the order-3 excerpt is
4083        // dropped — even though they belong to the same file. This also preserves
4084        // the parent invariant: parent outline items have order ≤ their best
4085        // child, so they're always included when any child is.
4086        let input = make_input(
4087            "x",
4088            0..1,
4089            0,
4090            vec![],
4091            vec![RelatedFile {
4092                path: Path::new("mod.rs").into(),
4093                max_row: 30,
4094                in_open_source_repo: false,
4095                excerpts: vec![
4096                    RelatedExcerpt {
4097                        row_range: 0..5,
4098                        text: "mod header\n".into(),
4099                        order: 1,
4100                    },
4101                    RelatedExcerpt {
4102                        row_range: 5..15,
4103                        text: "important fn\n".into(),
4104                        order: 1,
4105                    },
4106                    RelatedExcerpt {
4107                        row_range: 15..30,
4108                        text: "less important fn\n".into(),
4109                        order: 3,
4110                    },
4111                ],
4112            }],
4113        );
4114
4115        // With large budget, all three excerpts included.
4116        assert_eq!(
4117            format_with_budget(&input, 10000),
4118            indoc! {r#"
4119                <|file_sep|>mod.rs
4120                mod header
4121                ...
4122                important fn
4123                ...
4124                less important fn
4125                <|file_sep|>test.rs
4126                <|fim_prefix|>
4127                <|fim_middle|>current
4128                <|user_cursor|>x
4129                <|fim_suffix|>
4130                <|fim_middle|>updated
4131            "#}
4132        );
4133
4134        // With tight budget, only order<=1 excerpts included (header + important fn).
4135        assert_eq!(
4136            format_with_budget(&input, 55),
4137            indoc! {r#"
4138                <|file_sep|>mod.rs
4139                mod header
4140                ...
4141                important fn
4142                ...
4143                <|file_sep|>test.rs
4144                <|fim_prefix|>
4145                <|fim_middle|>current
4146                <|user_cursor|>x
4147                <|fim_suffix|>
4148                <|fim_middle|>updated
4149            "#}
4150        );
4151    }
4152
4153    #[test]
4154    fn test_truncation_drops_older_events_first() {
4155        let input = make_input(
4156            "x",
4157            0..1,
4158            0,
4159            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
4160            vec![],
4161        );
4162
4163        assert_eq!(
4164            format_with_budget(&input, 10000),
4165            indoc! {r#"
4166                <|file_sep|>edit history
4167                --- a/old.rs
4168                +++ b/old.rs
4169                -1
4170                --- a/new.rs
4171                +++ b/new.rs
4172                -2
4173                <|file_sep|>test.rs
4174                <|fim_prefix|>
4175                <|fim_middle|>current
4176                <|user_cursor|>x
4177                <|fim_suffix|>
4178                <|fim_middle|>updated
4179            "#}
4180        );
4181
4182        assert_eq!(
4183            format_with_budget(&input, 55),
4184            indoc! {r#"
4185                <|file_sep|>edit history
4186                --- a/new.rs
4187                +++ b/new.rs
4188                -2
4189                <|file_sep|>test.rs
4190                <|fim_prefix|>
4191                <|fim_middle|>current
4192                <|user_cursor|>x
4193                <|fim_suffix|>
4194                <|fim_middle|>updated
4195            "#}
4196        );
4197    }
4198
4199    #[test]
4200    fn test_cursor_excerpt_always_included_with_minimal_budget() {
4201        let input = make_input(
4202            "fn main() {}",
4203            0..12,
4204            3,
4205            vec![make_event("a.rs", "-old\n+new\n")],
4206            vec![make_related_file("related.rs", "helper\n")],
4207        );
4208
4209        assert_eq!(
4210            format_with_budget(&input, 30),
4211            indoc! {r#"
4212                <|file_sep|>test.rs
4213                <|fim_prefix|>
4214                <|fim_middle|>current
4215                fn <|user_cursor|>main() {}
4216                <|fim_suffix|>
4217                <|fim_middle|>updated
4218            "#}
4219        );
4220    }
4221
4222    fn format_seed_coder(input: &ZetaPromptInput) -> String {
4223        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4224    }
4225
4226    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4227        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4228    }
4229
4230    #[test]
4231    fn test_seed_coder_basic_format() {
4232        let input = make_input(
4233            "prefix\neditable\nsuffix",
4234            7..15,
4235            10,
4236            vec![make_event("a.rs", "-old\n+new\n")],
4237            vec![make_related_file("related.rs", "fn helper() {}\n")],
4238        );
4239
4240        assert_eq!(
4241            format_seed_coder(&input),
4242            indoc! {r#"
4243                <[fim-suffix]>
4244                suffix
4245                <[fim-prefix]><filename>related.rs
4246                fn helper() {}
4247
4248                <filename>edit_history
4249                --- a/a.rs
4250                +++ b/a.rs
4251                -old
4252                +new
4253
4254                <filename>test.rs
4255                prefix
4256                <<<<<<< CURRENT
4257                edi<|user_cursor|>table
4258                =======
4259                <[fim-middle]>"#}
4260        );
4261    }
4262
4263    #[test]
4264    fn test_seed_coder_no_context() {
4265        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
4266
4267        assert_eq!(
4268            format_seed_coder(&input),
4269            indoc! {r#"
4270                <[fim-suffix]>
4271                after
4272                <[fim-prefix]><filename>test.rs
4273                before
4274                <<<<<<< CURRENT
4275                mid<|user_cursor|>dle
4276                =======
4277                <[fim-middle]>"#}
4278        );
4279    }
4280
4281    #[test]
4282    fn test_seed_coder_truncation_drops_context() {
4283        let input = make_input(
4284            "code",
4285            0..4,
4286            2,
4287            vec![make_event("a.rs", "-x\n+y\n")],
4288            vec![make_related_file("r1.rs", "content\n")],
4289        );
4290
4291        // With large budget, everything is included
4292        assert_eq!(
4293            format_seed_coder(&input),
4294            indoc! {r#"
4295                <[fim-suffix]>
4296                <[fim-prefix]><filename>r1.rs
4297                content
4298
4299                <filename>edit_history
4300                --- a/a.rs
4301                +++ b/a.rs
4302                -x
4303                +y
4304
4305                <filename>test.rs
4306                <<<<<<< CURRENT
4307                co<|user_cursor|>de
4308                =======
4309                <[fim-middle]>"#}
4310        );
4311
4312        // With tight budget, context is dropped but cursor section remains
4313        assert_eq!(
4314            format_seed_coder_with_budget(&input, 30),
4315            indoc! {r#"
4316                <[fim-suffix]>
4317                <[fim-prefix]><filename>test.rs
4318                <<<<<<< CURRENT
4319                co<|user_cursor|>de
4320                =======
4321                <[fim-middle]>"#}
4322        );
4323    }
4324
4325    #[test]
4326    fn test_seed_coder_truncation_prioritizes_lower_order() {
4327        let input = make_input(
4328            "code",
4329            0..4,
4330            2,
4331            vec![],
4332            vec![
4333                RelatedFile {
4334                    path: Path::new("low_prio.rs").into(),
4335                    max_row: 5,
4336                    in_open_source_repo: false,
4337                    excerpts: vec![RelatedExcerpt {
4338                        row_range: 0..5,
4339                        text: "low prio\n".into(),
4340                        order: 10,
4341                    }],
4342                },
4343                RelatedFile {
4344                    path: Path::new("high_prio.rs").into(),
4345                    max_row: 5,
4346                    in_open_source_repo: false,
4347                    excerpts: vec![RelatedExcerpt {
4348                        row_range: 0..5,
4349                        text: "high prio\n".into(),
4350                        order: 1,
4351                    }],
4352                },
4353            ],
4354        );
4355
4356        // With large budget, both included; rendered in stable lexicographic order.
4357        assert_eq!(
4358            format_seed_coder(&input),
4359            indoc! {r#"
4360                <[fim-suffix]>
4361                <[fim-prefix]><filename>low_prio.rs
4362                low prio
4363                <filename>high_prio.rs
4364                high prio
4365
4366                <filename>test.rs
4367                <<<<<<< CURRENT
4368                co<|user_cursor|>de
4369                =======
4370                <[fim-middle]>"#}
4371        );
4372
4373        // With tight budget, only high_prio included.
4374        // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
4375        // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
4376        assert_eq!(
4377            format_seed_coder_with_budget(&input, 44),
4378            indoc! {r#"
4379                <[fim-suffix]>
4380                <[fim-prefix]><filename>high_prio.rs
4381                high prio
4382
4383                <filename>test.rs
4384                <<<<<<< CURRENT
4385                co<|user_cursor|>de
4386                =======
4387                <[fim-middle]>"#}
4388        );
4389    }
4390
4391    #[test]
4392    fn test_format_zeta1_from_input_basic() {
4393        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
4394        let input = ZetaPromptInput {
4395            cursor_path: Path::new("src/main.rs").into(),
4396            cursor_excerpt: excerpt.into(),
4397            cursor_offset_in_excerpt: 30,
4398            excerpt_start_row: Some(0),
4399            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
4400            related_files: vec![],
4401            excerpt_ranges: ExcerptRanges {
4402                editable_150: 15..41,
4403                editable_180: 15..41,
4404                editable_350: 15..41,
4405                editable_150_context_350: 0..excerpt.len(),
4406                editable_180_context_350: 0..excerpt.len(),
4407                editable_350_context_150: 0..excerpt.len(),
4408                ..Default::default()
4409            },
4410            experiment: None,
4411            in_open_source_repo: false,
4412            can_collect_data: false,
4413            repo_url: None,
4414        };
4415
4416        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
4417
4418        assert_eq!(
4419            prompt,
4420            concat!(
4421                "### Instruction:\n",
4422                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4423                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4424                "into account the cursor location.\n",
4425                "\n",
4426                "### User Edits:\n",
4427                "\n",
4428                "User edited other.rs:\n",
4429                "```diff\n",
4430                "-old\n",
4431                "+new\n",
4432                "\n",
4433                "```\n",
4434                "\n",
4435                "### User Excerpt:\n",
4436                "\n",
4437                "```src/main.rs\n",
4438                "<|start_of_file|>\n",
4439                "fn before() {}\n",
4440                "<|editable_region_start|>\n",
4441                "fn foo() {\n",
4442                "    <|user_cursor_is_here|>let x = 1;\n",
4443                "\n",
4444                "<|editable_region_end|>}\n",
4445                "fn after() {}\n",
4446                "\n",
4447                "```\n",
4448                "\n",
4449                "### Response:\n",
4450            ),
4451        );
4452    }
4453
4454    #[test]
4455    fn test_format_zeta1_from_input_no_start_of_file() {
4456        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
4457        let input = ZetaPromptInput {
4458            cursor_path: Path::new("src/main.rs").into(),
4459            cursor_excerpt: excerpt.into(),
4460            cursor_offset_in_excerpt: 15,
4461            excerpt_start_row: Some(10),
4462            events: vec![],
4463            related_files: vec![],
4464            excerpt_ranges: ExcerptRanges {
4465                editable_150: 0..28,
4466                editable_180: 0..28,
4467                editable_350: 0..28,
4468                editable_150_context_350: 0..28,
4469                editable_180_context_350: 0..28,
4470                editable_350_context_150: 0..28,
4471                ..Default::default()
4472            },
4473            experiment: None,
4474            in_open_source_repo: false,
4475            can_collect_data: false,
4476            repo_url: None,
4477        };
4478
4479        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
4480
4481        assert_eq!(
4482            prompt,
4483            concat!(
4484                "### Instruction:\n",
4485                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4486                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4487                "into account the cursor location.\n",
4488                "\n",
4489                "### User Edits:\n",
4490                "\n",
4491                "\n",
4492                "\n",
4493                "### User Excerpt:\n",
4494                "\n",
4495                "```src/main.rs\n",
4496                "<|editable_region_start|>\n",
4497                "fn foo() {\n",
4498                "    <|user_cursor_is_here|>let x = 1;\n",
4499                "}\n",
4500                "\n",
4501                "<|editable_region_end|>\n",
4502                "```\n",
4503                "\n",
4504                "### Response:\n",
4505            ),
4506        );
4507    }
4508
4509    #[test]
4510    fn test_format_zeta1_from_input_with_sub_ranges() {
4511        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
4512        let editable_range = 10..37;
4513        let context_range = 0..excerpt.len();
4514
4515        let input = ZetaPromptInput {
4516            cursor_path: Path::new("test.rs").into(),
4517            cursor_excerpt: excerpt.into(),
4518            cursor_offset_in_excerpt: 25,
4519            excerpt_start_row: Some(0),
4520            events: vec![],
4521            related_files: vec![],
4522            excerpt_ranges: ExcerptRanges {
4523                editable_150: editable_range.clone(),
4524                editable_180: editable_range.clone(),
4525                editable_350: editable_range.clone(),
4526                editable_150_context_350: context_range.clone(),
4527                editable_180_context_350: context_range.clone(),
4528                editable_350_context_150: context_range.clone(),
4529                ..Default::default()
4530            },
4531            experiment: None,
4532            in_open_source_repo: false,
4533            can_collect_data: false,
4534            repo_url: None,
4535        };
4536
4537        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
4538
4539        assert_eq!(
4540            prompt,
4541            concat!(
4542                "### Instruction:\n",
4543                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4544                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4545                "into account the cursor location.\n",
4546                "\n",
4547                "### User Edits:\n",
4548                "\n",
4549                "\n",
4550                "\n",
4551                "### User Excerpt:\n",
4552                "\n",
4553                "```test.rs\n",
4554                "<|start_of_file|>\n",
4555                "// prefix\n",
4556                "<|editable_region_start|>\n",
4557                "fn foo() {\n",
4558                "    <|user_cursor_is_here|>let x = 1;\n",
4559                "}\n",
4560                "<|editable_region_end|>\n",
4561                "// suffix\n",
4562                "\n",
4563                "```\n",
4564                "\n",
4565                "### Response:\n",
4566            ),
4567        );
4568    }
4569
4570    #[test]
4571    fn test_clean_zeta1_model_output_basic() {
4572        let output = indoc! {"
4573            <|editable_region_start|>
4574            fn main() {
4575                println!(\"hello\");
4576            }
4577            <|editable_region_end|>
4578        "};
4579
4580        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4581        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
4582    }
4583
4584    #[test]
4585    fn test_clean_zeta1_model_output_with_cursor() {
4586        let output = indoc! {"
4587            <|editable_region_start|>
4588            fn main() {
4589                <|user_cursor_is_here|>println!(\"hello\");
4590            }
4591            <|editable_region_end|>
4592        "};
4593
4594        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4595        assert_eq!(
4596            cleaned,
4597            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
4598        );
4599    }
4600
4601    #[test]
4602    fn test_clean_zeta1_model_output_no_markers() {
4603        let output = "fn main() {}\n";
4604        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4605        assert_eq!(cleaned, "fn main() {}\n");
4606    }
4607
4608    #[test]
4609    fn test_clean_zeta1_model_output_empty_region() {
4610        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
4611        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4612        assert_eq!(cleaned, "");
4613    }
4614
4615    fn apply_edit(excerpt: &str, range: &Range<usize>, new_text: &str) -> String {
4616        let mut result = excerpt.to_string();
4617        result.replace_range(range.clone(), new_text);
4618        result
4619    }
4620
4621    #[test]
4622    fn test_parse_zeta2_model_output() {
4623        let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
4624        let context_start = excerpt.find("ctx start").unwrap();
4625        let context_end = excerpt.find("after ctx").unwrap();
4626        let editable_start = excerpt.find("editable old").unwrap();
4627        let editable_end = editable_start + "editable old\n".len();
4628        let input = make_input_with_context_range(
4629            excerpt,
4630            editable_start..editable_end,
4631            context_start..context_end,
4632            editable_start,
4633        );
4634
4635        let (range, text) = parse_zeta2_model_output(
4636            "editable new\n>>>>>>> UPDATED\n",
4637            ZetaFormat::V0131GitMergeMarkersPrefix,
4638            &input,
4639        )
4640        .unwrap();
4641
4642        assert_eq!(
4643            apply_edit(excerpt, &range, &text),
4644            "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
4645        );
4646    }
4647
4648    #[test]
4649    fn test_parse_zeta2_model_output_identity() {
4650        let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
4651        let editable_start = excerpt.find("bbb").unwrap();
4652        let editable_end = excerpt.find("ddd").unwrap();
4653        let input = make_input_with_context_range(
4654            excerpt,
4655            editable_start..editable_end,
4656            0..excerpt.len(),
4657            editable_start,
4658        );
4659
4660        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
4661        let (range, text) =
4662            parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
4663
4664        assert_eq!(apply_edit(excerpt, &range, &text), excerpt);
4665    }
4666
4667    #[test]
4668    fn test_parse_zeta2_model_output_strips_end_marker() {
4669        let excerpt = "hello\nworld\n";
4670        let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
4671
4672        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
4673        let (range1, text1) =
4674            parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
4675        let (range2, text2) = parse_zeta2_model_output("new content\n", format, &input).unwrap();
4676
4677        assert_eq!(
4678            apply_edit(excerpt, &range1, &text1),
4679            apply_edit(excerpt, &range2, &text2)
4680        );
4681        assert_eq!(apply_edit(excerpt, &range1, &text1), "new content\n");
4682    }
4683}