zeta_prompt.rs

   1use anyhow::{Result, anyhow};
   2use serde::{Deserialize, Serialize};
   3use std::fmt::Write;
   4use std::ops::Range;
   5use std::path::Path;
   6use std::sync::Arc;
   7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
   8
/// Marker text inserted into the rendered cursor excerpt at the cursor offset.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes to
/// `format_prompt_with_budget_for_format`.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  16
  17fn estimate_tokens(bytes: usize) -> usize {
  18    bytes / 3
  19}
  20
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
#[derive(Clone, Debug, Default, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Editable region computed with a 512-token budget (optional).
    pub editable_512: Option<Range<usize>>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
    /// Presumably the context boundary when using editable_350 with 512 tokens
    /// of additional context (by analogy with the fields above; optional).
    pub editable_350_context_512: Option<Range<usize>>,
    /// Presumably the context boundary when using editable_350 with 1024 tokens
    /// of additional context (by analogy with the fields above; optional).
    pub editable_350_context_1024: Option<Range<usize>>,
    /// Presumably a context range computed with a 4096-token budget (optional).
    /// NOTE(review): not read in this part of the file; confirm semantics.
    pub context_4096: Option<Range<usize>>,
    /// Optional range that `excerpt_ranges_for_format` uses as both the
    /// editable and the context range for `ZetaFormat::V0304VariableEdit`,
    /// falling back to `editable_350_context_150` when absent. Presumably
    /// computed with an 8192-token budget.
    pub context_8192: Option<Range<usize>>,
}
  45
/// Everything needed to render a prompt for one prediction request.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path written into the cursor section of the prompt; also compared
    /// against related-file paths when filtering redundant excerpts.
    pub cursor_path: Arc<Path>,
    /// Text surrounding the cursor. `excerpt_ranges` and
    /// `cursor_offset_in_excerpt` are byte offsets into this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// Row offset added to excerpt-relative rows to obtain rows comparable
    /// with `RelatedExcerpt::row_range` (presumably the file row at which
    /// `cursor_excerpt` starts). When `None`, related files are not filtered
    /// against the cursor excerpt.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Events rendered into the edit-history section; when the budget is
    /// tight, events at the end of the list are kept first.
    pub events: Vec<Arc<Event>>,
    /// Excerpts from other locations that may be rendered as extra context.
    pub related_files: Vec<RelatedFile>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    /// Not read in this part of the file; presumably whether the cursor file
    /// belongs to an open-source repository. Defaults to `false` when absent.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// Not read in this part of the file; presumably whether this request may
    /// be used for data collection. Defaults to `false` when absent.
    #[serde(default)]
    pub can_collect_data: bool,
    /// Not read in this part of the file; presumably the URL of the
    /// repository containing the cursor file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
  67
/// Identifies a prompt/output format version.
///
/// The variant selects the special tokens, excerpt ranges, cursor-section
/// layout, prefill, end marker and output handling used by the
/// `*_for_format` functions in this file. The variant name is also the
/// format's textual name (through `IntoStaticStr`), which `Display` prints
/// and `ZetaFormat::parse` matches against.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Presumably required because `v0226Hashline` starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    /// The format used when none is specified.
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304VariableEdit,
    V0304SeedNoEdits,
}
  95
  96impl std::fmt::Display for ZetaFormat {
  97    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  98        write!(f, "{}", <&'static str>::from(self))
  99    }
 100}
 101
 102impl ZetaFormat {
 103    pub fn parse(format_name: &str) -> Result<Self> {
 104        let mut results = ZetaFormat::iter().filter(|version| {
 105            <&'static str>::from(version)
 106                .to_lowercase()
 107                .contains(&format_name.to_lowercase())
 108        });
 109        let Some(result) = results.next() else {
 110            anyhow::bail!(
 111                "`{format_name}` did not match any of:\n{}",
 112                Self::options_as_string()
 113            );
 114        };
 115        if results.next().is_some() {
 116            anyhow::bail!(
 117                "`{format_name}` matched more than one of:\n{}",
 118                Self::options_as_string()
 119            );
 120        }
 121        Ok(result)
 122    }
 123
 124    pub fn options_as_string() -> String {
 125        ZetaFormat::iter()
 126            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 127            .collect::<Vec<_>>()
 128            .concat()
 129    }
 130}
 131
/// An entry of the edit history that is rendered into the prompt.
///
/// Serialized with an `"event"` tag field identifying the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, carried as diff text.
    BufferChange {
        /// Path written on the `+++ b` line by `write_event`.
        path: Arc<Path>,
        /// Path written on the `--- a` line by `write_event`.
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the two path lines.
        diff: String,
        /// When true, `write_event` prefixes the entry with
        /// `// User accepted prediction:`.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; not written into the prompt.
        in_open_source_repo: bool,
    },
}
 143
 144impl Event {
 145    pub fn in_open_source_repo(&self) -> bool {
 146        match self {
 147            Event::BufferChange {
 148                in_open_source_repo,
 149                ..
 150            } => *in_open_source_repo,
 151        }
 152    }
 153}
 154
 155pub fn write_event(prompt: &mut String, event: &Event) {
 156    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 157        for component in path.components() {
 158            prompt.push('/');
 159            write!(prompt, "{}", component.as_os_str().display()).ok();
 160        }
 161    }
 162    match event {
 163        Event::BufferChange {
 164            path,
 165            old_path,
 166            diff,
 167            predicted,
 168            in_open_source_repo: _,
 169        } => {
 170            if *predicted {
 171                prompt.push_str("// User accepted prediction:\n");
 172            }
 173            prompt.push_str("--- a");
 174            write_path_as_unix_str(prompt, old_path.as_ref());
 175            prompt.push_str("\n+++ b");
 176            write_path_as_unix_str(prompt, path.as_ref());
 177            prompt.push('\n');
 178            prompt.push_str(diff);
 179        }
 180    }
 181}
 182
/// A file contributing extra context excerpts to the prompt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written in the file header line of the related-files section.
    pub path: Arc<Path>,
    /// Row bound compared against each excerpt's `row_range.end`: an excerpt
    /// that ends before it is followed by a `...` line when rendered.
    pub max_row: u32,
    /// Excerpts of this file, rendered in this order.
    pub excerpts: Vec<RelatedExcerpt>,
    /// Not read in this part of the file. Defaults to `false` when absent.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 191
/// A contiguous piece of text taken from a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows of the file covered by `text`.
    pub row_range: Range<u32>,
    /// The excerpt's text.
    pub text: Arc<str>,
    /// Priority used when fitting excerpts into a token budget: excerpts
    /// with a lower value are considered first. Defaults to 0 when absent.
    #[serde(default)]
    pub order: usize,
}
 199
 200pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 201    special_tokens_for_format(format)
 202        .iter()
 203        .any(|token| input.cursor_excerpt.contains(token))
 204}
 205
 206pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 207    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
 208}
 209
 210pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 211    match format {
 212        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
 213        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
 214        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
 215        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 216        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
 217        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
 218        ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 219        ZetaFormat::v0226Hashline => hashline::special_tokens(),
 220        ZetaFormat::V0304VariableEdit => v0304_variable_edit::special_tokens(),
 221        ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
 222    }
 223}
 224
 225pub fn excerpt_ranges_for_format(
 226    format: ZetaFormat,
 227    ranges: &ExcerptRanges,
 228) -> (Range<usize>, Range<usize>) {
 229    match format {
 230        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 231            ranges.editable_150.clone(),
 232            ranges.editable_150_context_350.clone(),
 233        ),
 234        ZetaFormat::V0114180EditableRegion => (
 235            ranges.editable_180.clone(),
 236            ranges.editable_180_context_350.clone(),
 237        ),
 238        ZetaFormat::V0120GitMergeMarkers
 239        | ZetaFormat::V0131GitMergeMarkersPrefix
 240        | ZetaFormat::V0211Prefill
 241        | ZetaFormat::V0211SeedCoder
 242        | ZetaFormat::v0226Hashline
 243        | ZetaFormat::V0304SeedNoEdits => (
 244            ranges.editable_350.clone(),
 245            ranges.editable_350_context_150.clone(),
 246        ),
 247        ZetaFormat::V0304VariableEdit => {
 248            let context = ranges
 249                .context_8192
 250                .clone()
 251                .unwrap_or_else(|| ranges.editable_350_context_150.clone());
 252            (context.clone(), context)
 253        }
 254    }
 255}
 256
 257pub fn write_cursor_excerpt_section_for_format(
 258    format: ZetaFormat,
 259    prompt: &mut String,
 260    path: &Path,
 261    context: &str,
 262    editable_range: &Range<usize>,
 263    cursor_offset: usize,
 264) {
 265    match format {
 266        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
 267            prompt,
 268            path,
 269            context,
 270            editable_range,
 271            cursor_offset,
 272        ),
 273        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 274            v0113_ordered::write_cursor_excerpt_section(
 275                prompt,
 276                path,
 277                context,
 278                editable_range,
 279                cursor_offset,
 280            )
 281        }
 282        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 283            prompt,
 284            path,
 285            context,
 286            editable_range,
 287            cursor_offset,
 288        ),
 289        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 290            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 291                prompt,
 292                path,
 293                context,
 294                editable_range,
 295                cursor_offset,
 296            )
 297        }
 298        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
 299            seed_coder::write_cursor_excerpt_section(
 300                prompt,
 301                path,
 302                context,
 303                editable_range,
 304                cursor_offset,
 305            )
 306        }
 307        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
 308            prompt,
 309            path,
 310            context,
 311            editable_range,
 312            cursor_offset,
 313        ),
 314        ZetaFormat::V0304VariableEdit => {
 315            v0304_variable_edit::write_cursor_excerpt_section(prompt, path, context, cursor_offset)
 316        }
 317    }
 318}
 319
 320fn offset_range_to_row_range(text: &str, range: Range<usize>) -> Range<u32> {
 321    let start_row = text[0..range.start].matches('\n').count() as u32;
 322    let mut end_row = start_row + text[range.clone()].matches('\n').count() as u32;
 323    if !text[..range.end].ends_with('\n') {
 324        end_row += 1;
 325    }
 326    return start_row..end_row;
 327}
 328
 329pub fn format_prompt_with_budget_for_format(
 330    input: &ZetaPromptInput,
 331    format: ZetaFormat,
 332    max_tokens: usize,
 333) -> String {
 334    let (context, editable_range, context_range, cursor_offset) =
 335        resolve_cursor_region(input, format);
 336    let path = &*input.cursor_path;
 337
 338    let related_files = if let Some(cursor_excerpt_start_row) = input.excerpt_start_row {
 339        let relative_row_range = offset_range_to_row_range(&input.cursor_excerpt, context_range);
 340        let row_range = relative_row_range.start + cursor_excerpt_start_row
 341            ..relative_row_range.end + cursor_excerpt_start_row;
 342        &filter_redundant_excerpts(
 343            input.related_files.clone(),
 344            input.cursor_path.as_ref(),
 345            row_range,
 346        )
 347    } else {
 348        &input.related_files
 349    };
 350
 351    match format {
 352        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
 353            seed_coder::format_prompt_with_budget(
 354                path,
 355                context,
 356                &editable_range,
 357                cursor_offset,
 358                &input.events,
 359                related_files,
 360                max_tokens,
 361            )
 362        }
 363        _ => {
 364            let mut cursor_section = String::new();
 365            write_cursor_excerpt_section_for_format(
 366                format,
 367                &mut cursor_section,
 368                path,
 369                context,
 370                &editable_range,
 371                cursor_offset,
 372            );
 373
 374            let cursor_tokens = estimate_tokens(cursor_section.len());
 375            let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
 376
 377            let edit_history_section = format_edit_history_within_budget(
 378                &input.events,
 379                "<|file_sep|>",
 380                "edit history",
 381                budget_after_cursor,
 382            );
 383            let edit_history_tokens = estimate_tokens(edit_history_section.len());
 384            let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 385
 386            let related_files_section = format_related_files_within_budget(
 387                &related_files,
 388                "<|file_sep|>",
 389                "",
 390                budget_after_edit_history,
 391            );
 392
 393            let mut prompt = String::new();
 394            prompt.push_str(&related_files_section);
 395            prompt.push_str(&edit_history_section);
 396            prompt.push_str(&cursor_section);
 397            prompt
 398        }
 399    }
 400}
 401
 402pub fn filter_redundant_excerpts(
 403    mut related_files: Vec<RelatedFile>,
 404    cursor_path: &Path,
 405    cursor_row_range: Range<u32>,
 406) -> Vec<RelatedFile> {
 407    for file in &mut related_files {
 408        if file.path.as_ref() == cursor_path {
 409            file.excerpts.retain(|excerpt| {
 410                excerpt.row_range.start < cursor_row_range.start
 411                    || excerpt.row_range.end > cursor_row_range.end
 412            });
 413        }
 414    }
 415    related_files.retain(|file| !file.excerpts.is_empty());
 416    related_files
 417}
 418
 419pub fn get_prefill_for_format(
 420    format: ZetaFormat,
 421    context: &str,
 422    editable_range: &Range<usize>,
 423) -> String {
 424    match format {
 425        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
 426        ZetaFormat::V0112MiddleAtEnd
 427        | ZetaFormat::V0113Ordered
 428        | ZetaFormat::V0114180EditableRegion
 429        | ZetaFormat::V0120GitMergeMarkers
 430        | ZetaFormat::V0131GitMergeMarkersPrefix
 431        | ZetaFormat::V0211SeedCoder
 432        | ZetaFormat::v0226Hashline
 433        | ZetaFormat::V0304VariableEdit => String::new(),
 434        ZetaFormat::V0304SeedNoEdits => String::new(),
 435    }
 436}
 437
 438pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
 439    match format {
 440        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
 441        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
 442        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
 443        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => Some(seed_coder::END_MARKER),
 444        ZetaFormat::V0112MiddleAtEnd
 445        | ZetaFormat::V0113Ordered
 446        | ZetaFormat::V0114180EditableRegion
 447        | ZetaFormat::v0226Hashline
 448        | ZetaFormat::V0304VariableEdit => None,
 449    }
 450}
 451
 452pub fn encode_patch_as_output_for_format(
 453    format: ZetaFormat,
 454    old_editable_region: &str,
 455    patch: &str,
 456    cursor_offset: Option<usize>,
 457) -> Result<Option<String>> {
 458    match format {
 459        ZetaFormat::v0226Hashline => {
 460            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
 461        }
 462        ZetaFormat::V0304VariableEdit => v0304_variable_edit::patch_to_variable_edit_output(
 463            old_editable_region,
 464            patch,
 465            cursor_offset,
 466        )
 467        .map(Some),
 468        ZetaFormat::V0304SeedNoEdits => Ok(seed_coder::no_edits(patch)),
 469        _ => Ok(None),
 470    }
 471}
 472
/// Result of interpreting a model response, as returned by
/// `parse_zeta2_model_output`.
pub struct ParsedOutput {
    /// Text that should replace the editable region
    pub new_editable_region: String,
    /// The byte range within `cursor_excerpt` that this replacement applies to
    pub range_in_excerpt: Range<usize>,
}
 479
 480/// Parse model output for the given zeta format
 481pub fn parse_zeta2_model_output(
 482    output: &str,
 483    format: ZetaFormat,
 484    prompt_inputs: &ZetaPromptInput,
 485) -> Result<ParsedOutput> {
 486    let output = match output_end_marker_for_format(format) {
 487        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
 488        None => output,
 489    };
 490
 491    let (context, editable_range_in_context, context_range, _) =
 492        resolve_cursor_region(prompt_inputs, format);
 493    let context_start = context_range.start;
 494    let old_editable_region = &context[editable_range_in_context.clone()];
 495
 496    let (range_in_context, output) = match format {
 497        ZetaFormat::v0226Hashline => (
 498            editable_range_in_context,
 499            if hashline::output_has_edit_commands(output) {
 500                hashline::apply_edit_commands(old_editable_region, output)
 501            } else {
 502                output.to_string()
 503            },
 504        ),
 505        ZetaFormat::V0304VariableEdit => v0304_variable_edit::apply_variable_edit(context, output)?,
 506        ZetaFormat::V0304SeedNoEdits => (
 507            editable_range_in_context,
 508            if output.starts_with(seed_coder::NO_EDITS) {
 509                old_editable_region.to_string()
 510            } else {
 511                output.to_string()
 512            },
 513        ),
 514        _ => (editable_range_in_context, output.to_string()),
 515    };
 516
 517    let range_in_excerpt =
 518        range_in_context.start + context_start..range_in_context.end + context_start;
 519
 520    Ok(ParsedOutput {
 521        new_editable_region: output,
 522        range_in_excerpt,
 523    })
 524}
 525
 526pub fn excerpt_range_for_format(
 527    format: ZetaFormat,
 528    ranges: &ExcerptRanges,
 529) -> (Range<usize>, Range<usize>) {
 530    excerpt_ranges_for_format(format, ranges)
 531}
 532
 533pub fn resolve_cursor_region(
 534    input: &ZetaPromptInput,
 535    format: ZetaFormat,
 536) -> (&str, Range<usize>, Range<usize>, usize) {
 537    let (editable_range, context_range) = excerpt_range_for_format(format, &input.excerpt_ranges);
 538    let context_start = context_range.start;
 539    let context_text = &input.cursor_excerpt[context_range.clone()];
 540    let adjusted_editable =
 541        (editable_range.start - context_start)..(editable_range.end - context_start);
 542    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 543
 544    (
 545        context_text,
 546        adjusted_editable,
 547        context_range,
 548        adjusted_cursor,
 549    )
 550}
 551
 552pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 553    let (context, editable_range, _, _) = resolve_cursor_region(input, format);
 554    get_prefill_for_format(format, context, &editable_range)
 555}
 556
 557fn format_edit_history_within_budget(
 558    events: &[Arc<Event>],
 559    file_marker: &str,
 560    edit_history_name: &str,
 561    max_tokens: usize,
 562) -> String {
 563    let header = format!("{}{}\n", file_marker, edit_history_name);
 564    let header_tokens = estimate_tokens(header.len());
 565    if header_tokens >= max_tokens {
 566        return String::new();
 567    }
 568
 569    let mut event_strings: Vec<String> = Vec::new();
 570    let mut total_tokens = header_tokens;
 571
 572    for event in events.iter().rev() {
 573        let mut event_str = String::new();
 574        write_event(&mut event_str, event);
 575        let event_tokens = estimate_tokens(event_str.len());
 576
 577        if total_tokens + event_tokens > max_tokens {
 578            break;
 579        }
 580        total_tokens += event_tokens;
 581        event_strings.push(event_str);
 582    }
 583
 584    if event_strings.is_empty() {
 585        return String::new();
 586    }
 587
 588    let mut result = header;
 589    for event_str in event_strings.iter().rev() {
 590        result.push_str(event_str);
 591    }
 592    result
 593}
 594
 595fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
 596    let needs_newline = !excerpt.text.ends_with('\n');
 597    let needs_ellipsis = excerpt.row_range.end < file_max_row;
 598    let len = excerpt.text.len()
 599        + if needs_newline { "\n".len() } else { 0 }
 600        + if needs_ellipsis { "...\n".len() } else { 0 };
 601    estimate_tokens(len)
 602}
 603
 604pub fn format_related_files_within_budget(
 605    related_files: &[RelatedFile],
 606    file_prefix: &str,
 607    file_suffix: &str,
 608    max_tokens: usize,
 609) -> String {
 610    struct ExcerptCandidate {
 611        file_ix: usize,
 612        excerpt_ix: usize,
 613        order: usize,
 614    }
 615
 616    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
 617        .iter()
 618        .enumerate()
 619        .flat_map(|(file_ix, file)| {
 620            file.excerpts
 621                .iter()
 622                .enumerate()
 623                .map(move |(excerpt_ix, e)| ExcerptCandidate {
 624                    file_ix,
 625                    excerpt_ix,
 626                    order: e.order,
 627                })
 628        })
 629        .collect();
 630
 631    // Pre-compute file header strings and their token costs.
 632    let file_headers: Vec<String> = related_files
 633        .iter()
 634        .map(|file| {
 635            let path_str = file.path.to_string_lossy();
 636            format!("{}{}\n", file_prefix, path_str)
 637        })
 638        .collect();
 639
 640    // Sort the excerpts by their order and determine how many fit within the budget.
 641    let mut total_tokens = 0;
 642    let mut included_excerpt_count = 0_usize;
 643    let mut included_file_indices = vec![false; related_files.len()];
 644    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
 645    for candidate in &excerpt_candidates {
 646        let file = &related_files[candidate.file_ix];
 647        let excerpt = &file.excerpts[candidate.excerpt_ix];
 648        let file_already_included = included_file_indices[candidate.file_ix];
 649        let header_cost = if file_already_included {
 650            0
 651        } else {
 652            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
 653        };
 654        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
 655        if total_tokens + header_cost + excerpt_cost > max_tokens {
 656            break;
 657        }
 658        total_tokens += header_cost + excerpt_cost;
 659        if !file_already_included {
 660            included_file_indices[candidate.file_ix] = true;
 661        }
 662        included_excerpt_count += 1;
 663    }
 664
 665    excerpt_candidates.truncate(included_excerpt_count);
 666    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
 667
 668    // Render all of the files that fit within the token budget, in the original order.
 669    let mut result = String::new();
 670    let mut last_file_ix = None;
 671    for candidate in &excerpt_candidates {
 672        if last_file_ix != Some(candidate.file_ix) {
 673            if last_file_ix.is_some() {
 674                result.push_str(file_suffix);
 675            }
 676            result.push_str(&file_headers[candidate.file_ix]);
 677            last_file_ix = Some(candidate.file_ix);
 678        }
 679        let file = &related_files[candidate.file_ix];
 680        let excerpt = &file.excerpts[candidate.excerpt_ix];
 681        result.push_str(&excerpt.text);
 682        if !result.ends_with('\n') {
 683            result.push('\n');
 684        }
 685        if excerpt.row_range.end < file.max_row {
 686            result.push_str("...\n");
 687        }
 688    }
 689
 690    result
 691}
 692
 693pub fn write_related_files(
 694    prompt: &mut String,
 695    related_files: &[RelatedFile],
 696) -> Vec<Range<usize>> {
 697    let mut ranges = Vec::new();
 698    for file in related_files {
 699        let start = prompt.len();
 700        let path_str = file.path.to_string_lossy();
 701        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 702        for excerpt in &file.excerpts {
 703            prompt.push_str(&excerpt.text);
 704            if !prompt.ends_with('\n') {
 705                prompt.push('\n');
 706            }
 707            if excerpt.row_range.end < file.max_row {
 708                prompt.push_str("...\n");
 709            }
 710        }
 711        let end = prompt.len();
 712        ranges.push(start..end);
 713    }
 714    ranges
 715}
 716
 717mod v0112_middle_at_end {
 718    use super::*;
 719
 720    pub fn special_tokens() -> &'static [&'static str] {
 721        &[
 722            "<|fim_prefix|>",
 723            "<|fim_suffix|>",
 724            "<|fim_middle|>",
 725            "<|file_sep|>",
 726            CURSOR_MARKER,
 727        ]
 728    }
 729
 730    pub fn write_cursor_excerpt_section(
 731        prompt: &mut String,
 732        path: &Path,
 733        context: &str,
 734        editable_range: &Range<usize>,
 735        cursor_offset: usize,
 736    ) {
 737        let path_str = path.to_string_lossy();
 738        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 739
 740        prompt.push_str("<|fim_prefix|>\n");
 741        prompt.push_str(&context[..editable_range.start]);
 742
 743        prompt.push_str("<|fim_suffix|>\n");
 744        prompt.push_str(&context[editable_range.end..]);
 745        if !prompt.ends_with('\n') {
 746            prompt.push('\n');
 747        }
 748
 749        prompt.push_str("<|fim_middle|>current\n");
 750        prompt.push_str(&context[editable_range.start..cursor_offset]);
 751        prompt.push_str(CURSOR_MARKER);
 752        prompt.push_str(&context[cursor_offset..editable_range.end]);
 753        if !prompt.ends_with('\n') {
 754            prompt.push('\n');
 755        }
 756
 757        prompt.push_str("<|fim_middle|>updated\n");
 758    }
 759}
 760
 761mod v0113_ordered {
 762    use super::*;
 763
 764    pub fn special_tokens() -> &'static [&'static str] {
 765        &[
 766            "<|fim_prefix|>",
 767            "<|fim_suffix|>",
 768            "<|fim_middle|>",
 769            "<|file_sep|>",
 770            CURSOR_MARKER,
 771        ]
 772    }
 773
 774    pub fn write_cursor_excerpt_section(
 775        prompt: &mut String,
 776        path: &Path,
 777        context: &str,
 778        editable_range: &Range<usize>,
 779        cursor_offset: usize,
 780    ) {
 781        let path_str = path.to_string_lossy();
 782        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 783
 784        prompt.push_str("<|fim_prefix|>\n");
 785        prompt.push_str(&context[..editable_range.start]);
 786        if !prompt.ends_with('\n') {
 787            prompt.push('\n');
 788        }
 789
 790        prompt.push_str("<|fim_middle|>current\n");
 791        prompt.push_str(&context[editable_range.start..cursor_offset]);
 792        prompt.push_str(CURSOR_MARKER);
 793        prompt.push_str(&context[cursor_offset..editable_range.end]);
 794        if !prompt.ends_with('\n') {
 795            prompt.push('\n');
 796        }
 797
 798        prompt.push_str("<|fim_suffix|>\n");
 799        prompt.push_str(&context[editable_range.end..]);
 800        if !prompt.ends_with('\n') {
 801            prompt.push('\n');
 802        }
 803
 804        prompt.push_str("<|fim_middle|>updated\n");
 805    }
 806}
 807
mod v0114180_editable_region {
    //! Format variant that shares its special tokens with `v0113_ordered`.

    use super::*;

    /// Returns the same special tokens as `v0113_ordered::special_tokens`.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
 815
 816pub mod v0120_git_merge_markers {
 817    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 818    //!
 819    //! Example prompt:
 820    //!
 821    //! <|file_sep|>path/to/target_file.py
 822    //! <|fim_prefix|>
 823    //! code before editable region
 824    //! <|fim_suffix|>
 825    //! code after editable region
 826    //! <|fim_middle|>
 827    //! <<<<<<< CURRENT
 828    //! code that
 829    //! needs to<|user_cursor|>
 830    //! be rewritten
 831    //! =======
 832    //!
 833    //! Expected output (should be generated by the model):
 834    //!
 835    //! updated
 836    //! code with
 837    //! changes applied
 838    //! >>>>>>> UPDATED
 839
 840    use super::*;
 841
 842    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 843    pub const SEPARATOR: &str = "=======\n";
 844    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 845
 846    pub fn special_tokens() -> &'static [&'static str] {
 847        &[
 848            "<|fim_prefix|>",
 849            "<|fim_suffix|>",
 850            "<|fim_middle|>",
 851            "<|file_sep|>",
 852            START_MARKER,
 853            SEPARATOR,
 854            END_MARKER,
 855            CURSOR_MARKER,
 856        ]
 857    }
 858
 859    pub fn write_cursor_excerpt_section(
 860        prompt: &mut String,
 861        path: &Path,
 862        context: &str,
 863        editable_range: &Range<usize>,
 864        cursor_offset: usize,
 865    ) {
 866        let path_str = path.to_string_lossy();
 867        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 868
 869        prompt.push_str("<|fim_prefix|>");
 870        prompt.push_str(&context[..editable_range.start]);
 871
 872        prompt.push_str("<|fim_suffix|>");
 873        prompt.push_str(&context[editable_range.end..]);
 874        if !prompt.ends_with('\n') {
 875            prompt.push('\n');
 876        }
 877
 878        prompt.push_str("<|fim_middle|>");
 879        prompt.push_str(START_MARKER);
 880        prompt.push_str(&context[editable_range.start..cursor_offset]);
 881        prompt.push_str(CURSOR_MARKER);
 882        prompt.push_str(&context[cursor_offset..editable_range.end]);
 883        if !prompt.ends_with('\n') {
 884            prompt.push('\n');
 885        }
 886        prompt.push_str(SEPARATOR);
 887    }
 888}
 889
 890pub mod v0131_git_merge_markers_prefix {
 891    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 892    //!
 893    //! Example prompt:
 894    //!
 895    //! <|file_sep|>path/to/target_file.py
 896    //! <|fim_prefix|>
 897    //! code before editable region
 898    //! <<<<<<< CURRENT
 899    //! code that
 900    //! needs to<|user_cursor|>
 901    //! be rewritten
 902    //! =======
 903    //! <|fim_suffix|>
 904    //! code after editable region
 905    //! <|fim_middle|>
 906    //!
 907    //! Expected output (should be generated by the model):
 908    //!
 909    //! updated
 910    //! code with
 911    //! changes applied
 912    //! >>>>>>> UPDATED
 913
 914    use super::*;
 915
 916    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 917    pub const SEPARATOR: &str = "=======\n";
 918    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 919
 920    pub fn special_tokens() -> &'static [&'static str] {
 921        &[
 922            "<|fim_prefix|>",
 923            "<|fim_suffix|>",
 924            "<|fim_middle|>",
 925            "<|file_sep|>",
 926            START_MARKER,
 927            SEPARATOR,
 928            END_MARKER,
 929            CURSOR_MARKER,
 930        ]
 931    }
 932
 933    pub fn write_cursor_excerpt_section(
 934        prompt: &mut String,
 935        path: &Path,
 936        context: &str,
 937        editable_range: &Range<usize>,
 938        cursor_offset: usize,
 939    ) {
 940        let path_str = path.to_string_lossy();
 941        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 942
 943        prompt.push_str("<|fim_prefix|>");
 944        prompt.push_str(&context[..editable_range.start]);
 945        prompt.push_str(START_MARKER);
 946        prompt.push_str(&context[editable_range.start..cursor_offset]);
 947        prompt.push_str(CURSOR_MARKER);
 948        prompt.push_str(&context[cursor_offset..editable_range.end]);
 949        if !prompt.ends_with('\n') {
 950            prompt.push('\n');
 951        }
 952        prompt.push_str(SEPARATOR);
 953
 954        prompt.push_str("<|fim_suffix|>");
 955        prompt.push_str(&context[editable_range.end..]);
 956        if !prompt.ends_with('\n') {
 957            prompt.push('\n');
 958        }
 959
 960        prompt.push_str("<|fim_middle|>");
 961    }
 962}
 963
 964pub mod v0211_prefill {
 965    use super::*;
 966
 967    pub fn special_tokens() -> &'static [&'static str] {
 968        v0131_git_merge_markers_prefix::special_tokens()
 969    }
 970
 971    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
 972        let editable_region = &context[editable_range.start..editable_range.end];
 973
 974        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
 975        let prefill_len = editable_region.floor_char_boundary(prefill_len);
 976
 977        // Find a token boundary to avoid splitting tokens in the prefill.
 978        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
 979        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
 980        // the \n and consume any consecutive \n characters after it.
 981        let prefill = &editable_region[..prefill_len];
 982        match prefill.rfind('\n') {
 983            Some(pos) => {
 984                let mut end = pos + 1;
 985                while end < editable_region.len()
 986                    && editable_region.as_bytes().get(end) == Some(&b'\n')
 987                {
 988                    end += 1;
 989                }
 990                editable_region[..end].to_string()
 991            }
 992            // No newline found. Fall back to splitting before the last space
 993            // (word-level boundary)
 994            None => match prefill.rfind(' ') {
 995                Some(pos) => prefill[..pos].to_string(),
 996                None => prefill.to_string(),
 997            },
 998        }
 999    }
1000}
1001
1002pub mod hashline {
1003
1004    use std::fmt::Display;
1005
    /// Last text that `write_cursor_excerpt_section` appends to the prompt.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Written after the prefix context, directly before the hash-prefixed
    /// lines of the editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";
1008
1009    use super::*;
1010
    /// Prefix of a model-output line that starts a set (replace lines) command.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a model-output line that starts an insert command.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
1013
1014    pub fn special_tokens() -> &'static [&'static str] {
1015        return &[
1016            SET_COMMAND_MARKER,
1017            "<|set_range|>",
1018            INSERT_COMMAND_MARKER,
1019            CURSOR_MARKER,
1020            "<|file_sep|>",
1021            "<|fim_prefix|>",
1022            "<|fim_suffix|>",
1023            "<|fim_middle|>",
1024        ];
1025    }
1026
1027    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
1028    #[derive(Debug, Clone, PartialEq, Eq)]
1029    struct LineRef {
1030        index: usize,
1031        hash: u8,
1032    }
1033
1034    impl Display for LineRef {
1035        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1036            write!(f, "{}:{:02x}", self.index, self.hash)
1037        }
1038    }
1039
1040    pub fn hash_line(line: &[u8]) -> u8 {
1041        let mut h: u8 = 0;
1042        for &byte in line {
1043            h = h.wrapping_add(byte);
1044        }
1045        return h;
1046    }
1047
1048    /// Write the hashline-encoded editable region into `out`. Each line of
1049    /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
1050    /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
1051    /// to the start of `editable_text`).
1052    pub fn write_hashline_editable_region(
1053        out: &mut String,
1054        editable_text: &str,
1055        cursor_offset_in_editable: usize,
1056    ) {
1057        let mut offset = 0;
1058        for (i, line) in editable_text.lines().enumerate() {
1059            let (head, cursor, tail) = if cursor_offset_in_editable > offset
1060                && cursor_offset_in_editable < offset + line.len()
1061            {
1062                (
1063                    &line[..cursor_offset_in_editable - offset],
1064                    CURSOR_MARKER,
1065                    &line[cursor_offset_in_editable - offset..],
1066                )
1067            } else {
1068                (line, "", "")
1069            };
1070            write!(
1071                out,
1072                "\n{}|{head}{cursor}{tail}",
1073                LineRef {
1074                    index: i,
1075                    hash: hash_line(line.as_bytes())
1076                }
1077            )
1078            .unwrap();
1079            offset += line.len() + 1;
1080        }
1081    }
1082
1083    pub fn write_cursor_excerpt_section(
1084        prompt: &mut String,
1085        path: &Path,
1086        context: &str,
1087        editable_range: &Range<usize>,
1088        cursor_offset: usize,
1089    ) {
1090        let path_str = path.to_string_lossy();
1091        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1092
1093        prompt.push_str("<|fim_prefix|>\n");
1094        prompt.push_str(&context[..editable_range.start]);
1095        prompt.push_str(START_MARKER);
1096
1097        let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1098        let editable_region = &context[editable_range.clone()];
1099        write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1100
1101        if !prompt.ends_with('\n') {
1102            prompt.push('\n');
1103        }
1104
1105        prompt.push_str("<|fim_suffix|>\n");
1106        prompt.push_str(&context[editable_range.end..]);
1107        if !prompt.ends_with('\n') {
1108            prompt.push('\n');
1109        }
1110
1111        prompt.push_str(END_MARKER);
1112    }
1113
    /// A single edit command parsed from the model output.
    ///
    /// `content` borrows from the model output and holds the lines that
    /// followed the command line, up to the next command line.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            /// First line of the range to replace.
            start: LineRef,
            /// Last line of the range to replace (inclusive).
            end: LineRef,
            /// Replacement text for the range.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            /// Line to insert after; `None` means before the first line.
            after: Option<LineRef>,
            /// Text to insert.
            content: &'a str,
        },
    }
1131
1132    /// Parse a line reference like `3:c3` into a `LineRef`.
1133    fn parse_line_ref(s: &str) -> Option<LineRef> {
1134        let (idx_str, hash_str) = s.split_once(':')?;
1135        let index = idx_str.parse::<usize>().ok()?;
1136        let hash = u8::from_str_radix(hash_str, 16).ok()?;
1137        Some(LineRef { index, hash })
1138    }
1139
1140    /// Parse the model output into a list of `EditCommand`s.
1141    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1142        let mut commands = Vec::new();
1143        let mut offset = 0usize;
1144
1145        while offset < model_output.len() {
1146            let next_nl = model_output[offset..]
1147                .find('\n')
1148                .map(|i| offset + i)
1149                .unwrap_or(model_output.len());
1150            let line = &model_output[offset..next_nl];
1151            let line_end = if next_nl < model_output.len() {
1152                next_nl + 1
1153            } else {
1154                next_nl
1155            };
1156
1157            let trimmed = line.trim();
1158            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1159                (true, spec)
1160            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1161                (false, spec)
1162            } else {
1163                offset = line_end;
1164                continue;
1165            };
1166
1167            let mut content_end = line_end;
1168            let mut scan = line_end;
1169
1170            while scan < model_output.len() {
1171                let body_nl = model_output[scan..]
1172                    .find('\n')
1173                    .map(|i| scan + i)
1174                    .unwrap_or(model_output.len());
1175                let body_line = &model_output[scan..body_nl];
1176                if body_line.trim().starts_with(SET_COMMAND_MARKER)
1177                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1178                {
1179                    break;
1180                }
1181                scan = if body_nl < model_output.len() {
1182                    body_nl + 1
1183                } else {
1184                    body_nl
1185                };
1186                content_end = scan;
1187            }
1188
1189            let content = &model_output[line_end..content_end];
1190
1191            if is_set {
1192                if let Some((start_str, end_str)) = specifier.split_once('-') {
1193                    if let (Some(start), Some(end)) =
1194                        (parse_line_ref(start_str), parse_line_ref(end_str))
1195                    {
1196                        commands.push(EditCommand::Set {
1197                            start,
1198                            end,
1199                            content,
1200                        });
1201                    }
1202                } else if let Some(target) = parse_line_ref(specifier) {
1203                    commands.push(EditCommand::Set {
1204                        start: target.clone(),
1205                        end: target,
1206                        content,
1207                    });
1208                }
1209            } else {
1210                let after = parse_line_ref(specifier);
1211                commands.push(EditCommand::Insert { after, content });
1212            }
1213
1214            offset = scan;
1215        }
1216
1217        commands
1218    }
1219
1220    /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1221    /// (as opposed to being a plain full-replacement output).
1222    /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1223    /// editable region, returning the plain text content.
1224    pub fn strip_hashline_prefixes(region: &str) -> String {
1225        let mut decoded: String = region
1226            .lines()
1227            .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1228            .collect::<Vec<_>>()
1229            .join("\n");
1230        if region.ends_with('\n') {
1231            decoded.push('\n');
1232        }
1233        decoded
1234    }
1235
1236    pub fn output_has_edit_commands(model_output: &str) -> bool {
1237        model_output.contains(SET_COMMAND_MARKER) || model_output.contains(INSERT_COMMAND_MARKER)
1238    }
1239
1240    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1241    /// original editable region text.
1242    ///
1243    /// `editable_region` is the original text of the editable region (without hash
1244    /// prefixes). `model_output` is the raw model response containing edit commands.
1245    ///
1246    /// Returns the full replacement text for the editable region.
1247    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1248        let original_lines: Vec<&str> = editable_region.lines().collect();
1249        let old_hashes: Vec<u8> = original_lines
1250            .iter()
1251            .map(|line| hash_line(line.as_bytes()))
1252            .collect();
1253
1254        let commands = parse_edit_commands(model_output);
1255
1256        // For set operations: indexed by start line → Some((end line index, content))
1257        // For insert operations: indexed by line index → vec of content to insert after
1258        // Insert-before-first is tracked separately.
1259        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1260        let mut insert_before_first: Vec<&str> = Vec::new();
1261        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1262
1263        for command in &commands {
1264            match command {
1265                EditCommand::Set {
1266                    start,
1267                    end,
1268                    content,
1269                } => {
1270                    if start.index < old_hashes.len()
1271                        && end.index < old_hashes.len()
1272                        && start.index <= end.index
1273                        && old_hashes[start.index] == start.hash
1274                        && old_hashes[end.index] == end.hash
1275                    {
1276                        set_ops[start.index] = Some((end.index, *content));
1277                    }
1278                }
1279                EditCommand::Insert { after, content } => match after {
1280                    None => insert_before_first.push(*content),
1281                    Some(line_ref) => {
1282                        if line_ref.index < old_hashes.len()
1283                            && old_hashes[line_ref.index] == line_ref.hash
1284                        {
1285                            insert_after[line_ref.index].push(*content);
1286                        }
1287                    }
1288                },
1289            }
1290        }
1291
1292        let mut result = String::new();
1293
1294        // Emit any insertions before the first line
1295        for content in &insert_before_first {
1296            result.push_str(content);
1297            if !content.ends_with('\n') {
1298                result.push('\n');
1299            }
1300        }
1301
1302        let mut i = 0;
1303        while i < original_lines.len() {
1304            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1305                // Replace lines i..=end_index with the replacement content
1306                result.push_str(replacement);
1307                if !replacement.is_empty() && !replacement.ends_with('\n') {
1308                    result.push('\n');
1309                }
1310                // Emit any insertions after the end of this set range
1311                if *end_index < insert_after.len() {
1312                    for content in &insert_after[*end_index] {
1313                        result.push_str(content);
1314                        if !content.ends_with('\n') {
1315                            result.push('\n');
1316                        }
1317                    }
1318                }
1319                i = end_index + 1;
1320            } else {
1321                // Keep the original line
1322                result.push_str(original_lines[i]);
1323                result.push('\n');
1324                // Emit any insertions after this line
1325                for content in &insert_after[i] {
1326                    result.push_str(content);
1327                    if !content.ends_with('\n') {
1328                        result.push('\n');
1329                    }
1330                }
1331                i += 1;
1332            }
1333        }
1334
1335        // Preserve trailing newline behavior: if the original ended with a
1336        // newline the result already has one; if it didn't, trim the extra one
1337        // we added.
1338        if !editable_region.ends_with('\n') && result.ends_with('\n') {
1339            result.pop();
1340        }
1341
1342        result
1343    }
1344
1345    /// Convert a unified diff patch into hashline edit commands.
1346    ///
1347    /// Parses the unified diff `patch` directly to determine which lines of
1348    /// `old_text` are deleted/replaced and what new lines are added, then emits
1349    /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1350    /// `{index}:{hash}` identifiers.
1351    ///
1352    /// `cursor_offset` is an optional byte offset into the first hunk's new
1353    /// text (context + additions) where the cursor marker should be placed.
1354    pub fn patch_to_edit_commands(
1355        old_text: &str,
1356        patch: &str,
1357        cursor_offset: Option<usize>,
1358    ) -> Result<String> {
1359        let old_lines: Vec<&str> = old_text.lines().collect();
1360        let old_hashes: Vec<u8> = old_lines
1361            .iter()
1362            .map(|line| hash_line(line.as_bytes()))
1363            .collect();
1364
1365        let mut result = String::new();
1366        let mut first_hunk = true;
1367
1368        struct Hunk<'a> {
1369            line_range: Range<usize>,
1370            new_text_lines: Vec<&'a str>,
1371            cursor_line_offset_in_new_text: Option<(usize, usize)>,
1372        }
1373
1374        // Parse the patch line by line. We only care about hunk headers,
1375        // context, deletions, and additions.
1376        let mut old_line_index: usize = 0;
1377        let mut current_hunk: Option<Hunk> = None;
1378        // Byte offset tracking within the hunk's new text for cursor placement.
1379        let mut new_text_byte_offset: usize = 0;
1380        // The line index of the last old line seen before/in the current hunk
1381        // (used for insert-after reference).
1382        let mut last_old_line_before_hunk: Option<usize> = None;
1383
1384        fn flush_hunk(
1385            hunk: Hunk,
1386            last_old_line: Option<usize>,
1387            result: &mut String,
1388            old_hashes: &[u8],
1389        ) {
1390            if hunk.line_range.is_empty() {
1391                // Pure insertion — reference the old line to insert after when in bounds.
1392                if let Some(after) = last_old_line
1393                    && let Some(&hash) = old_hashes.get(after)
1394                {
1395                    write!(
1396                        result,
1397                        "{INSERT_COMMAND_MARKER}{}\n",
1398                        LineRef { index: after, hash }
1399                    )
1400                    .unwrap();
1401                } else {
1402                    result.push_str(INSERT_COMMAND_MARKER);
1403                    result.push('\n');
1404                }
1405            } else {
1406                let start = hunk.line_range.start;
1407                let end_exclusive = hunk.line_range.end;
1408                let deleted_line_count = end_exclusive.saturating_sub(start);
1409
1410                if deleted_line_count == 1 {
1411                    if let Some(&hash) = old_hashes.get(start) {
1412                        write!(
1413                            result,
1414                            "{SET_COMMAND_MARKER}{}\n",
1415                            LineRef { index: start, hash }
1416                        )
1417                        .unwrap();
1418                    } else {
1419                        result.push_str(SET_COMMAND_MARKER);
1420                        result.push('\n');
1421                    }
1422                } else {
1423                    let end_inclusive = end_exclusive - 1;
1424                    match (
1425                        old_hashes.get(start).copied(),
1426                        old_hashes.get(end_inclusive).copied(),
1427                    ) {
1428                        (Some(start_hash), Some(end_hash)) => {
1429                            write!(
1430                                result,
1431                                "{SET_COMMAND_MARKER}{}-{}\n",
1432                                LineRef {
1433                                    index: start,
1434                                    hash: start_hash
1435                                },
1436                                LineRef {
1437                                    index: end_inclusive,
1438                                    hash: end_hash
1439                                }
1440                            )
1441                            .unwrap();
1442                        }
1443                        _ => {
1444                            result.push_str(SET_COMMAND_MARKER);
1445                            result.push('\n');
1446                        }
1447                    }
1448                }
1449            }
1450            for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1451                if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1452                    && line_offset == cursor_line_offset
1453                {
1454                    result.push_str(&line[..char_offset]);
1455                    result.push_str(CURSOR_MARKER);
1456                    result.push_str(&line[char_offset..]);
1457                    continue;
1458                }
1459
1460                result.push_str(line);
1461            }
1462        }
1463
1464        for raw_line in patch.split_inclusive('\n') {
1465            if raw_line.starts_with("@@") {
1466                // Flush any pending change hunk from a previous patch hunk.
1467                if let Some(hunk) = current_hunk.take() {
1468                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1469                }
1470
1471                // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1472                // We intentionally do not trust old_start as a direct local index into `old_text`,
1473                // because some patches are produced against a larger file region and carry
1474                // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1475                if first_hunk {
1476                    new_text_byte_offset = 0;
1477                    first_hunk = false;
1478                }
1479                continue;
1480            }
1481
1482            if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1483                continue;
1484            }
1485            if raw_line.starts_with("\\ No newline") {
1486                continue;
1487            }
1488
1489            if raw_line.starts_with('-') {
1490                // Extend or start a change hunk with this deleted old line.
1491                match &mut current_hunk {
1492                    Some(Hunk {
1493                        line_range: range, ..
1494                    }) => range.end = old_line_index + 1,
1495                    None => {
1496                        current_hunk = Some(Hunk {
1497                            line_range: old_line_index..old_line_index + 1,
1498                            new_text_lines: Vec::new(),
1499                            cursor_line_offset_in_new_text: None,
1500                        });
1501                    }
1502                }
1503                old_line_index += 1;
1504            } else if let Some(added_content) = raw_line.strip_prefix('+') {
1505                // Place cursor marker if cursor_offset falls within this line.
1506                let mut cursor_line_offset = None;
1507                if let Some(cursor_off) = cursor_offset
1508                    && (first_hunk
1509                        || cursor_off >= new_text_byte_offset
1510                            && cursor_off <= new_text_byte_offset + added_content.len())
1511                {
1512                    let line_offset = added_content.floor_char_boundary(
1513                        cursor_off
1514                            .saturating_sub(new_text_byte_offset)
1515                            .min(added_content.len()),
1516                    );
1517                    cursor_line_offset = Some(line_offset);
1518                }
1519
1520                new_text_byte_offset += added_content.len();
1521
1522                let hunk = current_hunk.get_or_insert(Hunk {
1523                    line_range: old_line_index..old_line_index,
1524                    new_text_lines: vec![],
1525                    cursor_line_offset_in_new_text: None,
1526                });
1527                hunk.new_text_lines.push(added_content);
1528                hunk.cursor_line_offset_in_new_text = cursor_line_offset
1529                    .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1530            } else {
1531                // Context line (starts with ' ' or is empty).
1532                if let Some(hunk) = current_hunk.take() {
1533                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1534                }
1535                last_old_line_before_hunk = Some(old_line_index);
1536                old_line_index += 1;
1537                let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1538                new_text_byte_offset += content.len();
1539            }
1540        }
1541
1542        // Flush final group.
1543        if let Some(hunk) = current_hunk.take() {
1544            flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1545        }
1546
1547        // Trim a single trailing newline.
1548        if result.ends_with('\n') {
1549            result.pop();
1550        }
1551
1552        Ok(result)
1553    }
1554
1555    #[cfg(test)]
1556    mod tests {
1557        use super::*;
1558        use indoc::indoc;
1559
1560        #[test]
1561        fn test_format_cursor_region() {
1562            struct Case {
1563                name: &'static str,
1564                context: &'static str,
1565                editable_range: Range<usize>,
1566                cursor_offset: usize,
1567                expected: &'static str,
1568            }
1569
1570            let cases = [
1571                Case {
1572                    name: "basic_cursor_placement",
1573                    context: "hello world\n",
1574                    editable_range: 0..12,
1575                    cursor_offset: 5,
1576                    expected: indoc! {"
1577                    <|file_sep|>test.rs
1578                    <|fim_prefix|>
1579                    <|fim_middle|>current
1580                    0:5c|hello<|user_cursor|> world
1581                    <|fim_suffix|>
1582                    <|fim_middle|>updated"},
1583                },
1584                Case {
1585                    name: "multiline_cursor_on_second_line",
1586                    context: "aaa\nbbb\nccc\n",
1587                    editable_range: 0..12,
1588                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
1589                    expected: indoc! {"
1590                    <|file_sep|>test.rs
1591                    <|fim_prefix|>
1592                    <|fim_middle|>current
1593                    0:23|aaa
1594                    1:26|b<|user_cursor|>bb
1595                    2:29|ccc
1596                    <|fim_suffix|>
1597                    <|fim_middle|>updated"},
1598                },
1599                Case {
1600                    name: "no_trailing_newline_in_context",
1601                    context: "line1\nline2",
1602                    editable_range: 0..11,
1603                    cursor_offset: 3,
1604                    expected: indoc! {"
1605                    <|file_sep|>test.rs
1606                    <|fim_prefix|>
1607                    <|fim_middle|>current
1608                    0:d9|lin<|user_cursor|>e1
1609                    1:da|line2
1610                    <|fim_suffix|>
1611                    <|fim_middle|>updated"},
1612                },
1613                Case {
1614                    name: "leading_newline_in_editable_region",
1615                    context: "\nabc\n",
1616                    editable_range: 0..5,
1617                    cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
1618                    expected: indoc! {"
1619                    <|file_sep|>test.rs
1620                    <|fim_prefix|>
1621                    <|fim_middle|>current
1622                    0:00|
1623                    1:26|a<|user_cursor|>bc
1624                    <|fim_suffix|>
1625                    <|fim_middle|>updated"},
1626                },
1627                Case {
1628                    name: "with_suffix",
1629                    context: "abc\ndef",
1630                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
1631                    cursor_offset: 2,
1632                    expected: indoc! {"
1633                    <|file_sep|>test.rs
1634                    <|fim_prefix|>
1635                    <|fim_middle|>current
1636                    0:26|ab<|user_cursor|>c
1637                    <|fim_suffix|>
1638                    def
1639                    <|fim_middle|>updated"},
1640                },
1641                Case {
1642                    name: "unicode_two_byte_chars",
1643                    context: "héllo\n",
1644                    editable_range: 0..7,
1645                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
1646                    expected: indoc! {"
1647                    <|file_sep|>test.rs
1648                    <|fim_prefix|>
1649                    <|fim_middle|>current
1650                    0:1b|hé<|user_cursor|>llo
1651                    <|fim_suffix|>
1652                    <|fim_middle|>updated"},
1653                },
1654                Case {
1655                    name: "unicode_three_byte_chars",
1656                    context: "日本語\n",
1657                    editable_range: 0..10,
1658                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
1659                    expected: indoc! {"
1660                    <|file_sep|>test.rs
1661                    <|fim_prefix|>
1662                    <|fim_middle|>current
1663                    0:80|日本<|user_cursor|>語
1664                    <|fim_suffix|>
1665                    <|fim_middle|>updated"},
1666                },
1667                Case {
1668                    name: "unicode_four_byte_chars",
1669                    context: "a🌍b\n",
1670                    editable_range: 0..7,
1671                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
1672                    expected: indoc! {"
1673                    <|file_sep|>test.rs
1674                    <|fim_prefix|>
1675                    <|fim_middle|>current
1676                    0:6b|a🌍<|user_cursor|>b
1677                    <|fim_suffix|>
1678                    <|fim_middle|>updated"},
1679                },
1680                Case {
1681                    name: "cursor_at_start_of_region_not_placed",
1682                    context: "abc\n",
1683                    editable_range: 0..4,
1684                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
1685                    expected: indoc! {"
1686                    <|file_sep|>test.rs
1687                    <|fim_prefix|>
1688                    <|fim_middle|>current
1689                    0:26|abc
1690                    <|fim_suffix|>
1691                    <|fim_middle|>updated"},
1692                },
1693                Case {
1694                    name: "cursor_at_end_of_line_not_placed",
1695                    context: "abc\ndef\n",
1696                    editable_range: 0..8,
1697                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
1698                    expected: indoc! {"
1699                    <|file_sep|>test.rs
1700                    <|fim_prefix|>
1701                    <|fim_middle|>current
1702                    0:26|abc
1703                    1:2f|def
1704                    <|fim_suffix|>
1705                    <|fim_middle|>updated"},
1706                },
1707                Case {
1708                    name: "cursor_offset_relative_to_context_not_editable_region",
1709                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
1710                    // write_cursor_excerpt_section must subtract it before comparing against
1711                    // per-line offsets within the editable region.
1712                    context: "pre\naaa\nbbb\nsuf\n",
1713                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
1714                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
1715                    expected: indoc! {"
1716                    <|file_sep|>test.rs
1717                    <|fim_prefix|>
1718                    pre
1719                    <|fim_middle|>current
1720                    0:23|aaa
1721                    1:26|b<|user_cursor|>bb
1722                    <|fim_suffix|>
1723                    suf
1724                    <|fim_middle|>updated"},
1725                },
1726            ];
1727
1728            for case in &cases {
1729                let mut prompt = String::new();
1730                hashline::write_cursor_excerpt_section(
1731                    &mut prompt,
1732                    Path::new("test.rs"),
1733                    case.context,
1734                    &case.editable_range,
1735                    case.cursor_offset,
1736                );
1737                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
1738            }
1739        }
1740
1741        #[test]
1742        fn test_apply_edit_commands() {
1743            struct Case {
1744                name: &'static str,
1745                original: &'static str,
1746                model_output: &'static str,
1747                expected: &'static str,
1748            }
1749
1750            let cases = vec![
1751                Case {
1752                    name: "set_single_line",
1753                    original: indoc! {"
1754                    let mut total = 0;
1755                    for product in products {
1756                        total += ;
1757                    }
1758                    total
1759                "},
1760                    model_output: indoc! {"
1761                    <|set|>2:87
1762                        total += product.price;
1763                "},
1764                    expected: indoc! {"
1765                    let mut total = 0;
1766                    for product in products {
1767                        total += product.price;
1768                    }
1769                    total
1770                "},
1771                },
1772                Case {
1773                    name: "set_range",
1774                    original: indoc! {"
1775                    fn foo() {
1776                        let x = 1;
1777                        let y = 2;
1778                        let z = 3;
1779                    }
1780                "},
1781                    model_output: indoc! {"
1782                    <|set|>1:46-3:4a
1783                        let sum = 6;
1784                "},
1785                    expected: indoc! {"
1786                    fn foo() {
1787                        let sum = 6;
1788                    }
1789                "},
1790                },
1791                Case {
1792                    name: "insert_after_line",
1793                    original: indoc! {"
1794                    fn main() {
1795                        let x = 1;
1796                    }
1797                "},
1798                    model_output: indoc! {"
1799                    <|insert|>1:46
1800                        let y = 2;
1801                "},
1802                    expected: indoc! {"
1803                    fn main() {
1804                        let x = 1;
1805                        let y = 2;
1806                    }
1807                "},
1808                },
1809                Case {
1810                    name: "insert_before_first",
1811                    original: indoc! {"
1812                    let x = 1;
1813                    let y = 2;
1814                "},
1815                    model_output: indoc! {"
1816                    <|insert|>
1817                    use std::io;
1818                "},
1819                    expected: indoc! {"
1820                    use std::io;
1821                    let x = 1;
1822                    let y = 2;
1823                "},
1824                },
1825                Case {
1826                    name: "set_with_cursor_marker",
1827                    original: indoc! {"
1828                    fn main() {
1829                        println!();
1830                    }
1831                "},
1832                    model_output: indoc! {"
1833                    <|set|>1:34
1834                        eprintln!(\"<|user_cursor|>\");
1835                "},
1836                    expected: indoc! {"
1837                    fn main() {
1838                        eprintln!(\"<|user_cursor|>\");
1839                    }
1840                "},
1841                },
1842                Case {
1843                    name: "multiple_set_commands",
1844                    original: indoc! {"
1845                    aaa
1846                    bbb
1847                    ccc
1848                    ddd
1849                "},
1850                    model_output: indoc! {"
1851                    <|set|>0:23
1852                    AAA
1853                    <|set|>2:29
1854                    CCC
1855                "},
1856                    expected: indoc! {"
1857                    AAA
1858                    bbb
1859                    CCC
1860                    ddd
1861                "},
1862                },
1863                Case {
1864                    name: "set_range_multiline_replacement",
1865                    original: indoc! {"
1866                    fn handle_submit() {
1867                    }
1868
1869                    fn handle_keystroke() {
1870                "},
1871                    model_output: indoc! {"
1872                    <|set|>0:3f-1:7d
1873                    fn handle_submit(modal_state: &mut ModalState) {
1874                        <|user_cursor|>
1875                    }
1876                "},
1877                    expected: indoc! {"
1878                    fn handle_submit(modal_state: &mut ModalState) {
1879                        <|user_cursor|>
1880                    }
1881
1882                    fn handle_keystroke() {
1883                "},
1884                },
1885                Case {
1886                    name: "no_edit_commands_returns_original",
1887                    original: indoc! {"
1888                    hello
1889                    world
1890                "},
1891                    model_output: "some random text with no commands",
1892                    expected: indoc! {"
1893                    hello
1894                    world
1895                "},
1896                },
1897                Case {
1898                    name: "wrong_hash_set_ignored",
1899                    original: indoc! {"
1900                    aaa
1901                    bbb
1902                "},
1903                    model_output: indoc! {"
1904                    <|set|>0:ff
1905                    ZZZ
1906                "},
1907                    expected: indoc! {"
1908                    aaa
1909                    bbb
1910                "},
1911                },
1912                Case {
1913                    name: "insert_and_set_combined",
1914                    original: indoc! {"
1915                    alpha
1916                    beta
1917                    gamma
1918                "},
1919                    model_output: indoc! {"
1920                    <|set|>0:06
1921                    ALPHA
1922                    <|insert|>1:9c
1923                    beta_extra
1924                "},
1925                    expected: indoc! {"
1926                    ALPHA
1927                    beta
1928                    beta_extra
1929                    gamma
1930                "},
1931                },
1932                Case {
1933                    name: "no_trailing_newline_preserved",
1934                    original: "hello\nworld",
1935                    model_output: indoc! {"
1936                    <|set|>0:14
1937                    HELLO
1938                "},
1939                    expected: "HELLO\nworld",
1940                },
1941                Case {
1942                    name: "set_range_hash_mismatch_in_end_bound",
1943                    original: indoc! {"
1944                    one
1945                    two
1946                    three
1947                "},
1948                    model_output: indoc! {"
1949                    <|set|>0:42-2:ff
1950                    ONE_TWO_THREE
1951                "},
1952                    expected: indoc! {"
1953                    one
1954                    two
1955                    three
1956                "},
1957                },
1958                Case {
1959                    name: "set_range_start_greater_than_end_ignored",
1960                    original: indoc! {"
1961                    a
1962                    b
1963                    c
1964                "},
1965                    model_output: indoc! {"
1966                    <|set|>2:63-1:62
1967                    X
1968                "},
1969                    expected: indoc! {"
1970                    a
1971                    b
1972                    c
1973                "},
1974                },
1975                Case {
1976                    name: "insert_out_of_bounds_ignored",
1977                    original: indoc! {"
1978                    x
1979                    y
1980                "},
1981                    model_output: indoc! {"
1982                    <|insert|>99:aa
1983                    z
1984                "},
1985                    expected: indoc! {"
1986                    x
1987                    y
1988                "},
1989                },
1990                Case {
1991                    name: "set_out_of_bounds_ignored",
1992                    original: indoc! {"
1993                    x
1994                    y
1995                "},
1996                    model_output: indoc! {"
1997                    <|set|>99:aa
1998                    z
1999                "},
2000                    expected: indoc! {"
2001                    x
2002                    y
2003                "},
2004                },
2005                Case {
2006                    name: "malformed_set_command_ignored",
2007                    original: indoc! {"
2008                    alpha
2009                    beta
2010                "},
2011                    model_output: indoc! {"
2012                    <|set|>not-a-line-ref
2013                    UPDATED
2014                "},
2015                    expected: indoc! {"
2016                    alpha
2017                    beta
2018                "},
2019                },
2020                Case {
2021                    name: "malformed_insert_hash_treated_as_before_first",
2022                    original: indoc! {"
2023                    alpha
2024                    beta
2025                "},
2026                    model_output: indoc! {"
2027                    <|insert|>1:nothex
2028                    preamble
2029                "},
2030                    expected: indoc! {"
2031                    preamble
2032                    alpha
2033                    beta
2034                "},
2035                },
2036                Case {
2037                    name: "set_then_insert_same_target_orders_insert_after_replacement",
2038                    original: indoc! {"
2039                    cat
2040                    dog
2041                "},
2042                    model_output: indoc! {"
2043                    <|set|>0:38
2044                    CAT
2045                    <|insert|>0:38
2046                    TAIL
2047                "},
2048                    expected: indoc! {"
2049                    CAT
2050                    TAIL
2051                    dog
2052                "},
2053                },
2054                Case {
2055                    name: "overlapping_set_ranges_last_wins",
2056                    original: indoc! {"
2057                    a
2058                    b
2059                    c
2060                    d
2061                "},
2062                    model_output: indoc! {"
2063                    <|set|>0:61-2:63
2064                    FIRST
2065                    <|set|>1:62-3:64
2066                    SECOND
2067                "},
2068                    expected: indoc! {"
2069                    FIRST
2070                    d
2071                "},
2072                },
2073                Case {
2074                    name: "insert_before_first_and_after_line",
2075                    original: indoc! {"
2076                    a
2077                    b
2078                "},
2079                    model_output: indoc! {"
2080                    <|insert|>
2081                    HEAD
2082                    <|insert|>0:61
2083                    MID
2084                "},
2085                    expected: indoc! {"
2086                    HEAD
2087                    a
2088                    MID
2089                    b
2090                "},
2091                },
2092            ];
2093
2094            for case in &cases {
2095                let result = hashline::apply_edit_commands(case.original, &case.model_output);
2096                assert_eq!(result, case.expected, "failed case: {}", case.name);
2097            }
2098        }
2099
2100        #[test]
2101        fn test_output_has_edit_commands() {
2102            assert!(hashline::output_has_edit_commands(&format!(
2103                "{}0:ab\nnew",
2104                SET_COMMAND_MARKER
2105            )));
2106            assert!(hashline::output_has_edit_commands(&format!(
2107                "{}0:ab\nnew",
2108                INSERT_COMMAND_MARKER
2109            )));
2110            assert!(hashline::output_has_edit_commands(&format!(
2111                "some text\n{}1:cd\nstuff",
2112                SET_COMMAND_MARKER
2113            )));
2114            assert!(!hashline::output_has_edit_commands("just plain text"));
2115            assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2116        }
2117
2118        // ---- hashline::patch_to_edit_commands round-trip tests ----
2119
2120        #[test]
2121        fn test_patch_to_edit_commands() {
2122            struct Case {
2123                name: &'static str,
2124                old: &'static str,
2125                patch: &'static str,
2126                expected_new: &'static str,
2127            }
2128
2129            let cases = [
2130                Case {
2131                    name: "single_line_replacement",
2132                    old: indoc! {"
2133                    let mut total = 0;
2134                    for product in products {
2135                        total += ;
2136                    }
2137                    total
2138                "},
2139                    patch: indoc! {"
2140                    @@ -1,5 +1,5 @@
2141                     let mut total = 0;
2142                     for product in products {
2143                    -    total += ;
2144                    +    total += product.price;
2145                     }
2146                     total
2147                "},
2148                    expected_new: indoc! {"
2149                    let mut total = 0;
2150                    for product in products {
2151                        total += product.price;
2152                    }
2153                    total
2154                "},
2155                },
2156                Case {
2157                    name: "multiline_replacement",
2158                    old: indoc! {"
2159                    fn foo() {
2160                        let x = 1;
2161                        let y = 2;
2162                        let z = 3;
2163                    }
2164                "},
2165                    patch: indoc! {"
2166                    @@ -1,5 +1,3 @@
2167                     fn foo() {
2168                    -    let x = 1;
2169                    -    let y = 2;
2170                    -    let z = 3;
2171                    +    let sum = 1 + 2 + 3;
2172                     }
2173                "},
2174                    expected_new: indoc! {"
2175                    fn foo() {
2176                        let sum = 1 + 2 + 3;
2177                    }
2178                "},
2179                },
2180                Case {
2181                    name: "insertion",
2182                    old: indoc! {"
2183                    fn main() {
2184                        let x = 1;
2185                    }
2186                "},
2187                    patch: indoc! {"
2188                    @@ -1,3 +1,4 @@
2189                     fn main() {
2190                         let x = 1;
2191                    +    let y = 2;
2192                     }
2193                "},
2194                    expected_new: indoc! {"
2195                    fn main() {
2196                        let x = 1;
2197                        let y = 2;
2198                    }
2199                "},
2200                },
2201                Case {
2202                    name: "insertion_before_first",
2203                    old: indoc! {"
2204                    let x = 1;
2205                    let y = 2;
2206                "},
2207                    patch: indoc! {"
2208                    @@ -1,2 +1,3 @@
2209                    +use std::io;
2210                     let x = 1;
2211                     let y = 2;
2212                "},
2213                    expected_new: indoc! {"
2214                    use std::io;
2215                    let x = 1;
2216                    let y = 2;
2217                "},
2218                },
2219                Case {
2220                    name: "deletion",
2221                    old: indoc! {"
2222                    aaa
2223                    bbb
2224                    ccc
2225                    ddd
2226                "},
2227                    patch: indoc! {"
2228                    @@ -1,4 +1,2 @@
2229                     aaa
2230                    -bbb
2231                    -ccc
2232                     ddd
2233                "},
2234                    expected_new: indoc! {"
2235                    aaa
2236                    ddd
2237                "},
2238                },
2239                Case {
2240                    name: "multiple_changes",
2241                    old: indoc! {"
2242                    alpha
2243                    beta
2244                    gamma
2245                    delta
2246                    epsilon
2247                "},
2248                    patch: indoc! {"
2249                    @@ -1,5 +1,5 @@
2250                    -alpha
2251                    +ALPHA
2252                     beta
2253                     gamma
2254                    -delta
2255                    +DELTA
2256                     epsilon
2257                "},
2258                    expected_new: indoc! {"
2259                    ALPHA
2260                    beta
2261                    gamma
2262                    DELTA
2263                    epsilon
2264                "},
2265                },
2266                Case {
2267                    name: "replace_with_insertion",
2268                    old: indoc! {r#"
2269                    fn handle() {
2270                        modal_state.close();
2271                        modal_state.dismiss();
2272                "#},
2273                    patch: indoc! {r#"
2274                    @@ -1,3 +1,4 @@
2275                     fn handle() {
2276                         modal_state.close();
2277                    +    eprintln!("");
2278                         modal_state.dismiss();
2279                "#},
2280                    expected_new: indoc! {r#"
2281                    fn handle() {
2282                        modal_state.close();
2283                        eprintln!("");
2284                        modal_state.dismiss();
2285                "#},
2286                },
2287                Case {
2288                    name: "complete_replacement",
2289                    old: indoc! {"
2290                    aaa
2291                    bbb
2292                    ccc
2293                "},
2294                    patch: indoc! {"
2295                    @@ -1,3 +1,3 @@
2296                    -aaa
2297                    -bbb
2298                    -ccc
2299                    +xxx
2300                    +yyy
2301                    +zzz
2302                "},
2303                    expected_new: indoc! {"
2304                    xxx
2305                    yyy
2306                    zzz
2307                "},
2308                },
2309                Case {
2310                    name: "add_function_body",
2311                    old: indoc! {"
2312                    fn foo() {
2313                        modal_state.dismiss();
2314                    }
2315
2316                    fn
2317
2318                    fn handle_keystroke() {
2319                "},
2320                    patch: indoc! {"
2321                    @@ -1,6 +1,8 @@
2322                     fn foo() {
2323                         modal_state.dismiss();
2324                     }
2325
2326                    -fn
2327                    +fn handle_submit() {
2328                    +    todo()
2329                    +}
2330
2331                     fn handle_keystroke() {
2332                "},
2333                    expected_new: indoc! {"
2334                    fn foo() {
2335                        modal_state.dismiss();
2336                    }
2337
2338                    fn handle_submit() {
2339                        todo()
2340                    }
2341
2342                    fn handle_keystroke() {
2343                "},
2344                },
2345                Case {
2346                    name: "with_cursor_offset",
2347                    old: indoc! {r#"
2348                    fn main() {
2349                        println!();
2350                    }
2351                "#},
2352                    patch: indoc! {r#"
2353                    @@ -1,3 +1,3 @@
2354                     fn main() {
2355                    -    println!();
2356                    +    eprintln!("");
2357                     }
2358                "#},
2359                    expected_new: indoc! {r#"
2360                    fn main() {
2361                        eprintln!("<|user_cursor|>");
2362                    }
2363                "#},
2364                },
2365                Case {
2366                    name: "non_local_hunk_header_pure_insertion_repro",
2367                    old: indoc! {"
2368                    aaa
2369                    bbb
2370                "},
2371                    patch: indoc! {"
2372                    @@ -20,2 +20,3 @@
2373                     aaa
2374                    +xxx
2375                     bbb
2376                "},
2377                    expected_new: indoc! {"
2378                    aaa
2379                    xxx
2380                    bbb
2381                "},
2382                },
2383            ];
2384
2385            for case in &cases {
2386                // The cursor_offset for patch_to_edit_commands is relative to
2387                // the first hunk's new text (context + additions). We compute
2388                // it by finding where the marker sits in the expected output
2389                // (which mirrors the new text of the hunk).
2390                let cursor_offset = case.expected_new.find(CURSOR_MARKER);
2391
2392                let commands =
2393                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
2394                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
2395
2396                assert!(
2397                    hashline::output_has_edit_commands(&commands),
2398                    "case {}: expected edit commands, got: {commands:?}",
2399                    case.name,
2400                );
2401
2402                let applied = hashline::apply_edit_commands(case.old, &commands);
2403                assert_eq!(applied, case.expected_new, "case {}", case.name);
2404            }
2405        }
2406    }
2407}
2408
2409pub mod seed_coder {
2410    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2411    //!
2412    //! Seed-Coder uses different FIM tokens and order than Qwen:
2413    //! - SPM order: suffix comes FIRST, then prefix, then middle
2414    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2415    //! - File markers: StarCoder-style `<filename>path` (single token + path)
2416    //!
2417    //! All context (related files, edit history) goes in the PREFIX section.
2418    //! The suffix contains only code after the editable region.
2419    //!
2420    //! Example prompt:
2421    //!
2422    //! <[fim-suffix]>
2423    //! code after editable region
2424    //! <[fim-prefix]><filename>related/file.py
2425    //! related file content
2426    //!
2427    //! <filename>edit_history
2428    //! --- a/some_file.py
2429    //! +++ b/some_file.py
2430    //! -old
2431    //! +new
2432    //!
2433    //! <filename>path/to/target_file.py
2434    //! code before editable region
2435    //! <<<<<<< CURRENT
2436    //! code that
2437    //! needs to<|user_cursor|>
2438    //! be rewritten
2439    //! =======
2440    //! <[fim-middle]>
2441    //!
2442    //! Expected output (model generates):
2443    //!
2444    //! updated
2445    //! code with
2446    //! changes applied
2447    //! >>>>>>> UPDATED
2448
2449    use super::*;
2450
2451    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
2452    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
2453    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
2454    pub const FILE_MARKER: &str = "<filename>";
2455
2456    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
2457    pub const SEPARATOR: &str = "=======\n";
2458    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
2459
2460    pub const NO_EDITS: &str = "NO_EDITS\n";
2461
2462    pub fn special_tokens() -> &'static [&'static str] {
2463        &[
2464            FIM_SUFFIX,
2465            FIM_PREFIX,
2466            FIM_MIDDLE,
2467            FILE_MARKER,
2468            START_MARKER,
2469            SEPARATOR,
2470            END_MARKER,
2471            CURSOR_MARKER,
2472        ]
2473    }
2474
2475    pub fn write_cursor_excerpt_section(
2476        prompt: &mut String,
2477        path: &Path,
2478        context: &str,
2479        editable_range: &Range<usize>,
2480        cursor_offset: usize,
2481    ) {
2482        let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2483        prompt.push_str(&section);
2484    }
2485
2486    pub fn format_prompt_with_budget(
2487        path: &Path,
2488        context: &str,
2489        editable_range: &Range<usize>,
2490        cursor_offset: usize,
2491        events: &[Arc<Event>],
2492        related_files: &[RelatedFile],
2493        max_tokens: usize,
2494    ) -> String {
2495        let suffix_section = build_suffix_section(context, editable_range);
2496        let cursor_prefix_section =
2497            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2498
2499        let suffix_tokens = estimate_tokens(suffix_section.len());
2500        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
2501        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2502
2503        let edit_history_section = super::format_edit_history_within_budget(
2504            events,
2505            FILE_MARKER,
2506            "edit_history",
2507            budget_after_cursor,
2508        );
2509        let edit_history_tokens = estimate_tokens(edit_history_section.len());
2510        let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
2511
2512        let related_files_section = super::format_related_files_within_budget(
2513            related_files,
2514            FILE_MARKER,
2515            "",
2516            budget_after_edit_history,
2517        );
2518
2519        let mut prompt = String::new();
2520        prompt.push_str(&suffix_section);
2521        prompt.push_str(FIM_PREFIX);
2522        prompt.push_str(&related_files_section);
2523        if !related_files_section.is_empty() {
2524            prompt.push('\n');
2525        }
2526        prompt.push_str(&edit_history_section);
2527        if !edit_history_section.is_empty() {
2528            prompt.push('\n');
2529        }
2530        prompt.push_str(&cursor_prefix_section);
2531        prompt.push_str(FIM_MIDDLE);
2532        prompt
2533    }
2534
2535    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2536        let mut section = String::new();
2537        section.push_str(FIM_SUFFIX);
2538        section.push_str(&context[editable_range.end..]);
2539        if !section.ends_with('\n') {
2540            section.push('\n');
2541        }
2542        section
2543    }
2544
2545    fn build_cursor_prefix_section(
2546        path: &Path,
2547        context: &str,
2548        editable_range: &Range<usize>,
2549        cursor_offset: usize,
2550    ) -> String {
2551        let mut section = String::new();
2552        let path_str = path.to_string_lossy();
2553        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2554
2555        section.push_str(&context[..editable_range.start]);
2556        section.push_str(START_MARKER);
2557        section.push_str(&context[editable_range.start..cursor_offset]);
2558        section.push_str(CURSOR_MARKER);
2559        section.push_str(&context[cursor_offset..editable_range.end]);
2560        if !section.ends_with('\n') {
2561            section.push('\n');
2562        }
2563        section.push_str(SEPARATOR);
2564        section
2565    }
2566
2567    /// Format patch as containing no changes if it's empty; otherwise return None.
2568    pub(crate) fn no_edits(patch: &str) -> Option<String> {
2569        // Count lines in the patch
2570        let empty_patch = patch.lines().count() <= 3;
2571        if empty_patch {
2572            Some(format!("{NO_EDITS}{END_MARKER}"))
2573        } else {
2574            None
2575        }
2576    }
2577}
2578
2579pub mod v0304_variable_edit {
2580    //! A prompt format with no fixed editable region. The entire context is shown
2581    //! to the model, and it chooses which text to replace by outputting surrounding
2582    //! context lines with `<|fim_middle|>` and `<|fim_suffix|>` delimiting the new
2583    //! text.
2584    //!
2585    //! Example prompt:
2586    //!
2587    //! <|file_sep|>path/to/file.py
2588    //! zero
2589    //! one
2590    //! two
2591    //! three<|user_cursor|>
2592    //! four
2593    //! five
2594    //! <|fim_prefix|>
    //!
2596    //! Expected output (model generates):
2597    //!
2598    //! two
2599    //! <|fim_middle|>
2600    //! THREE
2601    //! <|fim_suffix|>
2602    //! four
2603    //!
2604    //! The output means: find "two\n...\nfour" in the context, and replace
2605    //! everything between "two\n" and "four" with "THREE\n".
2606
2607    use super::*;
2608
2609    pub fn special_tokens() -> &'static [&'static str] {
2610        &[
2611            "<|fim_prefix|>",
2612            "<|fim_suffix|>",
2613            "<|fim_middle|>",
2614            "<|file_sep|>",
2615            CURSOR_MARKER,
2616        ]
2617    }
2618
2619    pub fn write_cursor_excerpt_section(
2620        prompt: &mut String,
2621        path: &Path,
2622        context: &str,
2623        cursor_offset: usize,
2624    ) {
2625        let path_str = path.to_string_lossy();
2626        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
2627
2628        prompt.push_str(&context[..cursor_offset]);
2629        prompt.push_str(CURSOR_MARKER);
2630        prompt.push_str(&context[cursor_offset..]);
2631        if !prompt.ends_with('\n') {
2632            prompt.push('\n');
2633        }
2634        prompt.push_str("<|fim_prefix|>\n")
2635    }
2636
2637    /// Apply a variable-edit model output to the original context text.
2638    ///
2639    /// The model output has the form:
2640    ///
2641    /// - prefix context lines
2642    /// - `<|fim_middle|>`
2643    /// - new text
2644    /// - `<|fim_suffix|>`
2645    /// - suffix context lines
2646    ///
2647    /// We locate the prefix/suffix context lines in the original text and replace
2648    /// everything between them with the new text.
2649    pub fn apply_variable_edit(
2650        context: &str,
2651        model_output: &str,
2652    ) -> Result<(Range<usize>, String)> {
2653        let (prefix_context, rest) = model_output
2654            .split_once("<|fim_middle|>\n")
2655            .or_else(|| model_output.split_once("<|fim_middle|>"))
2656            .ok_or_else(|| anyhow::anyhow!("missing <|fim_middle|> in model output"))?;
2657
2658        let (new_text, suffix_context) = rest
2659            .split_once("<|fim_suffix|>\n")
2660            .or_else(|| rest.split_once("<|fim_suffix|>"))
2661            .unwrap_or((rest, ""));
2662
2663        let suffix_context = if prefix_context.is_empty() && !suffix_context.is_empty() {
2664            suffix_context.strip_prefix('\n').unwrap_or(suffix_context)
2665        } else {
2666            suffix_context
2667        };
2668
2669        let prefix_offset = find_substring_at_line_boundary(context, prefix_context)
2670            .ok_or_else(|| anyhow!("could not locate prefix lines"))?
2671            + prefix_context.len();
2672        let suffix_offset = if suffix_context.is_empty() {
2673            context.len()
2674        } else {
2675            find_substring_at_line_boundary(&context[prefix_offset..], suffix_context)
2676                .ok_or_else(|| anyhow!("could not locate suffix lines"))?
2677                + prefix_offset
2678        };
2679
2680        let edit_range = prefix_offset..suffix_offset;
2681        return Ok((edit_range, new_text.to_string()));
2682    }
2683
2684    fn find_substring_at_line_boundary(haystack: &str, needle: &str) -> Option<usize> {
2685        if needle.is_empty() {
2686            return Some(0);
2687        }
2688
2689        haystack.match_indices(needle).find_map(|(offset, _)| {
2690            let matched_line_start = offset == 0 || haystack[..offset].ends_with('\n');
2691            matched_line_start.then_some(offset)
2692        })
2693    }
2694
2695    /// Convert a unified diff patch into the variable-edit output format.
2696    ///
2697    /// Parses `patch` as a unified diff against `old_text` and produces model
2698    /// output with context lines surrounding `<|fim_middle|>` / `<|fim_suffix|>`
2699    /// delimiters. The diff is resolved by content matching rather than line
2700    /// numbers.
2701    pub fn patch_to_variable_edit_output(
2702        old_text: &str,
2703        patch: &str,
2704        cursor_offset: Option<usize>,
2705    ) -> Result<String> {
2706        // Parse the unified diff into hunks. Each hunk has an `old_context`
2707        // string (context + deleted lines interleaved in order) and a list of
2708        // edits expressed as byte ranges within that context plus replacement
2709        // text.
2710        let hunks = parse_hunks(patch);
2711        if hunks.is_empty() {
2712            return Ok(String::new());
2713        }
2714
2715        // Apply each hunk by finding its old_context in the text and
2716        // performing the edits. We search forward from where the previous
2717        // hunk ended so that hunks are applied in order.
2718        let mut new_text = old_text.to_string();
2719        let mut search_from: usize = 0;
2720        let mut first_hunk_pos: Option<usize> = None;
2721
2722        for hunk in &hunks {
2723            let context_pos = new_text[search_from..]
2724                .find(&hunk.old_context)
2725                .map(|pos| pos + search_from)
2726                .ok_or_else(|| anyhow::anyhow!("could not locate hunk context in text"))?;
2727
2728            if first_hunk_pos.is_none() {
2729                first_hunk_pos = Some(context_pos);
2730            }
2731
2732            // Apply edits in reverse order so byte offsets remain valid.
2733            for edit in hunk.edits.iter().rev() {
2734                let abs_start = context_pos + edit.range.start;
2735                let abs_end = context_pos + edit.range.end;
2736                new_text.replace_range(abs_start..abs_end, &edit.text);
2737            }
2738
2739            // Advance past this hunk's region in the (now modified) text.
2740            let new_region_len: usize =
2741                hunk.edits.iter().fold(hunk.old_context.len(), |len, edit| {
2742                    len + edit.text.len() - (edit.range.end - edit.range.start)
2743                });
2744            search_from = context_pos + new_region_len;
2745        }
2746
2747        // Now we have old_text and new_text. Find the changed line range by
2748        // comparing them.
2749        let old_lines: Vec<&str> = old_text.lines().collect();
2750        let new_lines: Vec<&str> = new_text.lines().collect();
2751
2752        // Find first differing line.
2753        let first_changed_row = old_lines
2754            .iter()
2755            .zip(new_lines.iter())
2756            .position(|(a, b)| a != b)
2757            .unwrap_or_else(|| old_lines.len().min(new_lines.len()));
2758
2759        // Find last differing line (from the end).
2760        let max_suffix = old_lines.len().min(new_lines.len()) - first_changed_row;
2761        let common_suffix = old_lines
2762            .iter()
2763            .rev()
2764            .zip(new_lines.iter().rev())
2765            .take(max_suffix)
2766            .take_while(|(a, b)| a == b)
2767            .count();
2768
2769        let old_end = old_lines.len() - common_suffix;
2770        let new_end = new_lines.len() - common_suffix;
2771
2772        if first_changed_row == old_end && first_changed_row == new_end {
2773            return Ok(String::new());
2774        }
2775
2776        // Build the replacement text from new_lines[first_diff..new_end].
2777        let mut merged_new_text = String::new();
2778        for line in &new_lines[first_changed_row..new_end] {
2779            merged_new_text.push_str(line);
2780            merged_new_text.push('\n');
2781        }
2782
2783        // cursor_offset is relative to the first hunk's new content in
2784        // new_text. Translate it to an offset within merged_new_text, which
2785        // only contains lines first_diff..new_end of new_text.
2786        if let Some(hunk_offset) = cursor_offset {
2787            let hunk_start = first_hunk_pos.unwrap_or(0);
2788            let absolute_pos = hunk_start + hunk_offset;
2789
2790            // Byte offset where first_diff starts in new_text.
2791            let merged_start: usize = new_lines[..first_changed_row]
2792                .iter()
2793                .map(|line| line.len() + 1)
2794                .sum();
2795
2796            if absolute_pos >= merged_start {
2797                let relative_offset = absolute_pos - merged_start;
2798                if relative_offset <= merged_new_text.len() {
2799                    merged_new_text.insert_str(relative_offset, CURSOR_MARKER);
2800                }
2801            }
2802        }
2803
2804        // Build output with 2 lines of context above and below.
2805        let context_lines_count = 2;
2806        let mut prefix_start = first_changed_row.saturating_sub(context_lines_count);
2807        let mut suffix_end = (old_end + context_lines_count).min(old_lines.len());
2808
2809        fn count_matches(line_range: Range<usize>, lines: &[&str]) -> usize {
2810            let pattern = &lines[line_range];
2811            let pattern_len = pattern.len();
2812
2813            let mut count = 0;
2814            for offset in 0..=lines.len() - pattern_len {
2815                if &lines[offset..offset + pattern_len] == pattern {
2816                    count += 1;
2817                }
2818            }
2819            count
2820        }
2821
2822        // Expand prefix and suffix until they are unique
2823        while prefix_start > 0 {
2824            if count_matches(prefix_start..first_changed_row, &old_lines) > 1 {
2825                prefix_start -= 1;
2826            } else {
2827                break;
2828            }
2829        }
2830        while suffix_end < old_lines.len() {
2831            if count_matches(old_end..suffix_end, &old_lines) > 1 {
2832                suffix_end += 1;
2833            } else {
2834                break;
2835            }
2836        }
2837
2838        let mut output = String::new();
2839        for line in &old_lines[prefix_start..first_changed_row] {
2840            output.push_str(line);
2841            output.push('\n');
2842        }
2843        output.push_str("<|fim_middle|>\n");
2844        output.push_str(&merged_new_text);
2845        output.push_str("<|fim_suffix|>\n");
2846        for line in &old_lines[old_end..suffix_end] {
2847            output.push_str(line);
2848            output.push('\n');
2849        }
2850
2851        Ok(output)
2852    }
2853
2854    struct ParsedHunk {
2855        old_context: String,
2856        edits: Vec<ParsedEdit>,
2857    }
2858
2859    struct ParsedEdit {
2860        range: Range<usize>,
2861        text: String,
2862    }
2863
2864    /// Parse a unified diff into content-based hunks. Each hunk contains an
2865    /// `old_context` string (context lines + deleted lines, which together
2866    /// form the text that should be found in the original) and a list of edits
2867    /// expressed as byte ranges within that context.
2868    fn parse_hunks(patch: &str) -> Vec<ParsedHunk> {
2869        let mut hunks = Vec::new();
2870        let mut current: Option<ParsedHunk> = None;
2871
2872        for line in patch.lines() {
2873            if line.starts_with("@@") {
2874                if let Some(hunk) = current.take() {
2875                    if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
2876                        hunks.push(hunk);
2877                    }
2878                }
2879                current = Some(ParsedHunk {
2880                    old_context: String::new(),
2881                    edits: Vec::new(),
2882                });
2883            } else if line.starts_with("---") || line.starts_with("+++") {
2884                continue;
2885            } else if let Some(hunk) = &mut current {
2886                if let Some(added) = line.strip_prefix('+') {
2887                    let pos = hunk.old_context.len();
2888                    if let Some(last_edit) = hunk.edits.last_mut() {
2889                        if last_edit.range.end == pos {
2890                            writeln!(&mut last_edit.text, "{added}").ok();
2891                            continue;
2892                        }
2893                    }
2894                    hunk.edits.push(ParsedEdit {
2895                        range: pos..pos,
2896                        text: format!("{added}\n"),
2897                    });
2898                } else if let Some(removed) = line.strip_prefix('-') {
2899                    let start = hunk.old_context.len();
2900                    writeln!(&mut hunk.old_context, "{removed}").ok();
2901                    let end = hunk.old_context.len();
2902                    if let Some(last_edit) = hunk.edits.last_mut() {
2903                        if last_edit.range.end == start {
2904                            last_edit.range.end = end;
2905                            continue;
2906                        }
2907                    }
2908                    hunk.edits.push(ParsedEdit {
2909                        range: start..end,
2910                        text: String::new(),
2911                    });
2912                } else {
2913                    let ctx = line.strip_prefix(' ').unwrap_or(line);
2914                    writeln!(&mut hunk.old_context, "{ctx}").ok();
2915                }
2916            }
2917        }
2918
2919        if let Some(hunk) = current {
2920            if !hunk.old_context.is_empty() || !hunk.edits.is_empty() {
2921                hunks.push(hunk);
2922            }
2923        }
2924
2925        hunks
2926    }
2927
2928    #[cfg(test)]
2929    mod tests {
2930        use super::*;
2931        use indoc::indoc;
2932
2933        #[test]
2934        fn test_apply_variable_edit() {
2935            struct Case {
2936                name: &'static str,
2937                original: &'static str,
2938                model_output: &'static str,
2939                expected: &'static str,
2940            }
2941
2942            let cases = [
2943                Case {
2944                    name: "simple_single_line_replacement",
2945                    original: indoc! {"
2946                        zero
2947                        one
2948                        two
2949                        three
2950                        four
2951                        five
2952                    "},
2953                    model_output: indoc! {"
2954                        two
2955                        <|fim_middle|>
2956                        THREE
2957                        <|fim_suffix|>
2958                        four
2959                    "},
2960                    expected: indoc! {"
2961                        zero
2962                        one
2963                        two
2964                        THREE
2965                        four
2966                        five
2967                    "},
2968                },
2969                Case {
2970                    name: "multi_line_replacement",
2971                    original: indoc! {"
2972                        a
2973                        b
2974                        c
2975                        d
2976                        e
2977                    "},
2978                    model_output: indoc! {"
2979                        a
2980                        <|fim_middle|>
2981                        B
2982                        C
2983                        D
2984                        <|fim_suffix|>
2985                        e
2986                    "},
2987                    expected: indoc! {"
2988                        a
2989                        B
2990                        C
2991                        D
2992                        e
2993                    "},
2994                },
2995                Case {
2996                    name: "insertion_between_existing_lines",
2997                    original: indoc! {"
2998                        a
2999                        b
3000                        c
3001                    "},
3002                    model_output: indoc! {"
3003                        a
3004                        <|fim_middle|>
3005                        X
3006                        <|fim_suffix|>
3007                        b
3008                    "},
3009                    expected: indoc! {"
3010                        a
3011                        X
3012                        b
3013                        c
3014                    "},
3015                },
3016                Case {
3017                    name: "deletion",
3018                    original: indoc! {"
3019                        a
3020                        b
3021                        c
3022                        d
3023                    "},
3024                    model_output: indoc! {"
3025                        a
3026                        <|fim_middle|>
3027                        <|fim_suffix|>
3028                        c
3029                    "},
3030                    expected: indoc! {"
3031                        a
3032                        c
3033                        d
3034                    "},
3035                },
3036                Case {
3037                    name: "replacement_at_start_no_prefix_context",
3038                    original: indoc! {"
3039                        a
3040                        b
3041                        c
3042                    "},
3043                    model_output: indoc! {"
3044                        <|fim_middle|>
3045                        X
3046                        <|fim_suffix|>
3047                        b
3048                    "},
3049                    expected: indoc! {"
3050                        X
3051                        b
3052                        c
3053                    "},
3054                },
3055                Case {
3056                    name: "replacement_at_end_no_suffix_context",
3057                    original: indoc! {"
3058                        a
3059                        b
3060                        c
3061                    "},
3062                    model_output: indoc! {"
3063                        b
3064                        <|fim_middle|>
3065                        Z
3066                        <|fim_suffix|>
3067                    "},
3068                    expected: indoc! {"
3069                        a
3070                        b
3071                        Z
3072                    "},
3073                },
3074                Case {
3075                    name: "context_with_trailing_newline_is_preserved",
3076                    original: indoc! {"
3077                        a
3078                        b
3079                        c
3080                    "},
3081                    model_output: indoc! {"
3082                        a
3083                        <|fim_middle|>
3084                        B
3085                        <|fim_suffix|>
3086                        c
3087                    "},
3088                    expected: indoc! {"
3089                        a
3090                        B
3091                        c
3092                    "},
3093                },
3094                Case {
3095                    name: "cursor_marker_passes_through_untouched",
3096                    original: indoc! {"
3097                        a
3098                        b
3099                        c
3100                    "},
3101                    model_output: indoc! {"
3102                        a
3103                        <|fim_middle|>
3104                        B<|user_cursor|>B
3105                        <|fim_suffix|>
3106                        c
3107                    "},
3108                    expected: indoc! {"
3109                        a
3110                        B<|user_cursor|>B
3111                        c
3112                    "},
3113                },
3114                Case {
3115                    name: "multiple_prefix_context_lines",
3116                    original: indoc! {"
3117                        a
3118                        b
3119                        c
3120                        d
3121                        e
3122                    "},
3123                    model_output: indoc! {"
3124                        b
3125                        c
3126                        <|fim_middle|>
3127                        D
3128                        <|fim_suffix|>
3129                        e
3130                    "},
3131                    expected: indoc! {"
3132                        a
3133                        b
3134                        c
3135                        D
3136                        e
3137                    "},
3138                },
3139            ];
3140
3141            for case in cases {
3142                let (edit_range, replacement) =
3143                    apply_variable_edit(case.original, case.model_output).unwrap();
3144                let mut edited = case.original.to_string();
3145                edited.replace_range(edit_range, &replacement);
3146                assert_eq!(edited, case.expected, "{}", case.name);
3147            }
3148        }
3149
3150        #[test]
3151        fn test_patch_to_variable_edit() {
3152            struct Case {
3153                name: &'static str,
3154                old: &'static str,
3155                patch: &'static str,
3156                cursor_offset: Option<usize>,
3157                expected_variable_edit: &'static str,
3158                expected_after_apply: &'static str,
3159            }
3160
3161            let cases = [
3162                Case {
3163                    name: "simple_replacement",
3164                    old: indoc! {"
3165                        zero
3166                        one
3167                        two
3168                        three
3169                        four
3170                        five
3171                    "},
3172                    patch: indoc! {"
3173                        @@ -3,3 +3,3 @@
3174                         two
3175                        -three
3176                        +THREE
3177                         four
3178                    "},
3179                    cursor_offset: None,
3180                    expected_variable_edit: indoc! {"
3181                        one
3182                        two
3183                        <|fim_middle|>
3184                        THREE
3185                        <|fim_suffix|>
3186                        four
3187                        five
3188                    "},
3189                    expected_after_apply: indoc! {"
3190                        zero
3191                        one
3192                        two
3193                        THREE
3194                        four
3195                        five
3196                    "},
3197                },
3198                Case {
3199                    name: "insertion",
3200                    old: indoc! {"
3201                        a
3202                        b
3203                        c
3204                        d
3205                        e
3206                    "},
3207                    patch: indoc! {"
3208                        @@ -2,0 +3,1 @@
3209                         b
3210                        +X
3211                         c
3212                    "},
3213                    cursor_offset: None,
3214                    expected_variable_edit: indoc! {"
3215                        a
3216                        b
3217                        <|fim_middle|>
3218                        X
3219                        <|fim_suffix|>
3220                        c
3221                        d
3222                    "},
3223                    expected_after_apply: indoc! {"
3224                        a
3225                        b
3226                        X
3227                        c
3228                        d
3229                        e
3230                    "},
3231                },
3232                Case {
3233                    name: "deletion",
3234                    old: indoc! {"
3235                        a
3236                        b
3237                        c
3238                        d
3239                        e
3240                    "},
3241                    patch: indoc! {"
3242                        @@ -2,3 +2,2 @@
3243                         b
3244                        -c
3245                         d
3246                    "},
3247                    cursor_offset: None,
3248                    expected_variable_edit: indoc! {"
3249                        a
3250                        b
3251                        <|fim_middle|>
3252                        <|fim_suffix|>
3253                        d
3254                        e
3255                    "},
3256                    expected_after_apply: indoc! {"
3257                        a
3258                        b
3259                        d
3260                        e
3261                    "},
3262                },
3263                Case {
3264                    name: "edit_near_start",
3265                    old: indoc! {"
3266                        first
3267                        second
3268                        third
3269                        fourth
3270                    "},
3271                    patch: indoc! {"
3272                        @@ -1,1 +1,1 @@
3273                        -first
3274                        +FIRST
3275                    "},
3276                    cursor_offset: None,
3277                    expected_variable_edit: indoc! {"
3278                        <|fim_middle|>
3279                        FIRST
3280                        <|fim_suffix|>
3281                        second
3282                        third
3283                    "},
3284                    expected_after_apply: indoc! {"
3285                        FIRST
3286                        second
3287                        third
3288                        fourth
3289                    "},
3290                },
3291                Case {
3292                    name: "edit_near_end",
3293                    old: indoc! {"
3294                        first
3295                        second
3296                        third
3297                        fourth
3298                    "},
3299                    patch: indoc! {"
3300                        @@ -4,1 +4,1 @@
3301                        -fourth
3302                        +FOURTH
3303                    "},
3304                    cursor_offset: None,
3305                    expected_variable_edit: indoc! {"
3306                        second
3307                        third
3308                        <|fim_middle|>
3309                        FOURTH
3310                        <|fim_suffix|>
3311                    "},
3312                    expected_after_apply: indoc! {"
3313                        first
3314                        second
3315                        third
3316                        FOURTH
3317                    "},
3318                },
3319                Case {
3320                    name: "cursor_at_start_of_replacement",
3321                    old: indoc! {"
3322                        zero
3323                        one
3324                        two
3325                        three
3326                        four
3327                        five
3328                    "},
3329                    patch: indoc! {"
3330                        @@ -3,3 +3,3 @@
3331                         two
3332                        -three
3333                        +THREE
3334                         four
3335                    "},
3336                    cursor_offset: Some(4),
3337                    expected_variable_edit: indoc! {"
3338                        one
3339                        two
3340                        <|fim_middle|>
3341                        <|user_cursor|>THREE
3342                        <|fim_suffix|>
3343                        four
3344                        five
3345                    "},
3346                    expected_after_apply: indoc! {"
3347                        zero
3348                        one
3349                        two
3350                        <|user_cursor|>THREE
3351                        four
3352                        five
3353                    "},
3354                },
3355                Case {
3356                    name: "cursor_in_middle_of_replacement",
3357                    old: indoc! {"
3358                        zero
3359                        one
3360                        two
3361                        three
3362                        four
3363                        five
3364                    "},
3365                    patch: indoc! {"
3366                        @@ -3,3 +3,3 @@
3367                         two
3368                        -three
3369                        +THREE
3370                         four
3371                    "},
3372                    cursor_offset: Some(6),
3373                    expected_variable_edit: indoc! {"
3374                        one
3375                        two
3376                        <|fim_middle|>
3377                        TH<|user_cursor|>REE
3378                        <|fim_suffix|>
3379                        four
3380                        five
3381                    "},
3382                    expected_after_apply: indoc! {"
3383                        zero
3384                        one
3385                        two
3386                        TH<|user_cursor|>REE
3387                        four
3388                        five
3389                    "},
3390                },
3391                Case {
3392                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3393                    old: indoc! {"
3394                        one
3395                        a
3396                        b
3397                        c
3398                        d
3399                        two
3400                        a
3401                        b
3402                        c
3403                        d
3404                        three
3405                        a
3406                        b
3407                        c
3408                        d
3409                        four
3410                    "},
3411                    patch: indoc! {"
3412                        @@ -4,5 +4,5 @@
3413                         two
3414                         a
3415                         b
3416                        -c
3417                        +C
3418                         d
3419                         three
3420                    "},
3421                    cursor_offset: None,
3422                    expected_variable_edit: indoc! {"
3423                        two
3424                        a
3425                        b
3426                        <|fim_middle|>
3427                        C
3428                        <|fim_suffix|>
3429                        d
3430                        three
3431                    "},
3432                    expected_after_apply: indoc! {"
3433                        one
3434                        a
3435                        b
3436                        c
3437                        d
3438                        two
3439                        a
3440                        b
3441                        C
3442                        d
3443                        three
3444                        a
3445                        b
3446                        c
3447                        d
3448                        four
3449                    "},
3450                },
3451                Case {
3452                    name: "expands_context_when_two_lines_not_unique_before_and_after",
3453                    old: indoc! {"
3454                        {
3455                            {
3456                                one();
3457                            }
3458                        }
3459                        {
3460                            {
3461                                two();
3462                            }
3463                        }
3464                        {
3465                            {
3466                                three();
3467                            }
3468                        }
3469                        {
3470                            {
3471                                four();
3472                            }
3473                        }
3474                    "},
3475                    patch: indoc! {"
3476                        @@ -4,5 +4,5 @@
3477                             {
3478                        -        two();
3479                        +        TWO();
3480                             }
3481                    "},
3482                    cursor_offset: None,
3483                    expected_variable_edit: indoc! {"
3484                                one();
3485                            }
3486                        }
3487                        {
3488                            {
3489                        <|fim_middle|>
3490                                TWO();
3491                        <|fim_suffix|>
3492                            }
3493                        }
3494                        {
3495                            {
3496                                three();
3497                    "},
3498                    expected_after_apply: indoc! {"
3499                        {
3500                            {
3501                                one();
3502                            }
3503                        }
3504                        {
3505                            {
3506                                TWO();
3507                            }
3508                        }
3509                        {
3510                            {
3511                                three();
3512                            }
3513                        }
3514                        {
3515                            {
3516                                four();
3517                            }
3518                        }
3519                    "},
3520                },
3521            ];
3522
3523            for case in cases {
3524                let output =
3525                    patch_to_variable_edit_output(case.old, case.patch, case.cursor_offset)
3526                        .unwrap_or_else(|error| {
3527                            panic!("failed converting patch for {}: {error}", case.name)
3528                        });
3529                assert_eq!(
3530                    output, case.expected_variable_edit,
3531                    "patch->variable_edit mismatch for {}",
3532                    case.name
3533                );
3534
3535                let (edit_range, replacement) = apply_variable_edit(case.old, &output)
3536                    .unwrap_or_else(|error| {
3537                        panic!("failed applying variable_edit for {}: {error}", case.name)
3538                    });
3539                let mut edited_by_variable_edit = case.old.to_string();
3540                edited_by_variable_edit.replace_range(edit_range, &replacement);
3541                assert_eq!(
3542                    edited_by_variable_edit, case.expected_after_apply,
3543                    "variable_edit apply mismatch for {}",
3544                    case.name
3545                );
3546
3547                let (expected_edit_range, expected_replacement) =
3548                    apply_variable_edit(case.old, case.expected_variable_edit).unwrap_or_else(
3549                        |error| {
3550                            panic!(
3551                                "failed applying expected variable_edit for {}: {error}",
3552                                case.name
3553                            )
3554                        },
3555                    );
3556                let mut edited_by_expected_variable_edit = case.old.to_string();
3557                edited_by_expected_variable_edit
3558                    .replace_range(expected_edit_range, &expected_replacement);
3559                assert_eq!(
3560                    edited_by_expected_variable_edit, case.expected_after_apply,
3561                    "expected variable_edit apply mismatch for {}",
3562                    case.name
3563                );
3564            }
3565        }
3566
/// Renders a three-line excerpt through `write_cursor_excerpt_section` and
/// compares the result with the exact expected text. Byte offset 17 sits
/// right after the `h` of `hello`, which is where the cursor marker is
/// expected to appear.
#[test]
fn test_write_cursor_excerpt_section() {
    let source = "fn main() {\n    hello();\n}\n";
    let mut rendered = String::new();

    write_cursor_excerpt_section(&mut rendered, Path::new("test.rs"), source, 17);

    let expected =
        "<|file_sep|>test.rs\nfn main() {\n    h<|user_cursor|>ello();\n}\n<|fim_prefix|>\n";
    assert_eq!(rendered, expected);
}
3579    }
3580}
3581
3582/// The zeta1 prompt format
3583pub mod zeta1 {
3584    use super::*;
3585    use std::fmt::Write;
3586
3587    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
3588    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
3589    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
3590    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
3591
3592    const INSTRUCTION_HEADER: &str = concat!(
3593        "### Instruction:\n",
3594        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3595        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3596        "into account the cursor location.\n\n",
3597        "### User Edits:\n\n"
3598    );
3599    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
3600    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
3601
3602    /// Formats a complete zeta1 prompt from the input events and excerpt.
3603    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
3604        let mut prompt = String::with_capacity(
3605            INSTRUCTION_HEADER.len()
3606                + input_events.len()
3607                + EXCERPT_HEADER.len()
3608                + input_excerpt.len()
3609                + RESPONSE_HEADER.len(),
3610        );
3611        prompt.push_str(INSTRUCTION_HEADER);
3612        prompt.push_str(input_events);
3613        prompt.push_str(EXCERPT_HEADER);
3614        prompt.push_str(input_excerpt);
3615        prompt.push_str(RESPONSE_HEADER);
3616        prompt
3617    }
3618
3619    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
3620    /// editable and context byte-offset ranges within `cursor_excerpt`.
3621    pub fn format_zeta1_from_input(
3622        input: &ZetaPromptInput,
3623        editable_range: Range<usize>,
3624        context_range: Range<usize>,
3625    ) -> String {
3626        let events = format_zeta1_events(&input.events);
3627        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
3628        format_zeta1_prompt(&events, &excerpt)
3629    }
3630
3631    /// Formats events in zeta1 style (oldest first).
3632    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
3633        let mut result = String::new();
3634        for event in events {
3635            let event_string = format_zeta1_event(event);
3636            if event_string.is_empty() {
3637                continue;
3638            }
3639            if !result.is_empty() {
3640                result.push_str("\n\n");
3641            }
3642            result.push_str(&event_string);
3643        }
3644        result
3645    }
3646
3647    fn format_zeta1_event(event: &Event) -> String {
3648        match event {
3649            Event::BufferChange {
3650                path,
3651                old_path,
3652                diff,
3653                ..
3654            } => {
3655                let mut prompt = String::new();
3656                if old_path != path {
3657                    writeln!(
3658                        prompt,
3659                        "User renamed {} to {}\n",
3660                        old_path.display(),
3661                        path.display()
3662                    )
3663                    .ok();
3664                }
3665                if !diff.is_empty() {
3666                    write!(
3667                        prompt,
3668                        "User edited {}:\n```diff\n{}\n```",
3669                        path.display(),
3670                        diff
3671                    )
3672                    .ok();
3673                }
3674                prompt
3675            }
3676        }
3677    }
3678
3679    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
3680    /// within `cursor_excerpt`.
3681    fn format_zeta1_excerpt(
3682        input: &ZetaPromptInput,
3683        editable_range: Range<usize>,
3684        context_range: Range<usize>,
3685    ) -> String {
3686        let path_str = input.cursor_path.to_string_lossy();
3687        let excerpt = &*input.cursor_excerpt;
3688        let cursor_offset = input.cursor_offset_in_excerpt;
3689
3690        let mut prompt = String::new();
3691        writeln!(&mut prompt, "```{path_str}").ok();
3692
3693        let starts_at_file_beginning =
3694            input.excerpt_start_row == Some(0) && context_range.start == 0;
3695        if starts_at_file_beginning {
3696            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
3697        }
3698
3699        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
3700
3701        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
3702        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
3703        prompt.push_str(CURSOR_MARKER);
3704        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
3705        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
3706
3707        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
3708        write!(prompt, "\n```").ok();
3709
3710        prompt
3711    }
3712
3713    /// Cleans zeta1 model output by extracting content between editable region
3714    /// markers and converting the zeta1 cursor marker to the universal one.
3715    /// Returns `None` if the output doesn't contain the expected markers.
3716    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
3717        let content = output.replace(CURSOR_MARKER, "");
3718
3719        let content_start = content
3720            .find(EDITABLE_REGION_START_MARKER)
3721            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
3722            .map(|pos| {
3723                if content.as_bytes().get(pos) == Some(&b'\n') {
3724                    pos + 1
3725                } else {
3726                    pos
3727                }
3728            })
3729            .unwrap_or(0);
3730
3731        let content_end = content
3732            .find(EDITABLE_REGION_END_MARKER)
3733            .map(|pos| {
3734                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
3735                    pos - 1
3736                } else {
3737                    pos
3738                }
3739            })
3740            .unwrap_or(content.len());
3741
3742        if content_start > content_end {
3743            return Some(String::new());
3744        }
3745
3746        let extracted = &content[content_start..content_end];
3747
3748        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
3749            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
3750            let text_before_cursor = text_before_cursor
3751                .find(EDITABLE_REGION_START_MARKER)
3752                .map(|pos| {
3753                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
3754                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
3755                        after_marker + 1
3756                    } else {
3757                        after_marker
3758                    }
3759                })
3760                .unwrap_or(0);
3761            let offset_in_extracted = zeta1_cursor_pos
3762                .saturating_sub(text_before_cursor)
3763                .min(extracted.len());
3764            offset_in_extracted
3765        });
3766
3767        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
3768        if let Some(offset) = cursor_offset {
3769            result.push_str(&extracted[..offset]);
3770            result.push_str(super::CURSOR_MARKER);
3771            result.push_str(&extracted[offset..]);
3772        } else {
3773            result.push_str(extracted);
3774        }
3775
3776        Some(result)
3777    }
3778}
3779
3780#[cfg(test)]
3781mod tests {
3782    use super::*;
3783    use indoc::indoc;
3784
3785    fn make_input(
3786        cursor_excerpt: &str,
3787        editable_range: Range<usize>,
3788        cursor_offset: usize,
3789        events: Vec<Event>,
3790        related_files: Vec<RelatedFile>,
3791    ) -> ZetaPromptInput {
3792        let context_range = 0..cursor_excerpt.len();
3793        ZetaPromptInput {
3794            cursor_path: Path::new("test.rs").into(),
3795            cursor_excerpt: cursor_excerpt.into(),
3796            cursor_offset_in_excerpt: cursor_offset,
3797            excerpt_start_row: None,
3798            events: events.into_iter().map(Arc::new).collect(),
3799            related_files,
3800            excerpt_ranges: ExcerptRanges {
3801                editable_150: editable_range.clone(),
3802                editable_180: editable_range.clone(),
3803                editable_350: editable_range,
3804                editable_150_context_350: context_range.clone(),
3805                editable_180_context_350: context_range.clone(),
3806                editable_350_context_150: context_range,
3807                ..Default::default()
3808            },
3809            experiment: None,
3810            in_open_source_repo: false,
3811            can_collect_data: false,
3812            repo_url: None,
3813        }
3814    }
3815
3816    fn make_input_with_context_range(
3817        excerpt: &str,
3818        editable_range: Range<usize>,
3819        context_range: Range<usize>,
3820        cursor_offset: usize,
3821    ) -> ZetaPromptInput {
3822        ZetaPromptInput {
3823            cursor_path: Path::new("test.rs").into(),
3824            cursor_excerpt: excerpt.into(),
3825            cursor_offset_in_excerpt: cursor_offset,
3826            excerpt_start_row: None,
3827            events: vec![],
3828            related_files: vec![],
3829            excerpt_ranges: ExcerptRanges {
3830                editable_150: editable_range.clone(),
3831                editable_180: editable_range.clone(),
3832                editable_350: editable_range,
3833                editable_150_context_350: context_range.clone(),
3834                editable_180_context_350: context_range.clone(),
3835                editable_350_context_150: context_range,
3836                ..Default::default()
3837            },
3838            experiment: None,
3839            in_open_source_repo: false,
3840            can_collect_data: false,
3841            repo_url: None,
3842        }
3843    }
3844
3845    fn make_event(path: &str, diff: &str) -> Event {
3846        Event::BufferChange {
3847            path: Path::new(path).into(),
3848            old_path: Path::new(path).into(),
3849            diff: diff.to_string(),
3850            predicted: false,
3851            in_open_source_repo: false,
3852        }
3853    }
3854
3855    fn make_related_file(path: &str, content: &str) -> RelatedFile {
3856        RelatedFile {
3857            path: Path::new(path).into(),
3858            max_row: content.lines().count() as u32,
3859            excerpts: vec![RelatedExcerpt {
3860                row_range: 0..content.lines().count() as u32,
3861                text: content.into(),
3862                order: 0,
3863            }],
3864            in_open_source_repo: false,
3865        }
3866    }
3867
3868    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
3869        format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
3870    }
3871
/// With a generous budget (10000) the related file, the edit history and the
/// full cursor excerpt are all expected in the rendered prompt.
#[test]
fn test_no_truncation_when_within_budget() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    let expected = indoc! {r#"
        <|file_sep|>related.rs
        fn helper() {}
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new
        <|file_sep|>test.rs
        <|fim_prefix|>
        prefix
        <|fim_middle|>current
        edi<|user_cursor|>table
        <|fim_suffix|>

        suffix
        <|fim_middle|>updated
    "#};

    assert_eq!(format_with_budget(&input, 10000), expected);
}
3904
/// The same input is rendered twice: with a budget of 10000 everything is
/// expected, while with a budget of 50 the edit-history section is expected
/// to be gone and both related files to remain.
#[test]
fn test_truncation_drops_edit_history_when_budget_tight() {
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let related_files = vec![
        make_related_file("r1.rs", "a\n"),
        make_related_file("r2.rs", "b\n"),
    ];
    let input = make_input("code", 0..4, 2, events, related_files);

    let untruncated = indoc! {r#"
        <|file_sep|>r1.rs
        a
        <|file_sep|>r2.rs
        b
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), untruncated);

    let without_edit_history = indoc! {r#"
        <|file_sep|>r1.rs
        a
        <|file_sep|>r2.rs
        b
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 50), without_edit_history);
}
3955
/// One related file with three order-0 excerpts: a budget of 10000 is
/// expected to render all of them, a budget of 50 only the first one
/// (followed by the `...` separator).
#[test]
fn test_truncation_includes_partial_excerpts() {
    let excerpts: Vec<_> = [
        (0..10, "first excerpt\n"),
        (10..20, "second excerpt\n"),
        (20..30, "third excerpt\n"),
    ]
    .into_iter()
    .map(|(row_range, text)| RelatedExcerpt {
        row_range,
        text: text.into(),
        order: 0,
    })
    .collect();
    let big_file = RelatedFile {
        path: Path::new("big.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts,
    };
    let input = make_input("x", 0..1, 0, vec![], vec![big_file]);

    let all_excerpts = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        second excerpt
        ...
        third excerpt
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), all_excerpts);

    let first_excerpt_only = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 50), first_excerpt_only);
}
4020
/// Two files: file_a has a high-order excerpt (5), file_b has a low-order
/// one (1). With a tight budget, only the lower-order excerpt from file_b is
/// expected in the output.
#[test]
fn test_truncation_prioritizes_lower_order_excerpts() {
    // Both files are ten rows long and consist of a single excerpt.
    let single_excerpt_file = |path: &str, text: &str, order| RelatedFile {
        path: Path::new(path).into(),
        max_row: 10,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..10,
            text: text.into(),
            order,
        }],
    };
    let related_files = vec![
        single_excerpt_file("file_a.rs", "low priority content\n", 5),
        single_excerpt_file("file_b.rs", "high priority content\n", 1),
    ];
    let input = make_input("x", 0..1, 0, vec![], related_files);

    // With large budget, both files included; rendered in stable lexicographic order.
    let both_files = indoc! {r#"
        <|file_sep|>file_a.rs
        low priority content
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), both_files);

    // With tight budget, only file_b (lower order) fits.
    // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
    // file_b header (7) + excerpt (7) = 14 tokens, which fits.
    // file_a would need another 14 tokens, which doesn't fit.
    let file_b_only = indoc! {r#"
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 52), file_b_only);
}
4089
/// A single file has excerpts at order 1 and order 3. With a tight budget,
/// only the order-1 excerpts are included while the order-3 excerpt is
/// dropped, even though they belong to the same file. This also preserves
/// the parent invariant: parent outline items have order <= their best
/// child, so they're always included when any child is.
#[test]
fn test_truncation_drops_high_order_excerpts_within_file() {
    let excerpts: Vec<_> = [
        (0..5, "mod header\n", 1),
        (5..15, "important fn\n", 1),
        (15..30, "less important fn\n", 3),
    ]
    .into_iter()
    .map(|(row_range, text, order)| RelatedExcerpt {
        row_range,
        text: text.into(),
        order,
    })
    .collect();
    let module_file = RelatedFile {
        path: Path::new("mod.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts,
    };
    let input = make_input("x", 0..1, 0, vec![], vec![module_file]);

    // With large budget, all three excerpts included.
    let all_excerpts = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        less important fn
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), all_excerpts);

    // With tight budget, only order<=1 excerpts included (header + important fn).
    let low_order_only = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 55), low_order_only);
}
4163
/// Two events are supplied, `old.rs` first and `new.rs` second. With a budget
/// of 10000 both are expected in the edit history; with a budget of 55 only
/// the `new.rs` event is expected to remain.
#[test]
fn test_truncation_drops_older_events_first() {
    let events = vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")];
    let input = make_input("x", 0..1, 0, events, vec![]);

    let both_events = indoc! {r#"
        <|file_sep|>edit history
        --- a/old.rs
        +++ b/old.rs
        -1
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 10000), both_events);

    let newest_event_only = indoc! {r#"
        <|file_sep|>edit history
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&input, 55), newest_event_only);
}
4209
/// With a budget of 30 the prompt is expected to contain only the cursor
/// excerpt section: neither the related file nor the edit history appears.
#[test]
fn test_cursor_excerpt_always_included_with_minimal_budget() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "helper\n")];
    let input = make_input("fn main() {}", 0..12, 3, events, related_files);

    let cursor_section_only = indoc! {r#"
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        fn <|user_cursor|>main() {}
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};

    assert_eq!(format_with_budget(&input, 30), cursor_section_only);
}
4232
4233    fn format_seed_coder(input: &ZetaPromptInput) -> String {
4234        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
4235    }
4236
4237    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
4238        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
4239    }
4240
/// Pins the full seed-coder layout for an input with one related file and
/// one event: the suffix section first, then the related file, the edit
/// history and the cursor file with its `CURRENT` block, ending on the
/// fim-middle tag without a trailing newline.
#[test]
fn test_seed_coder_basic_format() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    let expected = indoc! {r#"
        <[fim-suffix]>
        suffix
        <[fim-prefix]><filename>related.rs
        fn helper() {}

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new

        <filename>test.rs
        prefix
        <<<<<<< CURRENT
        edi<|user_cursor|>table
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&input), expected);
}
4273
/// Without events or related files, the seed-coder prompt is expected to
/// contain only the suffix section and the cursor file.
#[test]
fn test_seed_coder_no_context() {
    let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);

    let expected = indoc! {r#"
        <[fim-suffix]>
        after
        <[fim-prefix]><filename>test.rs
        before
        <<<<<<< CURRENT
        mid<|user_cursor|>dle
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&input), expected);
}
4291
4292    #[test]
4293    fn test_seed_coder_truncation_drops_context() {
4294        let input = make_input(
4295            "code",
4296            0..4,
4297            2,
4298            vec![make_event("a.rs", "-x\n+y\n")],
4299            vec![make_related_file("r1.rs", "content\n")],
4300        );
4301
4302        // With large budget, everything is included
4303        assert_eq!(
4304            format_seed_coder(&input),
4305            indoc! {r#"
4306                <[fim-suffix]>
4307                <[fim-prefix]><filename>r1.rs
4308                content
4309
4310                <filename>edit_history
4311                --- a/a.rs
4312                +++ b/a.rs
4313                -x
4314                +y
4315
4316                <filename>test.rs
4317                <<<<<<< CURRENT
4318                co<|user_cursor|>de
4319                =======
4320                <[fim-middle]>"#}
4321        );
4322
4323        // With tight budget, context is dropped but cursor section remains
4324        assert_eq!(
4325            format_seed_coder_with_budget(&input, 30),
4326            indoc! {r#"
4327                <[fim-suffix]>
4328                <[fim-prefix]><filename>test.rs
4329                <<<<<<< CURRENT
4330                co<|user_cursor|>de
4331                =======
4332                <[fim-middle]>"#}
4333        );
4334    }
4335
4336    #[test]
4337    fn test_seed_coder_truncation_prioritizes_lower_order() {
4338        let input = make_input(
4339            "code",
4340            0..4,
4341            2,
4342            vec![],
4343            vec![
4344                RelatedFile {
4345                    path: Path::new("low_prio.rs").into(),
4346                    max_row: 5,
4347                    in_open_source_repo: false,
4348                    excerpts: vec![RelatedExcerpt {
4349                        row_range: 0..5,
4350                        text: "low prio\n".into(),
4351                        order: 10,
4352                    }],
4353                },
4354                RelatedFile {
4355                    path: Path::new("high_prio.rs").into(),
4356                    max_row: 5,
4357                    in_open_source_repo: false,
4358                    excerpts: vec![RelatedExcerpt {
4359                        row_range: 0..5,
4360                        text: "high prio\n".into(),
4361                        order: 1,
4362                    }],
4363                },
4364            ],
4365        );
4366
4367        // With large budget, both included; rendered in stable lexicographic order.
4368        assert_eq!(
4369            format_seed_coder(&input),
4370            indoc! {r#"
4371                <[fim-suffix]>
4372                <[fim-prefix]><filename>low_prio.rs
4373                low prio
4374                <filename>high_prio.rs
4375                high prio
4376
4377                <filename>test.rs
4378                <<<<<<< CURRENT
4379                co<|user_cursor|>de
4380                =======
4381                <[fim-middle]>"#}
4382        );
4383
4384        // With tight budget, only high_prio included.
4385        // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
4386        // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
4387        assert_eq!(
4388            format_seed_coder_with_budget(&input, 44),
4389            indoc! {r#"
4390                <[fim-suffix]>
4391                <[fim-prefix]><filename>high_prio.rs
4392                high prio
4393
4394                <filename>test.rs
4395                <<<<<<< CURRENT
4396                co<|user_cursor|>de
4397                =======
4398                <[fim-middle]>"#}
4399        );
4400    }
4401
4402    #[test]
4403    fn test_format_zeta1_from_input_basic() {
4404        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
4405        let input = ZetaPromptInput {
4406            cursor_path: Path::new("src/main.rs").into(),
4407            cursor_excerpt: excerpt.into(),
4408            cursor_offset_in_excerpt: 30,
4409            excerpt_start_row: Some(0),
4410            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
4411            related_files: vec![],
4412            excerpt_ranges: ExcerptRanges {
4413                editable_150: 15..41,
4414                editable_180: 15..41,
4415                editable_350: 15..41,
4416                editable_150_context_350: 0..excerpt.len(),
4417                editable_180_context_350: 0..excerpt.len(),
4418                editable_350_context_150: 0..excerpt.len(),
4419                ..Default::default()
4420            },
4421            experiment: None,
4422            in_open_source_repo: false,
4423            can_collect_data: false,
4424            repo_url: None,
4425        };
4426
4427        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
4428
4429        assert_eq!(
4430            prompt,
4431            concat!(
4432                "### Instruction:\n",
4433                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4434                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4435                "into account the cursor location.\n",
4436                "\n",
4437                "### User Edits:\n",
4438                "\n",
4439                "User edited other.rs:\n",
4440                "```diff\n",
4441                "-old\n",
4442                "+new\n",
4443                "\n",
4444                "```\n",
4445                "\n",
4446                "### User Excerpt:\n",
4447                "\n",
4448                "```src/main.rs\n",
4449                "<|start_of_file|>\n",
4450                "fn before() {}\n",
4451                "<|editable_region_start|>\n",
4452                "fn foo() {\n",
4453                "    <|user_cursor_is_here|>let x = 1;\n",
4454                "\n",
4455                "<|editable_region_end|>}\n",
4456                "fn after() {}\n",
4457                "\n",
4458                "```\n",
4459                "\n",
4460                "### Response:\n",
4461            ),
4462        );
4463    }
4464
4465    #[test]
4466    fn test_format_zeta1_from_input_no_start_of_file() {
4467        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
4468        let input = ZetaPromptInput {
4469            cursor_path: Path::new("src/main.rs").into(),
4470            cursor_excerpt: excerpt.into(),
4471            cursor_offset_in_excerpt: 15,
4472            excerpt_start_row: Some(10),
4473            events: vec![],
4474            related_files: vec![],
4475            excerpt_ranges: ExcerptRanges {
4476                editable_150: 0..28,
4477                editable_180: 0..28,
4478                editable_350: 0..28,
4479                editable_150_context_350: 0..28,
4480                editable_180_context_350: 0..28,
4481                editable_350_context_150: 0..28,
4482                ..Default::default()
4483            },
4484            experiment: None,
4485            in_open_source_repo: false,
4486            can_collect_data: false,
4487            repo_url: None,
4488        };
4489
4490        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
4491
4492        assert_eq!(
4493            prompt,
4494            concat!(
4495                "### Instruction:\n",
4496                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4497                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4498                "into account the cursor location.\n",
4499                "\n",
4500                "### User Edits:\n",
4501                "\n",
4502                "\n",
4503                "\n",
4504                "### User Excerpt:\n",
4505                "\n",
4506                "```src/main.rs\n",
4507                "<|editable_region_start|>\n",
4508                "fn foo() {\n",
4509                "    <|user_cursor_is_here|>let x = 1;\n",
4510                "}\n",
4511                "\n",
4512                "<|editable_region_end|>\n",
4513                "```\n",
4514                "\n",
4515                "### Response:\n",
4516            ),
4517        );
4518    }
4519
4520    #[test]
4521    fn test_format_zeta1_from_input_with_sub_ranges() {
4522        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
4523        let editable_range = 10..37;
4524        let context_range = 0..excerpt.len();
4525
4526        let input = ZetaPromptInput {
4527            cursor_path: Path::new("test.rs").into(),
4528            cursor_excerpt: excerpt.into(),
4529            cursor_offset_in_excerpt: 25,
4530            excerpt_start_row: Some(0),
4531            events: vec![],
4532            related_files: vec![],
4533            excerpt_ranges: ExcerptRanges {
4534                editable_150: editable_range.clone(),
4535                editable_180: editable_range.clone(),
4536                editable_350: editable_range.clone(),
4537                editable_150_context_350: context_range.clone(),
4538                editable_180_context_350: context_range.clone(),
4539                editable_350_context_150: context_range.clone(),
4540                ..Default::default()
4541            },
4542            experiment: None,
4543            in_open_source_repo: false,
4544            can_collect_data: false,
4545            repo_url: None,
4546        };
4547
4548        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
4549
4550        assert_eq!(
4551            prompt,
4552            concat!(
4553                "### Instruction:\n",
4554                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
4555                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
4556                "into account the cursor location.\n",
4557                "\n",
4558                "### User Edits:\n",
4559                "\n",
4560                "\n",
4561                "\n",
4562                "### User Excerpt:\n",
4563                "\n",
4564                "```test.rs\n",
4565                "<|start_of_file|>\n",
4566                "// prefix\n",
4567                "<|editable_region_start|>\n",
4568                "fn foo() {\n",
4569                "    <|user_cursor_is_here|>let x = 1;\n",
4570                "}\n",
4571                "<|editable_region_end|>\n",
4572                "// suffix\n",
4573                "\n",
4574                "```\n",
4575                "\n",
4576                "### Response:\n",
4577            ),
4578        );
4579    }
4580
4581    #[test]
4582    fn test_clean_zeta1_model_output_basic() {
4583        let output = indoc! {"
4584            <|editable_region_start|>
4585            fn main() {
4586                println!(\"hello\");
4587            }
4588            <|editable_region_end|>
4589        "};
4590
4591        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4592        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
4593    }
4594
4595    #[test]
4596    fn test_clean_zeta1_model_output_with_cursor() {
4597        let output = indoc! {"
4598            <|editable_region_start|>
4599            fn main() {
4600                <|user_cursor_is_here|>println!(\"hello\");
4601            }
4602            <|editable_region_end|>
4603        "};
4604
4605        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4606        assert_eq!(
4607            cleaned,
4608            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
4609        );
4610    }
4611
4612    #[test]
4613    fn test_clean_zeta1_model_output_no_markers() {
4614        let output = "fn main() {}\n";
4615        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4616        assert_eq!(cleaned, "fn main() {}\n");
4617    }
4618
4619    #[test]
4620    fn test_clean_zeta1_model_output_empty_region() {
4621        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
4622        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
4623        assert_eq!(cleaned, "");
4624    }
4625
4626    fn apply_edit(excerpt: &str, parsed_output: &ParsedOutput) -> String {
4627        let mut result = excerpt.to_string();
4628        result.replace_range(
4629            parsed_output.range_in_excerpt.clone(),
4630            &parsed_output.new_editable_region,
4631        );
4632        result
4633    }
4634
4635    #[test]
4636    fn test_parse_zeta2_model_output() {
4637        let excerpt = "before ctx\nctx start\neditable old\nctx end\nafter ctx\n";
4638        let context_start = excerpt.find("ctx start").unwrap();
4639        let context_end = excerpt.find("after ctx").unwrap();
4640        let editable_start = excerpt.find("editable old").unwrap();
4641        let editable_end = editable_start + "editable old\n".len();
4642        let input = make_input_with_context_range(
4643            excerpt,
4644            editable_start..editable_end,
4645            context_start..context_end,
4646            editable_start,
4647        );
4648
4649        let output = parse_zeta2_model_output(
4650            "editable new\n>>>>>>> UPDATED\n",
4651            ZetaFormat::V0131GitMergeMarkersPrefix,
4652            &input,
4653        )
4654        .unwrap();
4655
4656        assert_eq!(
4657            apply_edit(excerpt, &output),
4658            "before ctx\nctx start\neditable new\nctx end\nafter ctx\n"
4659        );
4660    }
4661
4662    #[test]
4663    fn test_parse_zeta2_model_output_identity() {
4664        let excerpt = "aaa\nbbb\nccc\nddd\neee\n";
4665        let editable_start = excerpt.find("bbb").unwrap();
4666        let editable_end = excerpt.find("ddd").unwrap();
4667        let input = make_input_with_context_range(
4668            excerpt,
4669            editable_start..editable_end,
4670            0..excerpt.len(),
4671            editable_start,
4672        );
4673
4674        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
4675        let output =
4676            parse_zeta2_model_output("bbb\nccc\n>>>>>>> UPDATED\n", format, &input).unwrap();
4677
4678        assert_eq!(apply_edit(excerpt, &output), excerpt);
4679    }
4680
4681    #[test]
4682    fn test_parse_zeta2_model_output_strips_end_marker() {
4683        let excerpt = "hello\nworld\n";
4684        let input = make_input_with_context_range(excerpt, 0..excerpt.len(), 0..excerpt.len(), 0);
4685
4686        let format = ZetaFormat::V0131GitMergeMarkersPrefix;
4687        let output1 =
4688            parse_zeta2_model_output("new content\n>>>>>>> UPDATED\n", format, &input).unwrap();
4689        let output2 = parse_zeta2_model_output("new content\n", format, &input).unwrap();
4690
4691        assert_eq!(apply_edit(excerpt, &output1), apply_edit(excerpt, &output2));
4692        assert_eq!(apply_edit(excerpt, &output1), "new content\n");
4693    }
4694}