zeta_prompt.rs

   1use anyhow::Result;
   2use serde::{Deserialize, Serialize};
   3use std::fmt::Write;
   4use std::ops::Range;
   5use std::path::Path;
   6use std::sync::Arc;
   7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
   8
/// Marker written into the rendered cursor excerpt at the cursor offset.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes to the prompt formatter.
pub const MAX_PROMPT_TOKENS: usize = 4096;
  11
/// Fraction of the editable region (measured by byte length) that may be
/// used as prefill. Larger values may result in more robust generation, but
/// the prefilled part of the region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  16
  17fn estimate_tokens(bytes: usize) -> usize {
  18    bytes / 3
  19}
  20
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
#[derive(Clone, Debug, Default, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Editable region computed with a 512-token budget, when provided.
    pub editable_512: Option<Range<usize>>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
    /// Context boundary when using editable_350 with 512 tokens of additional
    /// context, when provided (following the naming pattern above).
    pub editable_350_context_512: Option<Range<usize>>,
    /// Context boundary when using editable_350 with 1024 tokens of additional
    /// context, when provided (following the naming pattern above).
    pub editable_350_context_1024: Option<Range<usize>>,
    /// NOTE(review): presumably a context range computed with a 4096-token
    /// budget; not read anywhere in the visible part of this file — confirm.
    pub context_4096: Option<Range<usize>>,
    /// NOTE(review): presumably a context range computed with an 8192-token
    /// budget; not read anywhere in the visible part of this file — confirm.
    pub context_8192: Option<Range<usize>>,
}
  45
/// Everything needed to render an edit-prediction prompt for one cursor position.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written after `<|file_sep|>`
    /// at the top of the cursor excerpt section.
    pub cursor_path: Arc<Path>,
    /// Text surrounding the cursor. The ranges in `excerpt_ranges` are byte
    /// ranges into this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// NOTE(review): presumably the row of the file at which `cursor_excerpt`
    /// starts; not read in the visible part of this file — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events rendered into the prompt's edit-history section.
    pub events: Vec<Arc<Event>>,
    /// Files whose excerpts are rendered into the prompt as additional context.
    pub related_files: Vec<RelatedFile>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    /// Defaults to `false` when absent from the serialized input.
    /// NOTE(review): presumably whether the cursor file lives in an
    /// open-source repository — confirm with the producer.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// Defaults to `false` when absent from the serialized input.
    /// NOTE(review): presumably whether this input may be collected as
    /// training data — confirm with the producer.
    #[serde(default)]
    pub can_collect_data: bool,
}
  65
/// Identifies a prompt format version. The `*_for_format` functions in this
/// file dispatch on it to pick special tokens, excerpt ranges, section layout
/// and output markers. `V0131GitMergeMarkersPrefix` is the default.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Required because `v0226Hashline` starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
}
  91
  92impl std::fmt::Display for ZetaFormat {
  93    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  94        write!(f, "{}", <&'static str>::from(self))
  95    }
  96}
  97
  98impl ZetaFormat {
  99    pub fn parse(format_name: &str) -> Result<Self> {
 100        let mut results = ZetaFormat::iter().filter(|version| {
 101            <&'static str>::from(version)
 102                .to_lowercase()
 103                .contains(&format_name.to_lowercase())
 104        });
 105        let Some(result) = results.next() else {
 106            anyhow::bail!(
 107                "`{format_name}` did not match any of:\n{}",
 108                Self::options_as_string()
 109            );
 110        };
 111        if results.next().is_some() {
 112            anyhow::bail!(
 113                "`{format_name}` matched more than one of:\n{}",
 114                Self::options_as_string()
 115            );
 116        }
 117        Ok(result)
 118    }
 119
 120    pub fn options_as_string() -> String {
 121        ZetaFormat::iter()
 122            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 123            .collect::<Vec<_>>()
 124            .concat()
 125    }
 126}
 127
/// An edit-history event. Serialized with an `event` tag field identifying
/// the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, rendered by `write_event` as a diff entry.
    BufferChange {
        /// Path written on the `+++ b` line of the rendered entry.
        path: Arc<Path>,
        /// Path written on the `--- a` line of the rendered entry.
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the two path lines.
        diff: String,
        /// When true, the rendered entry is preceded by the line
        /// `// User accepted prediction:`.
        predicted: bool,
        /// Returned by `Event::in_open_source_repo`; not written to the prompt.
        in_open_source_repo: bool,
    },
}
 139
 140impl Event {
 141    pub fn in_open_source_repo(&self) -> bool {
 142        match self {
 143            Event::BufferChange {
 144                in_open_source_repo,
 145                ..
 146            } => *in_open_source_repo,
 147        }
 148    }
 149}
 150
 151pub fn write_event(prompt: &mut String, event: &Event) {
 152    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 153        for component in path.components() {
 154            prompt.push('/');
 155            write!(prompt, "{}", component.as_os_str().display()).ok();
 156        }
 157    }
 158    match event {
 159        Event::BufferChange {
 160            path,
 161            old_path,
 162            diff,
 163            predicted,
 164            in_open_source_repo: _,
 165        } => {
 166            if *predicted {
 167                prompt.push_str("// User accepted prediction:\n");
 168            }
 169            prompt.push_str("--- a");
 170            write_path_as_unix_str(prompt, old_path.as_ref());
 171            prompt.push_str("\n+++ b");
 172            write_path_as_unix_str(prompt, path.as_ref());
 173            prompt.push('\n');
 174            prompt.push_str(diff);
 175        }
 176    }
 177}
 178
/// A file whose excerpts are offered to the model as additional context.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written in the file's header line in the prompt.
    pub path: Arc<Path>,
    /// Compared against each excerpt's `row_range.end`: an excerpt ending
    /// before this row is followed by a `...` line when rendered.
    pub max_row: u32,
    /// Excerpts of this file, rendered in this order under the file header.
    pub excerpts: Vec<RelatedExcerpt>,
    /// Defaults to `false` when absent from the serialized input.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 187
/// A contiguous piece of text taken from a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by the excerpt; only `end` is consulted when rendering
    /// (to decide whether a trailing `...` line is needed).
    pub row_range: Range<u32>,
    /// The excerpt's text, written verbatim into the prompt.
    pub text: Arc<str>,
    /// Priority used when fitting excerpts into a token budget: excerpts with
    /// lower values are considered first. Defaults to 0 when absent.
    #[serde(default)]
    pub order: usize,
}
 195
 196pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 197    special_tokens_for_format(format)
 198        .iter()
 199        .any(|token| input.cursor_excerpt.contains(token))
 200}
 201
/// Renders the prompt for `input` in `format`, using `MAX_PROMPT_TOKENS` as
/// the token budget.
pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
}
 205
/// Returns the special tokens of `format` by delegating to the
/// `special_tokens()` function of the module that implements it.
pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
    match format {
        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
        ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
        ZetaFormat::v0226Hashline => hashline::special_tokens(),
    }
}
 218
 219pub fn excerpt_ranges_for_format(
 220    format: ZetaFormat,
 221    ranges: &ExcerptRanges,
 222) -> (Range<usize>, Range<usize>) {
 223    match format {
 224        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 225            ranges.editable_150.clone(),
 226            ranges.editable_150_context_350.clone(),
 227        ),
 228        ZetaFormat::V0114180EditableRegion => (
 229            ranges.editable_180.clone(),
 230            ranges.editable_180_context_350.clone(),
 231        ),
 232        ZetaFormat::V0120GitMergeMarkers
 233        | ZetaFormat::V0131GitMergeMarkersPrefix
 234        | ZetaFormat::V0211Prefill
 235        | ZetaFormat::V0211SeedCoder
 236        | ZetaFormat::v0226Hashline => (
 237            ranges.editable_350.clone(),
 238            ranges.editable_350_context_150.clone(),
 239        ),
 240    }
 241}
 242
/// Appends the cursor excerpt section for `format` to `prompt` by forwarding
/// all arguments to the `write_cursor_excerpt_section` function of the module
/// that implements the format.
///
/// `V0114180EditableRegion` reuses the `v0113_ordered` writer and
/// `V0211Prefill` reuses the `v0131_git_merge_markers_prefix` writer.
pub fn write_cursor_excerpt_section_for_format(
    format: ZetaFormat,
    prompt: &mut String,
    path: &Path,
    context: &str,
    editable_range: &Range<usize>,
    cursor_offset: usize,
) {
    match format {
        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
            v0113_ordered::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
                prompt,
                path,
                context,
                editable_range,
                cursor_offset,
            )
        }
        ZetaFormat::V0211SeedCoder => seed_coder::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
            prompt,
            path,
            context,
            editable_range,
            cursor_offset,
        ),
    }
}
 300
 301pub fn format_prompt_with_budget_for_format(
 302    input: &ZetaPromptInput,
 303    format: ZetaFormat,
 304    max_tokens: usize,
 305) -> String {
 306    let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
 307    let path = &*input.cursor_path;
 308
 309    match format {
 310        ZetaFormat::V0211SeedCoder => seed_coder::format_prompt_with_budget(
 311            path,
 312            context,
 313            &editable_range,
 314            cursor_offset,
 315            &input.events,
 316            &input.related_files,
 317            max_tokens,
 318        ),
 319        _ => {
 320            let mut cursor_section = String::new();
 321            write_cursor_excerpt_section_for_format(
 322                format,
 323                &mut cursor_section,
 324                path,
 325                context,
 326                &editable_range,
 327                cursor_offset,
 328            );
 329
 330            let cursor_tokens = estimate_tokens(cursor_section.len());
 331            let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
 332
 333            let edit_history_section = format_edit_history_within_budget(
 334                &input.events,
 335                "<|file_sep|>",
 336                "edit history",
 337                budget_after_cursor,
 338            );
 339            let edit_history_tokens = estimate_tokens(edit_history_section.len());
 340            let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 341
 342            let related_files_section = format_related_files_within_budget(
 343                &input.related_files,
 344                "<|file_sep|>",
 345                "",
 346                budget_after_edit_history,
 347            );
 348
 349            let mut prompt = String::new();
 350            prompt.push_str(&related_files_section);
 351            prompt.push_str(&edit_history_section);
 352            prompt.push_str(&cursor_section);
 353            prompt
 354        }
 355    }
 356}
 357
/// Returns the prefill text for `format`. Only `V0211Prefill` produces one
/// (via `v0211_prefill::get_prefill`); every other format yields an empty
/// string.
pub fn get_prefill_for_format(
    format: ZetaFormat,
    context: &str,
    editable_range: &Range<usize>,
) -> String {
    match format {
        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
        ZetaFormat::V0112MiddleAtEnd
        | ZetaFormat::V0113Ordered
        | ZetaFormat::V0114180EditableRegion
        | ZetaFormat::V0120GitMergeMarkers
        | ZetaFormat::V0131GitMergeMarkersPrefix
        | ZetaFormat::V0211SeedCoder
        | ZetaFormat::v0226Hashline => String::new(),
    }
}
 374
/// Returns the end marker for `format`, i.e. the suffix that
/// `clean_zeta2_model_output` strips from model output, or `None` for formats
/// that have no such marker.
pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
    match format {
        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
        // The prefill format shares the end marker of the prefix format.
        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
        ZetaFormat::V0211SeedCoder => Some(seed_coder::END_MARKER),
        ZetaFormat::V0112MiddleAtEnd
        | ZetaFormat::V0113Ordered
        | ZetaFormat::V0114180EditableRegion
        | ZetaFormat::v0226Hashline => None,
    }
}
 387
/// Returns a `(start, end)` pair of marker strings for `format`.
///
/// NOTE(review): presumably these are the strings that immediately precede
/// and follow the current editable region in a rendered prompt, so a caller
/// can extract that region — confirm against the callers.
pub fn current_region_markers_for_format(format: ZetaFormat) -> (&'static str, &'static str) {
    match format {
        ZetaFormat::V0112MiddleAtEnd => ("<|fim_middle|>current\n", "<|fim_middle|>updated"),
        ZetaFormat::V0113Ordered
        | ZetaFormat::V0114180EditableRegion
        | ZetaFormat::v0226Hashline => ("<|fim_middle|>current\n", "<|fim_suffix|>"),
        ZetaFormat::V0120GitMergeMarkers
        | ZetaFormat::V0131GitMergeMarkersPrefix
        | ZetaFormat::V0211Prefill => (
            v0120_git_merge_markers::START_MARKER,
            v0120_git_merge_markers::SEPARATOR,
        ),
        ZetaFormat::V0211SeedCoder => (seed_coder::START_MARKER, seed_coder::SEPARATOR),
    }
}
 403
/// Returns an owned copy of `region`. For `v0226Hashline` the copy is the
/// result of `hashline::strip_hashline_prefixes`; every other format returns
/// the text unchanged.
pub fn clean_extracted_region_for_format(format: ZetaFormat, region: &str) -> String {
    match format {
        ZetaFormat::v0226Hashline => hashline::strip_hashline_prefixes(region),
        _ => region.to_string(),
    }
}
 410
/// Encodes `patch` as model output for formats that need a dedicated
/// encoding.
///
/// Returns `Ok(Some(..))` with the result of `hashline::patch_to_edit_commands`
/// for `v0226Hashline` and `Ok(None)` for every other format.
///
/// # Errors
///
/// Propagates any error returned by `hashline::patch_to_edit_commands`.
pub fn encode_patch_as_output_for_format(
    format: ZetaFormat,
    old_editable_region: &str,
    patch: &str,
    cursor_offset: Option<usize>,
) -> Result<Option<String>> {
    match format {
        ZetaFormat::v0226Hashline => {
            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
        }
        _ => Ok(None),
    }
}
 424
 425pub fn output_with_context_for_format(
 426    format: ZetaFormat,
 427    old_editable_region: &str,
 428    output: &str,
 429) -> Result<Option<String>> {
 430    match format {
 431        ZetaFormat::v0226Hashline => {
 432            if hashline::output_has_edit_commands(output) {
 433                Ok(Some(hashline::apply_edit_commands(
 434                    old_editable_region,
 435                    output,
 436                )))
 437            } else {
 438                Ok(None)
 439            }
 440        }
 441        _ => Ok(None),
 442    }
 443}
 444
 445/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
 446pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
 447    match output_end_marker_for_format(format) {
 448        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
 449        None => output,
 450    }
 451}
 452
/// Alias of `excerpt_ranges_for_format`: returns the
/// `(editable range, context range)` pair that `format` uses.
pub fn excerpt_range_for_format(
    format: ZetaFormat,
    ranges: &ExcerptRanges,
) -> (Range<usize>, Range<usize>) {
    excerpt_ranges_for_format(format, ranges)
}
 459
 460pub fn resolve_cursor_region(
 461    input: &ZetaPromptInput,
 462    format: ZetaFormat,
 463) -> (&str, Range<usize>, usize) {
 464    let (editable_range, context_range) = excerpt_range_for_format(format, &input.excerpt_ranges);
 465    let context_start = context_range.start;
 466    let context_text = &input.cursor_excerpt[context_range];
 467    let adjusted_editable =
 468        (editable_range.start - context_start)..(editable_range.end - context_start);
 469    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 470
 471    (context_text, adjusted_editable, adjusted_cursor)
 472}
 473
/// Computes the prefill for `input` in `format`: resolves the context text
/// and editable range, then delegates to `get_prefill_for_format`.
pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
    let (context, editable_range, _) = resolve_cursor_region(input, format);
    get_prefill_for_format(format, context, &editable_range)
}
 478
 479fn format_edit_history_within_budget(
 480    events: &[Arc<Event>],
 481    file_marker: &str,
 482    edit_history_name: &str,
 483    max_tokens: usize,
 484) -> String {
 485    let header = format!("{}{}\n", file_marker, edit_history_name);
 486    let header_tokens = estimate_tokens(header.len());
 487    if header_tokens >= max_tokens {
 488        return String::new();
 489    }
 490
 491    let mut event_strings: Vec<String> = Vec::new();
 492    let mut total_tokens = header_tokens;
 493
 494    for event in events.iter().rev() {
 495        let mut event_str = String::new();
 496        write_event(&mut event_str, event);
 497        let event_tokens = estimate_tokens(event_str.len());
 498
 499        if total_tokens + event_tokens > max_tokens {
 500            break;
 501        }
 502        total_tokens += event_tokens;
 503        event_strings.push(event_str);
 504    }
 505
 506    if event_strings.is_empty() {
 507        return String::new();
 508    }
 509
 510    let mut result = header;
 511    for event_str in event_strings.iter().rev() {
 512        result.push_str(event_str);
 513    }
 514    result
 515}
 516
 517fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
 518    let needs_newline = !excerpt.text.ends_with('\n');
 519    let needs_ellipsis = excerpt.row_range.end < file_max_row;
 520    let len = excerpt.text.len()
 521        + if needs_newline { "\n".len() } else { 0 }
 522        + if needs_ellipsis { "...\n".len() } else { 0 };
 523    estimate_tokens(len)
 524}
 525
/// Renders related-file excerpts that fit within `max_tokens`.
///
/// Excerpts are admitted in ascending `order` (ties broken by file index,
/// then excerpt index) until the first one that would exceed the budget;
/// nothing after that excerpt is considered. The admitted excerpts are then
/// rendered grouped by file, in the original file and excerpt order. Each
/// file starts with a `{file_prefix}{path}` header line, and `file_suffix`
/// is written between consecutive files.
///
/// Returns an empty string when nothing fits.
pub fn format_related_files_within_budget(
    related_files: &[RelatedFile],
    file_prefix: &str,
    file_suffix: &str,
    max_tokens: usize,
) -> String {
    // Identifies one excerpt by position, together with its priority.
    struct ExcerptCandidate {
        file_ix: usize,
        excerpt_ix: usize,
        order: usize,
    }

    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
        .iter()
        .enumerate()
        .flat_map(|(file_ix, file)| {
            file.excerpts
                .iter()
                .enumerate()
                .map(move |(excerpt_ix, e)| ExcerptCandidate {
                    file_ix,
                    excerpt_ix,
                    order: e.order,
                })
        })
        .collect();

    // Pre-compute file header strings and their token costs.
    let file_headers: Vec<String> = related_files
        .iter()
        .map(|file| {
            let path_str = file.path.to_string_lossy();
            format!("{}{}\n", file_prefix, path_str)
        })
        .collect();

    // Sort the excerpts by their order and determine how many fit within the budget.
    let mut total_tokens = 0;
    let mut included_excerpt_count = 0_usize;
    let mut included_file_indices = vec![false; related_files.len()];
    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
    for candidate in &excerpt_candidates {
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        let file_already_included = included_file_indices[candidate.file_ix];
        // The header (and suffix) is only paid for by the first excerpt of a file.
        let header_cost = if file_already_included {
            0
        } else {
            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
        };
        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
        // Stop at the first excerpt that does not fit; lower-priority
        // excerpts are not tried even if they are smaller.
        if total_tokens + header_cost + excerpt_cost > max_tokens {
            break;
        }
        total_tokens += header_cost + excerpt_cost;
        if !file_already_included {
            included_file_indices[candidate.file_ix] = true;
        }
        included_excerpt_count += 1;
    }

    // Keep only the admitted prefix, then restore the original file/excerpt order.
    excerpt_candidates.truncate(included_excerpt_count);
    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));

    // Render all of the files that fit within the token budget, in the original order.
    let mut result = String::new();
    let mut last_file_ix = None;
    for candidate in &excerpt_candidates {
        if last_file_ix != Some(candidate.file_ix) {
            // NOTE(review): `file_suffix` is written only between files, so
            // nothing follows the last file even though the budget above
            // charges the suffix for every file — confirm this is intended.
            if last_file_ix.is_some() {
                result.push_str(file_suffix);
            }
            result.push_str(&file_headers[candidate.file_ix]);
            last_file_ix = Some(candidate.file_ix);
        }
        let file = &related_files[candidate.file_ix];
        let excerpt = &file.excerpts[candidate.excerpt_ix];
        result.push_str(&excerpt.text);
        if !result.ends_with('\n') {
            result.push('\n');
        }
        // Signal that the file continues past this excerpt.
        if excerpt.row_range.end < file.max_row {
            result.push_str("...\n");
        }
    }

    result
}
 614
 615pub fn write_related_files(
 616    prompt: &mut String,
 617    related_files: &[RelatedFile],
 618) -> Vec<Range<usize>> {
 619    let mut ranges = Vec::new();
 620    for file in related_files {
 621        let start = prompt.len();
 622        let path_str = file.path.to_string_lossy();
 623        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 624        for excerpt in &file.excerpts {
 625            prompt.push_str(&excerpt.text);
 626            if !prompt.ends_with('\n') {
 627                prompt.push('\n');
 628            }
 629            if excerpt.row_range.end < file.max_row {
 630                prompt.push_str("...\n");
 631            }
 632        }
 633        let end = prompt.len();
 634        ranges.push(start..end);
 635    }
 636    ranges
 637}
 638
 639mod v0112_middle_at_end {
 640    use super::*;
 641
 642    pub fn special_tokens() -> &'static [&'static str] {
 643        &[
 644            "<|fim_prefix|>",
 645            "<|fim_suffix|>",
 646            "<|fim_middle|>",
 647            "<|file_sep|>",
 648            CURSOR_MARKER,
 649        ]
 650    }
 651
 652    pub fn write_cursor_excerpt_section(
 653        prompt: &mut String,
 654        path: &Path,
 655        context: &str,
 656        editable_range: &Range<usize>,
 657        cursor_offset: usize,
 658    ) {
 659        let path_str = path.to_string_lossy();
 660        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 661
 662        prompt.push_str("<|fim_prefix|>\n");
 663        prompt.push_str(&context[..editable_range.start]);
 664
 665        prompt.push_str("<|fim_suffix|>\n");
 666        prompt.push_str(&context[editable_range.end..]);
 667        if !prompt.ends_with('\n') {
 668            prompt.push('\n');
 669        }
 670
 671        prompt.push_str("<|fim_middle|>current\n");
 672        prompt.push_str(&context[editable_range.start..cursor_offset]);
 673        prompt.push_str(CURSOR_MARKER);
 674        prompt.push_str(&context[cursor_offset..editable_range.end]);
 675        if !prompt.ends_with('\n') {
 676            prompt.push('\n');
 677        }
 678
 679        prompt.push_str("<|fim_middle|>updated\n");
 680    }
 681}
 682
 683mod v0113_ordered {
 684    use super::*;
 685
 686    pub fn special_tokens() -> &'static [&'static str] {
 687        &[
 688            "<|fim_prefix|>",
 689            "<|fim_suffix|>",
 690            "<|fim_middle|>",
 691            "<|file_sep|>",
 692            CURSOR_MARKER,
 693        ]
 694    }
 695
 696    pub fn write_cursor_excerpt_section(
 697        prompt: &mut String,
 698        path: &Path,
 699        context: &str,
 700        editable_range: &Range<usize>,
 701        cursor_offset: usize,
 702    ) {
 703        let path_str = path.to_string_lossy();
 704        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 705
 706        prompt.push_str("<|fim_prefix|>\n");
 707        prompt.push_str(&context[..editable_range.start]);
 708        if !prompt.ends_with('\n') {
 709            prompt.push('\n');
 710        }
 711
 712        prompt.push_str("<|fim_middle|>current\n");
 713        prompt.push_str(&context[editable_range.start..cursor_offset]);
 714        prompt.push_str(CURSOR_MARKER);
 715        prompt.push_str(&context[cursor_offset..editable_range.end]);
 716        if !prompt.ends_with('\n') {
 717            prompt.push('\n');
 718        }
 719
 720        prompt.push_str("<|fim_suffix|>\n");
 721        prompt.push_str(&context[editable_range.end..]);
 722        if !prompt.ends_with('\n') {
 723            prompt.push('\n');
 724        }
 725
 726        prompt.push_str("<|fim_middle|>updated\n");
 727    }
 728}
 729
mod v0114180_editable_region {
    use super::*;

    /// This format uses the same special tokens as `v0113_ordered`.
    pub fn special_tokens() -> &'static [&'static str] {
        v0113_ordered::special_tokens()
    }
}
 737
 738pub mod v0120_git_merge_markers {
 739    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 740    //!
 741    //! Example prompt:
 742    //!
 743    //! <|file_sep|>path/to/target_file.py
 744    //! <|fim_prefix|>
 745    //! code before editable region
 746    //! <|fim_suffix|>
 747    //! code after editable region
 748    //! <|fim_middle|>
 749    //! <<<<<<< CURRENT
 750    //! code that
 751    //! needs to<|user_cursor|>
 752    //! be rewritten
 753    //! =======
 754    //!
 755    //! Expected output (should be generated by the model):
 756    //!
 757    //! updated
 758    //! code with
 759    //! changes applied
 760    //! >>>>>>> UPDATED
 761
 762    use super::*;
 763
 764    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 765    pub const SEPARATOR: &str = "=======\n";
 766    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 767
 768    pub fn special_tokens() -> &'static [&'static str] {
 769        &[
 770            "<|fim_prefix|>",
 771            "<|fim_suffix|>",
 772            "<|fim_middle|>",
 773            "<|file_sep|>",
 774            START_MARKER,
 775            SEPARATOR,
 776            END_MARKER,
 777            CURSOR_MARKER,
 778        ]
 779    }
 780
 781    pub fn write_cursor_excerpt_section(
 782        prompt: &mut String,
 783        path: &Path,
 784        context: &str,
 785        editable_range: &Range<usize>,
 786        cursor_offset: usize,
 787    ) {
 788        let path_str = path.to_string_lossy();
 789        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 790
 791        prompt.push_str("<|fim_prefix|>");
 792        prompt.push_str(&context[..editable_range.start]);
 793
 794        prompt.push_str("<|fim_suffix|>");
 795        prompt.push_str(&context[editable_range.end..]);
 796        if !prompt.ends_with('\n') {
 797            prompt.push('\n');
 798        }
 799
 800        prompt.push_str("<|fim_middle|>");
 801        prompt.push_str(START_MARKER);
 802        prompt.push_str(&context[editable_range.start..cursor_offset]);
 803        prompt.push_str(CURSOR_MARKER);
 804        prompt.push_str(&context[cursor_offset..editable_range.end]);
 805        if !prompt.ends_with('\n') {
 806            prompt.push('\n');
 807        }
 808        prompt.push_str(SEPARATOR);
 809    }
 810}
 811
 812pub mod v0131_git_merge_markers_prefix {
 813    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 814    //!
 815    //! Example prompt:
 816    //!
 817    //! <|file_sep|>path/to/target_file.py
 818    //! <|fim_prefix|>
 819    //! code before editable region
 820    //! <<<<<<< CURRENT
 821    //! code that
 822    //! needs to<|user_cursor|>
 823    //! be rewritten
 824    //! =======
 825    //! <|fim_suffix|>
 826    //! code after editable region
 827    //! <|fim_middle|>
 828    //!
 829    //! Expected output (should be generated by the model):
 830    //!
 831    //! updated
 832    //! code with
 833    //! changes applied
 834    //! >>>>>>> UPDATED
 835
 836    use super::*;
 837
 838    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 839    pub const SEPARATOR: &str = "=======\n";
 840    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 841
 842    pub fn special_tokens() -> &'static [&'static str] {
 843        &[
 844            "<|fim_prefix|>",
 845            "<|fim_suffix|>",
 846            "<|fim_middle|>",
 847            "<|file_sep|>",
 848            START_MARKER,
 849            SEPARATOR,
 850            END_MARKER,
 851            CURSOR_MARKER,
 852        ]
 853    }
 854
 855    pub fn write_cursor_excerpt_section(
 856        prompt: &mut String,
 857        path: &Path,
 858        context: &str,
 859        editable_range: &Range<usize>,
 860        cursor_offset: usize,
 861    ) {
 862        let path_str = path.to_string_lossy();
 863        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 864
 865        prompt.push_str("<|fim_prefix|>");
 866        prompt.push_str(&context[..editable_range.start]);
 867        prompt.push_str(START_MARKER);
 868        prompt.push_str(&context[editable_range.start..cursor_offset]);
 869        prompt.push_str(CURSOR_MARKER);
 870        prompt.push_str(&context[cursor_offset..editable_range.end]);
 871        if !prompt.ends_with('\n') {
 872            prompt.push('\n');
 873        }
 874        prompt.push_str(SEPARATOR);
 875
 876        prompt.push_str("<|fim_suffix|>");
 877        prompt.push_str(&context[editable_range.end..]);
 878        if !prompt.ends_with('\n') {
 879            prompt.push('\n');
 880        }
 881
 882        prompt.push_str("<|fim_middle|>");
 883    }
 884}
 885
 886pub mod v0211_prefill {
 887    use super::*;
 888
 889    pub fn special_tokens() -> &'static [&'static str] {
 890        v0131_git_merge_markers_prefix::special_tokens()
 891    }
 892
 893    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
 894        let editable_region = &context[editable_range.start..editable_range.end];
 895
 896        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
 897        let prefill_len = editable_region.floor_char_boundary(prefill_len);
 898
 899        // Find a token boundary to avoid splitting tokens in the prefill.
 900        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
 901        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
 902        // the \n and consume any consecutive \n characters after it.
 903        let prefill = &editable_region[..prefill_len];
 904        match prefill.rfind('\n') {
 905            Some(pos) => {
 906                let mut end = pos + 1;
 907                while end < editable_region.len()
 908                    && editable_region.as_bytes().get(end) == Some(&b'\n')
 909                {
 910                    end += 1;
 911                }
 912                editable_region[..end].to_string()
 913            }
 914            // No newline found. Fall back to splitting before the last space
 915            // (word-level boundary)
 916            None => match prefill.rfind(' ') {
 917                Some(pos) => prefill[..pos].to_string(),
 918                None => prefill.to_string(),
 919            },
 920        }
 921    }
 922}
 923
 924pub mod hashline {
 925
 926    use std::fmt::Display;
 927
    /// Marker appended as the very last piece of the prompt built by
    /// `write_cursor_excerpt_section` in this module.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Marker written immediately before the hashline-encoded editable region
    /// by `write_cursor_excerpt_section` in this module.
    pub const START_MARKER: &str = "<|fim_middle|>current";
 930
 931    use super::*;
 932
    /// Prefix of a command line that replaces a single line or an inclusive
    /// range of lines (`<|set|>{ref}` or `<|set|>{ref}-{ref}`).
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a command line that inserts content after a referenced line,
    /// or before the first line when no reference follows the marker.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
 935
 936    pub fn special_tokens() -> &'static [&'static str] {
 937        return &[
 938            SET_COMMAND_MARKER,
 939            "<|set_range|>",
 940            INSERT_COMMAND_MARKER,
 941            CURSOR_MARKER,
 942            "<|file_sep|>",
 943            "<|fim_prefix|>",
 944            "<|fim_suffix|>",
 945            "<|fim_middle|>",
 946        ];
 947    }
 948
 949    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
 950    #[derive(Debug, Clone, PartialEq, Eq)]
 951    struct LineRef {
 952        index: usize,
 953        hash: u8,
 954    }
 955
 956    impl Display for LineRef {
 957        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 958            write!(f, "{}:{:02x}", self.index, self.hash)
 959        }
 960    }
 961
 962    pub fn hash_line(line: &[u8]) -> u8 {
 963        let mut h: u8 = 0;
 964        for &byte in line {
 965            h = h.wrapping_add(byte);
 966        }
 967        return h;
 968    }
 969
 970    /// Write the hashline-encoded editable region into `out`. Each line of
 971    /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
 972    /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
 973    /// to the start of `editable_text`).
 974    pub fn write_hashline_editable_region(
 975        out: &mut String,
 976        editable_text: &str,
 977        cursor_offset_in_editable: usize,
 978    ) {
 979        let mut offset = 0;
 980        for (i, line) in editable_text.lines().enumerate() {
 981            let (head, cursor, tail) = if cursor_offset_in_editable > offset
 982                && cursor_offset_in_editable < offset + line.len()
 983            {
 984                (
 985                    &line[..cursor_offset_in_editable - offset],
 986                    CURSOR_MARKER,
 987                    &line[cursor_offset_in_editable - offset..],
 988                )
 989            } else {
 990                (line, "", "")
 991            };
 992            write!(
 993                out,
 994                "\n{}|{head}{cursor}{tail}",
 995                LineRef {
 996                    index: i,
 997                    hash: hash_line(line.as_bytes())
 998                }
 999            )
1000            .unwrap();
1001            offset += line.len() + 1;
1002        }
1003    }
1004
1005    pub fn write_cursor_excerpt_section(
1006        prompt: &mut String,
1007        path: &Path,
1008        context: &str,
1009        editable_range: &Range<usize>,
1010        cursor_offset: usize,
1011    ) {
1012        let path_str = path.to_string_lossy();
1013        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1014
1015        prompt.push_str("<|fim_prefix|>\n");
1016        prompt.push_str(&context[..editable_range.start]);
1017        prompt.push_str(START_MARKER);
1018
1019        let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1020        let editable_region = &context[editable_range.clone()];
1021        write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1022
1023        if !prompt.ends_with('\n') {
1024            prompt.push('\n');
1025        }
1026
1027        prompt.push_str("<|fim_suffix|>\n");
1028        prompt.push_str(&context[editable_range.end..]);
1029        if !prompt.ends_with('\n') {
1030            prompt.push('\n');
1031        }
1032
1033        prompt.push_str(END_MARKER);
1034    }
1035
    /// A single edit command parsed from the model output.
    ///
    /// The `content` of each variant borrows from the model output string the
    /// command was parsed from (lifetime `'a`); it is the raw text between
    /// the command line and the next command line (or the end of the output).
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            // First line of the replaced range.
            start: LineRef,
            // Last line of the replaced range (inclusive).
            end: LineRef,
            // Replacement text for the whole range.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            // Line to insert after; `None` means "before the first line".
            after: Option<LineRef>,
            // Text to insert.
            content: &'a str,
        },
    }
1053
1054    /// Parse a line reference like `3:c3` into a `LineRef`.
1055    fn parse_line_ref(s: &str) -> Option<LineRef> {
1056        let (idx_str, hash_str) = s.split_once(':')?;
1057        let index = idx_str.parse::<usize>().ok()?;
1058        let hash = u8::from_str_radix(hash_str, 16).ok()?;
1059        Some(LineRef { index, hash })
1060    }
1061
1062    /// Parse the model output into a list of `EditCommand`s.
1063    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1064        let mut commands = Vec::new();
1065        let mut offset = 0usize;
1066
1067        while offset < model_output.len() {
1068            let next_nl = model_output[offset..]
1069                .find('\n')
1070                .map(|i| offset + i)
1071                .unwrap_or(model_output.len());
1072            let line = &model_output[offset..next_nl];
1073            let line_end = if next_nl < model_output.len() {
1074                next_nl + 1
1075            } else {
1076                next_nl
1077            };
1078
1079            let trimmed = line.trim();
1080            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1081                (true, spec)
1082            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1083                (false, spec)
1084            } else {
1085                offset = line_end;
1086                continue;
1087            };
1088
1089            let mut content_end = line_end;
1090            let mut scan = line_end;
1091
1092            while scan < model_output.len() {
1093                let body_nl = model_output[scan..]
1094                    .find('\n')
1095                    .map(|i| scan + i)
1096                    .unwrap_or(model_output.len());
1097                let body_line = &model_output[scan..body_nl];
1098                if body_line.trim().starts_with(SET_COMMAND_MARKER)
1099                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1100                {
1101                    break;
1102                }
1103                scan = if body_nl < model_output.len() {
1104                    body_nl + 1
1105                } else {
1106                    body_nl
1107                };
1108                content_end = scan;
1109            }
1110
1111            let content = &model_output[line_end..content_end];
1112
1113            if is_set {
1114                if let Some((start_str, end_str)) = specifier.split_once('-') {
1115                    if let (Some(start), Some(end)) =
1116                        (parse_line_ref(start_str), parse_line_ref(end_str))
1117                    {
1118                        commands.push(EditCommand::Set {
1119                            start,
1120                            end,
1121                            content,
1122                        });
1123                    }
1124                } else if let Some(target) = parse_line_ref(specifier) {
1125                    commands.push(EditCommand::Set {
1126                        start: target.clone(),
1127                        end: target,
1128                        content,
1129                    });
1130                }
1131            } else {
1132                let after = parse_line_ref(specifier);
1133                commands.push(EditCommand::Insert { after, content });
1134            }
1135
1136            offset = scan;
1137        }
1138
1139        commands
1140    }
1141
1142    /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1143    /// (as opposed to being a plain full-replacement output).
1144    /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1145    /// editable region, returning the plain text content.
1146    pub fn strip_hashline_prefixes(region: &str) -> String {
1147        let mut decoded: String = region
1148            .lines()
1149            .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1150            .collect::<Vec<_>>()
1151            .join("\n");
1152        if region.ends_with('\n') {
1153            decoded.push('\n');
1154        }
1155        decoded
1156    }
1157
1158    pub fn output_has_edit_commands(model_output: &str) -> bool {
1159        model_output.contains(SET_COMMAND_MARKER) || model_output.contains(INSERT_COMMAND_MARKER)
1160    }
1161
1162    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1163    /// original editable region text.
1164    ///
1165    /// `editable_region` is the original text of the editable region (without hash
1166    /// prefixes). `model_output` is the raw model response containing edit commands.
1167    ///
1168    /// Returns the full replacement text for the editable region.
1169    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1170        let original_lines: Vec<&str> = editable_region.lines().collect();
1171        let old_hashes: Vec<u8> = original_lines
1172            .iter()
1173            .map(|line| hash_line(line.as_bytes()))
1174            .collect();
1175
1176        let commands = parse_edit_commands(model_output);
1177
1178        // For set operations: indexed by start line → Some((end line index, content))
1179        // For insert operations: indexed by line index → vec of content to insert after
1180        // Insert-before-first is tracked separately.
1181        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1182        let mut insert_before_first: Vec<&str> = Vec::new();
1183        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1184
1185        for command in &commands {
1186            match command {
1187                EditCommand::Set {
1188                    start,
1189                    end,
1190                    content,
1191                } => {
1192                    if start.index < old_hashes.len()
1193                        && end.index < old_hashes.len()
1194                        && start.index <= end.index
1195                        && old_hashes[start.index] == start.hash
1196                        && old_hashes[end.index] == end.hash
1197                    {
1198                        set_ops[start.index] = Some((end.index, *content));
1199                    }
1200                }
1201                EditCommand::Insert { after, content } => match after {
1202                    None => insert_before_first.push(*content),
1203                    Some(line_ref) => {
1204                        if line_ref.index < old_hashes.len()
1205                            && old_hashes[line_ref.index] == line_ref.hash
1206                        {
1207                            insert_after[line_ref.index].push(*content);
1208                        }
1209                    }
1210                },
1211            }
1212        }
1213
1214        let mut result = String::new();
1215
1216        // Emit any insertions before the first line
1217        for content in &insert_before_first {
1218            result.push_str(content);
1219            if !content.ends_with('\n') {
1220                result.push('\n');
1221            }
1222        }
1223
1224        let mut i = 0;
1225        while i < original_lines.len() {
1226            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1227                // Replace lines i..=end_index with the replacement content
1228                result.push_str(replacement);
1229                if !replacement.is_empty() && !replacement.ends_with('\n') {
1230                    result.push('\n');
1231                }
1232                // Emit any insertions after the end of this set range
1233                if *end_index < insert_after.len() {
1234                    for content in &insert_after[*end_index] {
1235                        result.push_str(content);
1236                        if !content.ends_with('\n') {
1237                            result.push('\n');
1238                        }
1239                    }
1240                }
1241                i = end_index + 1;
1242            } else {
1243                // Keep the original line
1244                result.push_str(original_lines[i]);
1245                result.push('\n');
1246                // Emit any insertions after this line
1247                for content in &insert_after[i] {
1248                    result.push_str(content);
1249                    if !content.ends_with('\n') {
1250                        result.push('\n');
1251                    }
1252                }
1253                i += 1;
1254            }
1255        }
1256
1257        // Preserve trailing newline behavior: if the original ended with a
1258        // newline the result already has one; if it didn't, trim the extra one
1259        // we added.
1260        if !editable_region.ends_with('\n') && result.ends_with('\n') {
1261            result.pop();
1262        }
1263
1264        result
1265    }
1266
1267    /// Convert a unified diff patch into hashline edit commands.
1268    ///
1269    /// Parses the unified diff `patch` directly to determine which lines of
1270    /// `old_text` are deleted/replaced and what new lines are added, then emits
1271    /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1272    /// `{index}:{hash}` identifiers.
1273    ///
1274    /// `cursor_offset` is an optional byte offset into the first hunk's new
1275    /// text (context + additions) where the cursor marker should be placed.
1276    pub fn patch_to_edit_commands(
1277        old_text: &str,
1278        patch: &str,
1279        cursor_offset: Option<usize>,
1280    ) -> Result<String> {
1281        let old_lines: Vec<&str> = old_text.lines().collect();
1282        let old_hashes: Vec<u8> = old_lines
1283            .iter()
1284            .map(|line| hash_line(line.as_bytes()))
1285            .collect();
1286
1287        let mut result = String::new();
1288        let mut first_hunk = true;
1289
1290        struct Hunk<'a> {
1291            line_range: Range<usize>,
1292            new_text_lines: Vec<&'a str>,
1293            cursor_line_offset_in_new_text: Option<(usize, usize)>,
1294        }
1295
1296        // Parse the patch line by line. We only care about hunk headers,
1297        // context, deletions, and additions.
1298        let mut old_line_index: usize = 0;
1299        let mut current_hunk: Option<Hunk> = None;
1300        // Byte offset tracking within the hunk's new text for cursor placement.
1301        let mut new_text_byte_offset: usize = 0;
1302        // The line index of the last old line seen before/in the current hunk
1303        // (used for insert-after reference).
1304        let mut last_old_line_before_hunk: Option<usize> = None;
1305
1306        fn flush_hunk(
1307            hunk: Hunk,
1308            last_old_line: Option<usize>,
1309            result: &mut String,
1310            old_hashes: &[u8],
1311        ) {
1312            if hunk.line_range.is_empty() {
1313                // Pure insertion — reference the old line to insert after when in bounds.
1314                if let Some(after) = last_old_line
1315                    && let Some(&hash) = old_hashes.get(after)
1316                {
1317                    write!(
1318                        result,
1319                        "{INSERT_COMMAND_MARKER}{}\n",
1320                        LineRef { index: after, hash }
1321                    )
1322                    .unwrap();
1323                } else {
1324                    result.push_str(INSERT_COMMAND_MARKER);
1325                    result.push('\n');
1326                }
1327            } else {
1328                let start = hunk.line_range.start;
1329                let end_exclusive = hunk.line_range.end;
1330                let deleted_line_count = end_exclusive.saturating_sub(start);
1331
1332                if deleted_line_count == 1 {
1333                    if let Some(&hash) = old_hashes.get(start) {
1334                        write!(
1335                            result,
1336                            "{SET_COMMAND_MARKER}{}\n",
1337                            LineRef { index: start, hash }
1338                        )
1339                        .unwrap();
1340                    } else {
1341                        result.push_str(SET_COMMAND_MARKER);
1342                        result.push('\n');
1343                    }
1344                } else {
1345                    let end_inclusive = end_exclusive - 1;
1346                    match (
1347                        old_hashes.get(start).copied(),
1348                        old_hashes.get(end_inclusive).copied(),
1349                    ) {
1350                        (Some(start_hash), Some(end_hash)) => {
1351                            write!(
1352                                result,
1353                                "{SET_COMMAND_MARKER}{}-{}\n",
1354                                LineRef {
1355                                    index: start,
1356                                    hash: start_hash
1357                                },
1358                                LineRef {
1359                                    index: end_inclusive,
1360                                    hash: end_hash
1361                                }
1362                            )
1363                            .unwrap();
1364                        }
1365                        _ => {
1366                            result.push_str(SET_COMMAND_MARKER);
1367                            result.push('\n');
1368                        }
1369                    }
1370                }
1371            }
1372            for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1373                if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1374                    && line_offset == cursor_line_offset
1375                {
1376                    result.push_str(&line[..char_offset]);
1377                    result.push_str(CURSOR_MARKER);
1378                    result.push_str(&line[char_offset..]);
1379                    continue;
1380                }
1381
1382                result.push_str(line);
1383            }
1384        }
1385
1386        for raw_line in patch.split_inclusive('\n') {
1387            if raw_line.starts_with("@@") {
1388                // Flush any pending change hunk from a previous patch hunk.
1389                if let Some(hunk) = current_hunk.take() {
1390                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1391                }
1392
1393                // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1394                // We intentionally do not trust old_start as a direct local index into `old_text`,
1395                // because some patches are produced against a larger file region and carry
1396                // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1397                if first_hunk {
1398                    new_text_byte_offset = 0;
1399                    first_hunk = false;
1400                }
1401                continue;
1402            }
1403
1404            if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1405                continue;
1406            }
1407            if raw_line.starts_with("\\ No newline") {
1408                continue;
1409            }
1410
1411            if raw_line.starts_with('-') {
1412                // Extend or start a change hunk with this deleted old line.
1413                match &mut current_hunk {
1414                    Some(Hunk {
1415                        line_range: range, ..
1416                    }) => range.end = old_line_index + 1,
1417                    None => {
1418                        current_hunk = Some(Hunk {
1419                            line_range: old_line_index..old_line_index + 1,
1420                            new_text_lines: Vec::new(),
1421                            cursor_line_offset_in_new_text: None,
1422                        });
1423                    }
1424                }
1425                old_line_index += 1;
1426            } else if let Some(added_content) = raw_line.strip_prefix('+') {
1427                // Place cursor marker if cursor_offset falls within this line.
1428                let mut cursor_line_offset = None;
1429                if let Some(cursor_off) = cursor_offset
1430                    && (first_hunk
1431                        || cursor_off >= new_text_byte_offset
1432                            && cursor_off <= new_text_byte_offset + added_content.len())
1433                {
1434                    let line_offset = added_content.floor_char_boundary(
1435                        cursor_off
1436                            .saturating_sub(new_text_byte_offset)
1437                            .min(added_content.len()),
1438                    );
1439                    cursor_line_offset = Some(line_offset);
1440                }
1441
1442                new_text_byte_offset += added_content.len();
1443
1444                let hunk = current_hunk.get_or_insert(Hunk {
1445                    line_range: old_line_index..old_line_index,
1446                    new_text_lines: vec![],
1447                    cursor_line_offset_in_new_text: None,
1448                });
1449                hunk.new_text_lines.push(added_content);
1450                hunk.cursor_line_offset_in_new_text = cursor_line_offset
1451                    .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1452            } else {
1453                // Context line (starts with ' ' or is empty).
1454                if let Some(hunk) = current_hunk.take() {
1455                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1456                }
1457                last_old_line_before_hunk = Some(old_line_index);
1458                old_line_index += 1;
1459                let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1460                new_text_byte_offset += content.len();
1461            }
1462        }
1463
1464        // Flush final group.
1465        if let Some(hunk) = current_hunk.take() {
1466            flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1467        }
1468
1469        // Trim a single trailing newline.
1470        if result.ends_with('\n') {
1471            result.pop();
1472        }
1473
1474        Ok(result)
1475    }
1476
1477    #[cfg(test)]
1478    mod tests {
1479        use super::*;
1480        use indoc::indoc;
1481
        // Table-driven test of `hashline::write_cursor_excerpt_section`: each
        // case renders a prompt for `test.rs` and compares it with the exact
        // expected text, including line hashes and cursor marker placement.
        #[test]
        fn test_format_cursor_region() {
            struct Case {
                name: &'static str,
                context: &'static str,
                editable_range: Range<usize>,
                cursor_offset: usize,
                expected: &'static str,
            }

            let cases = [
                Case {
                    name: "basic_cursor_placement",
                    context: "hello world\n",
                    editable_range: 0..12,
                    cursor_offset: 5,
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:5c|hello<|user_cursor|> world
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "multiline_cursor_on_second_line",
                    context: "aaa\nbbb\nccc\n",
                    editable_range: 0..12,
                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:23|aaa
                    1:26|b<|user_cursor|>bb
                    2:29|ccc
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "no_trailing_newline_in_context",
                    context: "line1\nline2",
                    editable_range: 0..11,
                    cursor_offset: 3,
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:d9|lin<|user_cursor|>e1
                    1:da|line2
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "leading_newline_in_editable_region",
                    context: "\nabc\n",
                    editable_range: 0..5,
                    cursor_offset: 2, // byte 2 = just after 'a' in "abc" (byte 0 is the leading \n)
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:00|
                    1:26|a<|user_cursor|>bc
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "with_suffix",
                    context: "abc\ndef",
                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
                    cursor_offset: 2,
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:26|ab<|user_cursor|>c
                    <|fim_suffix|>
                    def
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "unicode_two_byte_chars",
                    context: "héllo\n",
                    editable_range: 0..7,
                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:1b|hé<|user_cursor|>llo
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "unicode_three_byte_chars",
                    context: "日本語\n",
                    editable_range: 0..10,
                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:80|日本<|user_cursor|>語
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "unicode_four_byte_chars",
                    context: "a🌍b\n",
                    editable_range: 0..7,
                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:6b|a🌍<|user_cursor|>b
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "cursor_at_start_of_region_not_placed",
                    context: "abc\n",
                    editable_range: 0..4,
                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:26|abc
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "cursor_at_end_of_line_not_placed",
                    context: "abc\ndef\n",
                    editable_range: 0..8,
                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:26|abc
                    1:2f|def
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "cursor_offset_relative_to_context_not_editable_region",
                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
                    // write_cursor_excerpt_section must subtract it before comparing against
                    // per-line offsets within the editable region.
                    context: "pre\naaa\nbbb\nsuf\n",
                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    pre
                    <|fim_middle|>current
                    0:23|aaa
                    1:26|b<|user_cursor|>bb
                    <|fim_suffix|>
                    suf
                    <|fim_middle|>updated"},
                },
            ];

            // Render each case into a fresh prompt and compare verbatim.
            for case in &cases {
                let mut prompt = String::new();
                hashline::write_cursor_excerpt_section(
                    &mut prompt,
                    Path::new("test.rs"),
                    case.context,
                    &case.editable_range,
                    case.cursor_offset,
                );
                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
            }
        }
1662
1663        #[test]
1664        fn test_apply_edit_commands() {
1665            struct Case {
1666                name: &'static str,
1667                original: &'static str,
1668                model_output: &'static str,
1669                expected: &'static str,
1670            }
1671
1672            let cases = vec![
1673                Case {
1674                    name: "set_single_line",
1675                    original: indoc! {"
1676                    let mut total = 0;
1677                    for product in products {
1678                        total += ;
1679                    }
1680                    total
1681                "},
1682                    model_output: indoc! {"
1683                    <|set|>2:87
1684                        total += product.price;
1685                "},
1686                    expected: indoc! {"
1687                    let mut total = 0;
1688                    for product in products {
1689                        total += product.price;
1690                    }
1691                    total
1692                "},
1693                },
1694                Case {
1695                    name: "set_range",
1696                    original: indoc! {"
1697                    fn foo() {
1698                        let x = 1;
1699                        let y = 2;
1700                        let z = 3;
1701                    }
1702                "},
1703                    model_output: indoc! {"
1704                    <|set|>1:46-3:4a
1705                        let sum = 6;
1706                "},
1707                    expected: indoc! {"
1708                    fn foo() {
1709                        let sum = 6;
1710                    }
1711                "},
1712                },
1713                Case {
1714                    name: "insert_after_line",
1715                    original: indoc! {"
1716                    fn main() {
1717                        let x = 1;
1718                    }
1719                "},
1720                    model_output: indoc! {"
1721                    <|insert|>1:46
1722                        let y = 2;
1723                "},
1724                    expected: indoc! {"
1725                    fn main() {
1726                        let x = 1;
1727                        let y = 2;
1728                    }
1729                "},
1730                },
1731                Case {
1732                    name: "insert_before_first",
1733                    original: indoc! {"
1734                    let x = 1;
1735                    let y = 2;
1736                "},
1737                    model_output: indoc! {"
1738                    <|insert|>
1739                    use std::io;
1740                "},
1741                    expected: indoc! {"
1742                    use std::io;
1743                    let x = 1;
1744                    let y = 2;
1745                "},
1746                },
1747                Case {
1748                    name: "set_with_cursor_marker",
1749                    original: indoc! {"
1750                    fn main() {
1751                        println!();
1752                    }
1753                "},
1754                    model_output: indoc! {"
1755                    <|set|>1:34
1756                        eprintln!(\"<|user_cursor|>\");
1757                "},
1758                    expected: indoc! {"
1759                    fn main() {
1760                        eprintln!(\"<|user_cursor|>\");
1761                    }
1762                "},
1763                },
1764                Case {
1765                    name: "multiple_set_commands",
1766                    original: indoc! {"
1767                    aaa
1768                    bbb
1769                    ccc
1770                    ddd
1771                "},
1772                    model_output: indoc! {"
1773                    <|set|>0:23
1774                    AAA
1775                    <|set|>2:29
1776                    CCC
1777                "},
1778                    expected: indoc! {"
1779                    AAA
1780                    bbb
1781                    CCC
1782                    ddd
1783                "},
1784                },
1785                Case {
1786                    name: "set_range_multiline_replacement",
1787                    original: indoc! {"
1788                    fn handle_submit() {
1789                    }
1790
1791                    fn handle_keystroke() {
1792                "},
1793                    model_output: indoc! {"
1794                    <|set|>0:3f-1:7d
1795                    fn handle_submit(modal_state: &mut ModalState) {
1796                        <|user_cursor|>
1797                    }
1798                "},
1799                    expected: indoc! {"
1800                    fn handle_submit(modal_state: &mut ModalState) {
1801                        <|user_cursor|>
1802                    }
1803
1804                    fn handle_keystroke() {
1805                "},
1806                },
1807                Case {
1808                    name: "no_edit_commands_returns_original",
1809                    original: indoc! {"
1810                    hello
1811                    world
1812                "},
1813                    model_output: "some random text with no commands",
1814                    expected: indoc! {"
1815                    hello
1816                    world
1817                "},
1818                },
1819                Case {
1820                    name: "wrong_hash_set_ignored",
1821                    original: indoc! {"
1822                    aaa
1823                    bbb
1824                "},
1825                    model_output: indoc! {"
1826                    <|set|>0:ff
1827                    ZZZ
1828                "},
1829                    expected: indoc! {"
1830                    aaa
1831                    bbb
1832                "},
1833                },
1834                Case {
1835                    name: "insert_and_set_combined",
1836                    original: indoc! {"
1837                    alpha
1838                    beta
1839                    gamma
1840                "},
1841                    model_output: indoc! {"
1842                    <|set|>0:06
1843                    ALPHA
1844                    <|insert|>1:9c
1845                    beta_extra
1846                "},
1847                    expected: indoc! {"
1848                    ALPHA
1849                    beta
1850                    beta_extra
1851                    gamma
1852                "},
1853                },
1854                Case {
1855                    name: "no_trailing_newline_preserved",
1856                    original: "hello\nworld",
1857                    model_output: indoc! {"
1858                    <|set|>0:14
1859                    HELLO
1860                "},
1861                    expected: "HELLO\nworld",
1862                },
1863                Case {
1864                    name: "set_range_hash_mismatch_in_end_bound",
1865                    original: indoc! {"
1866                    one
1867                    two
1868                    three
1869                "},
1870                    model_output: indoc! {"
1871                    <|set|>0:42-2:ff
1872                    ONE_TWO_THREE
1873                "},
1874                    expected: indoc! {"
1875                    one
1876                    two
1877                    three
1878                "},
1879                },
1880                Case {
1881                    name: "set_range_start_greater_than_end_ignored",
1882                    original: indoc! {"
1883                    a
1884                    b
1885                    c
1886                "},
1887                    model_output: indoc! {"
1888                    <|set|>2:63-1:62
1889                    X
1890                "},
1891                    expected: indoc! {"
1892                    a
1893                    b
1894                    c
1895                "},
1896                },
1897                Case {
1898                    name: "insert_out_of_bounds_ignored",
1899                    original: indoc! {"
1900                    x
1901                    y
1902                "},
1903                    model_output: indoc! {"
1904                    <|insert|>99:aa
1905                    z
1906                "},
1907                    expected: indoc! {"
1908                    x
1909                    y
1910                "},
1911                },
1912                Case {
1913                    name: "set_out_of_bounds_ignored",
1914                    original: indoc! {"
1915                    x
1916                    y
1917                "},
1918                    model_output: indoc! {"
1919                    <|set|>99:aa
1920                    z
1921                "},
1922                    expected: indoc! {"
1923                    x
1924                    y
1925                "},
1926                },
1927                Case {
1928                    name: "malformed_set_command_ignored",
1929                    original: indoc! {"
1930                    alpha
1931                    beta
1932                "},
1933                    model_output: indoc! {"
1934                    <|set|>not-a-line-ref
1935                    UPDATED
1936                "},
1937                    expected: indoc! {"
1938                    alpha
1939                    beta
1940                "},
1941                },
1942                Case {
1943                    name: "malformed_insert_hash_treated_as_before_first",
1944                    original: indoc! {"
1945                    alpha
1946                    beta
1947                "},
1948                    model_output: indoc! {"
1949                    <|insert|>1:nothex
1950                    preamble
1951                "},
1952                    expected: indoc! {"
1953                    preamble
1954                    alpha
1955                    beta
1956                "},
1957                },
1958                Case {
1959                    name: "set_then_insert_same_target_orders_insert_after_replacement",
1960                    original: indoc! {"
1961                    cat
1962                    dog
1963                "},
1964                    model_output: indoc! {"
1965                    <|set|>0:38
1966                    CAT
1967                    <|insert|>0:38
1968                    TAIL
1969                "},
1970                    expected: indoc! {"
1971                    CAT
1972                    TAIL
1973                    dog
1974                "},
1975                },
1976                Case {
1977                    name: "overlapping_set_ranges_last_wins",
1978                    original: indoc! {"
1979                    a
1980                    b
1981                    c
1982                    d
1983                "},
1984                    model_output: indoc! {"
1985                    <|set|>0:61-2:63
1986                    FIRST
1987                    <|set|>1:62-3:64
1988                    SECOND
1989                "},
1990                    expected: indoc! {"
1991                    FIRST
1992                    d
1993                "},
1994                },
1995                Case {
1996                    name: "insert_before_first_and_after_line",
1997                    original: indoc! {"
1998                    a
1999                    b
2000                "},
2001                    model_output: indoc! {"
2002                    <|insert|>
2003                    HEAD
2004                    <|insert|>0:61
2005                    MID
2006                "},
2007                    expected: indoc! {"
2008                    HEAD
2009                    a
2010                    MID
2011                    b
2012                "},
2013                },
2014            ];
2015
2016            for case in &cases {
2017                let result = hashline::apply_edit_commands(case.original, &case.model_output);
2018                assert_eq!(result, case.expected, "failed case: {}", case.name);
2019            }
2020        }
2021
2022        #[test]
2023        fn test_output_has_edit_commands() {
2024            assert!(hashline::output_has_edit_commands(&format!(
2025                "{}0:ab\nnew",
2026                SET_COMMAND_MARKER
2027            )));
2028            assert!(hashline::output_has_edit_commands(&format!(
2029                "{}0:ab\nnew",
2030                INSERT_COMMAND_MARKER
2031            )));
2032            assert!(hashline::output_has_edit_commands(&format!(
2033                "some text\n{}1:cd\nstuff",
2034                SET_COMMAND_MARKER
2035            )));
2036            assert!(!hashline::output_has_edit_commands("just plain text"));
2037            assert!(!hashline::output_has_edit_commands("NO_EDITS"));
2038        }
2039
2040        // ---- hashline::patch_to_edit_commands round-trip tests ----
2041
2042        #[test]
2043        fn test_patch_to_edit_commands() {
2044            struct Case {
2045                name: &'static str,
2046                old: &'static str,
2047                patch: &'static str,
2048                expected_new: &'static str,
2049            }
2050
2051            let cases = [
2052                Case {
2053                    name: "single_line_replacement",
2054                    old: indoc! {"
2055                    let mut total = 0;
2056                    for product in products {
2057                        total += ;
2058                    }
2059                    total
2060                "},
2061                    patch: indoc! {"
2062                    @@ -1,5 +1,5 @@
2063                     let mut total = 0;
2064                     for product in products {
2065                    -    total += ;
2066                    +    total += product.price;
2067                     }
2068                     total
2069                "},
2070                    expected_new: indoc! {"
2071                    let mut total = 0;
2072                    for product in products {
2073                        total += product.price;
2074                    }
2075                    total
2076                "},
2077                },
2078                Case {
2079                    name: "multiline_replacement",
2080                    old: indoc! {"
2081                    fn foo() {
2082                        let x = 1;
2083                        let y = 2;
2084                        let z = 3;
2085                    }
2086                "},
2087                    patch: indoc! {"
2088                    @@ -1,5 +1,3 @@
2089                     fn foo() {
2090                    -    let x = 1;
2091                    -    let y = 2;
2092                    -    let z = 3;
2093                    +    let sum = 1 + 2 + 3;
2094                     }
2095                "},
2096                    expected_new: indoc! {"
2097                    fn foo() {
2098                        let sum = 1 + 2 + 3;
2099                    }
2100                "},
2101                },
2102                Case {
2103                    name: "insertion",
2104                    old: indoc! {"
2105                    fn main() {
2106                        let x = 1;
2107                    }
2108                "},
2109                    patch: indoc! {"
2110                    @@ -1,3 +1,4 @@
2111                     fn main() {
2112                         let x = 1;
2113                    +    let y = 2;
2114                     }
2115                "},
2116                    expected_new: indoc! {"
2117                    fn main() {
2118                        let x = 1;
2119                        let y = 2;
2120                    }
2121                "},
2122                },
2123                Case {
2124                    name: "insertion_before_first",
2125                    old: indoc! {"
2126                    let x = 1;
2127                    let y = 2;
2128                "},
2129                    patch: indoc! {"
2130                    @@ -1,2 +1,3 @@
2131                    +use std::io;
2132                     let x = 1;
2133                     let y = 2;
2134                "},
2135                    expected_new: indoc! {"
2136                    use std::io;
2137                    let x = 1;
2138                    let y = 2;
2139                "},
2140                },
2141                Case {
2142                    name: "deletion",
2143                    old: indoc! {"
2144                    aaa
2145                    bbb
2146                    ccc
2147                    ddd
2148                "},
2149                    patch: indoc! {"
2150                    @@ -1,4 +1,2 @@
2151                     aaa
2152                    -bbb
2153                    -ccc
2154                     ddd
2155                "},
2156                    expected_new: indoc! {"
2157                    aaa
2158                    ddd
2159                "},
2160                },
2161                Case {
2162                    name: "multiple_changes",
2163                    old: indoc! {"
2164                    alpha
2165                    beta
2166                    gamma
2167                    delta
2168                    epsilon
2169                "},
2170                    patch: indoc! {"
2171                    @@ -1,5 +1,5 @@
2172                    -alpha
2173                    +ALPHA
2174                     beta
2175                     gamma
2176                    -delta
2177                    +DELTA
2178                     epsilon
2179                "},
2180                    expected_new: indoc! {"
2181                    ALPHA
2182                    beta
2183                    gamma
2184                    DELTA
2185                    epsilon
2186                "},
2187                },
2188                Case {
2189                    name: "replace_with_insertion",
2190                    old: indoc! {r#"
2191                    fn handle() {
2192                        modal_state.close();
2193                        modal_state.dismiss();
2194                "#},
2195                    patch: indoc! {r#"
2196                    @@ -1,3 +1,4 @@
2197                     fn handle() {
2198                         modal_state.close();
2199                    +    eprintln!("");
2200                         modal_state.dismiss();
2201                "#},
2202                    expected_new: indoc! {r#"
2203                    fn handle() {
2204                        modal_state.close();
2205                        eprintln!("");
2206                        modal_state.dismiss();
2207                "#},
2208                },
2209                Case {
2210                    name: "complete_replacement",
2211                    old: indoc! {"
2212                    aaa
2213                    bbb
2214                    ccc
2215                "},
2216                    patch: indoc! {"
2217                    @@ -1,3 +1,3 @@
2218                    -aaa
2219                    -bbb
2220                    -ccc
2221                    +xxx
2222                    +yyy
2223                    +zzz
2224                "},
2225                    expected_new: indoc! {"
2226                    xxx
2227                    yyy
2228                    zzz
2229                "},
2230                },
2231                Case {
2232                    name: "add_function_body",
2233                    old: indoc! {"
2234                    fn foo() {
2235                        modal_state.dismiss();
2236                    }
2237
2238                    fn
2239
2240                    fn handle_keystroke() {
2241                "},
2242                    patch: indoc! {"
2243                    @@ -1,6 +1,8 @@
2244                     fn foo() {
2245                         modal_state.dismiss();
2246                     }
2247
2248                    -fn
2249                    +fn handle_submit() {
2250                    +    todo()
2251                    +}
2252
2253                     fn handle_keystroke() {
2254                "},
2255                    expected_new: indoc! {"
2256                    fn foo() {
2257                        modal_state.dismiss();
2258                    }
2259
2260                    fn handle_submit() {
2261                        todo()
2262                    }
2263
2264                    fn handle_keystroke() {
2265                "},
2266                },
2267                Case {
2268                    name: "with_cursor_offset",
2269                    old: indoc! {r#"
2270                    fn main() {
2271                        println!();
2272                    }
2273                "#},
2274                    patch: indoc! {r#"
2275                    @@ -1,3 +1,3 @@
2276                     fn main() {
2277                    -    println!();
2278                    +    eprintln!("");
2279                     }
2280                "#},
2281                    expected_new: indoc! {r#"
2282                    fn main() {
2283                        eprintln!("<|user_cursor|>");
2284                    }
2285                "#},
2286                },
2287                Case {
2288                    name: "non_local_hunk_header_pure_insertion_repro",
2289                    old: indoc! {"
2290                    aaa
2291                    bbb
2292                "},
2293                    patch: indoc! {"
2294                    @@ -20,2 +20,3 @@
2295                     aaa
2296                    +xxx
2297                     bbb
2298                "},
2299                    expected_new: indoc! {"
2300                    aaa
2301                    xxx
2302                    bbb
2303                "},
2304                },
2305            ];
2306
2307            for case in &cases {
2308                // The cursor_offset for patch_to_edit_commands is relative to
2309                // the first hunk's new text (context + additions). We compute
2310                // it by finding where the marker sits in the expected output
2311                // (which mirrors the new text of the hunk).
2312                let cursor_offset = case.expected_new.find(CURSOR_MARKER);
2313
2314                let commands =
2315                    hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset)
2316                        .unwrap_or_else(|e| panic!("failed case {}: {e}", case.name));
2317
2318                assert!(
2319                    hashline::output_has_edit_commands(&commands),
2320                    "case {}: expected edit commands, got: {commands:?}",
2321                    case.name,
2322                );
2323
2324                let applied = hashline::apply_edit_commands(case.old, &commands);
2325                assert_eq!(applied, case.expected_new, "case {}", case.name);
2326            }
2327        }
2328    }
2329}
2330
2331pub mod seed_coder {
2332    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2333    //!
2334    //! Seed-Coder uses different FIM tokens and order than Qwen:
2335    //! - SPM order: suffix comes FIRST, then prefix, then middle
2336    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2337    //! - File markers: StarCoder-style `<filename>path` (single token + path)
2338    //!
2339    //! All context (related files, edit history) goes in the PREFIX section.
2340    //! The suffix contains only code after the editable region.
2341    //!
2342    //! Example prompt:
2343    //!
2344    //! <[fim-suffix]>
2345    //! code after editable region
2346    //! <[fim-prefix]><filename>related/file.py
2347    //! related file content
2348    //!
2349    //! <filename>edit_history
2350    //! --- a/some_file.py
2351    //! +++ b/some_file.py
2352    //! -old
2353    //! +new
2354    //!
2355    //! <filename>path/to/target_file.py
2356    //! code before editable region
2357    //! <<<<<<< CURRENT
2358    //! code that
2359    //! needs to<|user_cursor|>
2360    //! be rewritten
2361    //! =======
2362    //! <[fim-middle]>
2363    //!
2364    //! Expected output (model generates):
2365    //!
2366    //! updated
2367    //! code with
2368    //! changes applied
2369    //! >>>>>>> UPDATED
2370
2371    use super::*;
2372
2373    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
2374    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
2375    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
2376    pub const FILE_MARKER: &str = "<filename>";
2377
2378    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
2379    pub const SEPARATOR: &str = "=======\n";
2380    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
2381
2382    pub fn special_tokens() -> &'static [&'static str] {
2383        &[
2384            FIM_SUFFIX,
2385            FIM_PREFIX,
2386            FIM_MIDDLE,
2387            FILE_MARKER,
2388            START_MARKER,
2389            SEPARATOR,
2390            END_MARKER,
2391            CURSOR_MARKER,
2392        ]
2393    }
2394
2395    pub fn write_cursor_excerpt_section(
2396        prompt: &mut String,
2397        path: &Path,
2398        context: &str,
2399        editable_range: &Range<usize>,
2400        cursor_offset: usize,
2401    ) {
2402        let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2403        prompt.push_str(&section);
2404    }
2405
2406    pub fn format_prompt_with_budget(
2407        path: &Path,
2408        context: &str,
2409        editable_range: &Range<usize>,
2410        cursor_offset: usize,
2411        events: &[Arc<Event>],
2412        related_files: &[RelatedFile],
2413        max_tokens: usize,
2414    ) -> String {
2415        let suffix_section = build_suffix_section(context, editable_range);
2416        let cursor_prefix_section =
2417            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2418
2419        let suffix_tokens = estimate_tokens(suffix_section.len());
2420        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
2421        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2422
2423        let edit_history_section = super::format_edit_history_within_budget(
2424            events,
2425            FILE_MARKER,
2426            "edit_history",
2427            budget_after_cursor,
2428        );
2429        let edit_history_tokens = estimate_tokens(edit_history_section.len());
2430        let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
2431
2432        let related_files_section = super::format_related_files_within_budget(
2433            related_files,
2434            FILE_MARKER,
2435            "",
2436            budget_after_edit_history,
2437        );
2438
2439        let mut prompt = String::new();
2440        prompt.push_str(&suffix_section);
2441        prompt.push_str(FIM_PREFIX);
2442        prompt.push_str(&related_files_section);
2443        if !related_files_section.is_empty() {
2444            prompt.push('\n');
2445        }
2446        prompt.push_str(&edit_history_section);
2447        if !edit_history_section.is_empty() {
2448            prompt.push('\n');
2449        }
2450        prompt.push_str(&cursor_prefix_section);
2451        prompt.push_str(FIM_MIDDLE);
2452        prompt
2453    }
2454
2455    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2456        let mut section = String::new();
2457        section.push_str(FIM_SUFFIX);
2458        section.push_str(&context[editable_range.end..]);
2459        if !section.ends_with('\n') {
2460            section.push('\n');
2461        }
2462        section
2463    }
2464
2465    fn build_cursor_prefix_section(
2466        path: &Path,
2467        context: &str,
2468        editable_range: &Range<usize>,
2469        cursor_offset: usize,
2470    ) -> String {
2471        let mut section = String::new();
2472        let path_str = path.to_string_lossy();
2473        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2474
2475        section.push_str(&context[..editable_range.start]);
2476        section.push_str(START_MARKER);
2477        section.push_str(&context[editable_range.start..cursor_offset]);
2478        section.push_str(CURSOR_MARKER);
2479        section.push_str(&context[cursor_offset..editable_range.end]);
2480        if !section.ends_with('\n') {
2481            section.push('\n');
2482        }
2483        section.push_str(SEPARATOR);
2484        section
2485    }
2486}
2487
2488/// The zeta1 prompt format
2489pub mod zeta1 {
2490    use super::*;
2491    use std::fmt::Write;
2492
    /// Marker inserted at the cursor position inside the zeta1 excerpt.
    /// Distinct from the crate-level `CURSOR_MARKER` (`<|user_cursor|>`).
    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
    /// Marker line written at the top of the excerpt when it starts at the
    /// beginning of the file.
    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
    /// Marker line written immediately before the editable region.
    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
    /// Counterpart of [`EDITABLE_REGION_START_MARKER`]; presumably written
    /// after the editable region (confirm against `format_zeta1_excerpt`).
    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";

    /// Fixed preamble of every zeta1 prompt; the user-edit events follow it.
    const INSTRUCTION_HEADER: &str = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n\n",
        "### User Edits:\n\n"
    );
    /// Heading placed between the events and the excerpt.
    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
    /// Heading that ends the prompt; the model's response follows it.
    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
2507
2508    /// Formats a complete zeta1 prompt from the input events and excerpt.
2509    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
2510        let mut prompt = String::with_capacity(
2511            INSTRUCTION_HEADER.len()
2512                + input_events.len()
2513                + EXCERPT_HEADER.len()
2514                + input_excerpt.len()
2515                + RESPONSE_HEADER.len(),
2516        );
2517        prompt.push_str(INSTRUCTION_HEADER);
2518        prompt.push_str(input_events);
2519        prompt.push_str(EXCERPT_HEADER);
2520        prompt.push_str(input_excerpt);
2521        prompt.push_str(RESPONSE_HEADER);
2522        prompt
2523    }
2524
2525    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
2526    /// editable and context byte-offset ranges within `cursor_excerpt`.
2527    pub fn format_zeta1_from_input(
2528        input: &ZetaPromptInput,
2529        editable_range: Range<usize>,
2530        context_range: Range<usize>,
2531    ) -> String {
2532        let events = format_zeta1_events(&input.events);
2533        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
2534        format_zeta1_prompt(&events, &excerpt)
2535    }
2536
2537    /// Formats events in zeta1 style (oldest first).
2538    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
2539        let mut result = String::new();
2540        for event in events {
2541            let event_string = format_zeta1_event(event);
2542            if event_string.is_empty() {
2543                continue;
2544            }
2545            if !result.is_empty() {
2546                result.push_str("\n\n");
2547            }
2548            result.push_str(&event_string);
2549        }
2550        result
2551    }
2552
2553    fn format_zeta1_event(event: &Event) -> String {
2554        match event {
2555            Event::BufferChange {
2556                path,
2557                old_path,
2558                diff,
2559                ..
2560            } => {
2561                let mut prompt = String::new();
2562                if old_path != path {
2563                    writeln!(
2564                        prompt,
2565                        "User renamed {} to {}\n",
2566                        old_path.display(),
2567                        path.display()
2568                    )
2569                    .ok();
2570                }
2571                if !diff.is_empty() {
2572                    write!(
2573                        prompt,
2574                        "User edited {}:\n```diff\n{}\n```",
2575                        path.display(),
2576                        diff
2577                    )
2578                    .ok();
2579                }
2580                prompt
2581            }
2582        }
2583    }
2584
2585    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
2586    /// within `cursor_excerpt`.
2587    fn format_zeta1_excerpt(
2588        input: &ZetaPromptInput,
2589        editable_range: Range<usize>,
2590        context_range: Range<usize>,
2591    ) -> String {
2592        let path_str = input.cursor_path.to_string_lossy();
2593        let excerpt = &*input.cursor_excerpt;
2594        let cursor_offset = input.cursor_offset_in_excerpt;
2595
2596        let mut prompt = String::new();
2597        writeln!(&mut prompt, "```{path_str}").ok();
2598
2599        let starts_at_file_beginning =
2600            input.excerpt_start_row == Some(0) && context_range.start == 0;
2601        if starts_at_file_beginning {
2602            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
2603        }
2604
2605        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
2606
2607        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
2608        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
2609        prompt.push_str(CURSOR_MARKER);
2610        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
2611        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
2612
2613        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
2614        write!(prompt, "\n```").ok();
2615
2616        prompt
2617    }
2618
2619    /// Cleans zeta1 model output by extracting content between editable region
2620    /// markers and converting the zeta1 cursor marker to the universal one.
2621    /// Returns `None` if the output doesn't contain the expected markers.
2622    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
2623        let content = output.replace(CURSOR_MARKER, "");
2624
2625        let content_start = content
2626            .find(EDITABLE_REGION_START_MARKER)
2627            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
2628            .map(|pos| {
2629                if content.as_bytes().get(pos) == Some(&b'\n') {
2630                    pos + 1
2631                } else {
2632                    pos
2633                }
2634            })
2635            .unwrap_or(0);
2636
2637        let content_end = content
2638            .find(EDITABLE_REGION_END_MARKER)
2639            .map(|pos| {
2640                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
2641                    pos - 1
2642                } else {
2643                    pos
2644                }
2645            })
2646            .unwrap_or(content.len());
2647
2648        if content_start > content_end {
2649            return Some(String::new());
2650        }
2651
2652        let extracted = &content[content_start..content_end];
2653
2654        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
2655            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
2656            let text_before_cursor = text_before_cursor
2657                .find(EDITABLE_REGION_START_MARKER)
2658                .map(|pos| {
2659                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
2660                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
2661                        after_marker + 1
2662                    } else {
2663                        after_marker
2664                    }
2665                })
2666                .unwrap_or(0);
2667            let offset_in_extracted = zeta1_cursor_pos
2668                .saturating_sub(text_before_cursor)
2669                .min(extracted.len());
2670            offset_in_extracted
2671        });
2672
2673        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
2674        if let Some(offset) = cursor_offset {
2675            result.push_str(&extracted[..offset]);
2676            result.push_str(super::CURSOR_MARKER);
2677            result.push_str(&extracted[offset..]);
2678        } else {
2679            result.push_str(extracted);
2680        }
2681
2682        Some(result)
2683    }
2684}
2685
2686#[cfg(test)]
2687mod tests {
2688    use super::*;
2689    use indoc::indoc;
2690
2691    fn make_input(
2692        cursor_excerpt: &str,
2693        editable_range: Range<usize>,
2694        cursor_offset: usize,
2695        events: Vec<Event>,
2696        related_files: Vec<RelatedFile>,
2697    ) -> ZetaPromptInput {
2698        let context_range = 0..cursor_excerpt.len();
2699        ZetaPromptInput {
2700            cursor_path: Path::new("test.rs").into(),
2701            cursor_excerpt: cursor_excerpt.into(),
2702            cursor_offset_in_excerpt: cursor_offset,
2703            excerpt_start_row: None,
2704            events: events.into_iter().map(Arc::new).collect(),
2705            related_files,
2706            excerpt_ranges: ExcerptRanges {
2707                editable_150: editable_range.clone(),
2708                editable_180: editable_range.clone(),
2709                editable_350: editable_range,
2710                editable_150_context_350: context_range.clone(),
2711                editable_180_context_350: context_range.clone(),
2712                editable_350_context_150: context_range,
2713                ..Default::default()
2714            },
2715            experiment: None,
2716            in_open_source_repo: false,
2717            can_collect_data: false,
2718        }
2719    }
2720
2721    fn make_event(path: &str, diff: &str) -> Event {
2722        Event::BufferChange {
2723            path: Path::new(path).into(),
2724            old_path: Path::new(path).into(),
2725            diff: diff.to_string(),
2726            predicted: false,
2727            in_open_source_repo: false,
2728        }
2729    }
2730
2731    fn make_related_file(path: &str, content: &str) -> RelatedFile {
2732        RelatedFile {
2733            path: Path::new(path).into(),
2734            max_row: content.lines().count() as u32,
2735            excerpts: vec![RelatedExcerpt {
2736                row_range: 0..content.lines().count() as u32,
2737                text: content.into(),
2738                order: 0,
2739            }],
2740            in_open_source_repo: false,
2741        }
2742    }
2743
2744    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
2745        format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
2746    }
2747
2748    #[test]
2749    fn test_no_truncation_when_within_budget() {
2750        let input = make_input(
2751            "prefix\neditable\nsuffix",
2752            7..15,
2753            10,
2754            vec![make_event("a.rs", "-old\n+new\n")],
2755            vec![make_related_file("related.rs", "fn helper() {}\n")],
2756        );
2757
2758        assert_eq!(
2759            format_with_budget(&input, 10000),
2760            indoc! {r#"
2761                <|file_sep|>related.rs
2762                fn helper() {}
2763                <|file_sep|>edit history
2764                --- a/a.rs
2765                +++ b/a.rs
2766                -old
2767                +new
2768                <|file_sep|>test.rs
2769                <|fim_prefix|>
2770                prefix
2771                <|fim_middle|>current
2772                edi<|user_cursor|>table
2773                <|fim_suffix|>
2774
2775                suffix
2776                <|fim_middle|>updated
2777            "#}
2778        );
2779    }
2780
2781    #[test]
2782    fn test_truncation_drops_edit_history_when_budget_tight() {
2783        let input = make_input(
2784            "code",
2785            0..4,
2786            2,
2787            vec![make_event("a.rs", "-x\n+y\n")],
2788            vec![
2789                make_related_file("r1.rs", "a\n"),
2790                make_related_file("r2.rs", "b\n"),
2791            ],
2792        );
2793
2794        assert_eq!(
2795            format_with_budget(&input, 10000),
2796            indoc! {r#"
2797                <|file_sep|>r1.rs
2798                a
2799                <|file_sep|>r2.rs
2800                b
2801                <|file_sep|>edit history
2802                --- a/a.rs
2803                +++ b/a.rs
2804                -x
2805                +y
2806                <|file_sep|>test.rs
2807                <|fim_prefix|>
2808                <|fim_middle|>current
2809                co<|user_cursor|>de
2810                <|fim_suffix|>
2811                <|fim_middle|>updated
2812            "#}
2813        );
2814
2815        assert_eq!(
2816            format_with_budget(&input, 50),
2817            indoc! {r#"
2818                <|file_sep|>r1.rs
2819                a
2820                <|file_sep|>r2.rs
2821                b
2822                <|file_sep|>test.rs
2823                <|fim_prefix|>
2824                <|fim_middle|>current
2825                co<|user_cursor|>de
2826                <|fim_suffix|>
2827                <|fim_middle|>updated
2828            "#}
2829        );
2830    }
2831
2832    #[test]
2833    fn test_truncation_includes_partial_excerpts() {
2834        let input = make_input(
2835            "x",
2836            0..1,
2837            0,
2838            vec![],
2839            vec![RelatedFile {
2840                path: Path::new("big.rs").into(),
2841                max_row: 30,
2842                in_open_source_repo: false,
2843                excerpts: vec![
2844                    RelatedExcerpt {
2845                        row_range: 0..10,
2846                        text: "first excerpt\n".into(),
2847                        order: 0,
2848                    },
2849                    RelatedExcerpt {
2850                        row_range: 10..20,
2851                        text: "second excerpt\n".into(),
2852                        order: 0,
2853                    },
2854                    RelatedExcerpt {
2855                        row_range: 20..30,
2856                        text: "third excerpt\n".into(),
2857                        order: 0,
2858                    },
2859                ],
2860            }],
2861        );
2862
2863        assert_eq!(
2864            format_with_budget(&input, 10000),
2865            indoc! {r#"
2866                <|file_sep|>big.rs
2867                first excerpt
2868                ...
2869                second excerpt
2870                ...
2871                third excerpt
2872                <|file_sep|>test.rs
2873                <|fim_prefix|>
2874                <|fim_middle|>current
2875                <|user_cursor|>x
2876                <|fim_suffix|>
2877                <|fim_middle|>updated
2878            "#}
2879        );
2880
2881        assert_eq!(
2882            format_with_budget(&input, 50),
2883            indoc! {r#"
2884                <|file_sep|>big.rs
2885                first excerpt
2886                ...
2887                <|file_sep|>test.rs
2888                <|fim_prefix|>
2889                <|fim_middle|>current
2890                <|user_cursor|>x
2891                <|fim_suffix|>
2892                <|fim_middle|>updated
2893            "#}
2894        );
2895    }
2896
2897    #[test]
2898    fn test_truncation_prioritizes_lower_order_excerpts() {
2899        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
2900        // With tight budget, only the lower-order excerpt from file_b should be included.
2901        let input = make_input(
2902            "x",
2903            0..1,
2904            0,
2905            vec![],
2906            vec![
2907                RelatedFile {
2908                    path: Path::new("file_a.rs").into(),
2909                    max_row: 10,
2910                    in_open_source_repo: false,
2911                    excerpts: vec![RelatedExcerpt {
2912                        row_range: 0..10,
2913                        text: "low priority content\n".into(),
2914                        order: 5,
2915                    }],
2916                },
2917                RelatedFile {
2918                    path: Path::new("file_b.rs").into(),
2919                    max_row: 10,
2920                    in_open_source_repo: false,
2921                    excerpts: vec![RelatedExcerpt {
2922                        row_range: 0..10,
2923                        text: "high priority content\n".into(),
2924                        order: 1,
2925                    }],
2926                },
2927            ],
2928        );
2929
2930        // With large budget, both files included; rendered in stable lexicographic order.
2931        assert_eq!(
2932            format_with_budget(&input, 10000),
2933            indoc! {r#"
2934                <|file_sep|>file_a.rs
2935                low priority content
2936                <|file_sep|>file_b.rs
2937                high priority content
2938                <|file_sep|>test.rs
2939                <|fim_prefix|>
2940                <|fim_middle|>current
2941                <|user_cursor|>x
2942                <|fim_suffix|>
2943                <|fim_middle|>updated
2944            "#}
2945        );
2946
2947        // With tight budget, only file_b (lower order) fits.
2948        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
2949        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
2950        // file_a would need another 14 tokens, which doesn't fit.
2951        assert_eq!(
2952            format_with_budget(&input, 52),
2953            indoc! {r#"
2954                <|file_sep|>file_b.rs
2955                high priority content
2956                <|file_sep|>test.rs
2957                <|fim_prefix|>
2958                <|fim_middle|>current
2959                <|user_cursor|>x
2960                <|fim_suffix|>
2961                <|fim_middle|>updated
2962            "#}
2963        );
2964    }
2965
2966    #[test]
2967    fn test_truncation_drops_high_order_excerpts_within_file() {
2968        // A single file has excerpts at order 1 and order 3. With a tight budget,
2969        // only the order-1 excerpts are included while the order-3 excerpt is
2970        // dropped — even though they belong to the same file. This also preserves
2971        // the parent invariant: parent outline items have order ≤ their best
2972        // child, so they're always included when any child is.
2973        let input = make_input(
2974            "x",
2975            0..1,
2976            0,
2977            vec![],
2978            vec![RelatedFile {
2979                path: Path::new("mod.rs").into(),
2980                max_row: 30,
2981                in_open_source_repo: false,
2982                excerpts: vec![
2983                    RelatedExcerpt {
2984                        row_range: 0..5,
2985                        text: "mod header\n".into(),
2986                        order: 1,
2987                    },
2988                    RelatedExcerpt {
2989                        row_range: 5..15,
2990                        text: "important fn\n".into(),
2991                        order: 1,
2992                    },
2993                    RelatedExcerpt {
2994                        row_range: 15..30,
2995                        text: "less important fn\n".into(),
2996                        order: 3,
2997                    },
2998                ],
2999            }],
3000        );
3001
3002        // With large budget, all three excerpts included.
3003        assert_eq!(
3004            format_with_budget(&input, 10000),
3005            indoc! {r#"
3006                <|file_sep|>mod.rs
3007                mod header
3008                ...
3009                important fn
3010                ...
3011                less important fn
3012                <|file_sep|>test.rs
3013                <|fim_prefix|>
3014                <|fim_middle|>current
3015                <|user_cursor|>x
3016                <|fim_suffix|>
3017                <|fim_middle|>updated
3018            "#}
3019        );
3020
3021        // With tight budget, only order<=1 excerpts included (header + important fn).
3022        assert_eq!(
3023            format_with_budget(&input, 55),
3024            indoc! {r#"
3025                <|file_sep|>mod.rs
3026                mod header
3027                ...
3028                important fn
3029                ...
3030                <|file_sep|>test.rs
3031                <|fim_prefix|>
3032                <|fim_middle|>current
3033                <|user_cursor|>x
3034                <|fim_suffix|>
3035                <|fim_middle|>updated
3036            "#}
3037        );
3038    }
3039
3040    #[test]
3041    fn test_truncation_drops_older_events_first() {
3042        let input = make_input(
3043            "x",
3044            0..1,
3045            0,
3046            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
3047            vec![],
3048        );
3049
3050        assert_eq!(
3051            format_with_budget(&input, 10000),
3052            indoc! {r#"
3053                <|file_sep|>edit history
3054                --- a/old.rs
3055                +++ b/old.rs
3056                -1
3057                --- a/new.rs
3058                +++ b/new.rs
3059                -2
3060                <|file_sep|>test.rs
3061                <|fim_prefix|>
3062                <|fim_middle|>current
3063                <|user_cursor|>x
3064                <|fim_suffix|>
3065                <|fim_middle|>updated
3066            "#}
3067        );
3068
3069        assert_eq!(
3070            format_with_budget(&input, 55),
3071            indoc! {r#"
3072                <|file_sep|>edit history
3073                --- a/new.rs
3074                +++ b/new.rs
3075                -2
3076                <|file_sep|>test.rs
3077                <|fim_prefix|>
3078                <|fim_middle|>current
3079                <|user_cursor|>x
3080                <|fim_suffix|>
3081                <|fim_middle|>updated
3082            "#}
3083        );
3084    }
3085
3086    #[test]
3087    fn test_cursor_excerpt_always_included_with_minimal_budget() {
3088        let input = make_input(
3089            "fn main() {}",
3090            0..12,
3091            3,
3092            vec![make_event("a.rs", "-old\n+new\n")],
3093            vec![make_related_file("related.rs", "helper\n")],
3094        );
3095
3096        assert_eq!(
3097            format_with_budget(&input, 30),
3098            indoc! {r#"
3099                <|file_sep|>test.rs
3100                <|fim_prefix|>
3101                <|fim_middle|>current
3102                fn <|user_cursor|>main() {}
3103                <|fim_suffix|>
3104                <|fim_middle|>updated
3105            "#}
3106        );
3107    }
3108
3109    fn format_seed_coder(input: &ZetaPromptInput) -> String {
3110        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
3111    }
3112
3113    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
3114        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
3115    }
3116
3117    #[test]
3118    fn test_seed_coder_basic_format() {
3119        let input = make_input(
3120            "prefix\neditable\nsuffix",
3121            7..15,
3122            10,
3123            vec![make_event("a.rs", "-old\n+new\n")],
3124            vec![make_related_file("related.rs", "fn helper() {}\n")],
3125        );
3126
3127        assert_eq!(
3128            format_seed_coder(&input),
3129            indoc! {r#"
3130                <[fim-suffix]>
3131                suffix
3132                <[fim-prefix]><filename>related.rs
3133                fn helper() {}
3134
3135                <filename>edit_history
3136                --- a/a.rs
3137                +++ b/a.rs
3138                -old
3139                +new
3140
3141                <filename>test.rs
3142                prefix
3143                <<<<<<< CURRENT
3144                edi<|user_cursor|>table
3145                =======
3146                <[fim-middle]>"#}
3147        );
3148    }
3149
3150    #[test]
3151    fn test_seed_coder_no_context() {
3152        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
3153
3154        assert_eq!(
3155            format_seed_coder(&input),
3156            indoc! {r#"
3157                <[fim-suffix]>
3158                after
3159                <[fim-prefix]><filename>test.rs
3160                before
3161                <<<<<<< CURRENT
3162                mid<|user_cursor|>dle
3163                =======
3164                <[fim-middle]>"#}
3165        );
3166    }
3167
3168    #[test]
3169    fn test_seed_coder_truncation_drops_context() {
3170        let input = make_input(
3171            "code",
3172            0..4,
3173            2,
3174            vec![make_event("a.rs", "-x\n+y\n")],
3175            vec![make_related_file("r1.rs", "content\n")],
3176        );
3177
3178        // With large budget, everything is included
3179        assert_eq!(
3180            format_seed_coder(&input),
3181            indoc! {r#"
3182                <[fim-suffix]>
3183                <[fim-prefix]><filename>r1.rs
3184                content
3185
3186                <filename>edit_history
3187                --- a/a.rs
3188                +++ b/a.rs
3189                -x
3190                +y
3191
3192                <filename>test.rs
3193                <<<<<<< CURRENT
3194                co<|user_cursor|>de
3195                =======
3196                <[fim-middle]>"#}
3197        );
3198
3199        // With tight budget, context is dropped but cursor section remains
3200        assert_eq!(
3201            format_seed_coder_with_budget(&input, 30),
3202            indoc! {r#"
3203                <[fim-suffix]>
3204                <[fim-prefix]><filename>test.rs
3205                <<<<<<< CURRENT
3206                co<|user_cursor|>de
3207                =======
3208                <[fim-middle]>"#}
3209        );
3210    }
3211
3212    #[test]
3213    fn test_seed_coder_truncation_prioritizes_lower_order() {
3214        let input = make_input(
3215            "code",
3216            0..4,
3217            2,
3218            vec![],
3219            vec![
3220                RelatedFile {
3221                    path: Path::new("low_prio.rs").into(),
3222                    max_row: 5,
3223                    in_open_source_repo: false,
3224                    excerpts: vec![RelatedExcerpt {
3225                        row_range: 0..5,
3226                        text: "low prio\n".into(),
3227                        order: 10,
3228                    }],
3229                },
3230                RelatedFile {
3231                    path: Path::new("high_prio.rs").into(),
3232                    max_row: 5,
3233                    in_open_source_repo: false,
3234                    excerpts: vec![RelatedExcerpt {
3235                        row_range: 0..5,
3236                        text: "high prio\n".into(),
3237                        order: 1,
3238                    }],
3239                },
3240            ],
3241        );
3242
3243        // With large budget, both included; rendered in stable lexicographic order.
3244        assert_eq!(
3245            format_seed_coder(&input),
3246            indoc! {r#"
3247                <[fim-suffix]>
3248                <[fim-prefix]><filename>low_prio.rs
3249                low prio
3250                <filename>high_prio.rs
3251                high prio
3252
3253                <filename>test.rs
3254                <<<<<<< CURRENT
3255                co<|user_cursor|>de
3256                =======
3257                <[fim-middle]>"#}
3258        );
3259
3260        // With tight budget, only high_prio included.
3261        // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
3262        // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
3263        assert_eq!(
3264            format_seed_coder_with_budget(&input, 44),
3265            indoc! {r#"
3266                <[fim-suffix]>
3267                <[fim-prefix]><filename>high_prio.rs
3268                high prio
3269
3270                <filename>test.rs
3271                <<<<<<< CURRENT
3272                co<|user_cursor|>de
3273                =======
3274                <[fim-middle]>"#}
3275        );
3276    }
3277
3278    #[test]
3279    fn test_seed_coder_clean_output() {
3280        let output_with_marker = "new code\n>>>>>>> UPDATED\n";
3281        let output_without_marker = "new code\n";
3282
3283        assert_eq!(
3284            clean_zeta2_model_output(output_with_marker, ZetaFormat::V0211SeedCoder),
3285            "new code\n"
3286        );
3287        assert_eq!(
3288            clean_zeta2_model_output(output_without_marker, ZetaFormat::V0211SeedCoder),
3289            "new code\n"
3290        );
3291    }
3292
3293    #[test]
3294    fn test_format_zeta1_from_input_basic() {
3295        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
3296        let input = ZetaPromptInput {
3297            cursor_path: Path::new("src/main.rs").into(),
3298            cursor_excerpt: excerpt.into(),
3299            cursor_offset_in_excerpt: 30,
3300            excerpt_start_row: Some(0),
3301            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
3302            related_files: vec![],
3303            excerpt_ranges: ExcerptRanges {
3304                editable_150: 15..41,
3305                editable_180: 15..41,
3306                editable_350: 15..41,
3307                editable_150_context_350: 0..excerpt.len(),
3308                editable_180_context_350: 0..excerpt.len(),
3309                editable_350_context_150: 0..excerpt.len(),
3310                ..Default::default()
3311            },
3312            experiment: None,
3313            in_open_source_repo: false,
3314            can_collect_data: false,
3315        };
3316
3317        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
3318
3319        assert_eq!(
3320            prompt,
3321            concat!(
3322                "### Instruction:\n",
3323                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3324                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3325                "into account the cursor location.\n",
3326                "\n",
3327                "### User Edits:\n",
3328                "\n",
3329                "User edited other.rs:\n",
3330                "```diff\n",
3331                "-old\n",
3332                "+new\n",
3333                "\n",
3334                "```\n",
3335                "\n",
3336                "### User Excerpt:\n",
3337                "\n",
3338                "```src/main.rs\n",
3339                "<|start_of_file|>\n",
3340                "fn before() {}\n",
3341                "<|editable_region_start|>\n",
3342                "fn foo() {\n",
3343                "    <|user_cursor_is_here|>let x = 1;\n",
3344                "\n",
3345                "<|editable_region_end|>}\n",
3346                "fn after() {}\n",
3347                "\n",
3348                "```\n",
3349                "\n",
3350                "### Response:\n",
3351            ),
3352        );
3353    }
3354
3355    #[test]
3356    fn test_format_zeta1_from_input_no_start_of_file() {
3357        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
3358        let input = ZetaPromptInput {
3359            cursor_path: Path::new("src/main.rs").into(),
3360            cursor_excerpt: excerpt.into(),
3361            cursor_offset_in_excerpt: 15,
3362            excerpt_start_row: Some(10),
3363            events: vec![],
3364            related_files: vec![],
3365            excerpt_ranges: ExcerptRanges {
3366                editable_150: 0..28,
3367                editable_180: 0..28,
3368                editable_350: 0..28,
3369                editable_150_context_350: 0..28,
3370                editable_180_context_350: 0..28,
3371                editable_350_context_150: 0..28,
3372                ..Default::default()
3373            },
3374            experiment: None,
3375            in_open_source_repo: false,
3376            can_collect_data: false,
3377        };
3378
3379        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
3380
3381        assert_eq!(
3382            prompt,
3383            concat!(
3384                "### Instruction:\n",
3385                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3386                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3387                "into account the cursor location.\n",
3388                "\n",
3389                "### User Edits:\n",
3390                "\n",
3391                "\n",
3392                "\n",
3393                "### User Excerpt:\n",
3394                "\n",
3395                "```src/main.rs\n",
3396                "<|editable_region_start|>\n",
3397                "fn foo() {\n",
3398                "    <|user_cursor_is_here|>let x = 1;\n",
3399                "}\n",
3400                "\n",
3401                "<|editable_region_end|>\n",
3402                "```\n",
3403                "\n",
3404                "### Response:\n",
3405            ),
3406        );
3407    }
3408
3409    #[test]
3410    fn test_format_zeta1_from_input_with_sub_ranges() {
3411        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
3412        let editable_range = 10..37;
3413        let context_range = 0..excerpt.len();
3414
3415        let input = ZetaPromptInput {
3416            cursor_path: Path::new("test.rs").into(),
3417            cursor_excerpt: excerpt.into(),
3418            cursor_offset_in_excerpt: 25,
3419            excerpt_start_row: Some(0),
3420            events: vec![],
3421            related_files: vec![],
3422            excerpt_ranges: ExcerptRanges {
3423                editable_150: editable_range.clone(),
3424                editable_180: editable_range.clone(),
3425                editable_350: editable_range.clone(),
3426                editable_150_context_350: context_range.clone(),
3427                editable_180_context_350: context_range.clone(),
3428                editable_350_context_150: context_range.clone(),
3429                ..Default::default()
3430            },
3431            experiment: None,
3432            in_open_source_repo: false,
3433            can_collect_data: false,
3434        };
3435
3436        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
3437
3438        assert_eq!(
3439            prompt,
3440            concat!(
3441                "### Instruction:\n",
3442                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3443                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3444                "into account the cursor location.\n",
3445                "\n",
3446                "### User Edits:\n",
3447                "\n",
3448                "\n",
3449                "\n",
3450                "### User Excerpt:\n",
3451                "\n",
3452                "```test.rs\n",
3453                "<|start_of_file|>\n",
3454                "// prefix\n",
3455                "<|editable_region_start|>\n",
3456                "fn foo() {\n",
3457                "    <|user_cursor_is_here|>let x = 1;\n",
3458                "}\n",
3459                "<|editable_region_end|>\n",
3460                "// suffix\n",
3461                "\n",
3462                "```\n",
3463                "\n",
3464                "### Response:\n",
3465            ),
3466        );
3467    }
3468
3469    #[test]
3470    fn test_clean_zeta1_model_output_basic() {
3471        let output = indoc! {"
3472            <|editable_region_start|>
3473            fn main() {
3474                println!(\"hello\");
3475            }
3476            <|editable_region_end|>
3477        "};
3478
3479        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3480        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
3481    }
3482
3483    #[test]
3484    fn test_clean_zeta1_model_output_with_cursor() {
3485        let output = indoc! {"
3486            <|editable_region_start|>
3487            fn main() {
3488                <|user_cursor_is_here|>println!(\"hello\");
3489            }
3490            <|editable_region_end|>
3491        "};
3492
3493        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3494        assert_eq!(
3495            cleaned,
3496            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
3497        );
3498    }
3499
3500    #[test]
3501    fn test_clean_zeta1_model_output_no_markers() {
3502        let output = "fn main() {}\n";
3503        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3504        assert_eq!(cleaned, "fn main() {}\n");
3505    }
3506
3507    #[test]
3508    fn test_clean_zeta1_model_output_empty_region() {
3509        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
3510        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3511        assert_eq!(cleaned, "");
3512    }
3513}