zeta_prompt.rs

   1use anyhow::Result;
   2use serde::{Deserialize, Serialize};
   3use std::fmt::Write;
   4use std::ops::Range;
   5use std::path::Path;
   6use std::sync::Arc;
   7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
   8
/// Marker text written into the prompt at the cursor position.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes when building a prompt.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  16
  17fn estimate_tokens(bytes: usize) -> usize {
  18    bytes / 3
  19}
  20
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
#[derive(Clone, Debug, Default, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Editable region computed with a 512-token budget.
    pub editable_512: Option<Range<usize>>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
    /// NOTE(review): by analogy with the fields above, presumably the context boundary
    /// when using editable_350 with 512 tokens of additional context — confirm.
    pub editable_350_context_512: Option<Range<usize>>,
    /// NOTE(review): presumably the context boundary when using editable_350 with
    /// 1024 tokens of additional context — confirm.
    pub editable_350_context_1024: Option<Range<usize>>,
    /// NOTE(review): presumably a context range computed with a 4096-token budget — confirm.
    pub context_4096: Option<Range<usize>>,
    /// NOTE(review): presumably a context range computed with an 8192-token budget — confirm.
    pub context_8192: Option<Range<usize>>,
}
  45
/// Input from which a prompt is built: the excerpt around the cursor, the
/// edit history, related files, and request metadata.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; passed to the cursor-section writers.
    pub cursor_path: Arc<Path>,
    /// Text around the cursor. The ranges in `excerpt_ranges` index into this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// NOTE(review): presumably the buffer row at which `cursor_excerpt` starts — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit history events; the most recent ones that fit the budget are rendered.
    pub events: Vec<Arc<Event>>,
    /// Other files whose excerpts may be included in the prompt, budget permitting.
    pub related_files: Vec<RelatedFile>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    /// Not read by the prompt formatting code in this file.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// Not read by the prompt formatting code in this file.
    #[serde(default)]
    pub can_collect_data: bool,
    /// Not read by the prompt formatting code in this file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
  67
/// Identifies a prompt format version. The variant selects the special
/// tokens, the excerpt ranges, and the layout of the cursor section.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Required because the `v0226Hashline` variant starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    /// Layout written by `v0112_middle_at_end`, using the 150-token editable range.
    V0112MiddleAtEnd,
    /// Layout written by `v0113_ordered`, using the 150-token editable range.
    V0113Ordered,
    /// Same layout as `V0113Ordered`, but using the 180-token editable range.
    V0114180EditableRegion,
    /// Layout written by `v0120_git_merge_markers`, using the 350-token editable range.
    V0120GitMergeMarkers,
    /// Layout written by `v0131_git_merge_markers_prefix`; this is the default format.
    #[default]
    V0131GitMergeMarkersPrefix,
    /// Same layout as `V0131GitMergeMarkersPrefix`, with a prefill from `v0211_prefill`.
    V0211Prefill,
    /// Prompt built by the `seed_coder` module.
    V0211SeedCoder,
    /// Prompt built by the `hashline` module.
    v0226Hashline,
}
  93
  94impl std::fmt::Display for ZetaFormat {
  95    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  96        write!(f, "{}", <&'static str>::from(self))
  97    }
  98}
  99
 100impl ZetaFormat {
 101    pub fn parse(format_name: &str) -> Result<Self> {
 102        let mut results = ZetaFormat::iter().filter(|version| {
 103            <&'static str>::from(version)
 104                .to_lowercase()
 105                .contains(&format_name.to_lowercase())
 106        });
 107        let Some(result) = results.next() else {
 108            anyhow::bail!(
 109                "`{format_name}` did not match any of:\n{}",
 110                Self::options_as_string()
 111            );
 112        };
 113        if results.next().is_some() {
 114            anyhow::bail!(
 115                "`{format_name}` matched more than one of:\n{}",
 116                Self::options_as_string()
 117            );
 118        }
 119        Ok(result)
 120    }
 121
 122    pub fn options_as_string() -> String {
 123        ZetaFormat::iter()
 124            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 125            .collect::<Vec<_>>()
 126            .concat()
 127    }
 128}
 129
/// An entry of the edit history that can be rendered into the prompt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A buffer edit. `write_event` renders it as `--- a…` / `+++ b…` path
    /// lines followed by the diff text.
    BufferChange {
        /// Path written on the `+++ b` line.
        path: Arc<Path>,
        /// Path written on the `--- a` line.
        old_path: Arc<Path>,
        /// Diff text appended verbatim after the two path lines.
        diff: String,
        /// When true, the rendered entry starts with `// User accepted prediction:`.
        predicted: bool,
        /// Exposed through `Event::in_open_source_repo`; not written into the prompt.
        in_open_source_repo: bool,
    },
}
 141
 142impl Event {
 143    pub fn in_open_source_repo(&self) -> bool {
 144        match self {
 145            Event::BufferChange {
 146                in_open_source_repo,
 147                ..
 148            } => *in_open_source_repo,
 149        }
 150    }
 151}
 152
 153pub fn write_event(prompt: &mut String, event: &Event) {
 154    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 155        for component in path.components() {
 156            prompt.push('/');
 157            write!(prompt, "{}", component.as_os_str().display()).ok();
 158        }
 159    }
 160    match event {
 161        Event::BufferChange {
 162            path,
 163            old_path,
 164            diff,
 165            predicted,
 166            in_open_source_repo: _,
 167        } => {
 168            if *predicted {
 169                prompt.push_str("// User accepted prediction:\n");
 170            }
 171            prompt.push_str("--- a");
 172            write_path_as_unix_str(prompt, old_path.as_ref());
 173            prompt.push_str("\n+++ b");
 174            write_path_as_unix_str(prompt, path.as_ref());
 175            prompt.push('\n');
 176            prompt.push_str(diff);
 177        }
 178    }
 179}
 180
/// A file other than the cursor file whose excerpts may be added to the prompt.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written after the file marker in the rendered header line.
    pub path: Arc<Path>,
    /// Compared against each excerpt's `row_range.end`; an excerpt ending before
    /// this row is followed by a `...` line when rendered.
    pub max_row: u32,
    /// Excerpts of this file, rendered in this order.
    pub excerpts: Vec<RelatedExcerpt>,
    /// Not read by the prompt formatting code in this file.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 189
/// A piece of text taken from a related file.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by the excerpt; only `end` is read here, to decide whether
    /// a `...` line follows the excerpt.
    pub row_range: Range<u32>,
    /// The excerpt text, written verbatim into the prompt.
    pub text: Arc<str>,
    /// Priority under a token budget: excerpts with a lower `order` are
    /// considered first by `format_related_files_within_budget`.
    #[serde(default)]
    pub order: usize,
}
 197
 198pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 199    special_tokens_for_format(format)
 200        .iter()
 201        .any(|token| input.cursor_excerpt.contains(token))
 202}
 203
 204pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 205    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
 206}
 207
 208pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 209    match format {
 210        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
 211        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
 212        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
 213        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 214        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
 215        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
 216        ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 217        ZetaFormat::v0226Hashline => hashline::special_tokens(),
 218    }
 219}
 220
 221pub fn excerpt_ranges_for_format(
 222    format: ZetaFormat,
 223    ranges: &ExcerptRanges,
 224) -> (Range<usize>, Range<usize>) {
 225    match format {
 226        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 227            ranges.editable_150.clone(),
 228            ranges.editable_150_context_350.clone(),
 229        ),
 230        ZetaFormat::V0114180EditableRegion => (
 231            ranges.editable_180.clone(),
 232            ranges.editable_180_context_350.clone(),
 233        ),
 234        ZetaFormat::V0120GitMergeMarkers
 235        | ZetaFormat::V0131GitMergeMarkersPrefix
 236        | ZetaFormat::V0211Prefill
 237        | ZetaFormat::V0211SeedCoder
 238        | ZetaFormat::v0226Hashline => (
 239            ranges.editable_350.clone(),
 240            ranges.editable_350_context_150.clone(),
 241        ),
 242    }
 243}
 244
 245pub fn write_cursor_excerpt_section_for_format(
 246    format: ZetaFormat,
 247    prompt: &mut String,
 248    path: &Path,
 249    context: &str,
 250    editable_range: &Range<usize>,
 251    cursor_offset: usize,
 252) {
 253    match format {
 254        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
 255            prompt,
 256            path,
 257            context,
 258            editable_range,
 259            cursor_offset,
 260        ),
 261        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 262            v0113_ordered::write_cursor_excerpt_section(
 263                prompt,
 264                path,
 265                context,
 266                editable_range,
 267                cursor_offset,
 268            )
 269        }
 270        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 271            prompt,
 272            path,
 273            context,
 274            editable_range,
 275            cursor_offset,
 276        ),
 277        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 278            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 279                prompt,
 280                path,
 281                context,
 282                editable_range,
 283                cursor_offset,
 284            )
 285        }
 286        ZetaFormat::V0211SeedCoder => seed_coder::write_cursor_excerpt_section(
 287            prompt,
 288            path,
 289            context,
 290            editable_range,
 291            cursor_offset,
 292        ),
 293        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
 294            prompt,
 295            path,
 296            context,
 297            editable_range,
 298            cursor_offset,
 299        ),
 300    }
 301}
 302
 303pub fn format_prompt_with_budget_for_format(
 304    input: &ZetaPromptInput,
 305    format: ZetaFormat,
 306    max_tokens: usize,
 307) -> String {
 308    let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
 309    let path = &*input.cursor_path;
 310
 311    match format {
 312        ZetaFormat::V0211SeedCoder => seed_coder::format_prompt_with_budget(
 313            path,
 314            context,
 315            &editable_range,
 316            cursor_offset,
 317            &input.events,
 318            &input.related_files,
 319            max_tokens,
 320        ),
 321        _ => {
 322            let mut cursor_section = String::new();
 323            write_cursor_excerpt_section_for_format(
 324                format,
 325                &mut cursor_section,
 326                path,
 327                context,
 328                &editable_range,
 329                cursor_offset,
 330            );
 331
 332            let cursor_tokens = estimate_tokens(cursor_section.len());
 333            let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
 334
 335            let edit_history_section = format_edit_history_within_budget(
 336                &input.events,
 337                "<|file_sep|>",
 338                "edit history",
 339                budget_after_cursor,
 340            );
 341            let edit_history_tokens = estimate_tokens(edit_history_section.len());
 342            let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 343
 344            let related_files_section = format_related_files_within_budget(
 345                &input.related_files,
 346                "<|file_sep|>",
 347                "",
 348                budget_after_edit_history,
 349            );
 350
 351            let mut prompt = String::new();
 352            prompt.push_str(&related_files_section);
 353            prompt.push_str(&edit_history_section);
 354            prompt.push_str(&cursor_section);
 355            prompt
 356        }
 357    }
 358}
 359
 360pub fn get_prefill_for_format(
 361    format: ZetaFormat,
 362    context: &str,
 363    editable_range: &Range<usize>,
 364) -> String {
 365    match format {
 366        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
 367        ZetaFormat::V0112MiddleAtEnd
 368        | ZetaFormat::V0113Ordered
 369        | ZetaFormat::V0114180EditableRegion
 370        | ZetaFormat::V0120GitMergeMarkers
 371        | ZetaFormat::V0131GitMergeMarkersPrefix
 372        | ZetaFormat::V0211SeedCoder
 373        | ZetaFormat::v0226Hashline => String::new(),
 374    }
 375}
 376
 377pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
 378    match format {
 379        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
 380        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
 381        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
 382        ZetaFormat::V0211SeedCoder => Some(seed_coder::END_MARKER),
 383        ZetaFormat::V0112MiddleAtEnd
 384        | ZetaFormat::V0113Ordered
 385        | ZetaFormat::V0114180EditableRegion
 386        | ZetaFormat::v0226Hashline => None,
 387    }
 388}
 389
 390pub fn current_region_markers_for_format(format: ZetaFormat) -> (&'static str, &'static str) {
 391    match format {
 392        ZetaFormat::V0112MiddleAtEnd => ("<|fim_middle|>current\n", "<|fim_middle|>updated"),
 393        ZetaFormat::V0113Ordered
 394        | ZetaFormat::V0114180EditableRegion
 395        | ZetaFormat::v0226Hashline => ("<|fim_middle|>current\n", "<|fim_suffix|>"),
 396        ZetaFormat::V0120GitMergeMarkers
 397        | ZetaFormat::V0131GitMergeMarkersPrefix
 398        | ZetaFormat::V0211Prefill => (
 399            v0120_git_merge_markers::START_MARKER,
 400            v0120_git_merge_markers::SEPARATOR,
 401        ),
 402        ZetaFormat::V0211SeedCoder => (seed_coder::START_MARKER, seed_coder::SEPARATOR),
 403    }
 404}
 405
 406pub fn clean_extracted_region_for_format(format: ZetaFormat, region: &str) -> String {
 407    match format {
 408        ZetaFormat::v0226Hashline => hashline::strip_hashline_prefixes(region),
 409        _ => region.to_string(),
 410    }
 411}
 412
 413pub fn encode_patch_as_output_for_format(
 414    format: ZetaFormat,
 415    old_editable_region: &str,
 416    patch: &str,
 417    cursor_offset: Option<usize>,
 418) -> Result<Option<String>> {
 419    match format {
 420        ZetaFormat::v0226Hashline => {
 421            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
 422        }
 423        _ => Ok(None),
 424    }
 425}
 426
 427pub fn output_with_context_for_format(
 428    format: ZetaFormat,
 429    old_editable_region: &str,
 430    output: &str,
 431) -> Result<Option<String>> {
 432    match format {
 433        ZetaFormat::v0226Hashline => {
 434            if hashline::output_has_edit_commands(output) {
 435                Ok(Some(hashline::apply_edit_commands(
 436                    old_editable_region,
 437                    output,
 438                )))
 439            } else {
 440                Ok(None)
 441            }
 442        }
 443        _ => Ok(None),
 444    }
 445}
 446
 447/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
 448pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
 449    match output_end_marker_for_format(format) {
 450        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
 451        None => output,
 452    }
 453}
 454
 455pub fn excerpt_range_for_format(
 456    format: ZetaFormat,
 457    ranges: &ExcerptRanges,
 458) -> (Range<usize>, Range<usize>) {
 459    excerpt_ranges_for_format(format, ranges)
 460}
 461
 462pub fn resolve_cursor_region(
 463    input: &ZetaPromptInput,
 464    format: ZetaFormat,
 465) -> (&str, Range<usize>, usize) {
 466    let (editable_range, context_range) = excerpt_range_for_format(format, &input.excerpt_ranges);
 467    let context_start = context_range.start;
 468    let context_text = &input.cursor_excerpt[context_range];
 469    let adjusted_editable =
 470        (editable_range.start - context_start)..(editable_range.end - context_start);
 471    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 472
 473    (context_text, adjusted_editable, adjusted_cursor)
 474}
 475
 476pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 477    let (context, editable_range, _) = resolve_cursor_region(input, format);
 478    get_prefill_for_format(format, context, &editable_range)
 479}
 480
 481fn format_edit_history_within_budget(
 482    events: &[Arc<Event>],
 483    file_marker: &str,
 484    edit_history_name: &str,
 485    max_tokens: usize,
 486) -> String {
 487    let header = format!("{}{}\n", file_marker, edit_history_name);
 488    let header_tokens = estimate_tokens(header.len());
 489    if header_tokens >= max_tokens {
 490        return String::new();
 491    }
 492
 493    let mut event_strings: Vec<String> = Vec::new();
 494    let mut total_tokens = header_tokens;
 495
 496    for event in events.iter().rev() {
 497        let mut event_str = String::new();
 498        write_event(&mut event_str, event);
 499        let event_tokens = estimate_tokens(event_str.len());
 500
 501        if total_tokens + event_tokens > max_tokens {
 502            break;
 503        }
 504        total_tokens += event_tokens;
 505        event_strings.push(event_str);
 506    }
 507
 508    if event_strings.is_empty() {
 509        return String::new();
 510    }
 511
 512    let mut result = header;
 513    for event_str in event_strings.iter().rev() {
 514        result.push_str(event_str);
 515    }
 516    result
 517}
 518
 519fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
 520    let needs_newline = !excerpt.text.ends_with('\n');
 521    let needs_ellipsis = excerpt.row_range.end < file_max_row;
 522    let len = excerpt.text.len()
 523        + if needs_newline { "\n".len() } else { 0 }
 524        + if needs_ellipsis { "...\n".len() } else { 0 };
 525    estimate_tokens(len)
 526}
 527
 528pub fn format_related_files_within_budget(
 529    related_files: &[RelatedFile],
 530    file_prefix: &str,
 531    file_suffix: &str,
 532    max_tokens: usize,
 533) -> String {
 534    struct ExcerptCandidate {
 535        file_ix: usize,
 536        excerpt_ix: usize,
 537        order: usize,
 538    }
 539
 540    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
 541        .iter()
 542        .enumerate()
 543        .flat_map(|(file_ix, file)| {
 544            file.excerpts
 545                .iter()
 546                .enumerate()
 547                .map(move |(excerpt_ix, e)| ExcerptCandidate {
 548                    file_ix,
 549                    excerpt_ix,
 550                    order: e.order,
 551                })
 552        })
 553        .collect();
 554
 555    // Pre-compute file header strings and their token costs.
 556    let file_headers: Vec<String> = related_files
 557        .iter()
 558        .map(|file| {
 559            let path_str = file.path.to_string_lossy();
 560            format!("{}{}\n", file_prefix, path_str)
 561        })
 562        .collect();
 563
 564    // Sort the excerpts by their order and determine how many fit within the budget.
 565    let mut total_tokens = 0;
 566    let mut included_excerpt_count = 0_usize;
 567    let mut included_file_indices = vec![false; related_files.len()];
 568    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
 569    for candidate in &excerpt_candidates {
 570        let file = &related_files[candidate.file_ix];
 571        let excerpt = &file.excerpts[candidate.excerpt_ix];
 572        let file_already_included = included_file_indices[candidate.file_ix];
 573        let header_cost = if file_already_included {
 574            0
 575        } else {
 576            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
 577        };
 578        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
 579        if total_tokens + header_cost + excerpt_cost > max_tokens {
 580            break;
 581        }
 582        total_tokens += header_cost + excerpt_cost;
 583        if !file_already_included {
 584            included_file_indices[candidate.file_ix] = true;
 585        }
 586        included_excerpt_count += 1;
 587    }
 588
 589    excerpt_candidates.truncate(included_excerpt_count);
 590    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
 591
 592    // Render all of the files that fit within the token budget, in the original order.
 593    let mut result = String::new();
 594    let mut last_file_ix = None;
 595    for candidate in &excerpt_candidates {
 596        if last_file_ix != Some(candidate.file_ix) {
 597            if last_file_ix.is_some() {
 598                result.push_str(file_suffix);
 599            }
 600            result.push_str(&file_headers[candidate.file_ix]);
 601            last_file_ix = Some(candidate.file_ix);
 602        }
 603        let file = &related_files[candidate.file_ix];
 604        let excerpt = &file.excerpts[candidate.excerpt_ix];
 605        result.push_str(&excerpt.text);
 606        if !result.ends_with('\n') {
 607            result.push('\n');
 608        }
 609        if excerpt.row_range.end < file.max_row {
 610            result.push_str("...\n");
 611        }
 612    }
 613
 614    result
 615}
 616
 617pub fn write_related_files(
 618    prompt: &mut String,
 619    related_files: &[RelatedFile],
 620) -> Vec<Range<usize>> {
 621    let mut ranges = Vec::new();
 622    for file in related_files {
 623        let start = prompt.len();
 624        let path_str = file.path.to_string_lossy();
 625        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 626        for excerpt in &file.excerpts {
 627            prompt.push_str(&excerpt.text);
 628            if !prompt.ends_with('\n') {
 629                prompt.push('\n');
 630            }
 631            if excerpt.row_range.end < file.max_row {
 632                prompt.push_str("...\n");
 633            }
 634        }
 635        let end = prompt.len();
 636        ranges.push(start..end);
 637    }
 638    ranges
 639}
 640
 641mod v0112_middle_at_end {
 642    use super::*;
 643
 644    pub fn special_tokens() -> &'static [&'static str] {
 645        &[
 646            "<|fim_prefix|>",
 647            "<|fim_suffix|>",
 648            "<|fim_middle|>",
 649            "<|file_sep|>",
 650            CURSOR_MARKER,
 651        ]
 652    }
 653
 654    pub fn write_cursor_excerpt_section(
 655        prompt: &mut String,
 656        path: &Path,
 657        context: &str,
 658        editable_range: &Range<usize>,
 659        cursor_offset: usize,
 660    ) {
 661        let path_str = path.to_string_lossy();
 662        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 663
 664        prompt.push_str("<|fim_prefix|>\n");
 665        prompt.push_str(&context[..editable_range.start]);
 666
 667        prompt.push_str("<|fim_suffix|>\n");
 668        prompt.push_str(&context[editable_range.end..]);
 669        if !prompt.ends_with('\n') {
 670            prompt.push('\n');
 671        }
 672
 673        prompt.push_str("<|fim_middle|>current\n");
 674        prompt.push_str(&context[editable_range.start..cursor_offset]);
 675        prompt.push_str(CURSOR_MARKER);
 676        prompt.push_str(&context[cursor_offset..editable_range.end]);
 677        if !prompt.ends_with('\n') {
 678            prompt.push('\n');
 679        }
 680
 681        prompt.push_str("<|fim_middle|>updated\n");
 682    }
 683}
 684
 685mod v0113_ordered {
 686    use super::*;
 687
 688    pub fn special_tokens() -> &'static [&'static str] {
 689        &[
 690            "<|fim_prefix|>",
 691            "<|fim_suffix|>",
 692            "<|fim_middle|>",
 693            "<|file_sep|>",
 694            CURSOR_MARKER,
 695        ]
 696    }
 697
 698    pub fn write_cursor_excerpt_section(
 699        prompt: &mut String,
 700        path: &Path,
 701        context: &str,
 702        editable_range: &Range<usize>,
 703        cursor_offset: usize,
 704    ) {
 705        let path_str = path.to_string_lossy();
 706        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 707
 708        prompt.push_str("<|fim_prefix|>\n");
 709        prompt.push_str(&context[..editable_range.start]);
 710        if !prompt.ends_with('\n') {
 711            prompt.push('\n');
 712        }
 713
 714        prompt.push_str("<|fim_middle|>current\n");
 715        prompt.push_str(&context[editable_range.start..cursor_offset]);
 716        prompt.push_str(CURSOR_MARKER);
 717        prompt.push_str(&context[cursor_offset..editable_range.end]);
 718        if !prompt.ends_with('\n') {
 719            prompt.push('\n');
 720        }
 721
 722        prompt.push_str("<|fim_suffix|>\n");
 723        prompt.push_str(&context[editable_range.end..]);
 724        if !prompt.ends_with('\n') {
 725            prompt.push('\n');
 726        }
 727
 728        prompt.push_str("<|fim_middle|>updated\n");
 729    }
 730}
 731
 732mod v0114180_editable_region {
 733    use super::*;
 734
 735    pub fn special_tokens() -> &'static [&'static str] {
 736        v0113_ordered::special_tokens()
 737    }
 738}
 739
 740pub mod v0120_git_merge_markers {
 741    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 742    //!
 743    //! Example prompt:
 744    //!
 745    //! <|file_sep|>path/to/target_file.py
 746    //! <|fim_prefix|>
 747    //! code before editable region
 748    //! <|fim_suffix|>
 749    //! code after editable region
 750    //! <|fim_middle|>
 751    //! <<<<<<< CURRENT
 752    //! code that
 753    //! needs to<|user_cursor|>
 754    //! be rewritten
 755    //! =======
 756    //!
 757    //! Expected output (should be generated by the model):
 758    //!
 759    //! updated
 760    //! code with
 761    //! changes applied
 762    //! >>>>>>> UPDATED
 763
 764    use super::*;
 765
 766    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 767    pub const SEPARATOR: &str = "=======\n";
 768    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 769
 770    pub fn special_tokens() -> &'static [&'static str] {
 771        &[
 772            "<|fim_prefix|>",
 773            "<|fim_suffix|>",
 774            "<|fim_middle|>",
 775            "<|file_sep|>",
 776            START_MARKER,
 777            SEPARATOR,
 778            END_MARKER,
 779            CURSOR_MARKER,
 780        ]
 781    }
 782
 783    pub fn write_cursor_excerpt_section(
 784        prompt: &mut String,
 785        path: &Path,
 786        context: &str,
 787        editable_range: &Range<usize>,
 788        cursor_offset: usize,
 789    ) {
 790        let path_str = path.to_string_lossy();
 791        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 792
 793        prompt.push_str("<|fim_prefix|>");
 794        prompt.push_str(&context[..editable_range.start]);
 795
 796        prompt.push_str("<|fim_suffix|>");
 797        prompt.push_str(&context[editable_range.end..]);
 798        if !prompt.ends_with('\n') {
 799            prompt.push('\n');
 800        }
 801
 802        prompt.push_str("<|fim_middle|>");
 803        prompt.push_str(START_MARKER);
 804        prompt.push_str(&context[editable_range.start..cursor_offset]);
 805        prompt.push_str(CURSOR_MARKER);
 806        prompt.push_str(&context[cursor_offset..editable_range.end]);
 807        if !prompt.ends_with('\n') {
 808            prompt.push('\n');
 809        }
 810        prompt.push_str(SEPARATOR);
 811    }
 812}
 813
 814pub mod v0131_git_merge_markers_prefix {
 815    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 816    //!
 817    //! Example prompt:
 818    //!
 819    //! <|file_sep|>path/to/target_file.py
 820    //! <|fim_prefix|>
 821    //! code before editable region
 822    //! <<<<<<< CURRENT
 823    //! code that
 824    //! needs to<|user_cursor|>
 825    //! be rewritten
 826    //! =======
 827    //! <|fim_suffix|>
 828    //! code after editable region
 829    //! <|fim_middle|>
 830    //!
 831    //! Expected output (should be generated by the model):
 832    //!
 833    //! updated
 834    //! code with
 835    //! changes applied
 836    //! >>>>>>> UPDATED
 837
 838    use super::*;
 839
 840    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 841    pub const SEPARATOR: &str = "=======\n";
 842    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 843
 844    pub fn special_tokens() -> &'static [&'static str] {
 845        &[
 846            "<|fim_prefix|>",
 847            "<|fim_suffix|>",
 848            "<|fim_middle|>",
 849            "<|file_sep|>",
 850            START_MARKER,
 851            SEPARATOR,
 852            END_MARKER,
 853            CURSOR_MARKER,
 854        ]
 855    }
 856
 857    pub fn write_cursor_excerpt_section(
 858        prompt: &mut String,
 859        path: &Path,
 860        context: &str,
 861        editable_range: &Range<usize>,
 862        cursor_offset: usize,
 863    ) {
 864        let path_str = path.to_string_lossy();
 865        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 866
 867        prompt.push_str("<|fim_prefix|>");
 868        prompt.push_str(&context[..editable_range.start]);
 869        prompt.push_str(START_MARKER);
 870        prompt.push_str(&context[editable_range.start..cursor_offset]);
 871        prompt.push_str(CURSOR_MARKER);
 872        prompt.push_str(&context[cursor_offset..editable_range.end]);
 873        if !prompt.ends_with('\n') {
 874            prompt.push('\n');
 875        }
 876        prompt.push_str(SEPARATOR);
 877
 878        prompt.push_str("<|fim_suffix|>");
 879        prompt.push_str(&context[editable_range.end..]);
 880        if !prompt.ends_with('\n') {
 881            prompt.push('\n');
 882        }
 883
 884        prompt.push_str("<|fim_middle|>");
 885    }
 886}
 887
 888pub mod v0211_prefill {
 889    use super::*;
 890
 891    pub fn special_tokens() -> &'static [&'static str] {
 892        v0131_git_merge_markers_prefix::special_tokens()
 893    }
 894
 895    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
 896        let editable_region = &context[editable_range.start..editable_range.end];
 897
 898        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
 899        let prefill_len = editable_region.floor_char_boundary(prefill_len);
 900
 901        // Find a token boundary to avoid splitting tokens in the prefill.
 902        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
 903        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
 904        // the \n and consume any consecutive \n characters after it.
 905        let prefill = &editable_region[..prefill_len];
 906        match prefill.rfind('\n') {
 907            Some(pos) => {
 908                let mut end = pos + 1;
 909                while end < editable_region.len()
 910                    && editable_region.as_bytes().get(end) == Some(&b'\n')
 911                {
 912                    end += 1;
 913                }
 914                editable_region[..end].to_string()
 915            }
 916            // No newline found. Fall back to splitting before the last space
 917            // (word-level boundary)
 918            None => match prefill.rfind(' ') {
 919                Some(pos) => prefill[..pos].to_string(),
 920                None => prefill.to_string(),
 921            },
 922        }
 923    }
 924}
 925
 926pub mod hashline {
 927
 928    use std::fmt::Display;
 929
    /// Marker appended at the very end of the prompt by
    /// `write_cursor_excerpt_section`.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Marker written immediately before the hashline-encoded editable region.
    pub const START_MARKER: &str = "<|fim_middle|>current";

    use super::*;

    /// Prefix of a model-output line that starts a "set" command (replace one
    /// line or an inclusive range of lines).
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a model-output line that starts an "insert" command.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
 937
 938    pub fn special_tokens() -> &'static [&'static str] {
 939        return &[
 940            SET_COMMAND_MARKER,
 941            "<|set_range|>",
 942            INSERT_COMMAND_MARKER,
 943            CURSOR_MARKER,
 944            "<|file_sep|>",
 945            "<|fim_prefix|>",
 946            "<|fim_suffix|>",
 947            "<|fim_middle|>",
 948        ];
 949    }
 950
 951    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
 952    #[derive(Debug, Clone, PartialEq, Eq)]
 953    struct LineRef {
 954        index: usize,
 955        hash: u8,
 956    }
 957
 958    impl Display for LineRef {
 959        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 960            write!(f, "{}:{:02x}", self.index, self.hash)
 961        }
 962    }
 963
 964    pub fn hash_line(line: &[u8]) -> u8 {
 965        let mut h: u8 = 0;
 966        for &byte in line {
 967            h = h.wrapping_add(byte);
 968        }
 969        return h;
 970    }
 971
 972    /// Write the hashline-encoded editable region into `out`. Each line of
 973    /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
 974    /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
 975    /// to the start of `editable_text`).
 976    pub fn write_hashline_editable_region(
 977        out: &mut String,
 978        editable_text: &str,
 979        cursor_offset_in_editable: usize,
 980    ) {
 981        let mut offset = 0;
 982        for (i, line) in editable_text.lines().enumerate() {
 983            let (head, cursor, tail) = if cursor_offset_in_editable > offset
 984                && cursor_offset_in_editable < offset + line.len()
 985            {
 986                (
 987                    &line[..cursor_offset_in_editable - offset],
 988                    CURSOR_MARKER,
 989                    &line[cursor_offset_in_editable - offset..],
 990                )
 991            } else {
 992                (line, "", "")
 993            };
 994            write!(
 995                out,
 996                "\n{}|{head}{cursor}{tail}",
 997                LineRef {
 998                    index: i,
 999                    hash: hash_line(line.as_bytes())
1000                }
1001            )
1002            .unwrap();
1003            offset += line.len() + 1;
1004        }
1005    }
1006
1007    pub fn write_cursor_excerpt_section(
1008        prompt: &mut String,
1009        path: &Path,
1010        context: &str,
1011        editable_range: &Range<usize>,
1012        cursor_offset: usize,
1013    ) {
1014        let path_str = path.to_string_lossy();
1015        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1016
1017        prompt.push_str("<|fim_prefix|>\n");
1018        prompt.push_str(&context[..editable_range.start]);
1019        prompt.push_str(START_MARKER);
1020
1021        let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1022        let editable_region = &context[editable_range.clone()];
1023        write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1024
1025        if !prompt.ends_with('\n') {
1026            prompt.push('\n');
1027        }
1028
1029        prompt.push_str("<|fim_suffix|>\n");
1030        prompt.push_str(&context[editable_range.end..]);
1031        if !prompt.ends_with('\n') {
1032            prompt.push('\n');
1033        }
1034
1035        prompt.push_str(END_MARKER);
1036    }
1037
    /// A single edit command parsed from the model output.
    ///
    /// `content` borrows from the model output: it is the text between the
    /// command's own line and the next command line (or the end of the
    /// output), line terminators included.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            /// First line of the replaced range.
            start: LineRef,
            /// Last line of the replaced range (inclusive).
            end: LineRef,
            /// Replacement text for the whole range.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            /// Line to insert after; `None` means "before the first line".
            after: Option<LineRef>,
            /// Text to insert.
            content: &'a str,
        },
    }
1055
1056    /// Parse a line reference like `3:c3` into a `LineRef`.
1057    fn parse_line_ref(s: &str) -> Option<LineRef> {
1058        let (idx_str, hash_str) = s.split_once(':')?;
1059        let index = idx_str.parse::<usize>().ok()?;
1060        let hash = u8::from_str_radix(hash_str, 16).ok()?;
1061        Some(LineRef { index, hash })
1062    }
1063
1064    /// Parse the model output into a list of `EditCommand`s.
1065    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1066        let mut commands = Vec::new();
1067        let mut offset = 0usize;
1068
1069        while offset < model_output.len() {
1070            let next_nl = model_output[offset..]
1071                .find('\n')
1072                .map(|i| offset + i)
1073                .unwrap_or(model_output.len());
1074            let line = &model_output[offset..next_nl];
1075            let line_end = if next_nl < model_output.len() {
1076                next_nl + 1
1077            } else {
1078                next_nl
1079            };
1080
1081            let trimmed = line.trim();
1082            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1083                (true, spec)
1084            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1085                (false, spec)
1086            } else {
1087                offset = line_end;
1088                continue;
1089            };
1090
1091            let mut content_end = line_end;
1092            let mut scan = line_end;
1093
1094            while scan < model_output.len() {
1095                let body_nl = model_output[scan..]
1096                    .find('\n')
1097                    .map(|i| scan + i)
1098                    .unwrap_or(model_output.len());
1099                let body_line = &model_output[scan..body_nl];
1100                if body_line.trim().starts_with(SET_COMMAND_MARKER)
1101                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1102                {
1103                    break;
1104                }
1105                scan = if body_nl < model_output.len() {
1106                    body_nl + 1
1107                } else {
1108                    body_nl
1109                };
1110                content_end = scan;
1111            }
1112
1113            let content = &model_output[line_end..content_end];
1114
1115            if is_set {
1116                if let Some((start_str, end_str)) = specifier.split_once('-') {
1117                    if let (Some(start), Some(end)) =
1118                        (parse_line_ref(start_str), parse_line_ref(end_str))
1119                    {
1120                        commands.push(EditCommand::Set {
1121                            start,
1122                            end,
1123                            content,
1124                        });
1125                    }
1126                } else if let Some(target) = parse_line_ref(specifier) {
1127                    commands.push(EditCommand::Set {
1128                        start: target.clone(),
1129                        end: target,
1130                        content,
1131                    });
1132                }
1133            } else {
1134                let after = parse_line_ref(specifier);
1135                commands.push(EditCommand::Insert { after, content });
1136            }
1137
1138            offset = scan;
1139        }
1140
1141        commands
1142    }
1143
1144    /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1145    /// (as opposed to being a plain full-replacement output).
1146    /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1147    /// editable region, returning the plain text content.
1148    pub fn strip_hashline_prefixes(region: &str) -> String {
1149        let mut decoded: String = region
1150            .lines()
1151            .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1152            .collect::<Vec<_>>()
1153            .join("\n");
1154        if region.ends_with('\n') {
1155            decoded.push('\n');
1156        }
1157        decoded
1158    }
1159
1160    pub fn output_has_edit_commands(model_output: &str) -> bool {
1161        model_output.contains(SET_COMMAND_MARKER) || model_output.contains(INSERT_COMMAND_MARKER)
1162    }
1163
1164    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1165    /// original editable region text.
1166    ///
1167    /// `editable_region` is the original text of the editable region (without hash
1168    /// prefixes). `model_output` is the raw model response containing edit commands.
1169    ///
1170    /// Returns the full replacement text for the editable region.
1171    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1172        let original_lines: Vec<&str> = editable_region.lines().collect();
1173        let old_hashes: Vec<u8> = original_lines
1174            .iter()
1175            .map(|line| hash_line(line.as_bytes()))
1176            .collect();
1177
1178        let commands = parse_edit_commands(model_output);
1179
1180        // For set operations: indexed by start line → Some((end line index, content))
1181        // For insert operations: indexed by line index → vec of content to insert after
1182        // Insert-before-first is tracked separately.
1183        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1184        let mut insert_before_first: Vec<&str> = Vec::new();
1185        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1186
1187        for command in &commands {
1188            match command {
1189                EditCommand::Set {
1190                    start,
1191                    end,
1192                    content,
1193                } => {
1194                    if start.index < old_hashes.len()
1195                        && end.index < old_hashes.len()
1196                        && start.index <= end.index
1197                        && old_hashes[start.index] == start.hash
1198                        && old_hashes[end.index] == end.hash
1199                    {
1200                        set_ops[start.index] = Some((end.index, *content));
1201                    }
1202                }
1203                EditCommand::Insert { after, content } => match after {
1204                    None => insert_before_first.push(*content),
1205                    Some(line_ref) => {
1206                        if line_ref.index < old_hashes.len()
1207                            && old_hashes[line_ref.index] == line_ref.hash
1208                        {
1209                            insert_after[line_ref.index].push(*content);
1210                        }
1211                    }
1212                },
1213            }
1214        }
1215
1216        let mut result = String::new();
1217
1218        // Emit any insertions before the first line
1219        for content in &insert_before_first {
1220            result.push_str(content);
1221            if !content.ends_with('\n') {
1222                result.push('\n');
1223            }
1224        }
1225
1226        let mut i = 0;
1227        while i < original_lines.len() {
1228            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1229                // Replace lines i..=end_index with the replacement content
1230                result.push_str(replacement);
1231                if !replacement.is_empty() && !replacement.ends_with('\n') {
1232                    result.push('\n');
1233                }
1234                // Emit any insertions after the end of this set range
1235                if *end_index < insert_after.len() {
1236                    for content in &insert_after[*end_index] {
1237                        result.push_str(content);
1238                        if !content.ends_with('\n') {
1239                            result.push('\n');
1240                        }
1241                    }
1242                }
1243                i = end_index + 1;
1244            } else {
1245                // Keep the original line
1246                result.push_str(original_lines[i]);
1247                result.push('\n');
1248                // Emit any insertions after this line
1249                for content in &insert_after[i] {
1250                    result.push_str(content);
1251                    if !content.ends_with('\n') {
1252                        result.push('\n');
1253                    }
1254                }
1255                i += 1;
1256            }
1257        }
1258
1259        // Preserve trailing newline behavior: if the original ended with a
1260        // newline the result already has one; if it didn't, trim the extra one
1261        // we added.
1262        if !editable_region.ends_with('\n') && result.ends_with('\n') {
1263            result.pop();
1264        }
1265
1266        result
1267    }
1268
1269    /// Convert a unified diff patch into hashline edit commands.
1270    ///
1271    /// Parses the unified diff `patch` directly to determine which lines of
1272    /// `old_text` are deleted/replaced and what new lines are added, then emits
1273    /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1274    /// `{index}:{hash}` identifiers.
1275    ///
1276    /// `cursor_offset` is an optional byte offset into the first hunk's new
1277    /// text (context + additions) where the cursor marker should be placed.
1278    pub fn patch_to_edit_commands(
1279        old_text: &str,
1280        patch: &str,
1281        cursor_offset: Option<usize>,
1282    ) -> Result<String> {
1283        let old_lines: Vec<&str> = old_text.lines().collect();
1284        let old_hashes: Vec<u8> = old_lines
1285            .iter()
1286            .map(|line| hash_line(line.as_bytes()))
1287            .collect();
1288
1289        let mut result = String::new();
1290        let mut first_hunk = true;
1291
1292        struct Hunk<'a> {
1293            line_range: Range<usize>,
1294            new_text_lines: Vec<&'a str>,
1295            cursor_line_offset_in_new_text: Option<(usize, usize)>,
1296        }
1297
1298        // Parse the patch line by line. We only care about hunk headers,
1299        // context, deletions, and additions.
1300        let mut old_line_index: usize = 0;
1301        let mut current_hunk: Option<Hunk> = None;
1302        // Byte offset tracking within the hunk's new text for cursor placement.
1303        let mut new_text_byte_offset: usize = 0;
1304        // The line index of the last old line seen before/in the current hunk
1305        // (used for insert-after reference).
1306        let mut last_old_line_before_hunk: Option<usize> = None;
1307
1308        fn flush_hunk(
1309            hunk: Hunk,
1310            last_old_line: Option<usize>,
1311            result: &mut String,
1312            old_hashes: &[u8],
1313        ) {
1314            if hunk.line_range.is_empty() {
1315                // Pure insertion — reference the old line to insert after when in bounds.
1316                if let Some(after) = last_old_line
1317                    && let Some(&hash) = old_hashes.get(after)
1318                {
1319                    write!(
1320                        result,
1321                        "{INSERT_COMMAND_MARKER}{}\n",
1322                        LineRef { index: after, hash }
1323                    )
1324                    .unwrap();
1325                } else {
1326                    result.push_str(INSERT_COMMAND_MARKER);
1327                    result.push('\n');
1328                }
1329            } else {
1330                let start = hunk.line_range.start;
1331                let end_exclusive = hunk.line_range.end;
1332                let deleted_line_count = end_exclusive.saturating_sub(start);
1333
1334                if deleted_line_count == 1 {
1335                    if let Some(&hash) = old_hashes.get(start) {
1336                        write!(
1337                            result,
1338                            "{SET_COMMAND_MARKER}{}\n",
1339                            LineRef { index: start, hash }
1340                        )
1341                        .unwrap();
1342                    } else {
1343                        result.push_str(SET_COMMAND_MARKER);
1344                        result.push('\n');
1345                    }
1346                } else {
1347                    let end_inclusive = end_exclusive - 1;
1348                    match (
1349                        old_hashes.get(start).copied(),
1350                        old_hashes.get(end_inclusive).copied(),
1351                    ) {
1352                        (Some(start_hash), Some(end_hash)) => {
1353                            write!(
1354                                result,
1355                                "{SET_COMMAND_MARKER}{}-{}\n",
1356                                LineRef {
1357                                    index: start,
1358                                    hash: start_hash
1359                                },
1360                                LineRef {
1361                                    index: end_inclusive,
1362                                    hash: end_hash
1363                                }
1364                            )
1365                            .unwrap();
1366                        }
1367                        _ => {
1368                            result.push_str(SET_COMMAND_MARKER);
1369                            result.push('\n');
1370                        }
1371                    }
1372                }
1373            }
1374            for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1375                if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1376                    && line_offset == cursor_line_offset
1377                {
1378                    result.push_str(&line[..char_offset]);
1379                    result.push_str(CURSOR_MARKER);
1380                    result.push_str(&line[char_offset..]);
1381                    continue;
1382                }
1383
1384                result.push_str(line);
1385            }
1386        }
1387
1388        for raw_line in patch.split_inclusive('\n') {
1389            if raw_line.starts_with("@@") {
1390                // Flush any pending change hunk from a previous patch hunk.
1391                if let Some(hunk) = current_hunk.take() {
1392                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1393                }
1394
1395                // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1396                // We intentionally do not trust old_start as a direct local index into `old_text`,
1397                // because some patches are produced against a larger file region and carry
1398                // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1399                if first_hunk {
1400                    new_text_byte_offset = 0;
1401                    first_hunk = false;
1402                }
1403                continue;
1404            }
1405
1406            if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1407                continue;
1408            }
1409            if raw_line.starts_with("\\ No newline") {
1410                continue;
1411            }
1412
1413            if raw_line.starts_with('-') {
1414                // Extend or start a change hunk with this deleted old line.
1415                match &mut current_hunk {
1416                    Some(Hunk {
1417                        line_range: range, ..
1418                    }) => range.end = old_line_index + 1,
1419                    None => {
1420                        current_hunk = Some(Hunk {
1421                            line_range: old_line_index..old_line_index + 1,
1422                            new_text_lines: Vec::new(),
1423                            cursor_line_offset_in_new_text: None,
1424                        });
1425                    }
1426                }
1427                old_line_index += 1;
1428            } else if let Some(added_content) = raw_line.strip_prefix('+') {
1429                // Place cursor marker if cursor_offset falls within this line.
1430                let mut cursor_line_offset = None;
1431                if let Some(cursor_off) = cursor_offset
1432                    && (first_hunk
1433                        || cursor_off >= new_text_byte_offset
1434                            && cursor_off <= new_text_byte_offset + added_content.len())
1435                {
1436                    let line_offset = added_content.floor_char_boundary(
1437                        cursor_off
1438                            .saturating_sub(new_text_byte_offset)
1439                            .min(added_content.len()),
1440                    );
1441                    cursor_line_offset = Some(line_offset);
1442                }
1443
1444                new_text_byte_offset += added_content.len();
1445
1446                let hunk = current_hunk.get_or_insert(Hunk {
1447                    line_range: old_line_index..old_line_index,
1448                    new_text_lines: vec![],
1449                    cursor_line_offset_in_new_text: None,
1450                });
1451                hunk.new_text_lines.push(added_content);
1452                hunk.cursor_line_offset_in_new_text = cursor_line_offset
1453                    .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1454            } else {
1455                // Context line (starts with ' ' or is empty).
1456                if let Some(hunk) = current_hunk.take() {
1457                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1458                }
1459                last_old_line_before_hunk = Some(old_line_index);
1460                old_line_index += 1;
1461                let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1462                new_text_byte_offset += content.len();
1463            }
1464        }
1465
1466        // Flush final group.
1467        if let Some(hunk) = current_hunk.take() {
1468            flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1469        }
1470
1471        // Trim a single trailing newline.
1472        if result.ends_with('\n') {
1473            result.pop();
1474        }
1475
1476        Ok(result)
1477    }
1478
1479    #[cfg(test)]
1480    mod tests {
1481        use super::*;
1482        use indoc::indoc;
1483
        // Table-driven test for `hashline::write_cursor_excerpt_section`: each
        // case renders a prompt for the path `test.rs` and compares it with
        // the exact expected text, covering cursor placement, multi-byte
        // characters, and prefix/suffix context around the editable region.
        #[test]
        fn test_format_cursor_region() {
            struct Case {
                // Label included in the assertion message on failure.
                name: &'static str,
                // Full excerpt text passed as `context`.
                context: &'static str,
                // Byte range of the editable region within `context`.
                editable_range: Range<usize>,
                // Cursor position as a byte offset into `context`.
                cursor_offset: usize,
                // Exact prompt text the function must produce.
                expected: &'static str,
            }

            let cases = [
                Case {
                    name: "basic_cursor_placement",
                    context: "hello world\n",
                    editable_range: 0..12,
                    cursor_offset: 5,
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:5c|hello<|user_cursor|> world
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "multiline_cursor_on_second_line",
                    context: "aaa\nbbb\nccc\n",
                    editable_range: 0..12,
                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:23|aaa
                    1:26|b<|user_cursor|>bb
                    2:29|ccc
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "no_trailing_newline_in_context",
                    context: "line1\nline2",
                    editable_range: 0..11,
                    cursor_offset: 3,
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:d9|lin<|user_cursor|>e1
                    1:da|line2
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "leading_newline_in_editable_region",
                    context: "\nabc\n",
                    editable_range: 0..5,
                    cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:00|
                    1:26|a<|user_cursor|>bc
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "with_suffix",
                    context: "abc\ndef",
                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
                    cursor_offset: 2,
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:26|ab<|user_cursor|>c
                    <|fim_suffix|>
                    def
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "unicode_two_byte_chars",
                    context: "héllo\n",
                    editable_range: 0..7,
                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:1b|hé<|user_cursor|>llo
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "unicode_three_byte_chars",
                    context: "日本語\n",
                    editable_range: 0..10,
                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:80|日本<|user_cursor|>語
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "unicode_four_byte_chars",
                    context: "a🌍b\n",
                    editable_range: 0..7,
                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:6b|a🌍<|user_cursor|>b
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "cursor_at_start_of_region_not_placed",
                    context: "abc\n",
                    editable_range: 0..4,
                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:26|abc
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "cursor_at_end_of_line_not_placed",
                    context: "abc\ndef\n",
                    editable_range: 0..8,
                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    <|fim_middle|>current
                    0:26|abc
                    1:2f|def
                    <|fim_suffix|>
                    <|fim_middle|>updated"},
                },
                Case {
                    name: "cursor_offset_relative_to_context_not_editable_region",
                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
                    // write_cursor_excerpt_section must subtract it before comparing against
                    // per-line offsets within the editable region.
                    context: "pre\naaa\nbbb\nsuf\n",
                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
                    expected: indoc! {"
                    <|file_sep|>test.rs
                    <|fim_prefix|>
                    pre
                    <|fim_middle|>current
                    0:23|aaa
                    1:26|b<|user_cursor|>bb
                    <|fim_suffix|>
                    suf
                    <|fim_middle|>updated"},
                },
            ];

            // Render each case into a fresh prompt and compare verbatim.
            for case in &cases {
                let mut prompt = String::new();
                hashline::write_cursor_excerpt_section(
                    &mut prompt,
                    Path::new("test.rs"),
                    case.context,
                    &case.editable_range,
                    case.cursor_offset,
                );
                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
            }
        }
1664
/// Table-driven test for `hashline::apply_edit_commands`.
///
/// Each case feeds an original text plus raw model output (containing
/// `<|set|>` / `<|insert|>` commands addressed by `line:hash` references)
/// and asserts the exact text that results. Commands that are malformed,
/// out of bounds, or carry a hash that does not match the addressed line
/// are expected to leave the original text untouched.
#[test]
fn test_apply_edit_commands() {
    struct Case {
        /// Label printed when the case fails.
        name: &'static str,
        /// Text the commands are applied to.
        original: &'static str,
        /// Raw model output containing the edit commands.
        model_output: &'static str,
        /// Text expected after applying the commands.
        expected: &'static str,
    }

    // A fixed-size array is enough here; no heap-allocated `Vec` is needed.
    let cases = [
        // ---- well-formed commands ----
        Case {
            name: "set_single_line",
            original: indoc! {"
            let mut total = 0;
            for product in products {
                total += ;
            }
            total
        "},
            model_output: indoc! {"
            <|set|>2:87
                total += product.price;
        "},
            expected: indoc! {"
            let mut total = 0;
            for product in products {
                total += product.price;
            }
            total
        "},
        },
        Case {
            name: "set_range",
            original: indoc! {"
            fn foo() {
                let x = 1;
                let y = 2;
                let z = 3;
            }
        "},
            model_output: indoc! {"
            <|set|>1:46-3:4a
                let sum = 6;
        "},
            expected: indoc! {"
            fn foo() {
                let sum = 6;
            }
        "},
        },
        Case {
            name: "insert_after_line",
            original: indoc! {"
            fn main() {
                let x = 1;
            }
        "},
            model_output: indoc! {"
            <|insert|>1:46
                let y = 2;
        "},
            expected: indoc! {"
            fn main() {
                let x = 1;
                let y = 2;
            }
        "},
        },
        Case {
            // An `<|insert|>` without a line reference inserts before the first line.
            name: "insert_before_first",
            original: indoc! {"
            let x = 1;
            let y = 2;
        "},
            model_output: indoc! {"
            <|insert|>
            use std::io;
        "},
            expected: indoc! {"
            use std::io;
            let x = 1;
            let y = 2;
        "},
        },
        Case {
            // The cursor marker in the replacement text is passed through verbatim.
            name: "set_with_cursor_marker",
            original: indoc! {"
            fn main() {
                println!();
            }
        "},
            model_output: indoc! {"
            <|set|>1:34
                eprintln!(\"<|user_cursor|>\");
        "},
            expected: indoc! {"
            fn main() {
                eprintln!(\"<|user_cursor|>\");
            }
        "},
        },
        Case {
            name: "multiple_set_commands",
            original: indoc! {"
            aaa
            bbb
            ccc
            ddd
        "},
            model_output: indoc! {"
            <|set|>0:23
            AAA
            <|set|>2:29
            CCC
        "},
            expected: indoc! {"
            AAA
            bbb
            CCC
            ddd
        "},
        },
        Case {
            name: "set_range_multiline_replacement",
            original: indoc! {"
            fn handle_submit() {
            }

            fn handle_keystroke() {
        "},
            model_output: indoc! {"
            <|set|>0:3f-1:7d
            fn handle_submit(modal_state: &mut ModalState) {
                <|user_cursor|>
            }
        "},
            expected: indoc! {"
            fn handle_submit(modal_state: &mut ModalState) {
                <|user_cursor|>
            }

            fn handle_keystroke() {
        "},
        },
        // ---- output without usable commands ----
        Case {
            name: "no_edit_commands_returns_original",
            original: indoc! {"
            hello
            world
        "},
            model_output: "some random text with no commands",
            expected: indoc! {"
            hello
            world
        "},
        },
        Case {
            name: "wrong_hash_set_ignored",
            original: indoc! {"
            aaa
            bbb
        "},
            model_output: indoc! {"
            <|set|>0:ff
            ZZZ
        "},
            expected: indoc! {"
            aaa
            bbb
        "},
        },
        Case {
            name: "insert_and_set_combined",
            original: indoc! {"
            alpha
            beta
            gamma
        "},
            model_output: indoc! {"
            <|set|>0:06
            ALPHA
            <|insert|>1:9c
            beta_extra
        "},
            expected: indoc! {"
            ALPHA
            beta
            beta_extra
            gamma
        "},
        },
        Case {
            // The original has no trailing newline and the result must not gain one.
            name: "no_trailing_newline_preserved",
            original: "hello\nworld",
            model_output: indoc! {"
            <|set|>0:14
            HELLO
        "},
            expected: "HELLO\nworld",
        },
        // ---- invalid references are ignored ----
        Case {
            name: "set_range_hash_mismatch_in_end_bound",
            original: indoc! {"
            one
            two
            three
        "},
            model_output: indoc! {"
            <|set|>0:42-2:ff
            ONE_TWO_THREE
        "},
            expected: indoc! {"
            one
            two
            three
        "},
        },
        Case {
            name: "set_range_start_greater_than_end_ignored",
            original: indoc! {"
            a
            b
            c
        "},
            model_output: indoc! {"
            <|set|>2:63-1:62
            X
        "},
            expected: indoc! {"
            a
            b
            c
        "},
        },
        Case {
            name: "insert_out_of_bounds_ignored",
            original: indoc! {"
            x
            y
        "},
            model_output: indoc! {"
            <|insert|>99:aa
            z
        "},
            expected: indoc! {"
            x
            y
        "},
        },
        Case {
            name: "set_out_of_bounds_ignored",
            original: indoc! {"
            x
            y
        "},
            model_output: indoc! {"
            <|set|>99:aa
            z
        "},
            expected: indoc! {"
            x
            y
        "},
        },
        Case {
            name: "malformed_set_command_ignored",
            original: indoc! {"
            alpha
            beta
        "},
            model_output: indoc! {"
            <|set|>not-a-line-ref
            UPDATED
        "},
            expected: indoc! {"
            alpha
            beta
        "},
        },
        Case {
            name: "malformed_insert_hash_treated_as_before_first",
            original: indoc! {"
            alpha
            beta
        "},
            model_output: indoc! {"
            <|insert|>1:nothex
            preamble
        "},
            expected: indoc! {"
            preamble
            alpha
            beta
        "},
        },
        // ---- interactions between several commands ----
        Case {
            name: "set_then_insert_same_target_orders_insert_after_replacement",
            original: indoc! {"
            cat
            dog
        "},
            model_output: indoc! {"
            <|set|>0:38
            CAT
            <|insert|>0:38
            TAIL
        "},
            expected: indoc! {"
            CAT
            TAIL
            dog
        "},
        },
        Case {
            // The expectation keeps the FIRST overlapping range and drops the
            // second one entirely, so the label says "first wins".
            name: "overlapping_set_ranges_first_wins",
            original: indoc! {"
            a
            b
            c
            d
        "},
            model_output: indoc! {"
            <|set|>0:61-2:63
            FIRST
            <|set|>1:62-3:64
            SECOND
        "},
            expected: indoc! {"
            FIRST
            d
        "},
        },
        Case {
            name: "insert_before_first_and_after_line",
            original: indoc! {"
            a
            b
        "},
            model_output: indoc! {"
            <|insert|>
            HEAD
            <|insert|>0:61
            MID
        "},
            expected: indoc! {"
            HEAD
            a
            MID
            b
        "},
        },
    ];

    for case in &cases {
        // `model_output` is already a `&str`; no extra borrow is required.
        let result = hashline::apply_edit_commands(case.original, case.model_output);
        assert_eq!(result, case.expected, "failed case: {}", case.name);
    }
}
2023
/// Checks that `hashline::output_has_edit_commands` recognises model output
/// containing a set or insert command marker, and rejects plain text.
#[test]
fn test_output_has_edit_commands() {
    // Outputs that contain a command marker, at the start or after other text.
    let outputs_with_commands = [
        format!("{}0:ab\nnew", SET_COMMAND_MARKER),
        format!("{}0:ab\nnew", INSERT_COMMAND_MARKER),
        format!("some text\n{}1:cd\nstuff", SET_COMMAND_MARKER),
    ];
    for output in &outputs_with_commands {
        assert!(
            hashline::output_has_edit_commands(output),
            "expected edit commands in {output:?}"
        );
    }

    // Outputs without any command marker.
    let outputs_without_commands = ["just plain text", "NO_EDITS"];
    for output in &outputs_without_commands {
        assert!(
            !hashline::output_has_edit_commands(output),
            "expected no edit commands in {output:?}"
        );
    }
}
2041
2042        // ---- hashline::patch_to_edit_commands round-trip tests ----
2043
/// Round-trip test for `hashline::patch_to_edit_commands`.
///
/// For every scenario the unified-diff hunk is converted into edit commands,
/// the commands are applied back onto the old text with
/// `hashline::apply_edit_commands`, and the outcome must equal the expected
/// new text.
#[test]
fn test_patch_to_edit_commands() {
    struct PatchCase {
        /// Label printed when the scenario fails.
        name: &'static str,
        /// Text before the patch.
        old: &'static str,
        /// Unified-diff hunk describing the change.
        patch: &'static str,
        /// Text expected after the round trip; may contain the cursor marker.
        expected_new: &'static str,
    }

    let scenarios = [
        PatchCase {
            name: "single_line_replacement",
            old: indoc! {"
            let mut total = 0;
            for product in products {
                total += ;
            }
            total
        "},
            patch: indoc! {"
            @@ -1,5 +1,5 @@
             let mut total = 0;
             for product in products {
            -    total += ;
            +    total += product.price;
             }
             total
        "},
            expected_new: indoc! {"
            let mut total = 0;
            for product in products {
                total += product.price;
            }
            total
        "},
        },
        PatchCase {
            name: "multiline_replacement",
            old: indoc! {"
            fn foo() {
                let x = 1;
                let y = 2;
                let z = 3;
            }
        "},
            patch: indoc! {"
            @@ -1,5 +1,3 @@
             fn foo() {
            -    let x = 1;
            -    let y = 2;
            -    let z = 3;
            +    let sum = 1 + 2 + 3;
             }
        "},
            expected_new: indoc! {"
            fn foo() {
                let sum = 1 + 2 + 3;
            }
        "},
        },
        PatchCase {
            name: "insertion",
            old: indoc! {"
            fn main() {
                let x = 1;
            }
        "},
            patch: indoc! {"
            @@ -1,3 +1,4 @@
             fn main() {
                 let x = 1;
            +    let y = 2;
             }
        "},
            expected_new: indoc! {"
            fn main() {
                let x = 1;
                let y = 2;
            }
        "},
        },
        PatchCase {
            name: "insertion_before_first",
            old: indoc! {"
            let x = 1;
            let y = 2;
        "},
            patch: indoc! {"
            @@ -1,2 +1,3 @@
            +use std::io;
             let x = 1;
             let y = 2;
        "},
            expected_new: indoc! {"
            use std::io;
            let x = 1;
            let y = 2;
        "},
        },
        PatchCase {
            name: "deletion",
            old: indoc! {"
            aaa
            bbb
            ccc
            ddd
        "},
            patch: indoc! {"
            @@ -1,4 +1,2 @@
             aaa
            -bbb
            -ccc
             ddd
        "},
            expected_new: indoc! {"
            aaa
            ddd
        "},
        },
        PatchCase {
            name: "multiple_changes",
            old: indoc! {"
            alpha
            beta
            gamma
            delta
            epsilon
        "},
            patch: indoc! {"
            @@ -1,5 +1,5 @@
            -alpha
            +ALPHA
             beta
             gamma
            -delta
            +DELTA
             epsilon
        "},
            expected_new: indoc! {"
            ALPHA
            beta
            gamma
            DELTA
            epsilon
        "},
        },
        PatchCase {
            name: "replace_with_insertion",
            old: indoc! {r#"
            fn handle() {
                modal_state.close();
                modal_state.dismiss();
        "#},
            patch: indoc! {r#"
            @@ -1,3 +1,4 @@
             fn handle() {
                 modal_state.close();
            +    eprintln!("");
                 modal_state.dismiss();
        "#},
            expected_new: indoc! {r#"
            fn handle() {
                modal_state.close();
                eprintln!("");
                modal_state.dismiss();
        "#},
        },
        PatchCase {
            name: "complete_replacement",
            old: indoc! {"
            aaa
            bbb
            ccc
        "},
            patch: indoc! {"
            @@ -1,3 +1,3 @@
            -aaa
            -bbb
            -ccc
            +xxx
            +yyy
            +zzz
        "},
            expected_new: indoc! {"
            xxx
            yyy
            zzz
        "},
        },
        PatchCase {
            name: "add_function_body",
            old: indoc! {"
            fn foo() {
                modal_state.dismiss();
            }

            fn

            fn handle_keystroke() {
        "},
            patch: indoc! {"
            @@ -1,6 +1,8 @@
             fn foo() {
                 modal_state.dismiss();
             }

            -fn
            +fn handle_submit() {
            +    todo()
            +}

             fn handle_keystroke() {
        "},
            expected_new: indoc! {"
            fn foo() {
                modal_state.dismiss();
            }

            fn handle_submit() {
                todo()
            }

            fn handle_keystroke() {
        "},
        },
        PatchCase {
            // The patch itself has no cursor marker; the expected text does,
            // and its position is handed to the conversion as the cursor offset.
            name: "with_cursor_offset",
            old: indoc! {r#"
            fn main() {
                println!();
            }
        "#},
            patch: indoc! {r#"
            @@ -1,3 +1,3 @@
             fn main() {
            -    println!();
            +    eprintln!("");
             }
        "#},
            expected_new: indoc! {r#"
            fn main() {
                eprintln!("<|user_cursor|>");
            }
        "#},
        },
        PatchCase {
            // The hunk header points at line 20 although the old text has two lines.
            name: "non_local_hunk_header_pure_insertion_repro",
            old: indoc! {"
            aaa
            bbb
        "},
            patch: indoc! {"
            @@ -20,2 +20,3 @@
             aaa
            +xxx
             bbb
        "},
            expected_new: indoc! {"
            aaa
            xxx
            bbb
        "},
        },
    ];

    for scenario in &scenarios {
        let PatchCase {
            name,
            old,
            patch,
            expected_new,
        } = scenario;

        // The cursor offset handed to `patch_to_edit_commands` is relative to
        // the first hunk's new text (context + additions). The expected output
        // mirrors that new text, so the marker's position in it is the offset.
        let cursor_offset = expected_new.find(CURSOR_MARKER);

        let commands = match hashline::patch_to_edit_commands(old, patch, cursor_offset) {
            Ok(commands) => commands,
            Err(e) => panic!("failed case {}: {e}", name),
        };

        assert!(
            hashline::output_has_edit_commands(&commands),
            "case {}: expected edit commands, got: {commands:?}",
            name,
        );

        let round_tripped = hashline::apply_edit_commands(old, &commands);
        assert_eq!(round_tripped, *expected_new, "case {}", name);
    }
}
2330    }
2331}
2332
2333pub mod seed_coder {
2334    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2335    //!
2336    //! Seed-Coder uses different FIM tokens and order than Qwen:
2337    //! - SPM order: suffix comes FIRST, then prefix, then middle
2338    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2339    //! - File markers: StarCoder-style `<filename>path` (single token + path)
2340    //!
2341    //! All context (related files, edit history) goes in the PREFIX section.
2342    //! The suffix contains only code after the editable region.
2343    //!
2344    //! Example prompt:
2345    //!
2346    //! <[fim-suffix]>
2347    //! code after editable region
2348    //! <[fim-prefix]><filename>related/file.py
2349    //! related file content
2350    //!
2351    //! <filename>edit_history
2352    //! --- a/some_file.py
2353    //! +++ b/some_file.py
2354    //! -old
2355    //! +new
2356    //!
2357    //! <filename>path/to/target_file.py
2358    //! code before editable region
2359    //! <<<<<<< CURRENT
2360    //! code that
2361    //! needs to<|user_cursor|>
2362    //! be rewritten
2363    //! =======
2364    //! <[fim-middle]>
2365    //!
2366    //! Expected output (model generates):
2367    //!
2368    //! updated
2369    //! code with
2370    //! changes applied
2371    //! >>>>>>> UPDATED
2372
2373    use super::*;
2374
2375    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
2376    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
2377    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
2378    pub const FILE_MARKER: &str = "<filename>";
2379
2380    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
2381    pub const SEPARATOR: &str = "=======\n";
2382    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
2383
2384    pub fn special_tokens() -> &'static [&'static str] {
2385        &[
2386            FIM_SUFFIX,
2387            FIM_PREFIX,
2388            FIM_MIDDLE,
2389            FILE_MARKER,
2390            START_MARKER,
2391            SEPARATOR,
2392            END_MARKER,
2393            CURSOR_MARKER,
2394        ]
2395    }
2396
2397    pub fn write_cursor_excerpt_section(
2398        prompt: &mut String,
2399        path: &Path,
2400        context: &str,
2401        editable_range: &Range<usize>,
2402        cursor_offset: usize,
2403    ) {
2404        let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2405        prompt.push_str(&section);
2406    }
2407
2408    pub fn format_prompt_with_budget(
2409        path: &Path,
2410        context: &str,
2411        editable_range: &Range<usize>,
2412        cursor_offset: usize,
2413        events: &[Arc<Event>],
2414        related_files: &[RelatedFile],
2415        max_tokens: usize,
2416    ) -> String {
2417        let suffix_section = build_suffix_section(context, editable_range);
2418        let cursor_prefix_section =
2419            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2420
2421        let suffix_tokens = estimate_tokens(suffix_section.len());
2422        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
2423        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2424
2425        let edit_history_section = super::format_edit_history_within_budget(
2426            events,
2427            FILE_MARKER,
2428            "edit_history",
2429            budget_after_cursor,
2430        );
2431        let edit_history_tokens = estimate_tokens(edit_history_section.len());
2432        let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
2433
2434        let related_files_section = super::format_related_files_within_budget(
2435            related_files,
2436            FILE_MARKER,
2437            "",
2438            budget_after_edit_history,
2439        );
2440
2441        let mut prompt = String::new();
2442        prompt.push_str(&suffix_section);
2443        prompt.push_str(FIM_PREFIX);
2444        prompt.push_str(&related_files_section);
2445        if !related_files_section.is_empty() {
2446            prompt.push('\n');
2447        }
2448        prompt.push_str(&edit_history_section);
2449        if !edit_history_section.is_empty() {
2450            prompt.push('\n');
2451        }
2452        prompt.push_str(&cursor_prefix_section);
2453        prompt.push_str(FIM_MIDDLE);
2454        prompt
2455    }
2456
2457    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2458        let mut section = String::new();
2459        section.push_str(FIM_SUFFIX);
2460        section.push_str(&context[editable_range.end..]);
2461        if !section.ends_with('\n') {
2462            section.push('\n');
2463        }
2464        section
2465    }
2466
2467    fn build_cursor_prefix_section(
2468        path: &Path,
2469        context: &str,
2470        editable_range: &Range<usize>,
2471        cursor_offset: usize,
2472    ) -> String {
2473        let mut section = String::new();
2474        let path_str = path.to_string_lossy();
2475        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2476
2477        section.push_str(&context[..editable_range.start]);
2478        section.push_str(START_MARKER);
2479        section.push_str(&context[editable_range.start..cursor_offset]);
2480        section.push_str(CURSOR_MARKER);
2481        section.push_str(&context[cursor_offset..editable_range.end]);
2482        if !section.ends_with('\n') {
2483            section.push('\n');
2484        }
2485        section.push_str(SEPARATOR);
2486        section
2487    }
2488}
2489
2490/// The zeta1 prompt format
2491pub mod zeta1 {
2492    use super::*;
2493    use std::fmt::Write;
2494
2495    pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
2496    pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
2497    pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
2498    pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";
2499
2500    const INSTRUCTION_HEADER: &str = concat!(
2501        "### Instruction:\n",
2502        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
2503        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
2504        "into account the cursor location.\n\n",
2505        "### User Edits:\n\n"
2506    );
2507    const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
2508    const RESPONSE_HEADER: &str = "\n\n### Response:\n";
2509
2510    /// Formats a complete zeta1 prompt from the input events and excerpt.
2511    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
2512        let mut prompt = String::with_capacity(
2513            INSTRUCTION_HEADER.len()
2514                + input_events.len()
2515                + EXCERPT_HEADER.len()
2516                + input_excerpt.len()
2517                + RESPONSE_HEADER.len(),
2518        );
2519        prompt.push_str(INSTRUCTION_HEADER);
2520        prompt.push_str(input_events);
2521        prompt.push_str(EXCERPT_HEADER);
2522        prompt.push_str(input_excerpt);
2523        prompt.push_str(RESPONSE_HEADER);
2524        prompt
2525    }
2526
2527    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
2528    /// editable and context byte-offset ranges within `cursor_excerpt`.
2529    pub fn format_zeta1_from_input(
2530        input: &ZetaPromptInput,
2531        editable_range: Range<usize>,
2532        context_range: Range<usize>,
2533    ) -> String {
2534        let events = format_zeta1_events(&input.events);
2535        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
2536        format_zeta1_prompt(&events, &excerpt)
2537    }
2538
2539    /// Formats events in zeta1 style (oldest first).
2540    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
2541        let mut result = String::new();
2542        for event in events {
2543            let event_string = format_zeta1_event(event);
2544            if event_string.is_empty() {
2545                continue;
2546            }
2547            if !result.is_empty() {
2548                result.push_str("\n\n");
2549            }
2550            result.push_str(&event_string);
2551        }
2552        result
2553    }
2554
2555    fn format_zeta1_event(event: &Event) -> String {
2556        match event {
2557            Event::BufferChange {
2558                path,
2559                old_path,
2560                diff,
2561                ..
2562            } => {
2563                let mut prompt = String::new();
2564                if old_path != path {
2565                    writeln!(
2566                        prompt,
2567                        "User renamed {} to {}\n",
2568                        old_path.display(),
2569                        path.display()
2570                    )
2571                    .ok();
2572                }
2573                if !diff.is_empty() {
2574                    write!(
2575                        prompt,
2576                        "User edited {}:\n```diff\n{}\n```",
2577                        path.display(),
2578                        diff
2579                    )
2580                    .ok();
2581                }
2582                prompt
2583            }
2584        }
2585    }
2586
2587    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
2588    /// within `cursor_excerpt`.
2589    fn format_zeta1_excerpt(
2590        input: &ZetaPromptInput,
2591        editable_range: Range<usize>,
2592        context_range: Range<usize>,
2593    ) -> String {
2594        let path_str = input.cursor_path.to_string_lossy();
2595        let excerpt = &*input.cursor_excerpt;
2596        let cursor_offset = input.cursor_offset_in_excerpt;
2597
2598        let mut prompt = String::new();
2599        writeln!(&mut prompt, "```{path_str}").ok();
2600
2601        let starts_at_file_beginning =
2602            input.excerpt_start_row == Some(0) && context_range.start == 0;
2603        if starts_at_file_beginning {
2604            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
2605        }
2606
2607        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
2608
2609        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
2610        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
2611        prompt.push_str(CURSOR_MARKER);
2612        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
2613        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
2614
2615        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
2616        write!(prompt, "\n```").ok();
2617
2618        prompt
2619    }
2620
2621    /// Cleans zeta1 model output by extracting content between editable region
2622    /// markers and converting the zeta1 cursor marker to the universal one.
2623    /// Returns `None` if the output doesn't contain the expected markers.
2624    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
2625        let content = output.replace(CURSOR_MARKER, "");
2626
2627        let content_start = content
2628            .find(EDITABLE_REGION_START_MARKER)
2629            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
2630            .map(|pos| {
2631                if content.as_bytes().get(pos) == Some(&b'\n') {
2632                    pos + 1
2633                } else {
2634                    pos
2635                }
2636            })
2637            .unwrap_or(0);
2638
2639        let content_end = content
2640            .find(EDITABLE_REGION_END_MARKER)
2641            .map(|pos| {
2642                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
2643                    pos - 1
2644                } else {
2645                    pos
2646                }
2647            })
2648            .unwrap_or(content.len());
2649
2650        if content_start > content_end {
2651            return Some(String::new());
2652        }
2653
2654        let extracted = &content[content_start..content_end];
2655
2656        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
2657            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
2658            let text_before_cursor = text_before_cursor
2659                .find(EDITABLE_REGION_START_MARKER)
2660                .map(|pos| {
2661                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
2662                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
2663                        after_marker + 1
2664                    } else {
2665                        after_marker
2666                    }
2667                })
2668                .unwrap_or(0);
2669            let offset_in_extracted = zeta1_cursor_pos
2670                .saturating_sub(text_before_cursor)
2671                .min(extracted.len());
2672            offset_in_extracted
2673        });
2674
2675        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
2676        if let Some(offset) = cursor_offset {
2677            result.push_str(&extracted[..offset]);
2678            result.push_str(super::CURSOR_MARKER);
2679            result.push_str(&extracted[offset..]);
2680        } else {
2681            result.push_str(extracted);
2682        }
2683
2684        Some(result)
2685    }
2686}
2687
2688#[cfg(test)]
2689mod tests {
2690    use super::*;
2691    use indoc::indoc;
2692
2693    fn make_input(
2694        cursor_excerpt: &str,
2695        editable_range: Range<usize>,
2696        cursor_offset: usize,
2697        events: Vec<Event>,
2698        related_files: Vec<RelatedFile>,
2699    ) -> ZetaPromptInput {
2700        let context_range = 0..cursor_excerpt.len();
2701        ZetaPromptInput {
2702            cursor_path: Path::new("test.rs").into(),
2703            cursor_excerpt: cursor_excerpt.into(),
2704            cursor_offset_in_excerpt: cursor_offset,
2705            excerpt_start_row: None,
2706            events: events.into_iter().map(Arc::new).collect(),
2707            related_files,
2708            excerpt_ranges: ExcerptRanges {
2709                editable_150: editable_range.clone(),
2710                editable_180: editable_range.clone(),
2711                editable_350: editable_range,
2712                editable_150_context_350: context_range.clone(),
2713                editable_180_context_350: context_range.clone(),
2714                editable_350_context_150: context_range,
2715                ..Default::default()
2716            },
2717            experiment: None,
2718            in_open_source_repo: false,
2719            can_collect_data: false,
2720            repo_url: None,
2721        }
2722    }
2723
2724    fn make_event(path: &str, diff: &str) -> Event {
2725        Event::BufferChange {
2726            path: Path::new(path).into(),
2727            old_path: Path::new(path).into(),
2728            diff: diff.to_string(),
2729            predicted: false,
2730            in_open_source_repo: false,
2731        }
2732    }
2733
2734    fn make_related_file(path: &str, content: &str) -> RelatedFile {
2735        RelatedFile {
2736            path: Path::new(path).into(),
2737            max_row: content.lines().count() as u32,
2738            excerpts: vec![RelatedExcerpt {
2739                row_range: 0..content.lines().count() as u32,
2740                text: content.into(),
2741                order: 0,
2742            }],
2743            in_open_source_repo: false,
2744        }
2745    }
2746
2747    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
2748        format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
2749    }
2750
2751    #[test]
2752    fn test_no_truncation_when_within_budget() {
2753        let input = make_input(
2754            "prefix\neditable\nsuffix",
2755            7..15,
2756            10,
2757            vec![make_event("a.rs", "-old\n+new\n")],
2758            vec![make_related_file("related.rs", "fn helper() {}\n")],
2759        );
2760
2761        assert_eq!(
2762            format_with_budget(&input, 10000),
2763            indoc! {r#"
2764                <|file_sep|>related.rs
2765                fn helper() {}
2766                <|file_sep|>edit history
2767                --- a/a.rs
2768                +++ b/a.rs
2769                -old
2770                +new
2771                <|file_sep|>test.rs
2772                <|fim_prefix|>
2773                prefix
2774                <|fim_middle|>current
2775                edi<|user_cursor|>table
2776                <|fim_suffix|>
2777
2778                suffix
2779                <|fim_middle|>updated
2780            "#}
2781        );
2782    }
2783
2784    #[test]
2785    fn test_truncation_drops_edit_history_when_budget_tight() {
2786        let input = make_input(
2787            "code",
2788            0..4,
2789            2,
2790            vec![make_event("a.rs", "-x\n+y\n")],
2791            vec![
2792                make_related_file("r1.rs", "a\n"),
2793                make_related_file("r2.rs", "b\n"),
2794            ],
2795        );
2796
2797        assert_eq!(
2798            format_with_budget(&input, 10000),
2799            indoc! {r#"
2800                <|file_sep|>r1.rs
2801                a
2802                <|file_sep|>r2.rs
2803                b
2804                <|file_sep|>edit history
2805                --- a/a.rs
2806                +++ b/a.rs
2807                -x
2808                +y
2809                <|file_sep|>test.rs
2810                <|fim_prefix|>
2811                <|fim_middle|>current
2812                co<|user_cursor|>de
2813                <|fim_suffix|>
2814                <|fim_middle|>updated
2815            "#}
2816        );
2817
2818        assert_eq!(
2819            format_with_budget(&input, 50),
2820            indoc! {r#"
2821                <|file_sep|>r1.rs
2822                a
2823                <|file_sep|>r2.rs
2824                b
2825                <|file_sep|>test.rs
2826                <|fim_prefix|>
2827                <|fim_middle|>current
2828                co<|user_cursor|>de
2829                <|fim_suffix|>
2830                <|fim_middle|>updated
2831            "#}
2832        );
2833    }
2834
2835    #[test]
2836    fn test_truncation_includes_partial_excerpts() {
2837        let input = make_input(
2838            "x",
2839            0..1,
2840            0,
2841            vec![],
2842            vec![RelatedFile {
2843                path: Path::new("big.rs").into(),
2844                max_row: 30,
2845                in_open_source_repo: false,
2846                excerpts: vec![
2847                    RelatedExcerpt {
2848                        row_range: 0..10,
2849                        text: "first excerpt\n".into(),
2850                        order: 0,
2851                    },
2852                    RelatedExcerpt {
2853                        row_range: 10..20,
2854                        text: "second excerpt\n".into(),
2855                        order: 0,
2856                    },
2857                    RelatedExcerpt {
2858                        row_range: 20..30,
2859                        text: "third excerpt\n".into(),
2860                        order: 0,
2861                    },
2862                ],
2863            }],
2864        );
2865
2866        assert_eq!(
2867            format_with_budget(&input, 10000),
2868            indoc! {r#"
2869                <|file_sep|>big.rs
2870                first excerpt
2871                ...
2872                second excerpt
2873                ...
2874                third excerpt
2875                <|file_sep|>test.rs
2876                <|fim_prefix|>
2877                <|fim_middle|>current
2878                <|user_cursor|>x
2879                <|fim_suffix|>
2880                <|fim_middle|>updated
2881            "#}
2882        );
2883
2884        assert_eq!(
2885            format_with_budget(&input, 50),
2886            indoc! {r#"
2887                <|file_sep|>big.rs
2888                first excerpt
2889                ...
2890                <|file_sep|>test.rs
2891                <|fim_prefix|>
2892                <|fim_middle|>current
2893                <|user_cursor|>x
2894                <|fim_suffix|>
2895                <|fim_middle|>updated
2896            "#}
2897        );
2898    }
2899
2900    #[test]
2901    fn test_truncation_prioritizes_lower_order_excerpts() {
2902        // Two files: file_a has a high-order excerpt, file_b has a low-order one.
2903        // With tight budget, only the lower-order excerpt from file_b should be included.
2904        let input = make_input(
2905            "x",
2906            0..1,
2907            0,
2908            vec![],
2909            vec![
2910                RelatedFile {
2911                    path: Path::new("file_a.rs").into(),
2912                    max_row: 10,
2913                    in_open_source_repo: false,
2914                    excerpts: vec![RelatedExcerpt {
2915                        row_range: 0..10,
2916                        text: "low priority content\n".into(),
2917                        order: 5,
2918                    }],
2919                },
2920                RelatedFile {
2921                    path: Path::new("file_b.rs").into(),
2922                    max_row: 10,
2923                    in_open_source_repo: false,
2924                    excerpts: vec![RelatedExcerpt {
2925                        row_range: 0..10,
2926                        text: "high priority content\n".into(),
2927                        order: 1,
2928                    }],
2929                },
2930            ],
2931        );
2932
2933        // With large budget, both files included; rendered in stable lexicographic order.
2934        assert_eq!(
2935            format_with_budget(&input, 10000),
2936            indoc! {r#"
2937                <|file_sep|>file_a.rs
2938                low priority content
2939                <|file_sep|>file_b.rs
2940                high priority content
2941                <|file_sep|>test.rs
2942                <|fim_prefix|>
2943                <|fim_middle|>current
2944                <|user_cursor|>x
2945                <|fim_suffix|>
2946                <|fim_middle|>updated
2947            "#}
2948        );
2949
2950        // With tight budget, only file_b (lower order) fits.
2951        // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
2952        // file_b header (7) + excerpt (7) = 14 tokens, which fits.
2953        // file_a would need another 14 tokens, which doesn't fit.
2954        assert_eq!(
2955            format_with_budget(&input, 52),
2956            indoc! {r#"
2957                <|file_sep|>file_b.rs
2958                high priority content
2959                <|file_sep|>test.rs
2960                <|fim_prefix|>
2961                <|fim_middle|>current
2962                <|user_cursor|>x
2963                <|fim_suffix|>
2964                <|fim_middle|>updated
2965            "#}
2966        );
2967    }
2968
2969    #[test]
2970    fn test_truncation_drops_high_order_excerpts_within_file() {
2971        // A single file has excerpts at order 1 and order 3. With a tight budget,
2972        // only the order-1 excerpts are included while the order-3 excerpt is
2973        // dropped — even though they belong to the same file. This also preserves
2974        // the parent invariant: parent outline items have order ≤ their best
2975        // child, so they're always included when any child is.
2976        let input = make_input(
2977            "x",
2978            0..1,
2979            0,
2980            vec![],
2981            vec![RelatedFile {
2982                path: Path::new("mod.rs").into(),
2983                max_row: 30,
2984                in_open_source_repo: false,
2985                excerpts: vec![
2986                    RelatedExcerpt {
2987                        row_range: 0..5,
2988                        text: "mod header\n".into(),
2989                        order: 1,
2990                    },
2991                    RelatedExcerpt {
2992                        row_range: 5..15,
2993                        text: "important fn\n".into(),
2994                        order: 1,
2995                    },
2996                    RelatedExcerpt {
2997                        row_range: 15..30,
2998                        text: "less important fn\n".into(),
2999                        order: 3,
3000                    },
3001                ],
3002            }],
3003        );
3004
3005        // With large budget, all three excerpts included.
3006        assert_eq!(
3007            format_with_budget(&input, 10000),
3008            indoc! {r#"
3009                <|file_sep|>mod.rs
3010                mod header
3011                ...
3012                important fn
3013                ...
3014                less important fn
3015                <|file_sep|>test.rs
3016                <|fim_prefix|>
3017                <|fim_middle|>current
3018                <|user_cursor|>x
3019                <|fim_suffix|>
3020                <|fim_middle|>updated
3021            "#}
3022        );
3023
3024        // With tight budget, only order<=1 excerpts included (header + important fn).
3025        assert_eq!(
3026            format_with_budget(&input, 55),
3027            indoc! {r#"
3028                <|file_sep|>mod.rs
3029                mod header
3030                ...
3031                important fn
3032                ...
3033                <|file_sep|>test.rs
3034                <|fim_prefix|>
3035                <|fim_middle|>current
3036                <|user_cursor|>x
3037                <|fim_suffix|>
3038                <|fim_middle|>updated
3039            "#}
3040        );
3041    }
3042
3043    #[test]
3044    fn test_truncation_drops_older_events_first() {
3045        let input = make_input(
3046            "x",
3047            0..1,
3048            0,
3049            vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")],
3050            vec![],
3051        );
3052
3053        assert_eq!(
3054            format_with_budget(&input, 10000),
3055            indoc! {r#"
3056                <|file_sep|>edit history
3057                --- a/old.rs
3058                +++ b/old.rs
3059                -1
3060                --- a/new.rs
3061                +++ b/new.rs
3062                -2
3063                <|file_sep|>test.rs
3064                <|fim_prefix|>
3065                <|fim_middle|>current
3066                <|user_cursor|>x
3067                <|fim_suffix|>
3068                <|fim_middle|>updated
3069            "#}
3070        );
3071
3072        assert_eq!(
3073            format_with_budget(&input, 55),
3074            indoc! {r#"
3075                <|file_sep|>edit history
3076                --- a/new.rs
3077                +++ b/new.rs
3078                -2
3079                <|file_sep|>test.rs
3080                <|fim_prefix|>
3081                <|fim_middle|>current
3082                <|user_cursor|>x
3083                <|fim_suffix|>
3084                <|fim_middle|>updated
3085            "#}
3086        );
3087    }
3088
3089    #[test]
3090    fn test_cursor_excerpt_always_included_with_minimal_budget() {
3091        let input = make_input(
3092            "fn main() {}",
3093            0..12,
3094            3,
3095            vec![make_event("a.rs", "-old\n+new\n")],
3096            vec![make_related_file("related.rs", "helper\n")],
3097        );
3098
3099        assert_eq!(
3100            format_with_budget(&input, 30),
3101            indoc! {r#"
3102                <|file_sep|>test.rs
3103                <|fim_prefix|>
3104                <|fim_middle|>current
3105                fn <|user_cursor|>main() {}
3106                <|fim_suffix|>
3107                <|fim_middle|>updated
3108            "#}
3109        );
3110    }
3111
3112    fn format_seed_coder(input: &ZetaPromptInput) -> String {
3113        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
3114    }
3115
3116    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
3117        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
3118    }
3119
3120    #[test]
3121    fn test_seed_coder_basic_format() {
3122        let input = make_input(
3123            "prefix\neditable\nsuffix",
3124            7..15,
3125            10,
3126            vec![make_event("a.rs", "-old\n+new\n")],
3127            vec![make_related_file("related.rs", "fn helper() {}\n")],
3128        );
3129
3130        assert_eq!(
3131            format_seed_coder(&input),
3132            indoc! {r#"
3133                <[fim-suffix]>
3134                suffix
3135                <[fim-prefix]><filename>related.rs
3136                fn helper() {}
3137
3138                <filename>edit_history
3139                --- a/a.rs
3140                +++ b/a.rs
3141                -old
3142                +new
3143
3144                <filename>test.rs
3145                prefix
3146                <<<<<<< CURRENT
3147                edi<|user_cursor|>table
3148                =======
3149                <[fim-middle]>"#}
3150        );
3151    }
3152
3153    #[test]
3154    fn test_seed_coder_no_context() {
3155        let input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);
3156
3157        assert_eq!(
3158            format_seed_coder(&input),
3159            indoc! {r#"
3160                <[fim-suffix]>
3161                after
3162                <[fim-prefix]><filename>test.rs
3163                before
3164                <<<<<<< CURRENT
3165                mid<|user_cursor|>dle
3166                =======
3167                <[fim-middle]>"#}
3168        );
3169    }
3170
3171    #[test]
3172    fn test_seed_coder_truncation_drops_context() {
3173        let input = make_input(
3174            "code",
3175            0..4,
3176            2,
3177            vec![make_event("a.rs", "-x\n+y\n")],
3178            vec![make_related_file("r1.rs", "content\n")],
3179        );
3180
3181        // With large budget, everything is included
3182        assert_eq!(
3183            format_seed_coder(&input),
3184            indoc! {r#"
3185                <[fim-suffix]>
3186                <[fim-prefix]><filename>r1.rs
3187                content
3188
3189                <filename>edit_history
3190                --- a/a.rs
3191                +++ b/a.rs
3192                -x
3193                +y
3194
3195                <filename>test.rs
3196                <<<<<<< CURRENT
3197                co<|user_cursor|>de
3198                =======
3199                <[fim-middle]>"#}
3200        );
3201
3202        // With tight budget, context is dropped but cursor section remains
3203        assert_eq!(
3204            format_seed_coder_with_budget(&input, 30),
3205            indoc! {r#"
3206                <[fim-suffix]>
3207                <[fim-prefix]><filename>test.rs
3208                <<<<<<< CURRENT
3209                co<|user_cursor|>de
3210                =======
3211                <[fim-middle]>"#}
3212        );
3213    }
3214
3215    #[test]
3216    fn test_seed_coder_truncation_prioritizes_lower_order() {
3217        let input = make_input(
3218            "code",
3219            0..4,
3220            2,
3221            vec![],
3222            vec![
3223                RelatedFile {
3224                    path: Path::new("low_prio.rs").into(),
3225                    max_row: 5,
3226                    in_open_source_repo: false,
3227                    excerpts: vec![RelatedExcerpt {
3228                        row_range: 0..5,
3229                        text: "low prio\n".into(),
3230                        order: 10,
3231                    }],
3232                },
3233                RelatedFile {
3234                    path: Path::new("high_prio.rs").into(),
3235                    max_row: 5,
3236                    in_open_source_repo: false,
3237                    excerpts: vec![RelatedExcerpt {
3238                        row_range: 0..5,
3239                        text: "high prio\n".into(),
3240                        order: 1,
3241                    }],
3242                },
3243            ],
3244        );
3245
3246        // With large budget, both included; rendered in stable lexicographic order.
3247        assert_eq!(
3248            format_seed_coder(&input),
3249            indoc! {r#"
3250                <[fim-suffix]>
3251                <[fim-prefix]><filename>low_prio.rs
3252                low prio
3253                <filename>high_prio.rs
3254                high prio
3255
3256                <filename>test.rs
3257                <<<<<<< CURRENT
3258                co<|user_cursor|>de
3259                =======
3260                <[fim-middle]>"#}
3261        );
3262
3263        // With tight budget, only high_prio included.
3264        // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
3265        // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
3266        assert_eq!(
3267            format_seed_coder_with_budget(&input, 44),
3268            indoc! {r#"
3269                <[fim-suffix]>
3270                <[fim-prefix]><filename>high_prio.rs
3271                high prio
3272
3273                <filename>test.rs
3274                <<<<<<< CURRENT
3275                co<|user_cursor|>de
3276                =======
3277                <[fim-middle]>"#}
3278        );
3279    }
3280
3281    #[test]
3282    fn test_seed_coder_clean_output() {
3283        let output_with_marker = "new code\n>>>>>>> UPDATED\n";
3284        let output_without_marker = "new code\n";
3285
3286        assert_eq!(
3287            clean_zeta2_model_output(output_with_marker, ZetaFormat::V0211SeedCoder),
3288            "new code\n"
3289        );
3290        assert_eq!(
3291            clean_zeta2_model_output(output_without_marker, ZetaFormat::V0211SeedCoder),
3292            "new code\n"
3293        );
3294    }
3295
3296    #[test]
3297    fn test_format_zeta1_from_input_basic() {
3298        let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
3299        let input = ZetaPromptInput {
3300            cursor_path: Path::new("src/main.rs").into(),
3301            cursor_excerpt: excerpt.into(),
3302            cursor_offset_in_excerpt: 30,
3303            excerpt_start_row: Some(0),
3304            events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
3305            related_files: vec![],
3306            excerpt_ranges: ExcerptRanges {
3307                editable_150: 15..41,
3308                editable_180: 15..41,
3309                editable_350: 15..41,
3310                editable_150_context_350: 0..excerpt.len(),
3311                editable_180_context_350: 0..excerpt.len(),
3312                editable_350_context_150: 0..excerpt.len(),
3313                ..Default::default()
3314            },
3315            experiment: None,
3316            in_open_source_repo: false,
3317            can_collect_data: false,
3318            repo_url: None,
3319        };
3320
3321        let prompt = zeta1::format_zeta1_from_input(&input, 15..41, 0..excerpt.len());
3322
3323        assert_eq!(
3324            prompt,
3325            concat!(
3326                "### Instruction:\n",
3327                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3328                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3329                "into account the cursor location.\n",
3330                "\n",
3331                "### User Edits:\n",
3332                "\n",
3333                "User edited other.rs:\n",
3334                "```diff\n",
3335                "-old\n",
3336                "+new\n",
3337                "\n",
3338                "```\n",
3339                "\n",
3340                "### User Excerpt:\n",
3341                "\n",
3342                "```src/main.rs\n",
3343                "<|start_of_file|>\n",
3344                "fn before() {}\n",
3345                "<|editable_region_start|>\n",
3346                "fn foo() {\n",
3347                "    <|user_cursor_is_here|>let x = 1;\n",
3348                "\n",
3349                "<|editable_region_end|>}\n",
3350                "fn after() {}\n",
3351                "\n",
3352                "```\n",
3353                "\n",
3354                "### Response:\n",
3355            ),
3356        );
3357    }
3358
3359    #[test]
3360    fn test_format_zeta1_from_input_no_start_of_file() {
3361        let excerpt = "fn foo() {\n    let x = 1;\n}\n";
3362        let input = ZetaPromptInput {
3363            cursor_path: Path::new("src/main.rs").into(),
3364            cursor_excerpt: excerpt.into(),
3365            cursor_offset_in_excerpt: 15,
3366            excerpt_start_row: Some(10),
3367            events: vec![],
3368            related_files: vec![],
3369            excerpt_ranges: ExcerptRanges {
3370                editable_150: 0..28,
3371                editable_180: 0..28,
3372                editable_350: 0..28,
3373                editable_150_context_350: 0..28,
3374                editable_180_context_350: 0..28,
3375                editable_350_context_150: 0..28,
3376                ..Default::default()
3377            },
3378            experiment: None,
3379            in_open_source_repo: false,
3380            can_collect_data: false,
3381            repo_url: None,
3382        };
3383
3384        let prompt = zeta1::format_zeta1_from_input(&input, 0..28, 0..28);
3385
3386        assert_eq!(
3387            prompt,
3388            concat!(
3389                "### Instruction:\n",
3390                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3391                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3392                "into account the cursor location.\n",
3393                "\n",
3394                "### User Edits:\n",
3395                "\n",
3396                "\n",
3397                "\n",
3398                "### User Excerpt:\n",
3399                "\n",
3400                "```src/main.rs\n",
3401                "<|editable_region_start|>\n",
3402                "fn foo() {\n",
3403                "    <|user_cursor_is_here|>let x = 1;\n",
3404                "}\n",
3405                "\n",
3406                "<|editable_region_end|>\n",
3407                "```\n",
3408                "\n",
3409                "### Response:\n",
3410            ),
3411        );
3412    }
3413
3414    #[test]
3415    fn test_format_zeta1_from_input_with_sub_ranges() {
3416        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
3417        let editable_range = 10..37;
3418        let context_range = 0..excerpt.len();
3419
3420        let input = ZetaPromptInput {
3421            cursor_path: Path::new("test.rs").into(),
3422            cursor_excerpt: excerpt.into(),
3423            cursor_offset_in_excerpt: 25,
3424            excerpt_start_row: Some(0),
3425            events: vec![],
3426            related_files: vec![],
3427            excerpt_ranges: ExcerptRanges {
3428                editable_150: editable_range.clone(),
3429                editable_180: editable_range.clone(),
3430                editable_350: editable_range.clone(),
3431                editable_150_context_350: context_range.clone(),
3432                editable_180_context_350: context_range.clone(),
3433                editable_350_context_150: context_range.clone(),
3434                ..Default::default()
3435            },
3436            experiment: None,
3437            in_open_source_repo: false,
3438            can_collect_data: false,
3439            repo_url: None,
3440        };
3441
3442        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
3443
3444        assert_eq!(
3445            prompt,
3446            concat!(
3447                "### Instruction:\n",
3448                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3449                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3450                "into account the cursor location.\n",
3451                "\n",
3452                "### User Edits:\n",
3453                "\n",
3454                "\n",
3455                "\n",
3456                "### User Excerpt:\n",
3457                "\n",
3458                "```test.rs\n",
3459                "<|start_of_file|>\n",
3460                "// prefix\n",
3461                "<|editable_region_start|>\n",
3462                "fn foo() {\n",
3463                "    <|user_cursor_is_here|>let x = 1;\n",
3464                "}\n",
3465                "<|editable_region_end|>\n",
3466                "// suffix\n",
3467                "\n",
3468                "```\n",
3469                "\n",
3470                "### Response:\n",
3471            ),
3472        );
3473    }
3474
3475    #[test]
3476    fn test_clean_zeta1_model_output_basic() {
3477        let output = indoc! {"
3478            <|editable_region_start|>
3479            fn main() {
3480                println!(\"hello\");
3481            }
3482            <|editable_region_end|>
3483        "};
3484
3485        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3486        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
3487    }
3488
3489    #[test]
3490    fn test_clean_zeta1_model_output_with_cursor() {
3491        let output = indoc! {"
3492            <|editable_region_start|>
3493            fn main() {
3494                <|user_cursor_is_here|>println!(\"hello\");
3495            }
3496            <|editable_region_end|>
3497        "};
3498
3499        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3500        assert_eq!(
3501            cleaned,
3502            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
3503        );
3504    }
3505
3506    #[test]
3507    fn test_clean_zeta1_model_output_no_markers() {
3508        let output = "fn main() {}\n";
3509        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3510        assert_eq!(cleaned, "fn main() {}\n");
3511    }
3512
3513    #[test]
3514    fn test_clean_zeta1_model_output_empty_region() {
3515        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
3516        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3517        assert_eq!(cleaned, "");
3518    }
3519}