zeta_prompt.rs

   1use anyhow::Result;
   2use serde::{Deserialize, Serialize};
   3use std::fmt::Write;
   4use std::ops::Range;
   5use std::path::Path;
   6use std::sync::Arc;
   7use strum::{EnumIter, IntoEnumIterator as _, IntoStaticStr};
   8
/// Marker text written into the prompt at the cursor position, between the
/// text before and after the cursor inside the editable region.
pub const CURSOR_MARKER: &str = "<|user_cursor|>";
/// Token budget that `format_zeta_prompt` passes to
/// `format_prompt_with_budget_for_format`.
pub const MAX_PROMPT_TOKENS: usize = 4096;

/// Use up to this amount of the editable region for prefill.
/// Larger values may result in more robust generation, but
/// this region becomes non-editable.
///
/// Applied as a fraction of the editable region's byte length.
pub const PREFILL_RATIO: f64 = 0.1; // 10%
  16
  17fn estimate_tokens(bytes: usize) -> usize {
  18    bytes / 3
  19}
  20
/// Pre-computed byte offset ranges within `cursor_excerpt` for different
/// editable and context token budgets. Allows the server to select the
/// appropriate ranges for whichever model it uses.
#[derive(Clone, Debug, Default, PartialEq, Hash, Serialize, Deserialize)]
pub struct ExcerptRanges {
    /// Editable region computed with a 150-token budget.
    pub editable_150: Range<usize>,
    /// Editable region computed with a 180-token budget.
    pub editable_180: Range<usize>,
    /// Editable region computed with a 350-token budget.
    pub editable_350: Range<usize>,
    /// Editable region computed with a 512-token budget, when available.
    pub editable_512: Option<Range<usize>>,
    /// Context boundary when using editable_150 with 350 tokens of additional context.
    pub editable_150_context_350: Range<usize>,
    /// Context boundary when using editable_180 with 350 tokens of additional context.
    pub editable_180_context_350: Range<usize>,
    /// Context boundary when using editable_350 with 150 tokens of additional context.
    pub editable_350_context_150: Range<usize>,
    /// NOTE(review): presumably the context boundary when using editable_350
    /// with 512 tokens of additional context; confirm against the producer.
    pub editable_350_context_512: Option<Range<usize>>,
    /// NOTE(review): presumably the context boundary when using editable_350
    /// with 1024 tokens of additional context; confirm against the producer.
    pub editable_350_context_1024: Option<Range<usize>>,
    /// NOTE(review): presumably a context range computed with a 4096-token
    /// budget; confirm against the producer.
    pub context_4096: Option<Range<usize>>,
    /// NOTE(review): presumably a context range computed with an 8192-token
    /// budget; confirm against the producer.
    pub context_8192: Option<Range<usize>>,
}
  45
/// Everything needed to render an edit-prediction prompt for one cursor
/// position.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct ZetaPromptInput {
    /// Path of the file containing the cursor; written into the prompt's
    /// cursor-excerpt header.
    pub cursor_path: Arc<Path>,
    /// Text around the cursor. The ranges in `excerpt_ranges` are byte
    /// offsets into this string.
    pub cursor_excerpt: Arc<str>,
    /// Byte offset of the cursor within `cursor_excerpt`.
    pub cursor_offset_in_excerpt: usize,
    /// NOTE(review): presumably the buffer row at which `cursor_excerpt`
    /// starts; not read anywhere in this part of the file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt_start_row: Option<u32>,
    /// Edit events rendered into the prompt's edit-history section.
    pub events: Vec<Arc<Event>>,
    /// Files whose excerpts are rendered into the prompt as extra context.
    pub related_files: Vec<RelatedFile>,
    /// These ranges let the server select model-appropriate subsets.
    pub excerpt_ranges: ExcerptRanges,
    /// The name of the edit prediction model experiment to use.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub experiment: Option<String>,
    /// Defaults to `false` when absent from the serialized form.
    #[serde(default)]
    pub in_open_source_repo: bool,
    /// Defaults to `false` when absent from the serialized form.
    #[serde(default)]
    pub can_collect_data: bool,
    /// Omitted from the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub repo_url: Option<String>,
}
  67
/// Identifies which prompt layout to render and how to interpret the model's
/// output for it.
///
/// The `IntoStaticStr` derive supplies the variant names used by `Display`
/// and `ZetaFormat::parse`; `EnumIter` supplies the list of all variants.
#[derive(
    Default,
    Clone,
    Copy,
    Debug,
    PartialEq,
    Eq,
    Hash,
    EnumIter,
    IntoStaticStr,
    Serialize,
    Deserialize,
)]
// Needed because `v0226Hashline` starts with a lowercase letter.
#[allow(non_camel_case_types)]
pub enum ZetaFormat {
    V0112MiddleAtEnd,
    V0113Ordered,
    V0114180EditableRegion,
    V0120GitMergeMarkers,
    /// The format used when none is specified.
    #[default]
    V0131GitMergeMarkersPrefix,
    V0211Prefill,
    V0211SeedCoder,
    v0226Hashline,
    V0304SeedNoEdits,
}
  94
  95impl std::fmt::Display for ZetaFormat {
  96    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  97        write!(f, "{}", <&'static str>::from(self))
  98    }
  99}
 100
 101impl ZetaFormat {
 102    pub fn parse(format_name: &str) -> Result<Self> {
 103        let mut results = ZetaFormat::iter().filter(|version| {
 104            <&'static str>::from(version)
 105                .to_lowercase()
 106                .contains(&format_name.to_lowercase())
 107        });
 108        let Some(result) = results.next() else {
 109            anyhow::bail!(
 110                "`{format_name}` did not match any of:\n{}",
 111                Self::options_as_string()
 112            );
 113        };
 114        if results.next().is_some() {
 115            anyhow::bail!(
 116                "`{format_name}` matched more than one of:\n{}",
 117                Self::options_as_string()
 118            );
 119        }
 120        Ok(result)
 121    }
 122
 123    pub fn options_as_string() -> String {
 124        ZetaFormat::iter()
 125            .map(|format| format!("- {}\n", <&'static str>::from(format)))
 126            .collect::<Vec<_>>()
 127            .concat()
 128    }
 129}
 130
/// An edit-history event that can be rendered into the prompt.
///
/// Serialized with an `event` tag naming the variant.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
#[serde(tag = "event")]
pub enum Event {
    /// A change to a buffer, carried as diff text.
    BufferChange {
        /// Path written on the `+++ b` header line by `write_event`.
        path: Arc<Path>,
        /// Path written on the `--- a` header line by `write_event`.
        old_path: Arc<Path>,
        /// Diff body, appended verbatim after the two header lines.
        diff: String,
        /// When true, `write_event` prefixes the event with a
        /// "User accepted prediction" comment line.
        predicted: bool,
        /// Returned by `Event::in_open_source_repo`; not written into the prompt.
        in_open_source_repo: bool,
    },
}
 142
 143impl Event {
 144    pub fn in_open_source_repo(&self) -> bool {
 145        match self {
 146            Event::BufferChange {
 147                in_open_source_repo,
 148                ..
 149            } => *in_open_source_repo,
 150        }
 151    }
 152}
 153
 154pub fn write_event(prompt: &mut String, event: &Event) {
 155    fn write_path_as_unix_str(prompt: &mut String, path: &Path) {
 156        for component in path.components() {
 157            prompt.push('/');
 158            write!(prompt, "{}", component.as_os_str().display()).ok();
 159        }
 160    }
 161    match event {
 162        Event::BufferChange {
 163            path,
 164            old_path,
 165            diff,
 166            predicted,
 167            in_open_source_repo: _,
 168        } => {
 169            if *predicted {
 170                prompt.push_str("// User accepted prediction:\n");
 171            }
 172            prompt.push_str("--- a");
 173            write_path_as_unix_str(prompt, old_path.as_ref());
 174            prompt.push_str("\n+++ b");
 175            write_path_as_unix_str(prompt, path.as_ref());
 176            prompt.push('\n');
 177            prompt.push_str(diff);
 178        }
 179    }
 180}
 181
/// A file whose excerpts are rendered into the prompt as additional context.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedFile {
    /// Path written after the file marker in the file's header line.
    pub path: Arc<Path>,
    /// Compared against each excerpt's `row_range.end`: an excerpt ending
    /// before this row is followed by a `...` line when rendered.
    pub max_row: u32,
    /// Excerpts of this file, rendered in this order under the file header.
    pub excerpts: Vec<RelatedExcerpt>,
    /// Defaults to `false` when absent from the serialized form.
    #[serde(default)]
    pub in_open_source_repo: bool,
}
 190
/// A slice of a related file's text.
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct RelatedExcerpt {
    /// Rows covered by this excerpt; `end` is compared against the file's
    /// `max_row` to decide whether a `...` line follows the excerpt.
    pub row_range: Range<u32>,
    /// The excerpt's text, rendered verbatim.
    pub text: Arc<str>,
    /// Priority used by `format_related_files_within_budget`: excerpts with
    /// lower values are admitted into the token budget first. Defaults to 0
    /// when absent from the serialized form.
    #[serde(default)]
    pub order: usize,
}
 198
 199pub fn prompt_input_contains_special_tokens(input: &ZetaPromptInput, format: ZetaFormat) -> bool {
 200    special_tokens_for_format(format)
 201        .iter()
 202        .any(|token| input.cursor_excerpt.contains(token))
 203}
 204
 205pub fn format_zeta_prompt(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 206    format_prompt_with_budget_for_format(input, format, MAX_PROMPT_TOKENS)
 207}
 208
 209pub fn special_tokens_for_format(format: ZetaFormat) -> &'static [&'static str] {
 210    match format {
 211        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::special_tokens(),
 212        ZetaFormat::V0113Ordered => v0113_ordered::special_tokens(),
 213        ZetaFormat::V0114180EditableRegion => v0114180_editable_region::special_tokens(),
 214        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::special_tokens(),
 215        ZetaFormat::V0131GitMergeMarkersPrefix => v0131_git_merge_markers_prefix::special_tokens(),
 216        ZetaFormat::V0211Prefill => v0211_prefill::special_tokens(),
 217        ZetaFormat::V0211SeedCoder => seed_coder::special_tokens(),
 218        ZetaFormat::v0226Hashline => hashline::special_tokens(),
 219        ZetaFormat::V0304SeedNoEdits => seed_coder::special_tokens(),
 220    }
 221}
 222
 223pub fn excerpt_ranges_for_format(
 224    format: ZetaFormat,
 225    ranges: &ExcerptRanges,
 226) -> (Range<usize>, Range<usize>) {
 227    match format {
 228        ZetaFormat::V0112MiddleAtEnd | ZetaFormat::V0113Ordered => (
 229            ranges.editable_150.clone(),
 230            ranges.editable_150_context_350.clone(),
 231        ),
 232        ZetaFormat::V0114180EditableRegion => (
 233            ranges.editable_180.clone(),
 234            ranges.editable_180_context_350.clone(),
 235        ),
 236        ZetaFormat::V0120GitMergeMarkers
 237        | ZetaFormat::V0131GitMergeMarkersPrefix
 238        | ZetaFormat::V0211Prefill
 239        | ZetaFormat::V0211SeedCoder
 240        | ZetaFormat::v0226Hashline
 241        | ZetaFormat::V0304SeedNoEdits => (
 242            ranges.editable_350.clone(),
 243            ranges.editable_350_context_150.clone(),
 244        ),
 245    }
 246}
 247
 248pub fn write_cursor_excerpt_section_for_format(
 249    format: ZetaFormat,
 250    prompt: &mut String,
 251    path: &Path,
 252    context: &str,
 253    editable_range: &Range<usize>,
 254    cursor_offset: usize,
 255) {
 256    match format {
 257        ZetaFormat::V0112MiddleAtEnd => v0112_middle_at_end::write_cursor_excerpt_section(
 258            prompt,
 259            path,
 260            context,
 261            editable_range,
 262            cursor_offset,
 263        ),
 264        ZetaFormat::V0113Ordered | ZetaFormat::V0114180EditableRegion => {
 265            v0113_ordered::write_cursor_excerpt_section(
 266                prompt,
 267                path,
 268                context,
 269                editable_range,
 270                cursor_offset,
 271            )
 272        }
 273        ZetaFormat::V0120GitMergeMarkers => v0120_git_merge_markers::write_cursor_excerpt_section(
 274            prompt,
 275            path,
 276            context,
 277            editable_range,
 278            cursor_offset,
 279        ),
 280        ZetaFormat::V0131GitMergeMarkersPrefix | ZetaFormat::V0211Prefill => {
 281            v0131_git_merge_markers_prefix::write_cursor_excerpt_section(
 282                prompt,
 283                path,
 284                context,
 285                editable_range,
 286                cursor_offset,
 287            )
 288        }
 289        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
 290            seed_coder::write_cursor_excerpt_section(
 291                prompt,
 292                path,
 293                context,
 294                editable_range,
 295                cursor_offset,
 296            )
 297        }
 298        ZetaFormat::v0226Hashline => hashline::write_cursor_excerpt_section(
 299            prompt,
 300            path,
 301            context,
 302            editable_range,
 303            cursor_offset,
 304        ),
 305    }
 306}
 307
 308pub fn format_prompt_with_budget_for_format(
 309    input: &ZetaPromptInput,
 310    format: ZetaFormat,
 311    max_tokens: usize,
 312) -> String {
 313    let (context, editable_range, cursor_offset) = resolve_cursor_region(input, format);
 314    let path = &*input.cursor_path;
 315
 316    match format {
 317        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
 318            seed_coder::format_prompt_with_budget(
 319                path,
 320                context,
 321                &editable_range,
 322                cursor_offset,
 323                &input.events,
 324                &input.related_files,
 325                max_tokens,
 326            )
 327        }
 328        _ => {
 329            let mut cursor_section = String::new();
 330            write_cursor_excerpt_section_for_format(
 331                format,
 332                &mut cursor_section,
 333                path,
 334                context,
 335                &editable_range,
 336                cursor_offset,
 337            );
 338
 339            let cursor_tokens = estimate_tokens(cursor_section.len());
 340            let budget_after_cursor = max_tokens.saturating_sub(cursor_tokens);
 341
 342            let edit_history_section = format_edit_history_within_budget(
 343                &input.events,
 344                "<|file_sep|>",
 345                "edit history",
 346                budget_after_cursor,
 347            );
 348            let edit_history_tokens = estimate_tokens(edit_history_section.len());
 349            let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
 350
 351            let related_files_section = format_related_files_within_budget(
 352                &input.related_files,
 353                "<|file_sep|>",
 354                "",
 355                budget_after_edit_history,
 356            );
 357
 358            let mut prompt = String::new();
 359            prompt.push_str(&related_files_section);
 360            prompt.push_str(&edit_history_section);
 361            prompt.push_str(&cursor_section);
 362            prompt
 363        }
 364    }
 365}
 366
 367pub fn get_prefill_for_format(
 368    format: ZetaFormat,
 369    context: &str,
 370    editable_range: &Range<usize>,
 371) -> String {
 372    match format {
 373        ZetaFormat::V0211Prefill => v0211_prefill::get_prefill(context, editable_range),
 374        ZetaFormat::V0112MiddleAtEnd
 375        | ZetaFormat::V0113Ordered
 376        | ZetaFormat::V0114180EditableRegion
 377        | ZetaFormat::V0120GitMergeMarkers
 378        | ZetaFormat::V0131GitMergeMarkersPrefix
 379        | ZetaFormat::V0211SeedCoder
 380        | ZetaFormat::v0226Hashline
 381        | ZetaFormat::V0304SeedNoEdits => String::new(),
 382    }
 383}
 384
 385pub fn output_end_marker_for_format(format: ZetaFormat) -> Option<&'static str> {
 386    match format {
 387        ZetaFormat::V0120GitMergeMarkers => Some(v0120_git_merge_markers::END_MARKER),
 388        ZetaFormat::V0131GitMergeMarkersPrefix => Some(v0131_git_merge_markers_prefix::END_MARKER),
 389        ZetaFormat::V0211Prefill => Some(v0131_git_merge_markers_prefix::END_MARKER),
 390        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => Some(seed_coder::END_MARKER),
 391        ZetaFormat::V0112MiddleAtEnd
 392        | ZetaFormat::V0113Ordered
 393        | ZetaFormat::V0114180EditableRegion
 394        | ZetaFormat::v0226Hashline => None,
 395    }
 396}
 397
 398pub fn current_region_markers_for_format(format: ZetaFormat) -> (&'static str, &'static str) {
 399    match format {
 400        ZetaFormat::V0112MiddleAtEnd => ("<|fim_middle|>current\n", "<|fim_middle|>updated"),
 401        ZetaFormat::V0113Ordered
 402        | ZetaFormat::V0114180EditableRegion
 403        | ZetaFormat::v0226Hashline => ("<|fim_middle|>current\n", "<|fim_suffix|>"),
 404        ZetaFormat::V0120GitMergeMarkers
 405        | ZetaFormat::V0131GitMergeMarkersPrefix
 406        | ZetaFormat::V0211Prefill => (
 407            v0120_git_merge_markers::START_MARKER,
 408            v0120_git_merge_markers::SEPARATOR,
 409        ),
 410        ZetaFormat::V0211SeedCoder | ZetaFormat::V0304SeedNoEdits => {
 411            (seed_coder::START_MARKER, seed_coder::SEPARATOR)
 412        }
 413    }
 414}
 415
 416pub fn clean_extracted_region_for_format(format: ZetaFormat, region: &str) -> String {
 417    match format {
 418        ZetaFormat::v0226Hashline => hashline::strip_hashline_prefixes(region),
 419        _ => region.to_string(),
 420    }
 421}
 422
 423pub fn encode_patch_as_output_for_format(
 424    format: ZetaFormat,
 425    old_editable_region: &str,
 426    patch: &str,
 427    cursor_offset: Option<usize>,
 428) -> Result<Option<String>> {
 429    match format {
 430        ZetaFormat::v0226Hashline => {
 431            hashline::patch_to_edit_commands(old_editable_region, patch, cursor_offset).map(Some)
 432        }
 433        ZetaFormat::V0304SeedNoEdits => Ok(seed_coder::no_edits(patch)),
 434        _ => Ok(None),
 435    }
 436}
 437
 438pub fn output_with_context_for_format(
 439    format: ZetaFormat,
 440    old_editable_region: &str,
 441    output: &str,
 442) -> Result<Option<String>> {
 443    match format {
 444        ZetaFormat::v0226Hashline => {
 445            if hashline::output_has_edit_commands(output) {
 446                Ok(Some(hashline::apply_edit_commands(
 447                    old_editable_region,
 448                    output,
 449                )))
 450            } else {
 451                Ok(None)
 452            }
 453        }
 454        ZetaFormat::V0304SeedNoEdits => {
 455            if output.starts_with(seed_coder::NO_EDITS) {
 456                Ok(Some(old_editable_region.to_owned()))
 457            } else {
 458                Ok(None)
 459            }
 460        }
 461        _ => Ok(None),
 462    }
 463}
 464
 465/// Post-processes model output for the given zeta format by stripping format-specific suffixes.
 466pub fn clean_zeta2_model_output(output: &str, format: ZetaFormat) -> &str {
 467    match output_end_marker_for_format(format) {
 468        Some(marker) => output.strip_suffix(marker).unwrap_or(output),
 469        None => output,
 470    }
 471}
 472
 473pub fn excerpt_range_for_format(
 474    format: ZetaFormat,
 475    ranges: &ExcerptRanges,
 476) -> (Range<usize>, Range<usize>) {
 477    excerpt_ranges_for_format(format, ranges)
 478}
 479
 480pub fn resolve_cursor_region(
 481    input: &ZetaPromptInput,
 482    format: ZetaFormat,
 483) -> (&str, Range<usize>, usize) {
 484    let (editable_range, context_range) = excerpt_range_for_format(format, &input.excerpt_ranges);
 485    let context_start = context_range.start;
 486    let context_text = &input.cursor_excerpt[context_range];
 487    let adjusted_editable =
 488        (editable_range.start - context_start)..(editable_range.end - context_start);
 489    let adjusted_cursor = input.cursor_offset_in_excerpt - context_start;
 490
 491    (context_text, adjusted_editable, adjusted_cursor)
 492}
 493
 494pub fn get_prefill(input: &ZetaPromptInput, format: ZetaFormat) -> String {
 495    let (context, editable_range, _) = resolve_cursor_region(input, format);
 496    get_prefill_for_format(format, context, &editable_range)
 497}
 498
 499fn format_edit_history_within_budget(
 500    events: &[Arc<Event>],
 501    file_marker: &str,
 502    edit_history_name: &str,
 503    max_tokens: usize,
 504) -> String {
 505    let header = format!("{}{}\n", file_marker, edit_history_name);
 506    let header_tokens = estimate_tokens(header.len());
 507    if header_tokens >= max_tokens {
 508        return String::new();
 509    }
 510
 511    let mut event_strings: Vec<String> = Vec::new();
 512    let mut total_tokens = header_tokens;
 513
 514    for event in events.iter().rev() {
 515        let mut event_str = String::new();
 516        write_event(&mut event_str, event);
 517        let event_tokens = estimate_tokens(event_str.len());
 518
 519        if total_tokens + event_tokens > max_tokens {
 520            break;
 521        }
 522        total_tokens += event_tokens;
 523        event_strings.push(event_str);
 524    }
 525
 526    if event_strings.is_empty() {
 527        return String::new();
 528    }
 529
 530    let mut result = header;
 531    for event_str in event_strings.iter().rev() {
 532        result.push_str(event_str);
 533    }
 534    result
 535}
 536
 537fn excerpt_rendered_tokens(excerpt: &RelatedExcerpt, file_max_row: u32) -> usize {
 538    let needs_newline = !excerpt.text.ends_with('\n');
 539    let needs_ellipsis = excerpt.row_range.end < file_max_row;
 540    let len = excerpt.text.len()
 541        + if needs_newline { "\n".len() } else { 0 }
 542        + if needs_ellipsis { "...\n".len() } else { 0 };
 543    estimate_tokens(len)
 544}
 545
 546pub fn format_related_files_within_budget(
 547    related_files: &[RelatedFile],
 548    file_prefix: &str,
 549    file_suffix: &str,
 550    max_tokens: usize,
 551) -> String {
 552    struct ExcerptCandidate {
 553        file_ix: usize,
 554        excerpt_ix: usize,
 555        order: usize,
 556    }
 557
 558    let mut excerpt_candidates: Vec<ExcerptCandidate> = related_files
 559        .iter()
 560        .enumerate()
 561        .flat_map(|(file_ix, file)| {
 562            file.excerpts
 563                .iter()
 564                .enumerate()
 565                .map(move |(excerpt_ix, e)| ExcerptCandidate {
 566                    file_ix,
 567                    excerpt_ix,
 568                    order: e.order,
 569                })
 570        })
 571        .collect();
 572
 573    // Pre-compute file header strings and their token costs.
 574    let file_headers: Vec<String> = related_files
 575        .iter()
 576        .map(|file| {
 577            let path_str = file.path.to_string_lossy();
 578            format!("{}{}\n", file_prefix, path_str)
 579        })
 580        .collect();
 581
 582    // Sort the excerpts by their order and determine how many fit within the budget.
 583    let mut total_tokens = 0;
 584    let mut included_excerpt_count = 0_usize;
 585    let mut included_file_indices = vec![false; related_files.len()];
 586    excerpt_candidates.sort_by_key(|e| (e.order, e.file_ix, e.excerpt_ix));
 587    for candidate in &excerpt_candidates {
 588        let file = &related_files[candidate.file_ix];
 589        let excerpt = &file.excerpts[candidate.excerpt_ix];
 590        let file_already_included = included_file_indices[candidate.file_ix];
 591        let header_cost = if file_already_included {
 592            0
 593        } else {
 594            estimate_tokens(file_headers[candidate.file_ix].len() + file_suffix.len())
 595        };
 596        let excerpt_cost = excerpt_rendered_tokens(excerpt, file.max_row);
 597        if total_tokens + header_cost + excerpt_cost > max_tokens {
 598            break;
 599        }
 600        total_tokens += header_cost + excerpt_cost;
 601        if !file_already_included {
 602            included_file_indices[candidate.file_ix] = true;
 603        }
 604        included_excerpt_count += 1;
 605    }
 606
 607    excerpt_candidates.truncate(included_excerpt_count);
 608    excerpt_candidates.sort_unstable_by_key(|c| (c.file_ix, c.excerpt_ix));
 609
 610    // Render all of the files that fit within the token budget, in the original order.
 611    let mut result = String::new();
 612    let mut last_file_ix = None;
 613    for candidate in &excerpt_candidates {
 614        if last_file_ix != Some(candidate.file_ix) {
 615            if last_file_ix.is_some() {
 616                result.push_str(file_suffix);
 617            }
 618            result.push_str(&file_headers[candidate.file_ix]);
 619            last_file_ix = Some(candidate.file_ix);
 620        }
 621        let file = &related_files[candidate.file_ix];
 622        let excerpt = &file.excerpts[candidate.excerpt_ix];
 623        result.push_str(&excerpt.text);
 624        if !result.ends_with('\n') {
 625            result.push('\n');
 626        }
 627        if excerpt.row_range.end < file.max_row {
 628            result.push_str("...\n");
 629        }
 630    }
 631
 632    result
 633}
 634
 635pub fn write_related_files(
 636    prompt: &mut String,
 637    related_files: &[RelatedFile],
 638) -> Vec<Range<usize>> {
 639    let mut ranges = Vec::new();
 640    for file in related_files {
 641        let start = prompt.len();
 642        let path_str = file.path.to_string_lossy();
 643        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 644        for excerpt in &file.excerpts {
 645            prompt.push_str(&excerpt.text);
 646            if !prompt.ends_with('\n') {
 647                prompt.push('\n');
 648            }
 649            if excerpt.row_range.end < file.max_row {
 650                prompt.push_str("...\n");
 651            }
 652        }
 653        let end = prompt.len();
 654        ranges.push(start..end);
 655    }
 656    ranges
 657}
 658
 659mod v0112_middle_at_end {
 660    use super::*;
 661
 662    pub fn special_tokens() -> &'static [&'static str] {
 663        &[
 664            "<|fim_prefix|>",
 665            "<|fim_suffix|>",
 666            "<|fim_middle|>",
 667            "<|file_sep|>",
 668            CURSOR_MARKER,
 669        ]
 670    }
 671
 672    pub fn write_cursor_excerpt_section(
 673        prompt: &mut String,
 674        path: &Path,
 675        context: &str,
 676        editable_range: &Range<usize>,
 677        cursor_offset: usize,
 678    ) {
 679        let path_str = path.to_string_lossy();
 680        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 681
 682        prompt.push_str("<|fim_prefix|>\n");
 683        prompt.push_str(&context[..editable_range.start]);
 684
 685        prompt.push_str("<|fim_suffix|>\n");
 686        prompt.push_str(&context[editable_range.end..]);
 687        if !prompt.ends_with('\n') {
 688            prompt.push('\n');
 689        }
 690
 691        prompt.push_str("<|fim_middle|>current\n");
 692        prompt.push_str(&context[editable_range.start..cursor_offset]);
 693        prompt.push_str(CURSOR_MARKER);
 694        prompt.push_str(&context[cursor_offset..editable_range.end]);
 695        if !prompt.ends_with('\n') {
 696            prompt.push('\n');
 697        }
 698
 699        prompt.push_str("<|fim_middle|>updated\n");
 700    }
 701}
 702
 703mod v0113_ordered {
 704    use super::*;
 705
 706    pub fn special_tokens() -> &'static [&'static str] {
 707        &[
 708            "<|fim_prefix|>",
 709            "<|fim_suffix|>",
 710            "<|fim_middle|>",
 711            "<|file_sep|>",
 712            CURSOR_MARKER,
 713        ]
 714    }
 715
 716    pub fn write_cursor_excerpt_section(
 717        prompt: &mut String,
 718        path: &Path,
 719        context: &str,
 720        editable_range: &Range<usize>,
 721        cursor_offset: usize,
 722    ) {
 723        let path_str = path.to_string_lossy();
 724        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 725
 726        prompt.push_str("<|fim_prefix|>\n");
 727        prompt.push_str(&context[..editable_range.start]);
 728        if !prompt.ends_with('\n') {
 729            prompt.push('\n');
 730        }
 731
 732        prompt.push_str("<|fim_middle|>current\n");
 733        prompt.push_str(&context[editable_range.start..cursor_offset]);
 734        prompt.push_str(CURSOR_MARKER);
 735        prompt.push_str(&context[cursor_offset..editable_range.end]);
 736        if !prompt.ends_with('\n') {
 737            prompt.push('\n');
 738        }
 739
 740        prompt.push_str("<|fim_suffix|>\n");
 741        prompt.push_str(&context[editable_range.end..]);
 742        if !prompt.ends_with('\n') {
 743            prompt.push('\n');
 744        }
 745
 746        prompt.push_str("<|fim_middle|>updated\n");
 747    }
 748}
 749
 750mod v0114180_editable_region {
 751    use super::*;
 752
 753    pub fn special_tokens() -> &'static [&'static str] {
 754        v0113_ordered::special_tokens()
 755    }
 756}
 757
 758pub mod v0120_git_merge_markers {
 759    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 760    //!
 761    //! Example prompt:
 762    //!
 763    //! <|file_sep|>path/to/target_file.py
 764    //! <|fim_prefix|>
 765    //! code before editable region
 766    //! <|fim_suffix|>
 767    //! code after editable region
 768    //! <|fim_middle|>
 769    //! <<<<<<< CURRENT
 770    //! code that
 771    //! needs to<|user_cursor|>
 772    //! be rewritten
 773    //! =======
 774    //!
 775    //! Expected output (should be generated by the model):
 776    //!
 777    //! updated
 778    //! code with
 779    //! changes applied
 780    //! >>>>>>> UPDATED
 781
 782    use super::*;
 783
 784    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 785    pub const SEPARATOR: &str = "=======\n";
 786    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 787
 788    pub fn special_tokens() -> &'static [&'static str] {
 789        &[
 790            "<|fim_prefix|>",
 791            "<|fim_suffix|>",
 792            "<|fim_middle|>",
 793            "<|file_sep|>",
 794            START_MARKER,
 795            SEPARATOR,
 796            END_MARKER,
 797            CURSOR_MARKER,
 798        ]
 799    }
 800
 801    pub fn write_cursor_excerpt_section(
 802        prompt: &mut String,
 803        path: &Path,
 804        context: &str,
 805        editable_range: &Range<usize>,
 806        cursor_offset: usize,
 807    ) {
 808        let path_str = path.to_string_lossy();
 809        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 810
 811        prompt.push_str("<|fim_prefix|>");
 812        prompt.push_str(&context[..editable_range.start]);
 813
 814        prompt.push_str("<|fim_suffix|>");
 815        prompt.push_str(&context[editable_range.end..]);
 816        if !prompt.ends_with('\n') {
 817            prompt.push('\n');
 818        }
 819
 820        prompt.push_str("<|fim_middle|>");
 821        prompt.push_str(START_MARKER);
 822        prompt.push_str(&context[editable_range.start..cursor_offset]);
 823        prompt.push_str(CURSOR_MARKER);
 824        prompt.push_str(&context[cursor_offset..editable_range.end]);
 825        if !prompt.ends_with('\n') {
 826            prompt.push('\n');
 827        }
 828        prompt.push_str(SEPARATOR);
 829    }
 830}
 831
 832pub mod v0131_git_merge_markers_prefix {
 833    //! A prompt that uses git-style merge conflict markers to represent the editable region.
 834    //!
 835    //! Example prompt:
 836    //!
 837    //! <|file_sep|>path/to/target_file.py
 838    //! <|fim_prefix|>
 839    //! code before editable region
 840    //! <<<<<<< CURRENT
 841    //! code that
 842    //! needs to<|user_cursor|>
 843    //! be rewritten
 844    //! =======
 845    //! <|fim_suffix|>
 846    //! code after editable region
 847    //! <|fim_middle|>
 848    //!
 849    //! Expected output (should be generated by the model):
 850    //!
 851    //! updated
 852    //! code with
 853    //! changes applied
 854    //! >>>>>>> UPDATED
 855
 856    use super::*;
 857
 858    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
 859    pub const SEPARATOR: &str = "=======\n";
 860    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
 861
 862    pub fn special_tokens() -> &'static [&'static str] {
 863        &[
 864            "<|fim_prefix|>",
 865            "<|fim_suffix|>",
 866            "<|fim_middle|>",
 867            "<|file_sep|>",
 868            START_MARKER,
 869            SEPARATOR,
 870            END_MARKER,
 871            CURSOR_MARKER,
 872        ]
 873    }
 874
 875    pub fn write_cursor_excerpt_section(
 876        prompt: &mut String,
 877        path: &Path,
 878        context: &str,
 879        editable_range: &Range<usize>,
 880        cursor_offset: usize,
 881    ) {
 882        let path_str = path.to_string_lossy();
 883        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
 884
 885        prompt.push_str("<|fim_prefix|>");
 886        prompt.push_str(&context[..editable_range.start]);
 887        prompt.push_str(START_MARKER);
 888        prompt.push_str(&context[editable_range.start..cursor_offset]);
 889        prompt.push_str(CURSOR_MARKER);
 890        prompt.push_str(&context[cursor_offset..editable_range.end]);
 891        if !prompt.ends_with('\n') {
 892            prompt.push('\n');
 893        }
 894        prompt.push_str(SEPARATOR);
 895
 896        prompt.push_str("<|fim_suffix|>");
 897        prompt.push_str(&context[editable_range.end..]);
 898        if !prompt.ends_with('\n') {
 899            prompt.push('\n');
 900        }
 901
 902        prompt.push_str("<|fim_middle|>");
 903    }
 904}
 905
 906pub mod v0211_prefill {
 907    use super::*;
 908
 909    pub fn special_tokens() -> &'static [&'static str] {
 910        v0131_git_merge_markers_prefix::special_tokens()
 911    }
 912
 913    pub fn get_prefill(context: &str, editable_range: &Range<usize>) -> String {
 914        let editable_region = &context[editable_range.start..editable_range.end];
 915
 916        let prefill_len = (editable_region.len() as f64 * PREFILL_RATIO) as usize;
 917        let prefill_len = editable_region.floor_char_boundary(prefill_len);
 918
 919        // Find a token boundary to avoid splitting tokens in the prefill.
 920        // In Qwen2.5-Coder, \n is always the END of a token (e.g. `;\n`,
 921        // ` {\n`), and \n\n / \n\n\n are single tokens, so we must include
 922        // the \n and consume any consecutive \n characters after it.
 923        let prefill = &editable_region[..prefill_len];
 924        match prefill.rfind('\n') {
 925            Some(pos) => {
 926                let mut end = pos + 1;
 927                while end < editable_region.len()
 928                    && editable_region.as_bytes().get(end) == Some(&b'\n')
 929                {
 930                    end += 1;
 931                }
 932                editable_region[..end].to_string()
 933            }
 934            // No newline found. Fall back to splitting before the last space
 935            // (word-level boundary)
 936            None => match prefill.rfind(' ') {
 937                Some(pos) => prefill[..pos].to_string(),
 938                None => prefill.to_string(),
 939            },
 940        }
 941    }
 942}
 943
 944pub mod hashline {
 945
 946    use std::fmt::Display;
 947
    /// Written last in the prompt section by `write_cursor_excerpt_section`,
    /// after the suffix context.
    pub const END_MARKER: &str = "<|fim_middle|>updated";
    /// Written between the prefix context and the hashline-encoded editable
    /// region by `write_cursor_excerpt_section`.
    pub const START_MARKER: &str = "<|fim_middle|>current";
 950
 951    use super::*;
 952
    /// Prefix of a command header line that replaces a line or a line range.
    const SET_COMMAND_MARKER: &str = "<|set|>";
    /// Prefix of a command header line that inserts new lines.
    const INSERT_COMMAND_MARKER: &str = "<|insert|>";
 955
 956    pub fn special_tokens() -> &'static [&'static str] {
 957        return &[
 958            SET_COMMAND_MARKER,
 959            "<|set_range|>",
 960            INSERT_COMMAND_MARKER,
 961            CURSOR_MARKER,
 962            "<|file_sep|>",
 963            "<|fim_prefix|>",
 964            "<|fim_suffix|>",
 965            "<|fim_middle|>",
 966        ];
 967    }
 968
 969    /// A parsed line reference like `3:c3` (line index 3 with hash 0xc3).
 970    #[derive(Debug, Clone, PartialEq, Eq)]
 971    struct LineRef {
 972        index: usize,
 973        hash: u8,
 974    }
 975
 976    impl Display for LineRef {
 977        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 978            write!(f, "{}:{:02x}", self.index, self.hash)
 979        }
 980    }
 981
 982    pub fn hash_line(line: &[u8]) -> u8 {
 983        let mut h: u8 = 0;
 984        for &byte in line {
 985            h = h.wrapping_add(byte);
 986        }
 987        return h;
 988    }
 989
 990    /// Write the hashline-encoded editable region into `out`. Each line of
 991    /// `editable_text` is prefixed with `{line_index}:{hash}|` and the cursor
 992    /// marker is inserted at `cursor_offset_in_editable` (byte offset relative
 993    /// to the start of `editable_text`).
 994    pub fn write_hashline_editable_region(
 995        out: &mut String,
 996        editable_text: &str,
 997        cursor_offset_in_editable: usize,
 998    ) {
 999        let mut offset = 0;
1000        for (i, line) in editable_text.lines().enumerate() {
1001            let (head, cursor, tail) = if cursor_offset_in_editable > offset
1002                && cursor_offset_in_editable < offset + line.len()
1003            {
1004                (
1005                    &line[..cursor_offset_in_editable - offset],
1006                    CURSOR_MARKER,
1007                    &line[cursor_offset_in_editable - offset..],
1008                )
1009            } else {
1010                (line, "", "")
1011            };
1012            write!(
1013                out,
1014                "\n{}|{head}{cursor}{tail}",
1015                LineRef {
1016                    index: i,
1017                    hash: hash_line(line.as_bytes())
1018                }
1019            )
1020            .unwrap();
1021            offset += line.len() + 1;
1022        }
1023    }
1024
1025    pub fn write_cursor_excerpt_section(
1026        prompt: &mut String,
1027        path: &Path,
1028        context: &str,
1029        editable_range: &Range<usize>,
1030        cursor_offset: usize,
1031    ) {
1032        let path_str = path.to_string_lossy();
1033        write!(prompt, "<|file_sep|>{}\n", path_str).ok();
1034
1035        prompt.push_str("<|fim_prefix|>\n");
1036        prompt.push_str(&context[..editable_range.start]);
1037        prompt.push_str(START_MARKER);
1038
1039        let cursor_offset_in_editable = cursor_offset.saturating_sub(editable_range.start);
1040        let editable_region = &context[editable_range.clone()];
1041        write_hashline_editable_region(prompt, editable_region, cursor_offset_in_editable);
1042
1043        if !prompt.ends_with('\n') {
1044            prompt.push('\n');
1045        }
1046
1047        prompt.push_str("<|fim_suffix|>\n");
1048        prompt.push_str(&context[editable_range.end..]);
1049        if !prompt.ends_with('\n') {
1050            prompt.push('\n');
1051        }
1052
1053        prompt.push_str(END_MARKER);
1054    }
1055
    /// A single edit command parsed from the model output.
    ///
    /// `content` borrows from the model output string: it is the text that
    /// follows the command's header line, up to the next command header or
    /// the end of the output.
    #[derive(Debug)]
    enum EditCommand<'a> {
        /// Replace a range of lines (inclusive on both ends). Single-line set is
        /// represented by `start == end`.
        Set {
            // First line of the replaced range.
            start: LineRef,
            // Last line of the replaced range (inclusive).
            end: LineRef,
            // Replacement text for the whole range.
            content: &'a str,
        },
        /// Insert new lines after the given line, or before the first line if
        /// `after` is `None`.
        Insert {
            // Line to insert after; `None` means "before the first line".
            after: Option<LineRef>,
            // Text to insert.
            content: &'a str,
        },
    }
1073
1074    /// Parse a line reference like `3:c3` into a `LineRef`.
1075    fn parse_line_ref(s: &str) -> Option<LineRef> {
1076        let (idx_str, hash_str) = s.split_once(':')?;
1077        let index = idx_str.parse::<usize>().ok()?;
1078        let hash = u8::from_str_radix(hash_str, 16).ok()?;
1079        Some(LineRef { index, hash })
1080    }
1081
1082    /// Parse the model output into a list of `EditCommand`s.
1083    fn parse_edit_commands(model_output: &str) -> Vec<EditCommand<'_>> {
1084        let mut commands = Vec::new();
1085        let mut offset = 0usize;
1086
1087        while offset < model_output.len() {
1088            let next_nl = model_output[offset..]
1089                .find('\n')
1090                .map(|i| offset + i)
1091                .unwrap_or(model_output.len());
1092            let line = &model_output[offset..next_nl];
1093            let line_end = if next_nl < model_output.len() {
1094                next_nl + 1
1095            } else {
1096                next_nl
1097            };
1098
1099            let trimmed = line.trim();
1100            let (is_set, specifier) = if let Some(spec) = trimmed.strip_prefix(SET_COMMAND_MARKER) {
1101                (true, spec)
1102            } else if let Some(spec) = trimmed.strip_prefix(INSERT_COMMAND_MARKER) {
1103                (false, spec)
1104            } else {
1105                offset = line_end;
1106                continue;
1107            };
1108
1109            let mut content_end = line_end;
1110            let mut scan = line_end;
1111
1112            while scan < model_output.len() {
1113                let body_nl = model_output[scan..]
1114                    .find('\n')
1115                    .map(|i| scan + i)
1116                    .unwrap_or(model_output.len());
1117                let body_line = &model_output[scan..body_nl];
1118                if body_line.trim().starts_with(SET_COMMAND_MARKER)
1119                    || body_line.trim().starts_with(INSERT_COMMAND_MARKER)
1120                {
1121                    break;
1122                }
1123                scan = if body_nl < model_output.len() {
1124                    body_nl + 1
1125                } else {
1126                    body_nl
1127                };
1128                content_end = scan;
1129            }
1130
1131            let content = &model_output[line_end..content_end];
1132
1133            if is_set {
1134                if let Some((start_str, end_str)) = specifier.split_once('-') {
1135                    if let (Some(start), Some(end)) =
1136                        (parse_line_ref(start_str), parse_line_ref(end_str))
1137                    {
1138                        commands.push(EditCommand::Set {
1139                            start,
1140                            end,
1141                            content,
1142                        });
1143                    }
1144                } else if let Some(target) = parse_line_ref(specifier) {
1145                    commands.push(EditCommand::Set {
1146                        start: target.clone(),
1147                        end: target,
1148                        content,
1149                    });
1150                }
1151            } else {
1152                let after = parse_line_ref(specifier);
1153                commands.push(EditCommand::Insert { after, content });
1154            }
1155
1156            offset = scan;
1157        }
1158
1159        commands
1160    }
1161
1162    /// Returns `true` if the model output contains `<|set|>` or `<|insert|>` commands
1163    /// (as opposed to being a plain full-replacement output).
1164    /// Strip the `{line_num}:{hash}|` prefixes from each line of a hashline-encoded
1165    /// editable region, returning the plain text content.
1166    pub fn strip_hashline_prefixes(region: &str) -> String {
1167        let mut decoded: String = region
1168            .lines()
1169            .map(|line| line.find('|').map_or(line, |pos| &line[pos + 1..]))
1170            .collect::<Vec<_>>()
1171            .join("\n");
1172        if region.ends_with('\n') {
1173            decoded.push('\n');
1174        }
1175        decoded
1176    }
1177
1178    pub fn output_has_edit_commands(model_output: &str) -> bool {
1179        model_output.contains(SET_COMMAND_MARKER) || model_output.contains(INSERT_COMMAND_MARKER)
1180    }
1181
1182    /// Apply `<|set|>` and `<|insert|>` edit commands from the model output to the
1183    /// original editable region text.
1184    ///
1185    /// `editable_region` is the original text of the editable region (without hash
1186    /// prefixes). `model_output` is the raw model response containing edit commands.
1187    ///
1188    /// Returns the full replacement text for the editable region.
1189    pub fn apply_edit_commands(editable_region: &str, model_output: &str) -> String {
1190        let original_lines: Vec<&str> = editable_region.lines().collect();
1191        let old_hashes: Vec<u8> = original_lines
1192            .iter()
1193            .map(|line| hash_line(line.as_bytes()))
1194            .collect();
1195
1196        let commands = parse_edit_commands(model_output);
1197
1198        // For set operations: indexed by start line → Some((end line index, content))
1199        // For insert operations: indexed by line index → vec of content to insert after
1200        // Insert-before-first is tracked separately.
1201        let mut set_ops: Vec<Option<(usize, &str)>> = vec![None; original_lines.len()];
1202        let mut insert_before_first: Vec<&str> = Vec::new();
1203        let mut insert_after: Vec<Vec<&str>> = vec![Vec::new(); original_lines.len()];
1204
1205        for command in &commands {
1206            match command {
1207                EditCommand::Set {
1208                    start,
1209                    end,
1210                    content,
1211                } => {
1212                    if start.index < old_hashes.len()
1213                        && end.index < old_hashes.len()
1214                        && start.index <= end.index
1215                        && old_hashes[start.index] == start.hash
1216                        && old_hashes[end.index] == end.hash
1217                    {
1218                        set_ops[start.index] = Some((end.index, *content));
1219                    }
1220                }
1221                EditCommand::Insert { after, content } => match after {
1222                    None => insert_before_first.push(*content),
1223                    Some(line_ref) => {
1224                        if line_ref.index < old_hashes.len()
1225                            && old_hashes[line_ref.index] == line_ref.hash
1226                        {
1227                            insert_after[line_ref.index].push(*content);
1228                        }
1229                    }
1230                },
1231            }
1232        }
1233
1234        let mut result = String::new();
1235
1236        // Emit any insertions before the first line
1237        for content in &insert_before_first {
1238            result.push_str(content);
1239            if !content.ends_with('\n') {
1240                result.push('\n');
1241            }
1242        }
1243
1244        let mut i = 0;
1245        while i < original_lines.len() {
1246            if let Some((end_index, replacement)) = set_ops[i].as_ref() {
1247                // Replace lines i..=end_index with the replacement content
1248                result.push_str(replacement);
1249                if !replacement.is_empty() && !replacement.ends_with('\n') {
1250                    result.push('\n');
1251                }
1252                // Emit any insertions after the end of this set range
1253                if *end_index < insert_after.len() {
1254                    for content in &insert_after[*end_index] {
1255                        result.push_str(content);
1256                        if !content.ends_with('\n') {
1257                            result.push('\n');
1258                        }
1259                    }
1260                }
1261                i = end_index + 1;
1262            } else {
1263                // Keep the original line
1264                result.push_str(original_lines[i]);
1265                result.push('\n');
1266                // Emit any insertions after this line
1267                for content in &insert_after[i] {
1268                    result.push_str(content);
1269                    if !content.ends_with('\n') {
1270                        result.push('\n');
1271                    }
1272                }
1273                i += 1;
1274            }
1275        }
1276
1277        // Preserve trailing newline behavior: if the original ended with a
1278        // newline the result already has one; if it didn't, trim the extra one
1279        // we added.
1280        if !editable_region.ends_with('\n') && result.ends_with('\n') {
1281            result.pop();
1282        }
1283
1284        result
1285    }
1286
1287    /// Convert a unified diff patch into hashline edit commands.
1288    ///
1289    /// Parses the unified diff `patch` directly to determine which lines of
1290    /// `old_text` are deleted/replaced and what new lines are added, then emits
1291    /// `<|set|>` and `<|insert|>` edit commands referencing old lines by their
1292    /// `{index}:{hash}` identifiers.
1293    ///
1294    /// `cursor_offset` is an optional byte offset into the first hunk's new
1295    /// text (context + additions) where the cursor marker should be placed.
1296    pub fn patch_to_edit_commands(
1297        old_text: &str,
1298        patch: &str,
1299        cursor_offset: Option<usize>,
1300    ) -> Result<String> {
1301        let old_lines: Vec<&str> = old_text.lines().collect();
1302        let old_hashes: Vec<u8> = old_lines
1303            .iter()
1304            .map(|line| hash_line(line.as_bytes()))
1305            .collect();
1306
1307        let mut result = String::new();
1308        let mut first_hunk = true;
1309
1310        struct Hunk<'a> {
1311            line_range: Range<usize>,
1312            new_text_lines: Vec<&'a str>,
1313            cursor_line_offset_in_new_text: Option<(usize, usize)>,
1314        }
1315
1316        // Parse the patch line by line. We only care about hunk headers,
1317        // context, deletions, and additions.
1318        let mut old_line_index: usize = 0;
1319        let mut current_hunk: Option<Hunk> = None;
1320        // Byte offset tracking within the hunk's new text for cursor placement.
1321        let mut new_text_byte_offset: usize = 0;
1322        // The line index of the last old line seen before/in the current hunk
1323        // (used for insert-after reference).
1324        let mut last_old_line_before_hunk: Option<usize> = None;
1325
1326        fn flush_hunk(
1327            hunk: Hunk,
1328            last_old_line: Option<usize>,
1329            result: &mut String,
1330            old_hashes: &[u8],
1331        ) {
1332            if hunk.line_range.is_empty() {
1333                // Pure insertion — reference the old line to insert after when in bounds.
1334                if let Some(after) = last_old_line
1335                    && let Some(&hash) = old_hashes.get(after)
1336                {
1337                    write!(
1338                        result,
1339                        "{INSERT_COMMAND_MARKER}{}\n",
1340                        LineRef { index: after, hash }
1341                    )
1342                    .unwrap();
1343                } else {
1344                    result.push_str(INSERT_COMMAND_MARKER);
1345                    result.push('\n');
1346                }
1347            } else {
1348                let start = hunk.line_range.start;
1349                let end_exclusive = hunk.line_range.end;
1350                let deleted_line_count = end_exclusive.saturating_sub(start);
1351
1352                if deleted_line_count == 1 {
1353                    if let Some(&hash) = old_hashes.get(start) {
1354                        write!(
1355                            result,
1356                            "{SET_COMMAND_MARKER}{}\n",
1357                            LineRef { index: start, hash }
1358                        )
1359                        .unwrap();
1360                    } else {
1361                        result.push_str(SET_COMMAND_MARKER);
1362                        result.push('\n');
1363                    }
1364                } else {
1365                    let end_inclusive = end_exclusive - 1;
1366                    match (
1367                        old_hashes.get(start).copied(),
1368                        old_hashes.get(end_inclusive).copied(),
1369                    ) {
1370                        (Some(start_hash), Some(end_hash)) => {
1371                            write!(
1372                                result,
1373                                "{SET_COMMAND_MARKER}{}-{}\n",
1374                                LineRef {
1375                                    index: start,
1376                                    hash: start_hash
1377                                },
1378                                LineRef {
1379                                    index: end_inclusive,
1380                                    hash: end_hash
1381                                }
1382                            )
1383                            .unwrap();
1384                        }
1385                        _ => {
1386                            result.push_str(SET_COMMAND_MARKER);
1387                            result.push('\n');
1388                        }
1389                    }
1390                }
1391            }
1392            for (line_offset, line) in hunk.new_text_lines.iter().enumerate() {
1393                if let Some((cursor_line_offset, char_offset)) = hunk.cursor_line_offset_in_new_text
1394                    && line_offset == cursor_line_offset
1395                {
1396                    result.push_str(&line[..char_offset]);
1397                    result.push_str(CURSOR_MARKER);
1398                    result.push_str(&line[char_offset..]);
1399                    continue;
1400                }
1401
1402                result.push_str(line);
1403            }
1404        }
1405
1406        for raw_line in patch.split_inclusive('\n') {
1407            if raw_line.starts_with("@@") {
1408                // Flush any pending change hunk from a previous patch hunk.
1409                if let Some(hunk) = current_hunk.take() {
1410                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1411                }
1412
1413                // Parse hunk header: @@ -old_start[,old_count] +new_start[,new_count] @@
1414                // We intentionally do not trust old_start as a direct local index into `old_text`,
1415                // because some patches are produced against a larger file region and carry
1416                // non-local line numbers. We keep indexing local by advancing from parsed patch lines.
1417                if first_hunk {
1418                    new_text_byte_offset = 0;
1419                    first_hunk = false;
1420                }
1421                continue;
1422            }
1423
1424            if raw_line.starts_with("---") || raw_line.starts_with("+++") {
1425                continue;
1426            }
1427            if raw_line.starts_with("\\ No newline") {
1428                continue;
1429            }
1430
1431            if raw_line.starts_with('-') {
1432                // Extend or start a change hunk with this deleted old line.
1433                match &mut current_hunk {
1434                    Some(Hunk {
1435                        line_range: range, ..
1436                    }) => range.end = old_line_index + 1,
1437                    None => {
1438                        current_hunk = Some(Hunk {
1439                            line_range: old_line_index..old_line_index + 1,
1440                            new_text_lines: Vec::new(),
1441                            cursor_line_offset_in_new_text: None,
1442                        });
1443                    }
1444                }
1445                old_line_index += 1;
1446            } else if let Some(added_content) = raw_line.strip_prefix('+') {
1447                // Place cursor marker if cursor_offset falls within this line.
1448                let mut cursor_line_offset = None;
1449                if let Some(cursor_off) = cursor_offset
1450                    && (first_hunk
1451                        || cursor_off >= new_text_byte_offset
1452                            && cursor_off <= new_text_byte_offset + added_content.len())
1453                {
1454                    let line_offset = added_content.floor_char_boundary(
1455                        cursor_off
1456                            .saturating_sub(new_text_byte_offset)
1457                            .min(added_content.len()),
1458                    );
1459                    cursor_line_offset = Some(line_offset);
1460                }
1461
1462                new_text_byte_offset += added_content.len();
1463
1464                let hunk = current_hunk.get_or_insert(Hunk {
1465                    line_range: old_line_index..old_line_index,
1466                    new_text_lines: vec![],
1467                    cursor_line_offset_in_new_text: None,
1468                });
1469                hunk.new_text_lines.push(added_content);
1470                hunk.cursor_line_offset_in_new_text = cursor_line_offset
1471                    .map(|offset_in_line| (hunk.new_text_lines.len() - 1, offset_in_line));
1472            } else {
1473                // Context line (starts with ' ' or is empty).
1474                if let Some(hunk) = current_hunk.take() {
1475                    flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1476                }
1477                last_old_line_before_hunk = Some(old_line_index);
1478                old_line_index += 1;
1479                let content = raw_line.strip_prefix(' ').unwrap_or(raw_line);
1480                new_text_byte_offset += content.len();
1481            }
1482        }
1483
1484        // Flush final group.
1485        if let Some(hunk) = current_hunk.take() {
1486            flush_hunk(hunk, last_old_line_before_hunk, &mut result, &old_hashes);
1487        }
1488
1489        // Trim a single trailing newline.
1490        if result.ends_with('\n') {
1491            result.pop();
1492        }
1493
1494        Ok(result)
1495    }
1496
1497    #[cfg(test)]
1498    mod tests {
1499        use super::*;
1500        use indoc::indoc;
1501
1502        #[test]
1503        fn test_format_cursor_region() {
1504            struct Case {
1505                name: &'static str,
1506                context: &'static str,
1507                editable_range: Range<usize>,
1508                cursor_offset: usize,
1509                expected: &'static str,
1510            }
1511
1512            let cases = [
1513                Case {
1514                    name: "basic_cursor_placement",
1515                    context: "hello world\n",
1516                    editable_range: 0..12,
1517                    cursor_offset: 5,
1518                    expected: indoc! {"
1519                    <|file_sep|>test.rs
1520                    <|fim_prefix|>
1521                    <|fim_middle|>current
1522                    0:5c|hello<|user_cursor|> world
1523                    <|fim_suffix|>
1524                    <|fim_middle|>updated"},
1525                },
1526                Case {
1527                    name: "multiline_cursor_on_second_line",
1528                    context: "aaa\nbbb\nccc\n",
1529                    editable_range: 0..12,
1530                    cursor_offset: 5, // byte 5 → 1 byte into "bbb"
1531                    expected: indoc! {"
1532                    <|file_sep|>test.rs
1533                    <|fim_prefix|>
1534                    <|fim_middle|>current
1535                    0:23|aaa
1536                    1:26|b<|user_cursor|>bb
1537                    2:29|ccc
1538                    <|fim_suffix|>
1539                    <|fim_middle|>updated"},
1540                },
1541                Case {
1542                    name: "no_trailing_newline_in_context",
1543                    context: "line1\nline2",
1544                    editable_range: 0..11,
1545                    cursor_offset: 3,
1546                    expected: indoc! {"
1547                    <|file_sep|>test.rs
1548                    <|fim_prefix|>
1549                    <|fim_middle|>current
1550                    0:d9|lin<|user_cursor|>e1
1551                    1:da|line2
1552                    <|fim_suffix|>
1553                    <|fim_middle|>updated"},
1554                },
1555                Case {
1556                    name: "leading_newline_in_editable_region",
1557                    context: "\nabc\n",
1558                    editable_range: 0..5,
1559                    cursor_offset: 2, // byte 2 = 'a' in "abc" (after leading \n)
1560                    expected: indoc! {"
1561                    <|file_sep|>test.rs
1562                    <|fim_prefix|>
1563                    <|fim_middle|>current
1564                    0:00|
1565                    1:26|a<|user_cursor|>bc
1566                    <|fim_suffix|>
1567                    <|fim_middle|>updated"},
1568                },
1569                Case {
1570                    name: "with_suffix",
1571                    context: "abc\ndef",
1572                    editable_range: 0..4, // editable region = "abc\n", suffix = "def"
1573                    cursor_offset: 2,
1574                    expected: indoc! {"
1575                    <|file_sep|>test.rs
1576                    <|fim_prefix|>
1577                    <|fim_middle|>current
1578                    0:26|ab<|user_cursor|>c
1579                    <|fim_suffix|>
1580                    def
1581                    <|fim_middle|>updated"},
1582                },
1583                Case {
1584                    name: "unicode_two_byte_chars",
1585                    context: "héllo\n",
1586                    editable_range: 0..7,
1587                    cursor_offset: 3, // byte 3 = after "hé" (h=1 byte, é=2 bytes), before "llo"
1588                    expected: indoc! {"
1589                    <|file_sep|>test.rs
1590                    <|fim_prefix|>
1591                    <|fim_middle|>current
1592                    0:1b|hé<|user_cursor|>llo
1593                    <|fim_suffix|>
1594                    <|fim_middle|>updated"},
1595                },
1596                Case {
1597                    name: "unicode_three_byte_chars",
1598                    context: "日本語\n",
1599                    editable_range: 0..10,
1600                    cursor_offset: 6, // byte 6 = after "日本" (3+3 bytes), before "語"
1601                    expected: indoc! {"
1602                    <|file_sep|>test.rs
1603                    <|fim_prefix|>
1604                    <|fim_middle|>current
1605                    0:80|日本<|user_cursor|>語
1606                    <|fim_suffix|>
1607                    <|fim_middle|>updated"},
1608                },
1609                Case {
1610                    name: "unicode_four_byte_chars",
1611                    context: "a🌍b\n",
1612                    editable_range: 0..7,
1613                    cursor_offset: 5, // byte 5 = after "a🌍" (1+4 bytes), before "b"
1614                    expected: indoc! {"
1615                    <|file_sep|>test.rs
1616                    <|fim_prefix|>
1617                    <|fim_middle|>current
1618                    0:6b|a🌍<|user_cursor|>b
1619                    <|fim_suffix|>
1620                    <|fim_middle|>updated"},
1621                },
1622                Case {
1623                    name: "cursor_at_start_of_region_not_placed",
1624                    context: "abc\n",
1625                    editable_range: 0..4,
1626                    cursor_offset: 0, // cursor_offset(0) > offset(0) is false → cursor not placed
1627                    expected: indoc! {"
1628                    <|file_sep|>test.rs
1629                    <|fim_prefix|>
1630                    <|fim_middle|>current
1631                    0:26|abc
1632                    <|fim_suffix|>
1633                    <|fim_middle|>updated"},
1634                },
1635                Case {
1636                    name: "cursor_at_end_of_line_not_placed",
1637                    context: "abc\ndef\n",
1638                    editable_range: 0..8,
1639                    cursor_offset: 3, // byte 3 = the \n after "abc" → falls between lines, not placed
1640                    expected: indoc! {"
1641                    <|file_sep|>test.rs
1642                    <|fim_prefix|>
1643                    <|fim_middle|>current
1644                    0:26|abc
1645                    1:2f|def
1646                    <|fim_suffix|>
1647                    <|fim_middle|>updated"},
1648                },
1649                Case {
1650                    name: "cursor_offset_relative_to_context_not_editable_region",
1651                    // cursor_offset is relative to `context`, so when editable_range.start > 0,
1652                    // write_cursor_excerpt_section must subtract it before comparing against
1653                    // per-line offsets within the editable region.
1654                    context: "pre\naaa\nbbb\nsuf\n",
1655                    editable_range: 4..12, // editable region = "aaa\nbbb\n"
1656                    cursor_offset: 9,      // byte 9 in context = second 'b' in "bbb"
1657                    expected: indoc! {"
1658                    <|file_sep|>test.rs
1659                    <|fim_prefix|>
1660                    pre
1661                    <|fim_middle|>current
1662                    0:23|aaa
1663                    1:26|b<|user_cursor|>bb
1664                    <|fim_suffix|>
1665                    suf
1666                    <|fim_middle|>updated"},
1667                },
1668            ];
1669
1670            for case in &cases {
1671                let mut prompt = String::new();
1672                hashline::write_cursor_excerpt_section(
1673                    &mut prompt,
1674                    Path::new("test.rs"),
1675                    case.context,
1676                    &case.editable_range,
1677                    case.cursor_offset,
1678                );
1679                assert_eq!(prompt, case.expected, "failed case: {}", case.name);
1680            }
1681        }
1682
/// Table-driven test for `hashline::apply_edit_commands`.
///
/// Every case feeds `model_output` (text containing `<|set|>` / `<|insert|>`
/// commands with `line:hash` references, as spelled in the fixtures) together
/// with `original` into `apply_edit_commands` and compares the returned text
/// with `expected`. The case `name` only appears in the failure message.
#[test]
fn test_apply_edit_commands() {
    struct Case {
        name: &'static str,
        original: &'static str,
        model_output: &'static str,
        expected: &'static str,
    }

    // A fixed-size array is enough here; no heap-allocated `Vec` is needed.
    let cases = [
        Case {
            name: "set_single_line",
            original: indoc! {"
            let mut total = 0;
            for product in products {
                total += ;
            }
            total
        "},
            model_output: indoc! {"
            <|set|>2:87
                total += product.price;
        "},
            expected: indoc! {"
            let mut total = 0;
            for product in products {
                total += product.price;
            }
            total
        "},
        },
        Case {
            name: "set_range",
            original: indoc! {"
            fn foo() {
                let x = 1;
                let y = 2;
                let z = 3;
            }
        "},
            model_output: indoc! {"
            <|set|>1:46-3:4a
                let sum = 6;
        "},
            expected: indoc! {"
            fn foo() {
                let sum = 6;
            }
        "},
        },
        Case {
            name: "insert_after_line",
            original: indoc! {"
            fn main() {
                let x = 1;
            }
        "},
            model_output: indoc! {"
            <|insert|>1:46
                let y = 2;
        "},
            expected: indoc! {"
            fn main() {
                let x = 1;
                let y = 2;
            }
        "},
        },
        Case {
            name: "insert_before_first",
            original: indoc! {"
            let x = 1;
            let y = 2;
        "},
            model_output: indoc! {"
            <|insert|>
            use std::io;
        "},
            expected: indoc! {"
            use std::io;
            let x = 1;
            let y = 2;
        "},
        },
        Case {
            name: "set_with_cursor_marker",
            original: indoc! {"
            fn main() {
                println!();
            }
        "},
            model_output: indoc! {"
            <|set|>1:34
                eprintln!(\"<|user_cursor|>\");
        "},
            expected: indoc! {"
            fn main() {
                eprintln!(\"<|user_cursor|>\");
            }
        "},
        },
        Case {
            name: "multiple_set_commands",
            original: indoc! {"
            aaa
            bbb
            ccc
            ddd
        "},
            model_output: indoc! {"
            <|set|>0:23
            AAA
            <|set|>2:29
            CCC
        "},
            expected: indoc! {"
            AAA
            bbb
            CCC
            ddd
        "},
        },
        Case {
            name: "set_range_multiline_replacement",
            original: indoc! {"
            fn handle_submit() {
            }

            fn handle_keystroke() {
        "},
            model_output: indoc! {"
            <|set|>0:3f-1:7d
            fn handle_submit(modal_state: &mut ModalState) {
                <|user_cursor|>
            }
        "},
            expected: indoc! {"
            fn handle_submit(modal_state: &mut ModalState) {
                <|user_cursor|>
            }

            fn handle_keystroke() {
        "},
        },
        Case {
            name: "no_edit_commands_returns_original",
            original: indoc! {"
            hello
            world
        "},
            model_output: "some random text with no commands",
            expected: indoc! {"
            hello
            world
        "},
        },
        Case {
            name: "wrong_hash_set_ignored",
            original: indoc! {"
            aaa
            bbb
        "},
            model_output: indoc! {"
            <|set|>0:ff
            ZZZ
        "},
            expected: indoc! {"
            aaa
            bbb
        "},
        },
        Case {
            name: "insert_and_set_combined",
            original: indoc! {"
            alpha
            beta
            gamma
        "},
            model_output: indoc! {"
            <|set|>0:06
            ALPHA
            <|insert|>1:9c
            beta_extra
        "},
            expected: indoc! {"
            ALPHA
            beta
            beta_extra
            gamma
        "},
        },
        Case {
            name: "no_trailing_newline_preserved",
            original: "hello\nworld",
            model_output: indoc! {"
            <|set|>0:14
            HELLO
        "},
            expected: "HELLO\nworld",
        },
        Case {
            name: "set_range_hash_mismatch_in_end_bound",
            original: indoc! {"
            one
            two
            three
        "},
            model_output: indoc! {"
            <|set|>0:42-2:ff
            ONE_TWO_THREE
        "},
            expected: indoc! {"
            one
            two
            three
        "},
        },
        Case {
            name: "set_range_start_greater_than_end_ignored",
            original: indoc! {"
            a
            b
            c
        "},
            model_output: indoc! {"
            <|set|>2:63-1:62
            X
        "},
            expected: indoc! {"
            a
            b
            c
        "},
        },
        Case {
            name: "insert_out_of_bounds_ignored",
            original: indoc! {"
            x
            y
        "},
            model_output: indoc! {"
            <|insert|>99:aa
            z
        "},
            expected: indoc! {"
            x
            y
        "},
        },
        Case {
            name: "set_out_of_bounds_ignored",
            original: indoc! {"
            x
            y
        "},
            model_output: indoc! {"
            <|set|>99:aa
            z
        "},
            expected: indoc! {"
            x
            y
        "},
        },
        Case {
            name: "malformed_set_command_ignored",
            original: indoc! {"
            alpha
            beta
        "},
            model_output: indoc! {"
            <|set|>not-a-line-ref
            UPDATED
        "},
            expected: indoc! {"
            alpha
            beta
        "},
        },
        Case {
            name: "malformed_insert_hash_treated_as_before_first",
            original: indoc! {"
            alpha
            beta
        "},
            model_output: indoc! {"
            <|insert|>1:nothex
            preamble
        "},
            expected: indoc! {"
            preamble
            alpha
            beta
        "},
        },
        Case {
            name: "set_then_insert_same_target_orders_insert_after_replacement",
            original: indoc! {"
            cat
            dog
        "},
            model_output: indoc! {"
            <|set|>0:38
            CAT
            <|insert|>0:38
            TAIL
        "},
            expected: indoc! {"
            CAT
            TAIL
            dog
        "},
        },
        Case {
            // The expected text keeps `FIRST` and the untouched line `d`, i.e.
            // the second, overlapping `<|set|>` is dropped: the first command
            // wins. (This case was previously labelled "..._last_wins", which
            // contradicted its own expectation.)
            name: "overlapping_set_ranges_first_wins",
            original: indoc! {"
            a
            b
            c
            d
        "},
            model_output: indoc! {"
            <|set|>0:61-2:63
            FIRST
            <|set|>1:62-3:64
            SECOND
        "},
            expected: indoc! {"
            FIRST
            d
        "},
        },
        Case {
            name: "insert_before_first_and_after_line",
            original: indoc! {"
            a
            b
        "},
            model_output: indoc! {"
            <|insert|>
            HEAD
            <|insert|>0:61
            MID
        "},
            expected: indoc! {"
            HEAD
            a
            MID
            b
        "},
        },
    ];

    for case in &cases {
        // `model_output` is already a `&str`; no extra borrow is required.
        let result = hashline::apply_edit_commands(case.original, case.model_output);
        assert_eq!(result, case.expected, "failed case: {}", case.name);
    }
}
2041
/// Checks `hashline::output_has_edit_commands`: it must report `true` when a
/// set or insert command marker is present (at the start of the output or
/// after other text) and `false` for text that contains neither marker.
#[test]
fn test_output_has_edit_commands() {
    // Outputs that contain at least one command marker.
    let with_commands = [
        format!("{SET_COMMAND_MARKER}0:ab\nnew"),
        format!("{INSERT_COMMAND_MARKER}0:ab\nnew"),
        format!("some text\n{SET_COMMAND_MARKER}1:cd\nstuff"),
    ];
    for output in &with_commands {
        assert!(hashline::output_has_edit_commands(output));
    }

    // Outputs without any command marker.
    for output in ["just plain text", "NO_EDITS"] {
        assert!(!hashline::output_has_edit_commands(output));
    }
}
2059
2060        // ---- hashline::patch_to_edit_commands round-trip tests ----
2061
/// Round-trip test for `hashline::patch_to_edit_commands`.
///
/// For each case the unified-diff `patch` is converted into edit commands
/// against `old`; the commands must be recognised by
/// `hashline::output_has_edit_commands`, and applying them to `old` with
/// `hashline::apply_edit_commands` must yield `expected_new`.
#[test]
fn test_patch_to_edit_commands() {
    struct Case {
        name: &'static str,
        old: &'static str,
        patch: &'static str,
        expected_new: &'static str,
    }

    let cases = [
        Case {
            name: "single_line_replacement",
            old: indoc! {"
            let mut total = 0;
            for product in products {
                total += ;
            }
            total
        "},
            patch: indoc! {"
            @@ -1,5 +1,5 @@
             let mut total = 0;
             for product in products {
            -    total += ;
            +    total += product.price;
             }
             total
        "},
            expected_new: indoc! {"
            let mut total = 0;
            for product in products {
                total += product.price;
            }
            total
        "},
        },
        Case {
            name: "multiline_replacement",
            old: indoc! {"
            fn foo() {
                let x = 1;
                let y = 2;
                let z = 3;
            }
        "},
            patch: indoc! {"
            @@ -1,5 +1,3 @@
             fn foo() {
            -    let x = 1;
            -    let y = 2;
            -    let z = 3;
            +    let sum = 1 + 2 + 3;
             }
        "},
            expected_new: indoc! {"
            fn foo() {
                let sum = 1 + 2 + 3;
            }
        "},
        },
        Case {
            name: "insertion",
            old: indoc! {"
            fn main() {
                let x = 1;
            }
        "},
            patch: indoc! {"
            @@ -1,3 +1,4 @@
             fn main() {
                 let x = 1;
            +    let y = 2;
             }
        "},
            expected_new: indoc! {"
            fn main() {
                let x = 1;
                let y = 2;
            }
        "},
        },
        Case {
            name: "insertion_before_first",
            old: indoc! {"
            let x = 1;
            let y = 2;
        "},
            patch: indoc! {"
            @@ -1,2 +1,3 @@
            +use std::io;
             let x = 1;
             let y = 2;
        "},
            expected_new: indoc! {"
            use std::io;
            let x = 1;
            let y = 2;
        "},
        },
        Case {
            name: "deletion",
            old: indoc! {"
            aaa
            bbb
            ccc
            ddd
        "},
            patch: indoc! {"
            @@ -1,4 +1,2 @@
             aaa
            -bbb
            -ccc
             ddd
        "},
            expected_new: indoc! {"
            aaa
            ddd
        "},
        },
        Case {
            name: "multiple_changes",
            old: indoc! {"
            alpha
            beta
            gamma
            delta
            epsilon
        "},
            patch: indoc! {"
            @@ -1,5 +1,5 @@
            -alpha
            +ALPHA
             beta
             gamma
            -delta
            +DELTA
             epsilon
        "},
            expected_new: indoc! {"
            ALPHA
            beta
            gamma
            DELTA
            epsilon
        "},
        },
        Case {
            name: "replace_with_insertion",
            old: indoc! {r#"
            fn handle() {
                modal_state.close();
                modal_state.dismiss();
        "#},
            patch: indoc! {r#"
            @@ -1,3 +1,4 @@
             fn handle() {
                 modal_state.close();
            +    eprintln!("");
                 modal_state.dismiss();
        "#},
            expected_new: indoc! {r#"
            fn handle() {
                modal_state.close();
                eprintln!("");
                modal_state.dismiss();
        "#},
        },
        Case {
            name: "complete_replacement",
            old: indoc! {"
            aaa
            bbb
            ccc
        "},
            patch: indoc! {"
            @@ -1,3 +1,3 @@
            -aaa
            -bbb
            -ccc
            +xxx
            +yyy
            +zzz
        "},
            expected_new: indoc! {"
            xxx
            yyy
            zzz
        "},
        },
        Case {
            name: "add_function_body",
            old: indoc! {"
            fn foo() {
                modal_state.dismiss();
            }

            fn

            fn handle_keystroke() {
        "},
            patch: indoc! {"
            @@ -1,6 +1,8 @@
             fn foo() {
                 modal_state.dismiss();
             }

            -fn
            +fn handle_submit() {
            +    todo()
            +}

             fn handle_keystroke() {
        "},
            expected_new: indoc! {"
            fn foo() {
                modal_state.dismiss();
            }

            fn handle_submit() {
                todo()
            }

            fn handle_keystroke() {
        "},
        },
        Case {
            name: "with_cursor_offset",
            old: indoc! {r#"
            fn main() {
                println!();
            }
        "#},
            patch: indoc! {r#"
            @@ -1,3 +1,3 @@
             fn main() {
            -    println!();
            +    eprintln!("");
             }
        "#},
            expected_new: indoc! {r#"
            fn main() {
                eprintln!("<|user_cursor|>");
            }
        "#},
        },
        Case {
            name: "non_local_hunk_header_pure_insertion_repro",
            old: indoc! {"
            aaa
            bbb
        "},
            patch: indoc! {"
            @@ -20,2 +20,3 @@
             aaa
            +xxx
             bbb
        "},
            expected_new: indoc! {"
            aaa
            xxx
            bbb
        "},
        },
    ];

    for case in &cases {
        // `patch_to_edit_commands` takes the cursor position relative to the
        // first hunk's new text (context lines plus additions). The expected
        // output mirrors that text, so the marker's position in it (if the
        // marker is present at all) is the offset to pass.
        let cursor_offset = case.expected_new.find(CURSOR_MARKER);

        let conversion = hashline::patch_to_edit_commands(case.old, case.patch, cursor_offset);
        let commands = match conversion {
            Ok(commands) => commands,
            Err(e) => panic!("failed case {}: {e}", case.name),
        };

        // The generated text must be recognisable as edit commands...
        let recognised = hashline::output_has_edit_commands(&commands);
        assert!(
            recognised,
            "case {}: expected edit commands, got: {commands:?}",
            case.name,
        );

        // ...and applying them must reproduce the expected new text.
        let applied = hashline::apply_edit_commands(case.old, &commands);
        assert_eq!(applied, case.expected_new, "case {}", case.name);
    }
}
2348    }
2349}
2350
2351pub mod seed_coder {
2352    //! Seed-Coder prompt format using SPM (Suffix-Prefix-Middle) FIM mode.
2353    //!
2354    //! Seed-Coder uses different FIM tokens and order than Qwen:
2355    //! - SPM order: suffix comes FIRST, then prefix, then middle
2356    //! - Tokens: `<[fim-suffix]>`, `<[fim-prefix]>`, `<[fim-middle]>`
2357    //! - File markers: StarCoder-style `<filename>path` (single token + path)
2358    //!
2359    //! All context (related files, edit history) goes in the PREFIX section.
2360    //! The suffix contains only code after the editable region.
2361    //!
2362    //! Example prompt:
2363    //!
2364    //! <[fim-suffix]>
2365    //! code after editable region
2366    //! <[fim-prefix]><filename>related/file.py
2367    //! related file content
2368    //!
2369    //! <filename>edit_history
2370    //! --- a/some_file.py
2371    //! +++ b/some_file.py
2372    //! -old
2373    //! +new
2374    //!
2375    //! <filename>path/to/target_file.py
2376    //! code before editable region
2377    //! <<<<<<< CURRENT
2378    //! code that
2379    //! needs to<|user_cursor|>
2380    //! be rewritten
2381    //! =======
2382    //! <[fim-middle]>
2383    //!
2384    //! Expected output (model generates):
2385    //!
2386    //! updated
2387    //! code with
2388    //! changes applied
2389    //! >>>>>>> UPDATED
2390
2391    use super::*;
2392
2393    pub const FIM_SUFFIX: &str = "<[fim-suffix]>";
2394    pub const FIM_PREFIX: &str = "<[fim-prefix]>";
2395    pub const FIM_MIDDLE: &str = "<[fim-middle]>";
2396    pub const FILE_MARKER: &str = "<filename>";
2397
2398    pub const START_MARKER: &str = "<<<<<<< CURRENT\n";
2399    pub const SEPARATOR: &str = "=======\n";
2400    pub const END_MARKER: &str = ">>>>>>> UPDATED\n";
2401
2402    pub const NO_EDITS: &str = "NO_EDITS\n";
2403
2404    pub fn special_tokens() -> &'static [&'static str] {
2405        &[
2406            FIM_SUFFIX,
2407            FIM_PREFIX,
2408            FIM_MIDDLE,
2409            FILE_MARKER,
2410            START_MARKER,
2411            SEPARATOR,
2412            END_MARKER,
2413            CURSOR_MARKER,
2414        ]
2415    }
2416
2417    pub fn write_cursor_excerpt_section(
2418        prompt: &mut String,
2419        path: &Path,
2420        context: &str,
2421        editable_range: &Range<usize>,
2422        cursor_offset: usize,
2423    ) {
2424        let section = build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2425        prompt.push_str(&section);
2426    }
2427
2428    pub fn format_prompt_with_budget(
2429        path: &Path,
2430        context: &str,
2431        editable_range: &Range<usize>,
2432        cursor_offset: usize,
2433        events: &[Arc<Event>],
2434        related_files: &[RelatedFile],
2435        max_tokens: usize,
2436    ) -> String {
2437        let suffix_section = build_suffix_section(context, editable_range);
2438        let cursor_prefix_section =
2439            build_cursor_prefix_section(path, context, editable_range, cursor_offset);
2440
2441        let suffix_tokens = estimate_tokens(suffix_section.len());
2442        let cursor_prefix_tokens = estimate_tokens(cursor_prefix_section.len());
2443        let budget_after_cursor = max_tokens.saturating_sub(suffix_tokens + cursor_prefix_tokens);
2444
2445        let edit_history_section = super::format_edit_history_within_budget(
2446            events,
2447            FILE_MARKER,
2448            "edit_history",
2449            budget_after_cursor,
2450        );
2451        let edit_history_tokens = estimate_tokens(edit_history_section.len());
2452        let budget_after_edit_history = budget_after_cursor.saturating_sub(edit_history_tokens);
2453
2454        let related_files_section = super::format_related_files_within_budget(
2455            related_files,
2456            FILE_MARKER,
2457            "",
2458            budget_after_edit_history,
2459        );
2460
2461        let mut prompt = String::new();
2462        prompt.push_str(&suffix_section);
2463        prompt.push_str(FIM_PREFIX);
2464        prompt.push_str(&related_files_section);
2465        if !related_files_section.is_empty() {
2466            prompt.push('\n');
2467        }
2468        prompt.push_str(&edit_history_section);
2469        if !edit_history_section.is_empty() {
2470            prompt.push('\n');
2471        }
2472        prompt.push_str(&cursor_prefix_section);
2473        prompt.push_str(FIM_MIDDLE);
2474        prompt
2475    }
2476
2477    fn build_suffix_section(context: &str, editable_range: &Range<usize>) -> String {
2478        let mut section = String::new();
2479        section.push_str(FIM_SUFFIX);
2480        section.push_str(&context[editable_range.end..]);
2481        if !section.ends_with('\n') {
2482            section.push('\n');
2483        }
2484        section
2485    }
2486
2487    fn build_cursor_prefix_section(
2488        path: &Path,
2489        context: &str,
2490        editable_range: &Range<usize>,
2491        cursor_offset: usize,
2492    ) -> String {
2493        let mut section = String::new();
2494        let path_str = path.to_string_lossy();
2495        write!(section, "{}{}\n", FILE_MARKER, path_str).ok();
2496
2497        section.push_str(&context[..editable_range.start]);
2498        section.push_str(START_MARKER);
2499        section.push_str(&context[editable_range.start..cursor_offset]);
2500        section.push_str(CURSOR_MARKER);
2501        section.push_str(&context[cursor_offset..editable_range.end]);
2502        if !section.ends_with('\n') {
2503            section.push('\n');
2504        }
2505        section.push_str(SEPARATOR);
2506        section
2507    }
2508
2509    /// Format patch as containing no changes if it's empty; otherwise return None.
2510    pub(crate) fn no_edits(patch: &str) -> Option<String> {
2511        // Count lines in the patch
2512        let empty_patch = patch.lines().count() <= 3;
2513        if empty_patch {
2514            Some(format!("{NO_EDITS}{END_MARKER}"))
2515        } else {
2516            None
2517        }
2518    }
2519}
2520
2521/// The zeta1 prompt format
2522pub mod zeta1 {
2523    use super::*;
2524    use std::fmt::Write;
2525
/// Cursor marker of the zeta1 format. Declared locally, so inside this module
/// it takes precedence over the top-level `CURSOR_MARKER` (`<|user_cursor|>`)
/// brought in by `use super::*`.
pub const CURSOR_MARKER: &str = "<|user_cursor_is_here|>";
/// Start-of-file marker token of the zeta1 format.
pub const START_OF_FILE_MARKER: &str = "<|start_of_file|>";
/// Token marking the start of the editable region.
pub const EDITABLE_REGION_START_MARKER: &str = "<|editable_region_start|>";
/// Token marking the end of the editable region.
pub const EDITABLE_REGION_END_MARKER: &str = "<|editable_region_end|>";

/// Fixed text that opens every zeta1 prompt; it ends with the "User Edits"
/// heading, so the formatted events follow it directly.
const INSTRUCTION_HEADER: &str = "### Instruction:\n\
    You are a code completion assistant and your task is to analyze user edits and then rewrite an \
    excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking \
    into account the cursor location.\n\n\
    ### User Edits:\n\n";
/// Heading placed between the events and the excerpt.
const EXCERPT_HEADER: &str = "\n\n### User Excerpt:\n\n";
/// Heading placed after the excerpt; it is the last part of the prompt.
const RESPONSE_HEADER: &str = "\n\n### Response:\n";
2540
2541    /// Formats a complete zeta1 prompt from the input events and excerpt.
2542    pub fn format_zeta1_prompt(input_events: &str, input_excerpt: &str) -> String {
2543        let mut prompt = String::with_capacity(
2544            INSTRUCTION_HEADER.len()
2545                + input_events.len()
2546                + EXCERPT_HEADER.len()
2547                + input_excerpt.len()
2548                + RESPONSE_HEADER.len(),
2549        );
2550        prompt.push_str(INSTRUCTION_HEADER);
2551        prompt.push_str(input_events);
2552        prompt.push_str(EXCERPT_HEADER);
2553        prompt.push_str(input_excerpt);
2554        prompt.push_str(RESPONSE_HEADER);
2555        prompt
2556    }
2557
2558    /// Formats a complete zeta1 prompt from a `ZetaPromptInput` using the given
2559    /// editable and context byte-offset ranges within `cursor_excerpt`.
2560    pub fn format_zeta1_from_input(
2561        input: &ZetaPromptInput,
2562        editable_range: Range<usize>,
2563        context_range: Range<usize>,
2564    ) -> String {
2565        let events = format_zeta1_events(&input.events);
2566        let excerpt = format_zeta1_excerpt(input, editable_range, context_range);
2567        format_zeta1_prompt(&events, &excerpt)
2568    }
2569
2570    /// Formats events in zeta1 style (oldest first).
2571    fn format_zeta1_events(events: &[Arc<Event>]) -> String {
2572        let mut result = String::new();
2573        for event in events {
2574            let event_string = format_zeta1_event(event);
2575            if event_string.is_empty() {
2576                continue;
2577            }
2578            if !result.is_empty() {
2579                result.push_str("\n\n");
2580            }
2581            result.push_str(&event_string);
2582        }
2583        result
2584    }
2585
2586    fn format_zeta1_event(event: &Event) -> String {
2587        match event {
2588            Event::BufferChange {
2589                path,
2590                old_path,
2591                diff,
2592                ..
2593            } => {
2594                let mut prompt = String::new();
2595                if old_path != path {
2596                    writeln!(
2597                        prompt,
2598                        "User renamed {} to {}\n",
2599                        old_path.display(),
2600                        path.display()
2601                    )
2602                    .ok();
2603                }
2604                if !diff.is_empty() {
2605                    write!(
2606                        prompt,
2607                        "User edited {}:\n```diff\n{}\n```",
2608                        path.display(),
2609                        diff
2610                    )
2611                    .ok();
2612                }
2613                prompt
2614            }
2615        }
2616    }
2617
2618    /// Formats the excerpt section of a zeta1 prompt using byte-offset ranges
2619    /// within `cursor_excerpt`.
2620    fn format_zeta1_excerpt(
2621        input: &ZetaPromptInput,
2622        editable_range: Range<usize>,
2623        context_range: Range<usize>,
2624    ) -> String {
2625        let path_str = input.cursor_path.to_string_lossy();
2626        let excerpt = &*input.cursor_excerpt;
2627        let cursor_offset = input.cursor_offset_in_excerpt;
2628
2629        let mut prompt = String::new();
2630        writeln!(&mut prompt, "```{path_str}").ok();
2631
2632        let starts_at_file_beginning =
2633            input.excerpt_start_row == Some(0) && context_range.start == 0;
2634        if starts_at_file_beginning {
2635            writeln!(&mut prompt, "{START_OF_FILE_MARKER}").ok();
2636        }
2637
2638        prompt.push_str(&excerpt[context_range.start..editable_range.start]);
2639
2640        writeln!(&mut prompt, "{EDITABLE_REGION_START_MARKER}").ok();
2641        prompt.push_str(&excerpt[editable_range.start..cursor_offset]);
2642        prompt.push_str(CURSOR_MARKER);
2643        prompt.push_str(&excerpt[cursor_offset..editable_range.end]);
2644        write!(&mut prompt, "\n{EDITABLE_REGION_END_MARKER}").ok();
2645
2646        prompt.push_str(&excerpt[editable_range.end..context_range.end]);
2647        write!(prompt, "\n```").ok();
2648
2649        prompt
2650    }
2651
2652    /// Cleans zeta1 model output by extracting content between editable region
2653    /// markers and converting the zeta1 cursor marker to the universal one.
2654    /// Returns `None` if the output doesn't contain the expected markers.
2655    pub fn clean_zeta1_model_output(output: &str) -> Option<String> {
2656        let content = output.replace(CURSOR_MARKER, "");
2657
2658        let content_start = content
2659            .find(EDITABLE_REGION_START_MARKER)
2660            .map(|pos| pos + EDITABLE_REGION_START_MARKER.len())
2661            .map(|pos| {
2662                if content.as_bytes().get(pos) == Some(&b'\n') {
2663                    pos + 1
2664                } else {
2665                    pos
2666                }
2667            })
2668            .unwrap_or(0);
2669
2670        let content_end = content
2671            .find(EDITABLE_REGION_END_MARKER)
2672            .map(|pos| {
2673                if pos > 0 && content.as_bytes().get(pos - 1) == Some(&b'\n') {
2674                    pos - 1
2675                } else {
2676                    pos
2677                }
2678            })
2679            .unwrap_or(content.len());
2680
2681        if content_start > content_end {
2682            return Some(String::new());
2683        }
2684
2685        let extracted = &content[content_start..content_end];
2686
2687        let cursor_offset = output.find(CURSOR_MARKER).map(|zeta1_cursor_pos| {
2688            let text_before_cursor = output[..zeta1_cursor_pos].replace(CURSOR_MARKER, "");
2689            let text_before_cursor = text_before_cursor
2690                .find(EDITABLE_REGION_START_MARKER)
2691                .map(|pos| {
2692                    let after_marker = pos + EDITABLE_REGION_START_MARKER.len();
2693                    if text_before_cursor.as_bytes().get(after_marker) == Some(&b'\n') {
2694                        after_marker + 1
2695                    } else {
2696                        after_marker
2697                    }
2698                })
2699                .unwrap_or(0);
2700            let offset_in_extracted = zeta1_cursor_pos
2701                .saturating_sub(text_before_cursor)
2702                .min(extracted.len());
2703            offset_in_extracted
2704        });
2705
2706        let mut result = String::with_capacity(extracted.len() + super::CURSOR_MARKER.len());
2707        if let Some(offset) = cursor_offset {
2708            result.push_str(&extracted[..offset]);
2709            result.push_str(super::CURSOR_MARKER);
2710            result.push_str(&extracted[offset..]);
2711        } else {
2712            result.push_str(extracted);
2713        }
2714
2715        Some(result)
2716    }
2717}
2718
2719#[cfg(test)]
2720mod tests {
2721    use super::*;
2722    use indoc::indoc;
2723
2724    fn make_input(
2725        cursor_excerpt: &str,
2726        editable_range: Range<usize>,
2727        cursor_offset: usize,
2728        events: Vec<Event>,
2729        related_files: Vec<RelatedFile>,
2730    ) -> ZetaPromptInput {
2731        let context_range = 0..cursor_excerpt.len();
2732        ZetaPromptInput {
2733            cursor_path: Path::new("test.rs").into(),
2734            cursor_excerpt: cursor_excerpt.into(),
2735            cursor_offset_in_excerpt: cursor_offset,
2736            excerpt_start_row: None,
2737            events: events.into_iter().map(Arc::new).collect(),
2738            related_files,
2739            excerpt_ranges: ExcerptRanges {
2740                editable_150: editable_range.clone(),
2741                editable_180: editable_range.clone(),
2742                editable_350: editable_range,
2743                editable_150_context_350: context_range.clone(),
2744                editable_180_context_350: context_range.clone(),
2745                editable_350_context_150: context_range,
2746                ..Default::default()
2747            },
2748            experiment: None,
2749            in_open_source_repo: false,
2750            can_collect_data: false,
2751            repo_url: None,
2752        }
2753    }
2754
2755    fn make_event(path: &str, diff: &str) -> Event {
2756        Event::BufferChange {
2757            path: Path::new(path).into(),
2758            old_path: Path::new(path).into(),
2759            diff: diff.to_string(),
2760            predicted: false,
2761            in_open_source_repo: false,
2762        }
2763    }
2764
2765    fn make_related_file(path: &str, content: &str) -> RelatedFile {
2766        RelatedFile {
2767            path: Path::new(path).into(),
2768            max_row: content.lines().count() as u32,
2769            excerpts: vec![RelatedExcerpt {
2770                row_range: 0..content.lines().count() as u32,
2771                text: content.into(),
2772                order: 0,
2773            }],
2774            in_open_source_repo: false,
2775        }
2776    }
2777
2778    fn format_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
2779        format_prompt_with_budget_for_format(input, ZetaFormat::V0114180EditableRegion, max_tokens)
2780    }
2781
#[test]
fn test_no_truncation_when_within_budget() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let prompt_input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    // A generous budget keeps the related file, the edit history and the
    // cursor excerpt.
    let expected = indoc! {r#"
        <|file_sep|>related.rs
        fn helper() {}
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new
        <|file_sep|>test.rs
        <|fim_prefix|>
        prefix
        <|fim_middle|>current
        edi<|user_cursor|>table
        <|fim_suffix|>

        suffix
        <|fim_middle|>updated
    "#};

    assert_eq!(format_with_budget(&prompt_input, 10000), expected);
}
2814
#[test]
fn test_truncation_drops_edit_history_when_budget_tight() {
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let related_files = vec![
        make_related_file("r1.rs", "a\n"),
        make_related_file("r2.rs", "b\n"),
    ];
    let prompt_input = make_input("code", 0..4, 2, events, related_files);

    // Large budget: related files, edit history and cursor excerpt all appear.
    let untruncated = indoc! {r#"
        <|file_sep|>r1.rs
        a
        <|file_sep|>r2.rs
        b
        <|file_sep|>edit history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&prompt_input, 10000), untruncated);

    // Budget of 50: the edit history section is gone, the rest remains.
    let without_history = indoc! {r#"
        <|file_sep|>r1.rs
        a
        <|file_sep|>r2.rs
        b
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        co<|user_cursor|>de
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&prompt_input, 50), without_history);
}
2865
#[test]
fn test_truncation_includes_partial_excerpts() {
    // One related file made of three consecutive order-0 excerpts.
    let big_file = RelatedFile {
        path: Path::new("big.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![
            RelatedExcerpt {
                row_range: 0..10,
                text: "first excerpt\n".into(),
                order: 0,
            },
            RelatedExcerpt {
                row_range: 10..20,
                text: "second excerpt\n".into(),
                order: 0,
            },
            RelatedExcerpt {
                row_range: 20..30,
                text: "third excerpt\n".into(),
                order: 0,
            },
        ],
    };
    let prompt_input = make_input("x", 0..1, 0, vec![], vec![big_file]);

    // Large budget: every excerpt is rendered.
    let all_excerpts = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        second excerpt
        ...
        third excerpt
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&prompt_input, 10000), all_excerpts);

    // Budget of 50: only the first excerpt of the file survives.
    let first_excerpt_only = indoc! {r#"
        <|file_sep|>big.rs
        first excerpt
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&prompt_input, 50), first_excerpt_only);
}
2930
#[test]
fn test_truncation_prioritizes_lower_order_excerpts() {
    // Two files: file_a carries an order-5 excerpt, file_b an order-1 excerpt.
    // Under a tight budget only file_b's lower-order excerpt should remain.
    let file_a = RelatedFile {
        path: Path::new("file_a.rs").into(),
        max_row: 10,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..10,
            text: "low priority content\n".into(),
            order: 5,
        }],
    };
    let file_b = RelatedFile {
        path: Path::new("file_b.rs").into(),
        max_row: 10,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..10,
            text: "high priority content\n".into(),
            order: 1,
        }],
    };
    let prompt_input = make_input("x", 0..1, 0, vec![], vec![file_a, file_b]);

    // Large budget: both files are included, file_a before file_b.
    let both_files = indoc! {r#"
        <|file_sep|>file_a.rs
        low priority content
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&prompt_input, 10000), both_files);

    // Tight budget: only file_b (lower order) fits.
    // Cursor section is ~37 tokens, so budget 52 leaves ~15 for related files.
    // file_b header (7) + excerpt (7) = 14 tokens, which fits.
    // file_a would need another 14 tokens, which doesn't fit.
    let file_b_only = indoc! {r#"
        <|file_sep|>file_b.rs
        high priority content
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&prompt_input, 52), file_b_only);
}
2999
#[test]
fn test_truncation_drops_high_order_excerpts_within_file() {
    // A single file has excerpts at order 1 and order 3. With a tight budget,
    // only the order-1 excerpts are included while the order-3 excerpt is
    // dropped, even though they belong to the same file. This also preserves
    // the parent invariant: parent outline items have order <= their best
    // child, so they're always included when any child is.
    let module_file = RelatedFile {
        path: Path::new("mod.rs").into(),
        max_row: 30,
        in_open_source_repo: false,
        excerpts: vec![
            RelatedExcerpt {
                row_range: 0..5,
                text: "mod header\n".into(),
                order: 1,
            },
            RelatedExcerpt {
                row_range: 5..15,
                text: "important fn\n".into(),
                order: 1,
            },
            RelatedExcerpt {
                row_range: 15..30,
                text: "less important fn\n".into(),
                order: 3,
            },
        ],
    };
    let prompt_input = make_input("x", 0..1, 0, vec![], vec![module_file]);

    // Large budget: all three excerpts are rendered.
    let all_excerpts = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        less important fn
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&prompt_input, 10000), all_excerpts);

    // Tight budget: only the order-1 excerpts (header + important fn) remain.
    let order_one_only = indoc! {r#"
        <|file_sep|>mod.rs
        mod header
        ...
        important fn
        ...
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&prompt_input, 55), order_one_only);
}
3073
#[test]
fn test_truncation_drops_older_events_first() {
    // Events are passed oldest first: old.rs, then new.rs.
    let events = vec![make_event("old.rs", "-1\n"), make_event("new.rs", "-2\n")];
    let prompt_input = make_input("x", 0..1, 0, events, vec![]);

    // Large budget: both events are rendered.
    let both_events = indoc! {r#"
        <|file_sep|>edit history
        --- a/old.rs
        +++ b/old.rs
        -1
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&prompt_input, 10000), both_events);

    // Budget of 55: the older event is dropped and the newer one is kept.
    let newest_event_only = indoc! {r#"
        <|file_sep|>edit history
        --- a/new.rs
        +++ b/new.rs
        -2
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        <|user_cursor|>x
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};
    assert_eq!(format_with_budget(&prompt_input, 55), newest_event_only);
}
3119
#[test]
fn test_cursor_excerpt_always_included_with_minimal_budget() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "helper\n")];
    let prompt_input = make_input("fn main() {}", 0..12, 3, events, related_files);

    // Budget of 30: the edit history and related file are absent, but the
    // cursor section is still rendered in full.
    let cursor_section_only = indoc! {r#"
        <|file_sep|>test.rs
        <|fim_prefix|>
        <|fim_middle|>current
        fn <|user_cursor|>main() {}
        <|fim_suffix|>
        <|fim_middle|>updated
    "#};

    assert_eq!(format_with_budget(&prompt_input, 30), cursor_section_only);
}
3142
3143    fn format_seed_coder(input: &ZetaPromptInput) -> String {
3144        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, 10000)
3145    }
3146
3147    fn format_seed_coder_with_budget(input: &ZetaPromptInput, max_tokens: usize) -> String {
3148        format_prompt_with_budget_for_format(input, ZetaFormat::V0211SeedCoder, max_tokens)
3149    }
3150
#[test]
fn test_seed_coder_basic_format() {
    let events = vec![make_event("a.rs", "-old\n+new\n")];
    let related_files = vec![make_related_file("related.rs", "fn helper() {}\n")];
    let prompt_input = make_input("prefix\neditable\nsuffix", 7..15, 10, events, related_files);

    // The suffix section comes first, then the related file, the edit
    // history and the cursor file inside the prefix section.
    let expected = indoc! {r#"
        <[fim-suffix]>
        suffix
        <[fim-prefix]><filename>related.rs
        fn helper() {}

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -old
        +new

        <filename>test.rs
        prefix
        <<<<<<< CURRENT
        edi<|user_cursor|>table
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&prompt_input), expected);
}
3183
#[test]
fn test_seed_coder_no_context() {
    // No events and no related files: only the cursor file is rendered.
    let prompt_input = make_input("before\nmiddle\nafter", 7..13, 10, vec![], vec![]);

    let expected = indoc! {r#"
        <[fim-suffix]>
        after
        <[fim-prefix]><filename>test.rs
        before
        <<<<<<< CURRENT
        mid<|user_cursor|>dle
        =======
        <[fim-middle]>"#};

    assert_eq!(format_seed_coder(&prompt_input), expected);
}
3201
#[test]
fn test_seed_coder_truncation_drops_context() {
    let events = vec![make_event("a.rs", "-x\n+y\n")];
    let related_files = vec![make_related_file("r1.rs", "content\n")];
    let prompt_input = make_input("code", 0..4, 2, events, related_files);

    // Large budget: everything is included.
    let with_context = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>r1.rs
        content

        <filename>edit_history
        --- a/a.rs
        +++ b/a.rs
        -x
        +y

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&prompt_input), with_context);

    // Tight budget: the context is dropped but the cursor section remains.
    let cursor_only = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder_with_budget(&prompt_input, 30), cursor_only);
}
3245
#[test]
fn test_seed_coder_truncation_prioritizes_lower_order() {
    // low_prio.rs carries an order-10 excerpt, high_prio.rs an order-1 excerpt.
    let low_prio = RelatedFile {
        path: Path::new("low_prio.rs").into(),
        max_row: 5,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..5,
            text: "low prio\n".into(),
            order: 10,
        }],
    };
    let high_prio = RelatedFile {
        path: Path::new("high_prio.rs").into(),
        max_row: 5,
        in_open_source_repo: false,
        excerpts: vec![RelatedExcerpt {
            row_range: 0..5,
            text: "high prio\n".into(),
            order: 1,
        }],
    };
    let prompt_input = make_input("code", 0..4, 2, vec![], vec![low_prio, high_prio]);

    // Large budget: both files are rendered, in the order they were supplied.
    let both_files = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>low_prio.rs
        low prio
        <filename>high_prio.rs
        high prio

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(format_seed_coder(&prompt_input), both_files);

    // Tight budget: only high_prio is included.
    // Cursor sections cost 25 tokens, so budget 44 leaves 19 for related files.
    // high_prio header (7) + excerpt (3) = 10, fits. low_prio would add 10 more = 20 > 19.
    let high_prio_only = indoc! {r#"
        <[fim-suffix]>
        <[fim-prefix]><filename>high_prio.rs
        high prio

        <filename>test.rs
        <<<<<<< CURRENT
        co<|user_cursor|>de
        =======
        <[fim-middle]>"#};
    assert_eq!(
        format_seed_coder_with_budget(&prompt_input, 44),
        high_prio_only
    );
}
3311
#[test]
fn test_seed_coder_clean_output() {
    // The cleaned output is the same whether or not the model emitted the
    // trailing `>>>>>>> UPDATED` line.
    let raw_outputs = ["new code\n>>>>>>> UPDATED\n", "new code\n"];

    for raw_output in raw_outputs {
        assert_eq!(
            clean_zeta2_model_output(raw_output, ZetaFormat::V0211SeedCoder),
            "new code\n"
        );
    }
}
3326
#[test]
fn test_format_zeta1_from_input_basic() {
    let excerpt = "fn before() {}\nfn foo() {\n    let x = 1;\n}\nfn after() {}\n";
    let editable = 15..41;
    let context = 0..excerpt.len();

    // The excerpt starts at row 0 and the context starts at offset 0, so the
    // start-of-file marker is expected in the output.
    let prompt_input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 30,
        excerpt_start_row: Some(0),
        events: vec![Arc::new(make_event("other.rs", "-old\n+new\n"))],
        related_files: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: editable.clone(),
            editable_180: editable.clone(),
            editable_350: editable.clone(),
            editable_150_context_350: context.clone(),
            editable_180_context_350: context.clone(),
            editable_350_context_150: context.clone(),
            ..Default::default()
        },
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let expected = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n",
        "\n",
        "### User Edits:\n",
        "\n",
        "User edited other.rs:\n",
        "```diff\n",
        "-old\n",
        "+new\n",
        "\n",
        "```\n",
        "\n",
        "### User Excerpt:\n",
        "\n",
        "```src/main.rs\n",
        "<|start_of_file|>\n",
        "fn before() {}\n",
        "<|editable_region_start|>\n",
        "fn foo() {\n",
        "    <|user_cursor_is_here|>let x = 1;\n",
        "\n",
        "<|editable_region_end|>}\n",
        "fn after() {}\n",
        "\n",
        "```\n",
        "\n",
        "### Response:\n",
    );

    let actual = zeta1::format_zeta1_from_input(&prompt_input, editable, context);
    assert_eq!(actual, expected);
}
3389
#[test]
fn test_format_zeta1_from_input_no_start_of_file() {
    let excerpt = "fn foo() {\n    let x = 1;\n}\n";
    // Editable and context ranges both cover the whole 28-byte excerpt.
    let whole = 0..28;

    // The excerpt starts at row 10, so no start-of-file marker is expected.
    let prompt_input = ZetaPromptInput {
        cursor_path: Path::new("src/main.rs").into(),
        cursor_excerpt: excerpt.into(),
        cursor_offset_in_excerpt: 15,
        excerpt_start_row: Some(10),
        events: vec![],
        related_files: vec![],
        excerpt_ranges: ExcerptRanges {
            editable_150: whole.clone(),
            editable_180: whole.clone(),
            editable_350: whole.clone(),
            editable_150_context_350: whole.clone(),
            editable_180_context_350: whole.clone(),
            editable_350_context_150: whole.clone(),
            ..Default::default()
        },
        experiment: None,
        in_open_source_repo: false,
        can_collect_data: false,
        repo_url: None,
    };

    let expected = concat!(
        "### Instruction:\n",
        "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
        "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
        "into account the cursor location.\n",
        "\n",
        "### User Edits:\n",
        "\n",
        "\n",
        "\n",
        "### User Excerpt:\n",
        "\n",
        "```src/main.rs\n",
        "<|editable_region_start|>\n",
        "fn foo() {\n",
        "    <|user_cursor_is_here|>let x = 1;\n",
        "}\n",
        "\n",
        "<|editable_region_end|>\n",
        "```\n",
        "\n",
        "### Response:\n",
    );

    let actual = zeta1::format_zeta1_from_input(&prompt_input, whole.clone(), whole);
    assert_eq!(actual, expected);
}
3444
3445    #[test]
3446    fn test_format_zeta1_from_input_with_sub_ranges() {
3447        let excerpt = "// prefix\nfn foo() {\n    let x = 1;\n}\n// suffix\n";
3448        let editable_range = 10..37;
3449        let context_range = 0..excerpt.len();
3450
3451        let input = ZetaPromptInput {
3452            cursor_path: Path::new("test.rs").into(),
3453            cursor_excerpt: excerpt.into(),
3454            cursor_offset_in_excerpt: 25,
3455            excerpt_start_row: Some(0),
3456            events: vec![],
3457            related_files: vec![],
3458            excerpt_ranges: ExcerptRanges {
3459                editable_150: editable_range.clone(),
3460                editable_180: editable_range.clone(),
3461                editable_350: editable_range.clone(),
3462                editable_150_context_350: context_range.clone(),
3463                editable_180_context_350: context_range.clone(),
3464                editable_350_context_150: context_range.clone(),
3465                ..Default::default()
3466            },
3467            experiment: None,
3468            in_open_source_repo: false,
3469            can_collect_data: false,
3470            repo_url: None,
3471        };
3472
3473        let prompt = zeta1::format_zeta1_from_input(&input, editable_range, context_range);
3474
3475        assert_eq!(
3476            prompt,
3477            concat!(
3478                "### Instruction:\n",
3479                "You are a code completion assistant and your task is to analyze user edits and then rewrite an ",
3480                "excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking ",
3481                "into account the cursor location.\n",
3482                "\n",
3483                "### User Edits:\n",
3484                "\n",
3485                "\n",
3486                "\n",
3487                "### User Excerpt:\n",
3488                "\n",
3489                "```test.rs\n",
3490                "<|start_of_file|>\n",
3491                "// prefix\n",
3492                "<|editable_region_start|>\n",
3493                "fn foo() {\n",
3494                "    <|user_cursor_is_here|>let x = 1;\n",
3495                "}\n",
3496                "<|editable_region_end|>\n",
3497                "// suffix\n",
3498                "\n",
3499                "```\n",
3500                "\n",
3501                "### Response:\n",
3502            ),
3503        );
3504    }
3505
3506    #[test]
3507    fn test_clean_zeta1_model_output_basic() {
3508        let output = indoc! {"
3509            <|editable_region_start|>
3510            fn main() {
3511                println!(\"hello\");
3512            }
3513            <|editable_region_end|>
3514        "};
3515
3516        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3517        assert_eq!(cleaned, "fn main() {\n    println!(\"hello\");\n}");
3518    }
3519
3520    #[test]
3521    fn test_clean_zeta1_model_output_with_cursor() {
3522        let output = indoc! {"
3523            <|editable_region_start|>
3524            fn main() {
3525                <|user_cursor_is_here|>println!(\"hello\");
3526            }
3527            <|editable_region_end|>
3528        "};
3529
3530        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3531        assert_eq!(
3532            cleaned,
3533            "fn main() {\n    <|user_cursor|>println!(\"hello\");\n}"
3534        );
3535    }
3536
3537    #[test]
3538    fn test_clean_zeta1_model_output_no_markers() {
3539        let output = "fn main() {}\n";
3540        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3541        assert_eq!(cleaned, "fn main() {}\n");
3542    }
3543
3544    #[test]
3545    fn test_clean_zeta1_model_output_empty_region() {
3546        let output = "<|editable_region_start|>\n<|editable_region_end|>\n";
3547        let cleaned = zeta1::clean_zeta1_model_output(output).unwrap();
3548        assert_eq!(cleaned, "");
3549    }
3550}